!pip install --upgrade pip
import pandas as pd
import s3fs
import scipy
import matplotlib.pyplot as plt
import requests as rs
import numpy as np
import zipfile
fs = s3fs.S3FileSystem(anon=True)
fs.ls('datacases/datathon-2018-2/')
fs.ls('datacases/datathon-2018-2/telenor')
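# Optional sketch (the exact key 'data.zip' under the telenor/ prefix is an assumption):
# the archive could also be streamed straight from S3 instead of downloading it first.
# import io
# with fs.open('datacases/datathon-2018-2/telenor/data.zip', 'rb') as remote:
#     zf = zipfile.ZipFile(io.BytesIO(remote.read()))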
#URL="https://storage.googleapis.com/global-datathon-2018/telenor/data.zip"
#print("downloading with requests")
#r = rs.get(URL)
#with open("code.zip", "wb") as code:
# code.write(r.content)
#print("done downloading")
zf = zipfile.ZipFile('code.zip')
Telenor = pd.read_csv(zf.open('data.csv'),encoding="latin",delimiter=";")
Telenor.head(10)
Telenor.shape
Telenor.dtypes
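# Quick completeness check before filtering (sketch): missing values per column.
Telenor.isnull().sum().sort_values(ascending=False).head(10)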
# Rows where every delay metric is zero, i.e. no measured delay/failure
Telenor1 = Telenor[(Telenor.FIRST_GET_RESPONSE_SUCCESS_D==0) & (Telenor.PAGE_BROWSING_DELAY==0) & (Telenor.TCP_SETUP_TOTAL_DELAY==0) &
                   (Telenor.PAGE_CONTENT_DOWNLOAD_TOTAL_D==0) & (Telenor.FIRST_DNS_RESPONSE_SUCCESS_D==0) &
                   (Telenor.DNS_RESPONSE_SUCCESS_DELAY==0) & (Telenor.FIRST_TCP_RESPONSE_SUCCESS_D==0) &
                   (Telenor.PAGE_SR_DELAYS==0) & (Telenor.SYN_SYN_DELAY==0) & (Telenor.TCP_CONNECT_DELAY==0) & (Telenor.PAGE_BROWSING_DELAYS==0)]
Telenor1.head(10)
# Rows with at least one non-zero delay metric, i.e. the failure/delay events.
# .copy() avoids SettingWithCopyWarning when new columns are added below.
Telenor2 = Telenor[(Telenor.FIRST_GET_RESPONSE_SUCCESS_D!=0) | (Telenor.PAGE_BROWSING_DELAY!=0) | (Telenor.TCP_SETUP_TOTAL_DELAY!=0) |
                   (Telenor.PAGE_CONTENT_DOWNLOAD_TOTAL_D!=0) | (Telenor.FIRST_DNS_RESPONSE_SUCCESS_D!=0) |
                   (Telenor.DNS_RESPONSE_SUCCESS_DELAY!=0) | (Telenor.FIRST_TCP_RESPONSE_SUCCESS_D!=0) |
                   (Telenor.PAGE_SR_DELAYS!=0) | (Telenor.SYN_SYN_DELAY!=0) | (Telenor.TCP_CONNECT_DELAY!=0) | (Telenor.PAGE_BROWSING_DELAYS!=0)].copy()
Telenor2.head(10)
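# Equivalent, more compact split (sketch): keeping the delay columns in a list
# makes the zero / non-zero filters easier to keep in sync.
delay_cols = ['FIRST_GET_RESPONSE_SUCCESS_D', 'PAGE_BROWSING_DELAY', 'TCP_SETUP_TOTAL_DELAY',
              'PAGE_CONTENT_DOWNLOAD_TOTAL_D', 'FIRST_DNS_RESPONSE_SUCCESS_D',
              'DNS_RESPONSE_SUCCESS_DELAY', 'FIRST_TCP_RESPONSE_SUCCESS_D',
              'PAGE_SR_DELAYS', 'SYN_SYN_DELAY', 'TCP_CONNECT_DELAY', 'PAGE_BROWSING_DELAYS']
# Telenor1 = Telenor[Telenor[delay_cols].eq(0).all(axis=1)]
# Telenor2 = Telenor[Telenor[delay_cols].ne(0).any(axis=1)].copy()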
Telenor2[['RAVEN_NAME','DATETIME']].groupby('RAVEN_NAME').count().sort_values(by = 'DATETIME', ascending = False).head(10)
Telenor1[['RAVEN_NAME','DATETIME']].groupby('RAVEN_NAME').count().sort_values(by = 'DATETIME', ascending = False).head(10)
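# Sketch: failure share per probe (assumes RAVEN_NAME identifies a measurement probe),
# combining the two counts above instead of comparing them by eye.
probe_total = Telenor.groupby('RAVEN_NAME')['DATETIME'].count()
probe_failed = Telenor2.groupby('RAVEN_NAME')['DATETIME'].count()
(probe_failed / probe_total).sort_values(ascending=False).head(10)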
Telenor2[['FAMILY_NAME','DATETIME']].groupby('FAMILY_NAME').count().sort_values(by = 'DATETIME', ascending = False).head(1)
Telenor2[['FAMILY_NAME','DATETIME']].groupby('FAMILY_NAME').count().sort_values(by = 'DATETIME', ascending = True).head(1)
Telenor2[['MEMBER_NAME','DATETIME']].groupby('MEMBER_NAME').count().sort_values(by = 'DATETIME', ascending = False).head(1)
Telenor2[['MEMBER_NAME','DATETIME']].groupby('MEMBER_NAME').count().sort_values(by = 'DATETIME', ascending = True).head(1)
Telenor2['DATETIME'] = pd.to_datetime(Telenor2['DATETIME'])
Telenor2['DATE'] = Telenor2['DATETIME'].dt.date
Telenor2['TIME'] = Telenor2['DATETIME'].dt.time
Telenor2.head(10)
# Daily failure counts: number of failure rows per calendar day
Telenor_Failures = Telenor2[['DATE','TCP_CONNECT_DELAY']].groupby('DATE').count()
Telenor_Failures.columns = ['FAILURES']
Telenor_Failures.head()
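# Sketch: make the daily index continuous before modelling, assuming days with no
# rows mean "no failures recorded" rather than missing measurements.
# Telenor_Failures.index = pd.to_datetime(Telenor_Failures.index)
# Telenor_Failures = Telenor_Failures.asfreq('D', fill_value=0)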
Telenor_Failures.to_csv('fail.csv')
# pandas parses ISO dates natively; pd.datetime is deprecated, so no custom date_parser is needed
data = pd.read_csv('fail.csv', parse_dates=['DATE'], index_col='DATE')
data.head()
data.dtypes
ts = Telenor_Failures['FAILURES']  # equivalently data['FAILURES'], which carries a DatetimeIndex
ts.head()
import matplotlib.pyplot as plt  # pyplot preferred over the discouraged pylab interface
%matplotlib inline
plt.plot(ts)
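# Label the quick-look plot (axis names are assumed from the daily aggregation above).
plt.title('Daily failure count')
plt.xlabel('Date')
plt.ylabel('Failures')
plt.show()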
## Check for stationarity
from statsmodels.tsa.stattools import adfuller
def test_stationarity(timeseries):
    # Determine rolling statistics (pd.rolling_mean/rolling_std were removed; use .rolling())
    rolmean = timeseries.rolling(window=12).mean()
    rolstd = timeseries.rolling(window=12).std()
    # Plot rolling statistics:
    orig = plt.plot(timeseries, color='blue', label='Original')
    mean = plt.plot(rolmean, color='red', label='Rolling Mean')
    std = plt.plot(rolstd, color='black', label='Rolling Std')
    plt.legend(loc='best')
    plt.title('Rolling Mean & Standard Deviation')
    plt.show(block=False)
    # Perform Dickey-Fuller test:
    print('Results of Dickey-Fuller Test:')
    dftest = adfuller(timeseries, autolag='AIC')
    dfoutput = pd.Series(dftest[0:4], index=['Test Statistic', 'p-value', '#Lags Used', 'Number of Observations Used'])
    for key, value in dftest[4].items():
        dfoutput['Critical Value (%s)' % key] = value
    print(dfoutput)
#test_stationarity(ts)
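# Sketch of the decomposition step referenced below, assuming a weekly (7-day)
# cycle in the daily counts; older statsmodels uses freq= instead of period=.
# from statsmodels.tsa.seasonal import seasonal_decompose
# decomposition = seasonal_decompose(ts.astype(float), model='additive', period=7)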
#fig = decomposition.plot()
%matplotlib inline
import cufflinks as cf
from plotly.offline import download_plotlyjs,init_notebook_mode,plot,iplot
init_notebook_mode(connected = True)
cf.go_offline()
Telenor_Failures.iplot(title = 'Total failures per day')
Telenor_Failures.head()
from pmdarima.arima import auto_arima  # pyramid-arima has been renamed to pmdarima
# Stepwise search over (p,d,q)(P,D,Q,m) candidates; m=12 assumes a 12-period seasonal cycle
stepwise_model = auto_arima(Telenor_Failures, start_p=1, start_q=1,
                            max_p=3, max_q=3, m=12,
                            start_P=0, seasonal=True,
                            d=1, D=1, trace=True,
                            error_action='ignore',
                            suppress_warnings=True,
                            stepwise=True)
print(stepwise_model.aic())
stepwise_model.predict(n_periods=5)
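# Sketch: visualise the 5-step-ahead forecast next to the observed series.
# Assumes the daily index continues without gaps ('D' frequency).
forecast = stepwise_model.predict(n_periods=5)
future_index = pd.date_range(pd.to_datetime(ts.index[-1]), periods=6, freq='D')[1:]
plt.plot(pd.to_datetime(ts.index), ts.values, label='Observed')
plt.plot(future_index, forecast, label='Forecast')
plt.legend(loc='best')
plt.show()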