import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from statsmodels.tsa.ar_model import AutoReg

# Load the daily minimum temperature series, parsing the Date column as the index
df = pd.read_csv('min_temp_data.csv', index_col=0, parse_dates=True)
X = df.values  # temperature values as a plain NumPy array
print("shape of data", df.shape)
print("original dataset\n", df.head())
print('After extracting only temperature:\n', X)
shape of data (3650, 1)
original dataset
              Temp
Date
1981-01-01    20.7
1981-01-02    17.9
1981-01-03    18.8
1981-01-04    14.6
1981-01-05    15.8
After extracting only temperature:
[[20.7]
[17.9]
[18.8]
...
[13.5]
[15.7]
[13. ]]
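AutoReg assumes evenly spaced observations, so a quick check that the index really is daily and gap-free can save trouble later; a small sketch (purely a sanity check, not part of the original workflow):
# Verify that the index is one observation per calendar day
print(pd.infer_freq(df.index))      # 'D' for a complete daily index
print(df['Temp'].isna().sum())      # number of missing temperature values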
df.plot()
#df[:200].plot()
[Line plot of the full daily minimum temperature series, x-axis labelled 'Date']
from statsmodels.tsa.stattools import adfuller

# Augmented Dickey-Fuller test: the null hypothesis is that the series has a unit root
dftest = adfuller(df['Temp'], autolag='AIC')
#dftest
print('1. ADF', dftest[0])
print('2. P value', dftest[1])
print('3. Number of Lags', dftest[2])
print('4. Number of observations used for ADF Regression and Critical values Calculation', dftest[3])
print('critical values')
for key, val in dftest[4].items():
    print('\t', key, ":", '\t', val)
1. ADF -4.444804924611684
2. P value 0.0002470826300361156
3. Number of Lags 20
4. Number of observations used for ADF Regression and Critical values Calculation 3629
critical values
1% : -3.4321532327220154
5% : -2.862336767636517
10% : -2.56719413172842
dftest
(-4.444804924611684,
0.0002470826300361156,
20,
3629,
{'1%': -3.4321532327220154,
'5%': -2.862336767636517,
'10%': -2.56719413172842},
16642.822304301197)
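A compact way to turn the ADF output into a yes/no stationarity decision (0.05 is just the conventional significance level, not something the test itself mandates):
# Reject the unit-root null hypothesis when the p-value is below 0.05
adf_stat, p_value = dftest[0], dftest[1]
if p_value < 0.05:
    print('p-value', p_value, '< 0.05 -> series treated as stationary')
else:
    print('p-value', p_value, '>= 0.05 -> consider differencing before modelling')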
from statsmodels.graphics.tsaplots import plot_pacf, plot_acf
pacf = plot_pacf(df['Temp'], lags=25)  # partial autocorrelation: guides the AR lag order
acf = plot_acf(df['Temp'], lags=25)    # autocorrelation of the raw series
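The same information as the two plots can be read off numerically, which makes the lag order chosen below easier to justify; a sketch using the stattools counterparts of the plotting functions:
from statsmodels.tsa import stattools
# Numeric partial autocorrelations (same data the PACF plot shows)
pacf_vals = stattools.pacf(df['Temp'], nlags=25)
print(np.round(pacf_vals[:11], 3))  # partial autocorrelations for lags 0-10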
# Hold out the last 7 days as a test window
train = X[:len(X)-7]
#train
test = X[len(X)-7:]
#test
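The split above works on the bare NumPy array; an equivalent split on the DataFrame keeps the dates attached, which is handy for plotting (train_df and test_df are just illustrative names):
# Same 7-day hold-out, but keeping the DatetimeIndex
train_df = df.iloc[:-7]
test_df = df.iloc[-7:]
print(len(train_df), len(test_df))  # 3643 and 7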
# Fit an autoregressive model with 10 lags on the training data
model = AutoReg(train, lags=10, old_names=False).fit()
print(model.summary())
                            AutoReg Model Results
==============================================================================
Dep. Variable:                      y   No. Observations:                 3643
Model:                    AutoReg(10)   Log Likelihood               -8375.514
Method:               Conditional MLE   S.D. of innovations              2.427
Date:                Thu, 19 May 2022   AIC                              1.780
Time:                        11:36:47   BIC                              1.800
Sample:                            10   HQIC                             1.787
                                 3643
==============================================================================
coef std err z P>|z| [0.025 0.975]
------------------------------------------------------------------------------
const 0.9350 0.142 6.603 0.000 0.657 1.213
y.L1 0.6103 0.017 36.795 0.000 0.578 0.643
y.L2 -0.0832 0.019 -4.289 0.000 -0.121 -0.045
y.L3 0.0631 0.019 3.246 0.001 0.025 0.101
y.L4 0.0512 0.019 2.632 0.008 0.013 0.089
y.L5 0.0547 0.019 2.810 0.005 0.017 0.093
y.L6 0.0422 0.019 2.166 0.030 0.004 0.080
y.L7 0.0590 0.019 3.032 0.002 0.021 0.097
y.L8 0.0324 0.019 1.664 0.096 -0.006 0.070
y.L9 0.0502 0.019 2.585 0.010 0.012 0.088
y.L10 0.0360 0.017 2.175 0.030 0.004 0.069
Roots
==============================================================================
Real Imaginary Modulus Frequency
------------------------------------------------------------------------------
AR.1 1.0275 -0.0000j 1.0275 -0.0000
AR.2 0.9811 -0.7934j 1.2617 -0.1082
AR.3 0.9811 +0.7934j 1.2617 0.1082
AR.4 0.4192 -1.3007j 1.3665 -0.2004
AR.5 0.4192 +1.3007j 1.3665 0.2004
AR.6 -0.4338 -1.3906j 1.4567 -0.2981
AR.7 -0.4338 +1.3906j 1.4567 0.2981
AR.8 -1.3012 -0.8676j 1.5639 -0.4064
AR.9 -1.3012 +0.8676j 1.5639 0.4064
AR.10 -1.7501 -0.0000j 1.7501 -0.5000
------------------------------------------------------------------------------
print(len(train))
3643
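Lag 10 was picked by eye from the PACF plot; statsmodels can also select the order by information criterion. A sketch (the automatically chosen lags may well differ from 10):
from statsmodels.tsa.ar_model import ar_select_order
# Search AR orders up to 15 and keep the one with the best BIC (the default criterion)
sel = ar_select_order(train, maxlag=15)
print(sel.ar_lags)            # lags the criterion prefers
auto_model = sel.model.fit()  # AutoReg model built with those lags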
# Predict the 7 held-out days (indices are relative to the series the model was fitted on)
pred = model.predict(start=len(train), end=len(X)-1, dynamic=False)
plt.plot(pred)
plt.plot(test, color='red')
print(pred)
[11.47679312 12.63726604 13.0574664 13.03378918 12.8259307 12.65767226
12.43273596]
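Adding labels and a legend makes the comparison plot easier to read; a small cosmetic sketch of the same plot:
plt.plot(pred, label='AR(10) prediction')
plt.plot(test, color='red', label='actual')
plt.legend()
plt.title('Last 7 days: predicted vs actual minimum temperature')
plt.show()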
from math import sqrt
from sklearn.metrics import mean_squared_error

# Root mean squared error over the 7-day test window
rmse = sqrt(mean_squared_error(test, pred))
print(rmse)
1.563819622086097
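The same error can be cross-checked with plain NumPy, and a mean absolute error is often reported alongside RMSE; a quick sketch:
# Cross-check the RMSE without scikit-learn and add the MAE for context
errors = test.ravel() - pred
print('RMSE:', np.sqrt(np.mean(errors ** 2)))  # should match the value above
print('MAE :', np.mean(np.abs(errors)))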
# Forecast beyond the end of the dataset (start/end are again relative to the fitted series)
pred_future = model.predict(start=len(X)+1, end=len(X)+7, dynamic=False)
print("The future prediction for the next week")
print(pred_future)
print('Number of predictions made:\t', len(pred_future))
The future prediction for the next week
[12.28608804 12.20309536 12.24915916 12.30618875 12.31199689 12.27225975
12.21843712]
Number of predictions made: 7
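These forecasts still come from the model fitted only on the training portion, and with start=len(X)+1 the window arguably begins one day later than "the next week" suggests. For a genuine forecast of the week immediately following the data, refitting on the full series is the usual approach; a hedged sketch:
# Refit on the complete series, then forecast the 7 days right after the last date
full_model = AutoReg(X, lags=10, old_names=False).fit()
next_week = full_model.predict(start=len(X), end=len(X) + 6, dynamic=False)
print(next_week)  # 7 values, one per day of the following week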