In [26]:
# from google.colab import drive
# drive.mount('/content/drive')
In [27]:
# %cd '/content/drive/MyDrive/CS460 ML Project /CODES/EXPERIMENTS/ARIMA'
In [28]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import array
In [29]:
df = pd.read_csv('Odisha.csv',parse_dates=['DATE'])
df = df.dropna()
In [30]:
from datetime import datetime
In [31]:
def to_integer(dt_time):
  return 10000*dt_time.year + 100*dt_time.month + dt_time.day
In [32]:
int_date = []
for i in df['DATE']:
  timestamp = int(round(i.timestamp()))
  dt_object = datetime.fromtimestamp(timestamp)
  # int_date.append(to_integer(datetime.date(dt_object)))
  int_date.append(timestamp)
df['int_date'] = int_date
In [33]:
df.index = pd.to_datetime(df.DATE)
# df = df.drop(['DATE', 'TESTED', 'RECOVERED', 'DEATH'],axis='columns')
df = df.drop(['DATE'],axis='columns')
df
Out[33]:
ACTIVE RECOVERED DEATH TESTED int_date
DATE
2020-03-16 1 0 0 0 1584316800
2020-03-17 1 0 0 0 1584403200
2020-03-18 1 0 0 0 1584489600
2020-03-19 2 0 0 0 1584576000
2020-03-20 2 0 0 0 1584662400
... ... ... ... ... ...
2021-10-18 4294 1022780 8343 21118450 1634515200
2021-10-19 4228 1023398 8347 21175476 1634601600
2021-10-20 4334 1023849 8349 21246062 1634688000
2021-10-21 4283 1024422 8351 21314981 1634774400
2021-10-22 4747 1024422 8354 21314981 1634860800

586 rows × 5 columns

In [34]:
start_test = 580
end_train = start_test - 1
end_test = 586
In [35]:
X_train = df.drop(['ACTIVE', 'RECOVERED', 'DEATH', 'TESTED'], axis='columns')[:end_train]
X_test = df.drop(['ACTIVE', 'RECOVERED', 'DEATH', 'TESTED'], axis='columns')[start_test:end_test]
y_train = df['ACTIVE'][:end_train]
y_test = df['ACTIVE'][start_test:end_test]
In [36]:
from sklearn.ensemble import RandomForestRegressor

model = RandomForestRegressor(n_estimators=100, criterion="mse", min_samples_leaf=3, min_samples_split=3, max_depth=10)
model.fit(X_train, y_train)
model.score(X_test,y_test)
Out[36]:
-3.219030204121999
In [37]:
# for i in range(1,100):
#   model = RandomForestRegressor(n_estimators=i)
#   model.fit(X_train, y_train)
#   print(str(i) + ' : ' + str(model.score(X_test,y_test)))
In [38]:
y_pred = model.predict(X_test)
y_pred = pd.Series(y_pred, name='Predicted', index=df.index[start_test:end_test])
In [39]:
import matplotlib.pyplot as plt
y_pred.plot(legend=True)
y_test.plot(legend=True)
Out[39]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f6c3dcac510>
In [40]:
y_pred.plot(legend=True)
df['ACTIVE'].plot(legend=True)
Out[40]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f6c3dbabc90>