# from google.colab import drive
# drive.mount('/content/drive')
# %cd '/content/drive/MyDrive/CS460 ML Project /CODES/EXPERIMENTS/ARIMA'
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import array
# Read Odisha.csv with the DATE column parsed as datetimes, keeping only the
# rows that contain no missing values.
df = pd.read_csv('Odisha.csv', parse_dates=['DATE']).dropna()
from datetime import datetime
def to_integer(dt_time) -> int:
    """Encode a calendar date as the integer ``YYYYMMDD``.

    Args:
        dt_time: Any object exposing integer ``year``, ``month`` and ``day``
            attributes (for example ``datetime.date`` or ``datetime.datetime``).

    Returns:
        ``10000 * year + 100 * month + day``; 2021-03-05 becomes ``20210305``.
    """
    return 10000 * dt_time.year + 100 * dt_time.month + dt_time.day
# Numeric time feature: every DATE expressed as whole seconds since the Unix
# epoch (Timestamp.timestamp() rounded to the nearest integer).  The previous
# loop also converted each value back to a datetime that was never used; that
# dead conversion is dropped.
int_date = [int(round(ts.timestamp())) for ts in df['DATE']]
df['int_date'] = int_date
# Label the rows by date so later slices and plots carry a datetime index,
# then remove the DATE column, which is now duplicated by the index.
df.index = pd.to_datetime(df['DATE'])
# (A variant that also dropped TESTED, RECOVERED and DEATH at this point is
# left disabled; only DATE is removed here.)
df = df.drop(columns=['DATE'])
df  # notebook-style echo of the frame; has no effect when run as a script
# Chronological hold-out split, expressed as positional row numbers.
start_test = 580             # first row of the test window
end_train = start_test - 1   # last row used for training (inclusive)
end_test = 586               # one past the last test row (exclusive)

# Predictors: every column except ACTIVE (the target), RECOVERED, DEATH and
# TESTED.  Built once and sliced for both partitions.
features = df.drop(columns=['ACTIVE', 'RECOVERED', 'DEATH', 'TESTED'])
target = df['ACTIVE']

# end_train is an inclusive position while slice stops are exclusive, hence
# the "+ 1".  Slicing with [:end_train] left row 579 out of both the training
# and the test set.  .iloc makes the positional intent explicit on the
# datetime-indexed frame.
X_train = features.iloc[:end_train + 1]
X_test = features.iloc[start_test:end_test]
y_train = target.iloc[:end_train + 1]
y_test = target.iloc[start_test:end_test]
from sklearn.ensemble import RandomForestRegressor
# Random forest of 100 trees, each at most 10 levels deep, requiring at least
# 3 samples to split a node and at least 3 samples in every leaf.
# The split criterion is left at the library default (mean squared error):
# the explicit spelling "mse" was deprecated in scikit-learn 1.0 and removed
# in 1.2 (renamed "squared_error"), so omitting it is the only form that every
# release accepts while selecting the same criterion.
model = RandomForestRegressor(
    n_estimators=100,
    min_samples_leaf=3,
    min_samples_split=3,
    max_depth=10,
)
model.fit(X_train, y_train)

# Coefficient of determination (R^2) on the held-out window.  The value is
# only displayed when this is the last expression of a notebook cell.
model.score(X_test, y_test)
# for i in range(1,100):
# model = RandomForestRegressor(n_estimators=i)
# model.fit(X_train, y_train)
# print(str(i) + ' : ' + str(model.score(X_test,y_test)))
# Forecast the held-out rows and label the result with the dates of those
# rows, so the series shares its index with y_test.
test_dates = df.index[start_test:end_test]
y_pred = pd.Series(model.predict(X_test), index=test_dates, name='Predicted')
import matplotlib.pyplot as plt
# NOTE(review): the four plot calls appear to form two charts (forecast vs.
# test window, forecast vs. full history) — confirm.  Executed back-to-back
# they all landed on one axes and drew 'Predicted' twice, so each chart now
# gets its own figure.

# Chart 1: forecast against the actual values of the test window.
plt.figure()
ax = y_pred.plot(legend=True)
y_test.plot(ax=ax, legend=True)

# Chart 2: forecast overlaid on the complete ACTIVE series.
plt.figure()
ax = y_pred.plot(legend=True)
df['ACTIVE'].plot(ax=ax, legend=True)