Random Forest
Skeleton Code:
#Download Total.csv from FYP drive for verification.
#Random Forest Regression!
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from math import sqrt
from sklearn import metrics
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
# Read in data. 'Total.csv' is resolved relative to the current working
# directory, so it must be downloaded there before running.
features = pd.read_csv('Total.csv')
# Peek at the first 7 rows. The result is only shown in an interactive
# session (notebook/REPL); run as a script, it is simply discarded.
features.head(7)

# Labels are the two columns we want to predict.
# NOTE(review): these column names look like data values, which suggests
# Total.csv has no header row and its first record is being consumed as the
# header -- confirm, and pass header=None / names=... to read_csv if so.
labels1 = np.array(features['122.57'])
labels2 = np.array(features['58.825'])
# Put the two 1-D targets side by side -> shape (n_samples, 2), so a single
# forest is trained as a multi-output regressor.
label4 = np.column_stack([labels1, labels2])

# Remove the labels from the features (axis=1 drops columns, not rows).
features = features.drop(['122.57', '58.825'], axis=1)
# Saving feature names for later use
feature_list = list(features.columns)
# Convert to numpy array
features = np.array(features)

# Using scikit-learn to split the data into training and testing sets:
# 20% of the rows are held out for testing; the fixed seed keeps the split
# reproducible between runs.
train_features, test_features, train_labels, test_labels = train_test_split(
    features, label4, test_size=0.2, random_state=42)

# Instantiate model with 1000 decision trees (seeded for reproducibility)
rf = RandomForestRegressor(n_estimators=1000, random_state=42)
# Train the model on training data
rf.fit(train_features, train_labels)

# Use the forest's predict method on the test data
predictions = rf.predict(test_features)

# Element-wise absolute errors, one column per predicted target
errors = np.abs(predictions - test_labels)

# Print out the mean absolute error and the root mean squared error.
# scikit-learn metrics take (y_true, y_pred) in that order; MAE/MSE happen to
# be symmetric, but keeping the documented order avoids silent mistakes if an
# asymmetric metric (e.g. r2_score) is added later. With 2-D targets the
# default is a uniform average over both outputs.
errs = metrics.mean_absolute_error(test_labels, predictions)
errs1 = metrics.mean_squared_error(test_labels, predictions)
print(errs)
print(sqrt(errs1))
3 features:
- average:
- ('Mean Absolute Error:', 19.773915422577186)
- ('Mean Squared Error:', 688.2131623931618)
- ('Root Mean Squared Error:', 26.233817152544955)
- Variance score: 0.16
- bpmax:
- ('Mean Absolute Error:', 19.010930070565177)
- ('Mean Squared Error:', 620.09762743419628)
- ('Root Mean Squared Error:', 24.901759524864829)
- Variance score: 0.34
- bpmin:
- ('Mean Absolute Error:', 20.368236251208742)
- ('Mean Squared Error:', 749.56642181470431)
- ('Root Mean Squared Error:', 27.378210712438904)
- Variance score: -0.02
7 features:
- average:
- ('Mean Absolute Error:', 7.7504163296962254)
- ('Mean Squared Error:', 181.87177031613697)
- ('Root Mean Squared Error:', 13.485984217554794)
- Variance score: 0.79
- bpmax:
- ('Mean Absolute Error:', 12.222166154148635)
- ('Mean Squared Error:', 289.97170000699634)
- ('Root Mean Squared Error:', 17.02855542924873)
- Variance score: 0.69
- bpmin:
- ('Mean Absolute Error:', 3.555187104277004)
- ('Mean Squared Error:', 85.12462334840987)
- ('Root Mean Squared Error:', 9.2263006318030776)
- Variance score: 0.88