Random Forest

Skeleton Code:


#Download Total.csv from FYP drive for verification.
#Random Forest Regression!

import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from math import sqrt
from sklearn import metrics
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error

# Train a multi-output random forest regressor on Total.csv and report the
# mean absolute error and root mean squared error on a held-out test split.

# Read in data (Total.csv must be in the current working directory)
features = pd.read_csv('Total.csv')

# Labels are the values we want to predict: the two target columns, taken
# together as one 2-D array with one row per sample and one column per target.
label_columns = ['122.57', '58.825']
labels = np.array(features[label_columns])

# Remove the labels from the features
# axis 1 refers to the columns
features = features.drop(label_columns, axis=1)

# Saving feature names for later use
feature_list = list(features.columns)

# Convert to numpy array
features = np.array(features)

# Using scikit-learn to split the data into training (80%) and testing (20%)
# sets; the fixed random_state keeps the split reproducible between runs.
train_features, test_features, train_labels, test_labels = train_test_split(
    features, labels, test_size=0.2, random_state=42
)

# Instantiate model with 1000 decision trees
rf = RandomForestRegressor(n_estimators=1000, random_state=42)

# Train the model on training data
rf.fit(train_features, train_labels)

# Use the forest's predict method on the test data
predictions = rf.predict(test_features)

# Compute the error metrics. scikit-learn's signature is (y_true, y_pred), so
# the ground-truth labels go first; keeping this order matters as soon as an
# asymmetric metric (e.g. r2_score) is added alongside these.
mae = mean_absolute_error(test_labels, predictions)
mse = mean_squared_error(test_labels, predictions)

# Print out the mean absolute error, then the root mean squared error
print(mae)
print(sqrt(mse))

3 features:

  • average:
  • ('Mean Absolute Error:', 19.773915422577186)
  • ('Mean Squared Error:', 688.2131623931618)
  • ('Root Mean Squared Error:', 26.233817152544955)
  • Variance score: 0.16


  • bpmax:
  • ('Mean Absolute Error:', 19.010930070565177)
  • ('Mean Squared Error:', 620.09762743419628)
  • ('Root Mean Squared Error:', 24.901759524864829)
  • Variance score: 0.34


  • bpmin:
  • ('Mean Absolute Error:', 20.368236251208742)
  • ('Mean Squared Error:', 749.56642181470431)
  • ('Root Mean Squared Error:', 27.378210712438904)
  • Variance score: -0.02

7 features:

  • average:
  • ('Mean Absolute Error:', 7.7504163296962254)
  • ('Mean Squared Error:', 181.87177031613697)
  • ('Root Mean Squared Error:', 13.485984217554794)
  • Variance score: 0.79


  • bpmax:
  • ('Mean Absolute Error:', 12.222166154148635)
  • ('Mean Squared Error:', 289.97170000699634)
  • ('Root Mean Squared Error:', 17.02855542924873)
  • Variance score: 0.69

  • bpmin:
  • ('Mean Absolute Error:', 3.555187104277004)
  • ('Mean Squared Error:', 85.12462334840987)
  • ('Root Mean Squared Error:', 9.2263006318030776)
  • Variance score: 0.88