"""Random Forest regression on Total.csv (download from FYP drive for verification).

Trains a multi-output RandomForestRegressor to jointly predict two target
columns ('122.57' and '58.825') from the remaining columns, then reports
mean absolute error and root mean squared error on a held-out test set.
"""
from math import sqrt

import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.model_selection import train_test_split

# Read in data.
features = pd.read_csv('Total.csv')

# Labels are the two columns we want to predict, stacked into an
# (n_samples, 2) array so the forest trains as a multi-output regressor.
# NOTE(review): the label column names look like numeric data values —
# confirm Total.csv really has a header row; otherwise the first data
# row is being consumed as column names.
labels = np.column_stack([
    np.array(features['122.57']),
    np.array(features['58.825']),
])

# Remove the label columns from the features (axis=1 drops columns).
features = features.drop(['122.57', '58.825'], axis=1)

# Saving feature names for later use.
feature_list = list(features.columns)

# Convert to numpy array.
features = np.array(features)

# Split the data into training and testing sets (fixed seed so the
# split — and hence the reported errors — is reproducible).
train_features, test_features, train_labels, test_labels = train_test_split(
    features, labels, test_size=0.2, random_state=42)

# Instantiate model with 1000 decision trees and train on the training data.
rf = RandomForestRegressor(n_estimators=1000, random_state=42)
rf.fit(train_features, train_labels)

# Use the forest's predict method on the held-out test data.
predictions = rf.predict(test_features)

# Print the mean absolute error and root mean squared error.
# sklearn metrics take (y_true, y_pred) in that order; MAE/MSE are
# symmetric so the values match the original reversed call, but the
# conventional order matters for asymmetric metrics.
mae = mean_absolute_error(test_labels, predictions)
mse = mean_squared_error(test_labels, predictions)
print(mae)
print(sqrt(mse))
# Recorded run output (kept verbatim for reference; the tuple-style
# prints indicate these numbers came from a Python 2 run):
# 7 features:
# ('Mean Absolute Error:', 3.555187104277004)
# ('Mean Squared Error:', 85.12462334840987)
# ('Root Mean Squared Error:', 9.2263006318030776)
# Variance score: 0.88