from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn import metrics
#%matplotlib inline
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as seabornInstance
from numpy import array
# Load the service-level data; the relative path is resolved against the
# current working directory.
ServiceLevel = pd.read_csv("Service Level.csv")
# For a quick look at the data: ServiceLevel.head(), ServiceLevel.shape,
# ServiceLevel.describe()

# Scatter of SL against Volume Offered.
ServiceLevel.plot(x='Volume Offered', y='SL', style='o')
plt.title('SL vs Volume Offered')
plt.xlabel('Volume Offered')
plt.ylabel('SL')

# Distribution of SL (histogram with a KDE overlay) on its own figure.
plt.figure(figsize=(15, 10))
if hasattr(seabornInstance, 'histplot'):
    # distplot is deprecated in seaborn; this is its histplot equivalent.
    seabornInstance.histplot(
        ServiceLevel['SL'], kde=True, stat='density', kde_kws={'cut': 3}
    )
else:
    # Older seaborn releases that predate histplot.
    seabornInstance.distplot(ServiceLevel['SL'])
# Adjust the layout only after the axes exist; calling it on an empty
# figure has nothing to arrange.
plt.tight_layout()
# NOTE: no plt.show() is issued in this section, so the figures are built
# but not displayed when run as a plain script.
# Target: the SL column as a single-column 2-D array.
y = ServiceLevel['SL'].values[:, np.newaxis]
# Feature: the Volume Offered column, shaped the same way.
X = ServiceLevel['Volume Offered'].values[:, np.newaxis]

# Hold out 20% of the rows for testing; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit ordinary least squares on the training rows (fit returns the estimator).
regressor = LinearRegression().fit(X_train, y_train)
print(regressor.intercept_)  # fitted intercept
print(regressor.coef_)  # fitted slope
# Values recorded from an earlier run:
#   intercept:   0.50865504
#   coefficient: 0.0002021
# i.e. each additional unit of volume changes the predicted SL by about
# 0.0002021, expressed in whatever units the SL column uses (TODO confirm
# whether SL is stored as a fraction or a percentage).

# Predict on the held-out rows and tabulate actual vs predicted values.
y_pred = regressor.predict(X_test)
df = pd.DataFrame({'Actual': y_test.ravel(), 'Predicted': y_pred.ravel()})
print(df)
# Bar chart comparing the Actual and Predicted columns for each test row.
df1 = df  # alias of df (same object, not a copy)
df1.plot(kind='bar', figsize=(16, 10))
plt.grid(which='major', linestyle='-', linewidth=0.5, color='green')
# NOTE: minor grid lines are only drawn where minor ticks exist; unless
# minor ticks are enabled (e.g. plt.minorticks_on()) this call has no
# visible effect.
plt.grid(which='minor', linestyle=':', linewidth=0.5, color='black')

# Open a fresh figure so the scatter and regression line are not drawn on
# top of the bar chart's axes, which use an unrelated x scale.
plt.figure()
plt.scatter(X_test, y_test, color='gray')
plt.plot(X_test, y_pred, color='red', linewidth=2)
plt.xlabel('Volume Offered')
plt.ylabel('SL')
# NOTE: no plt.show() is issued in this section, so the figures are built
# but not displayed when run as a plain script.
# Error metrics of the predictions against the held-out targets.
# MSE is computed once and reused for the RMSE.
mse = metrics.mean_squared_error(y_test, y_pred)
print('Mean Absolute Error:', metrics.mean_absolute_error(y_test, y_pred))
print('Mean Squared Error:', mse)
print('Root Mean Squared Error:', np.sqrt(mse))
# Output recorded from an earlier run (kept as comments; pasted as bare
# text it is a SyntaxError that stops the whole file from parsing):
# Mean Absolute Error: 0.14387132265451136
# Mean Squared Error: 0.03385032038495754
# Root Mean Squared Error: 0.18398456561613408