from sklearn.linear_model import LinearRegression
# Fit a linear regression on the prepared features and their labels.
# `fit` returns the estimator itself, so construction and training chain.
lin_reg = LinearRegression().fit(housing_prepared, housing_labels)

# Spot-check: take the first five rows of the raw data and of the labels,
# push the raw rows through the same preprocessing pipeline, and compare
# the model's predictions against the true labels side by side.
some_labels = housing_labels.iloc[:5]
some_data = housing.iloc[:5]
some_data_prepared = full_pipeline.transform(some_data)

print("Predictions:", lin_reg.predict(some_data_prepared))
print("Labels:", list(some_labels))
Predictions: [203682.37379543 326371.39370781 204218.64588245 58685.4770482 194213.06443039]
Labels: [286600.0, 340600.0, 196900.0, 46300.0, 254500.0]
# Show the transformed feature rows (the output of full_pipeline.transform)
# that were fed to the model in the spot-check above.
print(some_data_prepared)
[[-1.15604281 0.77194962 0.74333089 -0.49323393 -0.44543821 -0.63621141 -0.42069842 -0.61493744 -0.95445595 -0.31205452 -0.08649871 0.15531753 1. 0. 0. 0. 0. ]
[-1.17602483 0.6596948 -1.1653172 -0.90896655 -1.0369278 -0.99833135 -1.02222705 1.33645936 1.89030518 0.21768338 -0.03353391 -0.83628902 1. 0. 0. 0. 0. ]
[ 1.18684903 -1.34218285 0.18664186 -0.31365989 -0.15334458 -0.43363936 -0.0933178 -0.5320456 -0.95445595 -0.46531516 -0.09240499 0.4222004 0. 0. 0. 0. 1. ]
[-0.01706767 0.31357576 -0.29052016 -0.36276217 -0.39675594 0.03604096 -0.38343559 -1.04556555 -0.95445595 -0.07966124 0.08973561 -0.19645314 0. 1. 0. 0. 0. ]
[ 0.49247384 -0.65929936 -0.92673619 1.85619316 2.41221109 2.72415407 2.57097492 -0.44143679 -0.00620224 -0.35783383 -0.00419445 0.2699277 1. 0. 0. 0. 0. ]]
from sklearn.metrics import mean_squared_error
# Root-mean-squared error of the linear model, measured on the same
# prepared data it was fitted on (i.e. a training-set error).
housing_predictions = lin_reg.predict(X=housing_prepared)
lin_mse = mean_squared_error(
    y_true=housing_labels,
    y_pred=housing_predictions,
)
# RMSE is the square root of the MSE, which puts the error back in the
# same units as the labels.
lin_rmse = np.sqrt(lin_mse)
print(lin_rmse)
68376.64295459937
from sklearn.metrics import mean_absolute_error
# Mean absolute error of the linear model on the data it was fitted on.
# Predictions are recomputed here so this step does not depend on an
# earlier evaluation having been run.
housing_predictions = lin_reg.predict(X=housing_prepared)
lin_mae = mean_absolute_error(
    y_true=housing_labels,
    y_pred=housing_predictions,
)
print(lin_mae)
49508.08205970909
from sklearn.tree import DecisionTreeRegressor
# Train a decision tree regressor; the fixed random_state keeps the run
# reproducible. `fit` returns the estimator, so it is chained directly.
tree_reg = DecisionTreeRegressor(random_state=42).fit(
    housing_prepared, housing_labels
)

# Score the tree on the very data it was trained on. This is a
# training-set error only and says nothing about generalisation.
housing_predictions = tree_reg.predict(X=housing_prepared)
tree_mse = mean_squared_error(
    y_true=housing_labels,
    y_pred=housing_predictions,
)
tree_rmse = np.sqrt(tree_mse)
print(tree_rmse)
0.0 (the model appears to have badly overfit the training data)