ROC Curve and Area under ROC
import numpy as np
from sklearn import metrics
from matplotlib import pyplot as plt
y = np.random.randint(0, 2, 100) # the actual 0/1 labels of the data
scores = np.random.rand(100) # the predicted scores/probabilities for the data
# true positive rate is the percentage of actual positives that are correctly predicted, i.e. recall / sensitivity
# false positive rate / false alarm rate is the number of false positives divided by the number of actual negatives, i.e. 1 - specificity
# specificity is the true negative rate, i.e. the percentage of actual negatives that are correctly predicted (see the threshold sketch after this block)
false_positive_rate, true_positive_rate, thresholds = metrics.roc_curve(y, scores)
area_under_curve = metrics.roc_auc_score(y, scores)
print(area_under_curve)
plt.plot(false_positive_rate, true_positive_rate)
plt.xlabel('1 - specificity / false alarm rate')
plt.ylabel('sensitivity / recall')
plt.show()
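The TPR / FPR definitions in the comments above can be checked at a single operating point. The sketch below is a minimal illustration, assuming an arbitrary 0.5 threshold (not part of the original example): it builds a confusion matrix from thresholded predictions and derives sensitivity, the false alarm rate and specificity from its entries.
import numpy as np
from sklearn import metrics
y = np.random.randint(0, 2, 100) # the actual 0/1 labels of the data
scores = np.random.rand(100) # the predicted scores/probabilities for the data
threshold = 0.5 # illustrative threshold, chosen arbitrarily for this sketch
predictions = (scores >= threshold).astype(int) # hard 0/1 predictions at this threshold
tn, fp, fn, tp = metrics.confusion_matrix(y, predictions).ravel() # confusion matrix entries
true_positive_rate = tp / (tp + fn) # recall / sensitivity
false_positive_rate = fp / (fp + tn) # false alarm rate, i.e. 1 - specificity
specificity = tn / (tn + fp) # true negative rate
print(true_positive_rate, false_positive_rate, specificity)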
Precision Recall curve
import numpy as np
from sklearn import metrics
from matplotlib import pyplot as plt
y = np.random.randint(0, 2, 100) # the actual 0/1 labels of the data
scores = np.random.rand(100) # the predicted scores/probabilities for the data
precision, recall, thresholds = metrics.precision_recall_curve(y, scores)
plt.plot(recall, precision)
plt.xlabel('recall')
plt.ylabel('precision')
plt.show()
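The same precision and recall that make up the curve can be computed at a single cut-off. The sketch below is a minimal illustration, assuming an arbitrary 0.5 threshold (not part of the original example), using precision_score and recall_score on the thresholded predictions.
import numpy as np
from sklearn import metrics
y = np.random.randint(0, 2, 100) # the actual 0/1 labels of the data
scores = np.random.rand(100) # the predicted scores/probabilities for the data
threshold = 0.5 # illustrative threshold, chosen arbitrarily for this sketch
predictions = (scores >= threshold).astype(int) # hard 0/1 predictions at this threshold
precision_at_threshold = metrics.precision_score(y, predictions) # tp / (tp + fp)
recall_at_threshold = metrics.recall_score(y, predictions) # tp / (tp + fn)
print(precision_at_threshold, recall_at_threshold)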
R2 score
import numpy as np
from sklearn import metrics
from matplotlib import pyplot as plt
y = np.random.rand(100) #the actual y values
scores = np.random.rand(100) #the predicted y values
# when the score is 1, the model predicts the data perfectly (captures all of the variation in y)
# when the score is 0, the model does no better than always predicting the mean of y
# when the score is negative, the model is even worse than always predicting the mean of y
# (the score is reproduced by hand in the sketch after this block)
r2 = metrics.r2_score(y, scores)
print(r2)
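To make the comments above concrete, the score can be reproduced by hand as 1 - SS_res / SS_tot, where SS_tot is the squared error of always predicting the mean of y. This is a minimal sketch of that check; it should match metrics.r2_score on the same arrays.
import numpy as np
from sklearn import metrics
y = np.random.rand(100) # the actual y values
scores = np.random.rand(100) # the predicted y values
ss_res = np.sum((y - scores) ** 2) # residual sum of squares of the model
ss_tot = np.sum((y - np.mean(y)) ** 2) # sum of squares of the mean-of-y baseline
r2_manual = 1 - ss_res / ss_tot
print(r2_manual, metrics.r2_score(y, scores)) # the two values agree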