from sklearn.model_selection import train_test_split # Needed for the split below
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, classification_report
import matplotlib.pyplot as plt # Needed for the ROC plot below
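# The snippets below assume a feature matrix X and a binary label vector y
# already exist. A minimal, hypothetical setup using scikit-learn's built-in
# breast cancer dataset (an assumption, not the original data) would be:
from sklearn.datasets import load_breast_cancer
X, y = load_breast_cancer(return_X_y=True)
# Note: unscaled features may trigger a ConvergenceWarning with the default
# solver; harmless for this sketch.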
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=42) # Create training and test sets
logreg = LogisticRegression() # Create the classifier
logreg.fit(X_train, y_train) # Fit the classifier to the training data
y_pred = logreg.predict(X_test) # Predict the labels of the test set
print(confusion_matrix(y_test, y_pred)) # Compute and print the confusion matrix
print(classification_report(y_test, y_pred)) # Compute and print the classification report
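# As a quick sanity check (an addition, not part of the original flow), overall
# accuracy can also be read directly from the fitted classifier:
print("Accuracy: {:.3f}".format(logreg.score(X_test, y_test)))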
from sklearn.metrics import roc_curve
y_pred_prob = logreg.predict_proba(X_test)[:, 1] # Predicted probabilities for the positive class
fpr, tpr, thresholds = roc_curve(y_test, y_pred_prob) # Generate ROC curve values
# Plot ROC curve
plt.plot([0, 1], [0, 1], 'k--') # Dashed diagonal marks a chance-level classifier
plt.plot(fpr, tpr) # ROC curve for the fitted logistic regression
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC Curve')
plt.show()
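# The thresholds returned by roc_curve can be used to trade precision for
# recall. A hypothetical sketch (the 0.3 cutoff is an assumption, not a tuned
# value): classify as positive whenever the predicted probability exceeds it.
y_pred_lowthresh = (y_pred_prob >= 0.3).astype(int)
print(confusion_matrix(y_test, y_pred_lowthresh)) # More positives flagged at the lower cutoff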
from sklearn.model_selection import cross_val_score
from sklearn.metrics import roc_auc_score
print("AUC: {}".format(roc_auc_score(y_test, y_pred_prob))) # Compute and print AUC score
cv_auc = cross_val_score(logreg, X, y, cv=5, scoring='roc_auc') # Compute cross-validated AUC scores
print("AUC scores computed using 5-fold cross-validation: {}".format(cv_auc)) # Print the array of per-fold AUC scores