import seaborn as sns
Configuration
# Open a new figure; `width` and `height` are supplied by the caller.
_figsize = (width, height)
plt.figure(figsize=_figsize)
Bar Graph
# Bar plot of `y` per `x` category, split into coloured bars by `hue`.
# `order` fixes the left-to-right order of the x categories.
sns.barplot(
    data=df,
    x='column_name',
    y='column_name',
    hue='groupby',  # the comma after this argument was missing (SyntaxError)
    order=a_list,
);
# Axis labels and title are set on the current matplotlib Axes.
plt.xlabel("xlabel");
plt.ylabel("ylabel");
plt.title('graph title');
Boxplot
# Box plot of `ycol` for each `xcol` category, drawn in the order given by a_list.
boxplot_options = {
    'x': 'xcol',
    'y': 'ycol',
    'order': a_list,
    'showfliers': False,  # do not draw outlier points (fliers)
}
sns.boxplot(data=df, **boxplot_options);
plt.title('graph title');
Histogram
# Histogram of one column: bins of width 5 spanning the range 1..100.
sns.histplot(
    data=df['column_name'],
    color='#FF00FF',
    binwidth=5,
    binrange=(1, 100),
);
plt.title('graph title');
Heatmap
# Draw `df` as a heatmap. The original called sns.histplot here, which is not
# the heatmap function this section describes and does not produce the
# colorbar accessed below.
g = sns.heatmap(df, cmap='Blues')
# The colorbar is reached through the first collection drawn on the Axes.
colorbar = g.collections[0].colorbar
# Replace numeric ticks with readable labels (the original used the misspelled
# name `colorbat`, which raised NameError).
# NOTE(review): assumes the values in df are the scores 0..3 -- confirm.
colorbar.set_ticks([0, 1, 2, 3])
colorbar.set_ticklabels(['bad', 'ok', 'good', 'excellent'])
plt.show()
Pairplot (Scatterplot Matrix)
# Scatterplot matrix built from the columns of df.
sns.pairplot(data=df)
OLS (Ordinary Least Squares) Functions
from statsmodels.formula.api import ols
# Simple linear regression: one response column, one predictor column.
OLS = ols(
    formula='y_col ~ x_col',
    data=df,
)
OR
# Multiple regression; C(...) marks x2_col as a categorical predictor.
OLS = ols(
    formula='y_col ~ x_col + x1_col + C(x2_col)',
    data=df,
)
# Fit the model, then build the regression results summary.
model = OLS.fit()
model.summary()
Regression Line
# Scatter of y_col against x_col with a fitted regression line.
sns.regplot(data=df, x="x_col", y="y_col")

# Predictions and residuals from the fitted `model`, reused by the checks below.
X = df['x_col']
Y_hat = model.predict(X)
residuals = model.resid

# Residuals against fitted values.
sns.scatterplot(x=model.fittedvalues, y=residuals)
Q-Q Plot (Normality Check)
import matplotlib.pyplot as plt
import statsmodels.api as sm
# Q-Q plot of the residuals; line='s' adds the standardized reference line.
sm.qqplot(data=residuals, line='s')
plt.show()
Homoscedasticity Check
# Residuals against predicted values, with a horizontal reference line at zero.
g = sns.scatterplot(x=Y_hat, y=residuals)
g.set(xlabel="Y_hat", ylabel="Residuals")
g.axhline(y=0)
plt.show()
Correlation Heatmap
# Pearson correlation matrix of df, annotated, on a colour scale fixed to [-1, 1].
corr_matrix = df.corr(method='pearson')
sns.heatmap(
    corr_matrix,
    annot=True,
    cmap='coolwarm',
    vmin=-1,
    vmax=1,
);