Example 1: Data Visualization - Dataset: Iris
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
sns.set(style="white", color_codes=True)
%matplotlib inline
df = pd.read_csv('D:\\Movies\\Iris.csv')
df.head()
..........................................................................................................................
# Scatter plot of sepal length against sepal width using pandas plotting.
df.plot(
    x="SepalLengthCm",
    y="SepalWidthCm",
    kind="scatter",
)
..........................................................................................................................
# Joint plot of sepal length against sepal width.
# seaborn renamed the figure-size argument from `size` to `height` in 0.9;
# the old name is no longer accepted by current releases.
sns.jointplot(x="SepalLengthCm", y="SepalWidthCm", data=df, height=5)
..........................................................................................................................
# Scatter plot of sepal length against sepal width, coloured by species.
# Fixes: the method is `add_legend()` (not `.add.legend()`), and the
# figure-size argument is `height` (renamed from `size` in seaborn 0.9).
(
    sns.FacetGrid(df, hue="Species", height=5)
    .map(plt.scatter, "SepalLengthCm", "SepalWidthCm")
    .add_legend()
)
..........................................................................................................................
# Box plot of petal length for each species.
sns.boxplot(
    data=df,
    x="Species",
    y="PetalLengthCm",
)
..........................................................................................................................
# Box plot of petal length per species, with the individual observations
# drawn on top as a jittered strip plot.
ax = sns.boxplot(
    data=df,
    x="Species",
    y="PetalLengthCm",
)
ax = sns.stripplot(
    data=df,
    x="Species",
    y="PetalLengthCm",
    edgecolor="gray",
    jitter=True,
)
..........................................................................................................................
# Violin plot of petal length for each species.
# `size` is not a violinplot parameter (it is an axes-level function, so
# figure size is controlled through matplotlib); the stray argument is dropped.
sns.violinplot(x="Species", y="PetalLengthCm", data=df)
..........................................................................................................................
# Violin plot of petal length for each species (same call as the previous cell).
# `size` is not a violinplot parameter (it is an axes-level function, so
# figure size is controlled through matplotlib); the stray argument is dropped.
sns.violinplot(x="Species", y="PetalLengthCm", data=df)
..........................................................................................................................
# Pairwise plots of every feature (the "Id" column is excluded),
# coloured by species.
# seaborn renamed the per-facet size argument from `size` to `height` in 0.9.
sns.pairplot(df.drop("Id", axis=1), hue="Species", height=3)
..........................................................................................................................
# Same pair plot, with kernel density estimates on the diagonal.
# seaborn renamed the per-facet size argument from `size` to `height` in 0.9.
sns.pairplot(
    df.drop("Id", axis=1),
    hue="Species",
    height=3,
    diag_kind="kde",
)
..........................................................................................................................
# pandas box plots of each remaining column (without "Id"), grouped by species.
(
    df
    .drop("Id", axis=1)
    .boxplot(figsize=(12, 6), by="Species")
)
..........................................................................................................................
from pandas.plotting import andrews_curves

# Andrews curves of the features (without "Id"), one colour per species.
andrews_curves(
    df.drop("Id", axis=1),
    class_column="Species",
)
..........................................................................................................................
from pandas.plotting import parallel_coordinates

# Parallel-coordinates plot of the features (without "Id"), one colour per species.
parallel_coordinates(
    df.drop("Id", axis=1),
    class_column="Species",
)
..........................................................................................................................
from pandas.plotting import radviz

# RadViz plot of the features (without "Id"), one colour per species.
radviz(
    df.drop("Id", axis=1),
    class_column="Species",
)
Example 2: Data Visualization - Dataset: San Francisco Salaries
# Load the salaries CSV from the working directory and print a summary of
# its columns. The path must use plain ASCII quotes; the typographic
# quotes in the original are a SyntaxError.
salaries = pd.read_csv('./Salaries.csv')
salaries.info()
.....................................................................
# Convert the pay columns to numeric dtype; errors='coerce' turns any value
# that cannot be parsed into NaN instead of raising.
for col in ['BasePay', 'OvertimePay', 'OtherPay', 'Benefits']:
    salaries[col] = pd.to_numeric(salaries[col], errors='coerce')
# Select the columns from position 3 up to (not including) the 'Year' column.
pay_columns = salaries.columns[3:salaries.columns.get_loc('Year')]
# Notebook output of `pay_columns`, kept for reference (it was pasted into
# the code with broken quoting, so it is recorded here as a comment):
# Index(['BasePay', 'OvertimePay', 'OtherPay', 'Benefits', 'TotalPay',
#        'TotalPayBenefits'])
# Chunk the column names into consecutive tuples of three: zip() pulls from
# three references to the SAME iterator, so each output tuple consumes the
# next three names (a trailing partial group would be dropped).
pays_arrangement = list(zip(*(iter(pay_columns),) * 3))
# Draw one histogram per pay column on a 2x3 grid of axes.
fig, axes = plt.subplots(2, 3)
# Walk the grid row by row, pairing each axes object with its column name.
for row_axes, row_columns in zip(axes, pays_arrangement):
    for ax, column in zip(row_axes, row_columns):
        # pass in axes to pandas hist
        salaries[column].hist(ax=ax)
        # axis objects have a lot of methods for customizing the look of a plot
        ax.set_title(column)
plt.show()
.............................................................................................
# Same 2x3 grid of histograms, with an explicit figure size and extra
# spacing between the subplots.
fig, axes = plt.subplots(2, 3)
# set the figure height and width
fig.set_figheight(5)
fig.set_figwidth(12)
# Walk the grid row by row, pairing each axes object with its column name.
for row_axes, row_columns in zip(axes, pays_arrangement):
    for ax, column in zip(row_axes, row_columns):
        # pass in axes to pandas hist
        salaries[column].hist(ax=ax)
        ax.set_title(column)
# The spacing applies to the whole figure, so it is set once after the loop.
# add a row of emptiness between the two rows
plt.subplots_adjust(hspace=1)
# add a row of emptiness between the cols
plt.subplots_adjust(wspace=1)
plt.show()
.............................................................................................
# and here is a cleaner version using tick rotation and plot spacing
fig, axes = plt.subplots(2, 3)
# set the figure height and width
fig.set_figheight(5)
fig.set_figwidth(12)
# Walk the grid row by row, pairing each axes object with its column name.
for row_axes, row_columns in zip(axes, pays_arrangement):
    for ax, column in zip(row_axes, row_columns):
        salaries[column].hist(ax=ax)
        ax.set_title(column)
        # relabel the x ticks with their current positions, rotated by 30 degrees
        ax.set_xticklabels(labels=ax.get_xticks(), rotation=30)
# The spacing applies to the whole figure, so it is set once after the loop.
plt.subplots_adjust(hspace=1)
plt.subplots_adjust(wspace=1)
plt.show()
.............................................................................................