import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# Apply seaborn's "white" style globally; color_codes=True is passed so the
# one-letter matplotlib color shorthands follow the seaborn palette.
sns.set (style="white", color_codes=True)
# IPython magic (only valid under IPython/Jupyter, not plain Python):
# render matplotlib figures inline in the notebook output.
%matplotlib inline
# Load the salary records once and print the column dtypes / non-null counts.
salaries = pd.read_csv("./Salaries.csv")
salaries.info()

# Force the pay columns to numeric dtype; errors='coerce' turns any value
# that cannot be parsed as a number into NaN instead of raising.
for col in ['BasePay', 'OvertimePay', 'OtherPay', 'Benefits']:
    salaries[col] = pd.to_numeric(salaries[col], errors='coerce')

# Columns from positional index 3 up to (but not including) 'Year'.
pay_columns = salaries.columns[3:salaries.columns.get_loc('Year')]
pay_columns
# Chunk the pay columns into tuples of three (one tuple per subplot row).
# zip() over three references to the same iterator consumes three names per
# tuple and silently drops a trailing remainder shorter than three.
pays_arrangement = list(zip(*(iter(pay_columns),) * 3))

# The grid is fixed at 2 rows x 3 columns, so at most two tuples fit.
fig, axes = plt.subplots(2, 3)
for i, row in enumerate(pays_arrangement):
    for j, column in enumerate(row):
        # pass in axes to pandas hist
        salaries[column].hist(ax=axes[i, j])
        # axis objects have a lot of methods for customizing the look of a plot
        axes[i, j].set_title(column)
plt.show()
# 2x3 grid of pay-column histograms on a 12 x 5 inch figure.
fig, axes = plt.subplots(2, 3, figsize=(12, 5))
for i, row in enumerate(pays_arrangement):
    for j, column in enumerate(row):
        # pass in axes to pandas hist
        salaries[column].hist(ax=axes[i, j])
        axes[i, j].set_title(column)

# hspace / wspace are fractions of the average axes height / width, so a
# value of 1 leaves a full row (resp. column) of emptiness between subplots.
plt.subplots_adjust(hspace=1, wspace=1)
plt.show()
# 2x3 grid of pay-column histograms on a 12 x 5 inch figure, with the
# x tick labels rotated so long numbers do not collide.
fig, axes = plt.subplots(2, 3, figsize=(12, 5))
for i, row in enumerate(pays_arrangement):
    for j, column in enumerate(row):
        salaries[column].hist(ax=axes[i, j])
        axes[i, j].set_title(column)
        # Rotate the labels through tick_params rather than
        # set_xticklabels(get_xticks()): overriding label text without
        # pinning the tick positions can leave labels on the wrong ticks.
        axes[i, j].tick_params(axis="x", labelrotation=30)

# Leave one axes-height / axes-width of empty space between subplots.
plt.subplots_adjust(hspace=1, wspace=1)
plt.show()
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# Apply seaborn's "white" style globally; color_codes=True is passed so the
# one-letter matplotlib color shorthands follow the seaborn palette.
sns.set (style="white", color_codes=True)
# IPython magic (only valid under IPython/Jupyter, not plain Python):
# render matplotlib figures inline in the notebook output.
%matplotlib inline
# Read the iris measurements from the working directory.
df = pd.read_csv("iris.csv")

# Quick inspection: first rows, then column dtypes / non-null counts.
df.head()
df.info()

# Number of rows recorded for each species label.
df["species"].value_counts()

# Histogram of the sepal_length column.
df["sepal_length"].hist()
# Scatter sepal length against sepal width, one color per species.
colors = ['red', 'green', 'blue']
species = ['setosa', 'versicolor', 'virginica']

# Start a fresh figure so the scatter is not drawn onto whatever axes
# happen to be current (e.g. a histogram plotted just before).
plt.figure()
for color, name in zip(colors, species):
    subset = df[df['species'] == name]
    plt.scatter(subset['sepal_length'], subset['sepal_width'], c=color, label=name)
plt.xlabel("sepal_length")
plt.ylabel("sepal_width")
# The legend entries come from the label= given to each scatter call.
plt.legend()
# Joint view of sepal length vs. sepal width. jointplot is figure-level:
# it creates its own figure and `height` sets that figure's size.
sns.jointplot(x="sepal_length", y="sepal_width", data=df, height=5)

# boxplot / stripplot / violinplot are axes-level and draw on the current
# axes, so each plot below opens a new figure to avoid stacking on the
# previous one.

# Petal length distribution per species as a box plot.
plt.figure()
ax = sns.boxplot(x="species", y="petal_length", data=df)

# The same box plot with the individual observations overlaid; jitter
# spreads the points sideways so they do not sit on a single line.
plt.figure()
ax = sns.boxplot(x="species", y="petal_length", data=df)
ax = sns.stripplot(x="species", y="petal_length", data=df, jitter=True,
                   edgecolor="gray", ax=ax)

# Petal length distribution per species as a violin plot. `height` is a
# figure-level option and is not a violinplot parameter, so it is dropped.
plt.figure()
sns.violinplot(x="species", y="petal_length", data=df)
# One kernel-density curve of petal_length per species, drawn on a shared
# FacetGrid (colored by the hue column) with a legend attached.
(
    sns.FacetGrid(df, hue="species", height=6)
    .map(sns.kdeplot, "petal_length")
    .add_legend()
)
# pandas' multivariate plotting helpers draw on the current axes when no
# `ax` is passed, so each one gets its own figure to keep them from
# overlaying one another.

# Andrews curves, one curve per row, colored by species. The trailing
# semicolon suppresses the Axes repr in notebook output.
plt.figure()
pd.plotting.andrews_curves(df, 'species');

# Parallel coordinates: one line per row across the numeric columns.
from pandas.plotting import parallel_coordinates
plt.figure()
parallel_coordinates(df, "species")

# RadViz projection of the numeric columns, colored by species.
from pandas.plotting import radviz
plt.figure()
radviz(df, "species")