How to display a nicely ordered crosstab in Pandas
Post date: Jun 18, 2015 9:18:43 PM
This notebook shows how to output pivot table such that the number intervals of row/columns are properly sorted.
In [14]:
import pandas as pd import numpy as np
In [85]:
df = pd.DataFrame(data={'x':np.random.randint(0,10000,1000), 'y':np.random.randint(0,1000,1000), 'z':[random.choice(['a','b','c']) for i in range(1000)]})
In [86]:
mybin_1 = [0, 10, 20, 50, 1e+2, 2e+2, 5e+2, 1e+3, 2e+3, 5e+3, 1e+4, 2e+4, 5e+4] mybin_2 = [0, 10, 20, 1e+2, 2e+2, 1e+3, 1e+4, 5e+4]
In [87]:
df['x_bin'] = pd.cut(x=df.x, bins=mybin_1) df['y_bin'] = pd.cut(x=df.y, bins=mybin_2)
In [88]:
df[:10]
Out[88]:
This is how you normally use crosstab, the row and column order is based on string-order.
In [89]:
pd.crosstab(df.x_bin, df.y_bin)
Out[89]:
This is how to make the order looks good.
In [122]:
def
relabel(myitem,
right_most='inf'):
"""
This function re-label the row/column of a data frame.
Especially, the one that comes from pd.cut.
[0, 1, 2, 3] --> ['[0,1)', '[1,2)', '[2,3)', '[3,right_most)']
"""
new_item
=
['']*len(myitem)
for
i
in
np.arange(len(myitem)-1):
new_item[i]
=
"[%d,%d)"
%
(myitem[i],
myitem[i+1])
i
=
len(myitem)-1
new_item[i]
=
"[%d,%s)"
%
(myitem[i],
right_most)
return
new_item
def
crosstab2(X1,
X2,
mybin1=None,
mybin2=None):
"""
Produce pivolt table similar to crosstab except that the order of row or column is by the interval.
X1 and X2: TimeSeries.
mybin1 and mybin2: list of binning boundary.
kittipat@
June 18, 2015
"""
if
mybin1:
bin1
=
pd.cut(x=X1,
bins=mybin1,
labels=[s
for
s
in
mybin1[:-1]])
else:
bin1
=
X1
if
mybin2:
bin2
=
pd.cut(x=X2,
bins=mybin2,
labels=[s
for
s
in
mybin2[:-1]])
else:
bin2
=
X2
df_pivot
=
pd.crosstab(bin1,
bin2)
if
mybin1:
df_pivot.index
=
relabel(df_pivot.index,
str(mybin1[-1]))
if
mybin2:
df_pivot.columns
=
relabel(df_pivot.columns,
str(mybin2[-1]))
return
df_pivot
In [124]:
crosstab2(df.x,
df.y,
mybin_1,
mybin_2)
Out[124]:
In [126]:
crosstab2(df.x,
df.z,
mybin_1)
Out[126]:
In [127]:
crosstab2(df.z,
df.x,
[],
mybin_1)
Out[127]:
In [ ]: