How to display a nicely ordered crosstab in Pandas

Post date: Jun 18, 2015 9:18:43 PM

This notebook shows how to output pivot table such that the number intervals of row/columns are properly sorted.

In [14]:

import pandas as pd import numpy as np

In [85]:

df = pd.DataFrame(data={'x':np.random.randint(0,10000,1000), 'y':np.random.randint(0,1000,1000), 'z':[random.choice(['a','b','c']) for i in range(1000)]})

In [86]:

mybin_1 = [0, 10, 20, 50, 1e+2, 2e+2, 5e+2, 1e+3, 2e+3, 5e+3, 1e+4, 2e+4, 5e+4] mybin_2 = [0, 10, 20, 1e+2, 2e+2, 1e+3, 1e+4, 5e+4]

In [87]:

df['x_bin'] = pd.cut(x=df.x, bins=mybin_1) df['y_bin'] = pd.cut(x=df.y, bins=mybin_2)

In [88]:

df[:10]

Out[88]:

This is how you normally use crosstab, the row and column order is based on string-order.

In [89]:

pd.crosstab(df.x_bin, df.y_bin)

Out[89]:

This is how to make the order looks good.

In [122]:

def relabel(myitem, right_most='inf'): """ This function re-label the row/column of a data frame. Especially, the one that comes from pd.cut. [0, 1, 2, 3] --> ['[0,1)', '[1,2)', '[2,3)', '[3,right_most)'] """ new_item = ['']*len(myitem) for i in np.arange(len(myitem)-1): new_item[i] = "[%d,%d)" % (myitem[i], myitem[i+1]) i = len(myitem)-1 new_item[i] = "[%d,%s)" % (myitem[i], right_most) return new_item def crosstab2(X1, X2, mybin1=None, mybin2=None): """ Produce pivolt table similar to crosstab except that the order of row or column is by the interval. X1 and X2: TimeSeries. mybin1 and mybin2: list of binning boundary. kittipat@ June 18, 2015 """ if mybin1: bin1 = pd.cut(x=X1, bins=mybin1, labels=[s for s in mybin1[:-1]]) else: bin1 = X1 if mybin2: bin2 = pd.cut(x=X2, bins=mybin2, labels=[s for s in mybin2[:-1]]) else: bin2 = X2 df_pivot = pd.crosstab(bin1, bin2) if mybin1: df_pivot.index = relabel(df_pivot.index, str(mybin1[-1])) if mybin2: df_pivot.columns = relabel(df_pivot.columns, str(mybin2[-1])) return df_pivot

In [124]:

crosstab2(df.x, df.y, mybin_1, mybin_2)

Out[124]:

In [126]:

crosstab2(df.x, df.z, mybin_1)

Out[126]:

In [127]:

crosstab2(df.z, df.x, [], mybin_1)

Out[127]:

In [ ]: