# Prev: Plotting a Series/DataFrame object | Next: -
import numpy as np
import pandas as pd
# Load the "groupby" worksheet of the research-data workbook into a DataFrame.
# The keyword is `sheet_name`; the older spelling `sheetname` is no longer
# accepted by current pandas and raises a TypeError.
data = pd.read_excel('../../data/researchdata.xlsx', sheet_name="groupby")
# Peek at the first five rows (only displayed in an interactive session).
data.head(5)
# Split the rows of `data` into groups that share the same 'Gender' value.
grby_obj = data.groupby(by='Gender')
grby_obj

# Attributes/methods exposed by the GroupBy object; widen the slice below
# to list more of them.
attribute_names = dir(grby_obj)
print(attribute_names[:20])

# Two ways of asking how many groups were formed.
len(grby_obj)
grby_obj.ngroups

# `groups` maps each group key to the row labels that belong to that group.
group_map = grby_obj.groups
print(group_map.keys(), '\n')
print(group_map.values(), '\n')
print(group_map['b'])
# Quick per-group summaries. These are bare expressions: their results are
# shown only in an interactive session/notebook and are discarded when the
# file is run as a script.

# Descriptive statistics computed separately for each group.
grby_obj.describe()
# Number of rows in each group.
grby_obj.size()
# Per-column count of non-missing values in each group.
grby_obj.count()
# Row at position 2 (0-based, i.e. the third row) of each group.
grby_obj.nth(2)
# Iterating over the GroupBy object yields (group key, sub-frame) pairs.
# For every group, print its key, its first three rows and the type of the
# object that holds the group's rows.
for gp_name, gp in grby_obj:
    print(gp_name)
    print(gp.head(3),'\n',type(gp),'\n')
# Pull out the rows of the single group whose key is 'b'.
bdf = grby_obj.get_group('b')
print(bdf.head(), '\n')

# Look at that group's 'Performance' column and its average.
b_performance = bdf['Performance']
print(b_performance.head(), '\n')
print(b_performance.mean())
import numpy as np
# Aggregate every group with pandas' built-in reductions.
# The string aliases 'mean' and 'std' are used instead of np.mean / np.std:
# pandas has always mapped those NumPy callables to its own group-wise
# mean/std, but passing the callables is deprecated in recent releases, so
# the aliases keep the same results without relying on that mapping.
print(grby_obj.agg('mean'))
# A list of aggregations produces one result column per (column, function).
print(grby_obj.agg(['mean', 'std']))
import numpy as np
def ssd(x):
    """Return the sum of squared deviations of ``x`` from its own mean.

    Intended as a custom aggregation function for ``GroupBy.agg``: it is
    called once per group with that group's values.

    Parameters:
        x: a numeric array-like that provides ``.mean()`` and supports
            element-wise arithmetic (e.g. a pandas Series or NumPy array).

    Returns:
        The scalar ``sum((x - mean(x)) ** 2)``.
    """
    mn = x.mean()
    # Element-wise squared distance from the mean, then summed to a scalar.
    return ((x - mn) ** 2).sum()
# ssd() is applied only to the numerical data of the 'Performance' column:
# the dict maps a column name to the aggregation to run on it.
performance_ssd = grby_obj.agg({'Performance': ssd})
print(performance_ssd)
# Group on two columns at once: each group key is a (Gender, Level) tuple.
grby_obj2 = data.groupby(by=['Gender', 'Level'])
# Per-group descriptive statistics (displayed only in an interactive session).
grby_obj2.describe()
# A multi-column group is selected with a tuple key, one value per column.
grby_obj2.get_group(('b', 'K6'))
# - Free learning material
# - See full copyright and disclaimer notice