# Sample observations used to demonstrate the statistic below.
nums = [872, 432, 397, 427, 388, 782, 397]


def mean(x):
    """Return the arithmetic mean of the numbers in x.

    x must support both iteration and len(). An empty x raises
    ZeroDivisionError because the sum is divided by a length of zero.
    """
    total = sum(x)
    count = len(x)
    return total / count


print("Mean: ", mean(nums))
Mean: 527.8571428571429
from math import * # a library that provides math functions
# Sample observations used to demonstrate the statistic below.
nums = [872, 432, 397, 427, 388, 782, 397]


def median(x):
    """Return the median of the values in x.

    For an odd number of values this is the middle element of the sorted
    data; for an even number it is the average of the two middle elements.

    Raises:
        ValueError: if x is empty, since the median is undefined.
    """
    ordered = sorted(x)  # sort once and index the sorted copy, not the builtin
    size = len(ordered)
    if size == 0:
        raise ValueError("median() requires at least one value")
    middle = size // 2  # integer division: no float round-trip through floor()
    if size % 2 == 1:
        return ordered[middle]
    # Even count: average the two values that straddle the centre.
    return (ordered[middle - 1] + ordered[middle]) / 2


print("Median: ", median(nums))
Median: 427
# Sample observations used to demonstrate the statistic below.
nums = [872, 432, 397, 427, 388, 782, 397]


def mode(l):  # assuming l is non empty
    """Return the most frequent value in l, or None if no value repeats.

    The frequency table is printed before the result is computed. When
    several values share the highest count, the one seen first in l is
    returned, because the sort below is stable.
    """
    counts = {}
    for value in l:
        counts[value] = counts.get(value, 0) + 1
    print(counts)
    # Highest count first; ties keep their insertion order.
    ranked = sorted(counts.items(), key=lambda pair: pair[1], reverse=True)
    top_value, top_count = ranked[0]
    if top_count == 1:
        return None
    return top_value


print("Mode: ", mode(nums))
{872: 1, 432: 1, 397: 2, 427: 1, 388: 1, 782: 1}
Mode: 397
# Sample observations used to demonstrate the statistic below.
nums = [872, 432, 397, 427, 388, 782, 397]


def variance(l):  # assuming l is non empty
    """Return the variance of l, dividing by len(l) (population form).

    The result is the average squared deviation of each value from the
    mean of l.
    """
    center = mean(l)
    squared_deviations = ((value - center) ** 2 for value in l)
    return sum(squared_deviations) / len(l)


print("Variance: ",variance(nums))
Variance: 36598.69387755102
# Sample observations used to demonstrate the statistic below.
nums = [872, 432, 397, 427, 388, 782, 397]


def std(l):  # assuming l is non empty
    """Return the standard deviation of l: the square root of variance(l)."""
    spread = variance(l)
    return sqrt(spread)


print("Standard deviation: ",std(nums))
Standard deviation: 191.30785106093012
import pandas as pd
# Three rows of observations; each row is analysed as its own sample.
dataVal = [
    (10, 20, 30, 40, 50, 60, 70),
    (10, 10, 40, 40, 50, 60, 70),
    (10, 20, 30, 50, 50, 60, 80),
]
dataFrame = pd.DataFrame(dataVal)

# axis=1 reduces across the columns, giving one skewness value per row.
skewValue = dataFrame.skew(axis=1)

print("DataFrame:", dataFrame, sep="\n")
print("Skew:", skewValue, sep="\n")
DataFrame:
0 1 2 3 4 5 6
0 10 20 30 40 50 60 70
1 10 10 40 40 50 60 70
2 10 20 30 50 50 60 80
Skew:
0 0.000000
1 -0.340998
2 0.121467
dtype: float64
import pandas as pd
import numpy as np
# Two rows of observations; each row is analysed as its own sample.
dataMatrix = [
    (65, 75, 74, 73, 95, 76, 62, 100),
    (101, 102, 103, 107, 157, 160, 191, 192),
]
dataFrame = pd.DataFrame(dataMatrix)
print("Data:", dataFrame, sep="\n")

# axis=1 reduces across the columns, giving one kurtosis value per row.
kurt = dataFrame.kurt(axis=1)
print("Kurtosis:", kurt, sep="\n")
Data:
0 1 2 3 4 5 6 7
0 65 75 74 73 95 76 62 100
1 101 102 103 107 157 160 191 192
Kurtosis:
0 -0.246357
1 -2.044655
dtype: float64
import pandas as pd
import numpy as np
# Second example: the last entry of row 1 is missing (None), which pandas
# stores as NaN in the resulting frame.
dataMatrix2 = [
    (70, 90, 90, 100, 120, 120, 100, 121, 125, 115, 112),
    (58.22, 39.33, -30.44, 36.77, 20.80, -73.95, -39.99, 91.03, -138.01, -20, None),
]
dataFrame2 = pd.DataFrame(dataMatrix2)
print("Data2: ", dataFrame2, sep="\n")

# axis=1 reduces across the columns, giving one kurtosis value per row.
kurt2 = dataFrame2.kurt(axis=1)
print("Kurtosis 2:", kurt2, sep="\n")
Data2:
0 1 2 3 4 ... 6 7 8 9 10
0 70.00 90.00 90.00 100.00 120.0 ... 100.00 121.00 125.00 115 112.0
1 58.22 39.33 -30.44 36.77 20.8 ... -39.99 91.03 -138.01 -20 NaN
[2 rows x 11 columns]
Kurtosis 2:
0 0.057451
1 0.067184
dtype: float64
import pandas as pd
# Load the tab-separated data file. The separator is passed by keyword:
# recent pandas versions accept only the file path positionally, so a
# positional "\t" raises TypeError there.
df = pd.read_csv("p3_data2.tsv", sep="\t")
print(df.head())

# Summary statistics for a single column, selected as a pandas Series.
s = df['Part-time Labour costs']
print("Type: ", type(s))
print("Mean: ", s.mean())
print("Median: ", s.median())
print("Mode: ", s.mode())  # Series.mode() returns a Series (there may be ties)
print("Min: ", s.min())
print("Max: ",s.max())
ID Advertising costs Part-time Labour costs
0 1 362438 8763
1 2 51725 9258
2 3 236472 9897
3 4 232080 5005
4 5 262733 9918
Type: <class 'pandas.core.series.Series'>
Mean: 7520.068181818182
Median: 7419.0
Mode: 0 8763
dtype: int64
Min: 5005
Max: 9996