# Read in the brain_size.csv file, preview it, and display a histogram of the
# VIQ column.
from scipy import stats
import matplotlib.pyplot as plt
import pandas as pd  # `pd` is used below but was never imported in this cell

# The file is read with ';' as the separator, and "." entries are treated as
# missing values (NaN).
data = pd.read_csv('brain_size.csv', sep=';', na_values=".")
print(data.head())

data['VIQ'].hist()
plt.show()

# Recorded output of print(data.head()):
# Unnamed: 0 Gender FSIQ VIQ PIQ Weight Height MRI_Count
# 0 1 Female 133 132 124 118.0 64.5 816932
# 1 2 Male 140 150 124 NaN 72.5 1001121
# 2 3 Male 139 123 150 143.0 73.3 1038437
# 3 4 Male 133 129 128 172.0 68.8 965353
# 4 5 Female 137 132 134 147.0 65.0 951545
# A two-tailed one-sample t-test on VIQ against a hypothesised mean of 110.
print(stats.ttest_1samp(data['VIQ'], 110))
# p-value = 0.533. ttest_1samp reports a two-sided p-value by default, so it
# is compared with alpha = 0.05 directly (not with 0.05/2).
# 0.533 > 0.05, so the null hypothesis is not rejected.
#
# Recorded output:
# Ttest_1sampResult(statistic=0.6293461053092635, pvalue=0.5327920500038907)
# A two-tailed one-sample t-test on VIQ against a hypothesised mean of 100.
print(stats.ttest_1samp(data['VIQ'], 100))
# p-value = 0.002. ttest_1samp reports a two-sided p-value by default, so it
# is compared with alpha = 0.05 directly (not with 0.05/2).
# 0.002 < 0.05, so the null hypothesis is rejected.
#
# Recorded output:
# Ttest_1sampResult(statistic=3.3074146385401786, pvalue=0.002030117404781822)
# One-tailed one-sample t-test: is the mean VIQ less than 130?
m = 130
results = stats.ttest_1samp(data['VIQ'], m)
print(results)

alpha = 0.05
# ttest_1samp returns a two-sided p-value. Halving it gives the one-sided
# p-value, which supports "less than" only when the statistic is negative.
# Plain `and` is used because both operands are scalar booleans.
if results.statistic < 0 and results.pvalue / 2 < alpha:
    print("Reject null hypothesis, mean is less than {}".format(m))
else:
    print("Accept null hypothesis")

# Recorded output:
# Ttest_1sampResult(statistic=-4.726790961152567, pvalue=2.9541289074597695e-05)
# Reject null hypothesis, mean is less than 130
# One-tailed one-sample t-test: is the mean VIQ greater than 80?
m = 80
results = stats.ttest_1samp(data['VIQ'], m)
print(results)

alpha = 0.05
# The reported p-value is two-sided; half of it is the one-sided p-value,
# and the statistic must be positive for the "greater than" direction.
if not (results.statistic > 0 and results.pvalue / 2 < alpha):
    print("accept null hypothesis")
else:
    print("reject null hypothesis, mean is greater than {}".format(m))

# Recorded output:
# Ttest_1sampResult(statistic=8.663551705002009, pvalue=1.2618678412297256e-10)
# reject null hypothesis, mean is greater than 80
# Load p4_data1.tsv, then print its first rows and summary statistics.
import scipy.stats as s  # provides some additional predefined statistics functions.
import pandas as pd
import numpy as np
from math import *
from functools import reduce

# `sep` is passed by keyword: pandas deprecated positional arguments after the
# path in 1.3 and rejects them from 2.0 onwards.
df = pd.read_csv('p4_data1.tsv', sep='\t')
print(df.head())
print(df.describe())

# Recorded output of print(df.head()):
# Consumer Before After
# 0 1 3.15 3.42
# 1 2 2.33 2.70
# 2 3 2.95 3.20
# 3 4 2.15 2.50
# 4 5 3.33 3.40
#
# Recorded output of print(df.describe()):
# Consumer Before After
# count 12.000000 12.000000 12.000000
# mean 6.500000 2.752500 2.930833
# std 3.605551 0.643529 0.559114
# min 1.000000 1.950000 2.100000
# 25% 3.750000 2.150000 2.475000
# 50% 6.500000 2.665000 2.950000
# 75% 9.250000 3.245000 3.405000
# max 12.000000 3.950000 3.800000
# Two-sample (independent) t-test comparing the Before and After columns.
results = s.ttest_ind(df['Before'], df['After'])
print(results)
# pvalue = 0.48 which is greater than 0.05, accept null hypothesis

alpha = 0.05
# ttest_ind reports a two-sided p-value, so it is compared with alpha itself.
# The sign of the statistic is irrelevant for a two-sided test; the previous
# rule (statistic > 0 and p < alpha/2) could miss a significant negative
# difference.
if results.pvalue < alpha:
    print("reject null hypothesis")
else:
    print("accept null hypothesis")

# Recorded output:
# Ttest_indResult(statistic=-0.7246599369194594, pvalue=0.47629775671675556)
# accept null hypothesis
# Load blood_pressure.csv, then print summary statistics for the two
# blood-pressure columns followed by the first five rows.
import pandas as pd
from scipy import stats
from statsmodels.stats import weightstats as stests

df = pd.read_csv('blood_pressure.csv')
print(df.loc[:, ['bp_before', 'bp_after']].describe())
print(df.head())

# Recorded output of the describe() call:
# bp_before bp_after
# count 120.000000 120.000000
# mean 156.450000 151.358333
# std 11.389845 14.177622
# min 138.000000 125.000000
# 25% 147.000000 140.750000
# 50% 154.500000 149.500000
# 75% 164.000000 161.000000
# max 185.000000 185.000000
#
# Recorded output of the head() call:
# patient sex agegrp bp_before bp_after
# 0 1 Male 30-45 143 153
# 1 2 Male 30-45 163 170
# 2 3 Male 30-45 153 168
# 3 4 Male 30-45 153 142
# 4 5 Male 30-45 146 141
# Paired-sample t-test on the bp_before / bp_after columns.
results = stats.ttest_rel(df['bp_before'], df['bp_after'])  # Calculate t-test on two related samples
print(results)
# pvalue = 0.0011 which is less than 0.05, reject null hypothesis

alpha = 0.05
# ttest_rel reports a two-sided p-value, so it is compared with alpha itself
# and the sign of the statistic does not enter the decision. The previous rule
# (statistic > 0 and p < alpha/2) did not match the comparison stated above.
if results.pvalue < alpha:
    print("reject null hypothesis")
else:
    print("accept null hypothesis")

# Recorded output:
# Ttest_relResult(statistic=3.3371870510833657, pvalue=0.0011297914644840823)
# reject null hypothesis
# Load blood_pressure.csv, then print the first five rows followed by summary
# statistics for the two blood-pressure columns.
import pandas as pd
from scipy import stats
from statsmodels.stats import weightstats as stests

df = pd.read_csv('blood_pressure.csv')
print(df.head(n=5))
print(df.loc[:, ['bp_before', 'bp_after']].describe())

# Recorded output of the head() call:
# patient sex agegrp bp_before bp_after
# 0 1 Male 30-45 143 153
# 1 2 Male 30-45 163 170
# 2 3 Male 30-45 153 168
# 3 4 Male 30-45 153 142
# 4 5 Male 30-45 146 141
#
# Recorded output of the describe() call:
# bp_before bp_after
# count 120.000000 120.000000
# mean 156.450000 151.358333
# std 11.389845 14.177622
# min 138.000000 125.000000
# 25% 147.000000 140.750000
# 50% 154.500000 149.500000
# 75% 164.000000 161.000000
# max 185.000000 185.000000
# Two-sample z-test on bp_before vs bp_after: two-sided alternative, with a
# hypothesised difference of 0.
ztest, pval1 = stests.ztest(
    x1=df['bp_before'],
    x2=df['bp_after'],
    value=0,
    alternative='two-sided',
)
print(float(pval1))
# pvalue = 0.0022 which is lesser than 0.05, reject null hypothesis
#
# Recorded output:
# 0.002162306611369422
# Load p4_data1.tsv and print its first rows.
import scipy.stats as s  # provides some additional predefined statistics functions.
import pandas as pd
import numpy as np
from math import *
from functools import reduce

# `sep` is passed by keyword: pandas deprecated positional arguments after the
# path in 1.3 and rejects them from 2.0 onwards.
df = pd.read_csv('p4_data1.tsv', sep='\t')
print(df.head())

# Recorded output:
# Consumer Before After
# 0 1 3.15 3.42
# 1 2 2.33 2.70
# 2 3 2.95 3.20
# 3 4 2.15 2.50
# 4 5 3.33 3.40
# Variances of the two columns and their ratio, with the larger variance in
# the numerator.
var_before = df['Before'].var()
var_after = df['After'].var()
print("Var Before: ", var_before)
print("Var After: ", var_after)

if var_before > var_after:
    f = var_before / var_after
else:
    f = var_after / var_before
print(f)

# Recorded output:
# Var Before: 0.41412954545454556
# Var After: 0.31260833333333327
# 1.3247552969516028
# Upper-tail probability of the variance ratio `f` under the F distribution.
# `f` has the larger variance in its numerator, so the numerator degrees of
# freedom must come from that same column. The order was previously fixed as
# (Before, After), which is only right when the Before variance is larger or
# both columns have the same number of observations.
if var_before > var_after:
    dfn, dfd = df['Before'].count() - 1, df['After'].count() - 1
else:
    dfn, dfd = df['After'].count() - 1, df['Before'].count() - 1

p_value_var_test = s.f.sf(f, dfn, dfd)  # same as 1 - cdf (Cumulative distribution function)
print(p_value_var_test)
# pvalue = 0.32 which is greater than 0.05, accept null hypothesis
#
# Recorded output:
# 0.32449424960699924