For Doctoral College - Stats examples for all languages/tools
%% MATLAB
clear all;
clc;
%% get the data from the file
dta = csvread('d:courseData.csv');
y1 = dta(:,1);
y2 = dta(:,2);
% divide in to histogram bins
% and plot
figure(1)
yh1 = histcounts(dta(:,1),20);
yh2 = histcounts(dta(:,2),20);
plot([yh1' yh2'])
% use cdfplot to visualise better
figure(2)
cdfplot(y1');
hold on;
cdfplot(y2')
%%
% subtract the mean and divide by the SD
% is it normal?
mn1 = mean(y1);
st1 = std(y1);
yn1 = (y1-mn1)/st1;
H_0Reject1 = kstest(yn1);
% subtract the mean and divide by the SD
% is it normal?
mn2 = mean(y2);
st2 = std(y2);
yn2 = (y2-mn2)/st2;
H_0Reject2 = kstest(yn2);
% using the second column with the same mean
% and SD - is it the same distribution
yn3 = (y2-mn1)/st1;
H_0Reject3 = kstest(yn3);
# -*- coding: utf-8 -*-
# PYTHON
# libraries we might need
import csv
import numpy as np
import matplotlib.pyplot as plt
# clear the console
print("\033[H\033[J")
# open the csv file
file=csv.reader(open('d:courseData.csv','r'))
# set up variables
y1=[]
y2=[]
# read from file
for row in file:
y1.append(row[0])
y2.append(row[1])
# convert to list to array
y1_values=np.array(y1)
y2_values=np.array(y2)
# convert from elements to floats
y1_floats=y1_values.astype(np.float64)
y2_floats=y2_values.astype(np.float64)
# results
y1_mean=np.mean(y1_floats)
y2_mean=np.mean(y2_floats)
y1_std=np.std(y1_floats)
y2_std=np.std(y2_floats)
ecdf = sm.distributions.ECDF(y1_floats)
fig, ax2 = plt.subplots()
y1_bins = np.histogram(y1_floats, bins=10)
y2_bins = np.histogram(y2_floats, bins=10)
ax2.plot(y1_bins[0])
ax2.plot(y2_bins[0])
ax.set_xlabel('bins')
ax.set_title('Have a good look at it')
plt.show()
# R
# this clears the console
cat("\014")
# open and read the csv file
dta1 <- read.csv("d:\\sts\\courseData.csv")
y1_dat <- dta1[[1]]
y2_dat <- dta1[[2]]
y1_dat_mean = mean(y1_dat)
y2_dat_mean = mean(y2_dat)
y1_dat_std = sd(y1_dat)
y2_dat_std = sd(y2_dat)
cdf_y1 <- ecdf(y1_dat)
cdf_y2 <- ecdf(y2_dat)
# draw the cdf plot
plot(cdf_y1, verticals = TRUE, do.points = FALSE)
# draw the histogram
hist(y1_dat)
# do the ks test
op <- ks.test(y1_dat,pnorm)