For Doctoral College - Stats examples for all languages/tools

%% MATLAB
clear all;
clc;

%% get the data from the file
dta = csvread('d:courseData.csv');
y1 = dta(:,1);
y2 = dta(:,2);

% divide in to histogram bins
% and plot
figure(1)
yh1 = histcounts(dta(:,1),20);
yh2 = histcounts(dta(:,2),20);
plot([yh1' yh2'])

% use cdfplot to visualise better
figure(2)
cdfplot(y1');
hold on;
cdfplot(y2')

%%
% subtract the mean and divide by the SD
% is it normal?
mn1 = mean(y1);
st1 = std(y1);
yn1 = (y1-mn1)/st1;
H_0Reject1 = kstest(yn1);

% subtract the mean and divide by the SD
% is it normal?
mn2 = mean(y2);
st2 = std(y2);
yn2 = (y2-mn2)/st2;
H_0Reject2 = kstest(yn2);

%  using the second column with the same mean
%  and SD - is it the same distribution
yn3 = (y2-mn1)/st1;
H_0Reject3 = kstest(yn3);

# -*- coding: utf-8 -*-

# PYTHON


# libraries we might need

import csv

import numpy as np

import matplotlib.pyplot as plt


# clear the console

print("\033[H\033[J") 


# open the csv file

file=csv.reader(open('d:courseData.csv','r'))


# set up variables

y1=[]

y2=[]


# read from file 

for row in file:

    y1.append(row[0])

    y2.append(row[1])

    

#  convert to list to array    

y1_values=np.array(y1)

y2_values=np.array(y2)


# convert from  elements to floats

y1_floats=y1_values.astype(np.float64)

y2_floats=y2_values.astype(np.float64)


# results

y1_mean=np.mean(y1_floats)

y2_mean=np.mean(y2_floats)


y1_std=np.std(y1_floats)

y2_std=np.std(y2_floats)


ecdf = sm.distributions.ECDF(y1_floats)


fig, ax2 = plt.subplots()

y1_bins = np.histogram(y1_floats, bins=10)

y2_bins = np.histogram(y2_floats, bins=10)

ax2.plot(y1_bins[0])

ax2.plot(y2_bins[0])

ax.set_xlabel('bins')

ax.set_title('Have a good look at it')

plt.show()


# R

#  this clears the console

cat("\014")  


# open and read the csv file

dta1 <- read.csv("d:\\sts\\courseData.csv")


y1_dat <- dta1[[1]]

y2_dat <- dta1[[2]]


y1_dat_mean = mean(y1_dat)

y2_dat_mean = mean(y2_dat)


y1_dat_std = sd(y1_dat)

y2_dat_std = sd(y2_dat)


cdf_y1 <- ecdf(y1_dat)

cdf_y2 <- ecdf(y2_dat)


# draw the cdf plot 

plot(cdf_y1, verticals = TRUE, do.points = FALSE)


# draw the histogram

hist(y1_dat)


# do the ks test

op <- ks.test(y1_dat,pnorm)