For Doctoral College - Stats examples for all languages/tools

%% MATLAB
% Reset the workspace and the command window before running.
clear all;
clc;

%% Load the data file and split it into its first two columns
dta = csvread('d:courseData.csv');
y1 = dta(:, 1);
y2 = dta(:, 2);

% Count how many samples of each column fall into 20 bins, then draw
% both count vectors as lines on one set of axes (x axis = bin index).
yh1 = histcounts(y1, 20);
yh2 = histcounts(y2, 20);
figure(1)
plot([yh1.' yh2.'])

% Empirical CDFs of both columns overlaid in a second figure; these are
% easier to compare by eye than the raw bin counts.
figure(2)
cdfplot(y1.');
hold on;
cdfplot(y2.')

%%
% Column 1: standardise (zero mean, unit SD) and run a one-sample
% Kolmogorov-Smirnov test against the standard normal.
% The result is 1 when normality is rejected, 0 otherwise.
mn1 = mean(y1);
st1 = std(y1);
yn1 = (y1 - mn1) ./ st1;
H_0Reject1 = kstest(yn1);

% Column 2: same standardisation and the same normality test.
mn2 = mean(y2);
st2 = std(y2);
yn2 = (y2 - mn2) ./ st2;
H_0Reject2 = kstest(yn2);

% Column 2 rescaled with the mean and SD of column 1: if the test rejects
% here, column 2 does not follow the normal fitted to column 1.
yn3 = (y2 - mn1) ./ st1;
H_0Reject3 = kstest(yn3);

# -*- coding: utf-8 -*-

# PYTHON

# libraries we might need

import csv

import numpy as np

import matplotlib.pyplot as plt

# clear the console (ANSI escape sequence: cursor home, then erase display)
print("\033[H\033[J")

# read the first two columns of the csv file as strings;
# the with-block guarantees the file is closed again afterwards
y1 = []
y2 = []
with open('d:courseData.csv', 'r', newline='') as handle:
    for row in csv.reader(handle):
        if not row:
            # csv.reader yields an empty list for a blank line - skip it
            continue
        y1.append(row[0])
        y2.append(row[1])

# convert the lists of strings to arrays of floats
y1_values = np.array(y1)
y2_values = np.array(y2)
y1_floats = y1_values.astype(np.float64)
y2_floats = y2_values.astype(np.float64)

# summary statistics
y1_mean = np.mean(y1_floats)
y2_mean = np.mean(y2_floats)
# ddof=1 gives the sample standard deviation (divide by n - 1), which is
# what MATLAB's std() and R's sd() return; numpy's default divides by n
y1_std = np.std(y1_floats, ddof=1)
y2_std = np.std(y2_floats, ddof=1)

# empirical CDF of the first column, built with numpy only
y1_sorted = np.sort(y1_floats)


def ecdf(x):
    """Return the fraction of first-column samples that are <= x.

    x may be a scalar or an array; the result has the same shape.
    """
    return np.searchsorted(y1_sorted, x, side='right') / y1_sorted.size


# histogram counts of both columns, drawn as lines against the bin index
fig, ax2 = plt.subplots()
y1_bins = np.histogram(y1_floats, bins=10)
y2_bins = np.histogram(y2_floats, bins=10)
ax2.plot(y1_bins[0])
ax2.plot(y2_bins[0])
ax2.set_xlabel('bins')
ax2.set_title('Have a good look at it')
plt.show()

# R

# Clear the console by printing a form-feed character
# (RStudio treats it as "clear console").
cat("\014")

# Open and read the csv file.
# NOTE(review): read.csv() defaults to header = TRUE, so the first line of
# the file becomes the column names. If the file is purely numeric with no
# header row, add header = FALSE, otherwise the first observation is lost.
dta1 <- read.csv("d:\\sts\\courseData.csv")

# First and second columns as plain vectors.
y1_dat <- dta1[[1]]
y2_dat <- dta1[[2]]

# Sample mean and sample standard deviation of each column.
y1_dat_mean <- mean(y1_dat)
y2_dat_mean <- mean(y2_dat)
y1_dat_std <- sd(y1_dat)
y2_dat_std <- sd(y2_dat)

# Empirical cumulative distribution functions.
cdf_y1 <- ecdf(y1_dat)
cdf_y2 <- ecdf(y2_dat)

# Draw the cdf plot of the first column as a step line without point markers.
plot(cdf_y1, verticals = TRUE, do.points = FALSE)

# Draw the histogram of the first column.
hist(y1_dat)

# One-sample Kolmogorov-Smirnov test for normality of the first column.
# The reference distribution is the normal with the sample's own mean and SD
# (equivalent to standardising the data first); without these arguments the
# data would be compared with N(0, 1) and rejected for almost any real data.
op <- ks.test(y1_dat, "pnorm", mean = y1_dat_mean, sd = y1_dat_std)

Google Sites

Report abuse