Week 3

Code

#code begins here

#importing relevant libraries

import numpy as np

import pandas as pd

import matplotlib . pyplot as plt

# Load the comma-separated unemployment file into a pandas DataFrame.
datapandas = pd.read_csv('unemploymentrate.csv', sep=',')

# Continue with the DataFrame's underlying NumPy array.
datanumpy = datapandas.values

# Column 4 of the first 36 rows is used as the unemployment-rate series
# (one value per state / union territory).
DataUnemprate = datanumpy[:36, 4]

# Load the comma-separated crime-rate file into a pandas DataFrame.
datapandas = pd.read_csv('crimerate.csv', sep=',')

# Continue with the DataFrame's underlying NumPy array.
datanumpy = datapandas.values

# The crime-rate file lists the states/UTs in a different order from the
# unemployment file: Delhi sits at a different position, and Uttarakhand and
# Uttar Pradesh are interchanged.  Column 9 is therefore re-assembled from
# the row ranges below, in the order used by the unemployment data.
crime_column = datanumpy[:, 9]
row_ranges = [
    (0, 5),    # states up to Chhattisgarh
    (34, 35),  # Delhi
    (5, 26),   # following states, up to Tripura
    (27, 28),  # Uttarakhand
    (26, 27),  # Uttar Pradesh
    (28, 29),  # West Bengal
    (30, 34),  # union territories, first group (row 29 is skipped)
    (35, 37),  # union territories, second group
]
DataCrimerate = np.concatenate(
    [crime_column[first:last] for first, last in row_ranges]
)

# Both series now list the states/UTs in the same order.

# N is the combined number of states and union territories (36).
N = 36

# 1. Sample means of the unemployment and crime rates.
# The values are accumulated one by one, in index order.
SumOFunemp = 0
SumOFcrime = 0
for idx in range(N):
    SumOFunemp += DataUnemprate[idx]
    SumOFcrime += DataCrimerate[idx]

meanOFunemp = SumOFunemp / N
print("mean of unemployment rate is ",meanOFunemp)

meanOFcrime = SumOFcrime / N
print("mean of crime rate is ",meanOFcrime)

# 2. Sample variance and standard deviation of both series.
# First accumulate the squared deviations from the respective means.
sumU = 0
sumC = 0
for idx in range(N):
    sumU += (DataUnemprate[idx] - meanOFunemp) ** 2
    sumC += (DataCrimerate[idx] - meanOFcrime) ** 2

# Dividing the sum of squares by N-1 gives the sample variance.
UnempVariance = sumU / (N - 1)
CrimeVariance = sumC / (N - 1)

# The sample standard deviation is the square root of the sample variance.
unempStandardD = UnempVariance ** 0.5
crimeStandardD = CrimeVariance ** 0.5

print("standard deviation of unemployment is", unempStandardD)
print("standard deviation of crime rate is",crimeStandardD)

# 3. Sample covariance of crime rate and unemployment rate:
# the sum of the products of paired deviations, divided by N-1.
Sumcov = 0
for idx in range(N):
    crime_dev = DataCrimerate[idx] - meanOFcrime
    unemp_dev = DataUnemprate[idx] - meanOFunemp
    Sumcov += crime_dev * unemp_dev

cov = Sumcov / (N - 1)
print(cov)

# 4. Pearson correlation coefficient between unemployment rate and crime rate:
#
#        r = cov(U, C) / (s_U * s_C)
#
# i.e. the sample covariance divided by the product of the two sample
# standard deviations.  No square roots are taken here: rooting both the
# covariance and the product of the standard deviations yields sqrt(r)
# instead of r, and cannot represent a negative correlation at all.
den = crimeStandardD * unempStandardD

CorrelCoeff = cov / den

print("Correlation coefficient between unemployment rate and crime rate is",CorrelCoeff)

# Plotting histograms directly from DataCrimerate and DataUnemprate fails,
# possibly because those arrays were never given an explicit numeric type
# when they were created.  The values are therefore copied, element by
# element, into two new arrays of dtype float that are used for plotting.
unemprateperstate = np.zeros(36, dtype=float)
crimerateperstate = np.zeros(36, dtype=float)

for idx in range(36):
    unemprateperstate[idx] = DataUnemprate[idx]
    crimerateperstate[idx] = DataCrimerate[idx]

# 5. Histogram of the unemployment rate: 12 bins, each 1 unit wide.
plt.hist(unemprateperstate,bins=[0,1,2,3,4,5,6,7,8,9,10,11,12],rwidth=0.95,color="orange")

# Title and axis labels.
plt.title('Number of states vs Unemployment rate')
plt.xlabel('Unemployment rate')
plt.ylabel('Number of states')

# Mark the estimated mean of the unemployment rate on the x-axis.
plt.axvline(meanOFunemp, color="indigo", linestyle='dashed', linewidth=2)
plt.text(meanOFunemp+0.02, 10, 'Mean of unemployment rate', color='indigo')

# The standard deviation is measured in unemployment-rate units, so it is
# marked on the x-axis as well (a vertical line), exactly as the crime-rate
# histogram does.  A horizontal line would place it on the "number of
# states" axis, where the value has no meaning.
plt.axvline(unempStandardD, color="blue", linestyle='dashed', linewidth=2)
plt.text(unempStandardD+0.02, 3, 'Standard Deviation of unemployment rate', color='blue')

plt.show()

# 6. Histogram of the crime rate: 10 bins, each 100 units wide
# (bin edges 0, 100, ..., 1000).
crime_bin_edges = list(range(0, 1001, 100))
plt.hist(crimerateperstate, bins=crime_bin_edges, rwidth=0.95, color="green")

# Title and axis labels.
plt.title('Number of states vs Crime rate')
plt.xlabel('Crime rate (per 100,000)')
plt.ylabel('Number of states')

# Mark the estimated mean of the crime rate on the x-axis.
plt.axvline(meanOFcrime, color="indigo", linestyle='dashed', linewidth=2)
plt.text(meanOFcrime + 0.02, 10, 'Mean of crimerate', color='indigo')

# Mark the standard deviation of the crime rate as a vertical line too
# (on the x-axis); its value is large compared to the number of states.
plt.axvline(crimeStandardD, color="blue", linestyle='dashed', linewidth=2)
plt.text(crimeStandardD + 0.02, 16, 'Standard Deviation of crimerate', color='blue')

plt.show()

# 7. Scatter plot: unemployment rate on the x-axis, crime rate on the y-axis.
plt.scatter(DataUnemprate, DataCrimerate, alpha=0.5, color='blue')

# Title and axis labels.
plt.title('Scatter Plot of unemployment rate and crime rate')
plt.xlabel('Unemployment-rate')
plt.ylabel('Crime-rate (per 100,000)')

plt.show()

# 8. 2D histogram: unemployment rate on the x-axis, crime rate on the y-axis,
# with 30 bins along each axis and a red colour map.
bins_per_axis = (30, 30)
plt.hist2d(
    unemprateperstate,
    crimerateperstate,
    bins=bins_per_axis,
    cmap=plt.cm.Reds,
)

# Title and axis labels.
plt.title("2D Histogram showing the unemployment and crime rates")
plt.xlabel("Unemployment rate")
plt.ylabel("Crime Rate (per 100,000)")

plt.show()

#End of code

Google Sites

Report abuse

Week 3

*Code*

Code