##code begins here
##importing relevant libraries
import numpy as np
import pandas as pd
import matplotlib . pyplot as plt
##Load the CSV data file into a pandas dataframe, then take its values as a
##numpy array so that columns can be selected by position.
datapandas = pd.read_csv('assign1.csv')
datanumpy = datapandas.values
##Pull out the three columns used below. Going by the variable names, column 12
##holds the number of pre-primary schools, column 13 the number of male
##teachers and column 14 the number of female teachers (assumed from the
##original comments; verify against the header of assign1.csv).
dataNschools, dataMteachers, dataFteachers = (
    datanumpy[:, column] for column in (12, 13, 14)
)
##Total number of teachers per row = male teachers + female teachers
dataTteachers = dataMteachers + dataFteachers
##Append the totals as an extra last column to get the modified array
datafinal = np.column_stack((datanumpy, dataTteachers))
##Since we must plot histograms for rural, urban and total separately, we create
##3 arrays, each holding one teachers-per-school value per state.
##The input rows are read in groups of three per state: row 3*i feeds the rural
##array, row 3*i+1 the urban array and row 3*i+2 the total array.
##(notice that the total number of teachers across India, which is not needed for
##the histograms, is omitted by only reading the first 3*NUM_STATES rows)
NUM_STATES = 35


def _teachers_per_school(teachers, schools, row):
    """Return teachers[row] / schools[row], or 0.0 when schools[row] is 0.

    Some states have a value of 0 for the number of schools, so the division
    is guarded to avoid dividing by zero.
    """
    if schools[row] != 0:
        return teachers[row] / schools[row]
    return 0.0


datarural = np.zeros(NUM_STATES, dtype=float)
dataurban = np.zeros(NUM_STATES, dtype=float)
datatotal = np.zeros(NUM_STATES, dtype=float)
for state in range(NUM_STATES):
    first_row = 3 * state
    ##each array only ever receives its own category's value; a zero school
    ##count in one category must not overwrite another category's result
    datarural[state] = _teachers_per_school(dataTteachers, dataNschools, first_row)
    dataurban[state] = _teachers_per_school(dataTteachers, dataNschools, first_row + 1)
    datatotal[state] = _teachers_per_school(dataTteachers, dataNschools, first_row + 2)
##We now plot one histogram per category: rural, urban and total.
##The bins are given as explicit integer edges, so the x-range of each
##histogram is fixed by those edges (a separate range argument would have no
##effect); values outside the edges are not counted.
def _plot_teacher_histogram(values, bin_edges, area_label, color=None):
    """Show a histogram of teachers-per-school values across states.

    values     -- sequence of teachers-per-school ratios, one per state
    bin_edges  -- explicit bin edges passed to plt.hist
    area_label -- text placed in parentheses in the title
    color      -- bar colour; None keeps matplotlib's default colour
    """
    plt.hist(values, bins=bin_edges, color=color, rwidth=0.95)
    plt.title('Distribution of Pre-primary teachers in India ({}) across states'.format(area_label))
    plt.xlabel('number of teachers per school')
    plt.ylabel('number of states')
    ##plt.show() displays the figure, so each histogram is drawn on its own
    plt.show()


##first, for rural
_plot_teacher_histogram(datarural, list(range(7)), 'in rural areas')
##now urban
_plot_teacher_histogram(dataurban, list(range(13)), 'in urban areas', color="green")
##and finally total
_plot_teacher_histogram(datatotal, list(range(7)), 'total', color="purple")
##end of code