#code begins here
#importing relevant libraries
import numpy as np
import pandas as pd
import matplotlib . pyplot as plt
# Load the Swachh Bharat data (the original .xlsx exported as comma-separated
# text) into a pandas DataFrame, then view it as a plain NumPy array.
datapandas = pd.read_table('swachhbharat.csv', sep=',')
datanumpy = datapandas.values

# The Uttar Pradesh records are taken to be rows 5617 through 6437.
first_row, last_row = 5617, 6437

# A NumPy slice INCLUDES its start index and EXCLUDES its stop index, so
# slice(5615, 6436) selects zero-based indices 5615..6435, i.e. 821 rows.
# NOTE(review): this lines up with rows 5617..6437 only if those row numbers
# count the header line (which pandas consumes as column names) as row 1 --
# confirm against the data file.
up_rows = slice(first_row - 2, last_row - 1)

# Column 4: total number of villages in each selected row.
dataTotalV = datanumpy[up_rows, 4]
# Column 5: number of villages declared ODF in each selected row.
dataTotalODF = datanumpy[up_rows, 5]
# Fraction of villages that are ODF, one entry per selected row:
# (ODF villages) / (total villages).
# Both columns are converted to float first because a DataFrame with mixed
# column types yields an object-dtype array; the division is then done
# element-wise by NumPy, so no hard-coded row count is needed.
# The data is expected to have at least one village per row; a zero village
# count would produce inf/nan (with a NumPy RuntimeWarning) for that row.
dataFractionVODF = dataTotalODF.astype(float) / dataTotalV.astype(float)
# Sample mean of the ODF fractions: the sum of all fractions divided by the
# number of entries.
# sumODF (the running total in the original loop) is kept as a variable so
# any later code that reads it still works.
sumODF = dataFractionVODF.sum()
# The divisor is the actual number of entries rather than a hard-coded 821.
SampleMean = sumODF / dataFractionVODF.size
# Sample variance and sample standard deviation of the ODF fractions.
# sumV is the sum of squared differences between each entry and the sample
# mean; x ** 2 squares every element of the array at once.
sumV = ((dataFractionVODF - SampleMean) ** 2).sum()
# Divide by (number of entries - 1), as the sample-variance formula requires.
SampleVariance = sumV / (dataFractionVODF.size - 1)
# The standard deviation is the square root of the variance.
SampleStandardD = SampleVariance ** 0.5
# Histogram of how many entries fall into each range of ODF fraction.
# Bars are orange and drawn at 95% of the bin width so that a 5% gap
# separates neighbouring bars.
plt.hist(dataFractionVODF, rwidth=0.95, color="orange")
plt.title('The fraction of villages that are declared ODF in each district in UP')
plt.xlabel('Fraction of villages that are ODF')
plt.ylabel('Number of Districts')

# Vertical dashed indigo line at the sample mean.
plt.axvline(SampleMean, color="indigo", linestyle='dashed', linewidth=2,
            label='Mean of Sample')

# The standard deviation is measured in the same units as the x axis (a
# fraction of villages), not in counts, so it is shown as a pair of vertical
# lines one standard deviation either side of the mean rather than as a
# horizontal line on the count axis. Only the first line carries a legend
# label so the legend shows a single entry for the pair.
plt.axvline(SampleMean - SampleStandardD, color="blue", linestyle='dashed',
            linewidth=2, label='Mean +/- Standard Deviation of Sample')
plt.axvline(SampleMean + SampleStandardD, color="blue", linestyle='dashed',
            linewidth=2)

# A legend replaces hand-placed text, so labels do not depend on hard-coded
# coordinates that only suit one particular data set.
plt.legend()

# Display the figure.
plt.show()
# Report the sample mean, variance and standard deviation, one per line.
summary = (
    ("The Sample Mean is", SampleMean),
    ("The Sample Variance is", SampleVariance),
    ("The Sample Standard Deviation is", SampleStandardD),
)
for caption, value in summary:
    print(caption, value)
#code ends here