#code starts here
import numpy as np
import pandas as pd
import matplotlib . pyplot as plt
datapandas=pd.read_table('recoilenergydata_EP219.csv', delimiter=',')
datanumpy=datapandas.values
##Extracting the data into a pandas file and eventually a numpy array.
EnergiesExtracted=datanumpy[0:40,0]
NumEventsExtracted=datanumpy[0:40,1]
NumEventsB=np.zeros(40)
##This array will store the expected values of the number of events in the various energy bins
##assuming only background processes.
plot=plt.bar(EnergiesExtracted, NumEventsExtracted,align='center',color="purple" , width=1, edgecolor='black')
plt.title('Obsereved Frequency Histogram for Recoil Energies')
plt.xlabel('Recoil Energy (in KeV)')
plt.ylabel('Number of Events')
plt.show()
##Plotting the data given in the csv file.
EnergiesCal=np.zeros(41)
i=1
while(i<41):
EnergiesCal[i]=EnergiesExtracted[i-1]+0.5
i=i+1
EnergiesExp=np.exp(-1*(1/10)*EnergiesCal)
##Integration will be required to find the number of events falling in the various energy bins
##The limits of the integration will be integer energies as the bins are centred around half integers
##For simplifying computation, we therefore create a new array storing the integer values from 0 to 40(the relevant limits).
i=0
while(i<40):
NumEventsB[i]=10000*1.0171*(EnergiesExp[i]-EnergiesExp[i+1])
i=i+1
##We must first normalise the background function as there are 10171 events (observed), while integrating the background function
##over all positive energies gives us a total of 10000 events. This is the origin of the 1.0171 in the numerator of the expression
##That a simple normalisation can indeed provide the correct probability distribution function for 10171 nuclei is simply an assumption .
plt.bar(EnergiesExtracted, NumEventsB,align='center' ,color="indigo", width=1, edgecolor='black')
plt.title('Frequency Histogram for Recoil Energies assuming only background processes')
plt.xlabel('Recoil Energy (in KeV)')
plt.ylabel('Number of Events')
plt.show()
##Plotting the histogram assuming only background processes.
NumEventsDark=np.zeros((105,40))
##This array will store the expected values of the number of events in the various energy bins
##assuming that scattering occurs only due to dark matter.
NumEvents=np.zeros((105,40))
##This array will store the expected values of the number of events in the various energy bins
##assuming scattering due to both dark matter and background processes.
Sigma=np.zeros(5)
ProbEvents=np.zeros((105,40))
ExpectedEvents=np.zeros((105,40))
Sigma[0]=0.01
Sigma[1]=0.1
Sigma[2]=1
Sigma[3]=10
Sigma[4]=100
##Sigma stores the parameter values for which computations must be performed and histograms must be created
j=0
while(j<5):
i=5
while(i<15):
NumEventsDark[j][i]=10*Sigma[j]*(((EnergiesCal[i+1]-5)**2) - (EnergiesCal[i]-5)**2)
i=i+1
while(i<25):
NumEventsDark[j][i]=(10*Sigma[j]*(((25-EnergiesCal[i])**2)-((25-EnergiesCal[i+1])**2)))
i=i+1
##The formula used stems from the integrating the expression given in the problem statement
i=0
while(i<40):
NumEvents[j][i]=NumEventsDark[j][i]+NumEventsB[i]
##Scattering due to both dark matter and background processes is the sum of scattering due to
##background processes alone and scattering due to dark matter alone
ProbEvents[j][i]=(NumEvents[j][i])/((2000*Sigma[j])+10000)
##There is a normalisation factor in the denominator for reasons similar to those mentioned
##in the calculation of Number of events assuming only background processes. 2000sigma +10000
##is the number of events obtained by integrating the two expressions (Dark matter and Scattering)
##given in the problem statement.
ExpectedEvents[j][i]=ProbEvents[j][i]*10171
##The expected value is of course the probability times the total number of events. These two steps
##were combined into a single statement in the calculation of NumEventsB previously
i=i+1
j=j+1
plt.bar(EnergiesExtracted, ExpectedEvents[0],align='center' ,color="blue", width=1, edgecolor='black')
##Now we plot thee 5 required histograms
plt.title('Frequency Histogram for Recoil Energies with Background Processes and Sigma = 0.01')
plt.xlabel('Recoil Energy (in KeV)')
plt.ylabel('Number of Events')
plt.show()
plt.bar(EnergiesExtracted, ExpectedEvents[1],align='center' ,color="green", width=1, edgecolor='black')
plt.title('Frequency Histogram for Recoil Energies with Background Processes and Sigma = 0.1')
plt.xlabel('Recoil Energy (in KeV)')
plt.ylabel('Number of Events')
plt.show()
mybar=plt.bar(EnergiesExtracted, ExpectedEvents[2],align='center' , color="yellow",width=1, edgecolor='black')
plt.title('Frequency Histogram for Recoil Energies with Background Processes and Sigma = 1.0')
plt.xlabel('Recoil Energy (in KeV)')
plt.ylabel('Number of Events')
plt.show()
mybar=plt.bar(EnergiesExtracted, ExpectedEvents[3],align='center' , width=1,color="orange", edgecolor='black')
plt.title('Frequency Histogram for Recoil Energies with Background Processes and Sigma = 10')
plt.xlabel('Recoil Energy (in KeV)')
plt.ylabel('Number of Events')
plt.show()
mybar=plt.bar(EnergiesExtracted, ExpectedEvents[4],align='center' , width=1,color="red", edgecolor='black')
plt.title('Frequency Histogram for Recoil Energies with Background Processes and Sigma = 100')
plt.xlabel('Recoil Energy (in KeV)')
plt.ylabel('Number of Events')
plt.show()
j=0
SigmaCom=np.zeros(100)
loglike=np.zeros(100)
##To plot the graph of the log likelihood fuunction, we simply compute its value for a large
##dataset and then use python to extrapolate the set of points to get a graph
##The absicca and ordinates of the datasets form 2 arrays: SigmaCom and loglike.
while(j<100):
logtemp=0
SigmaCom[j]=j
i=5
while(i<15):
NumEventsDark[j+5][i]=10*SigmaCom[j]*(((EnergiesCal[i+1]-5)**2) - (EnergiesCal[i]-5)**2)
i=i+1
while(i<25):
NumEventsDark[j+5][i]=(10*SigmaCom[j]*(((25-EnergiesCal[i])**2)-((25-EnergiesCal[i+1])**2)))
i=i+1
##This is the exact same procedure that was followed while making histograms for the specified
##sigma values. This section of the code has been isolated from the previous section for clarity .
i=0
while(i<40):
NumEvents[j+5][i]=NumEventsDark[j+5][i]+NumEventsB[i]
ProbEvents[j+5][i]=(NumEvents[j+5][i])/((2000*SigmaCom[j])+10000)
ExpectedEvents[j+5][i]=ProbEvents[j+5][i]*10171
i=i+1
k=0
while(k<40):
logtemp=(NumEventsExtracted[k])*(np.log(ProbEvents[j+5][k]))
loglike[j]=loglike[j]+logtemp
k=k+1
j=j+1
plt.plot(SigmaCom,loglike)
plt.title("Log likelihood as a function of cross section with range 0 to 100 fb")
plt.xlabel("Cross section in fb")
plt.ylabel("Log likelihood")
plt.show()
##This graph shows variation of the log likelihood over a large range of sigma (1,100)
##However, no clear maxima can be observed here. Hence, we confine sigma to a smaller range
##via the creation of the following plot.
j=0
SigmaCom=np.zeros(100)
loglike=np.zeros(100)
while(j<100):
logtemp=0
SigmaCom[j]=j/200
##The highly shrunken range of sigma.
i=5
while(i<15):
NumEventsDark[j+5][i]=10*SigmaCom[j]*(((EnergiesCal[i+1]-5)**2) - (EnergiesCal[i]-5)**2)
i=i+1
while(i<25):
NumEventsDark[j+5][i]=(10*SigmaCom[j]*(((25-EnergiesCal[i])**2)-((25-EnergiesCal[i+1])**2)))
i=i+1
i=0
while(i<40):
NumEvents[j+5][i]=NumEventsDark[j+5][i]+NumEventsB[i]
ProbEvents[j+5][i]=(NumEvents[j+5][i])/((2000*SigmaCom[j])+10000)
ExpectedEvents[j+5][i]=ProbEvents[j+5][i]*10171
i=i+1
k=0
while(k<40):
logtemp=(NumEventsExtracted[k])*(np.log(ProbEvents[j+5][k]))
loglike[j]=loglike[j]+logtemp
k=k+1
j=j+1
plt.plot(SigmaCom,loglike)
plt.title("Log likelihood as a function of cross section with range 0 to 0.5 fb")
plt.xlabel("Cross section in fb")
plt.ylabel("Log likelihood")
plt.show()
plt.bar(EnergiesExtracted, NumEvents[36], width=1,color="yellow" ,edgecolor="red" )
##We now plot the histogram with the parameter value set to the MLE.
plt.title('Frequency Histogram for Recoil Energies with Background Processes and Sigma = MLE')
plt.xlabel("Recoil energies(in KeV)")
plt.ylabel("Number of Events")
plt.show()
print("The value of Log likelihood at j = 47 is", loglike[47])
print("The value of Log likelihood at j = 26 is", loglike[26])
onesigma=(47-26)/400
print("The 1-Sigma interval obtained is thus" , onesigma)
##j values of 47 and 26 are the values of j that (when inputted), best approximate loglikelihood max-1
##The one sigma interval that is consistent with the data is then easily computed from the formula for j
logmax=loglike.argmax()
print("The Maximum value of Log likelihood is at sigma =",SigmaCom[logmax], " and the maximum value is ", loglike[36])
##This gives the maximum likelihood estimate.
##End of code