##code starts here
import numpy as np
import pandas as pd
datapandas=pd.read_table('recoilenergydata_EP219.csv', delimiter=',')
datanumpy=datapandas.values
##Extracting the data into a pandas file and eventually a numpy array.
EnergiesExtracted=datanumpy[0:40,0]
NumEventsExtracted=datanumpy[0:40,1]
##We plan to conduct 4000 pseudo-experiments and find the test statistic/log likelihood for each of the pseudo-experiments. The number of observed events is 10171. These two pieces of information explain the dimensions of the array below which generates numbers from the uniform distribution from 0 to 1.
Random=np.random.rand(4000,10171)
RandomExp=-10*(np.log(1-Random))
##This gives a new array where the elements are exponentially distributed. These follow the distribution assuming only background data and hence form the data sets for the pseudo-experiment.
print(Random)
print(RandomExp)
RandomExp=np.floor(RandomExp)
##This effectively 'bins' the data.
print(RandomExp)
RandomExp=RandomExp.astype(int)
##We convert the data elements of the array from float to int. This will be crucial later.
print(RandomExp)
loglike=np.zeros(4000)
##This will of course store the log likelihoods
EnergiesCal=np.zeros(51)
##Creating energy bins
i=0
while(i<51):
EnergiesCal[i]=i
i=i+1
EnergiesExp=np.exp(-1*(1/10)*EnergiesCal)
##For computation of probabilities.
NumEventsB=np.zeros(50)
i=0
while(i<50):
NumEventsB[i]=10000*(EnergiesExp[i]-EnergiesExp[i+1])
i=i+1
ProbExp=(NumEventsB)/10171
##This array stores the probability of landing in a given bin.
sumP=0
i=0
while(i<50):
sumP=sumP+ProbExp[i]
i=i+1
print (sumP)
##The variable ProbExtra stores the probability of getting an energy more than 50 KeV. Since the probability of getting higher energies is so slim, we can consider this to be a single bin with the probability of landing in this bin being 1-(the probability of not landing in the bin). That is the function of the above computation.
ProbExtra=1-sumP
i=0
while(i<4000):
j=0
k=0
Binstore=np.zeros(50)
Finalbin=0
while(j<10171):
if(RandomExp[i][j]<50):
temp=RandomExp[i][j]
Binstore[temp]=Binstore[temp]+1
##This statement counts the number of events falling in each energy bin (except the E > 50 KeV bin).
else:
Finalbin=Finalbin+1
##number of events in the E > 50 KeV bin.
j=j+1
print(Binstore)
while(k<50):
loglike[i]=loglike[i]+(Binstore[k]*(np.log(ProbExp[k])))
k=k+1
loglike[i]=loglike[i]+(Finalbin*np.log(ProbExtra))
i=i+1
##Calculating the log likelihood.
print(loglike)
TS=0
i=0
while(i<40):
TS=TS +((NumEventsExtracted[i]*(np.log(ProbExp[i]))))
i=i+1
##Calculating the measured test statistic.
print("TS is", TS)
j=0
count=0
while(j<4000):
if(loglike[j]>TS):
count=count+1
j=j+1
##The final portion of the code counts the number of pseudo-experiments that have a larger log likelihood than the observed test statistic. This will give a simple method to accurately determine the p value (because of the large number of pseudo-experiments).
print("count is ", count)
print ("The p value is ", count/4000)
##End of code