Plots and data analysis

Histogram of dihedral angles

From a file dihedral angle (from MD simulation), plot a histogram of the data with a set number of bins. Also allows fitting of normal distribution.

#!/usr/bin/env python3import osimport sysimport scipy.stats as statsimport matplotlib.pyplot as pltimport numpy as npnoplot="unset"filename=sys.argv[1]dihedrals=[]xvals=[]with open(filename) as file: for line in file: dihedrals.append(float(line.split()[0]))with open("times") as file2: for line in file2: xvals.append(float(line.split()[0]))dihedrals_sorted=sorted(dihedrals)#fit = stats.norm.pdf(dihedrals_sorted, np.mean(dihedrals_sorted), np.std(dihedrals_sorted))#plt.plot(dihedrals_sorted,fit,'-o')plt.hist(dihedrals_sorted,100, normed=True, facecolor='g', alpha=0.75)plt.show()

Plotting energy/gradient profile of single-point energy jobs

From a collection of single-point ORCA outputfiles in directory (that are alpha/numerically ordered) plot the energy profile.

If gradient is present (i.e. an Engrad job) the gradient can be plotted as well and shown on plot with shared x-axis.

#!/usr/bin/env python3import subprocessimport matplotlib.pyplot as pltimport oshartokcal=627.5096080305927dir="."#Getting all ORCA outputfiles in dirfilelist=[]for file in sorted(os.listdir(dir)): if file.endswith(".out") and file!="template.out": filelist.append(file)#Getting all energiesxlabels=[]energies=[]rmsgrad=[]for outfile in filelist: proc = subprocess.Popen(['grep', "FINAL SINGLE POINT", outfile],stdout=subprocess.PIPE) proc2 = subprocess.Popen(['grep', "RMS gradient", outfile],stdout=subprocess.PIPE) for line in proc.stdout.readlines(): string=line.decode("utf-8").strip().split() en=float(string[4]) energies.append(en) xlabels.append(outfile[:-4][7:]) for line in proc2.stdout.readlines(): string2=line.decode("utf-8").strip().split() grad=float(string2[3]) rmsgrad.append(grad)#Rel energiesrefenergy=float(min(energies))rele=[]for numb in energies: rele.append((numb-refenergy)*hartokcal)#Plotting#Relative energy#plt.figure(1)f, figs = plt.subplots(2, sharex=True)figs[0].set_title('Energy and gradient of ponts')figs[0].scatter(list(range(1,len(rele)+1)), rele, color='blue', marker = 'o', s=40, linewidth=2,label="energy" )figs[0].plot(list(range(1,len(rele)+1)), rele, linestyle='-', color='blue', linewidth=1,label="energy")figs[0].set_ylabel('Energy')#RMS gradient#plt.figure(2)figs[1].scatter(list(range(1,len(rmsgrad)+1)), rmsgrad, color='red', marker = 'o', s=40, linewidth=2, label="grad" )figs[1].plot(list(range(1,len(rmsgrad)+1)), rmsgrad, linestyle='-', color='red', linewidth=1, label="grad")figs[1].set_ylabel('RMS grad')plt.xlabel('Step')plt.show()

Plotting multiple absorption spectra from ORCA orca_mapspc .dat files in dir


- Human sorting of filenames

- Automatic filename-based legends (removes suffix)

- Easy modification of colors.

- Option to shift spectra by a constant energy.

- Option to set y-value 0 if larger than some value. Useful for XAS spectra if one is not interested in the usually-wrong-looking rising edges.

- Removes scientific notation in x-axis.

#!/usr/bin/env python3import reimport osimport sysimport matplotlib.pyplot as plt# Manual optional shift parametershiftsetting=Trueshift=-465#Nullsetting. Sets y-value to 0 if larger then nullvaluenullsetting=Truenullvalue=11940# Human sorting of list#http://stackoverflow.com/questions/4836710/does-python-have-a-built-in-function-for-string-natural-sort#http://www.davekoelle.com/alphanum.htmldef natural_sort(l): convert = lambda text: int(text) if text.isdigit() else text.lower() alphanum_key = lambda key: [ convert(c) for c in re.split('([0-9]+)', key) ] return sorted(l, key = alphanum_key)dir="."datlist=[]stklist=[]#Getting all dat files in dirfor file in sorted(os.listdir(dir)): if file.endswith(".dat"): datlist.append(file) if file.endswith(".stk"): stklist.append(file)#################### Matplotlib part###################colors=['black', 'green', 'blue', 'orange', 'orange', 'indigo', 'lavender', 'hotpink', 'khaki', 'saddlebrown', 'magenta', 'limegreen', 'maroon', 'olive', 'orchid', 'navy', 'purple', 'rosybrown', 'silver']#Human sorting of listdatlist=natural_sort(datlist)for datfile, color in zip(datlist,colors): dat_e=[] dat_totintens=[] with open(datfile) as sdatfile: label=datfile.split(".")[0] for line in sdatfile: if shiftsetting==True: shiftedvalue=float(line.split()[0])+shift dat_e.append(shiftedvalue) else: dat_e.append(float(line.split()[0])) if nullsetting==True and shiftedvalue>nullvalue: dat_totintens.append(None) else: dat_totintens.append(float(line.split()[1])) plt.plot(dat_e, dat_totintens, linewidth=2, label=label,color=color)#rangesplt.xlim(11915,11955)#Axes labelsplt.xlabel('Energy (eV)')plt.ylabel('Calculated intensity')#Remove offset and scientific notation of x-axisplt.ticklabel_format(useOffset=False, style='plain')#Legendplt.legend(shadow=True, fontsize='small')#Saving figureplt.savefig('Plot-XAS-Au-all.png', dpi=200)plt.show()

Matplotlib Script to read simulation trajectory data from file and plot (to be refined)

Usage:

./plot_traj.py simulation.prot -temp/-epot/-ekin/-etot

where simulation.prot contains tabulated data:

#Step time[ps] E_pot[au] E_kin[au] E_tot[au] T[K] F_c[au] Fric 1 0.001000 120856.223512 6.129615 120862.353127 264.48 0.0000e+00 0.000e+00 2 0.002000 120855.634232 6.319493 120861.953725 272.68 0.0000e+00 0.000e+00 3 0.003000 120855.382219 6.599810 120861.982029 284.77 0.0000e+00 0.000e+00 4 0.004000 120855.232159 6.746517 120861.978677 291.10 0.0000e+00 0.000e+00 5 0.005000 120855.150638 6.836964 120861.987602 295.00 0.0000e+00 0.000e+00 6 0.006000 120855.060382 6.927220 120861.987601 298.90 0.0000e+00 0.000e+00 7 0.007000 120854.958470 7.028841 120861.987311 303.28 0.0000e+00 0.000e+00
#!/usr/bin/env python3import osimport sysimport matplotlib.pyplot as pltnoplot="unset"filename=sys.argv[1]time_ps=[]temp=[]e_tot=[]e_kin=[]e_pot=[]with open(filename) as file: next(file) for line in file: time_ps.append(float(line.split()[1])) temp.append(float(line.split()[5])) e_pot.append(float(line.split()[2])) e_kin.append(float(line.split()[3])) e_tot.append(float(line.split()[4]))#################### Matplotlib part####################Axes labelsplt.xlabel('Simulation time')plt.ylabel('Temp (K)')#X-axis limits#plt.xlim([1,4.5])if sys.argv[2] == "-temp": plt.plot(time_ps, temp, linestyle='-', linewidth=1) plt.ylabel('Temp (K)')elif sys.argv[2] == "-etot": plt.plot(time_ps, e_tot, linestyle='-', linewidth=1) plt.ylabel('E_tot (au)')elif sys.argv[2] == "-epot": plt.plot(time_ps, e_pot, linestyle='-', linewidth=1) plt.ylabel('E_pot (au)')elif sys.argv[2] == "-ekin": plt.plot(time_ps, e_kin, linestyle='-', linewidth=1) plt.ylabel('E_kin (au)')else: noplot="yes" print("Do ./plot_traj filename -ekin/-etot/-epot/-temp")if noplot!="yes": #Legend #plt.legend(shadow=True, fontsize='small') #Saving figure #plt.savefig(simpname+"-Final-States"+'.png', dpi=200) plt.show()

Simple scatterplot with connecting lines and removed borders

Molecules rendered in VMD with Povray. Assembled using Keynote.

import matplotlib.pyplot as plt#http://stackoverflow.com/questions/20130227/matplotlib-connect-scatterplot-points-with-line-python# Data. Fake x-values and sets of y-dataxval=[1,2,3,4,5,6]sch_e =[-0.147, -0.539, -0.616, -0.6295, -0.581, -0.79]ch_e =[0.126, 0.372, 0.44, 0.483, 0.497, -0.03]#Necessary for getting rid of borders I thinkfig, (ax0) = plt.subplots(1)#First defining scatterplot (points), then lines between thempointsize=40plt.scatter(xval, sch_e, color='blue', marker = 'o', s=pointsize, linewidth=2 )plt.scatter(xval, ch_e, color='red', marker = 's', s=pointsize, linewidth=2)plt.plot(xval, sch_e, linestyle='--', color='blue', linewidth=1)plt.plot(xval, ch_e,linestyle='--', color='red', linewidth=1)# Removing borders and ticksax0.spines['right'].set_visible(False)ax0.spines['top'].set_visible(False)ax0.spines['bottom'].set_visible(False)ax0.yaxis.set_ticks_position('left')# Ticks and limitsplt.xticks([])plt.xlim([0,6.5])plt.ylim([-0.9,0.6])plt.savefig('PLOT.png', dpi=150)plt.show()

Simple vertical energy plot

import matplotlib.pyplot as pltpointsize=4000y_1 =[-120, -100,-50, -30, -19, -14, -10, -6]plt.scatter([1]*len(y_1), y_1, color='blue', marker = '_', s=pointsize, linewidth=2 )plt.xticks([])plt.ylim([-125,0])plt.savefig('PLOT.png', dpi=150)plt.show()

Matplotlib energy diagram.

Modified version of energy_plot by Felix Plasser: http://homepage.univie.ac.at/felix.plasser/chemprogs/python.htm

Alternative energy diagram:

On GitHub

Matplotlib scatterplot with fixed x-axis:

import numpy as npimport matplotlib.pyplot as plt#Parametersnumval = 7alphapar=1.0pointsize=40#Experimental valuesx_EXP =[0.26, 0.17, 0.26, 0.24, 0.32, 0.32, 0.15]x_pos1_fullcal=[0.168941575, 0.179361448, 0.20929276, 0.236673033, 0.175314036, 0.234101261, 0.251987502]x_neg1_fullcal=[0.109209414, 0.134456062, 0.164712074, 0.19479343, 0.140459364, 0.181745533, 0.205329896]x_neg3_fullcal=[-0.001517625, 0.061410167, 0.122576919, 0.153397527, 0.042930299, 0.136413475, 0.161294682]#############################Plottingplt.scatter(x_pos1_fullcal, [1] * numval, alpha=alphapar, color='red',s=pointsize)plt.scatter(x_neg1_fullcal, [2] * numval, alpha=alphapar, color='green',s=pointsize)plt.scatter(x_neg3_fullcal, [3] * numval, alpha=alphapar, color='blue',s=pointsize, marker=">")plt.scatter(x_EXP, [0] * numval, alpha=alphapar, color='black', s=pointsize)plt.savefig('PLOT.png', dpi=200)plt.show()

Matplotlib contour plot for 2D PES plot

(x and y values are the values of the reaction coordinates or constraints in constrained optimizations). Energy is the color dimension.

File "files" here contains list of files (one per line) with the name of the x and y coordinates, e.g. 100_-100.mpi12.out (100 and -100)

File "energies" here contains list of energies (one per line) in hartrees.

import numpy as npimport matplotlib.pyplot as pltfrom numpy import *import pylabhartoeV=27.211399#List of energies and relenergies heree=[]filename="energies"with open(filename) as file: for line in file: if line and (not line.isspace()): num=line.split()[0] e.append(float(num))refenergy=float(min(e))rele=[]for numb in e: rele.append((numb-refenergy)*hartoeV)# X and Y coordinatesfx=[]fy=[]with open("files") as zfile: for line in zfile: bz=line.split(sep="_") fx.append(int(bz[0])) fy.append(int(bz[1].split(sep=".")[0]))#Creating structured arrayst=[]for ind in range(0,len(fx)): st.append((fx[ind], fy[ind], rele[ind])) #Adding mirrored version unless abs(x) and abs(y) are the same (diagonal). if abs(fx[ind]) != abs(fy[ind]): st.append((-fx[ind], -fy[ind], rele[ind]))x = np.array([70,80,90,100,110,120,130,140,150,160,170,175])y = np.array([-70,-80,-90,-100,-110,-120,-130,-140,-150,-160,-170,-175])rele_new=[]curr=[]for xitem in x: for yitem in y: for nz in range(0,len(st)): if xitem in st[nz] and yitem in st[nz]: curr.append(st[nz][2]) energy=st[nz][2] rele_new.append(curr) curr=[]#Now we create contour plotX, Y = np.meshgrid(x, y)cp=plt.contourf(X, Y, rele_new, 50, alpha=.75, cmap='jet')C = plt.contour(X, Y, rele_new, 50, colors='black', linewidth=.5)plt.colorbar(cp)plt.xlabel('Dihedral (°)')plt.ylabel('Dihedral (°)')pylab.savefig('EOM-IP-CC-root1.png', format='png', dpi=200)plt.show()

Matplotlib bar graph with scatter data

By Bardi Benediktsson.

Nice way to plot two different kind of data. In this case, we are interested in number of unbound electrons and RMSD as a function of a functional.

import numpy as npimport matplotlib.pyplot as plt################################################ Change the look of the figure ################################################filename="MyPLot" # The name of your plotplt.rc('font', family='serif') # Change the font to something more fancyBarYAxisLabel="Unbound \u03B1 and \u03B2 electrons" # The desired label of the BarsBarColor="grey" # Color of the barsBarEdges="black" # Color of the edges of the bars. Leave blank if none are desiredLowerBarLimit=0.0HigherBarLimit=50.0width = 0.35 # Width of the barsLineYAxisLabel="RMSD [Å]"LineYAxisColor="red" # Color of the second y-axis AND the color of the line/scatterScatterEdges="black" #Color around the circles on the scatterplot. Leave blank if undesiredLowerLineLimit= 0HigerLineLimit= 0.30# Define the values and the x-axis labels we want to useAxis=("BP86 (0%)", "TPSSh (10%)", "B3LYP (20%)", "BHLYP (50%)", "BHLYP* (70%)", "\u03C9B97M", "\u03C9B97M* \n \u03B1=0.05 \n \u03BC=0.20")RMSD=[0.0877382486668597, 0.07995171219679, 0.147178950989864, 0.224075991543893, 0.244376953701958, 0.160843279635689, 0.106640114352514]UnboundElectrons=[46, 29, 21, 2, 0, 0, 0]########################################################## What is below should not be needed to be touched ###########################################################Defining parameters for plottingN = len(Axis) # Number of functionalsprint(N)ind = np.arange(N) # Enumerate the number of functionalsprint(ind)fig, ax = plt.subplots()ax.tick_params(axis="y",direction="in")# If you want to include the ticks for the x-axis, uncomment the following line#ax.tick_params(axis="x",bottom=True, direction="in")ax.tick_params(axis="x",bottom=False) # If we do not want ticks at the bottomax.spines['top'].set_visible(False) # To remove the top line so the graph/line plot doesn't look like it is in a box plt.ylim(LowerBarLimit, HigherBarLimit) # The y-limit axisplt.bar(ind, UnboundElectrons, width, color=BarColor, edgecolor=BarEdges) # Plotting the barsplt.ylabel(BarYAxisLabel) # Labelling the y-axisplt.xticks(ind, Axis, rotation=45) # Changing the values on the x-axis to axes2 = plt.twinx() # Defining the second axisaxes2.plot(ind, RMSD, color=LineYAxisColor) # Defining the second y-axis and changing the color to redaxes2.set_ylim(LowerLineLimit, HigerLineLimit) # Defining the limit of the second y-axisaxes2.set_ylabel(LineYAxisLabel) # Setting the label of the second y-axisaxes2.tick_params(axis="y",direction="in", color=LineYAxisColor) # Making the ticks point invardsaxes2.scatter(ind, RMSD, s=20, color=LineYAxisColor, marker="o", edgecolors=ScatterEdges) # Introduce points to the line as a scatter plotaxes2.yaxis.label.set_color(LineYAxisColor) # Label to red coloraxes2.spines["right"].set_color(LineYAxisColor) # Changing the color of the right y-axis to redaxes2.tick_params(axis='y', colors=LineYAxisColor) # Changing the ticks to red color axes2.spines['top'].set_visible(False) #Hiding the top part of the boxplt.tight_layout() # Make it all fit nicely fig1=plt.gcf()fig.savefig(filename+".svg", format='svg', frameon=None)plt.show()


Matplotlib bar graph with multiple bars with both negative and positive y-values.

By Bardi Benediktsson.

Multiple bars for each functional to depict the deviation of calculated values to the crystal structure. The way that the x-axis is made is rather dumb as I couldn't find information of how to easily implement at the 0.0 Å. The line is composed of very small and wide bars, which makes it look like a line. This can definitely be improved or performed in a better way.

import numpy as npimport matplotlib.pyplot as plt################################################ Change the look of the figure ################################################plt.rc('font', family='serif') # Change the font Filename="MyBarPlot"#Change the colors of each columncol1=("#F2C078")col2=("#E28413")col3=("#DE3C4B")col4=("#C42847")col5=("#A31621") # Data inputAxis=("BP86", "TPSS", "TPSSh", "B3LYP", "PBE0", "BHLYP", "M06-2X", "\u03C9B97M-D3BJ", "B3LYP \n cluster model")FeFe=[-0.06654, -0.06843, -0.00582, 0.07187, 0.06783, 0.22118, 0.24781, 0.10108, 0.16684]MoFe=[-0.05270, -0.06267, -0.02453, 0.07650, 0.07381, 0.32504, 0.32481, 0.25785, 0.32434]FeC=[-0.04787, -0.04560, -0.00284, 0.04231, 0.04032, 0.14382, 0.16431, 0.07049, 0.08005]FeS=[-0.03960, -0.02956, 0.00585, 0.04432, 0.03812, 0.09893, 0.12447, 0.07285, 0.11086]MoS=[-0.01484, -0.01292, -0.01532, 0.00491, -0.00765, 0.01336, 0.01899, -0.00134, 0.05664]#Labelslabel1="Fe-Fe"label2="Mo-Fe"label3="Fe-C"label4="Fe-S"label5="Mo-S"Ylabel="Mean deviation [Å]" # Axes labelsxticksfinetuning=-15 # Move the text that labels each columnLineWidth=0.001 #Change the width of the baselinebarwidth=0.15 # width of the bars# Limitsxminlim=-0.55xmaxlim=8.55yminlim=-0.10ymaxlim=0.40############################################################### What is below does not need to be changed (generally) ################################################################ So the tickmarks point inwardsfig, ax = plt.subplots()ax.tick_params(axis="y",direction="in")ax.tick_params(axis="x",direction="in",color="white")ax.spines['top'].set_visible(False)ax.spines['right'].set_visible(False)ax.spines['bottom'].set_visible(False)Wxaxis = LineWidthnegWxaxis = -LineWidth# Rather stupid, but to inroduce a fake x-axis, I make bunch of bars with height of Wxaxisfakepos=[Wxaxis, Wxaxis, Wxaxis, Wxaxis, Wxaxis, Wxaxis, Wxaxis, Wxaxis, Wxaxis]fakeneg=[-Wxaxis, -Wxaxis, -Wxaxis, -Wxaxis, -Wxaxis, -Wxaxis, -Wxaxis, -Wxaxis,- Wxaxis]print(fakepos)print(fakeneg)print(Axis)N = len(Axis) # Number of functionalsind = np.arange(N) # Enumerate the number of functionalswidth = barwidth # Width of the barsplt.bar(ind-width*2, FeFe, width, label=label1, color=col1, edgecolor="black")plt.bar(ind-width*1, MoFe, width, label=label2, color=col2, edgecolor="black")plt.bar(ind+width*0, FeC, width, label=label3, color=col3, edgecolor="black")plt.bar(ind+width*1, FeS, width, label=label4, color=col4, edgecolor="black")plt.bar(ind+width*2, MoS, width, label=label5, color=col5, edgecolor="black")plt.bar(ind, fakepos, width*9, color="black", edgecolor="black", linewidth=barwidth*2)plt.bar(ind, fakeneg, width*9, color="black", edgecolor="black", linewidth=barwidth*2)plt.xlim([xminlim,xmaxlim])plt.ylim([yminlim,ymaxlim])plt.xticks(ind, Axis, rotation=45)plt.ylabel(Ylabel)plt.legend(loc="best", edgecolor="black")plt.tight_layout()# Move the labels closer to the barsax.tick_params(axis="x",direction="in", pad=xticksfinetuning)fig1=plt.gcf()fig.savefig(Filename+".svg", format='svg', frameon=None)plt.show()