Talk:Mod:Hunt Research Group/extract optimized geom
Jump to navigation
Jump to search
Python Script to Extract the Optimized Geometry from a Gaussian/ORCA output file and write the coordinates to an XYZ or com file
- Copy the code into a script called "Extract_Optimized_Geometry_Gaussian+ORCA.py"
- To execute the script type "python3 Extract_Optimized_Geometry_Gaussian+ORCA.py file_name.log output_file.xyz xyz"
- It takes the name of output file (.xyz or .com) in command line second argument and the type of file (xyz or com) in the third argument.
- It will produce the required output file (here output_file.xyz)
- If you want to use the script for all the .log files in the present directory, you can use it with the command below:
for i in *.log; do base_name=$(basename "$i" .log); python3 Extract_Optimized_Geometry_Gaussian+ORCA.py "$i" "${base_name}.xyz" xyz; done
import sys # Import sys to write to output files
print("#----------------------------------------------------------------------------------------------#")
print("#----------------------------------------------------------------------------------------------#")
print("# Script to Extract the Optimized Geometry from a Gaussian or ORCA output File #")
print("#----------------------------------------------------------------------------------------------#")
print("#----------------------------------------------------------------------------------------------#")
# ASCII FONTS from: http://patorjk.com/software/taag/
# Font = "Big", "Ivrit"
def Stamp_Hashmi():
print('\n')
print(' _____ _ _ _')
print('| __ \ | | | | | |')
print('| | | | _____ _____| | ___ _ __ ___ __| | | |__ _ _')
print("| | | |/ _ \ \ / / _ \ |/ _ \| '_ \ / _ \/ _` | | '_ \| | | |")
print('| |__| | __/\ V / __/ | (_) | |_) | __/ (_| | | |_) | |_| |')
print('|_____/ \___| \_/ \___|_|\___/| .__/ \___|\__,_| |_.__/ \__, |')
print('| | | | | | | |(_) __/ |')
print('| |__| | __ _ ___| |__ _ __ _|_| _ |___/')
print("| __ |/ _` / __| '_ \| '_ ` _ \| |")
print('| | | | (_| \__ \ | | | | | | | | |')
print('|_| |_|\__,_|___/_| |_|_| |_| |_|_|')
print("Last Updated: August 08, 2024\n")
# A Dictionary to Convert Atomic Symbols to Atomic Numbers
SymbolToNumber = {
"H" :1, "He" :2, "Li" :3, "Be" :4, "B" :5, "C" :6, "N" :7, "O" :8, "F" :9,
"Ne" :10, "Na" :11, "Mg" :12, "Al" :13, "Si" :14, "P" :15, "S" :16, "Cl" :17,
"Ar" :18, "K" :19, "Ca" :20, "Sc" :21, "Ti" :22, "V" :23, "Cr" :24,
"Mn" :25, "Fe" :26, "Co" :27, "Ni" :28, "Cu" :29, "Zn" :30, "Ga" :31,
"Ge" :32, "As" :33, "Se" :34, "Br" :35, "Kr" :36, "Rb" :37, "Sr" :38,
"Y" :39, "Zr" :40, "Nb" :41, "Mo" :42, "Tc" :43, "Ru" :44, "Rh" :45,
"Pd" :46, "Ag" :47, "Cd" :48, "In" :49, "Sn" :50, "Sb" :51, "Te" :52,
"I" :53, "Xe" :54, "Cs" :55, "Ba" :56, "La" :57, "Ce" :58, "Pr" :59,
"Nd" :60, "Pm" :61, "Sm" :62, "Eu" :63, "Gd" :64, "Tb" :65, "Dy" :66,
"Ho" :67, "Er" :68, "Tm" :69, "Yb" :70, "Lu" :71, "Hf" :72, "Ta" :73,
"W" :74, "Re" :75, "Os" :76, "Ir" :77, "Pt" :78, "Au" :79, "Hg" :80,
"Tl" :81, "Pb" :82, "Bi" :83, "Po" :84, "At" :85, "Rn" :86, "Fr" :87,
"Ra" :88, "Ac" :89, "Th" :90, "Pa" :91, "U" :92, "Np" :93, "Pu" :94,
"Am" :95, "Cm" :96, "Bk" :97, "Cf" :98, "Es" :99, "Fm" :100, "Md" :101,
"No" :102, "Lr" :103, "Rf" :104, "Db" :105, "Sg" :106, "Bh" :107,
"Hs" :108, "Mt" :109, "Ds" :110, "Rg" :111, "Cn" :112, "Uut":113,
"Fl" :114, "Uup":115, "Lv" :116, "Uus":117, "Uuo":118}
# Invert the Above: Atomic Numbers to Atomic Symbols
NumberToSymbol = {v: k for k, v in SymbolToNumber.items()}
class ExtractCoords(object):
"""A Class for Extracting the Optimized Coordinates from a Gaussian or ORCA ouput File"""
def __init__(self, file):
self.file = file
def extractCoordinates(self, file):
f = open(file, 'r')
program = "N/A"
for line in f:
if line.find("Entering Gaussian System, Link 0=g16") != -1 or line.find("Copyright (c) 1988-2019, Gaussian, Inc. All Rights Reserved.") != -1:
print("Reading Gaussian output file: ", file, '\n')
program = "g16"
break
elif line.find("* O R C A *") != -1:
print("Reading ORCA output file: ", file, '\n')
program = "orca"
break
f.close()
geom = []
if program == "g16":
f = open(file, 'r')
for line in f:
if line.find("Standard orientation:") != -1:
del geom[:]
for i in range(0, 4):
readStructure = f.__next__()
while True:
readStructure = f.__next__()
if readStructure.find("-----------") == -1:
readStructure = readStructure.split()
geom.append(readStructure)
else:
break
for i in geom:
del i[0:3:2]
i[0] = NumberToSymbol[int(i[0])]
i[1] = float(i[1])
i[2] = float(i[2])
i[3] = float(i[3])
return geom
elif program == "orca":
f = open(file, 'r')
for line in f:
if line.find("CARTESIAN COORDINATES (ANGSTROEM)") != -1:
del geom[:]
readStructure = f.__next__()
while True:
readStructure = f.__next__()
if readStructure and readStructure.strip():
readStructure = readStructure.split()
geom.append(readStructure)
else:
break
for i in geom:
i[1] = float(i[1])
i[2] = float(i[2])
i[3] = float(i[3])
return geom
def extractEnergies(self, file):
f = open(file, 'r')
program = "N/A"
for line in f:
if line.find("Entering Gaussian System, Link 0=g16") != -1 or line.find(
"Copyright (c) 1988-2019, Gaussian, Inc. All Rights Reserved.") != -1:
program = "g16"
break
elif line.find("* O R C A *") != -1:
program = "orca"
break
f.close()
Charge = 0
Multiplicity = 1
SCF_Energy = "--SCF Energy NOT FOUND--"
Zero_point_energy = "--ZPE NOT FOUND--"
Gibbs_Energy = "--Delta-G NOT FOUND--"
Rxn_Enthalpy = "--Enthalpy NOT FOUND--"
SCF_E_Kcal = "--SCF_E_Kcal NOT FOUND--"
Gibbs_E_Kcal = "--Gibbs_E_Kcal NOT FOUND--"
Zero_point_energy_Kcal = "--ZPE_Kcal NOT FOUND--"
Enthalpy_of_Rxn_Kcal = "--Enthalpy_Kcal NOT FOUND--"
if program == "g16":
f = open(file, 'r')
input = f.readlines()
for line in input:
if "Charge =" in line:
complete_line = line.split()
Charge = int(complete_line[2])
Multiplicity = int(complete_line[5])
if "SCF Done:" in line:
complete_line = line.split()
SCF_Energy = float(complete_line[4])
SCF_E_Kcal = float(complete_line[4]) * 627.5095
if "Zero-point correction=" in line:
complete_line = line.split()
Zero_point_energy = float(complete_line[2])
Zero_point_energy_Kcal = float(complete_line[2]) * 627.5095
if "Sum of electronic and thermal Free Energies=" in line:
complete_line = line.split()
Gibbs_Energy = float(complete_line[7])
Gibbs_E_Kcal = float(complete_line[7]) * 627.5095
if "Sum of electronic and thermal Enthalpies=" in line:
complete_line = line.split()
Rxn_Enthalpy = float(complete_line[6])
Enthalpy_of_Rxn_Kcal = float(complete_line[6]) * 627.5095
return Charge, Multiplicity, SCF_Energy, SCF_E_Kcal, Zero_point_energy, Zero_point_energy_Kcal, Gibbs_Energy, Gibbs_E_Kcal, Rxn_Enthalpy, Enthalpy_of_Rxn_Kcal
elif program == "orca":
f = open(file, 'r')
input = f.readlines()
for line in input:
if "Total Charge" in line:
complete_line = line.split()
Charge = int(complete_line[3])
Multiplicity = int(complete_line[6])
if "FINAL SINGLE POINT ENERGY" in line:
complete_line = line.split()
SCF_Energy = float(complete_line[4])
SCF_E_Kcal = float(complete_line[4]) * 627.5095
if "Total Zero-Point Correction" in line:
complete_line = line.split()
Zero_point_energy = float(complete_line[4])
Zero_point_energy_Kcal = float(complete_line[4]) * 627.5095
if "Final Gibbs free energy" in line:
complete_line = line.split()
Gibbs_Energy = float(complete_line[5])
Gibbs_E_Kcal = float(complete_line[5]) * 627.5095
if "Final Enthalpy" in line:
complete_line = line.split()
Rxn_Enthalpy = float(complete_line[3])
Enthalpy_of_Rxn_Kcal = float(complete_line[3]) * 627.5095
return Charge, Multiplicity, SCF_Energy, SCF_E_Kcal, Zero_point_energy, Zero_point_energy_Kcal, Gibbs_Energy, Gibbs_E_Kcal, Rxn_Enthalpy, Enthalpy_of_Rxn_Kcal
def SCF_Energy(self, file):
f = open(file, 'r')
Charge = 0
Multiplicity = 1
SCF_Energy = "--NOT FOUND--"
SCF_E_kJ = "--NOT FOUND--"
input = f.readlines()
#filename = os.path.splitext(os.path.basename(input_file))[0]
for line in input:
if "Charge =" in line:
complete_line = line.split()
Charge = int(complete_line[2])
Multiplicity = int(complete_line[5])
if "SCF Done:" in line:
complete_line = line.split()
SCF_Energy = complete_line[4]
SCF_E_kJ = float(SCF_Energy) * 2625.50
return [Charge, Multiplicity, SCF_Energy, SCF_E_kJ]
if __name__ == "__main__":
import sys
Stamp_Hashmi()
print("Usage: 1. Extraction of Optimized Geometry: python3 GausORCA.py inputfile outputfile format")
print(" 2. Extraction of Thermochemistry Data: python3 GausORCA.py inputfile outputfile -thermo\n")
# Check the number of arguments
if len(sys.argv) < 4:
print("Error: Missing arguments.")
print("Usage: python3 GausORCA.py inputfile outputfile format")
print(" format: Specify 'xyz' for XYZ format or 'com' for Gaussian input format")
sys.exit(1)
inputfile = sys.argv[1]
outputfile = sys.argv[2]
format = sys.argv[3]
# Create an instance of ExtractCoords class
EC = ExtractCoords(inputfile)
# Extract Coordinates
geom = EC.extractCoordinates(inputfile)
scf_energy = EC.SCF_Energy(inputfile)
if format == 'xyz':
with open(outputfile, 'w') as f:
f.write(f"{len(geom)}\nCharge = {scf_energy[0]} Multiplicity = {scf_energy[1]} SCF Energy = {scf_energy[2]} au\n")
for atom in geom:
f.write(f"{atom[0]} {atom[1]} {atom[2]} {atom[3]}\n")
elif format == 'com':
with open(outputfile, 'w') as f:
f.write("%nprocshared=4\n")
f.write("%mem=4GB\n")
f.write("# opt freq b3lyp/6-31g(d) geom=connectivity\n\n")
f.write("Generated by GausORCA.py\n\n")
f.write("0 1\n")
for atom in geom:
f.write(f"{atom[0]} {atom[1]} {atom[2]} {atom[3]}\n")
f.write("\n")
print(f"Optimized geometry extracted and saved to {outputfile} in {format} format.")