Talk:Mod:Hunt Research Group/extract optimized geom

From wiki
Revision as of 01:13, 8 August 2024 by Hashmi (talk | contribs) (Created page with "===Python Script to Extract the Optimized Geometry from a Gaussian/ORCA output file and write the coordinates to an XYZ or com file=== *Copy the code into a script called "Ext...")
(diff) ← Older revision | Latest revision (diff) | Newer revision → (diff)
Jump to navigation Jump to search

Python Script to Extract the Optimized Geometry from a Gaussian/ORCA output file and write the coordinates to an XYZ or com file

  • Copy the code into a script called "Extract_Optimized_Geometry_Gaussian+ORCA.py"
  • To execute the script type "python3 Extract_Optimized_Geometry_Gaussian+ORCA.py file_name.log output_file.xyz xyz"
  • It takes the name of output file (.xyz or .com) in command line second argument and the type of file (xyz or com) in the third argument.
  • It will produce the required output file (here output_file.xyz)
import sys # Import sys to write to output files

print("#----------------------------------------------------------------------------------------------#")
print("#----------------------------------------------------------------------------------------------#")
print("#         Script to Extract the Optimized Geometry from a Gaussian or ORCA output File         #")
print("#----------------------------------------------------------------------------------------------#")
print("#----------------------------------------------------------------------------------------------#")

# ASCII FONTS from: http://patorjk.com/software/taag/
# Font = "Big", "Ivrit"
def Stamp_Hashmi():
    print('\n')
    print(' _____                 _                      _     _')
    print('|  __ \               | |                    | |   | |')
    print('| |  | | _____   _____| | ___  _ __   ___  __| |   | |__  _   _')
    print("| |  | |/ _ \ \ / / _ \ |/ _ \| '_ \ / _ \/ _` |   | '_ \| | | |")
    print('| |__| |  __/\ V /  __/ | (_) | |_) |  __/ (_| |   | |_) | |_| |')
    print('|_____/ \___| \_/ \___|_|\___/| .__/ \___|\__,_|   |_.__/ \__, |')
    print('| |  | |         | |          | |(_)                       __/ |')
    print('| |__| | __ _ ___| |__  _ __ _|_| _                       |___/')
    print("|  __  |/ _` / __| '_ \| '_ ` _ \| |")
    print('| |  | | (_| \__ \ | | | | | | | | |')
    print('|_|  |_|\__,_|___/_| |_|_| |_| |_|_|')

    print("Last Updated: August 08, 2024\n")

# A Dictionary to Convert Atomic Symbols to Atomic Numbers
SymbolToNumber = {
"H" :1, "He" :2, "Li" :3, "Be" :4, "B" :5, "C" :6, "N" :7, "O" :8, "F" :9,
"Ne" :10, "Na" :11, "Mg" :12, "Al" :13, "Si" :14, "P" :15, "S"  :16, "Cl" :17,
"Ar" :18, "K"  :19, "Ca" :20, "Sc" :21, "Ti" :22, "V"  :23, "Cr" :24,
"Mn" :25, "Fe" :26, "Co" :27, "Ni" :28, "Cu" :29, "Zn" :30, "Ga" :31,
"Ge" :32, "As" :33, "Se" :34, "Br" :35, "Kr" :36, "Rb" :37, "Sr" :38,
"Y"  :39, "Zr" :40, "Nb" :41, "Mo" :42, "Tc" :43, "Ru" :44, "Rh" :45,
"Pd" :46, "Ag" :47, "Cd" :48, "In" :49, "Sn" :50, "Sb" :51, "Te" :52,
"I"  :53, "Xe" :54, "Cs" :55, "Ba" :56, "La" :57, "Ce" :58, "Pr" :59,
"Nd" :60, "Pm" :61, "Sm" :62, "Eu" :63, "Gd" :64, "Tb" :65, "Dy" :66,
"Ho" :67, "Er" :68, "Tm" :69, "Yb" :70, "Lu" :71, "Hf" :72, "Ta" :73,
"W"  :74, "Re" :75, "Os" :76, "Ir" :77, "Pt" :78, "Au" :79, "Hg" :80,
"Tl" :81, "Pb" :82, "Bi" :83, "Po" :84, "At" :85, "Rn" :86, "Fr" :87,
"Ra" :88, "Ac" :89, "Th" :90, "Pa" :91, "U"  :92, "Np" :93, "Pu" :94,
"Am" :95, "Cm" :96, "Bk" :97, "Cf" :98, "Es" :99, "Fm" :100, "Md" :101,
"No" :102, "Lr" :103, "Rf" :104, "Db" :105, "Sg" :106, "Bh" :107,
"Hs" :108, "Mt" :109, "Ds" :110, "Rg" :111, "Cn" :112, "Uut":113,
"Fl" :114, "Uup":115, "Lv" :116, "Uus":117, "Uuo":118}

# Invert the Above: Atomic Numbers to Atomic Symbols
NumberToSymbol = {v: k for k, v in SymbolToNumber.items()}

class ExtractCoords(object):
    """A Class for Extracting the Optimized Coordinates from a Gaussian or ORCA ouput File"""
    def __init__(self, file):
        self.file = file

    def extractCoordinates(self, file):
        f = open(file, 'r')
        program = "N/A"
        for line in f:
            if line.find("Entering Gaussian System, Link 0=g16") != -1 or line.find("Copyright (c) 1988-2019, Gaussian, Inc.  All Rights Reserved.") != -1:
                print("Reading Gaussian output file: ", file, '\n')
                program = "g16"
                break
            elif line.find("* O   R   C   A *") != -1:
                print("Reading ORCA output file: ", file, '\n')
                program = "orca"
                break
        f.close()

        geom = []
        if program == "g16":
            f = open(file, 'r')
            for line in f:
                if line.find("Standard orientation:") != -1:
                    del geom[:]
                    for i in range(0, 4):
                        readStructure = f.__next__()
                    while True:
                        readStructure = f.__next__()
                        if readStructure.find("-----------") == -1:
                            readStructure = readStructure.split()
                            geom.append(readStructure)
                        else:
                            break
            for i in geom:
                del i[0:3:2]
                i[0] = NumberToSymbol[int(i[0])]
                i[1] = float(i[1])
                i[2] = float(i[2])
                i[3] = float(i[3])
            return geom
        elif program == "orca":
            f = open(file, 'r')
            for line in f:
                if line.find("CARTESIAN COORDINATES (ANGSTROEM)") != -1:
                    del geom[:]
                    readStructure = f.__next__()
                    while True:
                        readStructure = f.__next__()
                        if readStructure and readStructure.strip():
                            readStructure = readStructure.split()
                            geom.append(readStructure)
                        else:
                            break
            for i in geom:
                i[1] = float(i[1])
                i[2] = float(i[2])
                i[3] = float(i[3])
            return geom

    def extractEnergies(self, file):
        f = open(file, 'r')
        program = "N/A"
        for line in f:
            if line.find("Entering Gaussian System, Link 0=g16") != -1 or line.find(
                    "Copyright (c) 1988-2019, Gaussian, Inc.  All Rights Reserved.") != -1:
                program = "g16"
                break
            elif line.find("* O   R   C   A *") != -1:
                program = "orca"
                break
        f.close()
        Charge = 0
        Multiplicity = 1
        SCF_Energy = "--SCF Energy NOT FOUND--"
        Zero_point_energy = "--ZPE NOT FOUND--"
        Gibbs_Energy = "--Delta-G NOT FOUND--"
        Rxn_Enthalpy = "--Enthalpy NOT FOUND--"
        SCF_E_Kcal = "--SCF_E_Kcal NOT FOUND--"
        Gibbs_E_Kcal = "--Gibbs_E_Kcal NOT FOUND--"
        Zero_point_energy_Kcal = "--ZPE_Kcal NOT FOUND--"
        Enthalpy_of_Rxn_Kcal = "--Enthalpy_Kcal NOT FOUND--"
        if program == "g16":
            f = open(file, 'r')
            input = f.readlines()
            for line in input:
                if "Charge =" in line:
                    complete_line = line.split()
                    Charge = int(complete_line[2])
                    Multiplicity = int(complete_line[5])
                if "SCF Done:" in line:
                    complete_line = line.split()
                    SCF_Energy = float(complete_line[4])
                    SCF_E_Kcal = float(complete_line[4]) * 627.5095
                if "Zero-point correction=" in line:
                    complete_line = line.split()
                    Zero_point_energy = float(complete_line[2])
                    Zero_point_energy_Kcal = float(complete_line[2]) * 627.5095
                if "Sum of electronic and thermal Free Energies=" in line:
                    complete_line = line.split()
                    Gibbs_Energy = float(complete_line[7])
                    Gibbs_E_Kcal = float(complete_line[7]) * 627.5095
                if "Sum of electronic and thermal Enthalpies=" in line:
                    complete_line = line.split()
                    Rxn_Enthalpy = float(complete_line[6])
                    Enthalpy_of_Rxn_Kcal = float(complete_line[6]) * 627.5095
            return Charge, Multiplicity, SCF_Energy, SCF_E_Kcal, Zero_point_energy, Zero_point_energy_Kcal, Gibbs_Energy, Gibbs_E_Kcal, Rxn_Enthalpy, Enthalpy_of_Rxn_Kcal
        elif program == "orca":
            f = open(file, 'r')
            input = f.readlines()
            for line in input:
                if "Total Charge" in line:
                    complete_line = line.split()
                    Charge = int(complete_line[3])
                    Multiplicity = int(complete_line[6])
                if "FINAL SINGLE POINT ENERGY" in line:
                    complete_line = line.split()
                    SCF_Energy = float(complete_line[4])
                    SCF_E_Kcal = float(complete_line[4]) * 627.5095
                if "Total Zero-Point Correction" in line:
                    complete_line = line.split()
                    Zero_point_energy = float(complete_line[4])
                    Zero_point_energy_Kcal = float(complete_line[4]) * 627.5095
                if "Final Gibbs free energy" in line:
                    complete_line = line.split()
                    Gibbs_Energy = float(complete_line[5])
                    Gibbs_E_Kcal = float(complete_line[5]) * 627.5095
                if "Final Enthalpy" in line:
                    complete_line = line.split()
                    Rxn_Enthalpy = float(complete_line[3])
                    Enthalpy_of_Rxn_Kcal = float(complete_line[3]) * 627.5095
            return Charge, Multiplicity, SCF_Energy, SCF_E_Kcal, Zero_point_energy, Zero_point_energy_Kcal, Gibbs_Energy, Gibbs_E_Kcal, Rxn_Enthalpy, Enthalpy_of_Rxn_Kcal

    def SCF_Energy(self, file):
        f = open(file, 'r')
        Charge = 0
        Multiplicity = 1
        SCF_Energy = "--NOT FOUND--"
        SCF_E_kJ = "--NOT FOUND--"
        input = f.readlines()
        #filename = os.path.splitext(os.path.basename(input_file))[0]
        for line in input:
            if "Charge =" in line:
                complete_line = line.split()
                Charge = int(complete_line[2])
                Multiplicity = int(complete_line[5])
            if "SCF Done:" in line:
                complete_line = line.split()
                SCF_Energy = complete_line[4]
                SCF_E_kJ = float(SCF_Energy) * 2625.50
        return [Charge, Multiplicity, SCF_Energy, SCF_E_kJ]

if __name__ == "__main__":
    import sys
    Stamp_Hashmi()
    print("Usage: 1. Extraction of Optimized Geometry:   python3 GausORCA.py inputfile outputfile format")
    print("       2. Extraction of Thermochemistry Data: python3 GausORCA.py inputfile outputfile -thermo\n")

    # Check the number of arguments
    if len(sys.argv) < 4:
        print("Error: Missing arguments.")
        print("Usage: python3 GausORCA.py inputfile outputfile format")
        print("       format: Specify 'xyz' for XYZ format or 'com' for Gaussian input format")
        sys.exit(1)

    inputfile = sys.argv[1]
    outputfile = sys.argv[2]
    format = sys.argv[3]

    # Create an instance of ExtractCoords class
    EC = ExtractCoords(inputfile)

    # Extract Coordinates
    geom = EC.extractCoordinates(inputfile)
    scf_energy = EC.SCF_Energy(inputfile)

    if format == 'xyz':
        with open(outputfile, 'w') as f:
            f.write(f"{len(geom)}\nCharge = {scf_energy[0]} Multiplicity = {scf_energy[1]} SCF Energy = {scf_energy[2]} au\n")
            for atom in geom:
                f.write(f"{atom[0]} {atom[1]} {atom[2]} {atom[3]}\n")
    elif format == 'com':
        with open(outputfile, 'w') as f:
            f.write("%nprocshared=4\n")
            f.write("%mem=4GB\n")
            f.write("# opt freq b3lyp/6-31g(d) geom=connectivity\n\n")
            f.write("Generated by GausORCA.py\n\n")
            f.write("0 1\n")
            for atom in geom:
                f.write(f"{atom[0]} {atom[1]} {atom[2]} {atom[3]}\n")
            f.write("\n")

    print(f"Optimized geometry extracted and saved to {outputfile} in {format} format.")