Talk:Mod:Hunt Research Group/extract GOAT conformer to com

From wiki
Jump to navigation Jump to search
'''Andrew (Psi) Booth script 16-01-2026.  Takes the .finalensemble.xyz output of an Orca GOAT computation and allows user to extract only desired conformers
from the output into Gaussian input .com files.  Based on Muhammad Ali Hashmi's make_opt_calc_from_xyz_gaussian.py.'''

import sys
import re
import ast

print("#-------------------------------------------------------------------#")
print("Script to create Gaussian Input Files From an Orca GOAT Ensemble File")
print("#-------------------------------------------------------------------#")

class ExtractCoords:
    """Class for extracting coordinates from the ensemble file"""
    def __init__(self, file, iteration):
        self.file=file
        self.iteration=iteration

    def extractCoordinates(self):
        with open(self.file, 'r') as xyz_file:
            lines=xyz_file.readlines()

        self.n_atoms=int(lines[0].strip())
        geometry=[]
        
        '''finalensemble.xyz files have two lines indicating number of atoms and energy&convergence respectively.  In order to skip these in our geometry extraction,
        we must add 2 lines before starting our extraction of the geometry for each xyz block we are skipping, hence we begin our for loop at (self.n_atoms[+2])*self.iteration
         as opposed to just self.n_atoms*self.iteration+2.  A similar argument holds for the ending index.'''
        for line in lines[(self.n_atoms+2)*self.iteration-self.n_atoms:(self.n_atoms+2)*self.iteration]:
            parts=line.split()
            if len(parts)==4:
                element, x, y, z = parts[0], float(parts[1]), float(parts[2]), float(parts[3])
                geometry.append([element, x, y, z])
            else:
                print(f"Line skipped (not in format): {line.strip()}")

        return geometry

class WriteInputFiles:
    """Class for writing geometries to Gaussian Input File"""
    def __init__(self, charge, multiplicity, geometry, filename, routecard, title, prefix="", suffix=""):
        self.charge=charge
        self.multiplicity=multiplicity
        self.geometry=geometry
        self.routecard=routecard
        base_filename=filename.split('\\').pop().split('/').pop().rsplit('.', 1)[0]
        self.output_filename=f"{prefix}{base_filename}{suffix}"

    def write_gaussian_file(self, geometry):
        output_file=sys.stdout
        
        #In case something goes wrong with opening the file
        try:
          sys.stdout=open(f"{self.output_filename}.com", "w")
        except:
          print("Unexpected error", sys.exc_info()[0])
          raise
        
        #main function resumes

        print(f"%chk={self.output_filename}.chk")
        
        #user-configured routecard options.  Add more using additional elif statements if needed.
        if("opt" in self.routecard):
            print(Route_Card_Opt+"\n")
        elif("freq" in self.routecard):
            print(Route_Card_Freq+"\n")
        elif("ecd" in self.routecard):
            print(Route_Card_ECD+"\n")
        else:
            raise Exception("WARNING: No route card given.  This must be manually added before running the job.  Script terminated with com file(s) partially written.")
        
        
        print(title+"\n")
        print(self.charge, self.multiplicity)
        for atom in geometry:
            print(f" {atom[0]:<3}   {atom[1]: .8f}   {atom[2]: .8f}   {atom[3]: .8f}")
        print("\n")
        sys.stdout.close()
        sys.stdout = output_file
        
        
        
##################################################
#               Routecards                       #
##################################################

Route_Card_Opt = "# opt=tight m062x 6-311g(d,p) int=ultrafine scf=conver=10 scrf=(smd, solvent=water)"
Route_Card_Freq = "# freq=raman m062x 6-311g(d,p) int=ultrafine scf=conver=10 scrf=(smd, solvent=water) pop=(full, NBO7, dipole, chelpg) prop"
Route_Card_ECD = "# "

####################################
#    Main program starts here      #
####################################

if len(sys.argv) < 2:
    print("""Usage: python3 script.py <xyz file> <routecard> <title> <structurenumbers> <charge=0> <multiplicity=1> <prefix=''> <suffix=''>")
    structurenumbers can be python list (NO SPACES), single integer, or 'all'. """)
    sys.exit(1)


xyz_file=sys.argv[1]
routecard=sys.argv[2]
title=sys.argv[3]
try:
  iteration=ast.literal_eval(sys.argv[4]) if len(sys.argv) >= 5 else [1] #IMPORTANT: will default to making a com file for only the lowest energy conformer outputted by GOAT.  Be careful with conformer selection!
except SyntaxError:
  print("""==============================================================
           Try again and make sure that there are no spaces in your list!
           ==============================================================""")
if type(iteration)==list:
  pass
elif type(iteration)==int:
  iteration=[iteration] #convert int to single-element list for ease of processing
else:
  print(type(iteration))
  print("Wrong input type for structurenumbers - should be pythonic list.  Defaulting to structure 1 only.")
  iteration=[1]
charge=sys.argv[5] if len(sys.argv) > 5 else 0
multiplicity=sys.argv[6] if len(sys.argv) > 6 else 1
prefix=sys.argv[7] if len(sys.argv) > 7 else ""
suffix=sys.argv[8] if len(sys.argv) > 8 else ""
print(len(sys.argv))

#Extract xyz coordinates from the file and write to new .com file
if iteration !="all":
  for i in iteration:
      molecule=ExtractCoords(xyz_file, int(i)) 
      geometry=molecule.extractCoordinates()
      write_input=WriteInputFiles(charge, multiplicity, geometry, xyz_file, routecard, title, prefix, str(int(i))+suffix)
      write_input.write_gaussian_file(geometry)
      print("Input file for conformer "+str(i)+" written.")
else:
  with open(xyz_file, 'r') as f:
    linecount=sum(1 for line in f)
  molecule=ExtractCoords(xyz_file, 0)
  geometry=molecule.extractCoordinates()
  n_atoms=molecule.n_atoms
  it=0
  while ((n_atoms+1)*it+2<linecount):
    molecule=ExtractCoords(xyz_file, it)
    geometry=molecule.extractCoordinates()
    write_input=WriteInputFiles(charge, multiplicity, geometry, xyz_file, routecard, title, prefix, suffix)
    write_input.write_gaussian_file(geometry)

print("All done!  The input files are written.")