Mod:Hunt Research Group:nmr of conformers
Jump to navigation
Jump to search
Boltzmann NMR of Conformers Script
- script to extract NMR data from all conformed log files into an excel file (csv) for with individual shielding tensors, Boltzmann averaged tensors, Boltzmann factor etc
- for ALL the (log) files in a directory
- simply copy and paste this script into a file boltzmann_NMR_of_conformers.py
- then run with "python3 boltzmann_NMR_of_conformers.py"
import os
import csv
import time
import math
from collections import Counter
# Constants
T = 298.15
R = 0.0083144626 # kJ/mol·K
RT = R * T # ≈ 2.47895702419
# Header
print("#====================== NMR Shielding Tensor Processing Script =========================#")
print("# Extracts energies/tensors, computes Boltzmann (consistent energy basis) #")
print("#========================================================================================#\n")
# Function to extract NMR data from Gaussian log file
def NMR_shielding_tensors(input_file):
with open(input_file, 'r') as f:
lines = f.readlines()
filename = os.path.splitext(os.path.basename(input_file))[0]
atom_nos, symbols, tensors = [], [], []
scf_kj = None
gibbs_kj = None
# Read energies (reverse to get latest)
for line in reversed(lines):
if ("Sum of electronic and thermal Free Energies" in line) and (gibbs_kj is None):
parts = line.split()
gibbs_kj = float(parts[7]) * 2625.5 # Hartree -> kJ/mol
elif ("SCF Done" in line) and (scf_kj is None):
parts = line.split()
scf_kj = float(parts[4]) * 2625.5
if gibbs_kj is not None and scf_kj is not None:
break
# Read shielding tensors
for line in lines:
if "Isotropic =" in line:
parts = line.split()
atom_nos.append(int(parts[0]))
symbols.append(parts[1])
tensors.append(float(parts[4]))
return {
"filename": filename,
"gibbs_kj": gibbs_kj,
"scf_kj": scf_kj,
# energy_kj will be set later based on global energy_mode
"energy_kj": None,
"atom_nos": atom_nos,
"symbols": symbols,
"tensors": tensors
}
# Extract data from all .log files
conformer_data = []
for file in os.listdir():
if file.endswith(".log") or file.endswith(".LOG"):
data = NMR_shielding_tensors(file)
conformer_data.append(data)
if len(data["tensors"]) == 0:
print(f"⚠️ {file}: No NMR shielding tensors found. It will be EXCLUDED from Boltzmann calculations (kept in CSV).")
else:
print(f"Processed tensors: {file}")
if len(conformer_data) == 0:
print("❌ No .log files found in the current directory.")
raise SystemExit(1)
# Decide energy basis: ONLY use Gibbs if EVERY conformer has it; otherwise use SCF for ALL
all_have_gibbs = all(c["gibbs_kj"] is not None for c in conformer_data)
energy_mode = "gibbs" if all_have_gibbs else "scf"
if energy_mode == "gibbs":
print("\n✅ Using Gibbs Free Energies for ALL conformers (all files had Gibbs).")
else:
print("\nℹ️ Not all conformers had Gibbs Free Energy; using SCF energies for ALL conformers to keep the basis consistent.")
# Set the energy used according to chosen basis
for c in conformer_data:
c["energy_kj"] = c["gibbs_kj"] if energy_mode == "gibbs" else c["scf_kj"]
# Safety: ensure we have energies for sorting; if any file lacks even SCF, warn
missing_energy = [c["filename"] for c in conformer_data if c["energy_kj"] is None]
if missing_energy:
print("⚠️ These files had no usable energy (SCF/Gibbs not found) and will be placed last in the CSV:", ", ".join(missing_energy))
# Sort by used energy, placing missing at end
conformer_data.sort(key=lambda x: (x["energy_kj"] is None, x["energy_kj"] if x["energy_kj"] is not None else float('inf')))
# Build list of "valid" conformers for Boltzmann: must have tensors and matching atom count
nonempty = [c for c in conformer_data if len(c["tensors"]) > 0]
if len(nonempty) == 0:
print("\n❗ No conformers with shielding tensors found. Will write energies only; no Boltzmann averages possible.")
num_atoms = 0
valid_conformers = []
else:
# Choose the most common tensor length to avoid mismatches silently breaking averages
length_counts = Counter(len(c["tensors"]) for c in nonempty)
target_num_atoms, _ = max(length_counts.items(), key=lambda kv: kv[1])
# Anything not matching target count is excluded (warn)
valid_conformers = []
for c in nonempty:
if len(c["tensors"]) == target_num_atoms:
valid_conformers.append(c)
else:
print(f"⚠️ {c['filename']}: Tensor length {len(c['tensors'])} differs from majority ({target_num_atoms}). Excluding from Boltzmann calc (kept in CSV).")
num_atoms = target_num_atoms if valid_conformers else 0
# Compute Boltzmann only over valid_conformers
partition_function = 0.0
min_energy = None
if valid_conformers:
# Min energy among valid set (basis: selected energy_mode)
min_energy = min(c["energy_kj"] for c in valid_conformers if c["energy_kj"] is not None)
# Compute factors
for c in valid_conformers:
rel_E = c["energy_kj"] - min_energy
c["rel_energy"] = rel_E
c["boltzmann_factor"] = math.exp(-rel_E / RT)
partition_function += c["boltzmann_factor"]
# Compute percentages
for c in valid_conformers:
c["boltzmann_percent"] = 100.0 * c["boltzmann_factor"] / partition_function if partition_function > 0 else None
# Boltzmann-averaged shielding tensors
boltz_avg_tensors = [0.0 for _ in range(num_atoms)]
for c in valid_conformers:
weight = c["boltzmann_factor"] / partition_function if partition_function > 0 else 0.0
for i in range(num_atoms):
boltz_avg_tensors[i] += weight * c["tensors"][i]
else:
boltz_avg_tensors = []
# For conformers excluded from Boltzmann (no tensors or mismatched), still compute/display relative energy
# relative to min_energy of valid set (if available)
for c in conformer_data:
if min_energy is not None and c["energy_kj"] is not None:
c["rel_energy"] = c["energy_kj"] - min_energy
else:
c["rel_energy"] = None
# Fill Boltzmann fields only for the valid set
if c in valid_conformers:
# already set above
pass
else:
c["boltzmann_factor"] = None
c["boltzmann_percent"] = None
# Get atom numbers and symbols from first valid conformer (if any)
atom_numbers = valid_conformers[0]["atom_nos"] if valid_conformers else []
atom_symbols = valid_conformers[0]["symbols"] if valid_conformers else []
# CSV Output
out_name = 'NMR_Boltzmann_Averaged.csv'
with open(out_name, 'w', newline='') as csvfile:
writer = csv.writer(csvfile)
# Header
header = [
"Conformer",
"SCF_Energy_kJ/mol",
"Gibbs_Energy_kJ/mol",
"Used_Energy_kJ/mol",
"Relative_Energy_kJ/mol",
"Boltzmann_Factor",
"Boltzmann_%"
]
header += [f"Atom_{i+1}" for i in range(num_atoms)]
writer.writerow(header)
# Data rows
for c in conformer_data:
row = [
c["filename"],
round(c["scf_kj"], 6) if c["scf_kj"] is not None else "",
round(c["gibbs_kj"], 6) if c["gibbs_kj"] is not None else "",
round(c["energy_kj"], 6) if c["energy_kj"] is not None else "",
round(c["rel_energy"], 6) if c["rel_energy"] is not None else "",
(f"{c['boltzmann_factor']:.6f}" if c["boltzmann_factor"] is not None else ""),
(f"{c['boltzmann_percent']:.4f}" if c["boltzmann_percent"] is not None else "")
]
# Only print tensors if the row has the expected count
if num_atoms and len(c["tensors"]) == num_atoms:
row += [round(t, 6) for t in c["tensors"]]
writer.writerow(row)
# Extra spacing row
writer.writerow([])
# If we have valid conformers (and thus atom metadata), write the two helper rows and the average
if valid_conformers:
# Atom number row
atom_num_row = ["Atom_Numbers", "", "", "", "", "", ""] + atom_numbers
writer.writerow(atom_num_row)
# Atom symbol row
atom_sym_row = ["Atom_Symbols", "", "", "", "", "", ""] + atom_symbols
writer.writerow(atom_sym_row)
# Boltzmann average row
avg_row = ["Boltzmann_Averaged_Shielding_Tensors", "", "", "", "", "", ""]
avg_row += [round(x, 6) for x in boltz_avg_tensors]
writer.writerow(avg_row)
else:
writer.writerow(["(No valid NMR tensors found among conformers — averages not computed)"])
print(f"\n✅ All Done Boss!!! I Wrote: {out_name} file for your consideration.")
print("📅 Completed at :", time.strftime("Time: %X, Date: %d/%m/%Y"))
### A script by Muhammad Ali Hashmi