Source code for pysoftk.torsional.mol_conformer

from rdkit import Chem
from rdkit.Chem import AllChem
from rdkit.Chem import rdDistGeom as molDG
from rdkit.Chem import TorsionFingerprints
from pysoftk.tools.utils_rdkit import *
import numpy as np
import os
import sys
from openbabel import pybel, openbabel as ob

import os
import sys
from openbabel import pybel, openbabel as ob

[docs] class ConformerGenerator: """ A class to generate and save molecular conformers using Open Babel. Includes methods for genetic algorithm (GA) based conformer generation (modified from the original 'generate_conformers'), diversity-based conformer generation (confab), and saving conformers to separate files. """ def __init__(self, num_conformers: int = 10, forcefield: str = "mmff94", make3D_steps: int = 50, convergence: int = 5): """ Initializes the ConformerGenerator with default parameters for conformer generation. """ self.default_num_conformers = num_conformers self.default_forcefield = forcefield self.default_make3D_steps = make3D_steps self.default_convergence = convergence
[docs] def ga_generate_conformers(self, input_file: str = None, smiles: str = None, output_dir: str = None, output_format: str = 'xyz', base_filename: str = None, num_conformers: int = None, forcefield: str = None, make3D_steps: int = None, mutability: int = 5, convergence: int = None, num_children: int = None) -> int: """ Generates conformers using Open Babel's weighted rotor search (GA). Saves conformers to separate files in a specified directory. Returns the number of generated conformers. """ num_conformers = num_conformers if num_conformers is not None else self.default_num_conformers forcefield = forcefield if forcefield is not None else self.default_forcefield make3D_steps = make3D_steps if make3D_steps is not None else self.default_make3D_steps convergence = convergence if convergence is not None else self.default_convergence if num_conformers < 1: raise ValueError("Number of conformers (num_conformers) must be at least 1.") molecule = None if input_file: if not os.path.exists(input_file): raise FileNotFoundError(f"Input file not found: {input_file}") try: molecule = next(pybel.readfile(os.path.splitext(input_file)[1][1:], input_file)) base_name, _ = os.path.splitext(os.path.basename(input_file)) molecule.title = base_name.capitalize() print(f"Loaded molecule from: {input_file} ({molecule.title})") except Exception as e: print(f"Error reading input file '{input_file}': {e}", file=sys.stderr) return 0 elif smiles: try: molecule = pybel.readstring("smi", smiles) molecule.title = "Untitled" print(f"Loaded molecule from SMILES: {smiles}") except Exception as e: print(f"Error reading SMILES string '{smiles}': {e}", file=sys.stderr) return 0 else: raise ValueError("Either 'input_file' or 'smiles' must be provided.") try: molecule.addh() molecule.make3D(forcefield=forcefield, steps=make3D_steps) if not molecule.OBMol.Has3D(): print(f"Error: Failed to generate 3D coordinates for molecule. Check your SMILES or force field.", file=sys.stderr) return 0 ff = pybel._forcefields[forcefield] if not ff.Setup(molecule.OBMol): print(f"Warning: Force field '{forcefield}' setup failed on the molecule. This can cause the conformer search to fail.", file=sys.stderr) return 0 except Exception as e: print(f"Error during molecule preparation (3D coords or FF setup): {e}", file=sys.stderr) return 0 mol_clone = pybel.Molecule(molecule.OBMol) children = num_children if num_children is not None else num_conformers * 2 cs = ob.OBConformerSearch() setup_successful = cs.Setup(mol_clone.OBMol, num_conformers, children, mutability, convergence) if not setup_successful: print(f"Error: OBConformerSearch setup failed for molecule: {molecule.title}. This is likely due to the molecule having no rotatable bonds or an invalid state.", file=sys.stderr) return 0 print(f"Starting GA conformer search for {molecule.title}...") cs.Search() print("GA conformer search finished.") cs.GetConformers(mol_clone.OBMol) actual_conformers = mol_clone.OBMol.NumConformers() # --- NEW LOGIC: Call the helper method to save to separate files --- if actual_conformers > 0: base = base_filename if base_filename is not None else "molecule" self.save_conformers_to_separate_files(mol_clone, base, output_dir, output_format) print(f"Successfully generated and saved {actual_conformers} conformers.") return actual_conformers else: print("No conformers found.") return 0
# --- END OF NEW LOGIC --- def save_conformers_to_separate_files(self, molecule_with_conformers: pybel.Molecule, base_name: str, output_dir: str = None, file_format: str = "xyz"): """ Splits a pybel.Molecule object containing multiple conformers into separate files, one for each conformer. """ if not molecule_with_conformers or molecule_with_conformers.OBMol.NumConformers() == 0: print("No conformers found in the input molecule, nothing to save.") return num_found = molecule_with_conformers.OBMol.NumConformers() print(f"\nSaving {num_found} conformers for {molecule_with_conformers.title} to separate files.") if output_dir is None: output_dir = f"{base_name}_conformers" os.makedirs(output_dir, exist_ok=True) print(f"Saving individual conformers into directory: '{output_dir}/'") padding = len(str(num_found)) for i in range(num_found): molecule_with_conformers.OBMol.SetConformer(i) single_conformer_mol = pybel.Molecule(molecule_with_conformers.OBMol) single_conformer_mol.title = f"{molecule_with_conformers.title}_conformer_{i+1}" output_filename = os.path.join( output_dir, f"{base_name}_conf_{str(i+1).zfill(padding)}.{file_format}" ) try: single_conformer_mol.write(file_format, output_filename, overwrite=True) print(f" Saved: {os.path.basename(output_filename)}", end='\r') except Exception as e: print(f"\nError saving conformer {i+1} to file '{output_filename}': {e}", file=sys.stderr) print("\nFinished saving all conformers to separate files.")
[docs] def confab_generate_conformers(self, molecule_input: str, output_dir: str, output_format: str = 'xyz', base_filename: str = None, forcefield: str = None, nconfs: int = 10000, rmsd: float = 0.5, energy_gap: float = 50.0, initial_opt_steps: int = 50, verbose: bool = False) -> int: """ Generates diverse conformers using a method similar to Confab. Args: molecule_input: A SMILES string representing the molecule. output_dir: The directory to save the generated conformers. output_format: The file format for saving (default: 'xyz'). base_filename: The base name for the output files. forcefield: The force field to use for initial 3D generation (default: initialized value). nconfs: The maximum number of diverse conformers to generate (default: 10000). rmsd: The RMSD threshold for considering conformers as distinct (default: 0.5). energy_gap: The energy window (in kJ/mol) to consider for diverse conformers (default: 50.0). initial_opt_steps: The number of optimization steps for the initial 3D structure (default: 50). verbose: A boolean to control the verbosity (default: False). Returns: The number of conformers successfully generated and saved. """ forcefield = forcefield if forcefield is not None else self.default_forcefield mol = None try: mol = pybel.readstring("smi", molecule_input) mol.addh() mol.make3D(forcefield=forcefield, steps=initial_opt_steps) ff = pybel._forcefields[forcefield] if not ff.Setup(mol.OBMol): print(f"Warning: Force field '{forcefield}' setup failed.") return 0 ff.DiverseConfGen(rmsd, nconfs, energy_gap, verbose) ff.GetConformers(mol.OBMol) num_conformers = mol.OBMol.NumConformers() os.makedirs(output_dir, exist_ok=True) base = base_filename if base_filename else "molecule" # Use the save_conformers_to_separate_files method self.save_conformers_to_separate_files(mol, base, output_dir, output_format) print(f"Successfully generated and saved {num_conformers} conformers.") return num_conformers except ValueError as e: print(f"Error: {e}") return 0 except KeyError as e: print(f"Error: {e}") return 0 except RuntimeError as e: print(f"Error: {e}") return 0 except OSError as e: print(f"Error: {e}") return 0 except Exception as e: print(f"An unexpected error occurred: {e}") return 0
[docs] def save_conformers_to_separate_files(self, molecule_with_conformers: pybel.Molecule, base_name: str, output_dir: str = None, file_format: str = "xyz"): """ Splits a pybel.Molecule object containing multiple conformers into separate files, one for each conformer. Args: molecule_with_conformers: A pybel.Molecule object that contains multiple conformers (e.g., the output from generate_conformers()). base_name: The base name to use for the output filenames (e.g., "propanol"). output_dir: The directory where the individual conformer files will be saved. If None, a directory named "{base_name}_conformers" will be created in the current working directory (default: None). file_format: The file format to use for saving the conformers (default: "xyz"). Common options include "mol", "pdb", "xyz". """ if not molecule_with_conformers or molecule_with_conformers.OBMol.NumConformers() == 0: print("No conformers found in the input molecule, nothing to save.") return num_found = molecule_with_conformers.OBMol.NumConformers() print(f"\nSaving {num_found} conformers for {molecule_with_conformers.title} to separate files.") # Determine the output directory if output_dir is None: output_dir = f"{base_name}_conformers" os.makedirs(output_dir, exist_ok=True) print(f"Saving individual conformers into directory: '{output_dir}/'") # Determine padding for filenames padding = len(str(num_found)) # Loop through each found conformer index for i in range(num_found): # Set the molecule's coordinates to the i-th conformer molecule_with_conformers.OBMol.SetConformer(i) # Create a *new* molecule object containing ONLY the current conformer single_conformer_mol = pybel.Molecule(molecule_with_conformers.OBMol) # Optional: Set a unique title for the individual conformer file single_conformer_mol.title = f"{molecule_with_conformers.title}_conformer_{i+1}" # Define the output filename output_filename = os.path.join( output_dir, f"{base_name}_conf_{str(i+1).zfill(padding)}.{file_format}" ) # Save the single-conformer molecule to its own file try: single_conformer_mol.write(file_format, output_filename, overwrite=True) print(f" Saved: {os.path.basename(output_filename)}", end='\r') except Exception as e: print(f"\nError saving conformer {i+1} to file '{output_filename}': {e}", file=sys.stderr) print("\nFinished saving all conformers to separate files.")
[docs] class Mcon(object): """ A class designed to **generate and manage molecular conformers** for a given RDKit molecule. This class provides functionality to compute a specified number of 3D conformers (different spatial arrangements) for a molecule, leveraging RDKit's conformer generation algorithms. The generated conformers, along with their calculated energies, can then be saved to a common chemical file format (SDF). Example -------- >>> from rdkit import Chem >>> from pysoftk.conformer.conformer import Mcon >>> >>> # Create an RDKit molecule from a SMILES string >>> mol = Chem.MolFromSmiles("CCCC") >>> mol = Chem.AddHs(mol) # Add hydrogens for more realistic conformers >>> >>> # Initialize the Mcon class to compute 10 conformers >>> conformer_generator = Mcon(mol=mol, num_conf=10) >>> >>> # Generate and save the conformers to an SDF file >>> conformer_generator.conformer(output_name="butane_conformers") Number of conformers created: 10 Note ----- This class **requires the RDKit package to be installed** in your Python environment. The quality and diversity of the generated conformers depend on the underlying RDKit algorithms and the complexity of the molecule. """ __slots__ = ['mol', 'num_conf'] def __init__(self, mol, num_conf): """ Initializes the `Mcon` class with the molecule for which conformers are to be generated and the desired number of conformers. Parameters ---------- mol : rdkit.Chem.rdchem.Mol The **RDKit Mol object** representing the molecule for which conformers will be calculated. It's often beneficial to add hydrogens to the molecule (`Chem.AddHs(mol)`) before passing it to this class for more accurate conformer generation. num_conf : int The **total number of conformers** requested to be computed for the provided molecule. The actual number of conformers generated might be less if the algorithm cannot find the requested amount within its internal limits. """ self.mol = mol self.num_conf = num_conf
[docs] def conformer(self, output_name): """ Generates molecular conformers for the initialized molecule using RDKit's ETKDGv3 algorithm and saves them to an SDF (Structure-Data File) file. This method orchestrates the conformer generation process, including the calculation of energies for each conformer. It then writes all generated conformers into a single SDF file, making them accessible for further analysis or visualization. By default, it returns all generated conformers, including the highest-energy ones, allowing for a comprehensive conformational analysis. Parameters ---------- output_name : str The **base name** for the output SDF file. The file will be saved as `<output_name>.sdf` in the current working directory. Return ------- list of rdkit.Chem.Mol objects This function doesn't explicitly return a list of molecules directly. Instead, it **writes the conformers to an SDF file**. The SDF file contains all generated conformers as separate entries, each representing an `rdkit.Chem.Mol` object with 3D coordinates. A message indicating the number of created conformers is printed to the console. """ mol = self.mol num_conf = self.num_conf mol, cids, energies = etkdgv3_energies(mol, num_conf) full_name = [output_name, ".sdf"] w = AllChem.SDWriter("".join(full_name)) for cid in cids: w.write(mol, confId=cid) w.close() print("Number of conformers created: {} ".format(len(cids)))