"""
Utilities for the 2D-to-3D `*sdf` to `*mae` file conversion for a
metal complex.
Copyright Schrodinger, LLC. All rights reserved.
"""
import argparse
import random
import re
import numpy
import itertools
from schrodinger import structure
from schrodinger.infra import mm
from schrodinger.infra import fast3d
from schrodinger.test import ioredirect
from schrodinger.utils import fileutils
from schrodinger.structutils import analyze
from schrodinger.structutils import build
from schrodinger.structutils import measure
from schrodinger.structutils import minimize
from schrodinger.structutils import ringspear
from schrodinger.structutils import rmsd
from schrodinger.structutils import transform
from schrodinger.application.matsci import buildcomplex
from schrodinger.application.matsci import jobutils
from schrodinger.application.matsci import parserutils
from schrodinger.application.matsci import etatoggle as tesu
from schrodinger.application.matsci import reaction_workflow_utils as rxnwfu
from schrodinger.application.matsci import sculptcomplex as scu
from schrodinger.application.matsci import swap_fragments_utils as sfu
my_random = random.Random()
my_random.seed(parserutils.RANDOM_SEED_DEFAULT)
COORDINATION_DICT = {buildcomplex.TETRAHEDRAL: 4}
# the following is used in Schrodinger's V3000 SDF writer as well
# as well as other programs
STD_V3000_SDF_V_LINE = ' 0 0 0 0 0 999 V3000\n'
# instead of using OPLS_2005 FF which is used in the complex builder
# GUI use OPLS3 (see test2 in MATSCI-6325 and OMC002332 in MATSCI-6955
# which experience issues with OPLS_2005)
FFLD_VERSION = minimize.OPLS3
DEFAULT_EPSILON = 2 # kcal/mol
[docs]class DummiesToHydrogens:
"""
Context manager to temporarily convert dummy atoms to hydrogens.
"""
[docs] def __init__(self, st):
# look for dummy atoms that have at most one single bond
self.dummy_atoms = []
for atom in st.atom:
if atom.atomic_number != ComplexSdfToMae.DUMMY_ATOMIC_NUMBER:
continue
state = False
for bond in atom.bond:
if bond.order == 1:
if not state:
state = True
else:
state = False
break
if state:
self.dummy_atoms.append(atom)
def __enter__(self):
for dummy_atom in self.dummy_atoms:
dummy_atom.atomic_number = 1
def __exit__(self, *args):
for dummy_atom in self.dummy_atoms:
dummy_atom.atomic_number = ComplexSdfToMae.DUMMY_ATOMIC_NUMBER
[docs]class ComplexSdfToMaeException(Exception):
pass
# the eta parts of the following class are modeled after custom script
# sdf2etacomplex.py (see MATSCI-4423)
[docs]class ComplexSdfToMae:
"""
Manage the 2D-to-3D `*sdf` to `*mae` file conversion for a
metal complex.
"""
SD_PROP_PATTERN = re.compile(r'><(.*)>')
SD_PROP_STARTER = 's_sd_'
DUMMY_ATOMIC_NUMBER = -2
BIND_PROP = 'b_user_metal_neighbor'
HAPTIC_ATOM_PROP = 'b_user_haptic_atom'
[docs] def __init__(self, sdf_file, logger=None):
"""
Create an instance.
:type sdf_file: str
:param sdf_file: the 2D `*sdf` file
:type logger: logging.Logger or None
:param logger: output logger or None if there isn't one
"""
self.sdf_file = sdf_file
self.logger = logger
self.st = self.read_sdf(self.sdf_file)
[docs] @staticmethod
def read_sdf(sdf_file):
"""
Wrapper to read an `*sdf` file.
:type sdf_file: str
:param sdf_file: the `*sdf` file
:raise ComplexSdfToMaeException: if there is an issue
:rtype: `schrodinger.structure.Structure`
:return: the structure
"""
sdf_file_type = type(sdf_file).__name__
with open(sdf_file) as afile:
lines = afile.readlines()
# try to read the file as is, a StopIteration will be raised if
# there is a formatting issue, attempt to fix a known formatting
# issue and read the newly formatted file, if there is still a
# formatting issue then give up
try:
st = structure.Structure.read(sdf_file)
# see MATSCI-8823
if st.atom_total == 0:
raise StopIteration
except StopIteration:
with fileutils.tempfilename(prefix='tmp',
suffix='.sdf') as tmp_sdf_file:
with open(tmp_sdf_file, 'w') as afile:
if not re.match(r'\s*\n', lines[0]):
afile.write('\n')
for line in lines:
if 'V3000' in line:
afile.write(STD_V3000_SDF_V_LINE)
continue
line = re.sub(r'^M\s+', r'M ',
re.sub(r'^\s+', r'', line))
if not line:
line = '\n'
afile.write(line)
try:
st = structure.Structure.read(tmp_sdf_file)
# see MATSCI-8823
if st.atom_total == 0:
raise StopIteration
except StopIteration:
msg = (f'The {sdf_file} file has an invalid format.')
# raise the following exception with no previous context,
# this suppresses error messages from exceptions raised
# during the raising of the exception that is currently
# being caught
raise ComplexSdfToMaeException(msg) from None
except Exception as err:
msg = str(err)
raise
except Exception as err:
msg = str(err)
raise
# structure.Structure.read `*sdf` meta-data like "><X>\nY\n" is captured
# as st.property['s_sd_X'] = 'Y', but go through the file and manually
# set properties that haven't been set, for example like when 'Y' is a
# single integer
for idx, line in enumerate(list(lines)):
match = re.match(ComplexSdfToMae.SD_PROP_PATTERN, line)
if match:
key = ComplexSdfToMae.SD_PROP_STARTER + match.group(1).replace(
' ', '_')
if st.property.get(key) is None:
value = lines[idx + 1].strip('\n')
st.property[key] = value
st.title = fileutils.get_basename(sdf_file)
return st
def _getClosestRing(self, dummy_atom, all_ring_idxs):
"""
Return the atom indices of the ring that is closest to the
given dummy atom.
:type dummy_atom: `schrodinger.structure._StructureAtom`
:param dummy_atom: the dummy atom
:type all_ring_idxs: list
:param all_ring_idxs: contains lists of atom indices for rings
:rtype: list
:return: the atom indices for the closest ring
"""
# handle the case where neighbor is in multiple rings
_, _, neighbor = measure.get_shortest_distance(self.st,
atoms=[dummy_atom.index])
closest_dist = numpy.inf
closest_ring_idxs = []
for ring_idxs in all_ring_idxs:
if neighbor in ring_idxs:
avg_dist = numpy.mean([
self.st.measure(dummy_atom.index, idx) for idx in ring_idxs
])
if avg_dist < closest_dist:
closest_dist = avg_dist
closest_ring_idxs = ring_idxs
return closest_ring_idxs
def _getClosestNonRingEtaIdxs(self, dummy_atom):
"""
Return the atom indices of the non-ring eta atoms that are closest
to the given dummy atom.
:type dummy_atom: `schrodinger.structure._StructureAtom`
:param dummy_atom: the dummy atom
:rtype: list
:return: the atom indices for the closest non-ring eta atoms
"""
_, _, neighbor = measure.get_shortest_distance(self.st,
atoms=[dummy_atom.index])
neighbor = self.st.atom[neighbor]
idxs = [neighbor.index]
for bond in neighbor.bond:
if bond.order > 1:
idxs.append(bond.atom2.index)
return idxs
def _bondDummies(self):
"""
Bond dummy atoms to the corresponding ligands.
"""
all_ring_idxs = [ring.getAtomIndices() for ring in self.st.ring]
for atom in self.st.atom:
if not atom.property.get(self.BIND_PROP):
continue
if atom.atomic_number == self.DUMMY_ATOMIC_NUMBER:
idxs = self._getClosestRing(atom, all_ring_idxs)
for idx in idxs:
ring_atom = self.st.atom[idx]
# handle carbanions
if ring_atom.atomic_number == 6:
for bond in ring_atom.bond:
if bond.order > 1:
break
else:
ring_atom.formal_charge = -1
if not idxs:
idxs = self._getClosestNonRingEtaIdxs(atom)
if not idxs:
continue
for idx in idxs:
haptic_atom = self.st.atom[idx]
haptic_atom.property[self.HAPTIC_ATOM_PROP] = True
# use zob bond to not take up a valence slot
self.st.addBond(atom, haptic_atom, structure.BondType.Zero)
else:
# use single bond to take up a valence slot
marker = self.st.addAtom('DU', *self.metal_xyz)
self.st.addBond(atom.index, marker.index,
structure.BondType.Single)
def _defineMetalAttrs(self, idx):
"""
Define metal attributes.
:type idx: int
:param idx: the index
"""
metal_atom = self.st.atom[idx]
self.metal_element = metal_atom.element
self.metal_xyz = metal_atom.xyz
[docs] def getCenterIdx(self):
"""
Return the atom index that serves as the complex center, typically
a metal atom.
:raise ComplexSdfToMaeException: if there is an issue
:rtype: int
:return: the atom index that serves as the complex center
"""
metal_idxs = analyze.evaluate_asl(self.st, 'metals')
if not metal_idxs:
centroid = transform.get_centroid(self.st)[:-1]
# from sdf reader complexes with eta bound ligands come
# with a dummy atom representation where the dummy is only
# bound to the metal
pairs = []
for atom in self.st.atom:
if atom.atomic_number == -2:
neighbors = list(atom.bonded_atoms)
metal_idxs = [neighbors[0].index]
break
pairs.append((atom.index,
transform.get_vector_magnitude(
numpy.array(atom.xyz) - centroid)))
else:
# pick the atom closest to the centroid that when deleted
# breaks the molecule into multiple molecules
pairs = sorted(pairs, key=lambda x: x[1])
for idx in list(zip(*pairs))[0]:
tst = self.st.copy()
tst.deleteAtoms([idx])
if tst.mol_total > 1:
metal_idxs = [idx]
break
if not metal_idxs:
msg = ('Complex must contain a single metal center.')
raise ComplexSdfToMaeException(msg)
metal_idxs = sorted(metal_idxs,
key=lambda x: self.st.atom[x].atomic_number,
reverse=True)
return metal_idxs[0]
[docs] def prepare(self):
"""
Prepare the structure.
"""
metal_idx = self.getCenterIdx()
metal_atom = self.st.atom[metal_idx]
self._defineMetalAttrs(metal_atom.index)
# mark neighboring atoms
for atom in metal_atom.bonded_atoms:
atom.property[self.BIND_PROP] = True
# make ligand molecules
self.st.deleteAtoms([metal_atom])
# handle ligand dummy atom bonding
self._bondDummies()
def _getBindingSites(self, st):
"""
Return binding sites for the given structure.
:type st: `schrodinger.structure.Structure`
:param st: the structure
:rtype: list
:return: contains atom index pair tuples, the first index is for the
atom that binds to the metal and is negated for eta-coordinated
ligands, the second index is for the atom that represents the
metal positioning relative to the ligand and is 0 for
eta-coordinated ligands
"""
sites = []
for atom in st.atom:
if not atom.property.get(self.BIND_PROP):
continue
if atom.atomic_number == self.DUMMY_ATOMIC_NUMBER:
sign, marker_idx = -1, 0
else:
for neighbor in atom.bonded_atoms:
if neighbor.atomic_number == self.DUMMY_ATOMIC_NUMBER:
sign, marker_idx = 1, neighbor.index
break
sites.append((sign * atom.index, marker_idx))
return sites
[docs] def addHydrogens(self, st):
"""
Add hydrogens to the given structure.
:type st: `schrodinger.structure.Structure`
:param st: the structure
"""
# see MATSCI-7031 - build.add_hydrogens doesn't necessarily
# add hydrogens to haptic carbanions
idxs = []
for atom in st.atom:
if atom.property.get(self.HAPTIC_ATOM_PROP) and \
atom.formal_charge == -1 and atom.atomic_number == 6:
n_bonds = sum(1 for bond in atom.bond
if bond.type != structure.BondType.Zero)
if n_bonds == 2:
idxs.append(atom.index)
for idx in idxs:
atom = st.atom[idx]
summed_bond_vec = numpy.array([0., 0., 0.])
for neighbor in atom.bonded_atoms:
summed_bond_vec += numpy.array(neighbor.xyz) - numpy.array(
atom.xyz)
summed_bond_vec = numpy.array(atom.xyz) - summed_bond_vec
h_atom = st.addAtom('H', *summed_bond_vec, 'white')
st.addBond(atom.index, h_atom.index, structure.BondType.Single)
# it is not problematic to add hydrogens to structures
# with dummy atoms
build.add_hydrogens(st)
def _getPreparedLigand(self, st):
"""
Return a prepared ligand.
:type st: `schrodinger.structure.Structure`
:param st: the ligand structure
:rtype: `schrodinger.structure.Structure`
:return: the prepared ligand structure
"""
self.addHydrogens(st)
# adopt the same strategy used in builderwidgets.ItemRow.setStructure
min_energy = numpy.inf
min_energy_struct = st
for count in range(10):
scopy = st.copy()
# break symmetry
for atom in scopy.atom:
atom.z = (my_random.random() - 0.5) / 10.
with DummiesToHydrogens(scopy):
with ioredirect.IOSilence():
try:
mizer = minimize.Minimizer(ffld_version=FFLD_VERSION,
struct=scopy,
cleanup=False)
min_res = mizer.minimize()
except ValueError:
pass
else:
if min_res.potential_energy < min_energy:
min_energy = min_res.potential_energy
min_energy_struct = scopy
return min_energy_struct
[docs] def build(self):
"""
Build the complex.
:raise ComplexSdfToMaeException: if there is an issue
:rtype: `schrodinger.structure.Structure`
:return: the complex
"""
try:
return self.sculptComplexBuild()
except ComplexSdfToMaeException as err:
if self.logger:
msg = ('Sculpt complex has failed with the following error '
'message. Trying complex builder instead.')
self.logger.info(msg)
self.logger.warning(str(err))
# it is not clear that there is any real benefit to falling back
# on complex builder but keeping the stub for now
try:
return self.buildComplexBuild()
except ComplexSdfToMaeException as err:
if self.logger:
msg = ('Complex builder has failed with the following error '
'message.')
self.logger.info(msg)
self.logger.error(str(err))
raise
[docs] def fixRingSpears(self, st):
"""
Fix ring-spears.
:type st: `schrodinger.structure.Structure`
:param st: the structure
"""
if ringspear.check_for_spears(st, return_bool=True):
st.generate3dConformation(require_stereo=False) # uses fast3d
if self.logger and ringspear.check_for_spears(st, return_bool=True):
msg = ('At least a single ring-spear has been found in the 3D '
'structure.')
self.logger.warning(msg)
[docs] def updateProperties(self, st):
"""
Update properties on the given structure.
:type st: `schrodinger.structure.Structure`
:param st: the structure
"""
for key, value in self.st.property.items():
if key.startswith(self.SD_PROP_STARTER):
st.property[key] = value
[docs] def run(self):
"""
Run.
:rtype: `schrodinger.structure.Structure`
:return: the structure
"""
self.prepare()
st = self.build()
self.fixRingSpears(st)
self.updateProperties(st)
return st
[docs] def write(self, mae_file=None):
"""
Write.
:type mae_file: str
:param mae_file: the 3D `*mae` file
:rtype: str
:return: the 3D `*mae` file
"""
mae_file = mae_file or fileutils.get_basename(self.sdf_file) + '.mae'
st = self.run()
st.write(mae_file)
return mae_file
[docs] def sculptComplexBuild(self, sites=None, geometry=None, optimize=True):
"""
Build the complex using sculpt complex.
:type sites: list or None
:param sites: contains atom index pair tuples, the first index is for the
atom that binds to the metal and is negated for eta-coordinated
ligands, the second index is for the atom that represents the
metal positioning relative to the ligand and is 0 for
eta-coordinated ligands, the order of sites in this list
determines the coordination geometry of the complex, if None
an arbitrarily ordered list of sites will be determined from
the structure
:type geometry: str or None
:param geometry: the VSEPR geometry from the buildcomplex geometry
constants, if None uses the sculpt complex convention of using
the last geometry for a given number of slots coming from the
order in buildcomplex.IDEAL_SLOTS
:type optimize: bool
:param optimize: whether to perform a standard geometry optimization
on the final sculpted complex
:raise ComplexSdfToMaeException: if there is an issue
:rtype: `schrodinger.structure.Structure`
:return: the complex
"""
st = self.st.copy()
self.addHydrogens(st)
# remove potential center markers prepared for build complex,
# site indexing doesn't need adjusting
if not sites:
sites = self._getBindingSites(st)
to_remove = [site[1] for site in sites if site[1]]
if to_remove:
st.deleteAtoms(to_remove)
# use first isomer and arbitrary site ordering for now
coordinators = ','.join(str(abs(site[0])) for site in sites)
try:
coordination_info, geometries = scu.validate_coord_flag(
coordinators)
except argparse.ArgumentTypeError as err:
raise ComplexSdfToMaeException(str(err))
if geometry:
assert geometry in geometries
else:
# the convention for sculpt complex has been to use the last
# geometry for a given number of slots coming from the order
# in buildcomplex.IDEAL_SLOTS
geometry = geometries[-1]
# del_h has implications regarding the final bond orders of metal-ligand
# bonds, for example for a metal-carbon bond, like M-CH2-phenyl, using del_h
# will produce a metal carbyne (triple bond) while not using it will not,
# using False by default for now
del_h = False
title = ''
enumerating = False
obj = scu.Sculptor(self.metal_element, coordination_info, geometry,
del_h, title, enumerating)
try:
st = obj.sculpt(st, optimize=optimize)
except scu.SculptingError as err:
raise ComplexSdfToMaeException(str(err))
return st
[docs] def buildComplexBuild(self):
"""
Build the complex using build complex.
:raise ComplexSdfToMaeException: if there is an issue
:rtype: `schrodinger.structure.Structure`
:return: the complex
"""
builder = buildcomplex.ComplexBuilder(metal=self.metal_element,
geometry=buildcomplex.TETRAHEDRAL,
homoleptic=False,
isomer=None)
total_n_sites = 0
for mol in self.st.molecule:
st = mol.extractStructure()
sites = self._getBindingSites(st)
n_sites = len(sites)
if n_sites > 2:
msg = ('Only mono- and bi-dentate ligands are supported.')
raise ComplexSdfToMaeException(msg)
total_n_sites += n_sites
if total_n_sites > COORDINATION_DICT[buildcomplex.TETRAHEDRAL]:
msg = ('Complex must have tetrahedral coordination.')
raise ComplexSdfToMaeException(msg)
st = self._getPreparedLigand(st)
if n_sites == 1:
builder.addMonodentateLigand(st, sites[0])
elif n_sites == 2:
builder.addBidentateLigand(st, sites)
try:
st = builder.createComplex(force=True)
except ValueError as err:
raise ComplexSdfToMaeException(str(err))
with ioredirect.IOSilence():
try:
buildcomplex.minimize_complex(st, forcefield=FFLD_VERSION)
except ValueError:
pass
return st
[docs] @staticmethod
def type_cast_sdf_file(sdf_file):
"""
Type cast the 2D `*sdf` file.
:type sdf_file: str
:param sdf_file: the 2D `*sdf` file
:raise argparse.ArgumentTypeError: is there is an issue
:rtype: str
:return: the 2D `*sdf` file
"""
sdf_file = parserutils.type_file(sdf_file)
try:
ComplexSdfToMae.read_sdf(sdf_file)
except ComplexSdfToMaeException as err:
raise argparse.ArgumentTypeError(str(err))
return sdf_file
[docs] def getAvailableGeometries(self, geometry=None):
"""
Return the available geometries.
:type geometry: str or None
:param geometry: the specific coordination geometry to use, if not specified
all available geometries will be considered
:raise ComplexSdfToMaeException: if there is an issue
:rtype: list[str]
:return: contains available geometries
"""
self.prepare()
sites = self._getBindingSites(self.st)
geometries = buildcomplex.SLOTS_TO_GEOMS.get(len(sites))
if not geometries:
msg = (
f'Complexes with {len(sites)} coordination sites are not supported.'
)
if self.logger:
self.logger.error(msg)
raise ComplexSdfToMaeException(msg)
if geometry:
# geometry may have been given in pretty or cli format so convert
# to pretty now
geometry = buildcomplex.get_pretty_geom_str(geometry)
if geometry not in geometries:
msg = (
f'The specified geometry of {geometry} is not available for '
f'complexes with {len(sites)} coordination sites.')
if self.logger:
self.logger.error(msg)
raise ComplexSdfToMaeException(msg)
geometries = [geometry]
return geometries
[docs] def get3DIsomers(self,
geometry=None,
out_rep=None,
epsilon=DEFAULT_EPSILON):
"""
Return 3D isomers.
:type geometry: str or None
:param geometry: the specific coordination geometry to use, if not specified
all available geometries will be considered
:type out_rep: str or None
:param out_rep: if None then the conversion is to the opposite of
the given representation, eta to centroid or centroid to eta, if
a string then must be either module constant parserutils.CENTROID
or parserutils.ETA in which case the conversion will always provide
an output representation of the given type
:type epsilon: float
:param epsilon: the energy window in kcal/mol for binning isomers
:raise ComplexSdfToMaeException: if there is an issue
:rtype: list[`schrodinger.structure.Structure`]
:return: contains 3D isomers
"""
geometries = self.getAvailableGeometries(geometry=geometry)
sites = self._getBindingSites(self.st)
# see MATSCI-10581 - strictly this function needs to not only return isomers but
# possibly also conformers, for example as in A-metal-B (180 deg.) vs A-metal-B
# (90 deg.) where A and B are monodentate ligands, the geometry list misses some
# coordinations, for example square pyramidal, for now use the following algorithm,
# for a given number of slots (N) with locations according to the given geometry,
# consider all possible permutations of the N sites onto the N slots, bin the
# energies using the epsilon window and collect the lowest energy structure
# from each bin, this is time consuming but ensures a variety of output
# structures for ligands of any denticity
f3d_engine = fast3d.SingleConformerEngine()
all_sts = []
for geometry in geometries:
geom = geometry.replace(' ', '_').lower()
sts = []
for these_sites in itertools.permutations(sites, len(sites)):
try:
st = self.sculptComplexBuild(sites=these_sites,
geometry=geometry,
optimize=True)
except ComplexSdfToMaeException:
continue
# see MATSCI-11806 where it was found that even with geometry
# optimization it is possible that scuplt complex lands us in a
# bad local minimum, for example a square pyramidal sp3 carbon,
# fast3d single conformer engine fixes that, the central metal
# atom is always the first atom
core_idxs = [1] + [
atom.index for atom in st.atom[1].bonded_atoms
]
f3d_engine.run(st, core_idxs)
self.fixRingSpears(st)
st.title = f'{self.st.title}-{geom}'
sts.append(st)
if not sts:
continue
try:
sts = analyze.get_low_energy_reps(sts, epsilon)
except ValueError:
continue
all_sts.extend(sts)
# only fail if no structures could be produced
if not all_sts:
msg = ('Sculpt complex has failed for all coordination geometries.')
if self.logger:
self.logger.error(msg)
raise ComplexSdfToMaeException(msg)
all_sts = sorted(all_sts,
key=lambda x: x.property.get(mm.OPLS_POTENTIAL_ENERGY))
for idx, st in enumerate(all_sts, 1):
st.title += f'-{idx}'
self.updateProperties(st)
tesu.toggle_structure(st, out_rep=out_rep)
return all_sts
[docs]class ComplexSdfToRxnWF(ComplexSdfToMae):
"""
Manage the 2D-to-3D `*sdf` to `*_rxnwf_mae` file conversion for a
metal complex.
"""
R_GROUP_SD_KEY_PATTERN = re.compile(r'R(\d+)(.*)') # doesn't match \n
SITE_ATOM_KEY = 'i_matsci_Reaction_Workflow_Enumeration_Atom'
REPLACE_ATOM_SD_KEY = 'Replace Indices'
SUPERPOSITION_ATOM_SD_KEY = 'Superimpose Indices'
[docs] def __init__(self,
sdf_file,
reference_rxnwf_file=None,
handle_rotamers=True,
logger=None):
"""
Create an instance.
:type sdf_file: str
:param sdf_file: the 2D `*sdf` file
:type reference_rxnwf_file: str
:param reference_rxnwf_file: the reaction workflow file containing the reference
structures, used for finding an optimal 3D geometry
:type handle_rotamers: bool
:param handle_rotamers: whether to rotate mono-dentate haptic ligands containing
superposition atoms such that they maximally overlap with the reference
reaction workflow
:type logger: logging.Logger or None
:param logger: output logger or None if there isn't one
"""
super().__init__(sdf_file, logger=logger)
self.reference_rxnwf_file = reference_rxnwf_file
self.handle_rotamers = handle_rotamers
self.rgroup_files, sites, replace_idxs, superposition_idxs = \
self.parse_novel_sdf_file(sdf_file)
# mark site atoms and collect finalized data after the complex
# build so as to avoid index tracking
for site_idx, rgroup_idx in sites:
atom = self.st.atom[site_idx]
atom.property[self.SITE_ATOM_KEY] = rgroup_idx
for atom in self.st.atom:
if atom.index not in replace_idxs:
atom.property[rxnwfu.KEEP_ATOM_KEY] = True
for idx, superposition_idx in enumerate(superposition_idxs, 1):
atom = self.st.atom[superposition_idx]
atom.property[rxnwfu.SUPERPOSITION_ATOM_KEY] = idx
[docs] @staticmethod
def parse_novel_sdf_file(sdf_file):
"""
Parse the novel 2D `*sdf` file.
:type sdf_file: str
:param sdf_file: the novel 2D `*sdf` file
:return: list, list, list, list
:return: (1) contains R-group file data as [R-group file, R-group index]
pairs, (2) contains site data as [site "to" index, R-group index] pairs,
(3) contains replace indicies, (4) contains superposition indices
"""
st = ComplexSdfToMae.read_sdf(sdf_file)
sd_prop_starter = ComplexSdfToMae.SD_PROP_STARTER
r_group_sd_key_pattern = ComplexSdfToRxnWF.R_GROUP_SD_KEY_PATTERN
replace_atom_sd_key = ComplexSdfToRxnWF.REPLACE_ATOM_SD_KEY.replace(
' ', '_')
superposition_atom_sd_key = ComplexSdfToRxnWF.SUPERPOSITION_ATOM_SD_KEY.replace(
' ', '_')
rgroup_files = []
sites = []
replace_idxs = []
superposition_idxs = []
for key, value in st.property.items():
if not key.startswith(sd_prop_starter):
continue
key = key.replace(sd_prop_starter, '')
match = re.match(r_group_sd_key_pattern, key)
if match:
rgroup_idx = int(match.group(1))
if not match.group(2):
rgroup_files.append([value.strip(), rgroup_idx])
continue
else:
rgroup_idx = None
try:
idxs = list(map(int, value.replace(' ', '').split(',')))
except ValueError:
continue
if rgroup_idx is not None:
sites.extend([[idx, rgroup_idx] for idx in idxs])
elif key.startswith(replace_atom_sd_key):
replace_idxs.extend(idxs)
elif key.startswith(superposition_atom_sd_key):
superposition_idxs.extend(idxs)
return rgroup_files, sites, replace_idxs, superposition_idxs
[docs] @staticmethod
def type_cast_novel_sdf_file(sdf_file):
"""
Type cast the novel 2D `*sdf` file.
:type sdf_file: str
:param sdf_file: the novel 2D `*sdf` file
:raise argparse.ArgumentTypeError: is there is an issue
:rtype: str
:return: the novel 2D `*sdf` file
"""
sdf_file = parserutils.type_file(sdf_file)
replace_atom_sd_key = ComplexSdfToRxnWF.REPLACE_ATOM_SD_KEY
superposition_atom_sd_key = ComplexSdfToRxnWF.SUPERPOSITION_ATOM_SD_KEY
try:
rgroup_files, sites, replace_idxs, superposition_idxs = \
ComplexSdfToRxnWF.parse_novel_sdf_file(sdf_file)
except ComplexSdfToMaeException as err:
raise argparse.ArgumentTypeError(str(err))
for rgroup_file in rgroup_files:
parserutils.type_file(rgroup_file[0])
if not replace_idxs or not superposition_idxs:
msg = (f'Input {sdf_file} is missing at least one of the required '
f'meta-data: "{replace_atom_sd_key}" and '
f'"{superposition_atom_sd_key}".')
raise argparse.ArgumentTypeError(msg)
if len(superposition_idxs) < 3:
msg = (
f'The "{superposition_atom_sd_key}" meta-data must define at '
'least 3 indices.')
raise argparse.ArgumentTypeError(msg)
return sdf_file
def _defineMetalAttrs(self, idx):
"""
Define metal attributes.
:type idx: int
:param idx: the index
"""
super()._defineMetalAttrs(idx)
metal_atom = self.st.atom[idx]
self.metal_is_keep_atom = metal_atom.property.get(rxnwfu.KEEP_ATOM_KEY)
self.metal_superposition_idx = metal_atom.property.get(
rxnwfu.SUPERPOSITION_ATOM_KEY)
[docs] def updateProperties(self, st):
"""
Update properties on the given structure.
:type st: `schrodinger.structure.Structure`
:param st: the structure
"""
st.property[rxnwfu.REACTION_WF_STRUCTURE_KEY] = True
st.property[rxnwfu.CONFORMERS_GROUP_KEY] = 'conformer_group_1'
st.property[rxnwfu.SIBLING_GROUP_KEY] = 'sibling_group_1'
metal_atom = st.atom[1]
if self.metal_is_keep_atom:
metal_atom.property[rxnwfu.KEEP_ATOM_KEY] = True
if self.metal_superposition_idx:
metal_atom.property[
rxnwfu.SUPERPOSITION_ATOM_KEY] = self.metal_superposition_idx
for atom in st.atom:
if atom.property.get(rxnwfu.KEEP_ATOM_KEY):
for neighbor in atom.bonded_atoms:
if neighbor.atomic_number == 1:
neighbor.property[rxnwfu.KEEP_ATOM_KEY] = True
[docs] def buildBestRotamer(self, nov_st):
"""
Build the best rotamer:
:type nov_st: `schrodinger.structure.Structure`
:param nov_st: the novel structure
:raise ComplexSdfToMaeException: if there is an issue
"""
if not self.reference_rxnwf_file:
return
# get a representative reference structure that has superposition
# atoms
for ref_st in structure.StructureReader(self.reference_rxnwf_file):
ref_all_super_idxs = sfu.get_superposition_idxs(ref_st)
if ref_all_super_idxs:
break
else:
return
nov_all_super_idxs = sfu.get_superposition_idxs(nov_st)
if not nov_all_super_idxs:
return
alen = len(nov_all_super_idxs)
if alen != len(ref_all_super_idxs) or alen < 3:
msg = ('The number of novel and reference superposition '
'atoms must be equivalent and >= 3.')
raise ComplexSdfToMaeException(msg)
# make ligand molecules
tmp_nov_st = nov_st.copy()
tmp_nov_st.deleteAtoms([1])
# collect ligand structures that need rotation
mols = []
for mol in tmp_nov_st.molecule:
st = mol.extractStructure()
haptic_idxs = [
atom.index
for atom in st.atom
if atom.property.get(self.HAPTIC_ATOM_PROP)
]
haptic_idx_groups = analyze.group_by_connectivity(st, haptic_idxs)
if len(haptic_idx_groups) == 1:
if set(haptic_idx_groups[0]).intersection(
sfu.get_superposition_idxs(st)):
mols.append(mol)
if not mols:
return
# get rough starting position of the entire novel structure by superposing
# on the metal atom
rmsd.superimpose(ref_st, [1],
nov_st, [1],
use_symmetry=False,
move_which=rmsd.CT)
nov_to_ref_idx_map = dict(zip(nov_all_super_idxs, ref_all_super_idxs))
# if a mono-dentate ligand molecule has haptic superposition atoms then
# superpose the ligand molecule in the novel structure onto the reference
# structure using all 1, 2, or >= 3 novel-reference superposition pairs
# FIXME - a single atom needs special handling
for mol in mols:
st = mol.extractStructure()
# get the map from ligand indices to original novel complex indices
# (including the metal that was removed)
amap = dict(
zip(range(1, st.atom_total + 1),
[i + 1 for i in mol.getAtomIndices()]))
nov_super_idxs = [amap[i] for i in sfu.get_superposition_idxs(st)]
ref_super_idxs = [nov_to_ref_idx_map[i] for i in nov_super_idxs]
# rmsd.MOLECULES includes zero-order bonds so temporarily remove
# metal-ligand bonds
with RemoveMetalBonds(nov_st):
rmsd.superimpose(ref_st,
ref_super_idxs,
nov_st,
nov_super_idxs,
use_symmetry=False,
move_which=rmsd.MOLECULES)
# raise an error if the rmsd over all superposition atoms
# is too large, for example perhaps due to a bad choice of
# reference rxnwf file, etc.
tmp_nov_st = nov_st.copy()
armsd = rmsd.superimpose(ref_st,
ref_all_super_idxs,
tmp_nov_st,
nov_all_super_idxs,
use_symmetry=False,
move_which=rmsd.CT)
thresh = 0.5 # Angstrom
if armsd > thresh:
msg = ('During the 2D-to-3D *sdf to `*mae` conversion '
'mono-dentate haptic ligands in the novel structure '
'have been rotated so that their superposition atoms '
'maximally overlap with those from the given reference '
'reaction workflow file. However the final rmsd over '
f'all superposition atoms remains larger than {thresh} '
'indicating a bad choice of reference reaction workflow.')
raise ComplexSdfToMaeException(msg)
[docs] def run(self):
"""
Run.
:rtype: `schrodinger.structure.Structure`
:return: the structure
"""
self.prepare()
st = self.build()
self.updateProperties(st)
if self.handle_rotamers:
self.buildBestRotamer(st)
self.fixRingSpears(st)
return st
[docs] def write(self, rxnwf_file=None):
"""
Write.
:type rxnwf_file: str
:param rxnwf_file: the 3D `*_rxnwf.mae` file
:rtype: str, list
:return: the 3D `*_rxnwf.mae` file, contains for each site a list of
data `[from_idx, to_idx, rgroup_idx]`
"""
rxnwf_file = rxnwf_file or fileutils.get_basename(self.sdf_file) + \
rxnwfu.REACTION_WF_TAG + '.mae'
st = self.run()
sites = []
for atom in st.atom:
rgroup_idx = atom.property.get(self.SITE_ATOM_KEY)
if rgroup_idx is not None:
for neighbor in atom.bonded_atoms:
if neighbor.atomic_number != 1:
sites.append([neighbor.index, atom.index, rgroup_idx])
break
st.write(rxnwf_file)
return rxnwf_file, sites
[docs]def convert_sdf_to_rxnwf(sdf_file,
reference_rxnwf_file,
handle_rotamers=True,
output_rxnwf_file_name=None,
logger=None):
"""
Convert the given single structure 2D `*sdf` file to a 3D `*_rxnwf.mae`
file.
:type sdf_file: str
:param sdf_file: the `*sdf` file
:type reference_rxnwf_file: str
:param reference_rxnwf_file: the reaction workflow file containing the reference
structures, used for finding an optimal 3D geometry
:type handle_rotamers: bool
:param handle_rotamers: whether to rotate mono-dentate haptic ligands containing
superposition atoms such that they maximally overlap with the reference
reaction workflow
:type output_rxnwf_file_name: str
:param output_rxnwf_file_name: the output reaction workflow file name
:type logger: logging.Logger or None
:param logger: output logger or None if there isn't one
:rtype: str, list, list
:return: (1) the 3D `*_rxnwf.mae` file, (2) contains R-group file data as
[R-group file, R-group index] pairs, (3) contains for each site a list of
data [from_idx, to_idx, rgroup_idx]
"""
obj = ComplexSdfToRxnWF(sdf_file,
reference_rxnwf_file=reference_rxnwf_file,
handle_rotamers=handle_rotamers,
logger=logger)
rxnwf_file, sites = obj.write(rxnwf_file=output_rxnwf_file_name)
jobutils.add_outfile_to_backend(rxnwf_file, wait=False)
return rxnwf_file, obj.rgroup_files, sites
[docs]def get_3d_isomers(sdf_file,
geometry=None,
out_rep=None,
epsilon=DEFAULT_EPSILON,
logger=None):
"""
Return 3D isomers for the given 2D `*sdf` file of a transition metal complex.
:type sdf_file: str
:param sdf_file: the `*sdf` file
:type geometry: str or None
:param geometry: the specific coordination geometry to use, if not specified
all available geometries will be considered
:type out_rep: str or None
:param out_rep: if None then the conversion is to the opposite of
the given representation, eta to centroid or centroid to eta, if
a string then must be either module constant parserutils.CENTROID
or parserutils.ETA in which case the conversion will always provide
an output representation of the given type
:type epsilon: float
:param epsilon: the energy window in kcal/mol for binning isomers
:type logger: logging.Logger or None
:param logger: output logger or None if there isn't one
:rtype: list[`schrodinger.structure.Structure`]
:return: contains 3D isomers
"""
obj = ComplexSdfToMae(sdf_file, logger=logger)
return obj.get3DIsomers(geometry=geometry, out_rep=out_rep, epsilon=epsilon)