Source code for schrodinger.application.scaffold_enumeration.mdl
import contextlib
import itertools
import re
from rdkit import Chem
from . import atomlist
from . import common
from . import cxsmiles
from . import markush
def _translate_mdl_atomlist_atoms(mol):
    '''
    Swap MDL "atom list" query atoms for regular (non-query) atoms.

    A query atom is treated as an "atom list" when its SMARTS starts with a
    bracketed, comma-separated list of atomic numbers (e.g. ``[#6,#7]``).
    Each such atom is replaced in `mol` by a plain atom of the first listed
    element; the element symbols of all listed atomic numbers are handed to
    `atomlist.set_atom_elements` for that new atom.

    :param mol: Molecule to be massaged (modified in place).
    :type mol: `rdkit.Chem.rdchem.RWMol`
    '''
    periodic_table = Chem.GetPeriodicTable()

    # (atom index, replacement atom) pairs; they are applied only once the
    # scan over the atoms below has finished.
    replacements = []
    for query_atom in mol.GetAtoms():
        if not query_atom.HasQuery():
            continue

        smarts = query_atom.GetSmarts()
        if re.match(r'\[(#\d+,)*#\d+\]', smarts) is None:
            continue

        atomic_numbers = [
            int(found.group(1))
            for found in re.finditer(r'#(\d+)[,\]]', smarts)
        ]

        # the first listed element becomes the element of the plain atom
        plain_atom = Chem.Atom(atomic_numbers[0])
        atomlist.set_atom_elements(
            plain_atom, map(periodic_table.GetElementSymbol, atomic_numbers))
        replacements.append((query_atom.GetIdx(), plain_atom))

    for atom_index, plain_atom in replacements:
        mol.ReplaceAtom(atom_index, plain_atom)
def _collect_posvar_bonds(mol):
    '''
    Gather the "multi-center groups" behind "position variation bonds",
    assuming that `mol` originated from MDL format.

    A bond qualifies when it carries a ``_MolFileBondEndPts`` property whose
    text starts with a parenthesized, whitespace-separated list of at least
    two integers, and when one of its two atoms is a query atom with SMARTS
    ``*`` (the group "center"). Side effect: every recognized center atom is
    replaced in `mol` by a regular atom with atomic number 0.

    :param mol: Molecule to be processed (modified in place).
    :type mol: `rdkit.Chem.rdchem.RWMol`

    :return: List of named tuples that describe the "multi-center groups"
             recognized within `mol`.
    :rtype: list(.cxsmiles.MCG)
    '''
    groups = []

    for bond in mol.GetBonds():
        try:
            endpoints = bond.GetProp('_MolFileBondEndPts')
        except KeyError:
            # not a position-variant bond
            continue

        if re.match(r'\((\d+\s+)+\d+\)', endpoints) is None:
            continue

        # the "center" is whichever bond end is the '*' query atom;
        # the begin atom is examined first
        bond_ends = (bond.GetBeginAtom(), bond.GetEndAtom())
        center = next(
            (end for end in bond_ends
             if end.HasQuery() and end.GetSmarts() == '*'),
            None)
        if center is None:
            continue

        # Strip the enclosing characters and shift every number down by one.
        # The first number is not an atom of the group and is dropped below
        # (presumably it is the entry count -- confirm against the RDKit
        # description of this property).
        numbers = [int(token) - 1 for token in endpoints[1:-1].split()]
        center_index = center.GetIdx()
        groups.append(cxsmiles.MCG(center=center_index, atoms=numbers[1:]))
        mol.ReplaceAtom(center_index, Chem.Atom(0))

    return groups
def _collect_repeating_units(mol):
    '''
    Gather the "repeating units" described by the "substance groups"
    associated with `mol`.

    Only groups whose ``TYPE`` property equals ``SRU`` are reported; a group
    for which ``TYPE``, ``LABEL`` or ``CONNECT`` lookup raises `KeyError` is
    skipped. Side effect: all substance groups are removed from `mol`.

    :param mol: Molecule to be inspected.
    :type mol: `rdkit.Chem.rdchem.RWMol`

    :return: List of named tuples that describe the "repeating units".
    :rtype: list(.cxsmiles.SRU)
    '''
    units = []

    for sgroup in Chem.GetMolSubstanceGroups(mol):
        try:
            if sgroup.GetProp('TYPE') != 'SRU':
                continue
            label = sgroup.GetProp('LABEL')
            connectivity = sgroup.GetProp('CONNECT').lower()
        except KeyError:
            # incomplete group description: ignore the group
            continue
        else:
            unit = cxsmiles.SRU(atoms=list(sgroup.GetAtoms()),
                                subscript=label,
                                superscript=connectivity)
            units.append(unit)

    # the groups are dropped from the molecule once all of them were examined
    Chem.ClearMolSubstanceGroups(mol)

    return units
def translate_mdl_enumerable_features(mol, prop_prefix=common.CML_PROP_PREFIX):
    '''
    Translate metadata that pertains to the "enumerable features"
    from the conventions assumed by the RDKit SDMolSupplier to the
    form expected by this package.

    The input is not modified: all edits are applied to an editable copy.
    On that copy the R-labels are canonicalized and the R-group placeholder
    atoms are replaced by regular atoms (atomic number 0, properties
    preserved), "atom lists", "position variation bonds" and "repeating
    units" are translated, and every atom and bond receives an ID property.

    :param mol: Molecule with enumerable features in MDL "language".
    :type mol: `rdkit.Chem.rdchem.ROMol`

    :param prop_prefix: Prefix of the names of the properties set here:
                        ``<prefix>id`` on atoms and bonds, ``<prefix>sgroups``
                        on the molecule.
    :type prop_prefix: str

    :return: Adapted molecule (the result of `GetMol()` on the edited copy).
    :rtype: `rdkit.Chem.rdchem.Mol`
    '''
    work = Chem.RWMol(mol)

    # R-labels
    markush.canonicalize_R_labels(work)
    rlabel_atoms = markush.get_rlabels_map(work)

    # convert R-group placeholders into the regular (non-query) atoms
    for atom_indices in rlabel_atoms.values():
        for atom_index in atom_indices:
            work.ReplaceAtom(atom_index, Chem.Atom(0), preserveProps=True)

    # "atom lists", "position variation bonds" and "repeating units"
    # https://docs.chemaxon.com/display/docs/markush-features.md
    _translate_mdl_atomlist_atoms(work)
    multi_center_groups = _collect_posvar_bonds(work)
    repeating_units = _collect_repeating_units(work)

    # assign 1-based IDs ("a<N>" / "b<N>") to atoms and bonds
    id_prop = prop_prefix + 'id'
    for atom in work.GetAtoms():
        atom.SetProp(id_prop, f'a{atom.GetIdx() + 1}')
    for bond in work.GetBonds():
        bond.SetProp(id_prop, f'b{bond.GetIdx() + 1}')

    # the groups are recorded on the molecule only when there are any
    if multi_center_groups or repeating_units:
        sgroups_json = cxsmiles._mcgs_and_srus_as_rdcml_json(
            work, multi_center_groups, repeating_units)
        work.SetProp(prop_prefix + 'sgroups', sgroups_json)

    return work.GetMol()
class MdlFileReader(contextlib.AbstractContextManager):
    '''
    Iterator (usable as a context manager) over the molecules of a file
    read via `rdkit.Chem.SDMolSupplier`. Every molecule is passed through
    `translate_mdl_enumerable_features` before being handed to the caller.
    '''

    def __init__(self, filename, prop_prefix=common.CML_PROP_PREFIX):
        '''
        :param filename: Name of the file handed to `Chem.SDMolSupplier`.
        :type filename: str

        :param prop_prefix: Property name prefix forwarded to
                            `translate_mdl_enumerable_features`.
        :type prop_prefix: str
        '''
        self._prop_prefix = prop_prefix
        self._supplier = Chem.SDMolSupplier(filename)

    def __enter__(self):
        '''
        :return: The reader itself.
        '''
        return self

    def __exit__(self, *exc_details):
        '''
        Does nothing; returns `None`, so an exception raised inside the
        `with` block is not suppressed.
        '''
        return None

    def __iter__(self):
        '''
        :return: The reader itself (it is its own iterator).
        '''
        return self

    def __next__(self):
        '''
        Fetch the next usable molecule. Entries for which the supplier
        yields a falsy value are skipped (presumably records that RDKit
        could not parse -- confirm). `StopIteration` raised by the supplier
        propagates to the caller.

        :return: Next molecule, translated.
        '''
        raw = next(self._supplier)
        while not raw:
            raw = next(self._supplier)
        return translate_mdl_enumerable_features(raw, self._prop_prefix)