# Source code for schrodinger.application.scaffold_enumeration.common
import itertools
import json
# Names of the RDKit properties that carry CML-derived data.
# All of them share a common prefix.
CML_PROP_PREFIX = 'cml_'
CML_ID_PROP = f'{CML_PROP_PREFIX}id'
CML_SGROUPS_PROP = f'{CML_PROP_PREFIX}sgroups'
CML_RGROUP_REF_PROP = f'{CML_PROP_PREFIX}rgroupRef'
CML_ATTACHMENT_ORDER_PROP = f'{CML_PROP_PREFIX}attachmentOrder'
#------------------------------------------------------------------------------#
def get_atom_id(atom):
    '''
    Returns the ID of `atom`.

    The value of the CML ID property is used when the atom has one;
    otherwise the ID is synthesized as "a" followed by the 1-based
    atom index.

    :param atom: Atom.
    :type atom: rdkit.Chem.Atom

    :return: Atom ID.
    :rtype: str
    '''
    # Fall back to an index-derived ID only when no explicit ID is stored.
    if not atom.HasProp(CML_ID_PROP):
        return f'a{atom.GetIdx() + 1}'
    return atom.GetProp(CML_ID_PROP)
#------------------------------------------------------------------------------#
def get_atom_id_map(mol):
    '''
    Builds a lookup table from "atom IDs" (as returned by
    `get_atom_id()`) to atom indices.

    :param mol: Molecule.
    :type mol: rdkit.Chem.Mol

    :return: Map between atom IDs and their indices in `mol`.
    :rtype: dict(str, int)

    :raises RuntimeError: If two atoms of `mol` resolve to the same ID.
    '''
    id_to_index = {}
    for atom in mol.GetAtoms():
        atom_id = get_atom_id(atom)
        # IDs must be unique, otherwise the map would be ambiguous.
        if atom_id in id_to_index:
            raise RuntimeError(f'same ID ({atom_id}) shared by several atoms')
        id_to_index[atom_id] = atom.GetIdx()
    return id_to_index
#------------------------------------------------------------------------------#
def get_bond_id(bond):
    '''
    Returns bond ID obtained either from the CML ID property or
    derived from the bond index in case the ID property is not available.

    The derived ID is "b" followed by the 1-based bond index, which
    mirrors the "a" + 1-based atom index scheme of `get_atom_id()`.

    :param bond: Bond.
    :type bond: rdkit.Chem.Bond

    :return: Bond ID.
    :rtype: str
    '''
    if bond.HasProp(CML_ID_PROP):
        return bond.GetProp(CML_ID_PROP)
    # RDKit indices are 0-based; shift by one so that the first bond
    # becomes "b1", consistent with the atom fallback ("a1").
    return f'b{bond.GetIdx() + 1}'
#------------------------------------------------------------------------------#
def get_sgroups(mol):
    '''
    Decodes the "S-groups" captured from CML input. The CML reader
    stores them as a JSON-encoded molecule-level property
    (CML_SGROUPS_PROP).

    :param mol: Molecule.
    :type mol: rdkit.Chem.Mol

    :return: List of dictionaries that contain "S-groups" as captured
             from CML input (XML attributes stored as key/value pairs).
             An empty list is returned when the property lookup raises
             `KeyError`.
    :rtype: list(dict(str, str))
    '''
    try:
        encoded = mol.GetProp(CML_SGROUPS_PROP)
    except KeyError:
        # A failed lookup is treated as "no S-groups stored".
        return []
    return json.loads(encoded)
#------------------------------------------------------------------------------#
def set_sgroups(mol, sgroups):
    '''
    Stores `sgroups` on the molecule: the list of dictionaries is
    serialized as JSON and written to the molecule-level property
    CML_SGROUPS_PROP.

    :param mol: Molecule.
    :type mol: rdkit.Chem.Mol

    :param sgroups: List of dictionaries that are meant to represent
                    "S-groups".
    :type sgroups: list(dict(str, str))
    '''
    encoded = json.dumps(sgroups)
    mol.SetProp(CML_SGROUPS_PROP, encoded)
#------------------------------------------------------------------------------#
def product_of_ranges(extents):
    '''
    Returns an iterator over the Cartesian product of the ranges
    `range(n)` built from each `n` in `extents`.

    :param extents: Iterable over the range extents. For example,
                    if extents is (3, 8), iterator domain is going to be
                    [0, 3) x [0, 8).
    :type extents: iterable over positive int

    :return: Iterator over tuples of integers.
    '''
    # One `range` per extent, handed to `product` as separate arguments.
    return itertools.product(*map(range, extents))
#------------------------------------------------------------------------------#
class EnumerableMixin:
    '''
    Helper methods shared by several "enumerables".

    The methods below call `self.getExtents()` and
    `self.getRealization()`, neither of which is defined here.
    '''

    def getIter(self):
        '''
        Returns a lazy iterator over realizations: each index tuple from
        the Cartesian product of the ranges given by `getExtents()` is
        passed to `getRealization()`.

        :return: Iterator over `getRealization()` returns.
        :rtype: iterator
        '''

        def realize(index):
            return self.getRealization(index)

        index_tuples = product_of_ranges(extents=self.getExtents())
        return map(realize, index_tuples)

    def getRandomRealization(self, prng):
        '''
        Returns a random realization. One index per extent `e` is drawn
        via `prng.randint(0, e)` and the resulting list is passed to
        `getRealization()`.

        :param prng: mt19937 pseudorandom number generator from numpy.
        :type prng: `numpy.random.RandomState`
        '''
        random_index = [
            prng.randint(0, extent) for extent in self.getExtents()
        ]
        return self.getRealization(random_index)
#------------------------------------------------------------------------------#