# Source code for schrodinger.application.scaffold_enumeration.common
import itertools
import json
# Common prefix of the property names defined below.
CML_PROP_PREFIX = 'cml_'
# Property holding the CML ID of an atom or a bond (read by `get_atom_id()`
# and `get_bond_id()`).
CML_ID_PROP = CML_PROP_PREFIX + 'id'
# Molecule-level property holding the JSON-serialized "S-groups" (see
# `get_sgroups()` / `set_sgroups()`).
CML_SGROUPS_PROP = CML_PROP_PREFIX + 'sgroups'
# NOTE(review): the two names below are not used in this part of the file;
# presumably they carry the CML "rgroupRef" and "attachmentOrder"
# attributes -- confirm against the CML reader.
CML_RGROUP_REF_PROP = CML_PROP_PREFIX + 'rgroupRef'
CML_ATTACHMENT_ORDER_PROP = CML_PROP_PREFIX + 'attachmentOrder'
#------------------------------------------------------------------------------#
def get_atom_id(atom):
    '''
    Returns the ID of an atom.

    The ID is taken from the CML ID property (`CML_ID_PROP`) when the atom
    has one. Otherwise it is derived from the atom index as the letter "a"
    followed by the one-based index (atom with index 0 gets "a1").

    :param atom: Atom.
    :type atom: rdkit.Chem.Atom

    :return: Atom ID.
    :rtype: str
    '''
    if atom.HasProp(CML_ID_PROP):
        return atom.GetProp(CML_ID_PROP)

    # No explicit ID: fall back to the one-based position of the atom.
    return f'a{atom.GetIdx() + 1}'
#------------------------------------------------------------------------------#
def get_atom_id_map(mol):
    '''
    Returns a dictionary that maps "atom IDs" (obtained via `get_atom_id()`)
    onto atom indices.

    :param mol: Molecule.
    :type mol: rdkit.Chem.Mol

    :return: Map between atom IDs and their indices in `mol`.
    :rtype: dict(str, int)

    :raises RuntimeError: If two or more atoms of `mol` share the same ID.
    '''
    index_by_id = {}
    for atom in mol.GetAtoms():
        atom_id = get_atom_id(atom)
        # IDs must be unique, otherwise the mapping would silently lose atoms.
        if atom_id in index_by_id:
            raise RuntimeError(f'same ID ({atom_id}) shared by several atoms')
        index_by_id[atom_id] = atom.GetIdx()
    return index_by_id
#------------------------------------------------------------------------------#
def get_bond_id(bond):
    '''
    Returns the ID of a bond.

    The ID is taken from the CML ID property (`CML_ID_PROP`) when the bond
    has one. Otherwise it is derived from the bond index as the letter "b"
    followed by the index exactly as reported by `bond.GetIdx()`.

    :param bond: Bond.
    :type bond: rdkit.Chem.Bond

    :return: Bond ID.
    :rtype: str
    '''
    if bond.HasProp(CML_ID_PROP):
        return bond.GetProp(CML_ID_PROP)

    # NOTE(review): unlike `get_atom_id()`, the fallback does not add one to
    # the index; kept as is because callers may rely on it -- confirm that
    # the difference is intended.
    return f'b{bond.GetIdx()}'
#------------------------------------------------------------------------------#
def get_sgroups(mol):
    '''
    Returns list of dictionaries that represent "S-groups" from CML
    input. CML reader stores this data as a molecule-level property
    (`CML_SGROUPS_PROP`) in JSON format.

    :param mol: Molecule.
    :type mol: rdkit.Chem.Mol

    :return: List of dictionaries that contain "S-groups" as captured
        from CML input (XML attributes stored as key/value pairs).
        Empty list if `mol` does not have the property.
    :rtype: list(dict(str, str))
    '''
    # Only the property lookup is guarded: a `KeyError` from it means that
    # the molecule carries no S-groups.
    try:
        serialized = mol.GetProp(CML_SGROUPS_PROP)
    except KeyError:
        return []
    else:
        return json.loads(serialized)
#------------------------------------------------------------------------------#
def set_sgroups(mol, sgroups):
    '''
    Serializes list of dictionaries `sgroups` as JSON and stores the
    outcome as a molecule-level property (`CML_SGROUPS_PROP`), which is
    the property that `get_sgroups()` reads.

    :param mol: Molecule.
    :type mol: rdkit.Chem.Mol

    :param sgroups: List of dictionaries that are meant to represent
        "S-groups".
    :type sgroups: list(dict(str, str))
    '''
    serialized = json.dumps(sgroups)
    mol.SetProp(CML_SGROUPS_PROP, serialized)
#------------------------------------------------------------------------------#
def product_of_ranges(extents):
    '''
    Returns iterator over Cartesian product of ranges.

    :param extents: Iterable over the range extents. For example,
        if extents is (3, 8), iterator domain is going to be
        [0, 3) x [0, 8). An empty `extents` gives a single empty tuple;
        an extent of zero makes the whole product empty.
    :type extents: iterable over int

    :return: Iterator over tuples of integers; the last position
        varies fastest.
    :rtype: iterator
    '''
    # One `range(n)` per extent, expanded into the arguments of `product()`.
    return itertools.product(*map(range, extents))
#------------------------------------------------------------------------------#
class EnumerableMixin:
    '''
    Methods common to several "enumerables".

    The methods below rely on two methods that the class using this mixin
    has to provide: `getExtents()`, which returns an iterable of range
    extents, and `getRealization(indices)`, which returns the realization
    for the given sequence of indices (one index per extent).
    '''

    def getIter(self):
        '''
        Returns iterator over realizations.

        The realizations are produced lazily, one per element of the
        Cartesian product of the ranges given by `getExtents()`.

        :return: Iterator over `getRealization()` returns.
        :rtype: iterator
        '''
        indices = product_of_ranges(extents=self.getExtents())
        return map(self.getRealization, indices)

    def getRandomRealization(self, prng):
        '''
        Returns random realization.

        One index is drawn per extent via `prng.randint(0, extent)` and the
        resulting list is passed to `getRealization()`.

        :param prng: mt19937 pseudorandom number generator from numpy.
        :type prng: `numpy.random.RandomState`

        :return: Whatever `getRealization()` returns for the drawn indices.
        '''
        indices = [prng.randint(0, extent) for extent in self.getExtents()]
        return self.getRealization(indices)
#------------------------------------------------------------------------------#