# Source code for schrodinger.application.scaffold_enumeration.atomlist
'''
Implements "atom list" enumeration (see ENUM-246).
'''
import collections
import re
import rdkit.Chem
from schrodinger.utils import log
from . import common
# Module-level output logger; _validate_elements() uses it to warn about
# element symbols that could not be converted into atomic numbers.
logger = log.get_output_logger(__name__)
#------------------------------------------------------------------------------#
# Record describing a single enumerable "atom list":
#   atom     -- index of the atom with varying atomic number
#   elements -- list of the atomic numbers to be enumerated
AtomListInfo = collections.namedtuple('AtomListInfo', ('atom', 'elements'))
# Name of the atom property that carries the MRV query text; atom lists are
# parsed from it by get_atom_elements() and appended to it by
# set_atom_elements().
cml_mrvQueryProps = common.CML_PROP_PREFIX + 'mrvQueryProps'
#------------------------------------------------------------------------------#
def _validate_elements(elements):
    '''
    Translates chemical element symbols into atomic numbers.

    Duplicates are collapsed and the outcome is sorted in ascending order.
    Symbols for which the periodic table lookup raises `RuntimeError` are
    reported via a logger warning and left out of the outcome.

    :param elements: List of elements.
    :type elements: list(str)
    :return: List of atomic numbers.
    :rtype: list(int)
    '''
    table = rdkit.Chem.GetPeriodicTable()
    unique_numbers = set()
    for symbol in elements:
        try:
            number = table.GetAtomicNumber(symbol)
        except RuntimeError:
            logger.warning('could not figure out atomic number for "%s"',
                           symbol)
            continue
        unique_numbers.add(number)
    return sorted(unique_numbers)
#------------------------------------------------------------------------------#
def _parse_mrv_atom_list(text):
    '''
    Parses "atom list" data from MRV query text. For example, for
    elements C, O and N, the query reads "L,C,O,N:".
    :param text: Text to parse.
    :type text: str
    :return: List of chemical elements.
    :rtype: list(str)
    '''
    elements = []
    for m in re.findall(r'L((?:,[^:,L]+)+):', text):
        for e in m.split(','):
            if e:
                elements.append(e)
    return elements
#------------------------------------------------------------------------------#
def _collect_atom_lists(mol):
    '''
    Gathers "atom list" data (ENUM-246) from the atoms of a molecule.

    Atoms are visited in the order given by `mol.GetAtoms()`; an atom
    contributes an entry only if at least one of its listed elements
    survives validation.

    :param mol: Molecule.
    :type mol: rdkit.Chem.Mol
    :return: List of `AtomListInfo` instances.
    :rtype: list(AtomListInfo)
    '''
    collected = []
    for atom in mol.GetAtoms():
        numbers = _validate_elements(get_atom_elements(atom))
        if not numbers:
            # nothing to enumerate on this atom
            continue
        collected.append(AtomListInfo(atom=atom.GetIdx(), elements=numbers))
    return collected
#------------------------------------------------------------------------------#
def get_atom_elements(atom):
    '''
    Returns atom list associated with the `atom`.

    The list is parsed from the atom's MRV query property; an atom that
    does not have this property yields an empty list.

    :param atom: RDKit atom.
    :type atom: `rdkit.Chem.Atom`
    :return: List of elements.
    :rtype: list(str)
    '''
    # Only the property lookup is guarded: a missing property is reported
    # by `GetProp` as KeyError.
    try:
        query_text = atom.GetProp(cml_mrvQueryProps)
    except KeyError:
        return []
    return _parse_mrv_atom_list(query_text)
#------------------------------------------------------------------------------#
def set_atom_elements(atom, elements):
    '''
    Makes `atom` into an atom list (in ENUM-246 sense).

    A new "L,<element>,...:" record is appended to the text already held
    by the atom's MRV query property; the property is created if the atom
    does not have it yet.

    :param atom: RDKit atom.
    :type atom: `rdkit.Chem.Atom`
    :param elements: Iterable over elements.
    :type elements: iterable over str
    :raises RuntimeError: If no elements are given.
    '''
    # Materialize before checking for emptiness: iterators and generators
    # are always truthy, so testing `elements` alone would let an empty
    # one through and write a meaningless "L:" record. Falsy inputs
    # (empty containers, None) are still rejected the same way as before.
    symbols = list(elements) if elements else []
    if not symbols:
        raise RuntimeError("no elements")
    try:
        current = atom.GetProp(cml_mrvQueryProps)
    except KeyError:
        # the atom carries no query text yet
        current = ''
    record = ','.join(['L'] + symbols) + ':'
    atom.SetProp(cml_mrvQueryProps, current + record)
#------------------------------------------------------------------------------#
class AtomListEnumerable(common.EnumerableMixin):
    '''
    Enumerates the element choices of the "atom lists" found in a molecule.

    Each atom list collected from the molecule is one enumeration
    dimension; its extent is the number of validated elements in the list.
    '''

    def __init__(self, mol):
        '''
        :param mol: RDKit molecule.
        :type mol: ROMol
        '''
        # Atom lists are collected once, at construction time.
        self.atomlists = _collect_atom_lists(mol)
        self.mol = mol

    def getExtents(self):
        '''
        :return: Number of elements in each of the collected atom lists
            (same order as `self.atomlists`).
        :rtype: list(int)
        '''
        return [len(alist.elements) for alist in self.atomlists]

    def getRealization(self, idx):
        '''
        :param idx: "Index" of a realization. Its entries are paired
            positionally with the collected atom lists; each entry selects
            an element within the corresponding list.
        :type idx: iterable over int
        :return: RDKit molecule without enumerable "atom lists".
        :rtype: rdkit.Chem.Mol
        '''
        if not self.atomlists:
            # Nothing to enumerate: the stored molecule itself (not a copy)
            # is handed back.
            return self.mol

        # Work on a copy so that the stored molecule keeps its atom lists.
        outcome = rdkit.Chem.Mol(self.mol)
        for (choice, alist) in zip(idx, self.atomlists):
            atom = outcome.GetAtomWithIdx(alist.atom)
            # The realized atom is no longer a list: drop the query text
            # and assign the selected atomic number.
            atom.ClearProp(cml_mrvQueryProps)
            atom.SetAtomicNum(alist.elements[choice])
        return outcome
#------------------------------------------------------------------------------#