Source code for schrodinger.application.scaffold_enumeration.atomlist
'''
Implements "atom list" enumeration (see ENUM-246).
'''
import collections
import re
import rdkit.Chem
from schrodinger.utils import log
from . import common
logger = log.get_output_logger(__name__)
#------------------------------------------------------------------------------#
# Lightweight record describing one enumerable "atom list" site in a molecule.
AtomListInfo = collections.namedtuple(
    'AtomListInfo',
    (
        'atom',  # index of the atom whose atomic number varies
        'elements',  # atomic numbers to be enumerated at that atom
    ),
)  # yapf: disable
# Name of the atom property that holds the MRV query text; atom lists are
# read from it by `get_atom_elements` and appended to it by
# `set_atom_elements`.
cml_mrvQueryProps = common.CML_PROP_PREFIX + 'mrvQueryProps'
#------------------------------------------------------------------------------#
def _validate_elements(elements):
    '''
    Translates chemical element symbols into atomic numbers, dropping
    duplicates.

    Symbols for which the periodic table lookup raises `RuntimeError`
    are reported via a logger warning and skipped.

    :param elements: Chemical element symbols.
    :type elements: list(str)

    :return: Unique atomic numbers in ascending order.
    :rtype: list(int)
    '''
    periodic_table = rdkit.Chem.GetPeriodicTable()
    unique_numbers = set()
    for symbol in elements:
        try:
            number = periodic_table.GetAtomicNumber(symbol)
        except RuntimeError:
            logger.warning('could not figure out atomic number for "%s"',
                           symbol)
        else:
            unique_numbers.add(number)
    return sorted(unique_numbers)
#------------------------------------------------------------------------------#
def _parse_mrv_atom_list(text):
'''
Parses "atom list" data from MRV query text. For example, for
elements C, O and N, the query reads "L,C,O,N:".
:param text: Text to parse.
:type text: str
:return: List of chemical elements.
:rtype: list(str)
'''
elements = []
for m in re.findall(r'L((?:,[^:,L]+)+):', text):
for e in m.split(','):
if e:
elements.append(e)
return elements
#------------------------------------------------------------------------------#
def _collect_atom_lists(mol):
    '''
    Gathers the "atom list" sites of a molecule (ENUM-246).

    An atom contributes an entry only if its atom list yields at least
    one valid atomic number.

    :param mol: Molecule.
    :type mol: rdkit.Chem.Mol

    :return: One `AtomListInfo` per atom carrying a non-empty atom list,
        in atom iteration order.
    :rtype: list(AtomListInfo)
    '''
    collected = []
    for atom in mol.GetAtoms():
        atomic_numbers = _validate_elements(get_atom_elements(atom))
        if not atomic_numbers:
            continue
        collected.append(
            AtomListInfo(atom=atom.GetIdx(), elements=atomic_numbers))
    return collected
#------------------------------------------------------------------------------#
def get_atom_elements(atom):
    '''
    Returns the atom list associated with `atom`.

    The list is parsed from the MRV query property of the atom; if
    reading that property raises `KeyError`, an empty list is returned.

    :param atom: RDKit atom.
    :type atom: `rdkit.Chem.Atom`

    :return: List of elements.
    :rtype: list(str)
    '''
    try:
        query_text = atom.GetProp(cml_mrvQueryProps)
    except KeyError:
        return []
    return _parse_mrv_atom_list(query_text)
#------------------------------------------------------------------------------#
def set_atom_elements(atom, elements):
    '''
    Makes `atom` into an atom list (in ENUM-246 sense).

    The atom list is appended, in "L,<e1>,<e2>,...:" form, to whatever
    MRV query text the atom already carries.

    :param atom: RDKit atom.
    :type atom: `rdkit.Chem.Atom`

    :param elements: Iterable over elements.
    :type elements: iterable over str

    :raises RuntimeError: If `elements` is empty.
    '''
    # Materialize before testing for emptiness: generators and other
    # iterators are always truthy, so an exhausted one would otherwise
    # slip through and append a meaningless "L:" to the property.
    symbols = list(elements or ())
    if not symbols:
        raise RuntimeError("no elements")

    try:
        existing = atom.GetProp(cml_mrvQueryProps)
    except KeyError:
        # no query text on this atom yet
        existing = ''

    atom.SetProp(cml_mrvQueryProps,
                 existing + ','.join(['L'] + symbols) + ':')
#------------------------------------------------------------------------------#
class AtomListEnumerable(common.EnumerableMixin):
    '''
    Enumerates a molecule over the alternatives given by its
    "atom lists" (ENUM-246).
    '''

    def __init__(self, mol):
        '''
        :param mol: RDKit molecule.
        :type mol: ROMol
        '''
        self.atomlists = _collect_atom_lists(mol)
        self.mol = mol

    def getExtents(self):
        '''
        :return: Number of alternative elements for each atom list, in
            the order of `self.atomlists`.
        :rtype: list(int)
        '''
        return [len(info.elements) for info in self.atomlists]

    def getRealization(self, idx):
        '''
        :param idx: "Index" of a realization.
        :type idx: iterable over int

        :return: RDKit molecule without enumerable "atom lists". When the
            molecule has no atom lists, the stored molecule itself (not
            a copy) is returned.
        :rtype: rdkit.Chem.Mol
        '''
        if not self.atomlists:
            return self.mol

        # Work on a copy so that the template molecule stays intact.
        realization = rdkit.Chem.Mol(self.mol)
        for choice, info in zip(idx, self.atomlists):
            target = realization.GetAtomWithIdx(info.atom)
            # The atom is no longer a list once a single element is picked.
            target.ClearProp(cml_mrvQueryProps)
            target.SetAtomicNum(info.elements[choice])
        return realization
#------------------------------------------------------------------------------#