Source code for schrodinger.rdkit.coarsegrain
"""
Conversions between Coarse-grained Schrodinger structure objects (mmct) and
RDKit mol objects.
Please see schrodinger.rdkit_adapter for the structural and philosophical
differences between these two formats.
"""
from collections import Counter
from rdkit import Chem
from schrodinger.infra import mm
from schrodinger.application.matsci import msutils
from schrodinger.thirdparty import rdkit_adapter
# Particle names that may not be used as CG particle names when converting to
# RDKit; get_cgparticle_to_element_mapper raises RuntimeError if any of them
# is present.
# NOTE(review): presumably these collide with single-letter SMARTS atom
# primitives - confirm.
PROTECTED_PATTERN_BIT = ['D', 'R', 'r', 'v', 'x', 'X', 'H']
# Module-level cache for the proxy element list. Stays None until the first
# call to get_proxy_periodic_table, which fills it in and returns it.
_PROXY_PERIODIC_TABLE = None
def get_proxy_periodic_table():
    """
    Return the element symbols that can stand in for CG particles.

    The list is built on the first call from the RDKit periodic table and
    stored in the module-level `_PROXY_PERIODIC_TABLE`; later calls return
    that stored list. It holds the symbols of atomic numbers 1 through 117
    for which RDKit reports a default valence of -1.

    :rtype: list[str]
    :return: Cached list of element symbols. The same list object is returned
        on every call, so do not modify it!
    """
    global _PROXY_PERIODIC_TABLE
    # Fast path: the table has already been computed.
    if _PROXY_PERIODIC_TABLE is not None:
        return _PROXY_PERIODIC_TABLE

    table = Chem.rdchem.GetPeriodicTable()
    symbols = []
    for number in range(1, 118):
        if table.GetDefaultValence(number) == -1:
            symbols.append(table.GetElementSymbol(number))
    _PROXY_PERIODIC_TABLE = symbols
    return _PROXY_PERIODIC_TABLE
# Each CG particle name is mapped to an element symbol. Atoms of these proxy
# elements are used to generate the RDKit mol for a CG system.
def get_cgparticle_to_element_mapper(cg_particle_names):
    """
    Create a mapper between coarse-grain particle name and an element.

    Duplicate names in the input are collapsed to a single entry (first
    occurrence wins the position), so every name passed in is a key of the
    returned dict.

    :type cg_particle_names: list
    :param cg_particle_names: list of CG particle names

    :raise RuntimeError: if any name is listed in `PROTECTED_PATTERN_BIT`, or
        if there are more unique names than available proxy elements

    :rtype: dict
    :return: dict with CG particle name as key and element name as value
    """
    # De-duplicate while keeping first-seen order. A set cannot be used here
    # because the order must be reproducible. Filtering on an occurrence
    # count of 1 would wrongly drop every repeated name instead of keeping
    # one copy of it.
    unique_names = list(dict.fromkeys(cg_particle_names))
    name_lookup = set(unique_names)

    protected_set = name_lookup.intersection(PROTECTED_PATTERN_BIT)
    if protected_set:
        raise RuntimeError(
            'Coarse-grained structures containing protected names, such as '
            f'{sorted(protected_set)}, are not supported.')

    # Remove CG bead names from the proxy elements due to possible conflict
    # between the two names. A new list is built so the cached table returned
    # by get_proxy_periodic_table is never modified.
    proxy_element = [
        name for name in get_proxy_periodic_table()
        if name not in name_lookup
    ]
    if len(unique_names) > len(proxy_element):
        raise RuntimeError(
            f"Cannot have more than {len(proxy_element)} unique particle names")

    # Pair each schrodinger CG particle name with a proxy element name for the
    # rdkit mol, in order.
    return dict(zip(unique_names, proxy_element))
def prepare_cg_for_rdkit(struct):
    """
    Build a fake all-atom structure from a CG structure so that it can be
    converted to an RDKit mol.

    The work is done on the result of `struct.copy()`: the coarse-grain atom
    property is removed from it and each atom's element is set to the proxy
    element assigned to its particle name.

    :param schrodinger.structure.Structure struct: Input CG structure

    :rtype: schrodinger.structure.Structure, dict
    :return: Fake AA structure and internal mapping dict between schrodinger
        particle name and rdkit proxy element name
    """
    fake_aa = struct.copy()
    msutils.remove_atom_property(fake_aa,
                                 prop=mm.MMCT_ATOM_PROPERTY_COARSE_GRAIN)

    # Sorted unique names give a reproducible name -> element assignment.
    unique_names = sorted({particle.name for particle in fake_aa.atom})
    name_to_element = get_cgparticle_to_element_mapper(unique_names)

    for particle in fake_aa.atom:
        particle.element = name_to_element[particle.name]
    return fake_aa, name_to_element
def _coarsegrain_st_to_rdkit(st):
    """
    Create RDKIT mol object from a coarse-grained structure

    :type st: `schrodinger.structure.Structure`
    :param st: structure

    :raise rdkit_adapter.adapter.InconsistentStructureError: if `st` is not a
        coarse-grained structure

    :rtype: `rdkit.Mol`, dict
    :return: rdkit molecule and internal mapping dict between schrodinger
        particle name and rdkit proxy element name
    """
    # An atomistic structure is not supported here, raise error
    if not msutils.is_coarse_grain(st, by_atom=True):
        raise rdkit_adapter.adapter.InconsistentStructureError(
            "_coarsegrain_st_to_rdkit only supports coarse-grained "
            "structures. Please see rdkit_adapter.to_rdkit function")

    # From here on `st` is the fake all-atom copy, not the caller's structure.
    st, proxy_element_mapper = prepare_cg_for_rdkit(st)

    # include_properties and include_coordinates are set to false in
    # adapter.evaluate_smarts, match here. If changing please check the
    # performance (MATSCI-11446)
    mol = rdkit_adapter.to_rdkit(st,
                                 include_properties=False,
                                 include_coordinates=False,
                                 sanitize=False)

    # Reset the name of each rdkit atom to the CG particle name
    for atom in mol.GetAtoms():
        st_idx = atom.GetIntProp(rdkit_adapter.SDGR_INDEX)
        st_atom = st.atom[st_idx]
        # NOTE(review): this sets a plain Python attribute on the rdkit atom
        # object; it looks like it has no lasting effect - confirm whether it
        # is still needed.
        atom.name = st_atom.name
        # Store the particle name under the 'smilesSymbol' atom property
        atom.SetProp('smilesSymbol', st_atom.name)
        atom.SetNoImplicit(True)
    mol.UpdatePropertyCache(strict=False)
    return mol, proxy_element_mapper
def get_coarsegrain_smiles(st, atom_ids=None):
    """
    Get smiles for coarse-grained structure

    :type st: `schrodinger.structure.Structure`
    :param st: structure

    :type atom_ids: list
    :param atom_ids: list of substructure atom ids. If None or empty, the
        smiles of the whole structure is returned.

    :rtype: str
    :return: smiles for the coarse grain structure, or for the substructure
        formed by `atom_ids` when they are given
    """
    mol, _ = _coarsegrain_st_to_rdkit(st)
    if atom_ids:
        # Translate schrodinger atom indices into rdkit atom indices
        index_map = rdkit_adapter.get_map_sdgr_to_rdk(mol)
        fragment_ids = [index_map[sdgr_idx] for sdgr_idx in atom_ids]
        return Chem.MolFragmentToSmiles(mol, fragment_ids)
    return Chem.MolToSmiles(mol)