# Source code for schrodinger.rdkit.coarsegrain
"""
Conversions between Coarse-grained Schrodinger structure objects (mmct) and
RDKit mol objects.
Please see schrodinger.rdkit_adapter for structural/philosophic differences
between these two formats
"""
from collections import Counter
from rdkit import Chem
from schrodinger.infra import mm
from schrodinger.application.matsci import msutils
from schrodinger.thirdparty import rdkit_adapter
# CG particle names that are rejected when converting to RDKit;
# get_cgparticle_to_element_mapper raises RuntimeError if any of them is used.
# NOTE(review): these look like SMARTS atomic-primitive letters (D, R, r, v,
# x, X, H) - confirm that this is why they are reserved.
PROTECTED_PATTERN_BIT = ['D', 'R', 'r', 'v', 'x', 'X', 'H']
# Module-level cache filled lazily by get_proxy_periodic_table; stays None
# until that function is called for the first time.
_PROXY_PERIODIC_TABLE = None
def get_proxy_periodic_table():
    """
    Get proxy periodic table.

    The table holds the symbols of the elements for which the RDKit periodic
    table reports a default valence of -1. It is built on the first call and
    cached in the module-level ``_PROXY_PERIODIC_TABLE``; every later call
    returns that same list object.

    :rtype: list[str]
    :return: Cached list of elements, do not modify!!
    """
    global _PROXY_PERIODIC_TABLE
    if _PROXY_PERIODIC_TABLE is None:
        periodic_table = Chem.rdchem.GetPeriodicTable()
        # NOTE(review): atomic numbers 1..117 are scanned, so element 118 is
        # never considered - confirm this upper bound is intentional.
        _PROXY_PERIODIC_TABLE = [
            periodic_table.GetElementSymbol(atomic_num)
            for atomic_num in range(1, 118)
            if periodic_table.GetDefaultValence(atomic_num) == -1
        ]
    return _PROXY_PERIODIC_TABLE
# Each CG particle name is mapped to an element symbol. Atoms carrying these
# proxy elements are used to generate the RDKit mol for a CG system.
def get_cgparticle_to_element_mapper(cg_particle_names):
    """
    Create a mapper between coarse-grain particle name and an element.

    Repeated names in the input are collapsed to a single entry; the order of
    first appearance is kept so that the resulting mapping is reproducible.

    :type cg_particle_names: list
    :param cg_particle_names: list of CG particle names

    :raise RuntimeError: if any name is listed in PROTECTED_PATTERN_BIT, or if
        there are more unique names than available proxy elements

    :rtype: dict
    :return: dict with CG particle name as key and element name as value
    """
    # Can't use sets here, need to keep the order to be reproducible.
    # dict.fromkeys drops repeated names while keeping first-seen order, so
    # every name that was passed in ends up with a mapping.
    cg_particle_names = list(dict.fromkeys(cg_particle_names))
    unique_names = set(cg_particle_names)

    protected_set = unique_names.intersection(PROTECTED_PATTERN_BIT)
    if protected_set:
        raise RuntimeError(
            f'{sorted(protected_set)} are protected names. Coarse-grained '
            f'structures containing these particle names is not supported.')

    # Remove CG bead names from the proxy elements due to possible conflict
    # between the two kinds of names. A new list is built, so the cached
    # periodic table itself is left untouched.
    proxy_element = [
        name for name in get_proxy_periodic_table()
        if name not in unique_names
    ]
    if len(cg_particle_names) > len(proxy_element):
        raise RuntimeError(
            f"Cannot have more than {len(proxy_element)} unique particle names")

    # Pair each schrodinger CG particle name with the next free proxy element
    # name for the rdkit mol.
    return dict(zip(cg_particle_names, proxy_element))
def prepare_cg_for_rdkit(struct):
    """
    Create fake AA from a CG structure that can be converted to RDKIT mol.

    The input structure is not modified: the work is done on a copy, from
    which the coarse-grain atom property is removed and whose atoms get the
    proxy element mapped to their particle name.

    :param schrodinger.structure.Structure struct: Input CG structure

    :raise RuntimeError: propagated from get_cgparticle_to_element_mapper when
        the particle names cannot be mapped

    :rtype: schrodinger.structure.Structure, dict
    :return: Fake AA structure and internal mapping dict between schrodinger
        particle name and rdkit proxy element name
    """
    fake_aa = struct.copy()
    msutils.remove_atom_property(fake_aa,
                                 prop=mm.MMCT_ATOM_PROPERTY_COARSE_GRAIN)
    # Sorted so that the name -> element assignment does not depend on the
    # atom order of the structure.
    particle_names = sorted({atom.name for atom in fake_aa.atom})
    proxy_element_mapper = get_cgparticle_to_element_mapper(particle_names)
    for atom in fake_aa.atom:
        atom.element = proxy_element_mapper[atom.name]
    return fake_aa, proxy_element_mapper
def _coarsegrain_st_to_rdkit(st):
    """
    Create RDKIT mol object from a coarse-grained structure

    The structure is first turned into a fake atomistic copy (see
    prepare_cg_for_rdkit), converted without sanitization, and then each RDKit
    atom is labelled with the original CG particle name for SMILES output.

    :type st: `schrodinger.structure.Structure`
    :param st: structure

    :raise rdkit_adapter.adapter.InconsistentStructureError: if the structure
        is not coarse-grained

    :rtype: `rdkit.Mol`, dict
    :return: rdkit molecule and internal mapping dict between schrodinger
        particle name and rdkit proxy element name
    """
    # an atomistic structure, raise error
    if not msutils.is_coarse_grain(st, by_atom=True):
        raise rdkit_adapter.adapter.InconsistentStructureError(
            "_coarsegrain_st_to_rdkit only supports coarse-grained "
            "structures. Please see rdkit_adapter.to_rdkit function")
    st, proxy_element_mapper = prepare_cg_for_rdkit(st)
    # include_properties and include_coordinates are set to false in
    # adapter.evaluate_smarts, match here. If changing please check the
    # performance (MATSCI-11446)
    mol = rdkit_adapter.to_rdkit(st,
                                 include_properties=False,
                                 include_coordinates=False,
                                 sanitize=False)
    # reset name of rd_mol
    for atom in mol.GetAtoms():
        st_idx = atom.GetIntProp(rdkit_adapter.SDGR_INDEX)
        st_atom = st.atom[st_idx]
        # NOTE(review): this sets a plain Python attribute on the RDKit atom
        # object; presumably it does not persist on the molecule - confirm
        # whether it is still needed.
        atom.name = st_atom.name
        # Write the CG particle name instead of the proxy element in SMILES.
        atom.SetProp('smilesSymbol', st_atom.name)
        atom.SetNoImplicit(True)
    mol.UpdatePropertyCache(strict=False)
    return mol, proxy_element_mapper
def get_coarsegrain_smiles(st, atom_ids=None):
    """
    Get smiles for coarse-grained structure

    :type st: `schrodinger.structure.Structure`
    :param st: structure

    :type atom_ids: list
    :param atom_ids: list of substructure atom id. If None or empty, the
        smiles of the whole structure is returned.

    :raise rdkit_adapter.adapter.InconsistentStructureError: propagated from
        _coarsegrain_st_to_rdkit if the structure is not coarse-grained

    :rtype: str
    :return: smiles for coarse grain substructure
    """
    mol, _ = _coarsegrain_st_to_rdkit(st)
    if not atom_ids:
        return Chem.MolToSmiles(mol)
    # Translate the schrodinger atom ids into rdkit atom indices before
    # asking for the fragment smiles.
    sdgr_to_rdk_idx = rdkit_adapter.get_map_sdgr_to_rdk(mol)
    rdk_atom_ids = [sdgr_to_rdk_idx[idx] for idx in atom_ids]
    return Chem.MolFragmentToSmiles(mol, rdk_atom_ids)