Source code for schrodinger.thirdparty.rdkit_adapter
"""
Conversions between Schrodinger structure objects (mmct) and RDKit mol objects.
There are some structural/philosophic differences between these two formats,
stemming from their distinct origins (RDKit being originally used for
cheminformatics, and schrodinger/mmct being originally used for molecular
modeling.)
Notably:
Schrodinger wants all atoms to have positions in space. RDKit allows
unspecified position, or multiple conformers.
Schrodinger wants all Hydrogens to be fully specified (position and bonding).
My understanding is that RDKit has three types of hydrogens::
* Implicit - calculated based on valence. These are not shown in SMILES.
* Explicit - as a property of the associated heavy atom. These are shown in
SMILES like [cH]
* Included in connectivity graph - (only these can have coordinates or
other properties). These are shown in SMILES like c([H]).
There are other distinctions, for instance Schrodinger is aware of dative, or
zero-order bonds, whereas RDKit is aware of aromatic and conjugated bonds.
"""
import contextlib
import re
from rdkit import Chem
from rdkit import RDLogger
from schrodinger import adapter
from schrodinger import structure
from schrodinger.infra import structure as infrastructure
# Pattern used by translate_rdkit_props_dict() to decide whether a property
# name already looks like an mmct property: a type character (b, i, r or s),
# an underscore, one or more characters that are neither underscore nor space,
# another underscore, then any number of word characters. It is applied with
# .match(), so only the beginning of the name has to fit the pattern.
CT_PROP_FORMAT = re.compile(r'[birs]_[^_ ]+_\w*')
# Property name taken from adapter.RDK_INDEX.
ANNOTATION_PROP = adapter.RDK_INDEX #: i_rdk_index
# Name of the integer atom property that get_map_sdgr_to_rdk() and
# get_map_rdk_to_sdgr() read from each RDKit atom.
SDGR_INDEX = adapter.SCHRODINGER_INDEX
class InconsistentStructureError(ValueError):
    """
    Raised by the conversion functions of this module when the adapter
    reports an inconsistent structure, e.g. inconsistent or incorrect
    stereochemical labels on the input.
    """
class UnsupportedStructureError(NotImplementedError):
    """
    Raised for structures that cannot be translated between RDKit and
    Schrodinger yet.
    """
def to_rdkit(st,
             implicitH=False,
             include_properties=True,
             include_coordinates=True,
             sanitize=True,
             include_stereo=True):
    """
    Create a RdKit molecule from a Schrodinger structure (aka mmct).

    :param st: The schrodinger structure to be translated to RDKit. The input
        structure remains unmodified.
    :type st: schrodinger.structure.Structure

    :param implicitH: Should hydrogens be listed implicitly? If False,
        hydrogens will be included in the connectivity graph, and 3D
        coordinates and properties of the hydrogens will be translated. Some
        pattern matching in RDKit requires implicit hydrogens, however.
    :type implicitH: bool

    :param include_properties: Should atom and structure level properties
        be copied from the schrodinger structure to the RDKit mol?
    :type include_properties: bool

    :param include_coordinates: Should the coordinates of the structure be
        copied to a conformer associated with the RDKit mol?
    :type include_coordinates: bool

    :param sanitize: Should the molecule be sanitized? Sanitization discerns
        aromaticity, for example. But it rejects invalid molecules.
    :type sanitize: bool

    :param include_stereo: Whether the stereochemistry of the structure should
        be translated into the RDKit mol.
    :type include_stereo: bool

    :return: An rdkit mol representing the same structure as the input st
    :rtype: rdkit.Mol

    :raises TypeError: if st is not a Schrodinger Structure.
    :raises InconsistentStructureError: if the input structure has
        inconsistent or incorrect stereochemical labels.
    :raises UnsupportedStructureError: If the input structure can't be
        translated between RDKit and Schrodinger yet.
    """
    if not isinstance(st, (structure.Structure, infrastructure.Structure)):
        raise TypeError(
            'Input structure to to_rdkit() should be a Schrodinger Structure')

    # Translate the boolean keyword arguments into the adapter's option enums.
    options = adapter.RDKitOptions()
    options.hydrogens = (adapter.Hydrogens.MakeImplicit
                         if implicitH else adapter.Hydrogens.Retain)
    # Atom labelling is always enabled, regardless of the arguments.
    options.label_atoms = adapter.LabelAtoms.Enable
    options.properties = (adapter.Properties.Copy
                          if include_properties else adapter.Properties.Ignore)
    options.coordinates = (adapter.Coordinates.Copy if include_coordinates else
                           adapter.Coordinates.Ignore)
    options.sanitize = (adapter.Sanitize.Enable
                        if sanitize else adapter.Sanitize.Disable)
    options.stereochemistry = (adapter.StereoChemistry.Copy if include_stereo
                               else adapter.StereoChemistry.Ignore)

    # Re-raise adapter errors as this module's exception types, chaining the
    # original so the underlying cause stays visible in tracebacks.
    try:
        return adapter.to_rdkit(st, options)
    except adapter.InconsistentStructureError as err:
        raise InconsistentStructureError(*err.args) from err
    except adapter.UnsupportedStructureError as err:
        raise UnsupportedStructureError(*err.args) from err
def from_rdkit(mol,
               include_properties=True,
               generate_coordinates=False,
               conformer=None,
               include_stereo=True):
    """
    Create a Schrodinger structure from an RdKit molecule.

    For correct behavior, requires that the RdKit molecule be sanitized
    beforehand.

    If the RDKit molecule does not have 3d structure, one can be generated
    using fast3d.

    :param mol: RDKit mol to be converted to Schrodinger space. It will not be
        modified.
    :type mol: rdkit.Mol

    :param include_properties: Should atom and molecule properties be copied
        from the RDKit mol?
    :type include_properties: bool

    :param generate_coordinates: Should 3D coordinates be generated if the
        RDKit mol has no conformer, or if the selected conformer is not 3D?
        Uses fast3d.
    :type generate_coordinates: bool

    :param conformer: If the RDKit mol has more than one associated conformer,
        choose one to turn into a Schrodinger structure. If None, then
        conformer 0 will be used.
    :type conformer: NoneType or int

    :param include_stereo: Whether the stereochemistry of the mol should be
        translated into the Schrodinger structure.
    :type include_stereo: bool

    :return: A schrodinger.Structure representing the same molecule as the
        input mol
    :rtype: schrodinger.Structure

    :raises TypeError: If mol is not an RDKit Mol.
    :raises ValueError: If a specific conformer is requested and is
        unavailable.
    :raises InconsistentStructureError: If the adapter reports an inconsistent
        structure.
    :raises UnsupportedStructureError: If the mol can't be translated between
        RDKit and Schrodinger yet.
    """
    if not isinstance(mol, Chem.Mol):
        raise TypeError('Input mol to from_rdkit() should be an RdKit Mol')

    if conformer is not None and mol.GetNumConformers() < conformer + 1:
        msg = ("Requested conformer #{}, but there are only {} conformers "
               "available.".format(conformer, mol.GetNumConformers()))
        raise ValueError(msg)

    properties = (adapter.Properties.Copy
                  if include_properties else adapter.Properties.Ignore)
    stereo = (adapter.StereoChemistry.Copy
              if include_stereo else adapter.StereoChemistry.Ignore)

    # Keep the requested conformer number and the fetched conformer object in
    # separate names instead of rebinding the `conformer` argument.
    try:
        rdk_conformer = mol.GetConformer(conformer or 0)
    except ValueError:
        # No geometry associated with this mol. Coordinates are generated
        # using fast3d below (if generate_coordinates=True).
        rdk_conformer = None
        has_3d_coords = False
    else:
        has_3d_coords = rdk_conformer.Is3D()

    # Re-raise adapter errors as this module's exception types, chaining the
    # original so the underlying cause stays visible in tracebacks.
    try:
        st = adapter.to_structure(mol, adapter.LabelAtoms.Enable, properties,
                                  rdk_conformer, stereo)
    except adapter.InconsistentStructureError as err:
        raise InconsistentStructureError(*err.args) from err
    except adapter.UnsupportedStructureError as err:
        raise UnsupportedStructureError(*err.args) from err

    if generate_coordinates and not has_3d_coords:
        st.generate3dConformation(require_stereo=False)
    return st
def translate_rdkit_props_dict(props):
    """
    Build a translated copy of a property dict such as the one returned by
    mol.GetPropsAsDict.

    Names that already look like mmct properties are kept as they are; any
    other name is prefixed with <typechar>_rdkit_, where the type character
    is chosen from the Python type of the value. Properties whose value is
    not a bool, int, float or str are dropped.

    :param props: property dictionary
    :type props: dict

    :return: new property dictionary
    :rtype: dict
    """
    # Generated hydrogens. Not an important property.
    ignored_rdkit_names = ('origNoImplicit', 'isImplicit')
    # Order matters: bool is a subclass of int, so it must be tested first.
    prefix_by_type = (
        (bool, 'b_rdkit_'),
        (int, 'i_rdkit_'),
        (float, 'r_rdkit_'),
        (str, 's_rdkit_'),
    )

    translated = {}
    for name, value in props.items():
        if CT_PROP_FORMAT.match(name):
            # Already in mmct format: keep the name untouched.
            if name.startswith('s_st_Chirality') or name == 's_m_title':
                continue
            if name.startswith('s_'):
                # Workaround for quirk of GetPropsAsDict(), which returns
                # strings that look like a number as floats, breaking the
                # call to mmct_ct_property_set_string().
                value = str(value)
            translated[name] = value
            continue

        if name in ignored_rdkit_names:
            continue

        # Values of any other type (such as RDKit internals) match no entry
        # and are therefore left out of the result.
        for value_type, prefix in prefix_by_type:
            if isinstance(value, value_type):
                translated[prefix + name] = value
                break
    return translated
@contextlib.contextmanager
def suppress_rdkit_log():
    """
    Context manager that turns off all RDKit logging ('rdApp.*') while the
    managed block runs and turns it back on when the block exits, whether
    or not the block raised.
    """
    with contextlib.ExitStack() as on_exit:
        RDLogger.DisableLog('rdApp.*')
        # Registered only after logging was disabled, so it is re-enabled
        # on every exit path from the managed block.
        on_exit.callback(RDLogger.EnableLog, 'rdApp.*')
        yield
@contextlib.contextmanager
def convert_log_to_exception():
    """
    Context manager that captures the RDKit error log while the managed block
    runs and, if the block completes without raising, turns any captured
    messages into an exception.

    :raise: RuntimeError if RDKit has logged any messages within the context
    """
    log_capture = adapter.CaptureRDErrorLog()
    yield
    # Only reached when the managed block finished without an exception.
    if not log_capture.messages():
        return
    raise RuntimeError(f"RDKit ERROR:\n{log_capture.messages()}")
def get_map_sdgr_to_rdk(mol):
    """
    Build a lookup from schrodinger atom index to rdkit atom index.

    The schrodinger index is read from the SDGR_INDEX integer property of
    each atom (presumably set when the mol was produced with atom labelling
    enabled -- verify against the caller).

    :type mol: `rdkit.Mol`
    :param mol: rdkit molecule for which mapping is desired

    :rtype: dict
    :return: dict with key as schrodinger atom index and rdkit atoms index as
        value
    """
    sdgr_to_rdk = {}
    for atom in mol.GetAtoms():
        sdgr_idx = atom.GetIntProp(SDGR_INDEX)
        sdgr_to_rdk[sdgr_idx] = atom.GetIdx()
    return sdgr_to_rdk
def get_map_rdk_to_sdgr(mol):
    """
    Build a lookup from rdkit atom index to schrodinger atom index.

    The schrodinger index is read from the SDGR_INDEX integer property of
    each atom (presumably set when the mol was produced with atom labelling
    enabled -- verify against the caller).

    :type mol: `rdkit.Mol`
    :param mol: rdkit molecule for which mapping is desired

    :rtype: dict
    :return: dict with key as rdkit atom index and schrodinger atoms index as
        value
    """
    rdk_to_sdgr = {}
    for atom in mol.GetAtoms():
        rdk_idx = atom.GetIdx()
        rdk_to_sdgr[rdk_idx] = atom.GetIntProp(SDGR_INDEX)
    return rdk_to_sdgr