"""
A module for generating SMILES and unique SMILES strings.
Provides Python access to the classes in the canvaslibs_ext directory.
Copyright Schrodinger, LLC. All rights reserved.
"""
# Contributors: Mike Beachy, Matvey Adzhigirey
import re
import warnings
import schrodinger.application.canvas.utils as canvasutils
from schrodinger.infra.canvas import ChmMmctAdaptor
from schrodinger.infra.canvas import ChmMmctSmilesGenerator
from schrodinger.structure import NO_STEREO
from schrodinger.structure import STEREO_FROM_3D
from schrodinger.structure import STEREO_FROM_ANNOTATION
from schrodinger.structure import STEREO_FROM_ANNOTATION_AND_GEOM
from schrodinger.structure import STEREO_FROM_GEOMETRY
from . import build
try:
    # Every value that selects a stereo-aware SMILES: the ChmMmctAdaptor enum
    # members followed by the module-level constants. Looking up the adaptor
    # attributes raises AttributeError when they are missing, which is
    # reported below as an ImportError.
    _stereo_enums = {
        ChmMmctAdaptor.StereoFromGeometry,
        ChmMmctAdaptor.StereoFromGeometry_Safe,
        ChmMmctAdaptor.StereoFromAnnotation,
        ChmMmctAdaptor.StereoFromAnnotation_Safe,
        ChmMmctAdaptor.StereoFromAnnotationAndGeometry,
        ChmMmctAdaptor.StereoFromAnnotationAndGeometry_Safe,
        STEREO_FROM_GEOMETRY,
        STEREO_FROM_ANNOTATION,
        STEREO_FROM_ANNOTATION_AND_GEOM,
        STEREO_FROM_3D,
    }
except AttributeError:
    raise ImportError(
        "The canvaslibs_ext library is not available for this platform, so the smiles module will not work."
    )

# Property names returned by get_property_name() for SMILES generated
# without and with stereochemistry, respectively.
unique_smiles_prop_name = 's_canvas_Unique_SMILES'
unique_smiles_stereo_prop_name = 's_canvas_Unique_SMILES_Stereo'

# Matches property names starting with s_st_Chirality_, s_st_EZ_, s_st_PM_
# or s_st_AtomNumChirality_ (the mmstereo annotation properties).
_mmstereo_re = re.compile("^s_st_(Chirality|EZ|PM|AtomNumChirality)_")

# Filled in the first time a SmilesGenerator is constructed and then reused.
_canvas_license = None
def get_property_name(stereo):
    """
    Return the default m2io property name for the unique SMILES string with
    the type of stereochemistry given in 'stereo'.

    :param stereo: Any of the module level variables NO_STEREO,
        STEREO_FROM_GEOMETRY, STEREO_FROM_ANNOTATION, or
        STEREO_FROM_ANNOTATION_AND_GEOM, or values from the actual enum in
        the ChmMmctAdaptor class.

    :rtype: str
    :return: `unique_smiles_prop_name` when no stereochemistry is requested,
        `unique_smiles_stereo_prop_name` for every recognized stereo option.

    :raise ValueError: If 'stereo' is not a recognized stereochemistry
        option. ValueError is a subclass of Exception, so callers that
        catch Exception keep working.
    """
    if stereo == NO_STEREO or stereo == ChmMmctAdaptor.NoStereo:
        return unique_smiles_prop_name
    if stereo in _stereo_enums:
        return unique_smiles_stereo_prop_name
    # Wrap the value in a one-element tuple: a bare tuple on the right-hand
    # side of % would be unpacked as the argument list and raise TypeError
    # instead of the intended error.
    raise ValueError("Unrecognized stereochemistry type: '%s'" % (stereo,))
def _determine_stereo_option(struct, safe=True):
    """
    Pick a ChmMmctAdaptor stereo enum based on the properties of a Structure.

    The property names of the provided Structure are scanned for mmstereo
    properties. When at least one is found the annotation-based option is
    returned, otherwise the geometry-based one.

    Note: the stereo annotations can be stale.

    :param struct: Object whose `property` attribute is iterated for
        property names.

    :type safe: bool
    :param safe: If True, return the `_Safe` variant of the chosen enum.

    :return: ChmMmctAdaptor.StereoFromAnnotation[_Safe] when an mmstereo
        property is present, ChmMmctAdaptor.StereoFromGeometry[_Safe]
        otherwise.
    """
    # any() stops at the first matching property name.
    has_annotation = any(_mmstereo_re.match(name) for name in struct.property)
    if has_annotation:
        return (ChmMmctAdaptor.StereoFromAnnotation_Safe
                if safe else ChmMmctAdaptor.StereoFromAnnotation)
    return (ChmMmctAdaptor.StereoFromGeometry_Safe
            if safe else ChmMmctAdaptor.StereoFromGeometry)
def remove_stereo_annotation(struct):
    """
    Delete every mmstereo annotation property from a structure, in place.

    Use this when the stereochemistry should be determined by the 3d
    geometry only: the STEREO_FROM_3D option does not override any
    stereochemical annotations that are already present in a structure, so
    existing annotations have to be removed first.

    :param struct: Structure whose `property` entries matching the mmstereo
        name pattern are removed.
    """
    # Gather the matching names before deleting anything so that the
    # property container is never modified while it is being iterated.
    annotation_names = [
        name for name in struct.property if _mmstereo_re.match(name)
    ]
    for name in annotation_names:
        del struct.property[name]
def _translate_stereo_enum(stereo, safe=True):
    """
    Translate a module stereochemistry constant into its ChmMmctAdaptor enum
    equivalent.

    :param stereo: One of NO_STEREO, STEREO_FROM_GEOMETRY,
        STEREO_FROM_ANNOTATION, STEREO_FROM_ANNOTATION_AND_GEOM or the
        deprecated STEREO_FROM_3D (treated like
        STEREO_FROM_ANNOTATION_AND_GEOM, with a DeprecationWarning).

    :type safe: bool
    :param safe: If True, return the `_Safe` variant of the enum. Has no
        effect for NO_STEREO.

    :return: The matching ChmMmctAdaptor enum value.

    :raise ValueError: If 'stereo' is not one of the recognized constants.
    """
    if stereo == STEREO_FROM_GEOMETRY:
        return (ChmMmctAdaptor.StereoFromGeometry_Safe
                if safe else ChmMmctAdaptor.StereoFromGeometry)

    if stereo == STEREO_FROM_ANNOTATION:
        return (ChmMmctAdaptor.StereoFromAnnotation_Safe
                if safe else ChmMmctAdaptor.StereoFromAnnotation)

    if (stereo == STEREO_FROM_ANNOTATION_AND_GEOM or
            stereo == STEREO_FROM_3D):
        if stereo == STEREO_FROM_3D:
            # stacklevel=3 attributes the warning to the caller of the
            # function that invoked this helper (e.g. the code constructing
            # a SmilesGenerator) rather than to this module.
            warnings.warn(
                "The STEREO_FROM_3D is deprecated in favor of "
                "STEREO_FROM_ANNOTATION_AND_GEOM",
                DeprecationWarning,
                stacklevel=3)
        return (ChmMmctAdaptor.StereoFromAnnotationAndGeometry_Safe
                if safe else ChmMmctAdaptor.StereoFromAnnotationAndGeometry)

    if stereo == NO_STEREO:
        return ChmMmctAdaptor.NoStereo

    # Wrap the value in a one-element tuple: a bare tuple on the right-hand
    # side of % would be unpacked as the argument list and raise TypeError
    # instead of the intended ValueError.
    raise ValueError(
        "SmilesGenerator: invalid stereo option: %s" % (stereo,))
class SmilesGenerator(object):
    """
    Generates SMILES strings from Structure objects.

    This is just a thin wrapper to the canvaslibs_ext classes.
    """

    def __init__(self,
                 stereo=STEREO_FROM_ANNOTATION_AND_GEOM,
                 unique=True,
                 safe=True,
                 wantAllH=False,
                 forceAllBondOrders=False,
                 wildcardAllAtoms=False):
        """
        Create a SmilesGenerator with the requested stereochemistry and
        uniqueness behavior.

        :param stereo: One of the module level constants; it controls how
            stereochemistry is handled by the getSmiles method.
            Can be one of the following (default is
            STEREO_FROM_ANNOTATION_AND_GEOM):

            - STEREO_FROM_ANNOTATION_AND_GEOM - Derive stereochemistry from
              annotations, but use the 3D coordinates when no annotation is
              present. This is the same behavior as the old STEREO_FROM_3D
              option, which is deprecated.
            - STEREO_FROM_ANNOTATION - Derive stereochemistry from
              pre-existing mmstereo properties (faster, so useful when
              structures are known to be 2D).
            - STEREO_FROM_GEOMETRY - Derive stereochemistry from the 3D
              coordinates only (for 3D structures). Annotations are used
              for 2D structures.
            - NO_STEREO - Don't include stereochemistry.

        :type unique: bool
        :param unique: If True, generate unique (a.k.a. canonical) SMILES.

        :type safe: bool
        :param safe: If True, use only stereochemistry from mmstereo that
            is deemed "safe" by the Canvas libraries. If False, use all
            stereochemistry info from mmstereo. This is relevant for
            the STEREO_FROM_GEOMETRY, STEREO_FROM_ANNOTATION, and
            STEREO_FROM_ANNOTATION_AND_GEOM options of the stereo argument.

        :type wantAllH: bool
        :param wantAllH: If True, each hydrogen receives its own SMILES
            token.

        :type forceAllBondOrders: bool
        :param forceAllBondOrders: If True, all bond orders in the SMILES
            will be explicit. By default, aromatic and single bond orders
            (C-C, c:c) are suppressed.

        :type wildcardAllAtoms: bool
        :param wildcardAllAtoms: If True, all heavy atoms will appear as
            asterisks when calling getSmiles(). Ignored when calling
            getSmilesAndMap().

        :raise ValueError: If 'stereo' is not a recognized option.
        """
        # The license is requested only once per process and then kept in
        # the module-level variable.
        global _canvas_license
        if _canvas_license is None:
            _canvas_license = canvasutils.get_license(
                canvasutils.LICENSE_SHARED)

        generator = ChmMmctSmilesGenerator()
        self.smiles_generator = generator

        # _translate_stereo_enum must be called directly from here: its
        # deprecation warning uses a fixed stacklevel.
        self.stereo = _translate_stereo_enum(stereo, safe)
        self.safe = safe
        self.unique = unique
        self.wildcardAllAtoms = wildcardAllAtoms

        generator.setForceHydrogens(wantAllH)
        generator.setForceAllBondOrders(forceAllBondOrders)

    def getSmiles(self, struct):
        """
        Return a SMILES string for a structure. Use the wantAllH option
        when initializing the SmilesGenerator instance if hydrogens are
        needed.

        :param struct: The Structure object from which to generate the
            SMILES string.

        :return: The unique SMILES when this generator was created with
            unique=True, the plain SMILES otherwise.
        """
        handle = struct.handle
        backend = self.smiles_generator
        generate = backend.getUniqueSmiles if self.unique else backend.getSmiles
        return generate(handle, self.stereo, self.wildcardAllAtoms)

    def getSmilesAndMap(self, struct):
        """
        Return a SMILES string and index mapping of the atoms in a
        structure. Use the wantAllH option when initializing the
        SmilesGenerator instance to specify whether hydrogens should be
        included (default is to include heavy atoms only).

        The wildcardAllAtoms setting is not passed on by this method.

        :param struct: The Structure object from which to generate the
            SMILES string.

        :rtype: (str, list)
        :return: SMILES string, and a list of new atom indices, which can be
            passed directly to build.reorder_atoms().
        """
        handle = struct.handle
        backend = self.smiles_generator
        generate = (backend.getUniqueSmilesAndMap
                    if self.unique else backend.getSmilesAndMap)
        return generate(handle, self.stereo)

    def getStandardizedSmiles(self, struc):
        """
        Return a SMILES string for the standardized version of a structure,
        obtained by neutralizing the structure first. This ensures
        different ionization states of the same compound produce the same
        SMILES.

        Different tautomers will still generate different SMILES. To check
        whether different input structures are tautomers of each other,
        analyze.generate_tautomer_code() can be used.

        The input structure is modified while this method runs: its atoms
        are tagged with a marker property and hydrogens are added. Before
        returning, the marker is removed again and every untagged atom is
        deleted.

        :param struc: Structure to get the standardized SMILES string for.
        :type struc: schrodinger.structure.Structure

        :return: Standardized SMILES string
        :rtype: str
        """
        marker = 'b_is_orig_atom'

        def strip_unmarked_atoms(st):
            # Drop the marker from tagged atoms and delete all others,
            # i.e. the atoms that were added after tagging.
            added_indices = []
            for atom in st.atom:
                if not atom.property.get(marker):
                    added_indices.append(atom.index)
                    continue
                del atom.property[marker]
            st.deleteAtoms(added_indices)

        # Tag the atoms that exist before any hydrogens are added.
        for atom in struc.atom:
            atom.property[marker] = True

        build.add_hydrogens(struc)
        neutralized = build.neutralize_structure(struc)

        # The hydrogens added prior to neutralization must be removed
        # before generating SMILES because they may cause us to lose
        # information about which stereo centers were defined and which
        # weren't.
        # NOTE(review): this relies on the neutralized structure carrying
        # over the marker property from struc - confirm against
        # build.neutralize_structure.
        strip_unmarked_atoms(struc)
        strip_unmarked_atoms(neutralized)

        return self.getSmiles(neutralized)

    def getUniqueOrder(self, struct):
        """
        Return a canonicalized ordering of atoms in the given structure.

        NOTE: Structure MUST contain all hydrogens.

        NOTE: Uniqueness process does not consider atom coordinates, so
        symmetrically equivalent atoms will have arbitrarily assigned
        ordering. In other words, identical conformers are likely to have a
        >0 RMSD after renumbering.

        :type struct: `structure.Structure`
        :param struct: The Structure object for which to generate the
            canonical atom order.

        :rtype: list(int)
        :return: List of canonically ordered atom indices, which can be
            passed directly to build.reorder_atoms().
        """
        return list(self.smiles_generator.getUniqueOrder(struct.handle))

    def canonicalize(self, pattern):
        """
        Return the canonicalized (unique) version of the specified SMILES
        string.

        :param pattern: SMILES string to canonicalize.
        """
        canonical = self.smiles_generator.canonicalize(pattern)
        return canonical
#EOF