# Source code for schrodinger.livedesign.substructure
"""
Substructure searching and alignment
Copyright Schrodinger LLC, All Rights Reserved.
"""
from typing import Generator
from typing import NamedTuple
from typing import Optional
from rdkit import Chem
from rdkit.Chem import rdDepictor
from rdkit.Chem import rdMolEnumerator
from rdkit.Chem import rdTautomerQuery
# Message text describing a failed substructure match against a reference.
# NOTE(review): none of the functions visible in this module reference this
# constant -- presumably it is used by callers; confirm before removing.
NO_MATCH_ERROR_MSG = "Substructure match with reference not found"
class QueryOptions(NamedTuple):
    """
    Settings that control how a substructure query is adjusted and matched.

    :cvar adjust_conjugated_five_rings: whether bond queries in conjugated
        five-member rings are set to SINGLE|DOUBLE|AROMATIC
    :cvar adjust_single_bonds_between_aromatic_atoms: whether non-ring
        single bonds between two aromatic atoms are set to SINGLE|AROMATIC
    :cvar adjust_single_bonds_to_degree_one_neighbors: whether single bonds
        between aromatic atoms and degree-one neighbors are set to
        SINGLE|AROMATIC
    :cvar tautomer_insensitive: whether to consider tautomer insensitivity
    :cvar stereospecific: whether to consider stereochemistry and chirality
    """
    # Field order is part of the tuple interface (positional construction
    # and indexing), so it must not be rearranged.
    adjust_conjugated_five_rings: bool = False
    adjust_single_bonds_between_aromatic_atoms: bool = False
    adjust_single_bonds_to_degree_one_neighbors: bool = False
    tautomer_insensitive: bool = False
    stereospecific: bool = True
def replace_generic_h_queries(query):
    """
    Expands QH, AH, MH, and XH query atoms into queries the RDKit can match.

    Reminder:
    - QH = "any atom except carbon"
    - AH = "any atom, including H"
    - MH = "any metal, or H"
    - XH = "halogen or H"

    Only degree-one atoms carrying one of these query types are considered.
    For each such atom, every query generated so far gains a sibling in which
    that atom is removed and its single neighbor is required to carry at
    least one hydrogen. Atoms whose neighbor is itself one of the collected
    generic-H atoms are skipped.

    :param query: the query molecule to expand
    :return: ``[query]`` unchanged when no qualifying atom is present;
        otherwise a list holding an editable copy of the query followed by
        the generated variants
    """
    generic_h_types = ('QH', 'AH', 'MH', 'XH')
    generic_h_indices = [
        atom.GetIdx()
        for atom in query.GetAtoms()
        if atom.HasQuery() and atom.GetQueryType() in generic_h_types
        and atom.GetDegree() == 1
    ]
    if not generic_h_indices:
        return [query]

    # NOTE(review): the edits are done in batch mode, presumably so that the
    # collected atom indices stay valid while atoms are being removed --
    # confirm against the RDKit batch-edit documentation.
    seed = Chem.RWMol(query)
    seed.BeginBatchEdit()
    variants = [seed]
    requires_h = Chem.AtomFromSmarts('[!H0]')

    for h_idx in generic_h_indices:
        # Iterate over a snapshot: variants appended during this pass are
        # only revisited when the next generic-H atom is processed.
        for parent in list(variants):
            variant = Chem.RWMol(parent)
            neighbor = variant.GetAtomWithIdx(h_idx).GetNeighbors()[0]
            neighbor_idx = neighbor.GetIdx()
            if neighbor_idx in generic_h_indices:
                continue

            if not neighbor.HasQuery():
                # replace neighbor with a query atom that has at least 1
                # hydrogen
                replacement = Chem.AtomFromSmarts(
                    f'[#{neighbor.GetAtomicNum()}!H0]')
                variant.ReplaceAtom(neighbor_idx, replacement)
            else:
                neighbor.ExpandQuery(requires_h.GetQuery())

            variant.RemoveAtom(h_idx)
            variants.append(variant)

    for variant in variants:
        variant.CommitBatchEdit()
    return variants
def expand_query(
        base_query: Chem.rdchem.Mol,
        options: QueryOptions) -> Generator[Chem.rdchem.Mol, None, None]:
    """
    Lazily yields every concrete query derived from ``base_query``.

    The base query has its query hydrogens merged and is then enumerated
    (link nodes, variable bonds). When tautomer-insensitive matching is
    requested, each enumerated molecule is swapped for its tautomer-query
    template. Every resulting query is adjusted according to ``options`` and
    finally passed through `replace_generic_h_queries`, whose results are
    yielded in order.

    :param base_query: the query molecule to expand
    :param options: query options; a falsy value selects the defaults
    :return: generator over the expanded query molecules
    """
    options = options or QueryOptions()

    adjust_params = Chem.AdjustQueryParameters.NoAdjustments()
    adjust_params.makeDummiesQueries = True
    adjust_params.adjustConjugatedFiveRings = \
        options.adjust_conjugated_five_rings
    adjust_params.adjustSingleBondsBetweenAromaticAtoms = \
        options.adjust_single_bonds_between_aromatic_atoms
    adjust_params.adjustSingleBondsToDegreeOneNeighbors = \
        options.adjust_single_bonds_to_degree_one_neighbors

    merged_query = Chem.rdmolops.MergeQueryHs(base_query)
    # Fall back to the merged query itself when enumeration yields nothing.
    enumerated = rdMolEnumerator.Enumerate(merged_query) or [merged_query]

    for candidate in enumerated:
        query = candidate
        if options.tautomer_insensitive:
            # initialize RingInfo
            Chem.FastFindRings(candidate)
            try:
                # SHARED-8672: When rgroup decomposition options have the
                # adjust_conjugated_five_rings and tautomer_insensitive
                # turned on, sometimes a kekulization error will be raised
                # when creating the tautomer query. If that occurs, ignore
                # the tautomers so that the original scaffold can still
                # match
                tautomer_query = rdTautomerQuery.TautomerQuery(candidate)
                query = tautomer_query.GetTemplateMolecule()
            except Chem.rdchem.KekulizeException:
                query = candidate

        adjusted = Chem.AdjustQueryProperties(query, adjust_params)
        adjusted.UpdatePropertyCache(False)
        yield from replace_generic_h_queries(adjusted)
def substructure_matches(mol: Chem.rdchem.Mol,
                         query_mol: Chem.rdchem.Mol,
                         options: Optional[QueryOptions] = None):
    """
    Generates all substructure matches of ``mol`` against a given query mol.

    The query is expanded with `expand_query`; the matches found for each
    expanded query are yielded in turn. Matches are not de-duplicated across
    expanded queries.

    :param mol: the molecule to search in
    :param query_mol: the query to expand and search for
    :param options: query options; defaults are used when omitted
    :return: generator over the matches reported by ``GetSubstructMatches``
    """
    if not options:
        options = QueryOptions()

    # One flag drives both chirality and enhanced-stereo matching.
    use_stereo = options.stereospecific
    match_params = Chem.rdchem.SubstructMatchParameters()
    match_params.useChirality = use_stereo
    match_params.useEnhancedStereo = use_stereo

    for expanded_query in expand_query(query_mol, options):
        yield from mol.GetSubstructMatches(expanded_query, match_params)
def apply_substructure_coordinates(mol: Chem.rdchem.Mol,
                                   template_mol: Chem.rdchem.Mol,
                                   options: Optional[QueryOptions] = None):
    """
    Applies coordinates from the provided template to the input mol; used for
    compound alignment requests in image generation.

    The template is expanded with `expand_query` and the expanded variants
    are tried in order; the first variant that matches ``mol`` is used for
    the alignment. Previously only the first variant was tested, so a
    molecule matching only a later variant (for example one produced for a
    generic-H query atom) was left unaligned.

    NOTE: If the substructure match to the template fails for every expanded
    variant, the alignment is skipped altogether, leaving the input mol
    coordinates as they were

    :param mol: the molecule whose depiction coordinates are updated in place
    :param template_mol: the template providing the reference coordinates
    :param options: query options; defaults are used when omitted
    :return: None
    """
    options = options or QueryOptions()

    params = Chem.rdchem.SubstructMatchParameters()
    params.useChirality = options.stereospecific
    params.useEnhancedStereo = options.stereospecific

    for expanded_template in expand_query(template_mol, options):
        if mol.HasSubstructMatch(expanded_template, params):
            rdDepictor.GenerateDepictionMatching2DStructure(
                mol, expanded_template)
            # Align against the first matching variant only.
            return