"""
Creates a frame that contains widgets to aid in reordering two structures to
have the same atom order, plus functions to estimate atom ordering.
Copyright Schrodinger, LLC. All rights reserved.
"""
import warnings
from schrodinger.application.matsci import clusterstruct
from schrodinger.structutils import analyze
from schrodinger.structutils import build
from schrodinger.structutils import measure
from schrodinger.structutils import rmsd
from schrodinger.structutils import smiles
[docs]def map_hydrogens(struct1, struct2, atom_map):
"""
For all heavy atoms already mapped, if they only have a single hydrogen
attached to them we can also map that hydrogen.
:type struct1: `schrodinger.structure.Structure`
:param struct1: The first structure
:type struct2: `schrodinger.structure.Structure`
:param struct2: The structure to compare to the first structure
:type atom_map: dict
:param atom_map: keys are atom indexes for struct1, values are atom indexes
for struct2. Each key, value pair represents an atom that is the same atom
in both structures. No atoms in this dictionary will be added to the
dictionary returned by this method.
:rtype: dict
:return: dictionary, keys are indexes of H atoms in struct1 that were mapped
by this method, each value is the index of a struct2 H atom that maps to the
key atom.
"""
def bonded_unmapped_protons(atom, mapped_atoms):
protons = []
for neighbor in atom.bonded_atoms:
if neighbor.element == 'H' and neighbor.index not in mapped_atoms:
protons.append(neighbor.index)
return protons
h_map = {}
mapped_comp_atoms = set(atom_map.values())
for index1, index2 in atom_map.items():
atom1 = struct1.atom[index1]
atom2 = struct2.atom[index2]
protons1 = bonded_unmapped_protons(atom1, set(list(atom_map)))
protons2 = bonded_unmapped_protons(atom2, set(atom_map.values()))
if len(protons1) == len(protons2):
if len(protons1) == 1:
key = protons1[0]
comp = protons2[0]
if key not in atom_map and comp not in mapped_comp_atoms:
h_map[key] = comp
elif len(protons1) > 1:
h_map.update(
map_neighboring_hydrogens(struct1, struct2, atom1, protons1,
protons2, atom_map))
return h_map
[docs]def map_neighboring_hydrogens(struct1,
struct2,
atom,
protons1,
protons2,
atom_map,
threshold=10.):
"""
If two or more hydrogens are bonded to an atom, we have to look at the
similarities of the dihedrals between the structures to map them from
struct2 to struct1
:type struct1: `schrodinger.structure.Structure`
:param struct1: The first structure
:type struct2: `schrodinger.structure.Structure`
:param struct2: The structure to compare to the first structure
:type atom: `schrodinger.structure._StructureAtom`
:type atom: The atom the reference protons are bonded to - the index of this
atom must be a key in atom_map
:type protons1: list
:param protons1: list of atom indexes of protons bound to atom in struct1
:type protons2: list
:param protons2: list of atom indexes of protons bound to atom in struct2
:type atom_map: dict
:param atom_map: keys are atom indexes for struct1, values are atom indexes
for struct2. Each key, value pair represents an atom that is the same atom
in both structures
:type threshold: float
:param threshold: The dihedral angle difference threshold for considering
protons "the same" between the two structures (degrees).
:rtype: dict
:return: dictionary, keys are indexes of H atoms in struct1 that were mapped
by this method, each value is the index of a struct2 H atom that maps to the
key atom.
"""
def mapped_heavy_neighbors(myatom):
# Return a list of all heavy atoms bound to myatom that are mapped
heavies = []
for neighbor in myatom.bonded_atoms:
if neighbor.element != 'H' and neighbor.index in atom_map:
heavies.append(neighbor)
return heavies
def add_dihedral_heavies(nlist):
# Add atoms in nlist to the list of dihedral atoms unless they are
# already in the list
new = [(x, x.index) for x in nlist if x.index not in dihedral_indexes]
while len(dihedral_heavies) < 3 and new:
new_atom, new_index = new.pop(0)
dihedral_heavies.append(new_atom)
dihedral_indexes.add(new_index)
# Only attempt to map if there are the same number of unmapped hydrogens in
# both structures (MATSCI-961, MATSCI-966)
if len(protons1) != len(protons2):
return {}
# First heavy atom in the dihedral is atom itself
dihedral_heavies = [atom]
dihedral_indexes = {atom.index}
# First attempt to use any heavy atoms directly bonded to atom
neighbors = mapped_heavy_neighbors(atom)
add_dihedral_heavies(neighbors)
# If we don't have enough atoms for a dihedral, check for mapped neighbors
# of any heavy atoms directly bonded to atom
if len(dihedral_heavies) < 3:
if neighbors:
neighbors2 = mapped_heavy_neighbors(neighbors[0])
add_dihedral_heavies(neighbors2)
# Just add random mapped atoms until we get a satisfactory set of atoms
if len(dihedral_heavies) < 3:
for index in atom_map.keys():
newatom = struct1.atom[index]
if newatom.element != 'H':
add_dihedral_heavies([newatom])
if len(dihedral_heavies) == 3:
break
else:
# Couldn't find enough heavy atoms
return {}
dihedral_heavies2 = [
struct2.atom[atom_map[x.index]] for x in dihedral_heavies
]
# Compute all the dihedral angles
def dihedral_delta(torsion1, torsion2):
# Calculate the delta between two dihedrals. This is just the
# difference between them unless each is near 180 and opposite in sign
delta = abs(torsion1 - torsion2)
# Catch instances near +-180
if delta >= 360 - threshold:
delta = abs(delta - 360)
return delta
angle = measure.measure_dihedral_angle
hatoms1 = [struct1.atom[x] for x in protons1]
dihedrals1 = [(x.index, angle(x, *dihedral_heavies)) for x in hatoms1]
hatoms2 = [struct2.atom[x] for x in protons2]
dihedrals2 = [(x.index, angle(x, *dihedral_heavies2)) for x in hatoms2]
# Match dihedral angles that are within the threshold
h_map = {}
for index2, torsion2 in dihedrals2:
for index1, torsion1 in dihedrals1:
if dihedral_delta(torsion1, torsion2) <= threshold and \
index1 not in atom_map and index2 not in atom_map.values():
# Found a match between two unmapped hydrogens
if index1 not in h_map and index2 not in h_map.values():
# We haven't paired either of these hydrogens yet
h_map[index1] = index2
if len(h_map) == len(protons1):
# A mapping was found for each proton attached to atom
return h_map
else:
# We either map all the unmapped hydrogens attched to atom or none
return {}
[docs]def find_atoms_near_point(struct, distance, xyz, dcell=None):
"""
Find all atoms in struct that are within distance of point xyz. It is PBC
aware.
:type struct: `schrodinger.structure.Structure`
:param struct: The structure containing the atoms to search.
:type distance: float
:param distance: Find all atoms within this distance in Angstroms
:type xyz: list
:param xyz: list of [x, y, z] coordinates
:type dcell: `schrodinger.infra.structure.DistanceCell` or None
:param dcell: The infrastructure DistanceCell created for input structure.
If None, it will be created (can be slow).
:rtype: list
:return: list of atom indexes within distance of point xyz. The list is
sorted by distance with the index of the closest atom first.
"""
if dcell is None:
dcell, pbc = clusterstruct.create_distance_cell(struct, distance)
neighbors = {}
for match in dcell.query_atoms(*xyz):
idx, distance = match.getIndex(), match.getDistanceSquared()
# Keep only the shortest distance to the atom from xyz
if ((idx in neighbors and neighbors[idx] > distance) or
idx not in neighbors):
neighbors[idx] = distance
neighbors = sorted(neighbors, key=neighbors.get)
return neighbors
[docs]def map_by_lone_element(struct1, struct2, atom_map=None, check_formula=False):
"""
Return a map of atoms in struct2 to atoms in struct1. The only criteria
used is that if only a single atom of an element exists in both structures,
those atoms are mapped to each other. All other atoms are unmapped.
Note that this function assumes that a reaction might have taken place
between the two structures, so just because there is a single F atom in both
structures does not mean that the C atoms bound to the F is the same in both
structures.
:type struct1: `schrodinger.structure.Structure`
:param struct1: The first structure
:type struct2: `schrodinger.structure.Structure`
:param struct2: The structure to compare to the first structure
:type atom_map: dict
:param atom_map: keys are atom indexes for struct1, values are atom indexes
for struct2. Each key, value pair represents an atom that is the same atom
in both structures
:type check_formula: bool
:param check_formula: True if the molecular formulas should be checked to
ensure they are identical, False if not
:rtype: dict
:return: A mapping of atom numbers in struct2 to atom numbers in struct1.
keys are struct1 atom numbers, values are struct2 atom numbers. Dictionary
contains only those atoms mapped by this method
:raise ValueError: if the molecular formulas of the two structures are not
equal and check_formula is not False
"""
if check_formula:
identical, form1, form2 = compare_molecular_formulas(struct1, struct2)
if not identical:
raise ValueError('The molecular formula of the first structure, %s'
'is not identical to the molecular formula of the '
'second structure, %s' % (form1, form2))
elements = {}
elements2 = {}
if atom_map is None:
atom_map = {}
element_map = {}
set2 = set(atom_map.values())
def enumerate_elements(struct, mapped, new_map):
"""
For any atom in struct that is not in mapped, add to a list in new_map.
new_map is a dictionary keyed by atomic symbol whose values are atom
indexes of that element
"""
for atom in struct.atom:
if atom.index not in mapped:
new_map.setdefault(atom.element, []).append(atom.index)
enumerate_elements(struct1, atom_map, elements)
enumerate_elements(struct2, set2, elements2)
# Find those elements for which there is only one atom in each struct
for element, members in elements.items():
if len(members) == 1:
members2 = elements2.get(element, [])
if len(members2) == 1:
element_map[members[0]] = members2[0]
return element_map
[docs]def map_by_smiles(struct1,
struct2,
atom_map=None,
also_map_hydrogens=True,
stereo=smiles.STEREO_FROM_ANNOTATION_AND_GEOM):
"""
Return a map of atoms in struct2 to atoms in struct1. Unique SMILES strings
are generated for both structures - if they are the same, then a mapping is
produced that maps the atom indexes of struct2 onto the atom indexes of
struct 1. Protons are not mapped, except where only a single proton is
attached to a mapped atom.
:type struct1: `schrodinger.structure.Structure`
:param struct1: The first structure
:type struct2: `schrodinger.structure.Structure`
:param struct2: The structure to compare to the first structure
:type atom_map: dict
:param atom_map: keys are atom indexes for struct1, values are atom indexes
for struct2. Each key, value pair represents an atom that is the same atom
in both structures. No atoms in this dictionary are returned in the final
mapping.
:type also_map_hydrogens: bool
:param also_map_hydrogens: By default, an attempt will be made to map
hydrogen atoms based on the heavy atom mapping. Set to False to not do this.
:type stereo: `smiles` module constant
:param stereo: The stereo value to feed to the SMILES generator. By default
this is smiles.STEREO_FROM_ANNOTATION_AND_GEOM, use smiles.NO_STEREO to
generate SMILES strings with no stereo information
:rtype: dict
:return: A mapping of atom numbers in struct2 to atom numbers in struct1.
keys are struct1 atom numbers values are struct2 atom numbers
"""
if atom_map is None:
atom_map = {}
sgen = smiles.SmilesGenerator(stereo=stereo, unique=True)
# mapx is a list of atom indicies in the same order as the smiles string
smiles1, maplist1 = sgen.getSmilesAndMap(struct1)
smiles2, maplist2 = sgen.getSmilesAndMap(struct2)
if smiles1 != smiles2:
return {}
smiles_map = dict(list(zip(maplist1, maplist2)))
for key, value in atom_map.items():
try:
if smiles_map[key] != value:
# This SMILES map violates the already established mapping by
# mapping a different comp atom to this ref atom
return {}
else:
# This atom already appears in the atom map
del smiles_map[key]
except KeyError:
# This atom isn't in the smiles map - it's probably a hydrogen
pass
if value in smiles_map.values():
# This SMILES map violates the already established mapping by
# mapping a different ref atom to this comp atom
return {}
if also_map_hydrogens:
h_map = map_hydrogens(struct1, struct2, smiles_map)
smiles_map.update(h_map)
return smiles_map
[docs]def map_by_smarts(struct1, struct2, atom_map=None, also_map_hydrogens=True):
"""
Return a map of atoms in struct2 to atoms in struct1. If the SMARTS pattern
for each entire molecule matches, then atoms are set to that order.
Otherwise, struct1 is searched for atoms that have a unique SMARTS pattern.
If only one atom in struct2 also has that SMARTS pattern, the two atoms are
mapped to each other. This is done for each unique pair in the two
structures.
:type struct1: `schrodinger.structure.Structure`
:param struct1: The first structure
:type struct2: `schrodinger.structure.Structure`
:param struct2: The structure to compare to the first structure
:type atom_map: dict
:param atom_map: keys are atom indexes for struct1, values are atom indexes
for struct2. Each key, value pair represents an atom that is the same atom
in both structures. No atoms in this dictionary are returned in the final
mapping.
:type also_map_hydrogens: bool
:param also_map_hydrogens: By default, an attempt will be made to map
hydrogen atoms based on the heavy atom mapping even if they are not mapped
via SMARTS patterns. Set to False to not do this. Even if this paramter is
False, hydrogens that uniquely match SMARTS patterns will be mapped.
:rtype: dict
:return: A mapping of atom numbers in struct2 to atom numbers in struct1.
keys are struct1 atom numbers values are struct2 atom numbers. Dictionary
contains only those atoms mapped by this method.
:raise ValueError: if the SMILES strings of the two structures do not match
"""
if atom_map is None:
atom_map = {}
sgen = smiles.SmilesGenerator(stereo=smiles.NO_STEREO,
unique=True,
wantAllH=True)
def reorder_struct(struct):
"""
Return a re-ordered copy of struct, the smiles string, and a dictionary
whose keys are struct atom indexes and values are the atom indexes in
the reordered copy
"""
smiles, maplist = sgen.getSmilesAndMap(struct)
numat = struct.atom_total
# Remove any indexes that are higher than the total number of atoms.
# These are H atoms that the SmilesGenerator added to fulfill what it
# thinks are empty valences.
maplist = [x for x in maplist if x <= numat]
scopy = build.reorder_atoms(struct, maplist)
smap = dict(enumerate(maplist, 1))
return scopy, smiles, smap
# First we need to reorder the structures by their own unique SMILES to
# have any chance of matching SMARTS for the whole structure
s1_reordered, s1_smiles, s1_remap = reorder_struct(struct1)
s2_reordered, s2_smiles, s2_remap = reorder_struct(struct2)
smarts_map = {}
if s1_smiles == s2_smiles:
# The same SMILES pattern for both structures, just map all the atoms
# The remap dictionaries have {original_atom_index: SMILES atom index}
# so we map the SMILES atom index for struct1 to the SMILES atom
# index for struct2.
smarts_map = {
x: y for x, y in zip(s1_remap.values(), s2_remap.values())
}
for key, value in atom_map.items():
if smarts_map[key] != value:
# This SMARTS map violates the already established mapping by
# mapping a different comp atom to this ref atom
smarts_map = {}
break
else:
del smarts_map[key]
if value in smarts_map.values():
# This SMARTS map violates the already established mapping by
# mapping a different ref atom to this comp atom
smarts_map = {}
break
if smarts_map:
return smarts_map
# Not a total structure SMARTS match, or an error occurred in which the
# total SMARTS match didn't match the already mapped atoms, but maybe some
# atoms will match.
def fill_smarts_dict(struct, mapped):
"""
:type struct: `schrodinger.structure.Structure`
:param struct: The struct to evalute SMARTS with
:type mapped: set
:param mapped: The set of atoms already mapped in this structure
:rtype: dict
:return: A dictionary of SMARTS patterns (keys) and list of atoms
with that pattern (values)
"""
smarts = {}
for atom in struct.atom:
if atom.index not in mapped:
with warnings.catch_warnings():
warnings.filterwarnings("ignore",
category=DeprecationWarning,
message="analyze.generate_smarts")
asmart = analyze.generate_smarts(struct, [atom.index])
smarts.setdefault(asmart, []).append(atom.index)
return smarts
smap1 = fill_smarts_dict(struct1, set(list(atom_map)))
smap2 = fill_smarts_dict(struct2, set(atom_map.values()))
for pattern, members1 in smap1.items():
if len(members1) == 1:
members2 = smap2.get(pattern, [])
if len(members2) == 1:
smarts_map[members1[0]] = members2[0]
if also_map_hydrogens:
h_map = map_hydrogens(struct1, struct2, smarts_map)
smarts_map.update(h_map)
return smarts_map
[docs]def map_by_superposition(struct1,
struct2,
atom_map,
check_formula=False,
preserve_elements=True,
threshold=1.0,
also_map_hydrogens=True):
"""
Return a map of atoms in struct2 to atoms in struct1. struct2 is first
superimposed on struct1 using the atom lists supplied. Then all atoms in
struct2 that are within threshold distance of an atom in struct1 are mapped
to the closest atom.
Note that this function assumes that a reaction might have taken place
between the two structures, so just because there is a single F atom in both
structures does not mean that the C atoms bound to the F is the same in both
structures.
:type struct1: `schrodinger.structure.Structure`
:param struct1: The first structure
:type struct2: `schrodinger.structure.Structure`
:param struct2: The structure to compare to the first structure
:type atom_map: dict
:param atom_map: keys are atom indexes for struct1, values are atom indexes
for struct2. Each key, value pair represents an atom that is the same atom
in both structures. atom_map must be at least 3 atoms long - the
superposition will be done using atom_map. No atoms in this dictionary are
returned in the final mapping.
:type check_formula: bool
:param check_formula: True if the molecular formulas should be checked to
ensure they are identical, False if not
:type preserve_elements: bool
:param preserve_elements: True if only atoms of the same element should be
mapped to each other, False if not
:type threshold: float
:param threshold: Only atoms closer than this in Angstroms will be
considered for mapping
:type also_map_hydrogens: bool
:param also_map_hydrogens: By default, an attempt will be made to map
hydrogen atoms based on the heavy atom mapping even if they are not mapped
via superposition. Set to False to not do this. Even if this paramter is
False, hydrogens that map via superposition will be mapped.
:rtype: dict
:return: A mapping of atom numbers in struct2 to atom numbers in struct1.
keys are struct1 atom numbers values are struct2 atom numbers
:raise ValueError: if the molecular formulas of the two structures are not
equal and check_formula is not False
:raise RuntimeError: if atom_map is shorter than 3 atoms
"""
if check_formula:
identical, form1, form2 = compare_molecular_formulas(struct1, struct2)
if not identical:
raise ValueError('The molecular formula of the first structure, %s'
'is not identical to the molecular formula of the '
'second structure, %s' % (form1, form2))
dcell, pbc = clusterstruct.create_distance_cell(struct1, threshold)
struct2_posed = struct2.copy()
rmsd.superimpose(struct1, list(atom_map), struct2_posed,
list(atom_map.values()))
superpose_map = {}
atoms1 = set(list(atom_map))
atoms2 = set(atom_map.values())
for atom in struct2_posed.atom:
if atom.index in atoms2:
# Already mapped
continue
for index in find_atoms_near_point(struct1,
threshold,
atom.xyz,
dcell=dcell):
if index in superpose_map or index in atoms1:
continue
if preserve_elements:
neighbor = struct1.atom[index]
if neighbor.element != atom.element:
continue
superpose_map[index] = atom.index
break
if also_map_hydrogens:
h_map = map_hydrogens(struct1, struct2, superpose_map)
superpose_map.update(h_map)
return superpose_map