"""
Functions for measuring distances and angles in structures.
Copyright Schrodinger, LLC. All rights reserved.
"""
# Contributors: Matvey Adzhigirey
import math
import numpy
from scipy.spatial import cKDTree
from schrodinger import structure
from schrodinger.infra import mm
from schrodinger.infra import mmlist
from schrodinger.infra import structure as cppstructure
from schrodinger.structutils import transform
[docs]def get_close_atoms(st, dist, atoms=None):
"""
Use this function to find all atoms within a specified distance of each
other in roughly O(N) time.
Returns a list of tuples in the form of: (atom1, atom2), where atom1 and
atom2 are atom indices.
This function is only roughly O(N) in the number of atoms in the
molecule because as dist increases it will reach the limit of O(N^2).
Its true cost is O(N*m) where m is the number of atoms in a cubic box
with edges of dist length.
:param st: Structure object
:param dist: distance threshold, in angstroms.
:param atoms: optionally consider only atoms with these indices (all atoms
in CT by are scanned by default)
NOTES:
- Each atom pair is listed only once in the output.
- This function is efficient only for small distances (<3A)
- Periodic boundary conditions (PBC) are NOT honored.
"""
dc = cppstructure.DistanceCell(st, dist)
if atoms:
limitations = set(atoms)
atoms = map(st.atom.__getitem__, atoms)
else:
limitations = None
atoms = st.atom
nearby_atom_pairs = []
for a in atoms:
index = a.index
for nearby_atom in dc.query(*a.xyz):
if nearby_atom <= index:
continue
if limitations and nearby_atom not in limitations:
continue
nearby_atom_pairs.append((a.index, nearby_atom))
return nearby_atom_pairs
[docs]def get_close_coordinates(coords, dist):
"""
Use this function to find all coordinates within a specified distance of
each other in roughly O(N) time.
:param coords: List of (x, y, z) coordinates.
:type coords: list(list(float, float, float))
:param dist: distance threshold, in angstroms.
:type dist: float
:return: Returns indices to the input array for pairs of the coordinates
that are within the threshold of each other.
:rtype: tuple(int, int)
"""
coords_array = numpy.array(coords, dtype=numpy.float32)
cell_handle = mm.mmct_create_distance_cell_xyz2(coords_array, dist)
link_cell_within = []
for i, coord in enumerate(coords):
neighbor_mmlist = \
mm.mmct_query_distance_cell(cell_handle, *coord)
for idx in range(mm.mmlist_get_size(neighbor_mmlist)):
j = mm.mmlist_get(neighbor_mmlist, idx)
if j > i:
link_cell_within.append((i, j))
mm.mmlist_delete(neighbor_mmlist)
mm.mmct_delete_distance_cell(cell_handle)
return link_cell_within
[docs]def create_distance_cell(st, distance, honor_pbc=True):
"""
Create a DistanceCell for the given structure and cutoff. If struct has the
Chorus box properties, the distance cell will be PBC-aware.
:param st: The input structure, may have the Chorus box properties.
:type st: `schrodinger.structure.Structure`
:param distance: The cutoff for finding nearest neighbor atoms.
:type distance: float
:param honor_pbc: Whether to honor Periodic Boundary Conditions, if defined
as properties in the "st" structure. Default is True.
:type honor_pbc: bool
:return: The distance cell.
:rtype: `schrodinger.infra.structure.DistanceCell`
"""
if honor_pbc:
try:
pbc = cppstructure.PBC(st)
except IndexError:
# PBC properties are not present
pbc = None
else:
pbc = None
if pbc:
dcell = cppstructure.DistanceCell(st, distance, pbc)
else:
dcell = cppstructure.DistanceCell(st, distance)
return dcell
[docs]def any_atom_in_distance_cell(distance_cell, st):
"""
Check that if any atom of `st` lies in the `distance_cell`.
:param distance_cell: A DistanceCell object
:type distance_cell: schrodinger.infra.structure.DistanceCell
:param st: A structure object
:type st: schrodinger.structure.Structure
:return: True if any atom of `st` lies in the `dcell` else False
:rtype: bool
"""
for xyz in st.getXYZ():
match = distance_cell.query_atoms(*xyz)
if match:
return True
return False
[docs]def get_atoms_close_to_structure(st, other_st, cutoff, honor_pbc=True):
"""
Returns a list of atoms from st that are within the threshold distance
of st2.
Example: Get a list of receptor atoms close to the ligand:
close_atoms = measure.get_atoms_close_to_structure(re_st, lig_st, 3.0)
:param st: Structure atoms from wich should be analyzed/returned.
:type st: `structure.Structure`
:param other_st: Query structure.
:type other_st: `structure.Structure`
:param cutoff: Distance theshold.
:type cutoff: float
:param honor_pbc: Honor Periodic Boundary Conditions, if defined as
properties in the "st" structure. Default is True.
:type honor_pbc: bool
"""
dcell = create_distance_cell(st, cutoff, honor_pbc)
close_atoms = set()
for xyz in other_st.getXYZ():
for match in dcell.query_atoms(*xyz):
close_atoms.add(match.getIndex())
return sorted(close_atoms)
[docs]def get_atoms_close_to_subset(st, other_atoms, cutoff, honor_pbc=True):
"""
Returns a list of atoms from that are within the threshold distance
of "other_atoms" subset, and are not themselves in that subset.
Example: Get a list of receptor atoms close to the ligand:
close_atoms = measure.get_atoms_close_to_subset(st, lig_atoms, 3.0)
:param st: Structure atoms from wich should be analyzed/returned.
:type st: `structure.Structure`
:param other_atoms: Query atoms.
:type other_atoms: list of int
:param cutoff: Distance theshold.
:type cutoff: float
:param honor_pbc: Honor Periodic Boundary Conditions, if defined as
properties in the "st" structure. Default is True.
:type honor_pbc: bool
"""
dcell = create_distance_cell(st, cutoff, honor_pbc)
close_atoms = set()
for query_anum in other_atoms:
xyz = st.atom[query_anum].xyz
for match in dcell.query_atoms(*xyz):
close_anum = match.getIndex()
if close_anum not in other_atoms:
close_atoms.add(close_anum)
return sorted(close_atoms)
[docs]def get_shortest_distance(st, atoms=None, st2=None, cutoff=numpy.inf):
"""
Determines the shortest distance and indices of the nearest atoms
between two structures or between a groups of atoms in a single
structure.
NOTE: Periodic boundary conditions (PBC) are NOT honored.
:type st: `schrodinger.structure.Structure`
:param st: Structure containing group(s) of atoms for nearest distance
search.
:type atoms: list(int)
:param atoms: If specified, the distances between this group of atoms and
all other atoms in st are evaluated. Either atoms or st2, but
not both, must be specified.
:type st2: `schrodinger.structure.Structure`
:param st2: Structure of second group of atoms for nearest-distance search.
Either st2 or atoms, but not both, must be specified.
:type cutoff: float
:param cutoff: Cutoff distance in Angstroms for nearest-distance search
(by default no cutoff is used). Setting this parameter can
speed the calculation by considering only points between
sets that are within the cutoff value. None will be returned
if no neighbors are found within the specified cutoff.
:rtype: tuple of float, int, int or None
:return: A tuple containing the nearest distance between atoms and the
indices of the closest atoms between each set.
"""
if st2 is None and atoms is None:
raise ValueError("Either atoms or st2 must be specified.")
elif st2 is not None and atoms is not None:
raise ValueError("Cannot specify both atoms and st2.")
st_coords = st.getXYZ()
if st2:
set1_coords = st_coords
set2_coords = st2.getXYZ()
else:
if len(atoms) == 0:
raise ValueError("None of the structure atoms were included in "
"atoms list")
set2_atoms = [
ai for ai in range(1, st.atom_total + 1) if ai not in atoms
]
set1_coords = [st_coords[ai - 1] for ai in atoms]
set2_coords = [st_coords[ai - 1] for ai in set2_atoms]
if len(set2_coords) == 0:
raise ValueError(("All structure atoms were "
"included in atoms (no "
"second group specified)"))
kd_tree = cKDTree(set1_coords)
distances, nn_indices = kd_tree.query(set2_coords,
distance_upper_bound=cutoff)
min_distance = min(distances)
if min_distance == numpy.inf:
# None of the distances where within the cutoff:
return None
# Atom from set 2 for the shortest distance match:
set2_index = numpy.where(distances == min_distance)[0][0]
# Corresponding atom from set 1:
set1_index = nn_indices[set2_index]
if st2:
anum_1 = set1_index + 1
anum_2 = set2_index + 1
else:
anum_1 = atoms[set1_index]
anum_2 = set2_atoms[set2_index]
return (min_distance, anum_1, anum_2)
[docs]def get_atoms_close_to_point(st, xyz, cutoff, honor_pbc=True):
"""
Returns a list of atoms from st that are within the threshold distance
of a point, which is defined by its xyz coordinates..
Example: Get a list of receptor atoms close to a point:
close_atoms = measure.get_atoms_close_to_structure(re_st, xyz, 3.0)
:param st: Structure atoms from wich should be analyzed/returned.
:type st: `structure.Structure`
:param xyz: xyz coordinates of point
:type xyz: list
:param cutoff: Distance theshold.
:type cutoff: float
:param honor_pbc: Honor Periodic Boundary Conditions, if defined as
properties in the "st" structure. Default is True.
:type honor_pbc: bool
:return: list of atoms close to a point
:rtype: list
"""
dcell = create_distance_cell(st, cutoff, honor_pbc)
atoms = []
for match in dcell.query_atoms(*xyz):
atoms.append(match.getIndex())
return atoms
[docs]def dist_cell_iterator(st,
dist=None,
atoms=None,
cell_handle=None,
delete_dist_cell=True):
"""
Create an iterator that uses a distance cell to iterate through
neighbors of the specified atoms
:param st: The structure to examine
:type st: `schrodinger.structure.Structure`
:param dist: The distance cutoff for calculating neighbors. Either dist or
cell_handle must be given, but not both.
:type dist: float
:param atoms: A list of atom numbers to calculate neighbors for. If not,
given, st.atom will be used.
:type atoms: list
:param cell_handle: A handle to an existing distance cell. If not given, a
new distance cell will be created with distance dist. Either dist or
cell_handle must be given, but not both.
:type cell_handle: int
:param delete_dist_cell: If cell_handle is given and delete_dist_cell is
True, then distance cell cell_handle will be deleted after iteration is
complete. Has no effect if cell_handle is not given. Defaults to True.
:type delete_dist_cell: bool
:return: An iterator that iterates through neighbors of the specified
atoms
:rtype: iter
:raise ValueError: If both dist and cell_handle are not None
:deprecated: The DistanceCellIterator class provides the same functionality
as this function but with increased flexibility
"""
if dist is not None and cell_handle is not None:
raise ValueError("Cannot provide a distance and an existing distance "
"cell handle to dist_cell_iterator")
elif dist is None and cell_handle is None:
raise ValueError("Must provide either a distance or an existing "
"distance cell handle to dist_cell_iterator")
if cell_handle is None:
cell_handle = mm.mmct_create_distance_cell(st.handle, dist)
delete_dist_cell = True
if atoms is None:
atoms = range(1, st.atom_total + 1)
for atom_num in atoms:
atom = st.atom[atom_num]
coords = atom.xyz
neighbor_mmlist = mm.mmct_query_distance_cell(cell_handle, *coords)
neighbor_pylist = mmlist._mmlist_to_pylist(neighbor_mmlist)
mm.mmlist_delete(neighbor_mmlist)
yield (atom_num, neighbor_pylist)
if delete_dist_cell:
mm.mmct_delete_distance_cell(cell_handle)
[docs]class DistanceCellIterator(object):
"""
Iterate through neighbors of specified atoms. This class replaces the
dist_cell_iterator function.
NOTE: Periodic boundary conditions (PBC) are NOT honored.
"""
[docs] def __init__(self, struc, dist, atoms=None):
"""
Construct the distance cell to use for finding neighbors
:param struc: The structure to use for building the distance cell
:type struc: `schrodinger.structure.Structure`
:param dist: The distance cutoff for calculating neighbors
:type dist: float
:param atoms: A list of atom numbers for `struc`. If given, the
distance cell will only contain the specified subset of atoms, so all
other atoms will be ignored when calculating neighbors. If not given,
all atoms will be used.
:type atoms: list
"""
self._struc = struc
if atoms is None:
cell_st = struc
self._renumber_map = None
else:
cell_st = struc.extract(atoms)
self._renumber_map = atoms
# Atoms are 1-indexed. We add a None at index 0 so that indices in
# this list match the atom numbers.
self._renumber_map.insert(0, None)
self._cell_handle = mm.mmct_create_distance_cell(cell_st.handle, dist)
def __del__(self):
mm.mmct_delete_distance_cell(self._cell_handle)
[docs] def iterateNeighboringAtoms(self, struc=None, atoms=None):
"""
Iterate over neighboring atoms (atoms within `dist` Angstrom of each
other).
NOTE: Periodic boundary conditions (PBC) are NOT honored.
:param struc: The query structure. Neighbors will be found for each
atom of this structure. If not given, the structure
passed to `__init__` will be used as the query
structure.
:type struc: `schrodinger.structure.Structure`
:param atoms: A list of atom numbers for the query structure. If given,
only the specified atoms will be examined. If not given,
all atoms of the query structure will be used.
:type atoms: list(int)
:return: A generator that iterates through neighbors. Each iteration
will yield a tuple of (atom index, list of neighboring atom
indicies)
:rtype: generator
Note: This method returns atom indices instead of atom object due to
speed concerns. Profiling (using timeit) showed that:
- returning atom indices instead of atom objects reduced runtime by
~25%
- using `coord = mm.mmct_atom_get_xyz(struc.handle, atom_num)` in
place of `coord = struc.atom[atom_num].xyz` also reduced runtime
by ~25%
"""
if struc is None:
struc = self._struc
if atoms is None:
atoms = range(1, struc.atom_total + 1)
for atom_num in atoms:
coord = mm.mmct_atom_get_xyz(struc.handle, atom_num)
neighbor_mmlist = mm.mmct_query_distance_cell(
self._cell_handle, *coord)
neighbor_nums = mmlist._mmlist_to_pylist(neighbor_mmlist)
mm.mmlist_delete(neighbor_mmlist)
if self._renumber_map is not None:
neighbor_nums = [self._renumber_map[i] for i in neighbor_nums]
yield (atom_num, neighbor_nums)
[docs]def measure_distance(atom1, atom2):
"""
Measure the distance between two atoms.
All atom arguments must be _StructureAtom objects (returned from the
Structure.atom list, and can be from different structures), or XYZ
coordinates, as lists or numpy arrays.
See also the Structure.measure method. It can use integer atom indices
in addition to _StructureAtom objects, but is restricted to measurements
within the structure and cannot do plane angle measurements.
NOTE: Periodic boundary conditions (PBC) are NOT honored.
"""
if isinstance(atom1, structure._StructureAtom):
return mm.mmct_atom_get_distance_s(atom1._ct.handle, atom1._index,
atom2._ct.handle, atom2._index)
else:
# Coordinates were passed in
return numpy.linalg.norm(numpy.asarray(atom1) - numpy.asarray(atom2))
[docs]def measure_bond_angle(atom1, atom2, atom3):
"""
Measure the atom between 3 specified atoms.
All atom arguments must be _StructureAtom objects (returned from the
Structure.atom list, and can be from different structures), or XYZ
coordinates, as lists or numpy arrays.
See also the Structure.measure method. It can use integer atom indices
in addition to _StructureAtom objects, but is restricted to measurements
within the structure and cannot do plane angle measurements.
NOTE: Periodic boundary conditions (PBC) are NOT honored.
"""
if isinstance(atom1, structure._StructureAtom):
return mm.mmct_atom_get_bond_angle_s(atom1._ct.handle, atom1._index,
atom2._ct.handle, atom2._index,
atom3._ct.handle, atom3._index)
else:
# Coordinates were passed in
atom1 = mm.Cartesian(*atom1)
atom2 = mm.Cartesian(*atom2)
atom3 = mm.Cartesian(*atom3)
angle_radians = mm.Cartesian.angle(atom1, atom2, atom3)
return math.degrees(angle_radians)
[docs]def measure_dihedral_angle(atom1, atom2, atom3, atom4):
"""
Measure the dihedral angle between the specified atoms.
All atom arguments must be _StructureAtom objects (returned from the
Structure.atom list, and can be from different structures), or XYZ
coordinates, as lists or numpy arrays.
See also the Structure.measure method. It can use integer atom indices
in addition to _StructureAtom objects, but is restricted to measurements
within the structure and cannot do plane angle measurements.
NOTE: Periodic boundary conditions (PBC) are NOT honored.
"""
if isinstance(atom1, structure._StructureAtom):
return mm.mmct_atom_get_dihedral_angle_s(atom1._ct.handle, atom1._index,
atom2._ct.handle, atom2._index,
atom3._ct.handle, atom3._index,
atom4._ct.handle, atom4._index)
else:
# Coordinates were passed in
atom1 = mm.Cartesian(*atom1)
atom2 = mm.Cartesian(*atom2)
atom3 = mm.Cartesian(*atom3)
atom4 = mm.Cartesian(*atom4)
angle_radians = mm.Cartesian.dihedral(atom1, atom2, atom3, atom4)
return math.degrees(angle_radians)
[docs]def measure_plane_angle(atom1,
atom2,
atom3,
atom4,
atom5,
atom6,
minangle=False):
"""
Measure the angle between planes of the provided atoms.
All atom arguments must be _StructureAtom objects (returned from the
Structure.atom list, and can be from different structures), or XYZ
coordinates, as lists or numpy arrays.
See also the Structure.measure method. It can use integer atom indices
in addition to _StructureAtom objects, but is restricted to measurements
within the structure and cannot do plane angle measurements.
NOTE: Periodic boundary conditions (PBC) are NOT honored.
Parameters
minangle (bool)
This applies to the planar angle calculation and if True restricts
the angle to the range [0, 90] degrees. That is, it treats the order
of atoms defining a plane as unimportant, and the directionality of
the plane normals is ignored.
"""
if isinstance(atom1, structure._StructureAtom):
atom1 = atom1.xyz
atom2 = atom2.xyz
atom3 = atom3.xyz
atom4 = atom4.xyz
atom5 = atom5.xyz
atom6 = atom6.xyz
atom1 = mm.Cartesian(*atom1)
atom2 = mm.Cartesian(*atom2)
atom3 = mm.Cartesian(*atom3)
atom4 = mm.Cartesian(*atom4)
atom5 = mm.Cartesian(*atom5)
atom6 = mm.Cartesian(*atom6)
normal1 = mm.get_normal_vector_to_three_points(atom1, atom2, atom3)
normal2 = mm.get_normal_vector_to_three_points(atom4, atom5, atom6)
angle_degrees = mm.get_angle_between_two_vectors(normal1, normal2)
if minangle and angle_degrees > 90:
angle_degrees = 180 - angle_degrees
return angle_degrees
[docs]class LinearError(ValueError):
"""
A class indicating a plane could not be computed due to all atoms being
linear
"""
[docs]def fit_plane_to_points(coords):
"""
Fit a plane to a set of xyz coordinates
This method comes from
http://stackoverflow.com/questions/15959411/fit-points-to-a-plane-algorithms-how-to-iterpret-results
It is the SVD method appearing there.
:type coords: `numpy.array`
:param coords: The coordinates to fit the plane to, such as come from
struct.getXYZ()
:rtype: `numpy.array`
:return: An array of 3 floats that defines a normalized vector that is
normal to the best-fit plane
:raises LinearError: If there are only 3 coordinates and they are
colinear. If there are 4 or more colinear coordinates, one of the
infinite number of possible vectors will be returned.
:raises ValueError: If there are fewer than 3 sets of coordinates
:raises numpy.linalg.LinAlgError: If the SVD does not converge (note - I
was unable to find a case where this happened)
"""
# There are 5 methods coded on that page in Python that are supposed to
# give equivalent answers. They do not in most cases. In my hands, only
# two of the
# methods consistently give similar and reasonable answers - fitPlaneSVD
# and fitPlaneLTSQ. They are both very fast and approximately the same
# speed. I've implemented the SVD method because that seems most
# frequently mentioned for finding the best fit plane. This SVD method
# appears numerically stable even if all points are co-planar. It cannot
# handle only three points, though, so the normal is computed directly
# for that case.
if len(coords) < 3:
raise ValueError('At least 3 sets of coordinates must be supplied')
if len(coords) == 3:
# SVD requires at least 4 coordinates
narrays = [numpy.array(x) for x in coords]
vec1 = narrays[1] - narrays[0]
vec2 = narrays[2] - narrays[0]
norm = transform.get_normalized_vector(numpy.cross(vec1, vec2))
if not transform.get_vector_magnitude(norm):
raise LinearError('All three coordinates are colinear')
return norm
[rows, cols] = coords.shape
# Set up constraint equations of the form AB = 0,
# where B is a column vector of the plane coefficients
# in the form b(1)*X + b(2)*Y +b(3)*Z + b(4) = 0.
pmat = numpy.ones((rows, 1))
ab = numpy.hstack([coords, pmat])
[uval, dval, vval] = numpy.linalg.svd(ab, 0)
# Solution is last column of vval.
bvec = vval[3, :]
nn = numpy.linalg.norm(bvec[0:3])
bvec = bvec / nn
return bvec[0:3]
[docs]def fit_plane_to_points_as_list(coords):
"""
Fit a plane to a set of xyz coordinates
This method is used by Maestro because it's easier to access a list
from C++ than a numpy array.
This method comes from
http://stackoverflow.com/questions/15959411/fit-points-to-a-plane-algorithms-how-to-iterpret-results
It is the SVD method appearing there.
:type coords: `numpy.array`
:param coords: The coordinates to fit the plane to, such as come from
struct.getXYZ()
:rtype: `list`
:return: An list of 3 floats that defines a normalized vector that is
normal to the best-fit plane
:raises LinearError: If there are only 3 coordinates and they are
colinear. If there are 4 or more colinear coordinates, one of the
infinite number of possible vectors will be returned.
:raises ValueError: If there are fewer than 3 sets of coordinates
:raises numpy.linalg.LinAlgError: If the SVD does not converge (note - I
was unable to find a case where this happened)
"""
result = fit_plane_to_points(coords)
return result.tolist()
#EOF