"""
This module contains the CatHypoConverter class, which converts a Catalyst
pharmacophore hypothesis provided in BioCAD CHM format to a Phase static
feature hypothesis.
"""
import os
import numpy
from schrodinger.application.phase.packages import phase_utils
from schrodinger.infra import mm
from schrodinger.infra import phase
from schrodinger.utils import fileutils
CAT_ACCEPTOR = "HB_ACCEPTOR"
CAT_DONOR = "HB_DONOR"
CAT_HYDROPHOBIC = "HYDROPHOBIC"
CAT_NEGATIVE = "NEG_IONIZABLE"
CAT_POSITIVE = "POS_IONIZABLE"
CAT_AROMATIC = "RING_AROMATIC"
# Catalyst coordinates are in picometers.
PICO_TO_ANGSTROM = 0.01
[docs]class CatHypoConverter:
"""
Converts a Catalyst hypothesis to a Phase hypothesis.
"""
[docs] def __init__(self, chmfile, hypo_id=None, fd=None):
"""
Constructor taking the name of a Catalyst hypothesis file, an ID for
the Phase hypothesis, and Phase feature definitions. If projected
types are set to True in the A/D definitions, the Phase hypothesis
will contain projected A/D features instead of atom-centered features
with vectors.
:param chmfile: Catalyst hypothesis file. Extension must be ".chm".
:type chmfile: str
:param hypo_id: Phase hypothesis ID. If None, the basename of chmfile
is used.
:param fd: Feature definitions. If None, default Phase feature
definitions are used.
:type fd: list(phase.PhpFeatureDefinition)
"""
if fileutils.splitext(chmfile)[1] != ".chm":
mesg = f'Illegal Catalyst hypothesis file type: \"{chmfile}\"'
raise OSError(mesg)
if not os.path.isfile(chmfile):
raise OSError(f'Catalyst hypothesis file \"{chmfile}\" not found')
self._chmfile = chmfile
self._hypo_id = hypo_id or os.path.splitext(
os.path.basename(chmfile))[0]
self._fd = fd or phase_utils.get_default_feature_definitions()
self._proj_only = {
phase.FEATURE_TYPE_ACCEPTOR: False,
phase.FEATURE_TYPE_DONOR: False
}
self._proj_dist = {
phase.FEATURE_TYPE_ACCEPTOR: 1.8,
phase.FEATURE_TYPE_DONOR: 1.8
}
for feature in self._fd:
feature_type = feature.getFeatureName()
if feature_type in self._proj_only:
self._proj_only[feature_type] = feature.getProjectedType()
self._proj_dist[feature_type] = feature.getExtendDistance()
self._fit_block = []
self._hypo_sites = []
self._phase_hypo = None
self._convertToPhase()
[docs] def getPhaseHypo(self):
"""
Returns a copy of the converted hypothesis.
:return: The Phase hypothesis.
:rtype: phase.PhpHypoAdaptor
"""
return phase.PhpHypoAdaptor(self._phase_hypo)
def _addAcceptor(self, feature_name, frag_type):
"""
Adds an acceptor feature to the list of hypothesis sites.
:param feature_name: The Catalyst acceptor feature name.
:type feature_name: str
:param frag_type: Static fragment type.
:type frag_type: str
"""
base_xyz, base_tol, proj_xyz, proj_tol = self._getCatBlobs(
feature_name, 2)
# For heavy atom X, the Catalyst acceptor configuration is X----Q,
# where the X-Q distance is 3 angstroms. Shift Q toward X to give
# it the correct Phase projected distance.
proj_dist = self._proj_dist[phase.FEATURE_TYPE_ACCEPTOR]
self._shiftCatFeature(base_xyz, proj_xyz, proj_dist)
xyz = []
tol = base_tol
if not self._proj_only[phase.FEATURE_TYPE_ACCEPTOR]:
xyz.append(base_xyz)
else:
frag_type = phase.FRAG_TYPE_A0
tol = proj_tol
xyz.append(proj_xyz)
self._addSiteToHypo(frag_type, xyz, tol)
def _addAromatic(self, feature_name, frag_type):
"""
Adds an aromatic ring feature to the list of hypothesis sites.
:param feature_name: The Catalyst aromatic ring feature name.
:type feature_name: str
:param frag_type: Static fragment type.
:type frag_type: str
"""
base_xyz, base_tol, proj_xyz1, _ = self._getCatBlobs(feature_name, 2)
# Need two projected points that are 1.8 angstroms from centroid.
self._shiftCatFeature(base_xyz, proj_xyz1, 1.8)
proj_xyz2 = numpy.subtract(numpy.multiply(2.0, base_xyz), proj_xyz1)
xyz = [base_xyz, proj_xyz1, list(proj_xyz2)]
self._addSiteToHypo(frag_type, xyz, base_tol)
def _addDonor(self, feature_name, frag_type):
"""
Adds a donor feature to the list of hypothesis sites.
:param feature_name: The Catalyst donor feature name.
:type feature_name: str
:param frag_type: Static fragment type.
:type frag_type: str
"""
base_xyz, base_tol, proj_xyz, proj_tol = self._getCatBlobs(
feature_name, 2)
# For heavy atom X, the Catalyst donor configuration is X----Q
# (i.e., no hydrogen atom), where the X-Q distance is 3 angstroms.
# Shift Q toward X so that the separation is 1 + the correct Phase
# projected distance. Then shift X by 1 angstrom toward Q so that
# it's located approximately where we'd expect the hydrogen to be
# if it were bonded to a nitrogen or oxygen.
proj_dist = self._proj_dist[phase.FEATURE_TYPE_DONOR]
self._shiftCatFeature(base_xyz, proj_xyz, 1.0 + proj_dist)
self._shiftCatFeature(proj_xyz, base_xyz, proj_dist)
xyz = []
tol = base_tol
if not self._proj_only[phase.FEATURE_TYPE_DONOR]:
xyz.append(base_xyz)
else:
frag_type = phase.FRAG_TYPE_D0
tol = proj_tol
xyz.append(proj_xyz)
self._addSiteToHypo(frag_type, xyz, tol)
def _addPoint(self, feature_name, frag_type):
"""
Adds a hydrophobic, negative ionic, or positive ionic feature to the
list of hypothesis sites.
:param feature_name: The Catalyst feature name.
:type feature_name: str
:param frag_type: Static fragment type.
:type frag_type: str
"""
xyz, tol = self._getCatBlobs(feature_name, 1)
self._addSiteToHypo(frag_type, [xyz], tol)
def _addSiteToHypo(self, frag_type, xyz, tol):
"""
Creates a static site and adds it to the list of hypothesis sites.
:param frag_type: Static fragment type.
:type frag_type: str
:param xyz: Site coordinates. Includes base feature and projected
point(s), if applicable.
:type xyz: list(list(float))
:param tol: Positional tolerance.
:type tol: float
"""
hypo_site = phase.createStaticSite(frag_type, xyz)
hypo_site.setTol(tol)
self._hypo_sites.append(hypo_site)
def _addXvol(self):
"""
Adds excluded volumes to hypothesis.
"""
xvol = phase.PhpExclVol()
for i, line in enumerate(self._fit_block):
if "EXCLUDED_VOLUME" in line:
# Coordinates and radius appear on the second and third lines
# following this one.
tokens = self._fit_block[i + 2].split(" ")
xyz = [float(tokens[j]) * PICO_TO_ANGSTROM for j in range(2, 5)]
tokens = self._fit_block[i + 3].split(" ")
r = float(tokens[2]) * PICO_TO_ANGSTROM
xvol.addSphere(xyz[0], xyz[1], xyz[2], r)
if xvol.numSpheres():
self._phase_hypo.addXvol(xvol)
def _convertToPhase(self):
"""
Peforms the Catalyst-->Phase hypothesis conversion.
"""
# Used to call the appropriate feature-adding function based on the
# Catalyst feature type:
FEATURE_FUNCTION = {
CAT_ACCEPTOR: (self._addAcceptor, phase.FRAG_TYPE_A1),
CAT_DONOR: (self._addDonor, phase.FRAG_TYPE_D1),
CAT_HYDROPHOBIC: (self._addPoint, phase.FRAG_TYPE_H),
CAT_NEGATIVE: (self._addPoint, phase.FRAG_TYPE_N),
CAT_POSITIVE: (self._addPoint, phase.FRAG_TYPE_P),
CAT_AROMATIC: (self._addAromatic, phase.FRAG_TYPE_R2)
}
self._readFitBlock()
# The first line in self._fit_block looks something like this:
# FIT fit ( HB_ACCEPTOR1 HB_DONOR2 HB_DONOR3 HYDROPHOBIC5 )
tokens = self._fit_block[0].split(" ")
for token in tokens:
# Process token if it starts with "HB_ACCEPTOR", "HB_DONOR", etc.
for feature_type, feature_function in FEATURE_FUNCTION.items():
if token.startswith(feature_type):
# Call the appropriate _add* function.
function_name = feature_function[0]
frag_type = feature_function[1]
function_name(token, frag_type)
# Sort hypothesis sites by feature type and add site numbers.
self._hypo_sites.sort(key=lambda x: x.getSiteType())
for i, site in enumerate(self._hypo_sites):
site.setSiteNumber(i)
# Create a static feature hypothesis.
self._phase_hypo = phase.PhpHypoAdaptor(self._hypo_id, self._hypo_sites,
self._fd, mm.MMCT_INVALID_CT)
# Build hypothesis attributes from sites so that tolerances are
# bubbled up.
self._phase_hypo.buildAllAttr(True)
# No need to keep the rarely used radii attribute.
self._phase_hypo.deleteAttr(phase.HYPO_ATTR_RAD)
# Add excluded volumes.
self._addXvol()
def _getCatBlobs(self, feature_name, num_blobs):
"""
Scans the fit block for the requested number of BLOB records with
the indicated feature name. Returns a list [xyz_i, tol_i,...] for
i=1,...,num_blobs, where xyz_i holds the Cartesian coordinates of the
ith blob and tol_i is the matching tolerance for the ith blob. In the
available CHM examples, all 2-point blobs (acceptor, donor, ring)
consist of a base/centroid blob, followed by a projected/normal blob.
:param feature_name: The Catalyst feature name.
:type feature_name: str
:param num_blobs: The number of BLOB records to look for. Should be
1 for a single-point feature (H, N, P), and 2 for a vector
feature (A, D, R).
:return: list of xyz coordinates, matching tol for each blob.
:rtype: list(list(float), float, etc.)
"""
blobs = []
num_blobs2 = num_blobs * 2
feature_pattern = feature_name + "/"
for i, line in enumerate(self._fit_block):
if "BLOB" in line and feature_pattern in line:
# Coordinates and tolerance appear on the second and
# third lines following this one.
tokens = self._fit_block[i + 2].split(" ")
xyz = [float(tokens[j]) * PICO_TO_ANGSTROM for j in range(2, 5)]
tokens = self._fit_block[i + 3].split(" ")
tol = float(tokens[2]) * PICO_TO_ANGSTROM
blobs.append(xyz)
blobs.append(tol)
if len(blobs) == num_blobs2:
break
if len(blobs) != num_blobs2:
mesg = "Could not find the requested number of blobs (%d) " + \
"for feature %s"
raise RuntimeError(mesg % (num_blobs, feature_name))
return blobs
def _readFitBlock(self):
"""
Reads the "FIT fit" block from the .chm file, which contains the
hypothesis feature types, feature coordinates, and excluded volumes.
"""
keep_it = False
with open(self._chmfile, 'r') as fh:
for line in fh.readlines():
keep_it = keep_it or "FIT fit" in line
if keep_it:
self._fit_block.append(line.strip())
if not self._fit_block:
mesg = f'File \"{self._chmfile}\" is not formatted correctly'
raise RuntimeError(mesg)
def _shiftCatFeature(self, xyz_fixed, xyz_move, dist):
"""
Shifts xyz_move toward or away from xyz_fixed so that the distance
between the two points is dist.
:param xyz_fixed: Stationary Cartesian coordinates.
:type xyz_fixed: list[float]
:param xyz_move: Cartesian coordinates to shift.
:type xyz_move: list[float]
"""
axis = numpy.subtract(xyz_move, xyz_fixed)
shift = numpy.multiply(axis, dist / numpy.linalg.norm(axis))
xyz_new = numpy.add(xyz_fixed, shift)
# Modify xyz_move in-place.
for i in range(3):
xyz_move[i] = xyz_new[i]