Source code for schrodinger.application.phase.packages.cat_hypo_converter

"""
This module contains the CatHypoConverter class, which converts a Catalyst
pharmacophore hypothesis provided in BioCAD CHM format to a Phase static
feature hypothesis.
"""

import os

import numpy

from schrodinger.application.phase.packages import phase_utils
from schrodinger.infra import mm
from schrodinger.infra import phase
from schrodinger.utils import fileutils

CAT_ACCEPTOR = "HB_ACCEPTOR"
CAT_DONOR = "HB_DONOR"
CAT_HYDROPHOBIC = "HYDROPHOBIC"
CAT_NEGATIVE = "NEG_IONIZABLE"
CAT_POSITIVE = "POS_IONIZABLE"
CAT_AROMATIC = "RING_AROMATIC"

# Catalyst coordinates are in picometers.
PICO_TO_ANGSTROM = 0.01


[docs]class CatHypoConverter: """ Converts a Catalyst hypothesis to a Phase hypothesis. """
[docs] def __init__(self, chmfile, hypo_id=None, fd=None): """ Constructor taking the name of a Catalyst hypothesis file, an ID for the Phase hypothesis, and Phase feature definitions. If projected types are set to True in the A/D definitions, the Phase hypothesis will contain projected A/D features instead of atom-centered features with vectors. :param chmfile: Catalyst hypothesis file. Extension must be ".chm". :type chmfile: str :param hypo_id: Phase hypothesis ID. If None, the basename of chmfile is used. :param fd: Feature definitions. If None, default Phase feature definitions are used. :type fd: list(phase.PhpFeatureDefinition) """ if fileutils.splitext(chmfile)[1] != ".chm": mesg = f'Illegal Catalyst hypothesis file type: \"{chmfile}\"' raise OSError(mesg) if not os.path.isfile(chmfile): raise OSError(f'Catalyst hypothesis file \"{chmfile}\" not found') self._chmfile = chmfile self._hypo_id = hypo_id or os.path.splitext( os.path.basename(chmfile))[0] self._fd = fd or phase_utils.get_default_feature_definitions() self._proj_only = { phase.FEATURE_TYPE_ACCEPTOR: False, phase.FEATURE_TYPE_DONOR: False } self._proj_dist = { phase.FEATURE_TYPE_ACCEPTOR: 1.8, phase.FEATURE_TYPE_DONOR: 1.8 } for feature in self._fd: feature_type = feature.getFeatureName() if feature_type in self._proj_only: self._proj_only[feature_type] = feature.getProjectedType() self._proj_dist[feature_type] = feature.getExtendDistance() self._fit_block = [] self._hypo_sites = [] self._phase_hypo = None self._convertToPhase()
[docs] def getPhaseHypo(self): """ Returns a copy of the converted hypothesis. :return: The Phase hypothesis. :rtype: phase.PhpHypoAdaptor """ return phase.PhpHypoAdaptor(self._phase_hypo)
def _addAcceptor(self, feature_name, frag_type): """ Adds an acceptor feature to the list of hypothesis sites. :param feature_name: The Catalyst acceptor feature name. :type feature_name: str :param frag_type: Static fragment type. :type frag_type: str """ base_xyz, base_tol, proj_xyz, proj_tol = self._getCatBlobs( feature_name, 2) # For heavy atom X, the Catalyst acceptor configuration is X----Q, # where the X-Q distance is 3 angstroms. Shift Q toward X to give # it the correct Phase projected distance. proj_dist = self._proj_dist[phase.FEATURE_TYPE_ACCEPTOR] self._shiftCatFeature(base_xyz, proj_xyz, proj_dist) xyz = [] tol = base_tol if not self._proj_only[phase.FEATURE_TYPE_ACCEPTOR]: xyz.append(base_xyz) else: frag_type = phase.FRAG_TYPE_A0 tol = proj_tol xyz.append(proj_xyz) self._addSiteToHypo(frag_type, xyz, tol) def _addAromatic(self, feature_name, frag_type): """ Adds an aromatic ring feature to the list of hypothesis sites. :param feature_name: The Catalyst aromatic ring feature name. :type feature_name: str :param frag_type: Static fragment type. :type frag_type: str """ base_xyz, base_tol, proj_xyz1, _ = self._getCatBlobs(feature_name, 2) # Need two projected points that are 1.8 angstroms from centroid. self._shiftCatFeature(base_xyz, proj_xyz1, 1.8) proj_xyz2 = numpy.subtract(numpy.multiply(2.0, base_xyz), proj_xyz1) xyz = [base_xyz, proj_xyz1, list(proj_xyz2)] self._addSiteToHypo(frag_type, xyz, base_tol) def _addDonor(self, feature_name, frag_type): """ Adds a donor feature to the list of hypothesis sites. :param feature_name: The Catalyst donor feature name. :type feature_name: str :param frag_type: Static fragment type. :type frag_type: str """ base_xyz, base_tol, proj_xyz, proj_tol = self._getCatBlobs( feature_name, 2) # For heavy atom X, the Catalyst donor configuration is X----Q # (i.e., no hydrogen atom), where the X-Q distance is 3 angstroms. # Shift Q toward X so that the separation is 1 + the correct Phase # projected distance. Then shift X by 1 angstrom toward Q so that # it's located approximately where we'd expect the hydrogen to be # if it were bonded to a nitrogen or oxygen. proj_dist = self._proj_dist[phase.FEATURE_TYPE_DONOR] self._shiftCatFeature(base_xyz, proj_xyz, 1.0 + proj_dist) self._shiftCatFeature(proj_xyz, base_xyz, proj_dist) xyz = [] tol = base_tol if not self._proj_only[phase.FEATURE_TYPE_DONOR]: xyz.append(base_xyz) else: frag_type = phase.FRAG_TYPE_D0 tol = proj_tol xyz.append(proj_xyz) self._addSiteToHypo(frag_type, xyz, tol) def _addPoint(self, feature_name, frag_type): """ Adds a hydrophobic, negative ionic, or positive ionic feature to the list of hypothesis sites. :param feature_name: The Catalyst feature name. :type feature_name: str :param frag_type: Static fragment type. :type frag_type: str """ xyz, tol = self._getCatBlobs(feature_name, 1) self._addSiteToHypo(frag_type, [xyz], tol) def _addSiteToHypo(self, frag_type, xyz, tol): """ Creates a static site and adds it to the list of hypothesis sites. :param frag_type: Static fragment type. :type frag_type: str :param xyz: Site coordinates. Includes base feature and projected point(s), if applicable. :type xyz: list(list(float)) :param tol: Positional tolerance. :type tol: float """ hypo_site = phase.createStaticSite(frag_type, xyz) hypo_site.setTol(tol) self._hypo_sites.append(hypo_site) def _addXvol(self): """ Adds excluded volumes to hypothesis. """ xvol = phase.PhpExclVol() for i, line in enumerate(self._fit_block): if "EXCLUDED_VOLUME" in line: # Coordinates and radius appear on the second and third lines # following this one. tokens = self._fit_block[i + 2].split(" ") xyz = [float(tokens[j]) * PICO_TO_ANGSTROM for j in range(2, 5)] tokens = self._fit_block[i + 3].split(" ") r = float(tokens[2]) * PICO_TO_ANGSTROM xvol.addSphere(xyz[0], xyz[1], xyz[2], r) if xvol.numSpheres(): self._phase_hypo.addXvol(xvol) def _convertToPhase(self): """ Peforms the Catalyst-->Phase hypothesis conversion. """ # Used to call the appropriate feature-adding function based on the # Catalyst feature type: FEATURE_FUNCTION = { CAT_ACCEPTOR: (self._addAcceptor, phase.FRAG_TYPE_A1), CAT_DONOR: (self._addDonor, phase.FRAG_TYPE_D1), CAT_HYDROPHOBIC: (self._addPoint, phase.FRAG_TYPE_H), CAT_NEGATIVE: (self._addPoint, phase.FRAG_TYPE_N), CAT_POSITIVE: (self._addPoint, phase.FRAG_TYPE_P), CAT_AROMATIC: (self._addAromatic, phase.FRAG_TYPE_R2) } self._readFitBlock() # The first line in self._fit_block looks something like this: # FIT fit ( HB_ACCEPTOR1 HB_DONOR2 HB_DONOR3 HYDROPHOBIC5 ) tokens = self._fit_block[0].split(" ") for token in tokens: # Process token if it starts with "HB_ACCEPTOR", "HB_DONOR", etc. for feature_type, feature_function in FEATURE_FUNCTION.items(): if token.startswith(feature_type): # Call the appropriate _add* function. function_name = feature_function[0] frag_type = feature_function[1] function_name(token, frag_type) # Sort hypothesis sites by feature type and add site numbers. self._hypo_sites.sort(key=lambda x: x.getSiteType()) for i, site in enumerate(self._hypo_sites): site.setSiteNumber(i) # Create a static feature hypothesis. self._phase_hypo = phase.PhpHypoAdaptor(self._hypo_id, self._hypo_sites, self._fd, mm.MMCT_INVALID_CT) # Build hypothesis attributes from sites so that tolerances are # bubbled up. self._phase_hypo.buildAllAttr(True) # No need to keep the rarely used radii attribute. self._phase_hypo.deleteAttr(phase.HYPO_ATTR_RAD) # Add excluded volumes. self._addXvol() def _getCatBlobs(self, feature_name, num_blobs): """ Scans the fit block for the requested number of BLOB records with the indicated feature name. Returns a list [xyz_i, tol_i,...] for i=1,...,num_blobs, where xyz_i holds the Cartesian coordinates of the ith blob and tol_i is the matching tolerance for the ith blob. In the available CHM examples, all 2-point blobs (acceptor, donor, ring) consist of a base/centroid blob, followed by a projected/normal blob. :param feature_name: The Catalyst feature name. :type feature_name: str :param num_blobs: The number of BLOB records to look for. Should be 1 for a single-point feature (H, N, P), and 2 for a vector feature (A, D, R). :return: list of xyz coordinates, matching tol for each blob. :rtype: list(list(float), float, etc.) """ blobs = [] num_blobs2 = num_blobs * 2 feature_pattern = feature_name + "/" for i, line in enumerate(self._fit_block): if "BLOB" in line and feature_pattern in line: # Coordinates and tolerance appear on the second and # third lines following this one. tokens = self._fit_block[i + 2].split(" ") xyz = [float(tokens[j]) * PICO_TO_ANGSTROM for j in range(2, 5)] tokens = self._fit_block[i + 3].split(" ") tol = float(tokens[2]) * PICO_TO_ANGSTROM blobs.append(xyz) blobs.append(tol) if len(blobs) == num_blobs2: break if len(blobs) != num_blobs2: mesg = "Could not find the requested number of blobs (%d) " + \ "for feature %s" raise RuntimeError(mesg % (num_blobs, feature_name)) return blobs def _readFitBlock(self): """ Reads the "FIT fit" block from the .chm file, which contains the hypothesis feature types, feature coordinates, and excluded volumes. """ keep_it = False with open(self._chmfile, 'r') as fh: for line in fh.readlines(): keep_it = keep_it or "FIT fit" in line if keep_it: self._fit_block.append(line.strip()) if not self._fit_block: mesg = f'File \"{self._chmfile}\" is not formatted correctly' raise RuntimeError(mesg) def _shiftCatFeature(self, xyz_fixed, xyz_move, dist): """ Shifts xyz_move toward or away from xyz_fixed so that the distance between the two points is dist. :param xyz_fixed: Stationary Cartesian coordinates. :type xyz_fixed: list[float] :param xyz_move: Cartesian coordinates to shift. :type xyz_move: list[float] """ axis = numpy.subtract(xyz_move, xyz_fixed) shift = numpy.multiply(axis, dist / numpy.linalg.norm(axis)) xyz_new = numpy.add(xyz_fixed, shift) # Modify xyz_move in-place. for i in range(3): xyz_move[i] = xyz_new[i]