Source code for schrodinger.application.phase.packages.cat_hypo_converter

"""
This module contains the CatHypoConverter class, which converts a Catalyst
pharmacophore hypothesis provided in BioCAD CHM format to a Phase static
feature hypothesis.
"""

import os

import numpy

from schrodinger.application.phase.packages import phase_utils
from schrodinger.infra import mm
from schrodinger.infra import phase
from schrodinger.utils import fileutils

CAT_ACCEPTOR = "HB_ACCEPTOR"
CAT_DONOR = "HB_DONOR"
CAT_HYDROPHOBIC = "HYDROPHOBIC"
CAT_NEGATIVE = "NEG_IONIZABLE"
CAT_POSITIVE = "POS_IONIZABLE"
CAT_AROMATIC = "RING_AROMATIC"

# Catalyst coordinates are in picometers.
PICO_TO_ANGSTROM = 0.01


[docs]class CatHypoConverter:
    """
    Converts a Catalyst hypothesis to a Phase hypothesis.
    """

[docs]    def __init__(self, chmfile, hypo_id=None, fd=None):
        """
        Constructor taking the name of a Catalyst hypothesis file, an ID for
        the Phase hypothesis, and Phase feature definitions. If projected
        types are set to True in the A/D definitions, the Phase hypothesis
        will contain projected A/D features instead of atom-centered features
        with vectors.

        :param chmfile: Catalyst hypothesis file. Extension must be ".chm".
        :type chmfile: str

        :param hypo_id: Phase hypothesis ID. If None, the basename of chmfile
                is used.

        :param fd: Feature definitions. If None, default Phase feature
                definitions are used.
        :type fd: list(phase.PhpFeatureDefinition)
        """

        if fileutils.splitext(chmfile)[1] != ".chm":
            mesg = f'Illegal Catalyst hypothesis file type: \"{chmfile}\"'
            raise OSError(mesg)

        if not os.path.isfile(chmfile):
            raise OSError(f'Catalyst hypothesis file \"{chmfile}\" not found')

        self._chmfile = chmfile
        self._hypo_id = hypo_id or os.path.splitext(
            os.path.basename(chmfile))[0]
        self._fd = fd or phase_utils.get_default_feature_definitions()
        self._proj_only = {
            phase.FEATURE_TYPE_ACCEPTOR: False,
            phase.FEATURE_TYPE_DONOR: False
        }
        self._proj_dist = {
            phase.FEATURE_TYPE_ACCEPTOR: 1.8,
            phase.FEATURE_TYPE_DONOR: 1.8
        }
        for feature in self._fd:
            feature_type = feature.getFeatureName()
            if feature_type in self._proj_only:
                self._proj_only[feature_type] = feature.getProjectedType()
                self._proj_dist[feature_type] = feature.getExtendDistance()
        self._fit_block = []
        self._hypo_sites = []
        self._phase_hypo = None
        self._convertToPhase()

[docs]    def getPhaseHypo(self):
        """
        Returns a copy of the converted hypothesis.

        :return: The Phase hypothesis.
        :rtype: phase.PhpHypoAdaptor
        """

        return phase.PhpHypoAdaptor(self._phase_hypo)

    def _addAcceptor(self, feature_name, frag_type):
        """
        Adds an acceptor feature to the list of hypothesis sites.

        :param feature_name: The Catalyst acceptor feature name.
        :type feature_name: str

        :param frag_type: Static fragment type.
        :type frag_type: str
        """

        base_xyz, base_tol, proj_xyz, proj_tol = self._getCatBlobs(
            feature_name, 2)

        # For heavy atom X, the Catalyst acceptor configuration is X----Q,
        # where the X-Q distance is 3 angstroms. Shift Q toward X to give
        # it the correct Phase projected distance.
        proj_dist = self._proj_dist[phase.FEATURE_TYPE_ACCEPTOR]
        self._shiftCatFeature(base_xyz, proj_xyz, proj_dist)

        xyz = []
        tol = base_tol
        if not self._proj_only[phase.FEATURE_TYPE_ACCEPTOR]:
            xyz.append(base_xyz)
        else:
            frag_type = phase.FRAG_TYPE_A0
            tol = proj_tol
        xyz.append(proj_xyz)
        self._addSiteToHypo(frag_type, xyz, tol)

    def _addAromatic(self, feature_name, frag_type):
        """
        Adds an aromatic ring feature to the list of hypothesis sites.

        :param feature_name: The Catalyst aromatic ring feature name.
        :type feature_name: str

        :param frag_type: Static fragment type.
        :type frag_type: str
        """

        base_xyz, base_tol, proj_xyz1, _ = self._getCatBlobs(feature_name, 2)

        # Need two projected points that are 1.8 angstroms from centroid.
        self._shiftCatFeature(base_xyz, proj_xyz1, 1.8)
        proj_xyz2 = numpy.subtract(numpy.multiply(2.0, base_xyz), proj_xyz1)

        xyz = [base_xyz, proj_xyz1, list(proj_xyz2)]
        self._addSiteToHypo(frag_type, xyz, base_tol)

    def _addDonor(self, feature_name, frag_type):
        """
        Adds a donor feature to the list of hypothesis sites.

        :param feature_name: The Catalyst donor feature name.
        :type feature_name: str

        :param frag_type: Static fragment type.
        :type frag_type: str
        """

        base_xyz, base_tol, proj_xyz, proj_tol = self._getCatBlobs(
            feature_name, 2)

        # For heavy atom X, the Catalyst donor configuration is X----Q
        # (i.e., no hydrogen atom), where the X-Q distance is 3 angstroms.
        # Shift Q toward X so that the separation is 1 + the correct Phase
        # projected distance. Then shift X by 1 angstrom toward Q so that
        # it's located approximately where we'd expect the hydrogen to be
        # if it were bonded to a nitrogen or oxygen.
        proj_dist = self._proj_dist[phase.FEATURE_TYPE_DONOR]
        self._shiftCatFeature(base_xyz, proj_xyz, 1.0 + proj_dist)
        self._shiftCatFeature(proj_xyz, base_xyz, proj_dist)

        xyz = []
        tol = base_tol
        if not self._proj_only[phase.FEATURE_TYPE_DONOR]:
            xyz.append(base_xyz)
        else:
            frag_type = phase.FRAG_TYPE_D0
            tol = proj_tol
        xyz.append(proj_xyz)
        self._addSiteToHypo(frag_type, xyz, tol)

    def _addPoint(self, feature_name, frag_type):
        """
        Adds a hydrophobic, negative ionic, or positive ionic feature to the
        list of hypothesis sites.

        :param feature_name: The Catalyst feature name.
        :type feature_name: str

        :param frag_type: Static fragment type.
        :type frag_type: str
        """

        xyz, tol = self._getCatBlobs(feature_name, 1)
        self._addSiteToHypo(frag_type, [xyz], tol)

    def _addSiteToHypo(self, frag_type, xyz, tol):
        """
        Creates a static site and adds it to the list of hypothesis sites.

        :param frag_type: Static fragment type.
        :type frag_type: str

        :param xyz: Site coordinates. Includes base feature and projected
                point(s), if applicable.
        :type xyz: list(list(float))

        :param tol: Positional tolerance.
        :type tol: float
        """

        hypo_site = phase.createStaticSite(frag_type, xyz)
        hypo_site.setTol(tol)
        self._hypo_sites.append(hypo_site)

    def _addXvol(self):
        """
        Adds excluded volumes to hypothesis.
        """

        xvol = phase.PhpExclVol()
        for i, line in enumerate(self._fit_block):
            if "EXCLUDED_VOLUME" in line:
                # Coordinates and radius appear on the second and third lines
                # following this one.
                tokens = self._fit_block[i + 2].split(" ")
                xyz = [float(tokens[j]) * PICO_TO_ANGSTROM for j in range(2, 5)]
                tokens = self._fit_block[i + 3].split(" ")
                r = float(tokens[2]) * PICO_TO_ANGSTROM
                xvol.addSphere(xyz[0], xyz[1], xyz[2], r)

        if xvol.numSpheres():
            self._phase_hypo.addXvol(xvol)

    def _convertToPhase(self):
        """
        Peforms the Catalyst-->Phase hypothesis conversion.
        """

        # Used to call the appropriate feature-adding function based on the
        # Catalyst feature type:
        FEATURE_FUNCTION = {
            CAT_ACCEPTOR: (self._addAcceptor, phase.FRAG_TYPE_A1),
            CAT_DONOR: (self._addDonor, phase.FRAG_TYPE_D1),
            CAT_HYDROPHOBIC: (self._addPoint, phase.FRAG_TYPE_H),
            CAT_NEGATIVE: (self._addPoint, phase.FRAG_TYPE_N),
            CAT_POSITIVE: (self._addPoint, phase.FRAG_TYPE_P),
            CAT_AROMATIC: (self._addAromatic, phase.FRAG_TYPE_R2)
        }

        self._readFitBlock()
        # The first line in self._fit_block looks something like this:
        # FIT fit ( HB_ACCEPTOR1 HB_DONOR2 HB_DONOR3 HYDROPHOBIC5 )
        tokens = self._fit_block[0].split(" ")
        for token in tokens:
            # Process token if it starts with "HB_ACCEPTOR", "HB_DONOR", etc.
            for feature_type, feature_function in FEATURE_FUNCTION.items():
                if token.startswith(feature_type):
                    # Call the appropriate _add* function.
                    function_name = feature_function[0]
                    frag_type = feature_function[1]
                    function_name(token, frag_type)

        # Sort hypothesis sites by feature type and add site numbers.
        self._hypo_sites.sort(key=lambda x: x.getSiteType())
        for i, site in enumerate(self._hypo_sites):
            site.setSiteNumber(i)

        # Create a static feature hypothesis.
        self._phase_hypo = phase.PhpHypoAdaptor(self._hypo_id, self._hypo_sites,
                                                self._fd, mm.MMCT_INVALID_CT)

        # Build hypothesis attributes from sites so that tolerances are
        # bubbled up.
        self._phase_hypo.buildAllAttr(True)
        # No need to keep the rarely used radii attribute.
        self._phase_hypo.deleteAttr(phase.HYPO_ATTR_RAD)

        # Add excluded volumes.
        self._addXvol()

    def _getCatBlobs(self, feature_name, num_blobs):
        """
        Scans the fit block for the requested number of BLOB records with
        the indicated feature name. Returns a list [xyz_i, tol_i,...] for
        i=1,...,num_blobs, where xyz_i holds the Cartesian coordinates of the
        ith blob and tol_i is the matching tolerance for the ith blob. In the
        available CHM examples, all 2-point blobs (acceptor, donor, ring)
        consist of a base/centroid blob, followed by a projected/normal blob.

        :param feature_name: The Catalyst feature name.
        :type feature_name: str

        :param num_blobs: The number of BLOB records to look for. Should be
                1 for a single-point feature (H, N, P), and 2 for a vector
                feature (A, D, R).

        :return: list of xyz coordinates, matching tol for each blob.
        :rtype: list(list(float), float, etc.)
        """

        blobs = []
        num_blobs2 = num_blobs * 2
        feature_pattern = feature_name + "/"
        for i, line in enumerate(self._fit_block):
            if "BLOB" in line and feature_pattern in line:
                # Coordinates and tolerance appear on the second and
                # third lines following this one.
                tokens = self._fit_block[i + 2].split(" ")
                xyz = [float(tokens[j]) * PICO_TO_ANGSTROM for j in range(2, 5)]
                tokens = self._fit_block[i + 3].split(" ")
                tol = float(tokens[2]) * PICO_TO_ANGSTROM
                blobs.append(xyz)
                blobs.append(tol)
                if len(blobs) == num_blobs2:
                    break

        if len(blobs) != num_blobs2:
            mesg = "Could not find the requested number of blobs (%d) " + \
                   "for feature %s"
            raise RuntimeError(mesg % (num_blobs, feature_name))

        return blobs

    def _readFitBlock(self):
        """
        Reads the "FIT fit" block from the .chm file, which contains the
        hypothesis feature types, feature coordinates, and excluded volumes.
        """

        keep_it = False
        with open(self._chmfile, 'r') as fh:
            for line in fh.readlines():
                keep_it = keep_it or "FIT fit" in line
                if keep_it:
                    self._fit_block.append(line.strip())

        if not self._fit_block:
            mesg = f'File \"{self._chmfile}\" is not formatted correctly'
            raise RuntimeError(mesg)

    def _shiftCatFeature(self, xyz_fixed, xyz_move, dist):
        """
        Shifts xyz_move toward or away from xyz_fixed so that the distance
        between the two points is dist.

        :param xyz_fixed: Stationary Cartesian coordinates.
        :type xyz_fixed: list[float]

        :param xyz_move: Cartesian coordinates to shift.
        :type xyz_move: list[float]
        """

        axis = numpy.subtract(xyz_move, xyz_fixed)
        shift = numpy.multiply(axis, dist / numpy.linalg.norm(axis))
        xyz_new = numpy.add(xyz_fixed, shift)
        # Modify xyz_move in-place.
        for i in range(3):
            xyz_move[i] = xyz_new[i]