"""
Module for converting MOE `*.ph4` hypotheses into Schrodinger Phase `*.phypo`
format.
Copyright Schrodinger LLC, All Rights Reserved.
"""
import os
import re
from argparse import ArgumentParser
from argparse import RawDescriptionHelpFormatter
import numpy
from schrodinger.application.phase import constants
from schrodinger.application.phase import hypothesis
from schrodinger.infra import phase
from schrodinger.utils import cmdline
from schrodinger.utils import log
# Logging: module-level logger shared by the converter class and the command
# line driver; obtained from the Schrodinger log utilities using this file's
# path.
logger = log.get_output_logger(__file__)
# MOE pharmacophore annotations that translate directly to a Phase feature
# type. Annotations marked "?" have no entry in the table below.
#
#   MOE annotation                      Phase feature
#   ----------------------------------  ----------------
#   [Atom]
#     Don    H-bond Donor               D  Donor
#     Acc    H-bond Acceptor            A  Acceptor
#     Cat    Cation                     P  Positive
#     Ani    Anion                      N  Negative
#     ML     Metal Ligator              ?
#     HydA   Hydrophobic Atom           H  Hydrophobe
#   [Centroid]
#     Aro    Aromatic                   R  Ring
#     PiR    pi-Ring                    R  Ring
#     Hyd    Hydrophobic                H  Hydrophobe
#   [Bioisostere]
#     CN2    NCN+                       P  Positive
#     O2     COO-                       N  Negative
MOE_PHASE_MAPPING = {
    # [Atom]
    "Don": constants.FEATURE_D,
    "Acc": constants.FEATURE_A,
    "Cat": constants.FEATURE_P,
    "Ani": constants.FEATURE_N,
    "HydA": constants.FEATURE_H,
    # [Centroid]
    "Aro": constants.FEATURE_R,
    "PiR": constants.FEATURE_R,
    "Hyd": constants.FEATURE_H,
    # [Bioisostere]
    "CN2": constants.FEATURE_P,
    "O2": constants.FEATURE_N,
}
# MOE projected annotations, keyed to the Phase feature type of the base site
# each projected (Q) point pairs with. Annotations marked "?" have no entry.
#
#   [Projected]
#     Don2   Projected Donor            Q  Projected Donor
#     Acc2   Projected Acceptor         Q  Projected Acceptor
#     ML2    Proj. Metal Ligator        ?
#     PiN    Ring Normal                Q  Projected Ring
MOE_PROJ_PAIRS = {
    "Don2": constants.FEATURE_D,
    "Acc2": constants.FEATURE_A,
    "PiN": constants.FEATURE_R,
}
# Permitted-types string used for MOE "Any" features: it must begin with "H",
# followed by every other feature type (H itself removed from the remainder).
H_ALL_PERMITTED = "".join((
    constants.FEATURE_H,
    "".join(constants.FEATURE_TYPES).replace(constants.FEATURE_H, ""),
))
class PhpMOEConverter(object):
    """
    Converts a MOE `*.ph4` pharmacophore query into a Phase hypothesis.

    The public entry point is `createPhaseHypothesis`; the remaining methods
    parse the MOE text format and translate each MOE feature into a Phase
    site.
    """

    def createPhaseHypothesis(self, ph4_file):
        """
        Reads a MOE pharmacophore file, creating a Phase hypothesis
        from relevant information.

        :param ph4_file: path to MOE .ph4 file to read
        :type ph4_file: str

        :return: tuple containing the Phase hypothesis adapted from the MOE
                 input file (None if no compatible sites were found), and a
                 string with any conversion warnings
        :rtype: (`PhpHypoAdaptor`, str)

        :raise ValueError: if the feature section of the file is malformed
        """
        # Extract pharmacophore features from MOE ph4 file
        feature_data = self._readMOEFile(ph4_file)
        sites, warning_msg = self._extractPhaseSites(feature_data)
        if not sites:
            return None, warning_msg

        # Derive hypoID from MOE ph4 filename
        hypoID = os.path.splitext(os.path.basename(ph4_file))[0]
        phase_hypo = hypothesis.PhaseHypothesisStatic(hypoID, sites)
        return phase_hypo, warning_msg

    def _readMOEFile(self, ph4_file):
        """
        Reads a MOE pharmacophore file, returning feature data as list of
        pharmacophore site data organized by header indexed dictionaries.

        Only the `#feature` section is parsed. All other lines (for example
        `#moe:ph4que`, `#pharmacophore` and `#constraint`) are skipped, and
        reading stops at `#endpharmacophore` or at the end of the file.

        :param ph4_file: path to MOE .ph4 file to read
        :type ph4_file: str

        :return: list of feature data dictionaries
        :rtype: list of dict

        :raise ValueError: if the `#feature` header has no entry count, or
                           the section holds fewer values than it declares
        """
        feature_data = []

        # Read the *.ph4 file
        with open(ph4_file, "r") as fh:
            for line in fh:
                if line.startswith("#feature"):
                    # Header layout: #feature <count> <name> <type> ...
                    entries = line.split()
                    if len(entries) < 2:
                        raise ValueError(
                            "Malformed #feature header in %s" % ph4_file)
                    count = entries[1]
                    headers = list(zip(entries[2::2], entries[3::2]))

                    # Grab all feature data up to the next section marker.
                    # next() is given a default so that a file ending inside
                    # the feature section finishes the loop instead of
                    # leaking StopIteration to the caller.
                    raw_data = []
                    line = next(fh, "")
                    while line and not line.startswith("#"):
                        raw_data += line.split()
                        line = next(fh, "")
                    feature_data = self._formInputData(count, headers,
                                                       raw_data)

                # `line` may now be the marker that ended the feature section
                if line.startswith("#endpharmacophore"):
                    break
        return feature_data

    def _formInputData(self, count, headers, input_list):
        """
        Creates a dictionary from MOE ph4 data, supplied as a number of entries,
        headers with header types, and a list of all input values in order.

        :param count: number of entries to create
        :type count: int

        :param headers: (header, header type) pairs
        :type headers: list of tuples

        :param input_list: list of all inputs in a given section of MOE file
        :type input_list: list of str

        :return: list of dictionaries containing cast data indexed by header
        :rtype: list of dict

        :raise ValueError: if `count` is not an integer, or if fewer values
                           are supplied than `count` entries require
        """
        values = list(input_list)
        num_entries = int(count)
        num_fields = len(headers)

        # Fail with a clear message rather than running off the end of the
        # value list part way through an entry
        if len(values) < num_entries * num_fields:
            raise ValueError(
                "MOE feature data is truncated: expected %d values "
                "(%d entries x %d fields), found %d" %
                (num_entries * num_fields, num_entries, num_fields,
                 len(values)))

        data = []
        for entry_index in range(num_entries):
            start = entry_index * num_fields
            entry_values = values[start:start + num_fields]
            data_dict = {}
            for (title, title_type), value in zip(headers, entry_values):
                data_dict[title] = self._castMOEType(value, title_type)
            data.append(data_dict)
        return data

    def _castMOEType(self, value, MOE_type):
        """
        Casts raw MOE pharmacophore strings.

        :param value: value read from MOE text
        :type value: str

        :param MOE_type: value type (tt, t, r, i, ix)
        :type MOE_type: str

        :return: value cast to corresponding type (str, float, int, or int
                 parsed from hexadecimal text); None if the type is unknown
        """
        if MOE_type in ("tt", "t"):
            return str(value)
        elif MOE_type == "r":
            return float(value)
        elif MOE_type == "i":
            return int(value)
        elif MOE_type == "ix":
            # Hexadecimal text
            return int(value, 16)
        else:
            logger.error("Unknown MOE value type: %s", MOE_type)
            return None

    def _extractPhaseSites(self, feature_data):
        """
        Extracts feature data from raw data dictionary read from MOE file into
        PhpSites suitable for populating a Phase hypothesis.

        Each dictionary is expected to provide the keys "expr", "x", "y", "z"
        and "r".

        :param feature_data: list of MOE data dictionaries
        :type feature_data: list of dict

        :return: list of Phase sites, warning message
        :rtype: (list of `PhpSite`, str)
        """
        sites = []
        warnings = []
        for data in feature_data:
            moe_expr = re.split(r"\W+", data["expr"])

            # Each MOE feature has coordinates and matching tolerance
            site = phase.PhpSite()
            site.setCoordinates(data["x"], data["y"], data["z"])
            site.setTol(data["r"])

            # Create feature string for logging
            feature_str = "%s (%.2f %.2f %.2f)" % (
                (data["expr"], data["x"], data["y"], data["z"]))

            # TODO: For now, just use the first type up to operators;
            #       Later will need to determine way to treat expr values with
            #       !|& operators and translate to allowed feature matching
            moe_type = moe_expr[0]
            if len(moe_expr) > 1:
                msg = "Treating as %s" % moe_type
                warnings.append("%s: %s" % (feature_str, msg))

            # Standard pharmacophore feature
            if moe_type in MOE_PHASE_MAPPING:
                site.setSiteType(MOE_PHASE_MAPPING[moe_type])
                sites.append(site)

            # Projected points
            elif moe_type in MOE_PROJ_PAIRS:
                # If sensible, append a Q site to the previous site
                base_site = sites[-1] if sites else phase.PhpSite()
                if base_site.getSiteType() == MOE_PROJ_PAIRS[moe_type]:
                    # Only append this Q site's coordinates to the previous
                    # site's projected coordinates
                    current_proj_coords = list(base_site.getProjCoords())
                    Q_coords = site.getCoordinates()
                    base_site.setProjCoords(current_proj_coords + [Q_coords])
                    continue

                # Otherwise, treat as a projected only site
                site.setSiteType(MOE_PROJ_PAIRS[moe_type])
                site.setProjectedOnly(True)
                sites.append(site)

            # Allow all feature types, but map as Hydrophobic
            elif moe_type == "Any":
                site.setSiteType(constants.FEATURE_H)
                site.setPermitted(H_ALL_PERMITTED)
                sites.append(site)

            else:
                msg = "Unknown MOE feature type"
                warnings.append("%s: %s" % (feature_str, msg))

        # Projected point check
        sites, proj_coord_warnings = self._checkSiteProjectedPoints(sites)
        warnings.extend(proj_coord_warnings)

        # Prepend message to warnings
        if not sites:
            warnings.insert(0, "No compatible sites for Phase hypothesis --")
        elif warnings:
            warnings.insert(0, "Incompatible sites ignored --")

        return sites, "\n".join(warnings)

    def _checkSiteProjectedPoints(self, sites):
        """
        Checks that sites have appropriate projected points, updating the sites
        if possible.

        Aromatic sites without projected coordinates are converted to
        hydrophobic sites; aromatic sites with a single projected coordinate
        gain a second one mirrored through the site center.

        :param sites: Phase sites to check static fragment projected coords
        :type sites: list of `PhpSite`

        :return: updated Phase sites, any update warnings
        :rtype: (list of `PhpSite`, list of str)
        """
        warnings = []
        for site in sites:
            if site.getSiteType() == phase.FEATURE_TYPE_AROMATIC:
                # Convert to H if no projected coordinates
                if len(site.getProjCoords()) == 0:
                    site.setSiteType(phase.FEATURE_TYPE_HYDROPHOBIC)
                    msg = "No projected vectors; converting to hydrophobic site"
                    x, y, z = site.getCoordinates()
                    warnings.append("Aro (%.2f %.2f %.2f): %s" % (x, y, z, msg))

                # Add second normal in opposite direction
                if len(site.getProjCoords()) == 1:
                    center_xyz = site.getCoordinates()
                    Q1_xyz = site.getProjCoords()[0]
                    # Reflect Q1 through the center: Q2 = 2 * center - Q1
                    Q2_xyz = 2 * numpy.array(center_xyz) - numpy.array(Q1_xyz)
                    site.setProjCoords([Q1_xyz, Q2_xyz])
        return sites, warnings
def convert_MOE_hypothesis_to_ct(ph4_filename):
    """
    Creates a Phase hypothesis from a MOE hypothesis file, returning the
    hypothesis ct and any conversion warnings.

    :param ph4_filename: MOE hypothesis filename
    :type ph4_filename: str

    :return: tuple of Phase hypothesis ct and conversion warning message; the
             ct is None if the file does not exist or no hypothesis could be
             created from it
    :rtype: (int, str)
    """
    if not os.path.isfile(ph4_filename):
        return None, "File not found: %s" % ph4_filename

    moe_converter = PhpMOEConverter()
    phase_hypo, warnings = moe_converter.createPhaseHypothesis(ph4_filename)

    # Return hypothesis ct if successful, otherwise return None
    hypo_ct = phase_hypo.getHypoCt() if phase_hypo else None
    return hypo_ct, warnings
# =============================================================================
# Command line
# =============================================================================
def get_parser():
    """
    Creates an `argparse.ArgumentParser` configured with the arguments
    supported by this script.

    :return: The configured argument parser object
    :rtype: `argparse.ArgumentParser`
    """
    parser = ArgumentParser(
        description=__doc__,
        usage="$SCHRODINGER/run phase_convert_MOE.py <ph4_file>",
        formatter_class=RawDescriptionHelpFormatter)

    # Add arguments
    parser.add_argument("ph4_file",
                        metavar="<ph4_file>",
                        help="MOE style pharmacophore file (.ph4 extension)")
    return parser
def check_parser_args(args):
    """
    Validates the user specified arguments.

    The `ph4_file` argument must name an existing file whose name ends in
    ".ph4".

    :param args: Parsed command line options; only `ph4_file` is read.
    :type args: `argparse.Namespace`

    :return: tuple of validation success, and error message (empty on
             success)
    :rtype: bool, str
    """
    ph4_file = args.ph4_file
    if os.path.isfile(ph4_file) and ph4_file.endswith(".ph4"):
        return True, ""
    return False, "Must provide valid MOE .ph4 file"
def main():
    """
    Parses user options and runs driver.

    :return: 1 if the MOE hypothesis could not be converted, otherwise None
    :rtype: int or None
    """
    # Create the top-level parser
    parser = get_parser()
    args = parser.parse_args()

    # Validate arguments
    validated, msg = check_parser_args(args)
    if not validated:
        parser.error(msg)

    # Set logging level
    logger.setLevel(log.logging.INFO)

    # Convert MOE hypothesis to Phase
    logger.info("Converting MOE hypothesis: %s", args.ph4_file)
    moe_converter = PhpMOEConverter()
    phase_hypo, warnings = moe_converter.createPhaseHypothesis(args.ph4_file)
    if warnings:
        logger.warning(warnings + "\n")

    # If the conversion was not successful, log error and report failure.
    # NOTE(review): assumes the command line wrapper forwards this return
    # value as the process exit status -- confirm against main_wrapper.
    if not phase_hypo:
        logger.error("Unable to convert MOE hypothesis to Phase format.")
        return 1

    # Write the hypothesis to disk
    hypo_outfile = phase_hypo.getHypoID() + phase.PHASE_HYPO_FILE_EXT
    phase_hypo.save(hypo_outfile, True)
    logger.info("Phase hypothesis written: %s", hypo_outfile)
    logger.info("Hypothesis conversion complete.")
if __name__ == "__main__":
    # Script entry point: run main() through the Schrodinger command line
    # wrapper
    cmdline.main_wrapper(main)