# Source code for schrodinger.application.phase.packages.hypo_refine.option_utils

"""
Module with phase_hypo_refine option parsing and validation functionality.

Copyright Schrodinger LLC, All Rights Reserved.
"""

import argparse
import csv
import os
import tempfile
import zipfile

from schrodinger.application.phase.packages import conformer_reader
from schrodinger.application.phase.packages import phase_utils
from schrodinger.application.phase.packages.hypo_refine import project_utils
from schrodinger.infra import phase
from schrodinger.utils import cmdline
from schrodinger.utils import fileutils

# Program name; passed as ``prog`` to the argument parser built in get_parser().
PHASE_HYPO_REFINE = "phase_hypo_refine"

# Default values for the refinement options registered in
# add_refinement_options(). The option each one backs is noted alongside.
DEFAULT_NUM_MISS = 1  # -miss
DEFAULT_TOL_CHANGE = 1.0  # -tol
DEFAULT_TOL_STEPS = 4  # -steps
DEFAULT_XVOL_BUFFER = 2.0  # -xvol
DEFAULT_XVOL_SPACING = 2.0  # -grid
DEFAULT_BEDROC_ALPHA1 = 160.9  # -a1
DEFAULT_BEDROC_WEIGHT1 = 0.5  # -w1
DEFAULT_BEDROC_ALPHA2 = 20.0  # -a2
DEFAULT_BEDROC_WEIGHT2 = 0.5  # -w2

# Types of files that may be supplied as <actives> or <decoys>:
LEGAL_ACTIVE_DECOY_FORMATS = [
    phase.PhpFileFormat_PHP_FORMAT_MAE, phase.PhpFileFormat_PHP_FORMAT_SD,
    phase.PhpFileFormat_PHP_FORMAT_PHZIP
]


def add_project_creation_options(parser):
    """
    Registers the "Project Creation Options" argument group on the parser.

    The group holds the standard conformer generation options supplied by
    conformer_reader, followed by the -save_projects and -exit switches.

    :param parser: Argument parser object.
    :type parser: argparse.ArgumentParser
    """
    group_description = (
        "These options control the creation of multi-conformer "
        "Phase projects from\nthe actives/decoys if those compounds are "
        "provided in Maestro or SD files.")
    save_projects_help = (
        "Return a Zip archive of each created project. For example, if "
        "the active structures are provided in the file actives.maegz, the "
        "zipped project actives.phzip would be returned. This option is "
        "recommended if subsequent jobs are to be run with the same actives "
        "and decoys.")
    exit_help = (
        "Exit after creating projects. Valid only with -save_projects.")

    group = parser.add_argument_group(title="Project Creation Options",
                                      description=group_description)

    # The shared conformer generation options are registered before the
    # switches below so that they keep their position in the help output.
    # NOTE(review): the meaning of the second positional argument (False) is
    # defined by conformer_reader.add_standard_confgen_options -- confirm.
    conformer_reader.add_standard_confgen_options(group, False)

    for flag, help_text in (("-save_projects", save_projects_help),
                            ("-exit", exit_help)):
        group.add_argument(flag, action="store_true", help=help_text)


def add_refinement_options(parser):
    """
    Registers the "Refinement Options" argument group on the parser.

    Numeric options are range-checked through phase_utils.RestrictedRange and
    advertise their default value at the end of their help text. The
    remaining options are simple on/off switches.

    :param parser: Argument parser object.
    :type parser: argparse.ArgumentParser
    """
    group = parser.add_argument_group(
        title="Refinement Options",
        description=(
            "These options provide control over modifications made to "
            "the geometry of the\nhypothesis, its matching rules, matching "
            "tolerances and excluded volumes. All\nrefinements are designed to "
            "increase w1*BEDROC1 + w2*BEDROC2, where the two\nBEDROC scores "
            "correspond to different early recognition parameters, alpha1\nand "
            "alpha2, respectively."))

    def add_numeric(flag, value_type, metavar, lower, range_flag, default,
                    help_template):
        # Registers one range-restricted numeric option. help_template holds
        # a single "{}" placeholder that receives the default value.
        # NOTE(review): range_flag is forwarded as the third argument of
        # RestrictedRange; presumably it says whether the lower bound itself
        # is allowed -- confirm against phase_utils.
        group.add_argument(
            flag,
            type=value_type,
            metavar=metavar,
            choices=[phase_utils.RestrictedRange(lower, None, range_flag)],
            default=default,
            help=help_template.format(default))

    def add_switch(flag, help_text):
        # Registers one boolean switch that defaults to False.
        group.add_argument(flag, action="store_true", help=help_text)

    add_numeric(
        "-move", int, "<maxcycle>", 0, True, phase.DEFAULT_MAX_CYCLES,
        "Maximum number of cycles in optimization that adjusts positions "
        "and orientations of hypothesis features to improve BEDROC scores. "
        "This requires replacement of the reference ligand with a set of "
        "fragments that can be moved independently. Since the fragment-based "
        "reference ligand fails to accurately represent the overall shape of "
        "the original reference ligand, the volume score weight is set to 0 "
        "for the entire refinement process, and when performing any subsequent "
        "screens with the hypothesis. Set <maxcycle> to 0 to disable movement "
        "of features (default: {}).")

    add_switch(
        "-frag",
        "Convert reference ligand to fragments even if movement of "
        "features is disabled. This allows a fully consistent comparison of "
        "refinement results obtained with and without movement of features.")

    add_numeric(
        "-miss", int, "<m>", 0, True, DEFAULT_NUM_MISS,
        "Maximum number of hypothesis features for which matching is "
        "optional. This stage of refinement explores whether BEDROC scores "
        "can be improved by allowing certain features to be missed. Set to 0 "
        "to disable (default: {}).")

    add_switch(
        "-ex",
        "Exhaustively enumerate all partial matches when optional "
        "matching is in effect. This may or may not ultimately yield higher "
        "BEDROC scores, but it does improve the effectiveness of subsequently "
        "added excluded volumes by preventing the promotion of partially "
        "matched decoys which were not found prior to adding excluded volumes.")

    add_numeric(
        "-tol", float, "<change>", 0.0, True, DEFAULT_TOL_CHANGE,
        "Maximum amount, up or down, to adjust matching tolerances. Set "
        "to 0 to disable (default: {}).")

    add_numeric(
        "-steps", int, "<n>", 0, True, DEFAULT_TOL_STEPS,
        "Number of steps over which to apply maximum tolerance change. "
        "For example, if the maximum tolerance change is 1.0 and the number "
        "of steps is 4, each step will involve a tolerance adjustment of 0.25. "
        "(default: {}).")

    add_numeric(
        "-xvol", float, "<dbuff>", 0, True, DEFAULT_XVOL_BUFFER,
        "Use hits from the highest scoring hypothesis to identify "
        "locations for the placement of excluded volume spheres which clash "
        "with one or more decoys but not with any actives. A buffer distance "
        "of <dbuff> is enforced between the excluded volumes surface and the "
        "van der Waals surface of each active. Set to 0 to disable. "
        "(default: {}).")

    add_numeric(
        "-grid", float, "<spacing>", 0, False, DEFAULT_XVOL_SPACING,
        "The spacing between adjacent excluded volume spheres and the "
        "radii of the spheres. (default: {}).")

    add_numeric(
        "-a1", float, "<alpha1>", 0.0, False, DEFAULT_BEDROC_ALPHA1,
        "BEDROC early recognition parameter 1 "
        "(default: {}).")

    add_numeric(
        "-w1", float, "<weight1>", 0.0, True, DEFAULT_BEDROC_WEIGHT1,
        "Weighting factor for BEDROC scores calculated using <alpha1> "
        "(default: {}).")

    add_numeric(
        "-a2", float, "<alpha2>", 0.0, False, DEFAULT_BEDROC_ALPHA2,
        "BEDROC early recognition parameter 2 "
        "(default: {}).")

    add_numeric(
        "-w2", float, "<weight2>", 0.0, True, DEFAULT_BEDROC_WEIGHT2,
        "Weighting factor for BEDROC scores calculated using <alpha2> "
        "(default: {}).")

    add_switch(
        "-testonly",
        "Run a single screen against <actives> and <decoys> using "
        "the supplied hypothesis and report the weighted BEDROC score. This "
        "option may be used to validate a refined hypothesis if -valid wasn't "
        "used in the original job. May be combined with -frag in case the "
        "supplied hypothesis has not been refined, but you wish to simulate "
        "the baseline conditions of a refinement where movement is enabled.")


def get_parser():
    """
    Builds the command line parser for phase_hypo_refine.

    Registration order is: the three positional arguments (<hypo>, <actives>,
    <decoys>), -o and -valid, the refinement option group, the project
    creation option group, the job control options, and finally the hidden
    subjob options.

    :return: Argument parser object
    :rtype: argparse.ArgumentParser
    """
    parser = argparse.ArgumentParser(
        prog=PHASE_HYPO_REFINE,
        formatter_class=argparse.RawDescriptionHelpFormatter)

    # (name or flag, metavar, help text), in registration order. The first
    # three are positional, so their order fixes the command line syntax.
    visible_arguments = (
        ("hypo", "<hypo>.phypo",
         "The pharmacophore hypothesis to be refined."),
        ("actives", "<actives>",
         "Maestro file, SD file, or zipped Phase project (.phzip) "
         "containing the actives. If zipped project, all ligands in the project "
         "will be used, and the project feature definitions must be identical "
         "to those of the hypothesis."),
        ("decoys", "<decoys>",
         "Maestro file, SD file, or zipped Phase project containing the decoys. "
         "The same caveats noted for <actives> apply if zipped project."),
        ("-o", "<out>.phypo",
         "Output file for refined hypothesis (default: <hypo>-out.phypo)."),
        ("-valid", "<actives2>,<decoys2>",
         "Validate refined hypothesis against actives and decoys that "
         "differ from those employed in the refinement. Use of this option is "
         "strongly recommended. See -testonly for an alternate means of "
         "validating."),
    )
    for name, metavar, help_text in visible_arguments:
        parser.add_argument(name, metavar=metavar, help=help_text)

    add_refinement_options(parser)
    add_project_creation_options(parser)

    cmdline.add_jobcontrol_options(
        parser, options=[cmdline.HOST, cmdline.JOBNAME, cmdline.TMPDIR])

    # Hidden options for subjobs that do BEDROC screens:
    parser.add_argument("-subjob", help=argparse.SUPPRESS)
    for hidden_switch in ("-mask_screen", "-tol_screen"):
        parser.add_argument(hidden_switch,
                            action="store_true",
                            help=argparse.SUPPRESS)

    return parser


def validate_actives_decoys(args):
    """
    Checks the validity of <actives>, <decoys>, and, if applicable, <actives2>
    and <decoys2>.

    For each file, in order: the format must be one of
    LEGAL_ACTIVE_DECOY_FORMATS, the base name must not repeat, and the file
    must exist. Zipped projects must additionally pass
    validate_project_sites and validate_project_fd. The first failure is
    reported.

    :param args: argparse.Namespace with command line arguments
    :type args: argparse.Namespace

    :return: tuple of validity and non-empty error message if not valid
    :rtype: bool, str
    """
    labelled_files = {"<actives>": args.actives, "<decoys>": args.decoys}

    if args.valid is not None:
        # -valid carries "<actives2>,<decoys2>"; csv handles quoted names.
        fields = next(csv.reader([args.valid]))
        if len(fields) != 2:
            return False, (
                "Invalid <actives2>,<decoys2> syntax: \"%s\"" % args.valid)
        labelled_files["<actives2>"], labelled_files["<decoys2>"] = fields

    # Ensure that file formats are legal and base names are unique. Base names
    # have to be unique so that we don't get collisions when creating projects,
    # (e.g., /path/to/actives.maegz --> actives.phzip), not to mention the fact
    # that someone might be trying to supply the same file twice.
    seen_base_names = set()
    for label, file_name in labelled_files.items():
        file_format = phase.get_phase_file_format(file_name)
        if file_format not in LEGAL_ACTIVE_DECOY_FORMATS:
            return False, "Illegal %s file format: \"%s\"" % (label, file_name)

        base_name = fileutils.get_basename(file_name)
        if base_name in seen_base_names:
            return False, "All active/decoy files must have unique base names"
        seen_base_names.add(base_name)

        # Still need proper paths for <actives2>,<decoys2>.
        proper_path = phase_utils.get_proper_path(file_name)
        if not os.path.isfile(proper_path):
            return False, "%s file \"%s\" not found" % (label, proper_path)

        if file_format != phase.PhpFileFormat_PHP_FORMAT_PHZIP:
            continue

        # Zipped projects get the two extra checks, sites first.
        sites_ok, mesg = validate_project_sites(proper_path)
        if not sites_ok:
            return False, mesg
        fd_ok, mesg = validate_project_fd(proper_path, args.hypo)
        if not fd_ok:
            return False, mesg

    return True, ""


def validate_args(args):
    """
    Checks the validity of command line arguments.

    Checks run in a fixed order and stop at the first failure: the
    hypothesis, the output hypothesis file name (only if -o was given), the
    actives/decoys, the refinement options, and the project creation options.

    :param args: argparse.Namespace with command line arguments
    :type args: argparse.Namespace

    :return: tuple of validity and non-empty error message if not valid
    :rtype: bool, str
    """
    passed, mesg = validate_hypo(args)
    if not passed:
        return False, mesg

    if args.o is not None and not fileutils.is_hypothesis_file(args.o):
        return False, "Illegal output hypothesis file name: \"%s\"" % args.o

    remaining_checks = (
        validate_actives_decoys,
        validate_refinement_options,
        validate_project_creation_options,
    )
    for check in remaining_checks:
        passed, mesg = check(args)
        if not passed:
            return False, mesg

    return True, ""


def validate_hypo(args):
    """
    Checks the validity of the hypothesis and of the options that depend on it.

    The hypothesis file must have a legal name, exist, and contain at least
    three features. If -miss is non-zero and the hypothesis has more than
    three features, -miss may not exceed the feature count minus three. If
    -tol is non-zero, it must be smaller than the applicable tolerance limit.

    :param args: argparse.Namespace with command line arguments
    :type args: argparse.Namespace

    :return: tuple of validity and non-empty error message if not valid
    :rtype: bool, str
    """
    hypo_file = args.hypo
    if not fileutils.is_hypothesis_file(hypo_file):
        return False, "Illegal hypothesis file name: \"%s\"" % hypo_file
    if not os.path.isfile(hypo_file):
        return False, "Hypothesis file \"%s\" not found" % hypo_file

    hypo = phase.PhpHypoAdaptor(hypo_file)
    site_count = hypo.getSiteCount()
    if site_count < 3:
        return False, "Hypothesis must have at least 3 features"

    # Don't require the user to specify -miss 0 simply to avoid failing on
    # a 3-point hypothesis: the limit is only enforced above 3 features.
    if args.miss and site_count > 3:
        max_miss = site_count - 3
        if args.miss > max_miss:
            return False, (
                "Number of missed features cannot exceed %d" % max_miss)

    if args.tol:
        if hypo.hasTol():
            # Smallest per-site tolerance. The seed value args.tol + 1.0 is
            # above args.tol, so by itself it can never trigger the error.
            site_tols = [site.getTol() for site in hypo.getHypoSites()]
            tol_limit = min([args.tol + 1.0] + site_tols)
        else:
            tol_limit = phase.PHASE_DEFAULT_TOL
        if args.tol >= tol_limit:
            return False, (
                "Maximum tolerance change must be less than %.2f" % tol_limit)

    return True, ""


def validate_project_creation_options(args):
    """
    Checks the validity of project creation options.

    Delegates the conformer generation check to
    conformer_reader.validate_confgen_nddo, then rejects -exit when it is
    given without -save_projects.

    :param args: argparse.Namespace with command line arguments
    :type args: argparse.Namespace

    :return: tuple of validity and non-empty error message if not valid
    :rtype: bool, str
    """
    confgen_ok, mesg = conformer_reader.validate_confgen_nddo(args)
    if not confgen_ok:
        return False, mesg

    exit_without_save = args.exit and not args.save_projects
    if exit_without_save:
        return False, "-exit is allowed only with -save_projects"

    return True, ""


def validate_project_fd(project_path, hypo_path):
    """
    Validates that the feature definitions in the provided zipped project are
    equivalent to those in the provided hypothesis.

    The project's feature definition file is extracted into a temporary
    directory, loaded through phase.PhpProject, and compared with the
    definitions held by the hypothesis.

    :param project_path: Path to zipped project file
    :type project_path: str

    :param hypo_path: Path to hypothesis file
    :type hypo_path: str

    :return: tuple of validity and non-empty error message if not valid
    :rtype: bool, str
    """
    hypo_fd = phase.PhpHypoAdaptor(hypo_path).getFd()

    # NOTE(review): the two names are presumably the member name inside the
    # archive and its path relative to the extraction directory -- confirm
    # against project_utils.get_project_file_names.
    archive_member, disk_name = project_utils.get_project_file_names(
        project_path, phase.getDefaultFdFileBaseName())

    with tempfile.TemporaryDirectory() as scratch_dir:
        with zipfile.ZipFile(project_path, 'r') as archive:
            archive.extract(archive_member, scratch_dir)
        extracted_fd_file = os.path.join(scratch_dir, disk_name)
        project_fd = phase.PhpProject().getFeatureDef(extracted_fd_file)
        # Compare while the extracted file still exists on disk.
        definitions_differ = bool(project_fd != hypo_fd)

    if definitions_differ:
        return False, ("Hypothesis feature definitions differ from those in " +
                       "project \"%s\"" % project_path)
    return True, ""


def validate_project_sites(project_path):
    """
    Validates that the provided zipped project contains pharmacophore sites.

    The project's "Settings.dat" member is read straight from the archive and
    searched for the marker "step import". A project whose settings contain
    that marker is reported as having no pharmacophore sites.

    :param project_path: Path to zipped project file
    :type project_path: str

    :return: tuple of validity and non-empty error message if not valid
    :rtype: bool, str
    """
    settings_zip, _settings_disk = project_utils.get_project_file_names(
        project_path, "Settings.dat")

    try:
        with zipfile.ZipFile(project_path, 'r') as zfile:
            with zfile.open(settings_zip, 'r') as settings_file:
                settings_bytes = settings_file.read()
    except (zipfile.BadZipFile, KeyError) as err:
        # BadZipFile: the file is not a readable Zip archive. KeyError: the
        # archive has no settings member. Both are problems with the user's
        # input, so report them like any other validation failure.
        mesg = "Unable to read settings from project \"{}\": {}"
        return False, mesg.format(project_path, err)

    # Search the raw bytes. Calling str() on a bytes object searches its
    # repr instead and raises BytesWarning when Python runs with -bb.
    if b"step import" in settings_bytes:
        mesg = "Project \"{}\" contains no pharmacophore sites"
        return False, mesg.format(project_path)

    return True, ""


def validate_refinement_options(args):
    """
    Checks the validity of hypothesis refinement options.

    The only rule enforced here is that the two BEDROC weights, -w1 and -w2,
    must not sum to zero.

    :param args: argparse.Namespace with command line arguments
    :type args: argparse.Namespace

    :return: tuple of validity and non-empty error message if not valid
    :rtype: bool, str
    """
    combined_weight = args.w1 + args.w2
    if combined_weight != 0:
        return True, ""
    return False, "Weights for BEDROC1 and BEDROC2 cannot both be 0"