Source code for schrodinger.application.phase.packages.phase_screen_utils

"""
Module with common functionality for Phase screening backends.

Copyright Schrodinger LLC, All Rights Reserved.
"""

import os

import schrodinger.utils.subprocess as subprocess
from schrodinger.infra import phase
from schrodinger.job import jobcontrol
from schrodinger.utils import fileutils

from . import phase_utils


def add_database_options(parser):
    """
    Registers the database screening options on the supplied parser.

    The options live in their own argument group, titled "Database
    Screening Options", which is handed back so that callers can attach
    further options to it.

    :param parser: Parser that receives the new argument group
    :type parser: argparse.ArgumentParser

    :return: The newly created argument group
    :rtype: argparse._ArgumentGroup
    """

    isub_help = (
        "Screen only the subset of records in the file <subset>_phase.inp. "
        "Not valid when screening multiple databases.")

    group = parser.add_argument_group(title="Database Screening Options")
    group.add_argument("-isub", metavar="<subset>", help=isub_help)
    return group
def add_reporting_options(parser, sort_prop):
    """
    Registers the hit reporting options on the supplied parser.

    Three options are created in a "Reporting Options" group: -nosort and
    -keep, which cannot be combined with one another, and -osd.

    :param parser: Parser that receives the new argument group
    :type parser: argparse.ArgumentParser

    :param sort_prop: Name of the sort property, shown in the -nosort help
    :type sort_prop: str

    :return: The newly created argument group
    :rtype: argparse._ArgumentGroup
    """

    group = parser.add_argument_group(title="Reporting Options")

    # -nosort and -keep are alternatives, so they share an exclusive group.
    exclusive = group.add_mutually_exclusive_group(required=False)

    nosort_help = (
        "Output hits in the order they are screened. The default is to sort "
        "hits by decreasing %s." % sort_prop)
    exclusive.add_argument("-nosort", action="store_true", help=nosort_help)

    # NOTE(review): presumably limits <maxhits> to values above zero --
    # confirm against phase_utils.RestrictedRange.
    keep_choices = [phase_utils.RestrictedRange(0, None, False)]
    keep_help = ("Cap sorted hits at <maxhits> (default: %d)." %
                 phase.PHASE_DEFAULT_MAX_HITS)
    exclusive.add_argument("-keep",
                           type=int,
                           metavar="<maxhits>",
                           choices=keep_choices,
                           help=keep_help)

    group.add_argument("-osd",
                       action="store_true",
                       help="Output hits in SD format.")
    return group
def combine_hit_files(hit_files_in,
                      hit_file_out,
                      max_hits=None,
                      sort_prop=None):
    """
    Merges several hit files into a single output file.

    Input files that do not exist on disk are skipped. When max_hits is
    None (or zero) the remaining files are concatenated in the order given;
    otherwise they are passed to run_glide_sort together with max_hits and
    sort_prop. If the destination is an SD file, the hits are first written
    to a Maestro file with the same base name and a ".maegz" extension, and
    that file is then converted to SD.

    :param hit_files_in: Hit files to combine (Maestro format)
    :type hit_files_in: list(str)

    :param hit_file_out: Destination hit file (Maestro or SD format)
    :type hit_file_out: str

    :param max_hits: Maximum number of sorted hits, or None if not sorting
    :type max_hits: int

    :param sort_prop: CT-level property for decreasing sort, or None if not
        sorting
    :type sort_prop: str
    """

    wants_sd = (phase.get_phase_file_format(hit_file_out) ==
                phase.PhpFileFormat_PHP_FORMAT_SD)
    if wants_sd:
        stem, _ = fileutils.splitext(hit_file_out)
        mae_file = stem + ".maegz"
    else:
        mae_file = hit_file_out

    # Note that fileutils.cat chokes on a missing hit file, which can happen
    # if a -nosort subjob produces no hits.
    existing = [path for path in hit_files_in if os.path.isfile(path)]

    if max_hits:
        run_glide_sort(existing, mae_file, max_hits, sort_prop)
    else:
        fileutils.cat(existing, mae_file)

    if wants_sd:
        phase_utils.convert_to_sd(mae_file, hit_file_out)
def get_max_hits(args):
    """
    Determines how many hits should be kept.

    The value of the -keep option is used when it is set to a non-zero
    number; otherwise phase.PHASE_DEFAULT_MAX_HITS applies.

    :param args: Parsed command line options, which must carry a `keep`
        attribute
    :type args: argparse.Namespace

    :return: Maximum number of hits
    :rtype: int
    """

    requested = args.keep
    return requested if requested else phase.PHASE_DEFAULT_MAX_HITS
def get_subset_file(args):
    """
    Builds the name of the input subset file from the -isub option.

    The value of -isub is returned with phase.PHASE_SUBSET_EXT appended,
    unless it already ends with that extension, in which case it is
    returned as given. Any leading path is left untouched. An empty string
    is returned when -isub was not specified.

    :param args: Parsed command line options, which must carry an `isub`
        attribute
    :type args: argparse.Namespace

    :return: Subset file name or empty string
    :rtype: str
    """

    subset = args.isub
    if not subset:
        return ""

    suffix = phase.PHASE_SUBSET_EXT
    if subset.endswith(suffix):
        # The user already supplied the full file name.
        return subset
    return subset + suffix
def run_glide_sort(hit_files, hit_file_out, max_hits, sort_prop):
    """
    Runs $SCHRODINGER/utilities/glide_sort on a set of subjob hit files,
    sorting by decreasing sort_prop and keeping at most max_hits hits.

    The names of the hit files are first written to a list file that shares
    the base name of hit_file_out and has a ".list" extension; glide_sort
    reads its input from that list.

    :param hit_files: Subjob hit files in Maestro format
    :type hit_files: list[str]

    :param hit_file_out: Output hit file (`*.maegz`)
    :type hit_file_out: str

    :param max_hits: Number of hits to keep
    :type max_hits: int

    :param sort_prop: CT-level property for decreasing sort
    :type sort_prop: str

    :raise KeyError: If the SCHRODINGER environment variable is not set
    :raise OSError: If glide_sort returns a non-zero exit status
    """

    executable = os.path.join(os.environ["SCHRODINGER"], "utilities",
                              "glide_sort")

    stem, _ = fileutils.splitext(hit_file_out)
    list_file = stem + ".list"
    phase_utils.write_list_to_file(list_file, hit_files)

    command = [executable]
    command += ["-o", hit_file_out]
    command += ["-f", list_file]
    command += ["-use_prop_d", sort_prop]
    command += ["-norecep"]
    command += ["-n", str(max_hits)]
    command += ["-nofilter", "-allow_empty_output"]

    # TODO: Use subprocess.getstatusoutput when we switch over to Python 3.
    if subprocess.call(command):
        raise OSError("Command failed: %s" % " ".join(command))
def validate_source_dbs(source_dbs):
    """
    Verifies that the Phase databases to be screened are usable.

    Every database path must be absolute. When the current process is
    running under job control, each path must also be an existing directory
    that phase.is_phase_database accepts. Checking stops at the first
    database that fails.

    :param source_dbs: Phase database names
    :type source_dbs: list(str)

    :return: tuple of validity and error message if not valid
    :rtype: bool, str
    """

    under_job = jobcontrol.under_job_control()

    for db_path in source_dbs:
        if not os.path.isabs(db_path):
            return False, "Database path \"%s\" is not absolute" % db_path

        # NOTE(review): both on-disk checks below are taken to apply only
        # under job control -- confirm that the Phase database check is not
        # meant to run at startup time as well.
        if not under_job:
            continue
        if not os.path.isdir(db_path):
            return False, "Database \"%s\" not found on job host" % db_path
        if not phase.is_phase_database(db_path):
            return False, "\"%s\" is not a valid Phase database" % db_path

    return True, ""
def validate_subset(args):
    """
    Verifies that the input subset file, if any, can be found.

    Nothing is checked when no subset file was requested. Otherwise the
    file must exist, with one exception: when the process is not running
    under job control and the subset file is given as an absolute path, the
    file is allowed to be missing.

    :param args: Parsed command line options
    :type args: argparse.Namespace

    :return: tuple of validity and error message if required file is missing
    :rtype: bool, str
    """

    subset_file = get_subset_file(args)
    if not subset_file:
        return True, ""

    subset_file = phase_utils.get_proper_path(subset_file)

    # At startup time an absolute path is not required to exist.
    may_be_absent = (not jobcontrol.under_job_control() and
                     os.path.isabs(subset_file))
    if may_be_absent or os.path.isfile(subset_file):
        return True, ""

    return False, "Missing subset file: \"%s\"" % subset_file