Source code for schrodinger.application.phase.packages.hypo_refine.screen_utils

"""
Module with phase_hypo_refine screening functionality.

Copyright Schrodinger LLC, All Rights Reserved.
"""

import os
import time

import schrodinger.utils.subprocess as subprocess
from schrodinger import structure
from schrodinger.application.phase.packages import bedroc_screener
from schrodinger.application.phase.packages import phase_utils
from schrodinger.application.phase.packages.hypo_refine import hypo_utils
from schrodinger.application.phase.packages.hypo_refine import job_utils
from schrodinger.application.phase.packages.hypo_refine import option_utils
from schrodinger.application.phase.packages.hypo_refine import project_utils
from schrodinger.infra import phase
from schrodinger.utils import fileutils
from schrodinger.utils import log

logger = log.get_output_logger(__file__)

XVOL_CLASH = os.path.join(os.environ["SCHRODINGER"], "utilities",
                          "create_xvolClash")


[docs]def compute_score(args, screener):
    """
    Computes the weighted BEDROC score from the provded BedrocScreener

    :param args: Command line arguments
    :type args: argparse.Namespace

    :param screener: BedrocScreener for which a screen has been run
    :type screener: bedroc_screener.BedrocScreener

    :return: Weighted BEDROC score
    :rtype: float
    """

    bedroc1 = screener.calcBEDROC(args.a1)
    bedroc2 = screener.calcBEDROC(args.a2)
    return args.w1 * bedroc1 + args.w2 * bedroc2


[docs]def compute_tol_search_direct(tol_probe_hypo_files):
    """
    Returns a pseudo-gradient search direction based on changes in weighted
    BEDROC score from +/- changes to positional tolerances.

    :param tol_probe_hypo_files: Hypotheses with +/- changes in each tolerance
    :type tol_probe_hypo_files: list(str)

    :return: Tolerance search direction
    :rtype: list(float)
    """

    direct = []
    for file1, file2 in zip(*[iter(tol_probe_hypo_files)] * 2):
        # file1 will be 0, 2, 4, etc.; file2 will be 1, 3, 5, etc.
        hypo1 = phase.PhpHypoAdaptor(file1)
        score1 = float(hypo1.getProp(phase.PHASE_WEIGHTED_BEDROC))
        hypo2 = phase.PhpHypoAdaptor(file2)
        score2 = float(hypo2.getProp(phase.PHASE_WEIGHTED_BEDROC))
        d = 0.0
        if score1 > score2:
            d = 1.0
        elif score2 > score1:
            d = -1.0
        direct.append(d)

    return direct


[docs]def refine_hypo_sites(args):
    """
    Adjust positions/orientations of hypothesis sites to improve fitness
    scores of actives relative to decoys.

    :param args: Command line arguments
    :type args: argparse.Namespace

    :return: Adjusted hypothesis
    :rtype: phase.PhpHypoAdaptor
    """

    hypo = phase.PhpHypoAdaptor(args.hypo)
    optimizer_settings = phase.PhpHypoOptimizerSettings()
    optimizer_settings.max_cycles = args.move
    optimizer_settings.verbose = True
    if hypo.hasRules() and hypo.getRules().hasMixedFeatures():
        optimizer_settings.vector_weight = 0.0
    jobname = phase_utils.get_jobname(args, args.hypo)
    hit_file = jobname + "-hits.maegz"
    active_titles = project_utils.get_ligand_titles(args.actives)
    optimizer = phase.PhpHypoOptimizer(hypo, hit_file, active_titles,
                                       optimizer_settings)
    hypo_opt = optimizer.getHypoOpt()
    # Restore original attributes from extended sites data.
    if hypo.hasRad() or hypo.hasRules() or hypo.hasTol():
        attr_pairs = [(hypo.hasRad(), "rad"), (hypo.hasRules(), "rules"),
                      (hypo.hasTol(), "tol")]
        hypo_opt.buildAllAttr(True)
        for has_attr, attr_name in attr_pairs:
            if not has_attr:
                hypo_opt.deleteAttr(attr_name)
    # Need to copy baseline BEDROC score from incoming hypothesis because
    # properties are not preserved during optimization.
    baseline_score = float(hypo.getProp(phase.PHASE_WEIGHTED_BEDROC_BASELINE))
    hypo_opt.addProp(phase.PHASE_WEIGHTED_BEDROC_BASELINE, baseline_score)
    return hypo_opt


[docs]def refine_hypo_xvol(args):
    """
    Runs an active/decoy screen with the current hypothesis, adds excluded
    volumes that clash only with the decoys, reruns the active/decoy screen,
    and saves the higher scoring hypothesis to args.hypo. Note that excluded
    volumes will not necessarily yield a higher score if exhaustive matching
    isn't used throughout the refinement procedure. Returns the weighted BEDROC
    score obtained with the excluded volumes (even if lower) and the elapsed
    time. If excluded volumes could not be added because no decoys were matched,
    the returned score will be None.

    :param args: Command line arguments
    :type args: argparse.Namespace

    :return: tuple of xvol weighted BEDROC score and elapsed time
    :rtype: float, float
    """

    t1 = time.time()
    jobname = phase_utils.get_jobname(args, args.hypo)
    hit_file = jobname + "-hits.maegz"
    score_orig = run_bedroc_screen(args,
                                   args.hypo,
                                   args.actives,
                                   args.decoys,
                                   hit_file=hit_file,
                                   report_time=False)

    active_titles = set(
        project_utils.get_ligand_titles(args.actives, unique=True))
    active_hit_file = jobname + "_active-hits.maegz"
    decoy_hit_file = jobname + "_decoy-hits.maegz"

    decoy_count = 0
    with structure.StructureReader(hit_file) as reader, \
         structure.StructureWriter(active_hit_file) as active_writer, \
         structure.StructureWriter(decoy_hit_file) as decoy_writer:
        for st in reader:
            if st.title in active_titles:
                active_writer.append(st)
            else:
                decoy_writer.append(st)
                decoy_count += 1

    if decoy_count == 0:
        t2 = time.time()
        return None, t2 - t1

    hypo_orig = phase.PhpHypoAdaptor(args.hypo)
    hypo_id = fileutils.splitext(args.hypo)[0]
    command = [
        XVOL_CLASH, "-hypo", hypo_id, "-pos", active_hit_file, "-neg",
        decoy_hit_file, "-buff",
        str(args.xvol), "-grid",
        str(args.grid)
    ]
    subprocess.run(command)

    score_xvol = run_bedroc_screen(args,
                                   args.hypo,
                                   args.actives,
                                   args.decoys,
                                   report_time=False)

    # Keep new hypothesis only if score increased.
    if score_xvol > score_orig:
        hypo = phase.PhpHypoAdaptor(args.hypo)
        hypo_utils.save_hypo_with_score(hypo, score_xvol, args.hypo)
    else:
        # We have to re-save because create_xvolClash overwrote it.
        hypo_orig.save(args.hypo, True)

    t2 = time.time()
    return score_xvol, t2 - t1


[docs]def run_bedroc_screen(args,
                      hypo_file,
                      actives_proj,
                      decoys_proj,
                      hit_file=None,
                      report_time=True):
    """
    Runs an active/decoy screen and returns the weighted BEDROC score and
    elapsed time.

    :param args: Command line arguments
    :type args: argparse.Namespace

    :param hypo_file: Hypothesis file
    :type hypo_file: str

    :param actives_proj: Zipped actives project
    :type actives_proj: str

    :param decoys_proj: Zipped decoys project
    :type decoys_proj: str

    :param hit_file: Maestro file for hits, or None if hits are not needed
    :type hit_file: str

    :param report_time: Whether to report elapsed time for screen
    :type report_time: bool

    :return: Weighted BEDROC score
    :rtype: float
    """

    # Tampering with licensing is a violation of the license agreement
    lic = phase.PhpLicenseDBSearch()
    lic.checkLicense()
    t1 = time.time()
    match_options = hypo_utils.get_match_options(
        phase.PhpHypoAdaptor(hypo_file), args.ex)
    screener = bedroc_screener.BedrocScreener(hypo_file, match_options)
    screener.screen(actives_proj, decoys_proj, hit_file)
    score = compute_score(args, screener)
    if report_time:
        elapsed_time = time.time() - t1
        logger.info("Elapsed time for screen = %.2f sec" % elapsed_time)

    return score


[docs]def setup_distributed_mask_screens(args):
    """
    Sets up distributed BEDROC screens that do exhaustive exploration of site
    masks which allow one or more sites to be missed.

    :param args: argparser.Namespace with command line options
    :type args: argparser.Namespace

    :return: list of subjob commands
    :rtype: list(list(str))
    """

    commands = []
    hypo = phase.PhpHypoAdaptor(args.hypo)
    site_masks = hypo_utils.get_site_masks(hypo, args.miss)
    common_args = job_utils.get_common_args(args) + ["-mask_screen", "-subjob"]
    prefix = phase_utils.get_jobname(args, args.hypo) + "_mask"
    subjob_names = phase_utils.get_subjob_names(len(site_masks), prefix)
    for subjob_name, site_mask in zip(subjob_names, site_masks):
        hypo_with_mask = phase.PhpHypoAdaptor(hypo)
        hypo_with_mask.addMask(site_mask)
        min_sites = sum(site_mask.getSiteMaskVector())
        hypo_with_mask.addProp(phase.PHASE_MIN_SITES, min_sites)
        hypo_file = subjob_name + phase.PHASE_HYPO_FILE_EXT
        hypo_with_mask.save(hypo_file, True)
        command = [
            option_utils.PHASE_HYPO_REFINE, hypo_file, args.actives, args.decoys
        ] + common_args + [subjob_name]
        commands.append(command)

    return commands


[docs]def setup_distributed_tol_direct_screens(args, direct):
    """
    Sets up distributed BEDROC screens that vary positional tolerances along
    the provided direction.

    :param args: argparser.Namespace with command line options
    :type args: argparser.Namespace

    :param direct: Direction of tolerance shift for each site in args.hypo
    :type direct: list(float)

    :return: list of subjob commands
    :rtype: list(list(str))
    """

    commands = []
    hypo = phase.PhpHypoAdaptor(args.hypo)
    site_tol_direct = []
    dstep = args.tol / float(args.steps)
    for i in range(1, args.steps + 1):
        step_size = i * dstep
        site_tol_direct.append(hypo_utils.get_site_tol(hypo, step_size, direct))
    common_args = job_utils.get_common_args(args) + ["-tol_screen", "-subjob"]
    prefix = phase_utils.get_jobname(args, args.hypo) + "_tol_direct"
    subjob_names = phase_utils.get_subjob_names(len(site_tol_direct), prefix)
    for subjob_name, site_tol in zip(subjob_names, site_tol_direct):
        hypo_with_tol = phase.PhpHypoAdaptor(hypo)
        hypo_with_tol.addTol(site_tol)
        hypo_file = subjob_name + phase.PHASE_HYPO_FILE_EXT
        hypo_with_tol.save(hypo_file, True)
        command = [
            option_utils.PHASE_HYPO_REFINE, hypo_file, args.actives, args.decoys
        ] + common_args + [subjob_name]
        commands.append(command)

    return commands


[docs]def setup_distributed_tol_probe_screens(args):
    """
    Sets up distributed BEDROC screens that do exploration of +/- changes to
    each positional tolerance.

    :param args: argparser.Namespace with command line options
    :type args: argparser.Namespace

    :return: list of subjob commands
    :rtype: list(list(str))
    """

    commands = []
    hypo = phase.PhpHypoAdaptor(args.hypo)
    # Take half the maximum step to probe the BEDROC surface.
    site_tol_probes = hypo_utils.get_site_tol_probes(hypo, 0.5 * args.tol)
    common_args = job_utils.get_common_args(args) + ["-tol_screen", "-subjob"]
    prefix = phase_utils.get_jobname(args, args.hypo) + "_tol_probes"
    subjob_names = phase_utils.get_subjob_names(len(site_tol_probes), prefix)
    for subjob_name, site_tol in zip(subjob_names, site_tol_probes):
        hypo_with_tol = phase.PhpHypoAdaptor(hypo)
        hypo_with_tol.addTol(site_tol)
        hypo_file = subjob_name + phase.PHASE_HYPO_FILE_EXT
        hypo_with_tol.save(hypo_file, True)
        command = [
            option_utils.PHASE_HYPO_REFINE, hypo_file, args.actives, args.decoys
        ] + common_args + [subjob_name]
        commands.append(command)

    return commands