Source code for schrodinger.application.phase.packages.align_ligands_driver_options

"""
Provides argument parsing and validation for align_ligands_driver.py.

Copyright Schrodinger LLC, All Rights Reserved.
"""

import argparse
import os

from schrodinger import structure
from schrodinger.application.phase.packages import ligand_aligner
from schrodinger.application.phase.packages import phase_utils
from schrodinger.infra import canvas
from schrodinger.infra import phase
from schrodinger.utils import cmdline

# Program name; passed as ``prog`` to the argument parser built in get_parser.
ALIGN_LIGANDS = "align_ligands"

# Phase file format codes that validate_args accepts for both the input and
# the output structure file.
LEGAL_STRUCTURE_FILE_FORMATS = [
    phase.PhpFileFormat_PHP_FORMAT_MAE, phase.PhpFileFormat_PHP_FORMAT_SD
]

# Maps each legal -input_treatment command line choice to the corresponding
# phase.RefinerInputTreatment_* constant. The keys double as the argparse
# ``choices`` for that option.
INPUT_TREATMENT = {
    "keep": phase.RefinerInputTreatment_KEEP,
    "replace": phase.RefinerInputTreatment_REPLACE,
    "auto": phase.RefinerInputTreatment_AUTO
}

# The two legal values of the -terminal command line option.
SAMPLE_TERMINAL_HYDROGENS = "sample"
ALIGN_TERMINAL_HYDROGENS = "align"


def get_aligner_options(args):
    """
    Translates parsed command line arguments into ligand aligner options.

    :param args: Parsed command line options; must carry the refine,
        fail_on_bad, terminal, ignore_sidechains, sample, max, minimize,
        use_sampled_ref and close_contact attributes
    :type args: argparse.Namespace

    :return: Ligand aligner options populated from args
    :rtype: ligand_aligner.LigandAlignerOptions
    """
    aligner_options = ligand_aligner.LigandAlignerOptions()
    aligner_options.refine = args.refine
    aligner_options.fail_on_bad = args.fail_on_bad

    # -terminal is a two-way choice; the options object only records whether
    # the "align" behavior was requested.
    aligner_options.align_terminal_hydrogens = (
        args.terminal == ALIGN_TERMINAL_HYDROGENS)

    # The remaining settings are copied across unchanged, as
    # (attribute on the options object, attribute on args) pairs.
    passthrough = (
        ("ignore_sidechains", "ignore_sidechains"),
        ("sampling_method", "sample"),
        ("max_confs", "max"),
        ("minimize_confs", "minimize"),
        ("use_sampled_ref", "use_sampled_ref"),
        ("close_contact_tol", "close_contact"),
    )
    for option_name, arg_name in passthrough:
        setattr(aligner_options, option_name, getattr(args, arg_name))

    return aligner_options


def get_parser():
    """
    Builds the argparse.ArgumentParser for the align_ligands command line.

    The parser defines the positional input file, the alignment and
    refinement options, and the HOST, JOBNAME, TMPDIR and NOJOBID job
    control options.

    :return: Argument parser object
    :rtype: argparse.ArgumentParser
    """
    arg_parser = argparse.ArgumentParser(
        prog=ALIGN_LIGANDS,
        formatter_class=argparse.RawDescriptionHelpFormatter)

    # Defaults that are both passed to argparse and quoted in help text.
    default_sampling = phase.CONF_SAMPLE_COARSE_NAME
    default_max_confs = ligand_aligner.DEFAULT_MAX_CONFS
    default_close_contact = phase.DEFAULT_CLOSE_CONTACT_TOL

    # --- Input and output structure files ---
    arg_parser.add_argument(
        "infile",
        metavar="<infile>",
        help="Input Maestro or SD file with the structures to be aligned.")
    arg_parser.add_argument(
        "-o",
        dest="outfile",
        metavar="<outfile>",
        help="Output Maestro or SD file for aligned structures. Defaults to "
        "<jobname>_align.maegz, where <jobname> is derived from the basename "
        "of <infile>.")

    # --- Reference structure and common core ---
    # NOTE(review): RestrictedRange(0, None, False) presumably limits the
    # value to integers greater than 0 -- confirm against phase_utils.
    arg_parser.add_argument(
        "-ref",
        type=int,
        metavar="<ligand_number>",
        choices=[phase_utils.RestrictedRange(0, None, False)],
        help="Use the indicated input ligand (1, 2, etc.) as the reference "
        "structure to which all other ligands should be aligned. By default, "
        "one or more reference structures are chosen automatically after "
        "clustering by largest common Bemis-Murcko scaffold.")
    arg_parser.add_argument(
        "-core",
        metavar="{<smarts>,%s}" % ligand_aligner.MCS_CORES,
        help="If a reference structure has been designated, use the supplied "
        "SMARTS string or the MCS between each structure and the reference as "
        "the common core. In the case of MCS, bond orders and elemental types "
        "must match, ring atoms may match only other ring atoms, and complete "
        "rings must be matched. The default behavior is to align on the "
        "largest shared Bemis-Murcko scaffold.")

    # --- Refinement ---
    arg_parser.add_argument(
        "-refine",
        action="store_true",
        help="Refine alignments by generating additional conformers with core "
        "atoms held fixed, and identifying conformers which increase the "
        "average in-place shape similarity between all pairs of ligands. "
        "Conformers are not generated for the primary reference structure or a "
        "user-designated reference structure.")
    arg_parser.add_argument(
        "-freeze",
        metavar="<atomsfile>",
        help="Refine input alignments directly, holding user-defined sets of "
        "atoms fixed when generating conformers. Each non-blank line in "
        "<atomsfile> must contain a comma-separated list of 3 or more atom "
        "numbers that comprise a connected substructure in the associated "
        "ligand. A blank line (i.e., one that contains only a newline "
        "character) is interpreted to mean that conformers should not be "
        "generated for that ligand. Note that when this option is used, "
        "structures are not snapped onto one another, nor is there any concept "
        "of a reference structure. Rather, an attempt is made to select a "
        "conformer for each ligand that maximizes the average in-place shape "
        "similarity over all pairs of ligands.")
    arg_parser.add_argument(
        "-input_treatment",
        choices=list(INPUT_TREATMENT),
        default="keep",
        help="Input structure treatment when doing direct refinement: keep = "
        "use inputs as a starting point for refinement; replace = replace all "
        "inputs with the closest sampled conformer and perform refinement on "
        'those conformers; auto = make a pass with "keep" treatment and '
        'follow up with "replace" treatment if no improvement was achieved '
        "in first pass (default: %(default)s).")

    # --- Conformer generation ---
    arg_parser.add_argument(
        "-sample",
        choices=[default_sampling, phase.CONF_SAMPLE_FINE_NAME],
        default=default_sampling,
        help=f"Conformational sampling method (default: {default_sampling}).")
    arg_parser.add_argument(
        "-max",
        type=int,
        metavar="<numconfs>",
        default=default_max_confs,
        choices=[phase_utils.RestrictedRange(0, None, False)],
        help="Maximum number of conformers to generate (default: %d)." %
        default_max_confs)
    arg_parser.add_argument(
        "-minimize",
        action="store_true",
        help="Perform energy minimization on each conformer.")
    arg_parser.add_argument(
        "-terminal",
        default=ALIGN_TERMINAL_HYDROGENS,
        choices=[SAMPLE_TERMINAL_HYDROGENS, ALIGN_TERMINAL_HYDROGENS],
        help="Whether to conformationally sample rotatable terminal atoms ("
        "e.g., -CH3, -NH2, -OH) if they are part of the core, or align their "
        "closest pairs of hydrogens (default: %(default)s).")

    # --- Snapping behavior ---
    arg_parser.add_argument(
        "-fail_on_bad",
        action="store_true",
        help="Fail immediately if any structure contains multiple disconnected "
        "fragments or no rings. These structures cannot be processed and are "
        "quietly skipped by default.")
    arg_parser.add_argument(
        "-ignore_sidechains",
        action="store_true",
        help="Do not attempt to snap chemically identical sidechains onto "
        "one another in pairs of structures that share a common snapped core.")
    arg_parser.add_argument(
        "-use_sampled_ref",
        action="store_true",
        help="Replace the primary reference structure, which is chosen "
        "automatically or specified via -ref, with a sampled conformer that "
        "yields the best shape-based superposition to the corresponding input "
        "structure. This can sometimes result in a better consensus alignment "
        "since the primary reference structure is an actual sampled conformer, "
        "and as such may produce better superpositions with sampled conformers "
        "for other structures.")
    arg_parser.add_argument(
        "-close_contact",
        type=float,
        default=default_close_contact,
        metavar="<d>",
        help="Non-bonded close contact distance. If greater than 0, a snapped "
        "ligand structure is rejected when the act of snapping brings two non-"
        "bonded atoms within this distance of each other. Flexible least-"
        "squares alignment is done if every mapping of the core results in the "
        "creation of a close contact. A value of 0 disables close contact "
        f"detection (default: {default_close_contact}).")

    # --- Reporting ---
    arg_parser.add_argument(
        "-verbosity",
        type=int,
        choices=[0, 1, 2],
        default=1,
        help="Level of printed output: 0 = none, 1 = informative messages "
        "during the alignment process, 2 = additional messages triggered by "
        "core snapping failures (default: 1).")

    # --- Job control ---
    cmdline.add_jobcontrol_options(
        arg_parser,
        options=[
            cmdline.HOST, cmdline.JOBNAME, cmdline.TMPDIR, cmdline.NOJOBID
        ])

    return arg_parser


def validate_args(args):
    """
    Checks the validity of command line options.

    Checks run in a fixed order and stop at the first failure: input file
    format, input file existence, output file format, -ref within the number
    of input structures, -core usage (requires -ref; must be the MCS keyword
    or a parsable SMARTS), and existence of the -freeze atoms file.

    :param args: Parsed command line options
    :type args: argparse.Namespace

    :return: tuple of validity and error message if not valid; the message
        is an empty string when the arguments are valid
    :rtype: bool, str
    """
    infile_format = phase.get_phase_file_format(args.infile)
    if infile_format not in LEGAL_STRUCTURE_FILE_FORMATS:
        return False, "Input structures must be in Maestro or SD format"
    if not os.path.isfile(args.infile):
        return False, f'Input structure file "{args.infile}" not found'

    # NOTE(review): -o has no parser default in this module, so args.outfile
    # is presumably filled in by the caller before validation -- confirm.
    outfile_format = phase.get_phase_file_format(args.outfile)
    if outfile_format not in LEGAL_STRUCTURE_FILE_FORMATS:
        mesg = "Aligned structures must be written to a Maestro or SD file"
        return False, mesg

    if args.ref:
        # Counting structures requires reading the input file, so it is only
        # done when a reference ligand number was actually supplied.
        total_ligands = structure.count_structures(args.infile)
        if args.ref > total_ligands:
            mesg = ("Reference structure number (%d) exceeds total number "
                    "of ligands (%d)")
            return False, mesg % (args.ref, total_ligands)

    if args.core:
        if not args.ref:
            mesg = "Must designate a reference structure when specifying core"
            return False, mesg
        if args.core != ligand_aligner.MCS_CORES:
            # Anything other than the MCS keyword is treated as SMARTS. The
            # query is built only to check that it parses, so the object is
            # discarded. Catch Exception rather than using a bare except so
            # that KeyboardInterrupt and SystemExit still propagate.
            try:
                canvas.ChmQuery(args.core)
            except Exception:
                return False, f'Invalid SMARTS supplied: "{args.core}"'

    if args.freeze and not os.path.isfile(args.freeze):
        return False, f'Frozen atoms file "{args.freeze}" not found'

    return True, ""