"""
Provides argument parsing and validation for align_ligands_driver.py.
Copyright Schrodinger LLC, All Rights Reserved.
"""
import argparse
import os
from schrodinger import structure
from schrodinger.application.phase.packages import ligand_aligner
from schrodinger.application.phase.packages import phase_utils
from schrodinger.infra import canvas
from schrodinger.infra import phase
from schrodinger.utils import cmdline
# Program name handed to argparse as ``prog`` in get_parser().
ALIGN_LIGANDS = "align_ligands"
# Phase file formats accepted by validate_args() for both the input and the
# output structure file (Maestro and SD).
LEGAL_STRUCTURE_FILE_FORMATS = [
    phase.PhpFileFormat_PHP_FORMAT_MAE, phase.PhpFileFormat_PHP_FORMAT_SD
]
# Maps each legal -input_treatment command line value to the corresponding
# phase.RefinerInputTreatment_* constant. The keys double as the argparse
# choices for that option.
INPUT_TREATMENT = {
    "keep": phase.RefinerInputTreatment_KEEP,
    "replace": phase.RefinerInputTreatment_REPLACE,
    "auto": phase.RefinerInputTreatment_AUTO
}
# Legal values of the -terminal option; "align" is the parser default.
SAMPLE_TERMINAL_HYDROGENS = "sample"
ALIGN_TERMINAL_HYDROGENS = "align"
def get_aligner_options(args):
    """
    Builds a ligand aligner options object from parsed command line
    arguments.

    Each option field is copied straight from the like-named command line
    argument, except for align_terminal_hydrogens, which is True only when
    -terminal was given as ALIGN_TERMINAL_HYDROGENS.

    :param args: Parsed command line options
    :type args: argparse.Namespace

    :return: Ligand aligner options.
    :rtype: ligand_aligner.LigandAlignerOptions
    """
    # -terminal is a two-way string choice; the options object wants a bool.
    align_hydrogens = args.terminal == ALIGN_TERMINAL_HYDROGENS

    aligner_opts = ligand_aligner.LigandAlignerOptions()

    # Core alignment behavior.
    aligner_opts.refine = args.refine
    aligner_opts.fail_on_bad = args.fail_on_bad
    aligner_opts.align_terminal_hydrogens = align_hydrogens
    aligner_opts.ignore_sidechains = args.ignore_sidechains

    # Conformer generation.
    aligner_opts.sampling_method = args.sample
    aligner_opts.max_confs = args.max
    aligner_opts.minimize_confs = args.minimize

    # Reference handling and close contact detection.
    aligner_opts.use_sampled_ref = args.use_sampled_ref
    aligner_opts.close_contact_tol = args.close_contact

    return aligner_opts
def get_parser():
    """
    Builds the argparse.ArgumentParser that defines the align_ligands
    command line interface, including the supported job control options.

    :return: Argument parser object
    :rtype: argparse.ArgumentParser
    """
    arg_parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        prog=ALIGN_LIGANDS)
    add_option = arg_parser.add_argument

    # Defaults that appear both as the option default and in its help text.
    default_sampling = phase.CONF_SAMPLE_COARSE_NAME
    default_max_confs = ligand_aligner.DEFAULT_MAX_CONFS
    default_close_contact = phase.DEFAULT_CLOSE_CONTACT_TOL

    # --- Input and output files ---
    add_option(
        "infile",
        metavar="<infile>",
        help="Input Maestro or SD file with the structures to be aligned.")
    add_option(
        "-o",
        dest="outfile",
        metavar="<outfile>",
        help="Output Maestro or SD file for aligned structures. Defaults to "
        "<jobname>_align.maegz, where <jobname> is derived from the basename "
        "of <infile>.")

    # --- Reference structure and common core ---
    # NOTE(review): RestrictedRange(0, None, False) presumably restricts the
    # value to integers greater than 0 -- confirm against phase_utils.
    add_option(
        "-ref",
        type=int,
        metavar="<ligand_number>",
        choices=[phase_utils.RestrictedRange(0, None, False)],
        help="Use the indicated input ligand (1, 2, etc.) as the reference "
        "structure to which all other ligands should be aligned. By default, "
        "one or more reference structures are chosen automatically after "
        "clustering by largest common Bemis-Murcko scaffold.")
    add_option(
        "-core",
        metavar="{<smarts>,%s}" % ligand_aligner.MCS_CORES,
        help="If a reference structure has been designated, use the supplied "
        "SMARTS string or the MCS between each structure and the reference as "
        "the common core. In the case of MCS, bond orders and elemental types "
        "must match, ring atoms may match only other ring atoms, and complete "
        "rings must be matched. The default behavior is to align on the "
        "largest shared Bemis-Murcko scaffold.")

    # --- Refinement ---
    add_option(
        "-refine",
        action="store_true",
        help="Refine alignments by generating additional conformers with core "
        "atoms held fixed, and identifying conformers which increase the "
        "average in-place shape similarity between all pairs of ligands. "
        "Conformers are not generated for the primary reference structure or a "
        "user-designated reference structure.")
    add_option(
        "-freeze",
        metavar="<atomsfile>",
        help="Refine input alignments directly, holding user-defined sets of "
        "atoms fixed when generating conformers. Each non-blank line in "
        "<atomsfile> must contain a comma-separated list of 3 or more atom "
        "numbers that comprise a connected substructure in the associated "
        "ligand. A blank line (i.e., one that contains only a newline "
        "character) is interpreted to mean that conformers should not be "
        "generated for that ligand. Note that when this option is used, "
        "structures are not snapped onto one another, nor is there any concept "
        "of a reference structure. Rather, an attempt is made to select a "
        "conformer for each ligand that maximizes the average in-place shape "
        "similarity over all pairs of ligands.")
    add_option(
        "-input_treatment",
        default="keep",
        choices=list(INPUT_TREATMENT),
        help="Input structure treatment when doing direct refinement: keep = "
        "use inputs as a starting point for refinement; replace = replace all "
        "inputs with the closest sampled conformer and perform refinement on "
        'those conformers; auto = make a pass with "keep" treatment and '
        'follow up with "replace" treatment if no improvement was achieved '
        "in first pass (default: %(default)s).")

    # --- Conformer generation ---
    add_option(
        "-sample",
        default=default_sampling,
        choices=[default_sampling, phase.CONF_SAMPLE_FINE_NAME],
        help="Conformational sampling method (default: %s)." %
        default_sampling)
    add_option(
        "-max",
        type=int,
        metavar="<numconfs>",
        default=default_max_confs,
        choices=[phase_utils.RestrictedRange(0, None, False)],
        help="Maximum number of conformers to generate (default: %d)." %
        default_max_confs)
    add_option(
        "-minimize",
        action="store_true",
        help="Perform energy minimization on each conformer.")
    add_option(
        "-terminal",
        default=ALIGN_TERMINAL_HYDROGENS,
        choices=[SAMPLE_TERMINAL_HYDROGENS, ALIGN_TERMINAL_HYDROGENS],
        help="Whether to conformationally sample rotatable terminal atoms ("
        "e.g., -CH3, -NH2, -OH) if they are part of the core, or align their "
        "closest pairs of hydrogens (default: %(default)s).")

    # --- Alignment behavior switches ---
    add_option(
        "-fail_on_bad",
        action="store_true",
        help="Fail immediately if any structure contains multiple disconnected "
        "fragments or no rings. These structures cannot be processed and are "
        "quietly skipped by default.")
    add_option(
        "-ignore_sidechains",
        action="store_true",
        help="Do not attempt to snap chemically identical sidechains onto "
        "one another in pairs of structures that share a common snapped core.")
    add_option(
        "-use_sampled_ref",
        action="store_true",
        help="Replace the primary reference structure, which is chosen "
        "automatically or specified via -ref, with a sampled conformer that "
        "yields the best shape-based superposition to the corresponding input "
        "structure. This can sometimes result in a better consensus alignment "
        "since the primary reference structure is an actual sampled conformer, "
        "and as such may produce better superpositions with sampled conformers "
        "for other structures.")
    add_option(
        "-close_contact",
        type=float,
        metavar="<d>",
        default=default_close_contact,
        help=("Non-bonded close contact distance. If greater than 0, a snapped "
              "ligand structure is rejected when the act of snapping brings two non-"
              "bonded atoms within this distance of each other. Flexible least-"
              "squares alignment is done if every mapping of the core results in the "
              "creation of a close contact. A value of 0 disables close contact "
              "detection (default: {}).").format(default_close_contact))

    # --- Output verbosity ---
    add_option(
        "-verbosity",
        type=int,
        default=1,
        choices=[0, 1, 2],
        help="Level of printed output: 0 = none, 1 = informative messages "
        "during the alignment process, 2 = additional messages triggered by "
        "core snapping failures (default: 1).")

    # --- Job control ---
    # Only this subset of the standard job control options is exposed.
    supported_jobcontrol = [
        cmdline.HOST, cmdline.JOBNAME, cmdline.TMPDIR, cmdline.NOJOBID
    ]
    cmdline.add_jobcontrol_options(arg_parser, options=supported_jobcontrol)

    return arg_parser
def validate_args(args):
    """
    Checks the validity of command line options.

    The checks run in a fixed order and the first failure is reported:
    input file format and existence, output file format, -ref within the
    number of input structures, -core requiring -ref and being either
    ligand_aligner.MCS_CORES or a SMARTS string that canvas.ChmQuery
    accepts, and finally existence of the -freeze file.

    :param args: Parsed command line options
    :type args: argparse.Namespace

    :return: tuple of validity and error message if not valid
    :rtype: bool, str
    """
    # Input structure file: format first, then existence.
    infile_format = phase.get_phase_file_format(args.infile)
    if infile_format not in LEGAL_STRUCTURE_FILE_FORMATS:
        return False, "Input structures must be in Maestro or SD format"
    if not os.path.isfile(args.infile):
        return False, f"Input structure file \"{args.infile}\" not found"

    # Output structure file.
    # NOTE(review): the parser sets no default for -o, so this assumes the
    # caller has filled in args.outfile before validation -- confirm.
    outfile_format = phase.get_phase_file_format(args.outfile)
    if outfile_format not in LEGAL_STRUCTURE_FILE_FORMATS:
        mesg = "Aligned structures must be written to a Maestro or SD file"
        return False, mesg

    # Reference ligand number must not exceed the number of input structures.
    if args.ref:
        total_ligands = structure.count_structures(args.infile)
        if args.ref > total_ligands:
            mesg = "Reference structure number (%d) exceeds total number " + \
                   "of ligands (%d)"
            return False, mesg % (args.ref, total_ligands)

    # A core definition only makes sense relative to a reference structure.
    if args.core:
        if not args.ref:
            mesg = "Must designate a reference structure when specifying core"
            return False, mesg
        if args.core != ligand_aligner.MCS_CORES:
            # Constructing the query is the SMARTS validity check; the object
            # itself is not needed. Catch Exception rather than using a bare
            # except so KeyboardInterrupt and SystemExit still propagate.
            try:
                canvas.ChmQuery(args.core)
            except Exception:
                return False, f'Invalid SMARTS supplied: "{args.core}"'

    # Frozen atoms file for direct refinement.
    if args.freeze:
        if not os.path.isfile(args.freeze):
            return False, f"Frozen atoms file \"{args.freeze}\" not found"

    return True, ""