"""
Functions that combine macromodel.utils and macromodel.tools to create
mini-applications and workflows.
While these may be useful in themselves, they also serve as examples of
how to use the other modules in this package.
Copyright Schrodinger, LLC. All rights reserved.
"""
# Contributors: K Shawn Watts
################################################################################
# Packages
################################################################################
import os
import re
import subprocess
import schrodinger.application.macromodel.tools as mt
import schrodinger.application.macromodel.utils as mu
import schrodinger.job.jobcontrol as jc
import schrodinger.structure as structure
################################################################################
# Functions
################################################################################
def converge_search(mae_file="", method="MCMM", steps=5000, blocks=5, **kwargs):
    """
    This function runs a series of conformational searches plus multiple
    minimization blocks, in order to converge a search of conformational
    space.  Convergence in this context is defined as the searches locating
    no new conformers.  The function returns True if the conformer count
    converges across the blocks, and False otherwise.
    The function accepts optional keyword=value pairs to build the
    macromodel.utils.ComUtil instance so that non-default parameters can
    be used to run the job.
    Each block consists of:
    a) running 'steps' iterations of confsearch 'method' with mild
    minimization convergence threshold (0.05 RMSD gradient).  Method
    values may include 'MCMM' (monte carlo multiple minima), 'LMOD'
    (low mode), 'SPMC' (systematic torsional sampling), 'LLMD' (large
    scale low mode), 'MCMMLMOD' (mcmm+lmod), 'MCMMLLMD' (mcmm+llmd),
    'CGEN' (ConfGen) or 'MIXED' (a series of different methods: cgen,
    mcmm, spmc, lmod, mcmm+lmod, mcmm, mcmm, ...).
    b) running a Mult minimization with tighter convergence threshold
    (0.001 RMSD gradient)
    c) counting the number of minimized confs found
    d) initializing the next search with minimized confs, and a new SEED.
    Blocks are repeated until 'blocks' total blocks have been performed OR
    the number of minimized confs found does not increase between two
    successive blocks and at least three blocks have been performed.
    Depends on ComUtil for writing job files.  Depends on
    macromodel.tools.count() for assessing convergence.
    Raises a RuntimeError if a task does not succeed.  Raises a generic
    Exception if the search method can't be determined.
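    A minimal usage sketch (the input file name and keyword values are
    illustrative; 'ffld' is simply forwarded to the ComUtil constructor):
        converged = converge_search(mae_file='ligand.mae', method='LMOD',
                                    steps=2000, blocks=4, ffld='OPLS_2005')
        if not converged:
            print("No convergence; consider more steps or blocks.")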
    """
    # get instance, customized with **kwargs
    mcu = mu.ComUtil(**kwargs)
    #
    # Set up
    #
    i = 1  # block counter
    srch_conf_count = mt.count(mae_file)
    mini_conf_count = mt.count(mae_file)
    block_conf_count = mt.count(mae_file)
    mcu.MINI[1] = 9  # TNCG, should be close to minimized
    mcu.SEED[1] = 80000
    # perform the search+minimization blocks
    while i <= blocks:
        print(
            "converge_search() starting stage %d of %d conf searches with %d confs."
            % (i, blocks, block_conf_count))
        #
        # do the conf search
        #
        # fashion comfile name
        com_file = "".join(["search_", str(i), ".com"])
        # relax the convergence criteria for the search (it is tight for Mult)
        mcu.CONV[1] = 2  # converge on derivative gradient
        mcu.CONV[2] = 0  # extent modifier
        mcu.CONV[5] = 0.05  # extent converge on gradient
        # increment seed to get different trajectory
        mcu.SEED[1] += 1
        # select method, write the com file
        if method == 'MCMM':
            mcu.MCMM[1] = steps
            com_file = mcu.mcmm(mae_file, com_file)
        elif method == 'LMOD':
            mcu.LMCS[1] = steps
            com_file = mcu.lmod(mae_file, com_file)
        elif method == 'SPMC':
            mcu.SPMC[1] = steps
            com_file = mcu.spmc(mae_file, com_file)
        elif method == 'LLMD':
            mcu.LMC2[1] = steps
            com_file = mcu.llmd(mae_file, com_file)
        elif method == 'CGEN':
            mcu.CGEN[1] = steps
            com_file = mcu.cgen(mae_file, com_file)
        elif method == 'MCMMLMOD':
            mcu.LMCS[1] = steps
            com_file = mcu.mcmmlmod(mae_file, com_file)
        elif method == 'MCMMLLMD':
            mcu.LMC2[1] = steps
            com_file = mcu.mcmmllmd(mae_file, com_file)
        elif method == 'MIXED':
            if i == 1:
                mcu.CGEN[1] = steps
                com_file = mcu.cgen(mae_file, com_file)
            if i == 2:
                mcu.MCMM[1] = steps
                com_file = mcu.mcmm(mae_file, com_file)
            if i == 3:
                mcu.SPMC[1] = steps
                com_file = mcu.spmc(mae_file, com_file)
            if i == 4:
                mcu.LMCS[1] = steps
                com_file = mcu.lmod(mae_file, com_file)
            if i == 5:
                mcu.LMCS[1] = steps
                com_file = mcu.mcmmlmod(mae_file, com_file)
            if i > 5:
                mcu.MCMM[1] = steps
                com_file = mcu.mcmm(mae_file, com_file)
        # panic
        else:
            raise Exception("converge_search() can't determine method. ")
        # Run the csearch job
        csearch_job = jc.launch_job(mcu.getLaunchCommand(com_file))
        csearch_job.wait(mcu.interval)
        if not csearch_job.succeeded():
            msg = "converge_search() job failed at stage %d of %d csearch.  Please see log files for details." % (
                i, blocks)
            raise RuntimeError(msg)
        mae_file = csearch_job.StructureOutputFile
        #
        # minimize the results
        #
        srch_conf_count = mt.count(mae_file)
        print(
            "converge_search() starting stage %d of %d multmini with %d confs."
            % (i, blocks, srch_conf_count))
        # tighten convergence threshold to get unique structures
        mcu.CONV[1] = 2  # converge on derivative gradient
        mcu.CONV[2] = 0  # extent modifier
        mcu.CONV[5] = 0.001  # extent converge on gradient
        com_file = "".join(["mini_", str(i), ".com"])
        mult_mini_job = jc.launch_job(
            mcu.getLaunchCommand(mcu.mult(mae_file, com_file)))
        mult_mini_job.wait(mcu.interval)
        if not mult_mini_job.succeeded():
            msg = "converge_search() job failed at stage %d of %d minimization.  Please see logs file for details." % (
                i, blocks)
            raise RuntimeError(msg)
        mae_file = mult_mini_job.StructureOutputFile
        mini_conf_count = mt.count(mae_file)
        # test for early exit (3 blocks, and no more new confs)
        if i > 2 and block_conf_count >= mini_conf_count:
            # we are done
            print(
                "converge_search() converged on %d conformers in %d stages of %d steps each."
                % (mini_conf_count, i, steps))
            return True
        # increment stage, and set up for next search
        i += 1
        block_conf_count = mini_conf_count
    print(
        "converge_search() found %d conformers in %d stages, of %d steps each, but no evidence for convergence."
        % (block_conf_count, blocks, steps))
    return False 
def get_rep_confs(mae_file="", max_confs=5, steps=5000, **kwargs):
    """
    This function combines a series of tasks to automate the processing of
    non-conformers into a smaller, representative collection of conformers.
    Returns a list of the file names that contain representative confs,
    grouped by input serial number.
    This function accepts the name of a mae file containing a set of
    non-conformers, optionally the max number of representative confs,
    optionally the number of search steps, optional kwargs for the
    ComUtil object, and then:
    a) performs a serial conformation search of non-conformers
    b) groups output of conformational search by serial
       number into multiple files
    c) performs multiple minimization of each serial number
    d) reduces output of each serial number with cluster,
       to N representative confs.
    The default maximum number of conformers per serial filtered by
    Cluster is 5.  The default number of steps used in the Monte Carlo
    Multiple Minima search is 5000.  Returns the list of representative
    file names.
    Depends on macromodel.utils.CluUtil() class for clustering.  Depends
    on macromodel.utils.ComUtil() for writing and running com files.
    Depends on macromodel.tools.serial_split() for grouping output.
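    A minimal usage sketch ('ligands.mae' is an illustrative input file of
    non-conformers):
        rep_files = get_rep_confs(mae_file='ligands.mae', max_confs=3,
                                  steps=1000)
        for name in rep_files:
            print(name)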
    """
    # get instances
    mcu = mu.ComUtil(**kwargs)
    mxu = mu.CluUtil()
    # set object values
    mcu.serial = True  # mandatory serial
    mcu.MCMM[1] = steps
    mcu.MCOP[1] = 100
    # define lists for intermediate files
    serial_files = []  # name of files from serial split
    mult_files = []  # name of files mult mini
    qp_files = []  # name of files clustered (used to be sent to QikProp)
    cleanup_files = []  # all the intermediate file names for removal
    # Perform serial AUTO search
    print("get_rep_confs():  starting serial conformation search...")
    search_job = jc.launch_job(mcu.getLaunchCommand(mcu.mcmm(mae_file)))
    search_job.wait(mcu.interval)
    search_out_mae_file = search_job.StructureOutputFile
    cleanup_files.append(search_out_mae_file)
    # Filter CTs in file, write groups of like serial numbers.
    print(
        "get_rep_confs():  separating conformation search output by serial number..."
    )
    serial_files = mt.serial_split(search_out_mae_file)
    # Perform  MultMini
    print("get_rep_confs():  starting sequence of multiple minimizations...")
    for file in serial_files:
        mult_job = jc.launch_job(mcu.getLaunchCommand(mcu.mult(file)))
        mult_job.wait(mcu.interval)
        mult_files.append(mult_job.StructureOutputFile)
    # Cluster to reduce output to representative sample
    print(
        "get_rep_confs():  starting sequence of cluster conformation reductions..."
    )
    for file in mult_files:
        # figure out cluster thresh level that generates the desired max confs
        # thresh level = N - M + 1
        # N = total number of confs
        # M = desired max number of confs
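        # e.g. (illustrative numbers): with N = 20 minimized confs and
        # M = 5, thresh = 20 - 5 + 1 = 16, the clustering level aimed at
        # keeping at most 5 representative confs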
        # generate the name
        out_file = file
        mae_re = re.compile(r'-out\.mae$')
        if mae_re.search(out_file):
            out_file = mae_re.sub('-clust_rep.mae', out_file, 1)
        else:
            raise Exception(
                "get_rep_confs() can't determine file name pattern. ")
        # determine if the user's max is possible; if not, just copy the file
        count = mt.count(file)
        # user input desired max
        thresh = count - max_confs + 1
        if count > max_confs:
            # generate command file args
            mxu.arms[0] = 'heavy'  # atoms for RMS after superim
            mxu.writerep = " ".join([repr(thresh), out_file, "all"])
            # run cluster
            mxu.doCluster(file)
        else:
            os.rename(file, out_file)
        # add file to the list of representative conf files to be returned
        qp_files.append(out_file)
    # Clean up
    print("get_rep_confs():  cleaning up intermediate files...")
    for file in mult_files:
        cleanup_files.append(file)
    for file in serial_files:
        cleanup_files.append(file)
    for file in cleanup_files:
        if os.path.exists(file):
            os.unlink(file)
    print("get_rep_confs():  finished.  ")
    return qp_files 
def get_lead_confs(mae_file="", max_confs=5, steps=5000, **kwargs):
    """
    This function combines a series of tasks to automate the processing of
    non-conformers into a smaller, representative collection of conformers.
    Returns a list of the file names that contain leading (lowest energy)
    cluster confs, grouped by input serial number.
    This function accepts the name of a mae file containing a set of
    non-conformers, optionally the max number of leading energy confs,
    optionally the number of search steps, optional kwargs for the
    ComUtil object, and then:
    a) performs a serial conformation search of non-conformers
    b) groups output of conformational search by serial
       number into multiple files
    c) performs multiple minimization of each serial number
    d) reduces output of each serial number with cluster,
       to N leading energy confs.
    The default maximum number of conformers per serial filtered by
    Cluster is 5.  The default number of steps used in the Monte Carlo
    Multiple Minima search is 5000.  Returns the list of leading energy
    conf file names.
    Depends on macromodel.utils.CluUtil() class for clustering.  Depends
    on macromodel.utils.ComUtil() for writing and running com files.
    Depends on macromodel.tools.serial_split() for grouping output.
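    A minimal usage sketch ('ligands.mae' is an illustrative input file of
    non-conformers):
        lead_files = get_lead_confs(mae_file='ligands.mae', max_confs=3,
                                    steps=1000)
        for name in lead_files:
            print(name)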
    """
    # get instances
    mcu = mu.ComUtil(**kwargs)
    mxu = mu.CluUtil()
    # set object values
    mcu.wait = True  # mandatory wait, jc_wait optionally defined in object
    mcu.serial = True  # mandatory serial
    mcu.MCMM[1] = steps
    mcu.MCOP[1] = 100
    # define lists for intermediate files
    serial_files = []  # name of files from serial split
    mult_files = []  # name of files mult mini
    qp_files = []  # name of files clustered (used to be sent to QikProp)
    cleanup_files = []  # all the intermediate file names for removal
    # Perform serial AUTO search
    print("get_lead_confs():  starting serial conformation search...")
    search_job = jc.launch_job(mcu.getLaunchCommand(mcu.mcmm(mae_file)))
    search_job.wait(mcu.interval)
    search_out_mae_file = search_job.StructureOutputFile
    cleanup_files.append(search_out_mae_file)
    # Filter CTs in file, write groups of like serial numbers.
    print(
        "get_lead_confs():  separating conformation search output by serial number..."
    )
    serial_files = mt.serial_split(search_out_mae_file)
    # Perform  MultMini
    print("get_lead_confs():  starting sequence of multiple minimizations...")
    for file in serial_files:
        mult_job = jc.launch_job(mcu.getLaunchCommand(mcu.mult(file)))
        mult_job.wait(mcu.interval)
        mult_files.append(mult_job.StructureOutputFile)
    # Cluster to reduce output to leading energy sample
    print(
        "get_lead_confs():  starting sequence of cluster conformation reductions..."
    )
    for file in mult_files:
        # figure out cluster thresh level that generates the desired max confs
        # thresh level = N - M + 1
        # N = total number of confs
        # M = desired max number of confs
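        # e.g. (illustrative numbers): with N = 20 minimized confs and
        # M = 5, thresh = 20 - 5 + 1 = 16, the clustering level aimed at
        # keeping at most 5 lowest-energy confs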
        # generate the name
        out_file = file
        mae_re = re.compile(r'-out\.mae$')
        if mae_re.search(out_file):
            out_file = mae_re.sub('-clust_lead.mae', out_file, 1)
        else:
            raise Exception(
                "get_lead_confs() can't determine file name pattern. ")
        # determine if the user's max is possible; if not, just copy the file
        count = mt.count(file)
        # user input desired max
        thresh = count - max_confs + 1
        if count > max_confs:
            # generate command file args
            mxu.arms[0] = 'heavy'  # atoms for RMS after superim
            mxu.writelead = " ".join([repr(thresh), out_file, "all"])
            # run cluster
            mxu.doCluster(file)
        else:
            os.rename(file, out_file)
        # add file to the list of leading energy conf files to be returned
        qp_files.append(out_file)
    # Clean up
    print("get_lead_confs():  cleaning up intermediate files...")
    for file in mult_files:
        cleanup_files.append(file)
    for file in serial_files:
        cleanup_files.append(file)
    for file in cleanup_files:
        if os.path.exists(file):
            os.unlink(file)
    print("get_lead_confs():  finished.  ")
    return qp_files 
def get_energy(structure=structure.Structure,
               ffld='OPLS_2005',
               solvent=False,
               cleanup=True,
               units=""):
    """
    Returns a dictionary of MacroModel's molecular mechanics terms
    generated from a single point energy calculation.
    The dictionary may be restricted to a collection of values with
    a particular unit, kJ/mol or kcal/mol.  By default, the function
    cleans up all intermediate files created by the job.
    The 'structure' argument must be a schrodinger.structure.Structure
    object.
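    A minimal usage sketch ('ligand.mae' is an illustrative input file;
    keys combine the term name and unit, e.g. 'Total energy kcal/mol'):
        st = next(structure.StructureReader('ligand.mae'))
        terms = get_energy(st, units='kcal')
        print(terms['title'], terms.get('Total energy kcal/mol'))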
    """
    # Returned dict
    mmod_energy = {}
    mmod_energy['title'] = structure.title
    # Mmod energy terms
    terms = "|".join([
        'Total energy', 'Stretch', 'Bend', 'Proper torsion', 'Out-of-plane',
        'Van der Waals', 'Electrostatic', 'Explcit Hydrogen Bonds',
        'All solvation', 'Solvation Term 1', 'Solvation Term 2'
    ])
    # Compile the re for snarfing energy terms
    energy_re = re.compile(r'(%s):\s+(\S+)\s+kJ\/mol\s+\(\s+(\S+) kcal' % terms)
    # JobIO file prep and clean up
    base_name = 'mmod_energy_calc'
    mae_file_name = ''.join([base_name, '.mae'])
    mmo_file_name = ''.join([base_name, '-out.mmo'])
    out_file_name = ''.join([base_name, '-out.mae'])
    com_file_name = ''.join([base_name, '.com'])
    cleanup_files = [mae_file_name, mmo_file_name, out_file_name, com_file_name]
    # Remove any previous debris
    for file in cleanup_files:
        if cleanup and os.path.exists(file):
            os.remove(file)
    # Get ComUtil customized with ffld but no job control; we will capture stdout
    mcu = mu.ComUtil(ffld=ffld, solv=solvent)
    mcu.no_redirect = True
    mcu.ELST[1] = 0  # terse reporting of terms
    mcu.ELST[2] = 1  # report in kcal/mol units
    # Write out our structure file, clobber if needed
    structure.write(mae_file_name)
    # Write the comfile, get the command to invoke the job
    cmd = mcu.getLaunchCommand(mcu.elst(mae_file_name))
    cmd[0] = os.path.join(os.getenv('SCHRODINGER'), cmd[0])
    # Run the job and capture stdout.  subprocess is used here because
    # os.popen3 is not available in Python 3.
    proc = subprocess.Popen(
        cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
        universal_newlines=True)
    job_stdout, _ = proc.communicate()
    # Parse stdout for the terms
    for line in job_stdout.splitlines():
        print(line)
        match = energy_re.search(line)
        if match:
            # make keys for both unit types
            key_kJ_per_mol = " ".join([match.group(1), "kJ/mol"])
            key_kcal_per_mol = " ".join([match.group(1), "kcal/mol"])
            # but only populate with requested values
            if units.startswith('kcal'):
                mmod_energy[key_kcal_per_mol] = match.group(3)
            elif units.startswith('kJ'):
                mmod_energy[key_kJ_per_mol] = match.group(2)
            else:
                mmod_energy[key_kJ_per_mol] = match.group(2)
                mmod_energy[key_kcal_per_mol] = match.group(3)
    # Don't leave a mess
    for file in cleanup_files:
        if cleanup and os.path.exists(file):
            os.remove(file)
    return mmod_energy 
# EOF