"""
Functions that combine macromodel.utils and macromodel.tools to create
mini-applications and workflows.
While these may be useful in themselves, they also serve as examples of
how to use the other modules in this package.
Copyright Schrodinger, LLC. All rights reserved.
"""
# Contributors: K Shawn Watts
################################################################################
# Packages
################################################################################
import os
import re
import subprocess

import schrodinger.application.macromodel.tools as mt
import schrodinger.application.macromodel.utils as mu
import schrodinger.job.jobcontrol as jc
import schrodinger.structure as structure
################################################################################
# Functions
################################################################################
def converge_search(mae_file="", method="MCMM", steps=5000, blocks=5, **kwargs):
    """
    Run a series of conformational searches plus multiple minimization
    blocks, in order to converge a search of conformational space.

    Convergence in this context is defined as the searches locating no
    new conformers.  Returns False if the blocks fail to converge on a
    number of conformers, else returns True.

    Each block consists of:
        a) running 'steps' iterations of confsearch 'method' with mild
           minimization convergence threshold (0.05 RMSD gradient).
           Method values may include 'MCMM' (monte carlo multiple
           minima), 'LMOD' (low mode), 'SPMC' (systematic torsional
           sampling), 'LLMD' (large scale low mode), 'MCMMLMOD'
           (mcmm+lmod), 'MCMMLLMD' (mcmm+llmd), 'CGEN' (ConfGen) or
           'MIXED' (a series of different methods: cgen, mcmm, spmc,
           lmod, mcmm+lmod, mcmm, mcmm, ...).
        b) running a Mult minimization with tighter convergence
           threshold (0.001 RMSD gradient)
        c) counting the number of minimized confs found
        d) initializing the next search with minimized confs, and a new
           SEED.

    Blocks are repeated until a total of 'blocks' have been performed OR
    the number of minimized confs found does not increase between two
    successive blocks and more than 3 blocks have been performed.

    Depends on ComUtil for writing job files.  Depends on
    macromodel.tools.count() for assessing convergence.

    :param mae_file: input Maestro structure file for the first search.
    :param method: conformational search method (see above).
    :param steps: number of search iterations per block.
    :param blocks: maximum number of search+minimization blocks.
    :param kwargs: optional keyword=value pairs used to build the
        macromodel.utils.ComUtil instance so that non-default
        parameters can be used to run the job.
    :return: True if the conformer count converged, else False.
    :raise RuntimeError: if a search or minimization task does not succeed.
    :raise Exception: if the search method can't be determined.
    """
    # get instance, customized with **kwargs
    mcu = mu.ComUtil(**kwargs)
    #
    # Set up
    #
    i = 1  # block counter
    # Conformer count from the previous block; seeds the convergence
    # test.  (One count suffices here: the search and minimization
    # counts are recomputed inside the loop before first use.)
    block_conf_count = mt.count(mae_file)
    mcu.MINI[1] = 9  # TNCG, should be close to minimized
    mcu.SEED[1] = 80000
    # perform the search+minimization blocks
    while i <= blocks:
        print(
            "converge_search() starting stage %d of %d conf searches with %d confs."
            % (i, blocks, block_conf_count))
        #
        # do the conf search
        #
        # fashion comfile name
        com_file = "".join(["search_", str(i), ".com"])
        # relax the convergence criteria for the search (it is tight for Mult)
        mcu.CONV[1] = 2  # converge on derivative gradient
        mcu.CONV[2] = 0  # extent modifier
        mcu.CONV[5] = 0.05  # extent converge on gradient
        # increment seed to get a different trajectory each block
        mcu.SEED[1] += 1
        # select method, write the com file
        if method == 'MCMM':
            mcu.MCMM[1] = steps
            com_file = mcu.mcmm(mae_file, com_file)
        elif method == 'LMOD':
            mcu.LMCS[1] = steps
            com_file = mcu.lmod(mae_file, com_file)
        elif method == 'SPMC':
            mcu.SPMC[1] = steps
            com_file = mcu.spmc(mae_file, com_file)
        elif method == 'LLMD':
            mcu.LMC2[1] = steps
            com_file = mcu.llmd(mae_file, com_file)
        elif method == 'CGEN':
            mcu.CGEN[1] = steps
            com_file = mcu.cgen(mae_file, com_file)
        elif method == 'MCMMLMOD':
            mcu.LMCS[1] = steps
            com_file = mcu.mcmmlmod(mae_file, com_file)
        elif method == 'MCMMLLMD':
            mcu.LMC2[1] = steps
            com_file = mcu.mcmmllmd(mae_file, com_file)
        elif method == 'MIXED':
            # the method varies with the block number; i is fixed for
            # the duration of a pass, so dispatch with elif
            if i == 1:
                mcu.CGEN[1] = steps
                com_file = mcu.cgen(mae_file, com_file)
            elif i == 2:
                mcu.MCMM[1] = steps
                com_file = mcu.mcmm(mae_file, com_file)
            elif i == 3:
                mcu.SPMC[1] = steps
                com_file = mcu.spmc(mae_file, com_file)
            elif i == 4:
                mcu.LMCS[1] = steps
                com_file = mcu.lmod(mae_file, com_file)
            elif i == 5:
                mcu.LMCS[1] = steps
                com_file = mcu.mcmmlmod(mae_file, com_file)
            else:
                # i > 5: fall back to plain MCMM
                mcu.MCMM[1] = steps
                com_file = mcu.mcmm(mae_file, com_file)
        # panic
        else:
            raise Exception("converge_search() can't determine method. ")
        # Run the csearch job
        csearch_job = jc.launch_job(mcu.getLaunchCommand(com_file))
        csearch_job.wait(mcu.interval)
        if not csearch_job.succeeded():
            msg = "converge_search() job failed at stage %d of %d csearch. Please see log files for details." % (
                i, blocks)
            raise RuntimeError(msg)
        mae_file = csearch_job.StructureOutputFile
        #
        # minimize the results
        #
        srch_conf_count = mt.count(mae_file)
        print(
            "converge_search() starting stage %d of %d multmini with %d confs."
            % (i, blocks, srch_conf_count))
        # tighten convergence threshold to get unique structures
        mcu.CONV[1] = 2  # converge on derivative gradient
        mcu.CONV[2] = 0  # extent modifier
        mcu.CONV[5] = 0.001  # extent converge on gradient
        com_file = "".join(["mini_", str(i), ".com"])
        mult_mini_job = jc.launch_job(
            mcu.getLaunchCommand(mcu.mult(mae_file, com_file)))
        mult_mini_job.wait(mcu.interval)
        if not mult_mini_job.succeeded():
            msg = "converge_search() job failed at stage %d of %d minimization. Please see logs file for details." % (
                i, blocks)
            raise RuntimeError(msg)
        mae_file = mult_mini_job.StructureOutputFile
        mini_conf_count = mt.count(mae_file)
        # test for early exit (at least 3 blocks done, and no new confs)
        if i > 2 and block_conf_count >= mini_conf_count:
            # we are done
            print(
                "converge_search() converged on %d conformers in %d stages of %d steps each."
                % (mini_conf_count, i, steps))
            return True
        # increment stage, and set up for next search
        i += 1
        block_conf_count = mini_conf_count
    print(
        "converge_search() found %d conformers in %d stages, of %d steps each, but no evidence for convergence."
        % (block_conf_count, blocks, steps))
    return False
def get_rep_confs(mae_file="", max_confs=5, steps=5000, **kwargs):
    """
    Automate the processing of non-conformers into a smaller
    representative collection of conformers.

    The workflow:
        a) performs a serial conformation search of non-conformers
        b) groups output of conformational search by serial number into
           multiple files
        c) performs multiple minimization of each serial number
        d) reduces output of each serial number with cluster, to N
           representative confs.

    Depends on macromodel.utils.CluUtil() class for clustering.  Depends
    on macromodel.utils.ComUtil() for writing and running com files.
    Depends on macromodel.tools.serial_split() for grouping output.

    :param mae_file: name of a mae file containing a set of non-conformers.
    :param max_confs: max number of representative confs kept per serial
        number by Cluster (default 5).
    :param steps: number of steps used in the Monte Carlo Multiple
        Minima search (default 5000).
    :param kwargs: optional kwargs for the ComUtil object.
    :return: list of the file names that contain representative confs,
        grouped by input serial number.
    :raise Exception: if a minimized file name lacks the expected
        "-out.mae" suffix.
    """
    # get instances
    mcu = mu.ComUtil(**kwargs)
    mxu = mu.CluUtil()
    # set object values
    mcu.serial = True  # mandatory serial
    mcu.MCMM[1] = steps
    mcu.MCOP[1] = 100
    # define lists for intermediate files
    mult_files = []  # names of files from mult mini
    qp_files = []  # names of clustered files (used to be sent to QikProp)
    cleanup_files = []  # all the intermediate file names for removal
    # Perform serial AUTO search
    print("get_rep_confs(): starting serial conformation search...")
    search_job = jc.launch_job(mcu.getLaunchCommand(mcu.mcmm(mae_file)))
    search_job.wait(mcu.interval)
    search_out_mae_file = search_job.StructureOutputFile
    cleanup_files.append(search_out_mae_file)
    # Filter CTs in file, write groups of like serial numbers.
    print(
        "get_rep_confs(): separating conformation search output by serial number..."
    )
    serial_files = mt.serial_split(search_out_mae_file)
    # Perform MultMini
    print("get_rep_confs(): starting sequence of multiple minimizations...")
    for file in serial_files:
        mult_job = jc.launch_job(mcu.getLaunchCommand(mcu.mult(file)))
        mult_job.wait(mcu.interval)
        mult_files.append(mult_job.StructureOutputFile)
    # Cluster to reduce output to representative sample
    print(
        "get_rep_confs(): starting sequence of cluster conformation reductions..."
    )
    # loop-invariant pattern: compile once, not once per file
    mae_re = re.compile(r'-out\.mae$')
    for file in mult_files:
        # generate the output name from the "-out.mae" suffix
        out_file = file
        if mae_re.search(out_file):
            out_file = mae_re.sub('-clust_rep.mae', out_file, 1)
        else:
            raise Exception(
                "get_rep_confs() can't determine file name pattern. ")
        # determine if user's max is possible; if not just copy the file
        count = mt.count(file)
        # cluster thresh level that generates the desired max confs:
        #   thresh level = N - M + 1
        #   N = total number of confs
        #   M = desired max number of confs
        # (matches the computation below; an earlier comment read
        # N - (M + 1), which was wrong)
        thresh = count - max_confs + 1
        if count > max_confs:
            # generate command file args
            mxu.arms[0] = 'heavy'  # atoms for RMS after superim
            mxu.writerep = " ".join([repr(thresh), out_file, "all"])
            # run cluster
            mxu.doCluster(file)
        else:
            # already at or under the requested maximum; keep everything
            os.rename(file, out_file)
        # record the representative-conf file for the caller
        qp_files.append(out_file)
    # Clean up
    print("get_rep_confs(): cleaning up intermediate files...")
    cleanup_files.extend(mult_files)
    cleanup_files.extend(serial_files)
    for file in cleanup_files:
        if os.path.exists(file):
            os.unlink(file)
    print("get_rep_confs(): finished. ")
    return qp_files
def get_lead_confs(mae_file="", max_confs=5, steps=5000, **kwargs):
    """
    Reduce a file of non-conformers to small sets of leading (lowest
    energy) conformers, one output file per input serial number.

    The workflow:
        a) serial conformational search of the non-conformers
        b) split of the search output into one file per serial number
        c) multiple minimization of each serial number's conformers
        d) cluster reduction of each file to the N leading energy confs

    Depends on macromodel.utils.CluUtil() for clustering, on
    macromodel.utils.ComUtil() for writing and running com files, and
    on macromodel.tools.serial_split() for grouping output.

    :param mae_file: mae file containing a set of non-conformers.
    :param max_confs: maximum leading-energy confs kept per serial
        number (default 5).
    :param steps: Monte Carlo Multiple Minima search steps (default 5000).
    :param kwargs: optional kwargs forwarded to the ComUtil object.
    :return: list of file names holding the leading-energy confs,
        grouped by input serial number.
    :raise Exception: if a minimized file name lacks the expected
        "-out.mae" suffix.
    """
    # Workflow drivers: com-file writer/runner and cluster utility.
    com_util = mu.ComUtil(**kwargs)
    clu_util = mu.CluUtil()
    com_util.wait = True  # mandatory wait, jc_wait optionally defined in object
    com_util.serial = True  # mandatory serial
    com_util.MCMM[1] = steps
    com_util.MCOP[1] = 100
    # Serial AUTO search over all input structures.
    print("get_lead_confs(): starting serial conformation search...")
    search_job = jc.launch_job(
        com_util.getLaunchCommand(com_util.mcmm(mae_file)))
    search_job.wait(com_util.interval)
    search_out_mae_file = search_job.StructureOutputFile
    cleanup_files = [search_out_mae_file]  # intermediates for removal
    # Regroup the search output into one file per serial number.
    print(
        "get_lead_confs(): separating conformation search output by serial number..."
    )
    serial_files = mt.serial_split(search_out_mae_file)
    # Multiple-minimize each serial group.
    print("get_lead_confs(): starting sequence of multiple minimizations...")
    mult_files = []
    for group_file in serial_files:
        mini_job = jc.launch_job(
            com_util.getLaunchCommand(com_util.mult(group_file)))
        mini_job.wait(com_util.interval)
        mult_files.append(mini_job.StructureOutputFile)
    # Cluster each minimized group down to the leading-energy sample.
    print(
        "get_lead_confs(): starting sequence of cluster conformation reductions..."
    )
    qp_files = []  # result files (historically fed to QikProp)
    suffix_re = re.compile(r'-out\.mae$')
    for mini_file in mult_files:
        # Derive the clustered-output name from the "-out.mae" suffix.
        if suffix_re.search(mini_file):
            lead_file = suffix_re.sub('-clust_lead.mae', mini_file, 1)
        else:
            raise Exception(
                "get_lead_confs() can't determine file name pattern. ")
        conf_total = mt.count(mini_file)
        # Cluster thresh level that yields the desired max confs:
        # thresh = N - M + 1 (N total confs, M desired max).
        thresh = conf_total - max_confs + 1
        if conf_total > max_confs:
            clu_util.arms[0] = 'heavy'  # atoms for RMS after superim
            clu_util.writelead = " ".join([repr(thresh), lead_file, "all"])
            clu_util.doCluster(mini_file)
        else:
            # Too few conformers to cluster; keep them all.
            os.rename(mini_file, lead_file)
        qp_files.append(lead_file)
    # Remove all intermediate files.
    print("get_lead_confs(): cleaning up intermediate files...")
    cleanup_files.extend(mult_files)
    cleanup_files.extend(serial_files)
    for stale_file in cleanup_files:
        if os.path.exists(stale_file):
            os.unlink(stale_file)
    print("get_lead_confs(): finished. ")
    return qp_files
def get_energy(structure=structure.Structure,
               ffld='OPLS_2005',
               solvent=False,
               cleanup=True,
               units=""):
    """
    Return a dictionary of MacroModel's molecular mechanics terms
    generated from a single point energy calculation.

    The dictionary may be restricted to a collection of values with a
    particular unit, kJ/mol or kcal/mol.  By default, the function
    cleans up all intermediate files created by the job.

    :param structure: a schrodinger.structure.Structure object.  (The
        default is the Structure class itself and is not usable;
        callers must supply an instance.)
    :param ffld: force field name passed to ComUtil.
    :param solvent: solvation treatment passed to ComUtil.
    :param cleanup: if True, remove intermediate job files both before
        and after the calculation.
    :param units: a string starting with 'kcal' or 'kJ' restricts the
        dict to that unit; any other value stores both units.
    :return: dict with a 'title' key plus '<term> <unit>' keys mapping
        to energy values (as strings parsed from program output).
    """
    # Returned dict
    mmod_energy = {}
    mmod_energy['title'] = structure.title
    # Mmod energy terms ('Explcit' matches the program's own output)
    terms = "|".join([
        'Total energy', 'Stretch', 'Bend', 'Proper torsion', 'Out-of-plane',
        'Van der Waals', 'Electrostatic', 'Explcit Hydrogen Bonds',
        'All solvation', 'Solvation Term 1', 'Solvation Term 2'
    ])
    # Compile the re for snarfing energy terms
    energy_re = re.compile(r'(%s):\s+(\S+)\s+kJ\/mol\s+\(\s+(\S+) kcal' % terms)
    # JobIO file prep and clean up
    base_name = 'mmod_energy_calc'
    mae_file_name = ''.join([base_name, '.mae'])
    mmo_file_name = ''.join([base_name, '-out.mmo'])
    out_file_name = ''.join([base_name, '-out.mae'])
    com_file_name = ''.join([base_name, '.com'])
    cleanup_files = [mae_file_name, mmo_file_name, out_file_name, com_file_name]
    # Remove any previous debris
    if cleanup:
        for file in cleanup_files:
            if os.path.exists(file):
                os.remove(file)
    # Get ComUtil customized with ffld but no jobcontrol, we will capture stdout
    mcu = mu.ComUtil(ffld=ffld, solv=solvent)
    mcu.no_redirect = True
    mcu.ELST[1] = 0  # terse reporting of terms
    mcu.ELST[2] = 1  # report in kcal/mol units
    # Write out our structure file, clobber if needed
    structure.write(mae_file_name)
    # Write the comfile, get the command to invoke the job
    cmd = mcu.getLaunchCommand(mcu.elst(mae_file_name))
    cmd[0] = os.path.join(os.getenv('SCHRODINGER'), cmd[0])
    # Run job, capture stdout.  os.popen3() was removed in Python 3;
    # subprocess is the supported replacement.
    proc = subprocess.Popen(
        cmd,
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        universal_newlines=True)
    stdout_text, _ = proc.communicate()
    # Parse stdout for the terms
    for line in stdout_text.splitlines():
        print(line)
        match = energy_re.search(line)
        if match:
            # make keys for both unit types
            key_kJ_per_mol = " ".join([match.group(1), "kJ/mol"])
            key_kcal_per_mol = " ".join([match.group(1), "kcal/mol"])
            # but only populate with requested values
            if units.startswith('kcal'):
                mmod_energy[key_kcal_per_mol] = match.group(3)
            elif units.startswith('kJ'):
                mmod_energy[key_kJ_per_mol] = match.group(2)
            else:
                mmod_energy[key_kJ_per_mol] = match.group(2)
                mmod_energy[key_kcal_per_mol] = match.group(3)
    # Don't leave a mess
    if cleanup:
        for file in cleanup_files:
            if os.path.exists(file):
                os.remove(file)
    return mmod_energy
# EOF