"""
Deprecated in favor of schrodinger.job.launchapi.
A base class for applications that can be run under job control.
Copyright Schrodinger, LLC. All rights reserved.
"""
#Contributors: Matvey Adzhigirey, Jeff Saunders
###############################################################################
import optparse
import os
import pickle
import sys
import tarfile
import warnings
import schrodinger.utils.log as log
from schrodinger.job import jobcontrol
from schrodinger.job import launcher
# Revision keyword string recorded for this module.
_version = "$Revision: 1.37 $"

# Module-level logger obtained from the schrodinger logging utilities.
logger = log.get_logger("schrodinger.job.app")

# 'maestro' is the imported module when available and None otherwise; the
# App class tests its truthiness to decide whether to notify Maestro.
try:  # to see if we are running inside of Maestro:
    from schrodinger.maestro import maestro
except ImportError:
    maestro = None
def _remove(file):
    """
    Remove the file at path `file` if it exists.

    The removal is attempted directly and a missing file is ignored, so
    there is no window between an existence check and the removal in which
    another process could delete the file. Any other OSError (e.g.,
    permission problems, or `file` being a directory) is propagated.
    """
    try:
        os.remove(file)
    except FileNotFoundError:
        # Nothing to remove; this is the expected "does not exist" case.
        pass
class App:
    """
    Base class for applications that can be run under Job Control.

    Subclasses override commandLine(), backend() and (optionally) gui().
    An instance is created with the script path and the command-line
    arguments, and run() then either shows the GUI, launches the job via
    the launcher, or (with -NOJOBID) calls backend() directly.
    """

    def __init__(self, script, args=None):
        """
        Set up the application state and extract the App-level options.

        :param script: Path of the script to launch. A '.pyc' path is
            replaced by the corresponding '.py' path.
        :param args: List of command-line arguments (defaults to no
            arguments). If it contains '-gui', all other arguments are
            ignored and run() will call gui().
        """
        if args is None:
            args = []

        msg = ("schrodinger.job.app has been superseded by the use of "
               "schrodinger.job.launchapi")
        warnings.warn(msg, DeprecationWarning, stacklevel=2)

        # If the script is a Python script, we must use the .py version, not
        # the .pyc version, because $SCHRODINGER/run doesn't recognize the
        # latter and can't set the interpreter properly.
        if script.endswith(".pyc"):
            self._app_script = script[:-1]
        else:
            self._app_script = script

        # Initialize the jobname to None so that we can tell if -APPJOBNAME
        # was passed to the script when launching the backend.
        self._app_jobname = None
        self._app_progname = "App"

        # Initialize input/output file lists
        self._app_input_files = []
        self._app_temp_input_files = {}
        self._app_output_files = []
        self._app_struct_output_file = None
        self._app_log_files = []

        # By default, use a scratch directory
        self._app_always_local = False
        self._localdriver = False

        # Check for GUI mode. Ignore all arguments except '-gui'.
        # It is the responsibility of the user's 'gui()' method to call
        # launchBackend() to start the job.
        self._run_gui = '-gui' in args
        if self._run_gui:
            return

        # Parse the arguments that App will handle. While some of these
        # arguments (e.g., -LOCAL) aren't needed until the _startup() method,
        # we must extract them at this point, because we don't know if the
        # call to the user's commandLine() method will preserve them.
        (self._args, self._restart, self._scratch_file, self._local,
         self._wait,
         self._run_under_jobcontrol) = self._extractArguments(args)

    def run(self):
        """
        If '-gui' passed to the constructor, runs the GUI OR...

        Reads the restart file (-RESTART, existing job) or command-line
        arguments (default, new job) and then runs in startup mode (default)
        or backend mode (-NOJOBID).

        Raises a SyntaxError if the commandLine() method returns None
        instead of an argument list. Exits (status 1) if a restart file
        already exists for a new job and -OVERWRITE was not given.
        """
        # Check for GUI mode. It is the responsibility of the user's 'gui()'
        # method to call launchBackend() to start the job.
        if self._run_gui:
            self.gui()
            return

        # Restarting can happen in two situations: 1) It is used when
        # launching the backend without other CL arguments; 2) It is on the
        # command-line with other arguments because the user wants to restart
        # an existing job. In the first case, the jobname is passed via a
        # special flag (-APPJOBNAME), because the jobname can't be derived
        # from user CL arguments. In the second case, we have to get the job
        # name from the user CL arguments before finding and reading the
        # restart file. The jobname defaults to None, which distinguishes
        # these two cases.
        if self._app_jobname is None:
            # The job name has not been set directly via -APPJOBNAME during
            # backend launch, so we must parse the application-specific
            # command-line args. This is true even if the user is restarting,
            # because the name of the .restart file must be determined from
            # the jobname (which is set in 'commandLine').
            self._args = self.commandLine(self._args)
            if self._args is None:
                raise SyntaxError(
                    "commandLine() method should return an argument list!")

        # At this point the jobname is expected to be known, either from
        # -APPJOBNAME or from the 'commandLine' method.
        restart_filename = self._app_jobname + ".restart"

        if self._restart:
            app_instance = self._readDumpFile(restart_filename)
        else:
            app_instance = self
            # Check if a restart file already exists, in which case the user
            # needs to -RESTART or -OVERWRITE explicitly.
            if os.path.isfile(restart_filename):
                if self._overwrite:
                    _remove(restart_filename)
                else:
                    print("")
                    print("Restart file '%s' already exists." %
                          restart_filename)
                    print("Run with '-OVERWRITE' to start new job.")
                    print("Run with '-RESTART' to resume existing job.")
                    print("")
                    sys.exit(1)
            # Remove existing scratchdir file when starting a new job.
            # FIXME: This is NOT the extension of the file generated by
            # jobcontrol:
            _remove(self._app_jobname + '.scratchdir.tar.gz')

        # Call the _startup() method to launch the job (under JC or not)
        app_instance._startup(self._args, self._scratch_file, self._local,
                              self._wait, self._run_under_jobcontrol)

    ########### Overwrite the following methods with your own code: ###########

    def commandLine(self, args):
        """
        Overwrite this method with code that parses the arguments and
        initializes the instance variables to appropriate values.

        This method should also set the job name and input/output files for
        the scriptlauncher via dedicated App methods. The method should
        return a revised list of arguments (e.g., removing arguments that
        won't be needed when running the backend on the restart file). Output
        files also can be registered in the backend() method.

        The default implementation returns `args` unchanged.
        """
        return args

    def gui(self):
        """
        Overwrite this method with your GUI code. When a job is ready to be
        submitted, call 'self.launchBackend()' with appropriate arguments.
        'launchBackend' then will rerun the script in non-GUI mode, so
        the argument list should be like a command-line invocation.

        Ex: self.launchBackend(['-i', input_file, '-j', 'testjob',
                                '-HOST', hostname, '-USER', myname])

        The default implementation prints a message and exits.
        """
        print(" No GUI available.")
        sys.exit()

    def backend(self):
        """
        Overwrite this function with your backend (main) code. This method
        also may set output files via dedicated App methods for jobs that
        are running under Job Control. To enable restartability at
        intermediate stages of the job, record the state of the job in an
        instance variable, have the method base its actions on the current
        state, and periodically save the instance via the dumpBE() method.

        The default implementation prints a message and exits.
        """
        print(" No backend available.")
        sys.exit()

    ################### PUBLIC METHODS: ###########################

    def _writeRestartFile(self, relpath):
        """
        Pickle this instance to '<relpath>/<jobname>.restart' and return the
        path of the file written. Shared by dumpFE() and dumpBE().
        """
        restart_file = os.path.join(relpath, self._app_jobname + '.restart')
        # The context manager closes the file even if pickling fails.
        with open(restart_file, "wb") as filehandle:
            pickle.dump(self, filehandle)
        return restart_file

    def dumpFE(self, relpath="."):
        """
        Dumps the App instance to file named '<jobname>.restart'. This
        version is called on the front end to create the initial restart
        file during job submission. This is the file from which a user
        would resume an interrupted job via a command like...

            <myapp>.py -RESTART

        When the user runs the application with '-RESTART' option, the App
        instance is recovered from the dump file instead of initializing the
        instance via the commandLine() method.

        A 'relpath' optional argument is available, in case the restart file
        needs to be written somewhere other than the CWD.

        The file is registered as an output file via addOutputFileFE() after
        it has been written. Returns the path of the restart file.
        """
        restart_file = self._writeRestartFile(relpath)
        self.addOutputFileFE(restart_file)
        return restart_file

    def dumpBE(self, relpath="."):
        """
        Dumps the App instance to file named '<jobname>.restart'. This
        function is to be called by backend() at restartable places in the
        code. This is the file from which a user would resume an interrupted
        job via a command like...

            <myapp>.py -RESTART

        When the user runs the application with '-RESTART' option, the App
        instance is recovered from the dump file instead of initializing the
        instance via the commandLine() method.

        A 'relpath' optional argument is available, in case the restart file
        needs to be written somewhere other than the CWD (e.g., in case the
        backend is running in a workdir).

        The file is registered as an output file via addOutputFileBE() after
        it has been written. Returns the path of the restart file.
        """
        restart_file = self._writeRestartFile(relpath)
        self.addOutputFileBE(restart_file)
        return restart_file

    def setJobName(self, jobname):
        """ Call this method from commandLine() to set the jobname. """
        self._app_jobname = jobname

    def getJobName(self):
        """ Returns the jobname of the App. """
        return self._app_jobname

    def setProgramName(self, progname):
        """
        Call this method from commandLine() to set the program name.
        """
        self._app_progname = progname

    def getProgramName(self):
        """
        Returns the program name of the App (as set by setProgramName(),
        "App" by default).
        """
        return self._app_progname

    # FIXME: jobcontrol should now support a getJob() accessor here, i.e.,
    # returning jobcontrol.get_backend().getJob() when a backend exists and
    # None otherwise.

    def getHostStr(self):
        """
        What user specified as -HOST when running the script. Use in
        backend() when running jobs with JobDJ. Should be in format...
        "host1:ncpu1 host2:ncpu2"

        NOTE: NOT COMPLETE YET; currently always returns 'localhost'. ALSO,
        THIS INFORMATION IS AVAILABLE TO JOBDJ VIA THE SCHRODINGER_NODEFILE.
        """
        return 'localhost'

    def alwaysLocal(self):
        """
        Call this method from the commandLine() method to make the backend
        always run in the local (i.e., launch) directory instead of in a
        scratch directory. This enforces -LOCAL without the user having to
        use that command-line option. Running in the local directory removes
        the need to use addInputFile(), addOutputFile*(), etc., but it imposes
        a requirement that the local directory be directly accessible to any
        machine where the backend will run, which may or may not be true for
        remote jobs. Running locally also can facilitate restarting if the
        backend is a driver for other subjobs.
        """
        self._app_always_local = True

    def useLocalDriver(self):
        """
        The backend will be run on the local host, regardless of the
        -HOST setting. This is so the -HOST option can be used to set
        the subjob host info, while letting the driver/backend run locally.
        """
        self._localdriver = True

    ############################################################################
    # For scripts that run under job control, it is important for job control  #
    # to know what files to copy to the scratch directory and which files to   #
    # copy back to the launch directory when the job has completed.            #
    # Use the following methods to supply those files.                         #
    ############################################################################

    def addInputFile(self, file):
        """
        Call from commandLine() to specify an input file. Registered files
        are passed to the launcher's addInputFile() when the job is
        submitted, unless a scratch file is being used.
        """
        self._app_input_files.append(file)

    def addOutputFileFE(self, file):
        """
        Call from commandLine() to specify output files.
        """
        self._app_output_files.append(file)

    def addOutputFileBE(self, file):
        """
        Call from backend() to specify output files. Does nothing when not
        running under Job Control.
        """
        # It isn't safe to call 'get_backend' in startup mode. See Ev:107590
        # and Ev:108606.
        backend = jobcontrol.get_backend()
        if backend:
            backend.addOutputFile(file)
        # Deliberately do nothing if we are not running under job control.

    def setStructureOutputFileFE(self, file):
        """
        Call from commandLine() to specify the structure output file (i.e.,
        the file that gets incorporated into Maestro upon job completion).
        """
        self._app_struct_output_file = file

    def setStructureOutputFileBE(self, file):
        """
        Call from backend() to specify the structure output file (i.e.,
        the file that gets incorporated into Maestro upon job completion).

        Raises a RuntimeError when not running under Job Control.
        """
        # It isn't safe to call 'get_backend' in startup mode. See Ev:107590
        # and Ev:108606.
        backend = jobcontrol.get_backend()
        if backend:
            backend.setStructureOutputFile(file)
        else:
            raise RuntimeError(
                "setStructureOutputFileBE must be called from the backend (i.e., running under JobControl"
            )

    def addLogFile(self, file):
        """
        Call from commandLine() to specify additional log files. The main
        log file (i.e., the stdout/stderr of the backend()) is registered
        automatically.
        """
        self._app_log_files.append(file)

    def log(self, *args):
        """
        Use this method to print from within backend(). Each argument is
        printed followed by a space, then a newline, and stdout is flushed.
        """
        for a in args:
            print(a, end=' ')
        print("")
        sys.stdout.flush()

    def lognotret(self, *args):
        """
        Use this method to print from within backend(). Unlike log(), it does
        not print a newline at end of logged text.
        """
        for a in args:
            print(a, end=' ')
        sys.stdout.flush()

    def launchBackend(self, args=None):
        """
        Call this method from the gui() method to run the job. Returns the
        Job object of the launched job. In order to set up the appropriate
        environment variables, a top-level script must be called. Therefore,
        launchBackend() will invoke the script in command-line mode, with all
        arguments passed to it. The 'args' should be a list of command-line
        arguments (e.g., ['-i', input_file, '-HOST', hostname]).

        Exits with an error message if the SCHRODINGER environment variable
        is not defined.

        :rtype: jobcontrol.Job object
        :return: The job object for the launched backend job.

        :raise RuntimeError: If there is a problem launching the job (e.g., no
            JobId gets printed); presumably raised by jobcontrol.launch_job -
            confirm against that function.
        """
        if 'SCHRODINGER' not in os.environ:
            sys.exit('Error: SCHRODINGER not defined')

        # The new $SCHRODINGER/run script will set the environment variables:
        cmd = ["run", self._app_script]
        if args:
            cmd.extend(args)

        # Launch output is only printed when not running inside Maestro.
        print_output = not maestro
        job = jobcontrol.launch_job(cmd, print_output)
        if maestro:
            maestro.job_started(job.job_id)
            maestro.command("showpanel monitor")
        return job

    def addCommandLineOptions(self, parser, distributed=False, use_group=False):
        """
        Add top-level and App options to the OptionParser 'parser'. With this
        method, the application-specific parser can include top-level and App
        options in its usage/help statement, though the actual parsing of
        those arguments is handled elsewhere. The 'distributed' boolean option
        (default is False) alters the option descriptions slightly, making
        references to the job as the "driver job". If 'use_group' is True
        (default is False), the options will be added as an OptionGroup.

        NOTE(review): the option strings use a single dash with a long name
        (e.g., '-RESTART'), which the stock optparse.Option class rejects;
        'parser' presumably uses an option class that accepts them - confirm
        against callers.
        """
        dtext = "driver " if distributed else ""

        # The same options are added either to a new group or directly to
        # the parser; an OptionGroup uses the parser's option class.
        if use_group:
            target = optparse.OptionGroup(parser, "Common App options")
        else:
            target = parser

        target.add_option(
            "-RESTART",
            action="store_true",
            help="Resume an interrupted job <jobname> using the restart file.")
        target.add_option(
            "-OVERWRITE",
            action="store_true",
            dest="overwrite",
            help="Delete any existing job files and start new job.")
        # The --scratch option is intentionally not advertised here (it is
        # not known whether it works).
        target.add_option("-NOJOBID",
                          action="store_true",
                          help="Run %sjob without Job Control." % dtext)
        target.add_option(
            "-LOCAL",
            action="store_true",
            help="Don't use a scratch directory for the %sjob." % dtext)
        target.add_option("-WAIT",
                          action="store_true",
                          help="Do not return until the job completes.")

        if use_group:
            parser.add_option_group(target)

    ################ PRIVATE METHODS: ############################

    def _extractArguments(self, args):
        """
        Extract the App-specific command-line arguments and remove them
        from the argument list.

        Recognized options: '-RESTART', '-OVERWRITE', '-APPJOBNAME <name>',
        '--scratch <file>', '-NOJOBID', '-LOCAL' and '-WAIT'. As side
        effects, '-OVERWRITE' is recorded in self._overwrite and the value
        of '-APPJOBNAME' is passed to setJobName(). The argument following
        '-APPJOBNAME' or '--scratch' is always consumed as its value. Exits
        with an error message if either of those is the last argument.

        Returns a tuple of 1) list of the remaining (unrecognized) args,
        2) whether -RESTART was given, 3) name of the scratch file (or
        None), 4) whether -LOCAL was given, 5) whether -WAIT was given, and
        6) whether to run under Job Control (False if -NOJOBID was given).
        """
        newargs = []
        restart = False
        self._overwrite = False
        scratch_file = None
        local = False
        wait = False
        run_under_jobcontrol = True

        missing = object()  # marks "no value follows the option"
        remaining = iter(args)
        for a in remaining:
            if a == '-RESTART':
                restart = True
            elif a == '-OVERWRITE':
                self._overwrite = True
            elif a == '-APPJOBNAME':
                value = next(remaining, missing)
                if value is missing:
                    sys.exit("Error: Job name required")
                self.setJobName(value)
            elif a == '--scratch':
                value = next(remaining, missing)
                if value is missing:
                    sys.exit("Error: Scratch file name required")
                scratch_file = value
            elif a == '-NOJOBID':
                run_under_jobcontrol = False
            elif a == '-LOCAL':
                local = True
            elif a == '-WAIT':
                wait = True
            else:
                newargs.append(a)

        return newargs, restart, scratch_file, local, wait, run_under_jobcontrol

    def _printJobInfo(self):
        """
        This method is called right before calling backend().
        It prints job information to the terminal/log file: selected fields
        of the Job record when running under Job Control, or just the job
        name otherwise.
        """
        # get_backend() returns None if not running under jobcontrol. Do NOT
        # raise an exception in that case; we still print something.
        backend = jobcontrol.get_backend()

        # Note: I have seen occasions where StartTime doesn't exist
        #       yet in the Job Record when this method is called, perhaps
        #       due to delays in updating the Jobs Database for remote jobs?
        print("========================================")
        print("Job Info...")
        print("")
        if backend:
            job = backend.getJob()
            print_fields = [
                'Name', 'JobId', 'HostEntry', 'Host', 'JobHost', 'Dir',
                'JobDir', 'LaunchTime', 'StartTime'
            ]
            for field in print_fields:
                # Only print the fields that the job record actually has.
                if hasattr(job, field):
                    print("{:<15}: {}".format(field, getattr(job, field)))
        else:
            print("Not running under job control")
            print("")
            print("Name: ", self._app_jobname)
        print("========================================")

    def _readDumpFile(self, file):
        """
        Returns an instance of App loaded from the supplied dump file. Raises
        a RuntimeError if the specified file doesn't exist or if there is a
        problem reading it.

        NOTE: the file is loaded with pickle, which can execute arbitrary
        code; only restart files from a trusted source should be used.
        """
        if not os.path.exists(file):
            print("file does not exist:" + file)
            sys.stdout.flush()
            raise RuntimeError("file does not exist:" + file)

        try:
            with open(file, 'rb') as filehandle:
                app_instance = pickle.load(filehandle)
        except Exception as err:
            msg = "Could not unpickle the restart file: {}\nError: {}".format(
                file, err)
            # Chain the original error so the traceback shows the cause.
            raise RuntimeError(msg) from err
        return app_instance

    def _startup(self, args, scratch_file, local, wait, run_under_jobcontrol):
        """
        Run the job. Submits the script under Job Control to be run from the
        dump/restart file. If -NOJOBID is used (run_under_jobcontrol is
        False), backend() is called directly (i.e., no Job Control).
        'schrodinger.job.launcher' handles setting all the launch arguments
        and issuing the launch command.

        Returns the result of the launcher's launch() if running under Job
        Control (or None, otherwise).
        """
        if run_under_jobcontrol:
            # Set local option
            if self._app_always_local:
                local = True
            # Some applications might register output files at backend time
            # only, so we can't assume 'local' just because no input/output
            # files have been registered at this point.

            # Launch the job under Job Control via the 'Launcher'
            scriptlauncher = launcher.Launcher(
                script=self._app_script,
                copyscript=True,
                jobname=self._app_jobname,
                prog=self._app_progname,
                localdriver=self._localdriver,
                local=local,
                wait=wait,
            )

            # Set up the input and output files for the job
            if not scratch_file:
                # No need to copy the input files if a scratch file is
                # being used; those files are included in the scratch archive.
                for f in self._app_input_files:
                    scriptlauncher.addInputFile(f)
            for f in self._app_output_files:
                scriptlauncher.addOutputFile(f)
            if self._app_struct_output_file:
                scriptlauncher.setStructureOutputFile(
                    self._app_struct_output_file)
            for f in self._app_log_files:
                scriptlauncher.addLogFile(f)

            # Dump the App instance to a restart file and ship it with the
            # job. (An earlier approach pickled to '<jobname>.tmp.dump' with
            # the class's __module__ temporarily set to "__main__"; it is no
            # longer used.)
            restart_file = self.dumpFE()
            scriptlauncher.addInputFile(restart_file)

            # -NOJOBID tells the script to run the _startup() method
            # on the compute machine such that backend() is called directly.
            # Run it from the restart file created above ('-RESTART'). Since
            # the new instance won't know the jobname until the restart file
            # is read, but the jobname is needed to identify the restart file,
            # we have to pass the jobname to the backend.
            scriptlauncher.addScriptArgs(
                ['-NOJOBID', '-RESTART', '-APPJOBNAME', self._app_jobname])

            # If using a scratch file, register it with Job Control and run
            # the script on the compute machine with '--scratch' so it is
            # extracted for use.
            if scratch_file:
                scriptlauncher.addInputFile(scratch_file)
                scriptlauncher.addScriptArgs(["--scratch", scratch_file])

            # Pass remaining arguments to the backend
            scriptlauncher.addScriptArgs(args)

            # Launch! Return the Job object
            return scriptlauncher.launch()

        # Run the backend directly
        self._printJobInfo()
        if scratch_file:
            # Extract the scratch file into the current directory.
            # NOTE: member paths are not validated here; only archives from
            # a trusted source should be used.
            print("Extracting scratch_file:", scratch_file + '...', end=' ')
            with tarfile.open(scratch_file) as tar:
                for name in tar.getnames():
                    # Best effort: warn about a member that cannot be
                    # extracted and continue, but let KeyboardInterrupt and
                    # SystemExit propagate.
                    try:
                        tar.extract(name)
                    except Exception:
                        print(" WARNING: Failed to extract:", name)
            print("Extraction complete.")

        self.backend()

        # Remove scratch file upon successful completion
        # FIXME: This is NOT the extension of the file generated by
        # jobcontrol:
        _remove(self._app_jobname + '.scratchdir.tar.gz')
        return None
if __name__ == "__main__":
    # Running the module directly only prints its documentation.
    for text in ("Module documentation:", __doc__):
        print(text)
    sys.stdout.flush()
#EOF