Source code for schrodinger.test.stu.joberrors
import os
import pathlib
import zipfile
import sys
from schrodinger.infra.mmjob import mmjob_is_job_server_job
from schrodinger.job import jobcontrol
from schrodinger.job import queue
from schrodinger.job import remote_command
from schrodinger.utils import mmutil
from schrodinger.utils import subprocess
from . import common
logger = common.logger
WINDOWS_LONG_PATH_PREFIX = "\\\\?\\"
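
# Illustrative note (not part of the original module): on Windows, prefixing an
# absolute path with "\\?\" opts it out of the legacy 260-character MAX_PATH
# limit, which matters when extracting deeply nested postmortem archives. A
# hypothetical helper sketching how the prefix is meant to be applied:
#
#     def _to_long_path(path: str) -> str:
#         if sys.platform == "win32" and not path.startswith(WINDOWS_LONG_PATH_PREFIX):
#             return WINDOWS_LONG_PATH_PREFIX + path
#         return path
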
def run_postmortem(job, product_name):
    """
    Run a postmortem for ``job``, which may be any queue.BaseJob. If the job
    has a job ID, run the postmortem for that particular job; otherwise, run
    the postmortem for the job database only.
    """
    job.infoStatus('running postmortem')
    _, jobid, _ = job.getStatusStrings()
    if jobid and jobid.strip() == "[none]":
        jobid = None
    job_directory = job.getCommandDir()
    args = []
    if not job_directory:
        return
    if jobid:
        args.append(jobid)
    elif mmutil.feature_flag_is_enabled(mmutil.JOB_SERVER):
        return
    else:
        args.append("-jobdbonly")
    postmortem_log_path = pathlib.Path(job_directory).joinpath('postmortem.log')
    with open(postmortem_log_path, 'w') as logf:
        if mmjob_is_job_server_job(jobid):
            command = [
                "jsc",
                "postmortem",
                "--with-subjobs",
                "--without-redaction",
            ] + args
        else:
            command = ["postmortem"] + args
        proc = subprocess.run(command,
                              cwd=job_directory,
                              stderr=subprocess.STDOUT,
                              stdout=logf)
        if proc.returncode:
            msg = f'postmortem failed with return code: {proc.returncode}'
            logf.write(msg + '\n')
            msg += f' while investigating {job}'
            logger.warning(msg)
    if os.path.isfile(postmortem_log_path) and jobid:
        unzip_postmortem(postmortem_log_path)
    collect_queue_stats(job) 
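
# Usage sketch (illustrative, not part of the original module): run_postmortem()
# is intended to be called on a finished queue.BaseJob after a test failure.
# The JobControlJob command and the product name below are assumptions made
# for the example only.
#
#     failed_job = queue.JobControlJob(["testapp", "input.inp"])
#     ...  # launch the job and wait for it to finish unsuccessfully
#     run_postmortem(failed_job, product_name="testapp")
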
def unzip_postmortem(postmortem_log_path: pathlib.Path):
    """
    Find the postmortem archive named in the log file, extract it next to the
    archive, then remove the archive and its file manifest.

    :param postmortem_log_path: path to the log output of the postmortem command
    """
    # Absolute path to postmortem zip should be at the end of the log. Take
    # the last line from the log containing a .zip file
    postmortem_zip_path = None
    with open(postmortem_log_path) as f:
        for line in f:
            if '.zip' in line:
                postmortem_zip_path = pathlib.Path(
                    line.strip().strip('"').split(': ')[-1])
    if not postmortem_zip_path:
        logger.warning(
            f'Postmortem file not found. See log at {postmortem_log_path}')
        return
    if not postmortem_zip_path.is_absolute():
        postmortem_zip_path = postmortem_log_path.parent.joinpath(
            postmortem_zip_path)
    postmortem_extraction_dir = postmortem_zip_path.parent
    if sys.platform == "win32":
        postmortem_extraction_dir = WINDOWS_LONG_PATH_PREFIX + str(
            postmortem_extraction_dir)
    with zipfile.ZipFile(postmortem_zip_path) as zf:
        zf.extractall(postmortem_extraction_dir)
    # Remove zip archive and file manifest
    os.remove(postmortem_zip_path)
    postmortem_files = postmortem_zip_path.with_suffix('.files')
    if postmortem_files.exists():
        os.remove(postmortem_files) 
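
# Parsing sketch (illustrative, not part of the original module): the
# postmortem log is assumed to end with a line naming the archive, e.g.
# "Writing postmortem file: /scr/user/job-42/job-42-postmortem.zip" (the exact
# wording is an assumption); the loop above reduces such a line to a Path:
#
#     line = "Writing postmortem file: /scr/user/job-42/job-42-postmortem.zip"
#     zip_path = pathlib.Path(line.strip().strip('"').split(': ')[-1])
#     # zip_path == PosixPath('/scr/user/job-42/job-42-postmortem.zip')
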
def collect_queue_stats(job):
    """
    Invoke remote queue commands to collect queue stats for debugging a
    queue job that was killed.

    :type job: schrodinger.job.queue.JobControlJob
    :param job: the queue job to collect stats for
    """
    if not is_killed_queue_job(job):
        return
    jobObj = job.getJob()
    job_directory = job.getCommandDir()
    cmd = "bash --login -c '{}'"
    submission_hostname = jobcontrol.get_host(jobObj.HostEntry).host
    submission_username = jobcontrol.get_host(jobObj.HostEntry).user
    rcmd = remote_command._rsh_cmd(submission_hostname,
                                   remoteuser=submission_username)
    with open(os.path.join(job_directory, 'qstat.log'), 'w') as f:
        bash_command = cmd.format('date && echo Running Cmd: qstat && qstat')
        proc = subprocess.run(rcmd + [bash_command], stdout=f)
        if proc.returncode != 0:
            logger.warning(f"{bash_command} exited abnormally")
    with open(os.path.join(job_directory, 'clusutil.log'), 'w') as f:
        clusutil_cmd = 'perl /nfs/working/sysmgr/sysmgr-repo/scripts/clustutil.pl -u -a'
        bash_command = cmd.format('date && echo Running Cmd: {} && {}'.format(
            clusutil_cmd, clusutil_cmd))
        proc = subprocess.run(rcmd + [bash_command], stdout=f)
        if proc.returncode != 0:
            logger.warning(f"{bash_command} exited abnormally") 
def is_killed_queue_job(job):
    """
    Return True if the job was killed on the queueing system.

    :type job: schrodinger.job.queue.JobControlJob
    :param job: the queue job to check
    """
    # subprocess jobs don't submit to the queue
    if not isinstance(job, queue.JobControlJob):
        return False
    # job failed to launch
    jobObj = job.getJob()
    if not jobObj:
        return False
    # not a queued job
    if not jobObj.isQueued():
        return False
    return job.canceled_by_timeout
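
# Behavior sketch (illustrative, not part of the original module): only
# queue-submitted JobControlJob instances that launched successfully and were
# canceled by timeout return True; the job variables below are hypothetical.
#
#     is_killed_queue_job(subprocess_job)       # False: not a JobControlJob
#     is_killed_queue_job(unlaunched_job)       # False: getJob() returns None
#     is_killed_queue_job(non_queue_job)        # False: jobObj.isQueued() is False
#     is_killed_queue_job(timed_out_queue_job)  # True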