Source code for schrodinger.test.log_parser

"""
Parse pytest log files to help track down parallel test failures.  All log files
must have been generated using subprocess tests (i.e. "-n <NUMBER>" or
"-n auto") and the "-v" flag.  Running this script from the command line will
output statistics about the failures as well as a recipe to reproduce a worker
process failure.

Usage: log_parser.py <log file 1> [<log file 2>...]

Note: To automatically run the Python unit tests 10 times in a row and generate
logs that are approrpiate for input into this script, you can run ::

    mkdir ../test_logs
    for i in {1..10}; do
        make unittest TEST_ARGS="-n=auto -v" 2>&1 | tee ../test_logs/${i}.txt;
    done

from $SCHRODINGER/mmshare-v*/python/test.  This will put the test logs into a
new $SCHRODINGER/mmshare-v*/python/test_logs directory.
"""

import glob
import itertools
import re
import sys


[docs]def parse_log(logfile):
    """
    Parse the specified pytest log file.  The log file must have been generated
    using subprocess tests (i.e. "-n <NUMBER>" or "-n auto") and the "-v" flag.
    This method reports which tests were run on each worker process, the number
    of failed tests per worker process, and the index of the first failed test.

    :param logfile: The path of the log file to parse
    :type logfile: str

    :rtype: tuple(list(list(str)), list(int or None), int)

    Return value is a tuple of:

    - A list of list of tests run on each worker process, formatted as [list
      of tests run on worker process 0, list of tests run on worker
      process 1, ...]
    - A list of the first failed test index on each worker process, or None
      if no tests failed on that worker.
    - The number of worker processes with failed tests.

    """
    per_process = [[]]
    first_fail = [None]
    num_failed_procs = 0
    with open(logfile) as log:
        for curline in log:
            match = re.match(
                r"""
                \[gw(\d+)\]\s*             # process number
                \[\s*\d*%\]\s              # percent complete
                (\w*)\s*                   # passed/failed/error
                ([\w/\\:. -]*)\s*          # test name
                (?:<-\s*[\w/\\. -]*\s*)?$  # arrow for inherited tests (optional)
                """, curline, re.VERBOSE)
            if match:
                proc_num, status, test_name = match.groups()
                test_name = test_name.strip()
                proc_num = int(proc_num)
                if proc_num >= len(per_process):
                    procs_to_add = proc_num - len(per_process) + 1
                    per_process.extend([] for _ in range(procs_to_add))
                    first_fail.extend([None] * procs_to_add)
                if (status in ("FAILED", "ERROR") and
                        first_fail[proc_num] is None):
                    first_fail[proc_num] = len(per_process[proc_num])
                    num_failed_procs += 1
                per_process[proc_num].append(test_name)
    return per_process, first_fail, num_failed_procs


[docs]def parse_logs(logfiles):
    """
    Parse multiple pytest log files.  All log files must have been generated
    using subprocess tests (i.e. "-n <NUMBER>" or "-n auto") and the "-v" flag.

    :param logfiles: A list of log file paths
    :type logfiles: list(str)

    :rtype: tuple(int, int, int, int, int, list(str) or None)

    The return value is a tuple of

    - The total number of pytest runs processed
    - The number of failed runs
    - The total number of worker processes over all runs
    - The total number of worker processes with a test failure over all runs
    - The maximum number of worker processes with a test failure in a single
      test run
    - All tests run on a single worker process up to and including the first
      failed test.  If multiple worker processes contained a failed test,
      then the shortest recipe is provided.  If no worker processes
      contained a failed test, will be None.

    """
    total_runs = len(logfiles)
    num_failed_runs = 0
    total_num_procs = 0
    total_num_failed_procs = 0
    max_num_failed_procs = 0
    shortest_recipe = None
    recipe_length = float("inf")
    for cur_file in logfiles:
        per_process, first_fail, num_failed_procs = parse_log(cur_file)
        total_num_procs += len(first_fail)
        total_num_failed_procs += num_failed_procs
        max_num_failed_procs = max(max_num_failed_procs, num_failed_procs)
        if num_failed_procs:
            num_failed_runs += 1
            fastest_fail_index = min(
                range(len(first_fail)),
                key=lambda i: first_fail[i]
                if first_fail[i] is not None else float("inf"))
            fastest_fail_length = first_fail[fastest_fail_index]
            if fastest_fail_length < recipe_length:
                recipe_length = fastest_fail_length
                shortest_recipe = \
                    per_process[fastest_fail_index][:fastest_fail_length + 1]
    return (total_runs, num_failed_runs, total_num_procs,
            total_num_failed_procs, max_num_failed_procs, shortest_recipe)


[docs]def main():
    """
    Parse all log files provided on the command line.  Print out statistics
    about the failures and a recipe to reproduce the shortest failure.
    """
    # bash will glob the paths for us but cmd on Windows won't, so we glob them
    # here just in case
    files = list(
        itertools.chain.from_iterable(glob.glob(path) for path in sys.argv[1:]))
    if not files:
        print("No logs found", file=sys.stderr)
        sys.exit(1)
    (total_runs, num_failed_runs, total_num_procs, total_num_failed_procs,
     max_num_failed_procs, shortest_recipe) = parse_logs(files)
    print(f"{num_failed_runs} of {total_runs} runs failed")
    if num_failed_runs == 0:
        return
    print(f"{total_num_failed_procs} of {total_num_procs} worker processes "
          "contained a failure")
    print(f"At most, {max_num_failed_procs} worker processes failed per run")
    print("")
    print(f"Fastest failure required {len(shortest_recipe)} tests:")
    print("\n".join(shortest_recipe))
    print("")
    print("To run these tests, run:")
    print(f'make unittest TEST_ARGS="-v {" ".join(shortest_recipe)}"')


if __name__ == "__main__":
    main()