"""
Provides the `jaguarDiff` workup method for use with Jaguar and QSite jobs.
It wraps the QSite/Jaguar output class's 'diff' function so that it produces
results that are meaningful to STU. It can also be used from the command line.
$Revision 0.1 $
@copyright: (c) Schrodinger, LLC. All rights reserved
"""
from math import floor
from math import log10
import fnmatch
# Packages --------------------------------------------------------------------
import numpy
from schrodinger.application.jaguar.results import IncompleteOutput
from schrodinger.application.qsite.output import QSiteOutput as QSOut
from . import failures
# Contributors: Dan Wandschneider
_version = "$Revision 0.1 $"
# Globals ----------------------------------------------------------------------
minPrecision = 2e-6
minPrecisionPercent = 5.0  # i.e., 5%
# The time-difference tolerance is applied in jaguarDiff(); the
# memory-difference tolerance is applied in getMemDiffs().
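# Illustrative effect of the 'tol' option (a sketch): passing 'tol=2.0' to
# jaguarDiff doubles minPrecision (to 4e-6) and is also forwarded as the
# `factor` argument to the Jaguar/QSite diff routine.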
def jaguarDiff(testFile, refFile, *options):
"""
Compare every property available in the Jaguar/QSite out file against a
reference.
Usage::
# file.out - Path of the file to be compared
# reference.out - Path of the reference file
outcome_workup = jaguarDiff('file.out', 'reference.out', 'tol=1.0',
'log=workup.log', 'config=config.txt',
'value=1e-4')
    Optional parameters:

    * tol=value - A multiplier by which to loosen ALL tolerances
    * log=file - Path to an optional output log. If it is not provided, the
      output is written to the console.
    * config=file - A configuration file, formatted as follows: any text after
      a '#' character is a comment and is ignored; all other text is in
      key/value pairs, one per line. Possible keys are: log, tol, and any value
      that appears in the output file, e.g. value=1e-4 overrides the tolerance
      for 'value' to 1e-4.
    * timing=boolean - If true, jobs can 'fail' based on timing alone. Default
      is false.

    All other arguments are taken to be tolerance overrides for specific values.
"""
tolOverrides = {}
for arg in options:
        aName, aValue = arg.split("=", 1)
        optionTest(aName, aValue, tolOverrides)
if "config" in tolOverrides:
parseConfigFile(tolOverrides)
tolOverrides.pop("config")
test = QSOut(testFile)
ref = QSOut(refFile)
# Did the reference pass?
ref_passed = ref.status == QSOut.OK
# Fast fail if only the test failed.
if test.status != QSOut.OK and ref_passed:
msg = '%s failed with a FATAL ERROR' % testFile
if hasattr(test, 'fatal_errorno'):
msg += ' (%s)' % test.fatal_errorno
msg += ': ' + test.fatal_error
raise failures.WorkupFailure(msg)
#use the jaguar/qsite diff function.
diffs = test.diff(ref, factor=tolOverrides.get("tol", 1.0))
outString = []
#Additional tests:
#memory usage by subjob
diffs.extend(getMemDiffs(testFile, refFile, tolOverrides))
#atom RMSD:
try:
rmsd = getRMSD(test.getStructures()[-1], ref.getStructures()[-1])
except IncompleteOutput:
if ref_passed:
raise
else:
if rmsd > tolOverrides.get('rmsd', minPrecision * 10):
msg = "{:<25} {:>20}\n".format(' rmsd', n2s(rmsd))
outString.append(msg)
#
for d in diffs:
prop, o_value, ref_value = d
#modifies outString to hold new results!
printline(prop, o_value, ref_value, outString, tolOverrides)
    # Job duration.
    testTime, refTime = None, None
    try:
        refTime = ref.getDuration().total_seconds()
    except AttributeError:
        if ref_passed:
            raise
    else:
        testTime = test.getDuration().total_seconds()
        # time = 0.0 is possible
        try:
            percentDiff = abs(testTime - refTime) / refTime
        except ZeroDivisionError:
            percentDiff = tolOverrides.get(
                'duration%', minPrecisionPercent * 7) / 100 + 0.1
        if (percentDiff > tolOverrides.get(
                'duration%', minPrecisionPercent * 7) / 100 and
                abs(testTime - refTime) > tolOverrides.get('duration', 60.0)):
            testTimeStr = str(int(testTime)) + "s"
            refTimeStr = str(int(refTime)) + "s"
            # Report the difference so it reaches the summary; with
            # timing=True a job may 'fail' on timing alone.
            if diffs or tolOverrides.get("timing", False):
                printline("duration", testTimeStr, refTimeStr, outString,
                          tolOverrides)
    if outString:
        testJ, refJ = getJagVersion(testFile), getJagVersion(refFile)
        message = ("Values shown for " + testFile + "\nDiff = %s - %s\n" %
                   (testFile, refFile))
        if testJ and refJ:
            time_expr = ''
            if testTime is not None:
                time_expr = ", Job duration(s): %.2f" % testTime
            message += "{} Jaguar version: {}{}\n".format(
                testFile, testJ, time_expr)
            if refTime is not None:
                time_expr = ", Job duration(s): %.2f" % refTime
            message += "{} Jaguar version: {}{}\n".format(
                refFile, refJ, time_expr)
        message += "\n"
        outString.insert(0, message)
        outString.append("\n")
outString.append("\n")
if "log" in tolOverrides:
with open(tolOverrides["log"], "w") as logFile:
for line in outString:
logFile.write(line)
msg = " {} differences found. Printing summary to {}".format(
len(outString) - 2, tolOverrides["log"])
raise AssertionError(msg)
else:
raise AssertionError(''.join(outString))
return True
def get_tol_override(tolOverrides, prop):
"""
    Search tolOverrides for a tolerance override for ``prop``; keys may
    contain fnmatch-style wildcards such as ``*``.
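
    Illustrative example (a sketch, assuming a wildcard key)::

        >>> get_tol_override({'energy_*': 1e-3}, 'energy_2_electron')
        0.001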
"""
override = minPrecision
    # If there is an exact match, grab it. This is required because fnmatch
    # treats '[' and ']' as special characters and won't match them literally.
if prop in tolOverrides:
override = tolOverrides[prop]
else:
for k, v in tolOverrides.items():
match = fnmatch.fnmatch(prop, k)
if match:
override = v
break
return override
def printline(prop, o_value, ref_value, logArray, tolOverrides):
"""
    Compares the difference between two properties to a tolerance. If the
    difference is greater than the tolerance, appends a string explaining
    this to an output array. This function recurses to examine differences
    within arrays and dictionaries.
:param prop: The name of the property
:param o_value: The 'output value' to be compared
:param ref_value: The reference value to be compared
:param logArray: An array of strings, each of which describes a
difference. Results are appended.
:param tolOverrides: tolerance overrides for specific property names.
:type tolOverrides: dict
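
    Illustrative example (a sketch of the numeric path)::

        out = []
        printline('energy', 1.0, 1.5, out, {})
        # out now holds one formatted line, roughly:
        #  energy                      value=1.000000       diff=-0.500000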
"""
def append_line(s1, s2, s3, logArray):
if len(str(s2)) > 20 or len(str(s3)) > 20:
message = f"\n{s1}: \n {s2}\n {s3}\n\n"
else:
message = f"{s1:<25} {s2:>20} {s3:>20}\n"
logArray.append(message)
return
s1 = " " + prop
s2 = ""
s3 = ""
override = get_tol_override(tolOverrides, prop.strip())
try:
if override.lower() == 'ignore':
return
except AttributeError:
pass
if o_value is None and ref_value is None:
return
if o_value is None:
s2 = "is missing from new file"
append_line(s1, s2, s3, logArray)
return
if ref_value is None:
s2 = "is missing from reference"
append_line(s1, s2, s3, logArray)
return
#Strings must match exactly
if isinstance(o_value, str):
if o_value != ref_value:
s2 = o_value
s3 = "%s" % ref_value
append_line(s1, s2, s3, logArray)
return
else:
return
#Dictionaries are compared recursively
#by looking at the differences between matching elements
if hasattr(o_value, "keys"):
if not hasattr(ref_value, "keys"):
s2 = str(len(o_value)) + " values"
s3 = ref_value
append_line(s1, s2, s3, logArray)
return
else:
logArray.append(s1 + "\n")
lenNow = len(logArray)
s1 += "."
            for a in set(list(o_value) + list(ref_value)):
                # use .get() so keys present in only one dict are reported
                # as missing rather than raising KeyError
                s2 = o_value.get(a)
                s3 = ref_value.get(a)
                printline(s1 + a, s2, s3, logArray, tolOverrides)
if len(logArray) == lenNow:
logArray.pop()
return
#Arrays are compared recursively
#by looking at the differences between matching elements
if hasattr(o_value, "__getitem__"):
        # numpy floats have __getitem__ but it's not actually callable
try:
o_len = len(o_value)
ok = True
except TypeError:
ok = False
if ok:
if not hasattr(ref_value, "__getitem__"):
s2 = str(len(o_value)) + " values"
s3 = ref_value
append_line(s1, s2, s3, logArray)
return
elif len(o_value) != len(ref_value):
s2 = str(len(o_value)) + " values"
s3 = str(len(ref_value)) + " values"
append_line(s1, s2, s3, logArray)
return
else:
logArray.append(s1 + "\n")
lenNow = len(logArray)
for i in range(len(o_value)):
s1a = "%s[%d]" % (s1, i)
printline(s1a, o_value[i], ref_value[i], logArray,
tolOverrides)
if len(logArray) == lenNow:
logArray.pop()
return
#Other Iterables are compared recursively
#by looking at the differences between matching elements
if hasattr(o_value, "__iter__"):
if not hasattr(ref_value, "__iter__"):
s2 = str(len(o_value)) + " values"
s3 = ref_value
append_line(s1, s2, s3, logArray)
return
elif len(o_value) != len(ref_value):
s2 = str(len(o_value)) + " values"
s3 = str(len(ref_value)) + " values"
append_line(s1, s2, s3, logArray)
return
else:
logArray.append(s1 + "\n")
lenNow = len(logArray)
            for i, (o_valueI, ref_valueI) in enumerate(zip(o_value, ref_value)):
                s1a = "%s[%d]" % (s1, i)
                printline(s1a, o_valueI, ref_valueI, logArray, tolOverrides)
if len(logArray) == lenNow:
logArray.pop()
return
if hasattr(o_value, "_attrs"):
#Property lists are compared recursively
#by looking at the differences between matching elements
logArray.append(s1 + "\n")
lenNow = len(logArray)
s1 += "."
for a in o_value._attrs:
            s2 = getattr(o_value, a)
            s3 = getattr(ref_value, a)
printline(s1 + a, s2, s3, logArray, tolOverrides)
if len(logArray) == lenNow:
logArray.pop()
return
    # Objects that don't have _attrs but do carry data in __dict__
    if hasattr(o_value, "__dict__"):
        if not hasattr(ref_value, "__dict__"):
            s2 = str(len(o_value.__dict__)) + " attributes"
            s3 = ref_value
            append_line(s1, s2, s3, logArray)
return
else:
            # Union of keys from both objects, preserving o_value's order
            all_keys = list(o_value.__dict__)
            for k in ref_value.__dict__:
                if k not in all_keys:
                    all_keys.append(k)
for k in all_keys:
s1a = f"{s1}.{k}"
printline(s1a, o_value.__dict__.get(k, None),
ref_value.__dict__.get(k, None), logArray,
tolOverrides)
return
    # This property can be subtracted. Is the difference greater than
    # the tolerance?
    if hasattr(o_value, "__sub__"):
        diff = o_value - ref_value
        if hasattr(diff, "__abs__"):
            if abs(diff) < override:
                return
            else:
                s2 = "value=%f" % o_value
                s3 = "diff=%f" % diff
                append_line(s1, s2, s3, logArray)
                return
        else:
            try:
                if abs(float(str(diff))) < override:
                    return
                else:
                    s2 = "value=%f" % float(str(o_value))
                    s3 = "diff=%f" % float(str(diff))
                    append_line(s1, s2, s3, logArray)
                    return
            except (TypeError, ValueError):
                # fall through and report the raw values below
                pass
    # Next-to-last resort: are the two objects equal? In Python 3 every
    # object defines __eq__, so this branch handles anything non-numeric.
    elif not o_value == ref_value:
        # `not ... ==` rather than `!=` because __ne__ may not be defined
        s2 = n2s(o_value, 4)
        s3 = "ref=%s" % n2s(ref_value)
        append_line(s1, s2, s3, logArray)
        return
    else:
        return
    # If we haven't returned yet, just report the two values.
s2 = o_value
s3 = "ref=%s" % ref_value
append_line(s1, s2, s3, logArray)
def getJagVersion(fileName):
    """Gets the Jaguar version from a Jaguar/QSite output file."""
with open(fileName) as fh:
for line in fh:
line = line.lower()
if "jaguar version" in line or "qsite version" in line:
line = line.replace(",", "")
line = line.split("version")[-1]
line = line.replace(" ", "")
line = line.replace("|", "")
line = line.split("release")
line = line[0] + "." + line[1]
line = line.strip()
return line
def optionTest(aName, aValue, tolOverrides):
"""
Parses input arguments. Can be used on either command line or config
file.
:param aName: Name of the parameter
:type aName: str
:param aValue: Parameter's value
:type aValue: str
:param tolOverrides: Tolerances. Results are appended.
:type tolOverrides: dict
"""
global minPrecisionPercent
global minPrecision
if "%" in aValue:
aName = aName + "%"
aValue = aValue.replace("%", "")
if aName == "tol" or aName == "tolerance":
minPrecision *= float(aValue)
tolOverrides["tol"] = float(aValue)
elif aName == "tol%" or aName == "tolp":
minPrecisionPercent *= float(aValue)
elif aName == "minPrecision":
minPrecision = float(aValue)
elif aName == "minPrecisionPercent" or aName == "minPrecision%":
minPrecisionPercent = float(aValue)
elif aName == "log" or aName == "logfile":
tolOverrides["log"] = aValue
elif aName == "config":
tolOverrides["config"] = aValue
elif aName == "timing":
if aValue.lower() == "false":
aValue = False
else:
aValue = bool(aValue)
tolOverrides["timing"] = aValue
else:
try:
tolOverrides[aName] = float(aValue)
except ValueError:
if aValue.lower() == 'ignore':
tolOverrides[aName] = aValue
else:
raise
return tolOverrides
def parseConfigFile(tolOverrides):
"""
Parses a config file for use in this script. Config file follows this
example format::
log = workup.log #Everything following a pound is a comment
tol 2.0 #equals signs are optional
energy_2_electron = 1e-2 #mostly, the config file is a place to
#put multiple tolerance overrides.
:param tolOverrides: A dictionary containing any known options, mostly
tolerance overrides
"""
with open(tolOverrides["config"]) as config:
for line in config:
line = line.split("#")[0]
if not line:
continue
line = line.replace("=", " ")
try:
name, value = line.split()[:2]
optionTest(name, value, tolOverrides)
except:
pass
return tolOverrides
def n2s(number, precision=3):
    """Formats a number for display: scientific notation outside roughly
    [1e-5, 1e5], otherwise rounded to about ``precision`` significant
    figures."""
try:
number = float(str(number))
if abs(number) > 1e5 or abs(number) < 1e-5:
return "%.2e" % number
elif abs(number) > 1000:
return "%d" % number
else:
return "%s" % round(number,
-int(floor(log10(abs(number)))) + precision - 1)
    except (TypeError, ValueError):
        return str(number)
def getRMSD(test, ref):
"""finds the RMSD between the atoms in two structure objects"""
    delta = test.getXYZ() - ref.getXYZ()
    msd = numpy.sum(delta * delta) / len(test.atom)
    return numpy.sqrt(msd)
def get_mems_from_file(filename):
"""
Scan a Jaguar outfile for memory used in each subprogram.
:param filename: file name
:type filename: str
:return: Memory used for each subprogram
    :rtype: dict
"""
mems = {}
mbBuffer = 0
with open(filename) as afile:
for line in afile:
if "Peak memory" in line:
mbBuffer = float(line.split()[-2])
if "end of program" in line:
prg = line.split()[-1]
if prg in mems:
if mbBuffer > mems[prg]:
mems[prg] = mbBuffer
mbBuffer = 0
else:
mems[prg] = mbBuffer
mbBuffer = 0
return mems
def getMemDiffs(test, ref, tolOverrides):
"""
Finds the difference in memory usage (for each subprogram) between the
two runs. This diff is skipped unless a tolerance is manually given.
:param test: Name of the test output file
:type test: str
:param ref: Name of the reference output file
:type ref: str
    :param tolOverrides: Tolerances for specific tests. This function only
        looks at the ``Memory%`` key, which gives the allowed memory
        difference as a percentage.
:type tolOverrides: dict
:return: Memory differences of each executable type between the two
files
:rtype: list
"""
if 'Memory%' in tolOverrides:
        percentTol = tolOverrides['Memory%'] / 100.0
else:
return []
testMem = get_mems_from_file(test)
refMem = get_mems_from_file(ref)
memDiffs = []
for key in set(list(testMem) + list(refMem)):
if key not in testMem or key not in refMem:
continue
# only a meaningful comparison if memory usage was recorded in
# both files
elif testMem[key] != 0 and refMem[key] != 0:
            if abs(testMem[key] - refMem[key]) / refMem[key] > percentTol:
memDiffs.append(("Program " + key, "%dMb" % testMem[key],
"diff = %dMb" % (testMem[key] - refMem[key])))
return memDiffs
if __name__ == "__main__":
"""
CLI tool to test and rerun diff comparisons using the jaguar/qsite "diff"
function.
usage:
$SCHRODINGER/run jaguarDiff.py file.out reference.out
"""
try:
def printHelp():
"""prints a usage message."""
print(" Finds differences between two Jaguar/QSite output files.")
print(" Usage:")
print(
" $SCHRODINGER/run jaguarDiff.py file.out reference.out [tol=1.0 log=workup.log config=config.txt value=1e-4]"
)
print("file.out - Path of the file to be compared")
print("reference.out - Path of the reference file")
print(" Optional parameters:")
print(
"tol=x - A multipler to by which loosen ALL tolerances"
)
print(
"log - Path to an optional output log. if it is not provided, the output is written to the console."
)
print(
"config - A configuration file format as follows"
)
print(
" Any text after a '#' character is a comment, and is ignored")
print(" all other text is in key/value pairs, one per line. ")
print(
" possible keys are: log, tol, and any value that appears in the output file"
)
print(" i.e.")
print(" value = 1e-4")
print(" overrides the tolerance for 'value' to 1e-4")
print(
"All other arguments are taken to be tolerance overrides for specific values"
)
from sys import argv
if "-h" in argv or len(argv) < 3:
printHelp()
else:
jaguarDiff(*argv[1:])
except KeyboardInterrupt as E:
print("User exited. Analysis not finished: ", str(E))