#!/usr/bin/env python
import collections
import csv
# configure logging
import logging
import optparse
import os
import os.path
import re
import sys
from datetime import date
from datetime import datetime
from datetime import timedelta
from past.utils import old_div
from schrodinger.utils import csv_unicode
# Module-level logger; main() attaches a stdout handler, the NullHandler
# keeps output silent when this module is used as a library.
log = logging.getLogger("logparser")
log.addHandler(logging.NullHandler())
# One checkin/checkout record: 'time' is a datetime, 'tokens' is a signed
# int (positive = checkout, negative = checkin); the rest are strings.
event = collections.namedtuple('event', "time feature tokens user host")
# Module-global OptionParser, set by parse_args(); the __main__ handler
# uses it to print help after a UsageError.
parser = None
class UsageError(Exception):
    """
    Raised for command-line usage errors.  The __main__ handler prints the
    message and the option help when this propagates out of main().
    """

    def __init__(self, msg):
        # Chain to Exception so exc.args and pickling behave normally
        # (the original skipped this, leaving args empty); keep the .msg
        # attribute for existing callers.
        super().__init__(msg)
        self.msg = msg

    def __str__(self):
        return self.msg
# Logfile-parsing code
def parse_logfile(logfile):
    """
    Parse an lmgrd logfile and return a list of checkin/checkout events.

    Checkin/checkout events are reported as named tuples of type 'event'.
    (See the parse_events() header doc for a description.)
    """
    # The file is closed by the context manager before we return.
    with open(logfile, 'r') as handle:
        return parse_events(handle)
def parse_events(loglines):
    """
    Read the lines of an lmgrd log file, and return a list of checkin/checkout
    events. Loglines is an iterable that provides the lines of the log file.
    Checkin/checkout events are reported as named tuples of type 'event',
    which have the elements (time, feature, tokens, user, host). The
    'tokens' element is a signed integer; positive token counts represent
    checkouts and negative counts represent checkins. The time is a
    datetime. The 'feature', 'host', and 'user' elements are strings.
    Checkin/checkout events include only the time for the event; the date
    needs to be determined separately. The date is reported in the log file
    at startup and periodically on TIMESTAMP lines. TIMESTAMP lines are not
    written at midnight, for some reason, and so transitions from one day to
    the next need to be inferred from the hour of the checkin/checkout time.
    NOTE: If the lmgrd log doesn't include the startup output, events logged
    before the first TIMESTAMP line will be ignored.
    Sample log file checkout/in and timestamp lines are shown below.::

        23:06:31 (lmgrd) FLEXnet Licensing (v11.10.0.0 build 95001 x64_mac10) started on Simon-Gaos-MacBook-Pro.local (Mac) (10/30/2012)
        13:20:41 (SCHROD) OUT: "MMLIBS" saunders@ancalagon
        13:20:41 (SCHROD) IN: "MMLIBS" saunders@ancalagon
        9:10:40 (lmgrd) TIMESTAMP 10/31/2012
        9:10:40 (SCHROD) TIMESTAMP 10/31/2012
        9:42:58 (SCHROD) OUT: "SUITE_22JAN2013" saunders@Simon-Gaos-MacBook-Pro.local (4 licenses)
        9:42:58 (SCHROD) OUT: "GLIDE_SP_DOCKING" saunders@Simon-Gaos-MacBook-Pro.local (4 licenses)
        9:42:58 (SCHROD) IN: "SUITE_22JAN2013" saunders@Simon-Gaos-MacBook-Pro.local (4 licenses)
        9:42:58 (SCHROD) IN: "GLIDE_SP_DOCKING" saunders@Simon-Gaos-MacBook-Pro.local (4 licenses)
        21:37:00 (lmgrd) TIMESTAMP 10/31/2012
        11:46:36 (SCHROD) UPGRADE: "PRIMEX_MAIN" golubkov@lori.schrodinger.com (1->8 licenses)
    """
    # Matches the one-time lmgrd startup banner, which carries the date.
    start_regex = re.compile(
        r"""
        ^\s*(\d+):(\d\d):(\d\d)\s+              # timestamp [HH:MM:SS]
        \((lmgrd)\)\s                           # (lmgrd)
        FLEXnet\sLicensing                      # FLEXnet Licensing
        .*started.*                             # started
        \((\d+)/(\d+)/(\d+)\)$                  # date [MM/DD/YYYY]
        """, re.X)
    # Matches the periodic TIMESTAMP lines, which also carry the date.
    time_regex = re.compile(
        r"""
        ^\s*(\d+):(\d\d):(\d\d)\s+              # timestamp [HH:MM:SS]
        \((\w+)\)\s                             # source [(lmgrd) or (SCHROD)]
        TIMESTAMP\s                             # TIMESTAMP
        (\d+)/(\d+)/(\d+)                       # date [MM/DD/YYYY]
        """, re.X)
    # Matches IN/OUT/UPGRADE checkin/checkout lines (time-of-day only).
    lic_regex = re.compile(
        r"""
        ^\s*(\d+):(\d\d):(\d\d)\s+              # timestamp [HH:MM:SS]
        \((?:lmgrd|SCHROD)\)\s+                 # source [(lmgrd) or (SCHROD)]
        (IN|OUT|UPGRADE):\s+                    # action [IN, OUT, or UPGRADE]
        "([^"]+)"\s+                            # license feature name ["MMLIBS"]
        (\S[^@]*)@(\S+)                         # user [user@host]
        (?:\s+\((\d+)(?:->(\d+))?\slicenses\))? # license count [(4 licenses), (1->8 licenses)]
        """, re.X)
    # Parser state: curdate is the current calendar date (None until the
    # first startup/TIMESTAMP line is seen); lasthour is used to detect the
    # midnight rollover; first_time/discarded track events encountered
    # before any date was known.
    curdate = None
    lasthour = 0
    first_time = None
    discarded = 0
    events = []
    for line in loglines:
        # Skip comments
        if line.startswith('#'):
            continue
        match = lic_regex.match(line)
        if match:
            (thour, tmin, tsec, action, feature, user, host, n1,
             n2) = match.groups()
            # Token delta for this event: UPGRADE lines carry an
            # "(old->new licenses)" pair; IN/OUT lines carry an optional
            # "(n licenses)" count, defaulting to a single token.
            if action == "UPGRADE":
                numtokens = int(n2) - int(n1)
            elif n1:
                numtokens = int(n1)
            else:
                numtokens = 1
            if action == "IN":
                numtokens = -numtokens
            curhour, curmin, cursec = int(thour), int(tmin), int(tsec)
            # The hour running backwards means we crossed midnight
            # (TIMESTAMP lines are not written at midnight).
            if curhour < lasthour:
                curdate = curdate + timedelta(1)
            try:
                # If curdate is still None, curdate.year raises
                # AttributeError and the event is counted as discarded.
                timestamp = datetime(curdate.year, curdate.month, curdate.day,
                                     curhour, curmin, cursec)
                events.append(event(timestamp, feature, numtokens, user, host))
                lasthour = curhour  # don't record lasthour until we have a usable curdate
                if first_time is None:
                    first_time = timestamp
            except AttributeError:
                if curdate is None:
                    discarded += 1
                else:
                    raise
        else:
            # Not a checkout line: look for a TIMESTAMP (or the startup
            # banner) to establish or refresh the current date.
            match = time_regex.match(line)
            if not match:
                match = start_regex.match(line)
            if match:
                (thour, tmin, tsec, source, mon, day, year) = match.groups()
                curdate = date(int(year), int(mon), int(day))
                lasthour = int(thour)
            continue
    if discarded > 0:
        log.debug(
            "\nWARNING: Discarded %d events logged before first TIMESTAMP @ %s"
            % (discarded, str(first_time)))
    return events
# Helper functions for usage workup
def period_start(event_time, period):
    """
    Returns the start time for the reporting interval in which the given
    event time lies.  The event time is a datetime; the period (the length
    of a reporting interval) is a timedelta.

    Note: If the period is less than a day, there must be an integral number
    of reporting intervals per day.  If it's greater than a day, the period
    must be an integral number of days.  These assumptions aren't verified
    here.  If the period is zero, the event_time is returned.
    """
    d = event_time.date()
    if period.days == 0:
        if period.seconds == 0:
            # Zero-length period: the event is its own interval start.
            return event_time
        # Sub-day period: whole periods elapsed since midnight.
        day_start = datetime(d.year, d.month, d.day)
        day_elapsed = (event_time - day_start).seconds
        return day_start + timedelta(
            0, period.seconds * (day_elapsed // period.seconds))
    else:
        # Multi-day period: whole periods elapsed since January 1.
        # Bug fix: the quotient must be scaled back up by period.days
        # (mirroring the period.seconds factor above); previously a 7-day
        # period produced day offsets of 0, 1, 2, ... instead of 0, 7, 14.
        year_start = date(d.year, 1, 1)
        year_days = (d - year_start).days
        start_day = year_start + timedelta(
            period.days * (year_days // period.days), 0)
        return datetime(start_day.year, start_day.month, start_day.day)
# Token-usage workup
def maximum_usage(events,
                  features,
                  period,
                  start=None,
                  stop=None,
                  user="",
                  host="",
                  init=None):
    """
    Take the list of checkin/checkout events and features, and return a
    table with the maximum number of tokens checked out at a time for each
    feature over each time period.

    Events can be filtered by user and host.  'init' is a mapping of
    feature name to the token count checked out at the start of the log
    (default: nothing checked out).

    The result is a list of rows (lists). Each row represents a single period,
    with the end-time for that period followed by the maximum number of
    tokens checked out for each feature (in the same order as the input
    'features' list.)
    """

    def _format_row(row):
        # Debug rendering of one output row: the period end-time followed
        # by "count:feature" for each nonzero count.  (Replaces a call to
        # format_usage_row(), which was not defined anywhere in this
        # module and raised NameError whenever a row was emitted.)
        return ("%20s " % row[0]) + "".join(
            " %d:%s" % (count, feat)
            for (count, feat) in zip(row[1:], features) if count != 0)

    if init is None:
        # Avoid the original mutable default argument (init=[]).
        init = {}
    result = []
    tokens = {}
    maxtokens = {}
    for f in features:
        tokens[f] = init[f] if f in init else 0
        maxtokens[f] = 0
    if period > timedelta(0, 0):
        periodend = start + period
    else:
        periodend = stop
    log.debug("%20s %16s %8s %12s %-8s" %
              ("Time", "Feature", "Tokens", "User", "Host"))
    for ev in events:
        while periodend < ev.time:
            # Generate output row for this period.
            row = [periodend] + [maxtokens[f] for f in features]
            result.append(row)
            log.debug(_format_row(row))
            # The next period starts from the current checkout levels.
            for f in features:
                maxtokens[f] = tokens[f]
            if periodend < stop:
                periodend += period
            else:
                return result
        if (user == "" or ev.user == user) and (host == "" or host == ev.host):
            log.debug("%20s %16s %8d %12s %-8s" % ev)
            tokens[ev.feature] += ev.tokens
            if ev.time >= start and maxtokens[ev.feature] < tokens[ev.feature]:
                maxtokens[ev.feature] = tokens[ev.feature]
    # Handle intervals following the final event
    while periodend <= stop:
        row = [periodend] + [maxtokens[f] for f in features]
        result.append(row)
        log.debug(_format_row(row))
        for f in features:
            maxtokens[f] = tokens[f]
        if periodend == stop:
            break
        periodend += period
    return result
def time_weighted_usage(events,
                        features,
                        period,
                        start=None,
                        stop=None,
                        user="",
                        host="",
                        init=None):
    """
    Take the list of checkin/checkout events and features, and return a
    table with the total usage of each feature in token-hours for each
    time period.  Events can be filtered by user and host, and by the
    start and stop times for the analysis.  The number of tokens checked
    out for each feature at the beginning of the log can be specified using
    the init parameter (a dict, mapping feature name to token count.)

    The result is a list of rows (lists). Each row represents a single period,
    with the end-time for that period followed by the usage for each feature
    (in the same order as the input 'features' list) in token-hours.
    """
    if init is None:
        # Avoid the original mutable default argument (init=[]).
        init = {}
    result = []
    tokens = {}  # current checked-out token count, per feature
    usage = {}  # accumulated token-seconds in the current period, per feature
    lasttime = {}  # time 'usage' was last brought up to date, per feature
    for f in features:
        tokens[f] = init[f] if f in init else 0
        usage[f] = 0
        lasttime[f] = start
    if period > timedelta(0, 0):
        periodend = start + period
    else:
        periodend = stop
    # Loop over checkin/checkout events
    for ev in events:
        while periodend < ev.time:
            # Close out the current period: charge each feature for the
            # time since its last update, then emit the row in token-hours.
            for f in features:
                elapsed = (periodend - lasttime[f]).total_seconds()
                usage[f] += elapsed * tokens[f]
            row = [periodend] + [usage[f] / 3600.0 for f in features]
            result.append(row)
            for f in features:
                usage[f] = 0
                lasttime[f] = periodend
            if periodend < stop:
                periodend += period
            else:
                return result
        # Handle this event
        if (user == "" or ev.user == user) and (host == "" or host == ev.host):
            if ev.time >= start:
                elapsed = (ev.time - lasttime[ev.feature]).total_seconds()
                usage[ev.feature] += elapsed * tokens[ev.feature]
                lasttime[ev.feature] = ev.time
            tokens[ev.feature] += ev.tokens
            if tokens[ev.feature] < 0:
                # More checkins than checkouts seen; the log is incomplete.
                log.info("!! %d %s tokens checked out!" %
                         (tokens[ev.feature], ev.feature))
    # Handle intervals following the final event
    while periodend <= stop:
        for f in features:
            elapsed = (periodend - lasttime[f]).total_seconds()
            usage[f] += elapsed * tokens[f]
        row = [periodend] + [usage[f] / 3600.0 for f in features]
        result.append(row)
        if periodend == stop:
            break
        for f in features:
            usage[f] = 0
            lasttime[f] = periodend
        periodend += period
    return result
def initial_tokens(events, features, user="", host=""):
    """
    Take the list of checkin/checkout events and features, and returns
    dicts reporting the apparent number of checked-out tokens for each feature
    at the point where the log starts, and the final number of tokens still
    checked out at the point where the log ends.

    Ordinarily, this should be zero.  If the checkout records are incomplete,
    there may be more tokens checked in than checked out.  The dict returned
    by this function will be used to make sure the total number of tokens
    checked out for each feature will never be reported to be negative.
    """
    log.debug("\nDetermine the minimum token usage over the entire record...")
    log.debug("Host: %s" % host)
    log.debug("User: %s" % user)
    tokens = dict.fromkeys(features, 0)
    mintokens = dict.fromkeys(features, 0)
    for ev in events:
        # Skip events that don't match the requested user/host filters.
        if user and ev.user != user:
            continue
        if host and ev.host != host:
            continue
        tokens[ev.feature] += ev.tokens
        mintokens[ev.feature] = min(mintokens[ev.feature], tokens[ev.feature])
    # The most negative running count tells us how many tokens must have
    # been out before the log started.
    init = {feat: -low for feat, low in mintokens.items()}
    final = {feat: init[feat] + tokens[feat] for feat in mintokens}
    return (init, final)
def write_events_csv(fname, events):
    """
    Write the given list of checkin/checkout events to a CSV file.
    """
    header = ("Time", "Feature", "Tokens", "User", "Host")
    with csv_unicode.writer_open(fname) as fd:
        writer = csv.writer(fd)
        writer.writerow(header)
        writer.writerows(events)
def write_usage_csv(fname, usage, features):
    """
    Write the given timeline of token usage to a CSV file.
    """
    with csv_unicode.writer_open(fname) as fd:
        writer = csv.writer(fd)
        writer.writerow(["Time"] + features)
        writer.writerows(usage)
def write_overall_usage_csv(fname, usage, features):
    """
    Write the given overall token usage to a CSV file,
    with each token's usage on a separate row.
    """
    with csv_unicode.writer_open(fname) as fd:
        writer = csv.writer(fd)
        writer.writerow(["Feature", "Usage"])
        # usage[0] is the single overall row; skip its leading time cell.
        writer.writerows(zip(features, usage[0][1:]))
def report_usage(usage, features):
    """
    Write the token usage timeline to the console.
    (This is mainly for debugging.)

    Only rows whose formatted line exceeds 32 characters (i.e. rows with
    at least one sizable nonzero count) are emitted.
    """
    log.info("")
    for row in usage:
        counts = "".join(" %d:%s" % (used, feat)
                         for used, feat in zip(row[1:], features)
                         if used != 0)
        line = "%20s %s" % (row[0], counts)
        if len(line) > 32:
            log.info(line)
def report_total_usage(total, features):
    """
    Write the given total usage data for each feature, one per line.
    """
    log.info("\nTotal token usage:")
    log.info("%-23s %12s" % ("feature", "token-hours"))
    # zip instead of parallel indexing: clearer, and a short 'total'
    # can no longer raise IndexError.
    for feature, hours in zip(features, total):
        log.info("%-28s %7.1f" % (feature, hours))
def calculate_total_usage(usage, features):
    """
    Calculate the total usage for each feature by summing down the list
    of per-period usage data.

    A list of the totals is returned, with the total usage for the nth
    feature in the nth position.  (The previous docstring claimed a tuple
    was returned; the value has always been a list.)
    """
    total = [0.0 for _ in features]
    for row in usage:
        # row[0] is the period end-time; the remaining cells line up
        # with 'features'.
        for i, value in enumerate(row[1:len(features) + 1]):
            total[i] += value
    return total
def parse_args(args=None):
    """
    Parse the commandline arguments and return a Namespace object containing the
    option values.

    Raises UsageError if no logfile argument is given or if the --period,
    --start, or --stop values cannot be parsed.
    """
    # The parser is kept in a module-level global so the __main__ handler
    # can call parser.print_help() when a UsageError propagates out.
    global parser
    prolog = "Usage: %prog [<options>] <logfile>"
    epilog = """ Parse an lmgrd log file and write a CSV file reporting the
    usage of Schrodinger license tokens. By default, the total time-weighted
    usage for each license feature is reported, for all licenses checked out
    over the entire log file. To calculate the license usage over a specific
    time period, use the "--start" and "--stop" options to specify the beginning
    and end of the time period. Use the "--user" or "--host" options to report
    only checkouts for a specific user or on a specific host. Use '--period
    <interval>' to get the time-dependent token usage for each time interval of
    the specified length; for example, use "--period 1h" to get hourly token
    usage.
    """
    # NOTE(review): optparse is deprecated in favor of argparse, but the
    # module-global parser and Namespace-style result are relied on below,
    # so the migration is deliberately not done here.
    parser = optparse.OptionParser(usage=prolog, epilog=epilog)
    parser.add_option("-o",
                      "--out",
                      dest="outfile",
                      default="out.csv",
                      help="CSV output file (default is 'out.csv')")
    parser.add_option("--events",
                      dest="events",
                      action="store_true",
                      help="Report checkin/checkout events, rather than usage.")
    parser.add_option(
        "--maxusage",
        dest="maxusage",
        action="store_true",
        help="Report maximum token usage for each feature, over each period.")
    parser.add_option(
        "--period",
        dest="period",
        default="0",
        help=
        "Report usage totals over each period (eg, '1h', '5m') or '0' to report overall usage. (default is 0)"
    )
    parser.add_option(
        "--start",
        dest="start",
        default=None,
        help=
        "Ignore events earlier than this time (eg '2014-07-01' or '2014-06-08 16:00')"
    )
    parser.add_option(
        "--stop",
        dest="stop",
        default=None,
        help=
        "Ignore events after this time (eg '2014-07-08' or '2014-06-09 04:00')")
    parser.add_option("--user",
                      dest="user",
                      default="",
                      help="Display only data for a specific user.")
    parser.add_option("--host",
                      dest="host",
                      default="",
                      help="Display only data for a specific host.")
    parser.add_option("--listfeatures",
                      action="store_true",
                      help="List names of all features checked in or out.")
    parser.add_option("--listusers",
                      action="store_true",
                      help="List names of all users found in the log.")
    parser.add_option("--listhosts",
                      action="store_true",
                      help="List names of all hosts found in the log.")
    parser.add_option("-d",
                      "--debug",
                      dest="debug",
                      action="store_true",
                      help="produce debug output")
    parser.add_option("-v",
                      "--verbose",
                      dest="verbose",
                      action="store_true",
                      help="display more details")
    (opt, args) = parser.parse_args(args)
    # The single positional argument is the lmgrd log file.
    if len(args) == 0:
        raise UsageError("No logfile was specified")
    opt.infile = args[0]
    # Convert the textual option values into timedelta/datetime objects;
    # bad values surface as UsageError so __main__ can show the help.
    try:
        opt.period = parse_timedelta(opt.period, units="s")
        if opt.start:
            opt.start = parse_datetime(opt.start)
        if opt.stop:
            opt.stop = parse_datetime(opt.stop)
    except ValueError as exc:
        raise UsageError(str(exc))
    # --debug implies --verbose.
    if opt.debug:
        opt.verbose = True
    return opt
def parse_timedelta(text, units="s"):
    """
    Parse the given string as a timedelta.  Units may be specified
    explicitly using one of the symbols "s", "m", "h", "d" (lowercase only).
    If no units are specified in the text, the default units are assumed.

    Raises ValueError for anything that is not a sequence of
    <digits><unit> groups (or a bare number).
    """
    original = text
    cleaned = text.strip().replace(" ", "")
    # A bare number gets the caller's default unit appended.
    if re.match(r"^\d+$", cleaned):
        cleaned += units
    if not re.match(r"^(\d+[smhd])+$", cleaned):
        raise ValueError("Cannot parse time interval '%s'" % original)
    seconds_per_unit = {"s": 1, "m": 60, "h": 3600, "d": 86400}
    total = timedelta(0)
    for count, unit in re.findall(r"(\d+)([smhd])", cleaned):
        total += timedelta(seconds=int(count) * seconds_per_unit[unit])
    return total
def parse_datetime(text):
    """
    Parse the given string as a datetime.

    Accepted formats, tried in order: "YYYY-MM-DD HH:MM", "YYMMDD HH:MM",
    "YYYY-MM-DD", and "YYMMDD".  Raises ValueError if none match.
    """
    for fmt in ("%Y-%m-%d %H:%M", "%y%m%d %H:%M", "%Y-%m-%d", "%y%m%d"):
        try:
            return datetime.strptime(text, fmt)
        except ValueError:
            continue
    raise ValueError("Cannot parse datetime '%s'" % text)
def main(args=None):
    """
    Command-line entry point: parse an lmgrd log file and report license
    token usage.  Returns a process exit code (0 on success, 1 on error).
    """
    log.addHandler(logging.StreamHandler(sys.stdout))
    log.setLevel(logging.INFO)
    # Parse the commandline args
    opt = parse_args(args)
    if opt.debug:
        log.setLevel(logging.DEBUG)
    # Check input file
    if not os.path.isfile(opt.infile):
        log.error("Error: Bad log file '%s'" % opt.infile)
        return 1
    if opt.verbose:
        log.info("Input log file: %s" % opt.infile)
    # Parse log file
    events = parse_logfile(opt.infile)
    if opt.user:
        events = [ev for ev in events if ev.user == opt.user]
    if opt.host:
        events = [ev for ev in events if ev.host == opt.host]
    # Guard against an empty event list (empty log, or the user/host
    # filters matched nothing); the code below indexes events[0]/events[-1]
    # and previously crashed with IndexError in that case.
    if not events:
        log.error("Error: No checkin/checkout events found in '%s'" %
                  opt.infile)
        return 1
    features = sorted(set(ev.feature for ev in events))
    users = sorted(set(ev.user for ev in events))
    hosts = sorted(set(ev.host for ev in events))
    first_time = events[0].time
    last_time = events[-1].time
    # Default analysis window: from the start of the period containing the
    # first event through the end of the period containing the last one.
    if opt.start is None:
        opt.start = period_start(first_time, opt.period)
    if opt.stop is None:
        if opt.period > timedelta(0, 0):
            opt.stop = period_start(last_time, opt.period) + opt.period
        else:
            opt.stop = last_time
    log.info("\nParsed %d checkin/checkout events" % len(events))
    log.info(" between %s" % first_time)
    log.info(" and %s" % last_time)
    log.info(" %d different features were checked out" % len(features))
    log.info(" by %d different users on %d hosts" % (len(users), len(hosts)))
    if opt.listusers:
        log.info("\nUsers:")
        for u in users:
            log.info(u)
    if opt.listhosts:
        log.info("\nHosts:")
        for h in hosts:
            log.info(h)
    if opt.listfeatures:
        log.info("\nFeatures:")
        for f in features:
            log.info(f)
    if opt.events:
        # Raw event dump requested; no usage analysis.
        write_events_csv(opt.outfile, events)
        log.info("\nCheckin/checkout events written to %s" % opt.outfile)
        return 0
    # Apparent token levels at the start and end of the log, used to keep
    # reported counts from going negative on incomplete logs.
    init, final = initial_tokens(events, features, user=opt.user, host=opt.host)
    line = "\ninitial usage: %s" % "".join(
        [" %d:%s" % (v, k) for (k, v) in init.items() if v > 0])
    log.info(line)
    line = "final usage: %s" % "".join(
        [" %d:%s" % (v, k) for (k, v) in final.items() if v > 0])
    log.info(line)
    if opt.maxusage:
        if opt.period > timedelta(0, 0):
            log.info("\nCalculate the maximum token usage over each period.")
        else:
            log.info("\nCalculate the maximum token usage for each feature.")
    else:
        if opt.period > timedelta(0, 0):
            log.info(
                "\nCalculate time-weighted token usage (token-hours) over each period."
            )
        else:
            log.info(
                "\nCalculate total time-weighted token usage (token-hours) for each feature."
            )
    log.info(" Start time: %s" % str(opt.start))
    log.info(" Stop time: %s" % str(opt.stop))
    if opt.period > timedelta(0, 0):
        log.info(" Period: %s" % str(opt.period))
    if opt.host:
        log.info(" Host: %s" % opt.host)
    if opt.user:
        log.info(" User: %s" % opt.user)
    if opt.maxusage:
        usage = maximum_usage(events,
                              features,
                              period=opt.period,
                              start=opt.start,
                              stop=opt.stop,
                              user=opt.user,
                              host=opt.host,
                              init=init)
    else:
        usage = time_weighted_usage(events,
                                    features,
                                    period=opt.period,
                                    start=opt.start,
                                    stop=opt.stop,
                                    user=opt.user,
                                    host=opt.host,
                                    init=init)
    if opt.debug:
        report_usage(usage, features)
    if not opt.maxusage:
        if opt.period > timedelta(0, 0):
            total = calculate_total_usage(usage, features)
            report_total_usage(total, features)
        else:
            # With no period, the single row already holds the totals.
            report_total_usage(usage[0][1:], features)
    if opt.period == timedelta(0, 0):
        write_overall_usage_csv(opt.outfile, usage, features)
    else:
        write_usage_csv(opt.outfile, usage, features)
    log.info("\nUsage data written to %s" % opt.outfile)
    return 0
if __name__ == "__main__":
    try:
        status = main(sys.argv[1:])
    except UsageError as exc:
        # Bad command line: report the problem and show the option help
        # (parse_args() set the module-global parser before raising).
        status = 1
        print("ERROR:", exc)
        parser.print_help()
    sys.exit(status)