"""
Functions to parse MSJ files
Copyright Schrodinger, LLC. All rights reserved.
"""
import contextlib
from typing import Tuple
from schrodinger.application.desmond import arkdb
from schrodinger.utils import sea
def _canonicalize_key(key: str) -> str:
key = key.strip()
if key[0] == "[":
return "stage" + key
if key[:5] != "stage":
return "stage[i]." + key
return key
class Msj(arkdb.ArkDb):
    """
    An `arkdb.ArkDb` whose keys address the stages of a parsed MSJ file.

    The use of this class is very similar to `arkdb.ArkDb`. Some terminologies
    and syntax defined in `arkdb.ArkDb` are not repeated here. So if you are
    unfamiliar with `arkdb.ArkDb`, you are encouraged to read the docstrings
    there. Below are examples to demonstrate and explain the use of this
    class::

        msj = parse(msj_fname)

        # 1. To read the `time` setting in stage 3:
        msj.get("stage[2].simulate.time")
        msj.get("stage[-1].simulate.time")
        # - `stage[2]` in the key corresponds to stage 3 because the stage
        #   index in the code is always zero based.
        # - Negative stage indices are supported and are of the same meaning
        #   as in Python.
        # - The stage type (here it is `simulate`) is always needed in the
        #   key. This is good for preventing some kind of index errors, and is
        #   very useful when you do NOT care about the index (see below).

        # 2. To read the `time` setting in the first `lambda_hopping` stage:
        msj.get("stage[i].lambda_hopping.time")

        # 3. Default value for non-existing setting:
        msj.get("stage[i].lambda_hopping.phony_setting", 1000)

        # 4. To change the value of a setting:
        msj.put("stage[i].lambda_hopping.time", 1000)

        # 5. To append a new stage `fep_analysis` and use the stage's default
        #    settings:
        msj.put("stage[$].fep_analysis", {})

        # 6. To insert a new stage `build_geometry` to be the 2nd stage:
        msj.put("stage[@]1.build_geometry", {})

        # 7. To insert a new stage `assign_forcefield` with custom settings:
        msj.put("stage[@]2.assign_forcefield",
                sea.Map('''
                    hydrogen_mass_repartition = off
                    make_alchemical_water = on'''))
        # Note that it's necessary to convert the settings in the form of a
        # string into a `sea.Map`.

        # 8. To delete the first "simulate" stage:
        msj.delete("stage[i].simulate")

        # 9. To delete the "simulate" stage whose title reads "production":
        msj.delete("stage[i].simulate", matches="title=production")

        # 10. To delete all "simulate" stages:
        msj.delete("stage[*].simulate")

        # 11. To access a particular stage, use the syntax:
        #     `msj.stage[<index>].<stage-name>.<setting>`:
        msj.stage[10].simulate.time.val += 1000

        # 12. Support for shorthand keys:
        msj.put("lambda_hopping.phony_setting", 2000)
        msj.get("lambda_hopping.phony_setting")
        msj.put("[@]1.build_geometry", {})
        # - If the key starts with the stage type name, it will be assumed to
        #   be prefixed with 'stage[i].'. In other words, the first instance
        #   of the stage type will be operated on.
        # - The key can start with '[i]', '[*]', '[@]', and '[$]', and it will
        #   be automatically prefixed with 'stage'.

        # 13. Find the index of the first "simulate" stage:
        first_simulate_index = msj.find_stages("simulate")[0].STAGE_INDEX
        # - `find_stages` always returns a tuple of the found stages, that's
        #   why `[0]` is used to get the first and the only found "simulate"
        #   stage.
        # - `STAGE_INDEX` gives the stage's index in the MSJ file. Note that
        #   the index is one-based.

        # 14. Find the index of the last "trim" stage:
        last_trim_index = msj.find_stages("[*].trim")[-1].STAGE_INDEX
        # - Here we use the key "[*].trim" to find all "trim" stages and then
        #   select the last one with `[-1]`.
        # - We don't use the key "trim" (remember it's a shorthand of
        #   "[i].trim"), because it means to get the first "trim" from the
        #   beginning of the stage list.

        # 15. Find the index of the second "simulate" stage:
        second_simulate_index = msj.find_stages("[*].simulate")[1].STAGE_INDEX

        # 16. Find the index of the "simulate" stage whose "title" parameter
        #     is set to "production":
        production_index = msj.find_stages(
            "[*].simulate.title",
            picker=lambda title: (title.parent()
                                  if title.val == "production" else None)
        )[0].STAGE_INDEX
        # - With the key "[*].simulate.title", we find the titles of all
        #   simulate stages, then we use a lambda function as the `picker` to
        #   select the stage whose title is "production". Note that the
        #   "parent" of the "title" parameter is the stage, which is what
        #   `title.parent()` gives.

    (Feel free to add more examples)
    """

    def __init__(self, *args, **kwargs):
        """
        Build the database and tag every stage with its `STAGE_INDEX`.

        All arguments are forwarded unchanged to `arkdb.ArkDb.__init__`.
        """
        super().__init__(*args, **kwargs)
        self._reset_stage_indices()

    @contextlib.contextmanager
    def _ensure_valid_stage_indices(self):
        """
        Context manager that renumbers the stages if the guarded code changed
        how many of them there are.

        Yields the number of stages counted on entry. The check runs in a
        `finally` clause, so it is also performed when the guarded code
        raises.
        """
        num_stages_before = len(self.stage)
        try:
            yield num_stages_before
        finally:
            if num_stages_before != len(self.stage):
                self._reset_stage_indices()

    def _reset_stage_indices(self):
        """
        Set the one-based `STAGE_INDEX` attribute on the settings object of
        every stage, following the current order of `self.stage`.
        """
        for stage_index, stg in enumerate(self.stage, start=1):
            # `stg` has one and only one key-value pair.
            stg_param = stg.values()[0]
            stg_param.STAGE_INDEX = stage_index

    def __str__(self):
        """
        Return all stages as text: one "<name> {<settings>}" block per stage,
        with the blocks joined by newlines.
        """
        blocks = []
        for stage in self._db.stage:
            # `stage` (`sea.Map`) has a single key-value pair.
            ((stg_name, stg_setting),) = stage.key_value()
            blocks.append("%s {\n%s}\n" %
                          (stg_name, stg_setting.__str__(ind=" ")))
        return "\n".join(blocks)

    @property
    def stage(self):
        """The list of stages held by the underlying database."""
        return self._db.stage

    def get(self, key: str, *args, **kwargs):
        """
        Same as `arkdb.ArkDb.get`, except that `key` may be a shorthand key
        (see `_canonicalize_key`). Other arguments are passed through.
        """
        return super().get(_canonicalize_key(key), *args, **kwargs)

    def put(self, key: str, *args, **kwargs):
        """
        Same as `arkdb.ArkDb.put`, except that `key` may be a shorthand key
        (see `_canonicalize_key`), and that the stages' `STAGE_INDEX`
        attributes are refreshed if the number of stages changed.
        """
        with self._ensure_valid_stage_indices():
            return super().put(_canonicalize_key(key), *args, **kwargs)

    def delete(self, key: str, *args, **kwargs):
        """
        Same as `arkdb.ArkDb.delete`, except that `key` may be a shorthand key
        (see `_canonicalize_key`), that stages left empty by the deletion are
        removed from the stage list, and that the stages' `STAGE_INDEX`
        attributes are refreshed if the number of stages changed.
        """
        with self._ensure_valid_stage_indices():
            super().delete(_canonicalize_key(key), *args, **kwargs)
            # When we delete a stage like this: `msj.delete("[*].simulate")`,
            # `ArkDb.delete` will remove all "simulate" key-value pairs, but
            # leave empty `sea.Map` objects in the `self.stage` list. We need
            # to delete these empty objects. This is done inside the `with`
            # block so that the stage count has already changed when the
            # context manager decides whether to renumber the stages.
            empty_map_indices = [
                i for i, stage in enumerate(self._db.stage)
                if len(stage.values()) == 0
            ]
            # Delete from the back so the remaining indices stay valid.
            for i in reversed(empty_map_indices):
                del self._db.stage[i]

    def find(self, key: str, *args, **kwargs):
        """
        Same as `arkdb.ArkDb.find`, except that `key` may be a shorthand key
        (see `_canonicalize_key`). Other arguments are passed through.
        """
        return super().find(_canonicalize_key(key), *args, **kwargs)

    def find_stages(self, key: str, *args, **kwargs) -> Tuple[sea.Map, ...]:
        """
        Similar to `find`, but to return a tuple of found stages. If no stages
        are found, this function returns an empty tuple. Examples::

            # To get all simulate stages:
            simulate_stages = msj.find_stages("[*].simulate")
            for stage in simulate_stages:
                print(stage.STAGE_INDEX)

            # To get the first simulate stage:
            first_simulate_stage = msj.find_stages("simulate")[0]
            print(first_simulate_stage.STAGE_INDEX)

            # To get the first simulate stage with "time = 200":
            simulate = msj.find_stages(
                "[*].simulate.time",
                picker=lambda x: (x.parent() if x.val == 200 else None))[0]
            print(simulate.STAGE_INDEX)

        `STAGE_INDEX` gives the index of the stage in the MSJ file. Note that
        the index is one based. Also, `STAGE_INDEX` is an attribute (as opposed
        to a key-value pair) of the returned `sea.Map` objects.
        """
        # Each item returned by `find` wraps the found object in its `_db`
        # attribute. The loop variable is deliberately not called `arkdb`, to
        # avoid shadowing the imported module.
        return tuple(found._db for found in self.find(key, *args, **kwargs))
def _pre_parse(string: str) -> str:
    """
    Rewrite raw MSJ text into a single "stage = [...]" list expression.

    The text is first parsed as the contents of a list. The parsed elements
    are then taken pairwise, elements at even positions as stage names and
    the elements following them as the stage settings, and every pair is
    rendered as "{name = {settings}}".

    :param string: The MSJ text.
    :return: The rewritten text.
    :raises SyntaxError: If any parsed element is a `sea.Atom` whose value
        is "=".
    """
    parsed = sea.Map("stage = [ %s ]" % string).stage
    # User might set a stage like: "stagename = {...}" by mistake, which
    # shows up as a standalone "=" element. Reject that kind of input.
    if any(isinstance(elem, sea.Atom) and elem.val == "=" for elem in parsed):
        raise SyntaxError(
            "Stage name must not be followed by the assignment symbol: '='")
    stage_names = parsed[::2]
    stage_bodies = parsed[1::2]
    rendered = []
    for stage_name, stage_settings in zip(stage_names, stage_bodies):
        rendered.append("{%s = {%s}}" % (stage_name, stage_settings))
    return "stage = [%s]" % "\n".join(rendered)
def parse(fname=None, string=None) -> Msj:
    """
    Parses a file or a string, and returns an `Msj` object.

    Exactly one of `fname` and `string` must be set.

    :param fname: Name of the file to read the MSJ text from.
    :param string: The MSJ text itself.
    :return: The `Msj` object built from the pre-parsed text.
    :raises AssertionError: If both or neither of `fname` and `string` are
        set. An empty value counts as not set.
    """
    # Raise explicitly rather than with an `assert` statement, so that the
    # check is not stripped when Python runs with `-O`. The exception type is
    # kept as `AssertionError` for backward compatibility.
    if bool(fname) == bool(string):
        raise AssertionError(
            "Exactly one of `fname` and `string` must be specified.")
    if fname:
        with open(fname) as fh:
            string = fh.read()
    return Msj(string=_pre_parse(string))