# Source code for schrodinger.application.livedesign.ld_folder_tree
import re
from collections import defaultdict
from . import login
# Define the string path separator that is used in LDClient
# TODO: if it is ever exposed, reference the separator used in LDClient rather
# than defining it separately here (LDIDEAS-2283)
# In this module SEP joins folder, assay and subfolder names into the path
# strings that key the endpoint dictionaries, and splits the last path
# component back off again.
# NOTE(review): '__DEBUG__' looks like a placeholder rather than a real path
# separator -- confirm it matches the separator LDClient actually uses.
SEP = '__DEBUG__'
# Endpoint name reported for a model name that does not match MODEL_RE.
ENDPOINT_UNAVAILABLE = 'Endpoint Unavailable'
# Splits a model name shaped like "<assay> (<endpoint>)". The assay is the
# shortest leading text that is followed by whitespace and "("; the endpoint is
# everything from there up to the last ")" on the line. Text after that final
# ")" is ignored when the pattern is applied with `match`.
MODEL_RE = re.compile(r'(?P<assay>.+?)\s+\((?P<endpoint>.*)\)')
class LDFolderTree:
    """
    A tree structure that holds LD folder/assay hierarchy.

    The hierarchy is stored flat rather than as nested nodes: every assay is
    identified by its full path (folder names and the assay name joined with
    `SEP`) and mapped to the set of endpoint names collected for it. The
    instance starts out empty; call `fillFolderTree` to populate it.
    """

    def __init__(self, project_id, parent=None):
        """
        :param project_id: LD project ID
        :type project_id: str

        :param parent: Not used by this class. It is accepted so that callers
            which pass it keep working.
        """
        self.project_id = project_id
        self.toplevel_folders = [
            "Computed Properties",
            "Computational Models",
            "Experimental Assays",
        ]
        # Assays and their endpoints.
        # key: assay path (components joined with SEP)
        # value: set of endpoint names
        self.endpoints = defaultdict(set)
        # Same layout as `endpoints`, restricted to endpoints whose node is
        # flagged as a favorite.
        self.favorite_endpoints = defaultdict(set)
        # Only the second item returned by the login helper (the client) is
        # kept; the other two are discarded.
        _, self.ld_client, _ = login.get_ld_client_and_models()

    def fillFolderTree(self):
        """
        Main function to get folders and assays in the same hierarchy as in LD.

        Processes every entry of `self.toplevel_folders` in order, adding what
        is found to `self.endpoints` and `self.favorite_endpoints`. Existing
        entries are not cleared first.
        """
        for toplevel_folder in self.toplevel_folders:
            self._fillToplevelItem(toplevel_folder)

    def _fillToplevelItem(self, toplevel_folder):
        """
        Fills a toplevel folder, such as "Computational Models" or
        "Experimental Assays", in the tree.

        Nothing is added when the server returns no data for the folder.

        :param toplevel_folder: Name of the toplevel folder
        :type toplevel_folder: str
        """
        folder_tree = self.ld_client.get_folder_tree_data(
            self.project_id, toplevel_folder)
        if not folder_tree:
            # None, an empty list or an empty dict: there is nothing to parse.
            # Checking up front also keeps a None result from raising
            # TypeError on the subscript below.
            return
        try:  # 7.5.1 and up
            data_tree = folder_tree[0]
        except KeyError:  # 7.5
            data_tree = folder_tree
        except IndexError:  # empty folder returned from server
            return
        if data_tree:
            self._parseSubfolders(toplevel_folder, data_tree)

    def _parseSubfolders(self, parent_path, data):
        """
        Recursively adds subfolders to the tree, until reaches assays.

        Each child reported by the client for this folder is either another
        folder (it has children, so it is descended into) or an endpoint (no
        children, so it is recorded in `self.endpoints`).

        :param parent_path: The full path to this node
        :type parent_path: str

        :param data: Data from the LD server to parse.
        :type data: json
        """
        folder_name = parent_path.rsplit(SEP, 1)[-1]
        subfolders = self.ld_client.get_subfolders(folder_name, data)
        for subfolder, node in subfolders.items():
            # subfolder could be a folder or an endpoint. A node without a
            # 'children' entry at all is treated like one with no children.
            if node.get('children'):
                subpath = SEP.join([parent_path, subfolder])
                self._parseSubfolders(subpath, node)
                continue

            # From LD version 8.0 and up, an original_name field exists that
            # can be used to retrieve the original model name. This avoids
            # the issue of aliased model names that do not follow the
            # standard naming convention. For versions 7.9 and below, the
            # subfolder name is used.
            model_name = node.get('original_name')
            if model_name is None:
                model_name = subfolder
            assay, endpoint = self._parseModelName(model_name)

            # LD will create an extra subfolder for assays with the same
            # assay name but different endpoint names. In their UI that is
            # necessary because they do not split assay and endpoint into
            # separate nodes in the tree. Since they are split in the GUI,
            # we can get rid of this redundant folder in the path. (Or in
            # other words, we are already using assays as a folder by
            # collecting all endpoints with the same assay into a single
            # tree entry, so the extra folder provided by LD is
            # unnecessary)
            if assay == folder_name:
                assay_path = parent_path
            else:
                assay_path = SEP.join([parent_path, assay])

            self.endpoints[assay_path].add(endpoint)
            if node.get('favorite'):
                # 'favorite' is only set on endpoints
                self.favorite_endpoints[assay_path].add(endpoint)

    def _parseModelName(self, model_name):
        """
        Given a model name, return the assay and endpoint. E.g.::

            assay_name, endpoint = self._parseModelName('Model (type) prot')
            assay_name -> 'Model'
            endpoint -> 'type'

        The name is stripped of surrounding whitespace and then matched
        against `MODEL_RE`. In the case of multiple parenthetical segments,
        assigns the endpoint to be everything between the first "(" that
        follows whitespace and the last ")". If the name does not match,
        returns the entire stripped `model_name` string as the assay name and
        `ENDPOINT_UNAVAILABLE` as the endpoint.

        :param model_name: the full name of the live design endpoint
        :type model_name: str

        :return: the assay name and the endpoint name
        :rtype: str, str
        """
        model_name = model_name.strip()
        match = MODEL_RE.match(model_name)
        if match is None:
            # If the input model name is not properly formatted, then return
            # the full value as the assay name, and define no endpoint
            return model_name, ENDPOINT_UNAVAILABLE
        return match.group('assay'), match.group('endpoint')