# Source code for schrodinger.application.livedesign.ld_folder_tree

import re
from collections import defaultdict

from . import login

# Define the string path separator that is used in LDClient
# TODO: if it is ever exposed, reference the separator used in LDClient rather
# than defining it separately here (LDIDEAS-2283)
# NOTE(review): '__DEBUG__' reads like a placeholder rather than a real path
# separator -- confirm that it matches the separator LDClient actually uses.
SEP = '__DEBUG__'
# Placeholder endpoint name returned by `LDFolderTree._parseModelName` when a
# model name does not match `MODEL_RE`.
ENDPOINT_UNAVAILABLE = 'Endpoint Unavailable'
# Matches model names of the form "<assay> (<endpoint>)". The assay group is
# lazy, at least one whitespace character is required before the "(", and the
# endpoint group is greedy, so it extends to the last ")" in the string.
MODEL_RE = re.compile(r'(?P<assay>.+?)\s+\((?P<endpoint>.*)\)')


class LDFolderTree:
    """
    A tree structure that holds LD folder/assay hierarchy.

    The hierarchy is stored flattened: `endpoints` and `favorite_endpoints`
    map the `SEP`-joined path of each assay to the set of endpoint names found
    for that assay.
    """

    def __init__(self, project_id, parent=None):
        """
        :param project_id: LD project ID
        :type project_id: str
        :param parent: Currently unused; kept in the signature for backward
            compatibility.
        :type parent: object or None
        """
        self.project_id = project_id
        self.toplevel_folders = [
            "Computed Properties", "Computational Models", "Experimental Assays"
        ]

        # a dict for keeping track of assays and their endpoints.
        # key: assay path, value: set of endpoint names
        self.endpoints = defaultdict(set)
        # a dict for keeping track of favorite assays and their endpoints.
        # key: assay path, value: set of endpoint names
        self.favorite_endpoints = defaultdict(set)
        # The helper returns a 3-tuple; only the middle item (the LD client)
        # is kept.
        _, self.ld_client, _ = login.get_ld_client_and_models()

    def fillFolderTree(self):
        """
        Main function to get folders and assays in the same hierarchy as in LD.

        Populates `self.endpoints` and `self.favorite_endpoints` by walking
        every folder listed in `self.toplevel_folders`.
        """
        for toplevel_folder in self.toplevel_folders:
            self._fillToplevelItem(toplevel_folder)

    def _fillToplevelItem(self, toplevel_folder):
        """
        Fills a toplevel folder, such as "Computational Models" or
        "Experimental Assays", in the tree.

        :param toplevel_folder: Name of the toplevel folder
        :type toplevel_folder: str
        """

        folder_tree = self.ld_client.get_folder_tree_data(
            self.project_id, toplevel_folder)
        if not folder_tree:
            # None or an empty container from the server: nothing to add.
            return

        try:  # 7.5.1 and up
            data_tree = folder_tree[0]
        except KeyError:  # 7.5
            data_tree = folder_tree
        except IndexError:  # empty folder returned from server
            return

        if data_tree:
            self._parseSubfolders(toplevel_folder, data_tree)

    def _parseSubfolders(self, parent_path, data):
        """
        Recursively adds subfolders to the tree, until it reaches assays.

        :param parent_path: The full path to this node
        :type parent_path: str
        :param data: Data from the LD server to parse.
        :type data: json
        """

        folder_name = parent_path.rsplit(SEP, 1)[-1]
        subfolders = self.ld_client.get_subfolders(folder_name, data)

        for subfolder, node in subfolders.items():
            # subfolder could be a folder or an endpoint. A node with no
            # children (missing or empty 'children') is treated as an endpoint.
            if node.get('children'):
                subpath = SEP.join([parent_path, subfolder])
                self._parseSubfolders(subpath, node)
                continue

            # From LD version 8.0 and up, an original_name field exists that
            # can be used to retrieve the original model name. This avoids
            # the issue of aliased model names that do not follow the
            # standard naming convention. For versions 7.9 and below, the
            # subfolder name is used.
            model_name = node.get('original_name')
            if model_name is None:
                model_name = subfolder
            assay, endpoint = self._parseModelName(model_name)

            # LD will create an extra subfolder for assays with the same
            # assay name but different endpoint names. In their UI that is
            # necessary because they do not split assay and endpoint into
            # separate nodes in the tree. Since they are split in the GUI,
            # we can get rid of this redundant folder in the path.  (Or in
            # other words, we are already using assays as a folder by
            # collecting all endpoints with the same assay into a single
            # tree entry, so the extra folder provided by LD is
            # unnecessary)
            if assay != folder_name:
                assay_path = SEP.join([parent_path, assay])
            else:
                assay_path = parent_path

            self.endpoints[assay_path].add(endpoint)
            if node.get('favorite'):
                # 'favorite' is only set on endpoints
                self.favorite_endpoints[assay_path].add(endpoint)

    def _parseModelName(self, model_name):
        """
        Given a model name, return the assay and endpoint. E.g.::

            assay_name, endpoint = self._parseModelName('Model (type) prot')
            assay_name -> 'Model'
            endpoint -> 'type'

        In the case of multiple parenthetical segments, assigns the endpoint to
        be everything between the first "(" and the last ")". The "(" must be
        preceded by whitespace for the name to be split. If the name does not
        follow that format, returns the entire (stripped) `model_name` string
        as the assay name and `ENDPOINT_UNAVAILABLE` as the endpoint.

        Only the full input is stripped of surrounding whitespace; the
        endpoint is returned exactly as it appears between the parentheses.

        :param model_name: the full name of the live design endpoint
        :type model_name: str

        :return: the assay name and the endpoint name
        :rtype: str, str
        """

        model_name = model_name.strip()
        match = MODEL_RE.match(model_name)
        if match is None:
            # If the input model name is not properly formatted, then return the
            # full value as the assay name, and define no endpoint
            return model_name, ENDPOINT_UNAVAILABLE

        return match.group('assay'), match.group('endpoint')