Source code for schrodinger.application.desmond.fep_edge_data_classifier

from enum import Enum
from itertools import chain
from typing import Tuple

import numpy as np


[docs]class Rating(Enum):
    NA = 0
    GOOD = 1
    FAIR = 2
    BAD = 3


class _Const:
    GOOD_RMSD = 2.0  # Angstrom
    FAIR_RMSD = GOOD_RMSD * 2  # Angstrom
    DG_SLOPE = 0.30  # kcal/mol/ns, allowed change rate in dG
    DG_CONVERGENCE_TIME_SPAN = 1  # ns, time span for convergence check
    REST_EX_CUTOFF = 0.15  #


# classifier names
CONVERGENCE = 'convergence'
LIGAND_RMSD = 'ligand RMSD'
REST_EXCHANGE = 'REST exchange'


[docs]def rate(name: str, data) -> Rating:
    """
    Return rating for the FEP edge data with the given `name`. The format of
    `data` varies depending on the case.

    :param name: Name of the classifier. The allowed names are defined above.
    """
    result = Rating.NA
    try:
        result = _CLASSIFIERS[name](data)
    except Exception as e:
        # FIXME: is this enough?
        print(f"ERROR: Failed to classify {name} data for FEP edge: {e}.")
    return result


def _convergence(data: Tuple[float, float, Tuple]) -> Rating:
    """
    :param data: simulation times and dG values in the complex leg, i.e.,
                 [start_time, end_time, dG_values]
    """
    assert len(
        data[2]
    ) > 1, f"Cannot determine convergence with {len(data[2])} data points"

    ts, dt = np.linspace(data[0], data[1], len(data[2]), retstep=True)
    indices = ts >= data[1] - _Const.DG_CONVERGENCE_TIME_SPAN
    indices[-2] = True  # Two points are needed no matter what
    check_ts = ts[indices]
    check_dgs = np.asarray(data[2])[indices]

    # For the last DG_CONVERGENCE_TIME_SPAN ns, check if maximal dG change is too big.
    if _Const.DG_SLOPE * (check_ts[-1] -
                          check_ts[0]) < max(check_dgs) - min(check_dgs):
        return Rating.BAD

    # For the last DG_CONVERGENCE_TIME_SPAN ns, check if any two consecutive dG
    # values are close to each other within DG_SLOPE*dt.
    if all(abs(np.ediff1d(check_dgs)) < _Const.DG_SLOPE * dt):
        return Rating.GOOD

    return Rating.FAIR


def _RESTExchange(data: Tuple[Tuple[int]]) -> Rating:
    """
    :param data: replica history distributions
    """
    # adapted from Dan Sindhikara's script
    stddevs = []
    for hist in data:
        hist = np.asarray(hist)
        stddevs.append(np.std(hist / hist.mean()))
    if np.mean(stddevs) < _Const.REST_EX_CUTOFF:
        return Rating.GOOD
    return Rating.FAIR


def _ligandRMSD(data: Tuple[Tuple[float]]) -> Rating:
    """
    :param data: ligand RMSDs in the complex leg, i.e.,
                 [lambda0_rmsd, lambda1_rmsd]
    """
    if any(x > _Const.FAIR_RMSD for x in chain.from_iterable(data)):
        return Rating.BAD
    if any(x > _Const.GOOD_RMSD for x in chain.from_iterable(data)):
        return Rating.FAIR
    return Rating.GOOD


_CLASSIFIERS = {
    LIGAND_RMSD: _ligandRMSD,
    REST_EXCHANGE: _RESTExchange,
    CONVERGENCE: _convergence
}