# Source code for schrodinger.application.steps.scorers

from rdkit import Chem

from schrodinger import stepper
from schrodinger.application.pathfinder import filtering
from schrodinger.models import parameters

from . import utils
from .basesteps import MolReduceStep
from .dataclasses import ScoredMol
from .dataclasses import ScorerMixin

# ligand_ml is treated as an optional dependency: if importing it fails,
# Smasher is set to None so that this module can still be imported. Code that
# needs Smasher must check for None before using it.
try:
    from ligand_ml.smasher import Smasher
except Exception:
    # Kept broad so that any import-time failure is still tolerated, but
    # unlike a bare `except:` this does not swallow KeyboardInterrupt or
    # SystemExit.
    Smasher = None

# Positive infinity as a float.
INF = float('inf')


class PropertyScorer(ScorerMixin, MolReduceStep):
    """
    Score each input molecule by the value of one of its properties.

    The `property` setting holds the descriptor name of the property that is
    used as the score, e.g., 'MVCorrMW' or 'r_rdkit_MolWt'.
    """

    class Settings(parameters.CompoundParam):
        # Descriptor name of the property whose value becomes the score.
        property: str

    def validateSettings(self):
        """
        Check that the configured property is a known descriptor.

        The descriptor is requested on a single-carbon probe molecule; a
        `KeyError` raised by that request is reported as a settings error.

        :return: a list holding one `stepper.SettingsError` when the property
            is not known, otherwise an empty list
        """
        probe = Chem.MolFromSmiles('C')
        name = self.settings.property
        try:
            filtering.add_descriptors(probe, [name], refs=[])
        except KeyError:
            message = f'"{name}" is not a known property'
            return [stepper.SettingsError(self, message)]
        else:
            return []

    def reduceFunction(self, inputs):
        """
        Yield a `ScoredMol` for every input molecule.

        The configured descriptor is added to each molecule, then read back
        from the molecule's properties and converted to a float score.

        :param inputs: iterable of molecules (presumably RDKit Mol objects;
            they must support `GetProp`)
        :return: generator of `ScoredMol` objects, one per input molecule
        """
        for molecule in inputs:
            name = self.settings.property
            filtering.add_descriptors(molecule, [name], refs=[])
            score = float(molecule.GetProp(name))
            yield ScoredMol(mol=molecule, score=score)


class LigandMLScorer(ScorerMixin, MolReduceStep):
    """
    Yield scored molecules where the score is the value predicted by the model.

    The only setting is the required `ml_file`: the path to the ML qzip file.
    """

    class Settings(parameters.CompoundParam):
        # Path to the ML qzip file. Defaults to None, but validateSettings
        # checks it with required=True.
        ml_file: stepper.StepperFile = None

    def validateSettings(self, what='ml_file'):
        """
        Validate the model file setting.

        The file check runs first; the smasher-specific check only runs when
        the file check reports no issues.

        :param what: forwarded to `utils.validate_file` (presumably the name
            of the file setting to check)
        :return: list of settings issues; empty when validation passes
        """
        if issues := utils.validate_file(self, what, required=True):
            return issues
        # NOTE(review): `what` is forwarded to validate_file, but this check
        # always reads `ml_file` - confirm that is intended for callers that
        # pass a different `what`.
        if issue := utils.validate_smasher_file(self.settings.ml_file):
            return [stepper.SettingsError(self, issue)]
        return []

    def reduceFunction(self, inputs):
        """
        Predict a score for every input molecule and yield the scored mols.

        :param inputs: iterable of molecules to score
        :return: generator of `ScoredMol` objects, one per input molecule,
            scored with the first element of the corresponding prediction
        :raises ImportError: if `Smasher` is None because the ligand_ml
            import at module level failed
        """
        # The module-level import falls back to Smasher = None; fail with a
        # clear message instead of an AttributeError on NoneType.
        if Smasher is None:
            raise ImportError(
                'LigandMLScorer requires the ligand_ml package, which could '
                'not be imported')
        # For performance reasons, this step combines all inputs into a list
        # for the score prediction, but yields every scored molecule one by one.
        mols = list(inputs)
        if not mols:
            # Nothing to score: skip loading the model altogether.
            return
        smasher = Smasher.load(self.settings.ml_file)
        results = smasher.predict_on_mols(mols)
        for result, mol in zip(results, mols):
            yield ScoredMol(mol=mol, score=result[0])