Source code for schrodinger.application.steps.scorers
from rdkit import Chem
from schrodinger import stepper
from schrodinger.application.pathfinder import filtering
from schrodinger.models import parameters
from . import utils
from .basesteps import MolReduceStep
from .dataclasses import ScoredMol
from .dataclasses import ScorerMixin
# ligand_ml is an optional dependency: when it cannot be imported, Smasher is
# left as None so that this module can still be loaded.
try:
    from ligand_ml.smasher import Smasher
except Exception:
    # Any ordinary import-time failure still means "not available", but
    # KeyboardInterrupt and SystemExit are no longer swallowed the way a bare
    # `except:` would swallow them.
    Smasher = None
# Positive infinity as a float.
INF = float('inf')
class PropertyScorer(ScorerMixin, MolReduceStep):
    """
    Score each molecule by the value of one of its properties.

    The `property` setting holds the descriptor name of the property that is
    used as the score, e.g., 'MVCorrMW' or 'r_rdkit_MolWt'.
    """

    class Settings(parameters.CompoundParam):
        # Descriptor name of the property whose value becomes the score.
        property: str

    def validateSettings(self):
        """
        Check that the configured property can be computed.

        The descriptor is computed on a probe molecule built from the SMILES
        'C'; a `KeyError` raised by that computation is reported as an
        unknown property.

        :return: a list holding one `stepper.SettingsError` if the property
            is not known, otherwise an empty list
        """
        probe = Chem.MolFromSmiles('C')
        prop = self.settings.property
        try:
            filtering.add_descriptors(probe, [prop], refs=[])
        except KeyError:
            issues = [
                stepper.SettingsError(self, f'"{prop}" is not a known property')
            ]
        else:
            issues = []
        return issues

    def reduceFunction(self, inputs):
        """
        Yield one `ScoredMol` per input molecule.

        The configured descriptor is added to each molecule and then read
        back from the molecule's properties as a float, which becomes the
        score.

        :param inputs: iterable of molecules to score
        :return: generator of `ScoredMol` objects, in input order
        """
        for mol in inputs:
            prop = self.settings.property
            filtering.add_descriptors(mol, [prop], refs=[])
            score = float(mol.GetProp(prop))
            yield ScoredMol(mol=mol, score=score)
class LigandMLScorer(ScorerMixin, MolReduceStep):
    """
    Yield scored molecules where the score is the value predicted by the model.
    The only setting is the required `ml_file`: the path to the ML qzip file.
    """

    class Settings(parameters.CompoundParam):
        # Path to the ML qzip file; None until it is set.
        ml_file: stepper.StepperFile = None

    def validateSettings(self, what='ml_file'):
        """
        Validate the ML file setting.

        The file check from `utils.validate_file` runs first and its issues
        are returned as is. Only if it reports nothing is the file checked
        with `utils.validate_smasher_file`, whose result (if any) is wrapped
        in a `stepper.SettingsError`.

        :param what: name of the setting passed to `utils.validate_file`
        :return: list of validation issues; empty if none were found
        """
        if issues := utils.validate_file(self, what, required=True):
            return issues
        # NOTE(review): this check always reads `ml_file`, whatever `what`
        # is -- confirm that is intended for callers passing another name.
        if issue := utils.validate_smasher_file(self.settings.ml_file):
            return [stepper.SettingsError(self, issue)]
        return []

    def reduceFunction(self, inputs):
        """
        Yield one `ScoredMol` per input molecule, scored by the model.

        :param inputs: iterable of molecules to score
        :return: generator of `ScoredMol` objects, in input order; the score
            is the first element of the model's result for that molecule
        :raises ImportError: if `ligand_ml` could not be imported
        """
        if Smasher is None:
            # The module-level import fell back to None; fail with a clear
            # message instead of an AttributeError on NoneType.
            raise ImportError(
                'ligand_ml is required by LigandMLScorer but could not be '
                'imported')
        # For performance reasons, this step combines all inputs into a list
        # for the score prediction, but yields every scored molecule one by one.
        mols = list(inputs)
        if not mols:
            # Nothing to score, so do not load the model at all.
            return
        smasher = Smasher.load(self.settings.ml_file)
        results = smasher.predict_on_mols(mols)
        for result, mol in zip(results, mols):
            yield ScoredMol(mol=mol, score=result[0])