Source code for schrodinger.application.steps.scorers
from rdkit import Chem
from schrodinger import stepper
from schrodinger.application.pathfinder import filtering
from schrodinger.models import parameters
from . import utils
from .basesteps import MolReduceStep
from .dataclasses import ScoredMol
from .dataclasses import ScorerMixin
# ligand_ml is an optional dependency: when it cannot be imported, Smasher is
# set to None so that this module can still be loaded.
try:
    from ligand_ml.smasher import Smasher
except Exception:
    # Deliberately broader than ImportError so that any failure raised while
    # importing the optional package is tolerated, but (unlike a bare
    # `except:`) KeyboardInterrupt and SystemExit still propagate.
    Smasher = None

# Positive infinity.
INF = float('inf')
class PropertyScorer(ScorerMixin, MolReduceStep):
    """
    Score every input molecule with the value of one of its properties.

    The `property` setting holds the descriptor name of the property to use
    as the score, e.g., 'MVCorrMW' or 'r_rdkit_MolWt'.
    """

    class Settings(parameters.CompoundParam):
        # Descriptor name of the property whose value becomes the score.
        property: str

    def validateSettings(self):
        """
        Check that the configured property can be computed.

        The descriptor is evaluated on a throwaway molecule built from the
        SMILES 'C'; a `KeyError` raised by that evaluation is reported as an
        unknown property.

        :return: a list holding one `stepper.SettingsError` when the property
            is not known, otherwise an empty list
        """
        probe = Chem.MolFromSmiles('C')
        name = self.settings.property
        try:
            filtering.add_descriptors(probe, [name], refs=[])
        except KeyError:
            message = f'"{name}" is not a known property'
            return [stepper.SettingsError(self, message)]
        else:
            return []

    def reduceFunction(self, inputs):
        """
        Lazily score the input molecules, one at a time.

        For each molecule the configured descriptor is added to it, then read
        back and converted to a float.

        :param inputs: iterable of molecules to score
        :return: generator of `ScoredMol` objects, one per input molecule
        """
        for mol in inputs:
            name = self.settings.property
            filtering.add_descriptors(mol, [name], refs=[])
            yield ScoredMol(mol=mol, score=float(mol.GetProp(name)))
class LigandMLScorer(ScorerMixin, MolReduceStep):
    """
    Yield scored molecules where the score is the value predicted by the model.

    Settings:

    - `ml_file` (required): the path to the ML qzip file.
    - `validate_model`: when True (the default), `validateSettings` also
      runs `utils.validate_smasher_file` on `ml_file`.
    """

    class Settings(parameters.CompoundParam):
        ml_file: stepper.StepperFile = None
        validate_model: bool = True

    def validateSettings(self, what='ml_file'):
        """
        Validate the model file setting.

        :param what: name of the file setting handed to `utils.validate_file`
        :return: the issues reported by `utils.validate_file` if there are
            any; otherwise, when `validate_model` is set, a list with one
            `stepper.SettingsError` wrapping the issue reported by
            `utils.validate_smasher_file`; otherwise an empty list
        """
        if issues := utils.validate_file(self, what, required=True):
            return issues
        if not self.settings.validate_model:
            return []
        if issue := utils.validate_smasher_file(self.settings.ml_file):
            return [stepper.SettingsError(self, issue)]
        return []

    def reduceFunction(self, inputs):
        """
        Predict a score for every input molecule and yield the scored mols.

        :param inputs: iterable of molecules to score
        :return: generator of `ScoredMol` objects whose score is the first
            element of the corresponding prediction result
        :raises RuntimeError: if there are molecules to score but the
            `ligand_ml` package could not be imported
        """
        # For performance reasons, this step combines all inputs into a list
        # for the score prediction, but yields every scored molecule one by one.
        mols = list(inputs)
        if not mols:
            # Nothing to score: skip loading the model altogether.
            return
        if Smasher is None:
            # The module-level import fell back to None; fail with a clear
            # message instead of an AttributeError on NoneType.
            raise RuntimeError(
                'LigandMLScorer requires the ligand_ml package, which could '
                'not be imported')
        with Smasher.load(self.settings.ml_file) as model:
            results = model.predict_on_mols(mols)
            # Consumed while the model is still open, in case the results
            # depend on it.
            for result, mol in zip(results, mols):
                yield ScoredMol(mol=mol, score=result[0])