# Source code for schrodinger.application.phase.packages.bedroc_screener

"""
This module contains the BedrocScreener class, which performs in-process
pharmacophore screens of zipped Phase projects containing actives and decoys.
BedrocScreener provides a lightweight alternative to launching a phase_screen
job with a .list file containing the names of the zipped projects.
"""

import os
import tempfile
from operator import itemgetter

from schrodinger import structure
from schrodinger.analysis import enrichment
from schrodinger.application.phase.packages import phase_utils
from schrodinger.infra import phase
from schrodinger.utils import fileutils

# File formats accepted for the optional hit_file argument (Maestro or SD):
LEGAL_HIT_FILE_FORMATS = [
    phase.PhpFileFormat_PHP_FORMAT_MAE,
    phase.PhpFileFormat_PHP_FORMAT_SD,
]


class BedrocScreener(object):
    """
    Screens zipped Phase projects of actives and decoys against a single
    pharmacophore hypothesis, in process, and exposes the ranked hits so that
    a BEDROC score can be computed afterwards.

    Typical use: construct with a hypothesis and match options, call
    `screen` with the actives/decoys projects, then call `calcBEDROC`.
    """

    def __init__(self, hypo, match_options):
        """
        Constructor that takes a pharmacophore hypothesis and matching
        options.

        :param hypo: Path to hypothesis file (.phypo)
        :type hypo: str

        :param match_options: Hypothesis matching options
        :type match_options: phase.PhpMatchOptions

        :raise OSError: If hypo is of the wrong type or cannot be found
        """
        self._validateHypo(hypo)
        self._hypo = phase.PhpHypoAdaptor(hypo)
        self._match_options = match_options

        # Per-screen private state; reset at the start of every screen call.
        self._actives = None
        self._decoys = None
        self._hit_file = None
        self._zip_dir = None
        self._writer = None
        self._efcalc = None

        # The following public member variables are assigned/reassigned with
        # each call to the screen function.

        # All unique active titles (tuple of str):
        self.active_titles = None

        # Total number of decoys (int):
        self.total_decoys = None

        # Sorted fitness scores (tuple of float):
        self.fitness_scores = None

        # Hit titles in order of decreasing fitness (tuple of str):
        self.hit_titles = None

    def calcBEDROC(self, alpha=20.0):
        """
        Calculates the BEDROC score after a screen has been performed.

        :param alpha: Early enrichment factor
        :type alpha: float

        :return: BEDROC score, or None if no enrichment calculator exists
            yet (i.e. no screen has completed)
        :rtype: float or None
        """
        if self._efcalc is None:
            return None
        # Only the first element of the calculator's result is returned.
        return self._efcalc.calcBEDROC(alpha=alpha)[0]

    def screen(self, actives, decoys, hit_file=None, zip_dir=None):
        """
        Performs screens. Projects are unzipped to a securely named temporary
        subdirectory that's created in either a platform-dependent directory
        (/tmp, /var/tmp, etc.) or a specific named directory. The latter is
        recommended if the projects are large and/or if numerous screens will
        be run simultaneously. Unsorted hits are written to hit_file if that
        parameter is supplied.

        :param actives: Path to zipped actives project (.phzip)
        :type actives: str

        :param decoys: Path to zipped decoys project (.phzip)
        :type decoys: str

        :param hit_file: Maestro/SD file for unsorted hits
        :type hit_file: str

        :param zip_dir: Overrides platform-dependent temporary directory
        :type zip_dir: str

        :raise OSError: If a file is of the wrong type or cannot be found
        """
        self._validateScreenFiles(actives, decoys, hit_file)

        self._actives = actives
        self._decoys = decoys
        self._hit_file = hit_file
        self._zip_dir = zip_dir
        self._writer = None
        self._efcalc = None

        # Start from empty results so nothing from a previous screen survives.
        self.active_titles = []
        self.total_decoys = 0
        self.fitness_scores = []
        self.hit_titles = []

        self._doScreens()
        self._createCalculator()

    def _createCalculator(self):
        """
        Creates enrichment calculator.
        """
        # The hit titles file is written with a leading "Title" header row.
        hit_titles = ["Title"] + list(self.hit_titles)

        # Write active and hit titles to temporary files and create
        # calculator. The calculator is built while the files still exist.
        with tempfile.TemporaryDirectory() as titles_dir:
            active_titles_file = os.path.join(titles_dir, "active_titles.txt")
            phase_utils.write_list_to_file(active_titles_file,
                                           self.active_titles)
            hit_titles_file = os.path.join(titles_dir, "hit_titles.csv")
            phase_utils.write_list_to_file(hit_titles_file, hit_titles)
            self._efcalc = enrichment.Calculator(active_titles_file,
                                                 hit_titles_file,
                                                 self.total_decoys)

    def _doScreen(self, db_path, db_ids):
        """
        Screens the indicated records in the indicated database.

        :param db_path: Path to database
        :type db_path: str

        :param db_ids: Database record numbers to screen
        :type db_ids: list of int
        """
        reader = phase.PhpStructureReader(db_path, phase.DB_STRUCT_RECORD,
                                          db_ids)
        screener = phase.PhpScreener(self._hypo, phase.SCREEN_EXISTING_SITES)
        screener.setMatchOptions(self._match_options)

        while True:
            structure_bus = reader.next()
            # An empty bus signals that the reader is exhausted.
            if structure_bus.empty():
                break
            hit_cts = screener.screen(structure_bus)
            if not hit_cts:
                continue
            # Only the first returned hit is recorded for each record.
            st = structure.Structure(hit_cts[0])
            self.hit_titles.append(st.title)
            self.fitness_scores.append(st.property[phase.PHASE_FITNESS])
            if self._writer is not None:
                self._writer.append(st)

    def _doScreens(self):
        """
        Unzips projects and performs screens.
        """
        if self._hit_file:
            self._writer = structure.StructureWriter(self._hit_file)

        try:
            min_sites = self._match_options.getMinSites()
            delta_dist = self._match_options.getDeltaDist()
            project = phase.PhpProject()

            with tempfile.TemporaryDirectory(dir=self._zip_dir) as dest_dir:
                # Actives are always processed first, then decoys.
                for zipped_project in [self._actives, self._decoys]:
                    screening_actives = zipped_project is self._actives
                    project_path = project.unzipProject(dest_dir,
                                                        zipped_project)
                    project.openProject(project_path)
                    ligand_ids = project.getLigandIDs("all")
                    db_ids = [
                        project.getDbID(ligand_id)
                        for ligand_id in ligand_ids
                    ]
                    if screening_actives:
                        # Unique, alphabetically sorted active titles.
                        titles = {
                            project.getTitle(ligand_id)
                            for ligand_id in ligand_ids
                        }
                        self.active_titles = tuple(sorted(titles))
                    else:
                        self.total_decoys = len(ligand_ids)
                    project.closeProject()

                    db_path = os.path.join(project_path, "ligands.phdb")
                    # NOTE(review): presumably an index-based prefilter that
                    # narrows db_ids before the full screen -- confirm.
                    db_ids = phase.find_index_matches(db_path, self._hypo,
                                                      min_sites, delta_dist,
                                                      db_ids)
                    self._doScreen(db_path, db_ids)
        finally:
            # Close the hit file even if unzipping or screening fails, so the
            # writer is never leaked.
            if self._writer is not None:
                self._writer.close()

        self._sortHits()

    def _sortHits(self):
        """
        Sorts hits by decreasing fitness and converts fitness_scores and
        hit_titles to tuples. Both become empty tuples if there are no hits.
        """
        # Sort strictly on fitness (key=itemgetter(0)) so that titles aren't
        # used to break ties in fitness.
        hit_pairs = sorted(zip(self.fitness_scores, self.hit_titles),
                           key=itemgetter(0),
                           reverse=True)

        # zip(*[]) yields nothing to unpack, so a screen with no hits must be
        # handled explicitly.
        if not hit_pairs:
            self.fitness_scores = ()
            self.hit_titles = ()
            return

        # This converts the lists to tuples.
        self.fitness_scores, self.hit_titles = zip(*hit_pairs)

    def _validateHypo(self, hypo):
        """
        Ensures that the hypothesis file has the correct extension and exists.

        :param hypo: Path to hypothesis file (.phypo)
        :type hypo: str

        :raise OSError: If hypo is of the wrong type or cannot be found
        """
        if not fileutils.is_hypothesis_file(hypo):
            raise OSError('Illegal hypothesis file name: "%s"' % hypo)
        if not os.path.isfile(hypo):
            raise OSError('Hypothesis file "%s" not found' % hypo)

    def _validateScreenFiles(self, actives, decoys, hit_file):
        """
        Ensures that files are of the correct type and exist if applicable.

        :param actives: Path to zipped actives project (.phzip)
        :type actives: str

        :param decoys: Path to zipped decoys project (.phzip)
        :type decoys: str

        :param hit_file: Output Maestro/SD file for hits
        :type hit_file: str

        :raise OSError: If a file is of the wrong type or cannot be found
        """
        for project in [actives, decoys]:
            file_format = phase.get_phase_file_format(project)
            if file_format != phase.PhpFileFormat_PHP_FORMAT_PHZIP:
                raise OSError('Illegal zipped project name: "%s"' % project)
            if not os.path.isfile(project):
                raise OSError('Zipped project "%s" not found' % project)

        # The hit file is an output, so only its format is checked.
        if hit_file:
            file_format = phase.get_phase_file_format(hit_file)
            if file_format not in LEGAL_HIT_FILE_FORMATS:
                raise OSError('Illegal hit file name: "%s"' % hit_file)