# Source code for schrodinger.application.phase.packages.bedroc_screener
"""
This module contains the BedrocScreener class, which performs in-process
pharmacophore screens of zipped Phase projects containing actives and decoys.
BedrocScreener provides a lightweight alternative to launching a phase_screen
job with a .list file containing the names of the zipped projects.
"""
import os
import tempfile
from operator import itemgetter
from schrodinger import structure
from schrodinger.analysis import enrichment
from schrodinger.application.phase.packages import phase_utils
from schrodinger.infra import phase
from schrodinger.utils import fileutils
# Phase file formats accepted for the optional hit_file argument (Maestro
# and SD); any other format is rejected when the screen files are validated.
LEGAL_HIT_FILE_FORMATS = [
    phase.PhpFileFormat_PHP_FORMAT_MAE,
    phase.PhpFileFormat_PHP_FORMAT_SD,
]
class BedrocScreener(object):
    """
    Performs in-process pharmacophore screens of a zipped actives project and
    a zipped decoys project against a single hypothesis, and exposes the
    ranked hits so that a BEDROC score can be computed from them.
    """

    def __init__(self, hypo, match_options):
        """
        Constructor that takes a pharmacophore hypothesis and matching options.

        :param hypo: Path to hypothesis file (.phypo)
        :type hypo: str

        :param match_options: Hypothesis matching options
        :type match_options: phase.PhpMatchOptions

        :raise OSError: If hypo is of the wrong type or cannot be found
        """
        self._validateHypo(hypo)
        self._hypo = phase.PhpHypoAdaptor(hypo)
        self._match_options = match_options
        self._actives = None
        self._decoys = None
        self._hit_file = None
        self._zip_dir = None
        self._writer = None
        self._efcalc = None

        # The following public member variables are assigned/reassigned with
        # each call to the screen function.

        # All unique active titles (tuple of str):
        self.active_titles = None

        # Total number of decoys (int):
        self.total_decoys = None

        # Sorted fitness scores (tuple of float):
        self.fitness_scores = None

        # Hit titles in order of decreasing fitness (tuple of str):
        self.hit_titles = None

    def calcBEDROC(self, alpha=20.0):
        """
        Calculates the BEDROC score after a screen has been performed.

        :param alpha: Early enrichment factor
        :type alpha: float

        :return: BEDROC score, or None if no screen has been completed yet
        :rtype: float
        """
        # Identity check rather than truthiness, so the result never depends
        # on how the calculator object happens to evaluate as a boolean.
        if self._efcalc is None:
            return None
        return self._efcalc.calcBEDROC(alpha=alpha)[0]

    def screen(self, actives, decoys, hit_file=None, zip_dir=None):
        """
        Performs screens. Projects are unzipped to a securely named temporary
        subdirectory that's created in either a platform-dependent directory
        (/tmp, /var/tmp, etc.) or a specific named directory. The latter is
        recommended if the projects are large and/or if numerous screens will
        be run simultaneously. Unsorted hits are written to hit_file if that
        parameter is supplied.

        :param actives: Path to zipped actives project (.phzip)
        :type actives: str

        :param decoys: Path to zipped decoys project (.phzip)
        :type decoys: str

        :param hit_file: Maestro/SD file for unsorted hits
        :type hit_file: str

        :param zip_dir: Overrides platform-dependent temporary directory
        :type zip_dir: str

        :raise OSError: If a file is of the wrong type or cannot be found
        """
        # Validate before touching any state so that a rejected call leaves
        # the results of a previous screen intact.
        self._validateScreenFiles(actives, decoys, hit_file)
        self._actives = actives
        self._decoys = decoys
        self._hit_file = hit_file
        self._zip_dir = zip_dir
        self._writer = None
        self._efcalc = None
        self.active_titles = []
        self.total_decoys = 0
        self.fitness_scores = []
        self.hit_titles = []
        self._doScreens()
        self._createCalculator()

    def _createCalculator(self):
        """
        Creates enrichment calculator.
        """
        # The hit titles file is written with a leading "Title" header row.
        hit_titles = ["Title"] + list(self.hit_titles)

        # Write active and hit titles to temporary files and create calculator.
        with tempfile.TemporaryDirectory() as titles_dir:
            active_titles_file = os.path.join(titles_dir, "active_titles.txt")
            phase_utils.write_list_to_file(active_titles_file,
                                           self.active_titles)
            hit_titles_file = os.path.join(titles_dir, "hit_titles.csv")
            phase_utils.write_list_to_file(hit_titles_file, hit_titles)
            # The calculator is built while the temporary files still exist.
            self._efcalc = enrichment.Calculator(active_titles_file,
                                                 hit_titles_file,
                                                 self.total_decoys)

    def _doScreen(self, db_path, db_ids):
        """
        Screens the indicated records in the indicated database. The title
        and fitness of each hit are appended to hit_titles/fitness_scores,
        and the hit is written to the hit file if one is open.

        :param db_path: Path to database
        :type db_path: str

        :param db_ids: Database record numbers to screen
        :type db_ids: list of int
        """
        reader = phase.PhpStructureReader(db_path, phase.DB_STRUCT_RECORD,
                                          db_ids)
        screener = phase.PhpScreener(self._hypo, phase.SCREEN_EXISTING_SITES)
        screener.setMatchOptions(self._match_options)
        while True:
            structure_bus = reader.next()
            # An empty bus signals that the reader is exhausted.
            if structure_bus.empty():
                break
            hit_cts = screener.screen(structure_bus)
            if not hit_cts:
                continue
            # Only the first structure returned for a record is recorded.
            st = structure.Structure(hit_cts[0])
            self.hit_titles.append(st.title)
            self.fitness_scores.append(st.property[phase.PHASE_FITNESS])
            if self._writer is not None:
                self._writer.append(st)

    def _doScreens(self):
        """
        Unzips projects and performs screens.
        """
        if self._hit_file:
            self._writer = structure.StructureWriter(self._hit_file)
        try:
            min_sites = self._match_options.getMinSites()
            delta_dist = self._match_options.getDeltaDist()
            project = phase.PhpProject()
            # Actives are screened first, then decoys.
            zipped_projects = ((True, self._actives), (False, self._decoys))
            with tempfile.TemporaryDirectory(dir=self._zip_dir) as dest_dir:
                for screening_actives, zipped_project in zipped_projects:
                    project_path = project.unzipProject(dest_dir,
                                                        zipped_project)
                    project.openProject(project_path)
                    try:
                        ligand_ids = project.getLigandIDs("all")
                        db_ids = [
                            project.getDbID(ligand_id)
                            for ligand_id in ligand_ids
                        ]
                        if screening_actives:
                            # Unique active titles in sorted order.
                            titles = {
                                project.getTitle(ligand_id)
                                for ligand_id in ligand_ids
                            }
                            self.active_titles = tuple(sorted(titles))
                        else:
                            self.total_decoys = len(ligand_ids)
                    finally:
                        # Close the project even if reading its records fails.
                        project.closeProject()
                    db_path = os.path.join(project_path, "ligands.phdb")
                    # Narrow the records to those returned by the index
                    # lookup before running the screen itself.
                    db_ids = phase.find_index_matches(db_path, self._hypo,
                                                      min_sites, delta_dist,
                                                      db_ids)
                    self._doScreen(db_path, db_ids)
        finally:
            # Always release the hit file, including when a screen raises.
            if self._writer is not None:
                self._writer.close()
        self._sortHits()

    def _sortHits(self):
        """
        Sorts hits by decreasing fitness and converts fitness_scores and
        hit_titles to tuples. Hits with equal fitness keep their original
        relative order.
        """
        # Sort strictly on fitness (key=itemgetter(0)) so that titles aren't
        # used to break ties in fitness.
        hit_pairs = sorted(zip(self.fitness_scores, self.hit_titles),
                           key=itemgetter(0),
                           reverse=True)
        if not hit_pairs:
            # zip(*[]) yields nothing, so unpacking it into two names would
            # raise ValueError when a screen produces no hits.
            self.fitness_scores, self.hit_titles = (), ()
            return
        # This converts the lists to tuples.
        self.fitness_scores, self.hit_titles = zip(*hit_pairs)

    def _validateHypo(self, hypo):
        """
        Ensures that the hypothesis file has the correct extension and exists.

        :param hypo: Path to hypothesis file (.phypo)
        :type hypo: str

        :raise OSError: If hypo is of the wrong type or cannot be found
        """
        if not fileutils.is_hypothesis_file(hypo):
            raise OSError("Illegal hypothesis file name: \"%s\"" % hypo)
        if not os.path.isfile(hypo):
            raise OSError("Hypothesis file \"%s\" not found" % hypo)

    def _validateScreenFiles(self, actives, decoys, hit_file):
        """
        Ensures that files are of the correct type and exist if applicable.

        :param actives: Path to zipped actives project (.phzip)
        :type actives: str

        :param decoys: Path to zipped decoys project (.phzip)
        :type decoys: str

        :param hit_file: Output Maestro/SD file for hits
        :type hit_file: str

        :raise OSError: If a file is of the wrong type or cannot be found
        """
        for project in [actives, decoys]:
            file_format = phase.get_phase_file_format(project)
            if file_format != phase.PhpFileFormat_PHP_FORMAT_PHZIP:
                raise OSError("Illegal zipped project name: \"%s\"" % project)
            if not os.path.isfile(project):
                raise OSError("Zipped project \"%s\" not found" % project)
        # The hit file is an output, so only its format is checked.
        if hit_file:
            file_format = phase.get_phase_file_format(hit_file)
            if file_format not in LEGAL_HIT_FILE_FORMATS:
                raise OSError("Illegal hit file name: \"%s\"" % hit_file)