Source code for schrodinger.application.glide.ensemble_selection
r"""
Classes for representing receptor ensembles and for choosing the best
ensembles of a specified size from a cross-docking experiment.
Example::
    from schrodinger.application.glide import ensemble_selection
    es = ensemble_selection.EnsembleSelection(fname='xdock.csv',
                                              exp_dg_fname='exp.txt')
    # get the 10 best 3-member ensembles
    print "Best by count"
    for ens in es.best_ensembles_by_count(3, 10):
        print "%d\t%.3f\t%s" % (ens.count, ens.rmsd, ens)
    print "\nBest by rmsd"
    for ens in es.best_ensembles_by_rmsd(3, 10):
        print "%d\t%.3f\t%s" % (ens.count, ens.rmsd, ens)
For more details, see the documentation for the EnsembleSelection class.
"""
#Contributors: Ivan Tubert-Brohman, Matt Repasky
import heapq
[docs]def lignum_reader(reader, lignum_prop="i_i_glide_lignum", ligoffset=0):
    """
    A generator that returns the structures in fname that have
    lignum_prop > ligoffset. 'reader' is an iterator that generates
    Structure objects (e.g., a StructureReader).
    IMPORTANT NOTES:
    1) This requires that the structures are sorted by lignum (as in a raw
       file from Glide), and that each lignum appears once at most. An
       exception will be raised otherwise.
    2) If a ligand is missing in the file, the generator returns None for
       that ligand. For example, if lig2 is not in the file, we get
       (lig1, None, lig3...)
    """
    ilig = ligoffset + 1
    last_lignum = None
    for ct in reader:
        if ct.property.get('b_glide_receptor'):
            continue
        lignum = ct.property[lignum_prop]
        if last_lignum is not None and lignum <= last_lignum:
            raise ValueError("Ligand file is not sorted by lignum in strictly"
                             " monotonically increasing order")
        last_lignum = lignum
        while ilig < lignum:
            ilig += 1
            yield None
        if lignum == ilig:
            ilig += 1
            yield ct
        else:  # lignum < ilig
            pass  # just skip this structure until we reach the one we want 
[docs]class Ensemble_Priority_Queue(object):
    """ Class implementing a priority queue for the purpose
        of maintaining lists of ensembles in memory during
        ensemble selection """
[docs]    def __init__(self, nslots):
        self.heap = []
        self.nslots = nslots 
[docs]    def add(self, ensemble_tuple):
        if ensemble_tuple.ensemble not in self.ensembles:
            heapq.heappush(self.heap, ensemble_tuple) 
[docs]    def pop(self):
        ensemble_tuple = heapq.heappop(self.heap)
        return ensemble_tuple 
[docs]    def best_scoring_element(self):
        return max(self.heap) 
[docs]    def worst_saved_score(self):
        return self.heap[0].score 
[docs]    def pushpop(self, ensemble_tuple):
        if (self.length < self.nslots):
            self.add(ensemble_tuple)
        elif (ensemble_tuple.score > self.worst_saved_score()):
            length_before_add = self.length
            self.add(ensemble_tuple)
            if (self.length > length_before_add):
                self.pop() 
    @property
    def length(self):
        return len(self.heap)
    @property
    def ensembles(self):
        return [element.ensemble for element in self.heap]