"""
Functions and classes for plotting enrichment job outputs. e.g. basic
Sensitivity v 1-Specificity plots, etc.
Copyright Schrodinger, LLC. All rights reserved.
"""
import warnings
import schrodinger.utils.log as log
import schrodinger.utils.moduleproxy as moduleproxy
from schrodinger.analysis.enrichment import metrics
with warnings.catch_warnings():
warnings.simplefilter("ignore")
plt = moduleproxy.try_import("matplotlib.pyplot")
logger = log.get_output_logger(__file__)
[docs]def calc_sensitivity_with_rank(total_actives, active_ranks, rank):
"""
Calculates sensitivity at a particular rank, defined as:
Se(rank) = found_actives / total_actives
:param total_actives: The number of all active ligands in the screen, ranked
and unranked.
:type total_actives: int
:param active_ranks: List of *unadjusted* integer ranks for the actives
found in the screen. For example, a screen result that
placed three actives as the first three ranks has an
active_ranks list of = [1, 2, 3].
:type active_ranks: list(int)
:param rank: Active rank at which to calculate the specificity.
:type rank: int
:return: Sensitivity of the screen at a given rank.
:rtype: float
"""
# Number of actives found at given rank
# offset 0-based index
found_actives = active_ranks.index(rank) + 1
return found_actives / float(total_actives)
[docs]def calc_specificity_with_rank(total_actives, total_ligands, active_ranks,
rank):
"""
Calculates specificity at a particular rank, defined as:
Sp(rank) = discarded_decoys / total_decoys
:param total_actives: The number of all active ligands in the screen, ranked
and unranked.
:type total_actives: int
:param total_ligands: The number of the total number of ligands (actives and
unknowns/decoys) used in the screen.
:type total_ligands: int
:param active_ranks: List of *unadjusted* integer ranks for the actives
found in the screen. For example, a screen result that
placed three actives as the first three ranks has an
active_ranks list of = [1, 2, 3].
:type active_ranks: list(int)
:param rank: Active rank at which to calculate the specificity.
:type rank: int
:return: Specificity of the screen at a given rank.
:rtype: float
"""
total_decoys = total_ligands - total_actives
if total_decoys == 0:
raise ValueError("calculateSpecificity caught ZeroDivisionError")
# Number of actives found at given rank
# offset 0-based index
found_actives = active_ranks.index(rank) + 1
discarded_decoys = total_decoys - rank + found_actives
return discarded_decoys / float(total_decoys)
[docs]def get_percent_screen_curve_points(total_actives, total_ligands, active_ranks):
"""
:param total_actives: The number of all active ligands in the screen, ranked
and unranked.
:type total_actives: int
:param total_ligands: The number of the total number of ligands (actives and
unknowns/decoys) used in the screen.
:type total_ligands: int
:param active_ranks: List of *unadjusted* integer ranks for the actives
found in the screen. For example, a screen result that
placed three actives as the first three ranks has an
active_ranks list of = [1, 2, 3].
:type active_ranks: list(int)
:return: List of (%Screen, %Actives Found) tuples for the active ranks.
"""
curve_points = []
for act_idx, rank in enumerate(active_ranks, start=1):
prcnt_act = float(act_idx) / float(total_actives) * 100.0
prcnt_scrn = float(rank) / float(total_ligands) * 100.0
item = (prcnt_scrn, prcnt_act)
logger.debug("getPercentScreenCurvePoints: %s" % str(item))
curve_points.append(item)
return curve_points
[docs]def get_plot_data(total_actives, total_ligands, active_ranks, title_ranks):
"""
:param total_actives: The number of all active ligands in the screen, ranked
and unranked.
:type total_actives: int
:param total_ligands: The number of the total number of ligands (actives and
unknowns/decoys) used in the screen.
:type total_ligands: int
:param active_ranks: List of *unadjusted* integer ranks for the actives
found in the screen. For example, a screen result that
placed three actives as the first three ranks has an
active_ranks list of = [1, 2, 3].
:type active_ranks: list(int)
:return: A list of active Title, Rank, Sensitivity, Specificity,
%Actives Found, %Screen tuples.
:rtype: list
:note: This function should only be accessed via Calculator unless we
figured how to get title_ranks without an additional O(n) work.
:note: This list may grow, but the relative order of the columns
should remain fixed.
"""
headers = [
'Title',
'Rank',
'Specificity',
'1-Specificity',
'Sensitivity',
'%Screen',
'%Actives Found',
]
rows = []
rows.append(headers)
for rank_idx, rank in enumerate(sorted(title_ranks), start=1):
sp = calc_sensitivity_with_rank(total_actives, active_ranks, rank)
prcnt_act = float(rank_idx) / float(total_actives) * 100.0
prcnt_scrn = float(rank) / float(total_ligands) * 100.0
active_data = [
title_ranks[rank], # Title
rank,
sp,
1.0 - sp,
calc_sensitivity_with_rank(total_actives, active_ranks, rank),
prcnt_scrn,
prcnt_act,
]
rows.append(active_data)
return rows
[docs]def get_roc_curve_points(total_actives, total_ligands, active_ranks):
"""
Calculates set of points in ROC curve along each active rank.
:param total_actives: The number of all active ligands in the screen, ranked
and unranked.
:type total_actives: int
:param total_ligands: The number of the total number of ligands (actives and
unknowns/decoys) used in the screen.
:type total_ligands: int
:param active_ranks: List of *unadjusted* integer ranks for the actives
found in the screen. For example, a screen result that
placed three actives as the first three ranks has an
active_ranks list of = [1, 2, 3].
:type active_ranks: list(int)
:return: List of (1 - specificity, sensitivity, rank) tuples.
:rtype: list of tuples
"""
roc_curve_points = []
for rank in active_ranks:
# False positive rate
x = 1 - calc_specificity_with_rank(total_actives, total_ligands,
active_ranks, rank)
# True positive rate
y = calc_sensitivity_with_rank(total_actives, active_ranks, rank)
roc_curve_points.append((x, y, rank))
return roc_curve_points
[docs]def save_plot(total_actives,
total_ligands,
active_ranks,
adjusted_active_ranks,
total_ranked,
png_file="plot.png",
title='Screen Results',
xlabel='1-Specificity',
ylabel='Sensitivity'):
"""
Saves an image of the ROC plot, Sensitivity v 1-Specificity,
to a png file.
:param total_actives: The number of all active ligands in the screen, ranked
and unranked.
:type total_actives: int
:param total_ligands: The number of the total number of ligands (actives and
unknowns/decoys) used in the screen.
:type total_ligands: int
:param active_ranks: List of *unadjusted* integer ranks for the actives
found in the screen. For example, a screen result that
placed three actives as the first three ranks has an
active_ranks list of = [1, 2, 3].
:type active_ranks: list(int)
:param adjusted_active_ranks: Modified active ranks; each rank is improved
by the number of preceding actives. For
example, a screen result that placed three
actives as the first three ranks, [1, 2, 3],
has adjusted ranks of [1, 1, 1]. In this way,
actives are not penalized by being outranked
by other actives.
:type adjusted_active_ranks: list(int)
:param total_ranked: The number of unique ranked ligands. Deduced from
results_file or merged_file.
:type total_ranked: int
:param png_file: Path to output file, default is 'plot.png'.
:type png_file: str
:param title: Plot title, default is 'Screen Results'.
:type title: str
:param xlabel: x-axis label, default is '1-Specificity'.
:type xlabel: str
:param ylabel: y-axis label, default is 'Sensitivity'.
:type ylabel: str
"""
# FIXME: this method shares a log of code with Plotter.savePlot()
# Determine if it is more useful to return a 'figure'
# rather than serializing the image.
y_points = [0]
x_points = [0]
for x, y, rank in get_roc_curve_points(total_actives, total_ligands,
active_ranks):
y_points.append(y)
x_points.append(x)
msg = "plotROC: rank %d, Se %f, 1-Sp %.2f" % (rank, y, x)
logger.debug(msg)
plt.xlabel(xlabel)
plt.ylabel(ylabel)
plt.plot(x_points, y_points, 'bo-')
# Reference curve for random performance.
sp_ref = [0, 1.0]
se_ref = [0, 1.0]
plt.plot(sp_ref, se_ref, 'k-')
x_points.append(1.0) # Add the top right corner to the shaded area.
y_points.append(y) # Add the top right corner to the shaded area.
x_points.append(1.0) # Add the bottom right corner to shaded area.
y_points.append(0) # Add the bottom right corner to shaded area.
plt.grid(True)
plt.fill(
x_points,
y_points,
facecolor='blue',
edgecolor='r',
linewidth=5.0,
alpha=0.2 # Transparency.
)
plt.title(title)
plt.text(
0.30, 0.20, "BEDROC(\alpha=20, \alpha*Ra=%.4f): %.3f" %
(metrics.calc_BEDROC(
total_actives, total_ligands, active_ranks, alpha=20)[1],
metrics.calc_BEDROC(
total_actives, total_ligands, active_ranks, alpha=20)[0]))
plt.text(
0.30, 0.15, "RIE: %.3f" %
metrics.calc_RIE(total_actives, total_ligands, active_ranks, alpha=20))
plt.text(
0.30, 0.10, "ROC: %.3f" %
metrics.calc_ROC(total_actives, total_ligands, adjusted_active_ranks))
plt.text(
0.30, 0.05, "AUAC: %.3f" % metrics.calc_AUAC(
total_actives, total_ligands, total_ranked, active_ranks))
plt.savefig(png_file)
plt.close()
################################################################################
# Classes
################################################################################
class _BasePlotter(object):
"""
Class on which Plotter and PercentScreenPlotter are based.
"""
def __init__(self, calculators, title, xlabel, ylabel, xmax, ymax, legend):
"""
:param calculators: List of Calculator instances.
:type calculators: list(calculator.Calculator)
:param title: The plot title.
:type title: string
"""
self.calculators = calculators
self.title = title
self.xlabel = xlabel
self.ylabel = ylabel
self.legend = legend
self.xmax = xmax
self.ymax = ymax
# TODO now that markers are no longer used, consolidate these into
# a single style:
self.styles = ['b-', 'b-', 'b-', 'b-', 'b-', 'b-']
self.series_labels = []
self.legend_location = 4 # Lower right.
self.legend_font_size = 8
self.alpha = 0.1 # Fill transparency.
def getPointsFromCalc(self, calculator):
"""
Returns points for this metric from the given Calculator instance.
Implement this method in subclasses.
:param calculator: Calculator instance.
:type calculator: calculator.Calculator
:return List of (x, y) points
:rtype: list
"""
raise NotImplementedError()
def plot(self, indexes=None):
"""
Launch interactive matplotlib viewer loaded with the plot.
:param indexes: List of indexes into self.calcs to include in the plot.
If the argument is None then all series are plotted.
:type indexes: list
"""
plot_figure = self.getPlotFigure(indexes)
plt.figure(num=plot_figure.number)
plt.show()
def showPlotWindow(self, win_title):
"""
Open a window with this plot.
:param win_title: Title for the plot window.
:type win_title: str
"""
# These imports are here and not at the top of the module because
# many clients of the module don't need this functionality, and because
# the import of navtoolbar itself produces annoying warnings when
# Maestro is not available.
from matplotlib.backends.backend_qt5agg import \
FigureCanvasQTAgg as FigureCanvas
import schrodinger.ui.qt.navtoolbar as navtoolbar
plot_figure = self.getPlotFigure()
graph = FigureCanvas(plot_figure)
nav_toolbar = navtoolbar.NavToolbar(graph, graph.window(), False)
nav_toolbar.lastDir = '.'
nav_toolbar.show()
graph.setWindowTitle(win_title)
graph.show()
def savePlot(self, png_file='plot.png'):
"""
Serialized figure to a png format file.
:param png_file: Path to output png file.
:type png_file: string
:return: None
"""
plot_figure = self.getPlotFigure()
plt.figure(num=plot_figure.number)
plt.savefig(png_file)
plt.close()
def getPlotFigure(self, indexes=None):
"""
Returns a new pyplot figure the plot.
:param indexes: List of indexes into self.calcs to include in the plot.
If the argument is None then all series are plotted.
:type indexes: list
:return: a pyplot figure.
"""
plot_figure = plt.figure()
self.series_labels = [] # Clear any previous labels.
if indexes is None:
for calculator in self.calculators:
self.addSeries(calculator, plot_figure=plot_figure)
else:
for index in indexes:
self.addSeries(self.calcs[index], plot_figure=plot_figure)
self.addReferenceSeries(plot_figure=plot_figure)
plt.title(self.title)
plt.legend(self.series_labels, loc=self.legend_location)
ltext = plt.gca().get_legend().get_texts()
plt.setp(ltext[0], fontsize=self.legend_font_size)
return plot_figure
def addSeries(self, calculator, style=None, plot_figure=None):
"""
:param calculator: Calculator instance.
:type calculator: calculator.Calculator
:param style: The matplotlib linestyle.
If style is None then a style is selected by fetching
the next element from self.styles.
:type style: string
:param plot_figure: A pyplot figure.
If None then the current figure is used.
:type plot_figure: plt.Figure
:return: None
"""
if plot_figure is None:
plot_figure = plt.figure()
plt.figure(num=plot_figure.number)
if style is None:
self.styles = self.styles[1:] + [self.styles[0]]
style = self.styles[-1]
# Unzip points to list of x and y coordinates, adding endpoints
points = [(0.0, 0.0)]
points += self.getPointsFromCalc(calculator)
points += [(self.xmax, self.ymax)]
x, y = list(zip(*points))
# PANEL-8740: Points are changes in True Positive Rate (y-coordinate),
# must use "steps-post" to get correct step plot.
plt.plot(x, y, style, drawstyle="steps-post")
plt.xlim([0.0, self.xmax * 1.05])
plt.ylim([0.0, self.ymax * 1.05])
plt.xlabel(self.xlabel)
plt.ylabel(self.ylabel)
plt.grid(True)
self.series_labels.append(self.legend)
def addReferenceSeries(self, style=None, plot_figure=None):
"""
Adds a diagonal representing random performance.
:param style: The matplotlib linestyle.
If style is None then 'k-' is used.
:type style: string
:param plot_figure: A pyplot figure.
If None then the current figure is used.
:type plot_figure: pyplot.Figure
:return: None
"""
if style is None:
style = 'k-'
if plot_figure is None:
plot_figure = plt.figure()
plt.figure(num=plot_figure.number)
# Reference curve for random performance.
plt.plot([0, self.xmax], [0, self.ymax], '#BFBFBF')
self.series_labels.append('Random')
[docs]class Plotter(_BasePlotter):
"""
A class to plot multiple series of Calculator instances.
API example where `enrich_calc1` and `enrich_calc2` are instances of
Calculator::
enrich_plotter = plotter.Plotter([enrich_calc1, enrich_calc2])
enrich_plotter.plot() # Launch interactive plot window.
enrich_plotter.savePlot('my_plot.png') # Save plot to file.
There are six line styles defined by default. Plotting more than
six results cycles through the styles.
"""
[docs] def __init__(self,
calculators,
title='Screen Results',
xlabel='1-Specificity',
ylabel='Sensitivity',
legend_label='Screen Results'):
"""
:param calculators: List of Calculator instances.
:type calculators: list(calculator.Calculator)
:param title: The plot title.
:type title: string
:param xlabel: The x-axis label.
:type xlabel: str
:param ylabel: The y-axis label.
:type ylabel: str
:param legend_label: The legend label.
:type legend_label: str
"""
super(Plotter, self).__init__(calculators,
title,
xlabel,
ylabel,
xmax=1.0,
ymax=1.0,
legend=legend_label)
[docs] def getPointsFromCalc(self, calculator):
"""
Returns points for this metric from the given Calculator instance.
:param calculator: Calculator instance.
:type calculator: calculator.Calculator
:return List of (x, y) points
:rtype: list
"""
return [(x, y) for x, y, rank in get_roc_curve_points(
calculator.total_actives, calculator.total_ligands,
calculator.active_ranks)]
[docs]class PercentScreenPlotter(_BasePlotter):
"""
A class to plot multiple series of Calculator data as %Actives Found
vs %Screen.
API example where `enrich_calc1` and `enrich_calc2` are instances of
Calculator::
enrich_plotter = plotter.PercentScreenPlotter([enrich_calc1,
enrich_calc2])
enrich_plotter.plot() # Launch interactive plot window.
enrich_plotter.savePlot('my_plot.png') # Save plot to file.
There are six line styles defined by default. Plotting more than
six results cycles through the styles.
"""
[docs] def __init__(self,
calculators,
title='Screen Results',
xlabel='Percent Screen',
ylabel='Percent Actives Found',
legend_label='Screen Results'):
"""
:param calculators: List of Calculator instances.
:type calculators: list(calculator.Calculator)
:param title: The plot title.
:type title: string
:param xlabel: The x-axis label.
:type xlabel: str
:param ylabel: The y-axis label.
:type ylabel: str
:param legend_label: The legend label.
:type legend_label: str
"""
super(PercentScreenPlotter, self).__init__(calculators,
title,
xlabel,
ylabel,
xmax=100.0,
ymax=100.0,
legend=legend_label)
[docs] def getPointsFromCalc(self, calculator):
"""
Returns points for this metric from the given Calculator instance.
:param calculator: Calculator instance.
:type calculator: calculator.Calculator
:return List of (x, y) points
:rtype: list
"""
return get_percent_screen_curve_points(calculator.total_actives,
calculator.total_ligands,
calculator.active_ranks)