"""
Contains math-related utility functions
Copyright Schrodinger, LLC. All rights reserved.
"""
from collections import defaultdict
from math import floor
from math import log10
import numpy
from scipy import interpolate
from sklearn import cluster
from sklearn import metrics
[docs]def round_value(val, precision=3, significant_figures=None):
"""
Return val as a string with the required precision or significant figures.
Either precision or significant_figures should be provided.
Uses scientific notation for very large or small values, if the precision allows.
:param float val: The value to round
:param int precision: The precision needed after rounding. A precision of 2 means
two decimal places should be kept after rounding. A negative precision indicates
how many digits can be replaced with zeros before the decimal point.
-5 means that 123456 can be shown as 1e5, -3 means it can be shown as 1.23e5.
:param int significant_figures: The number of significant figures that should
remain after rounding. If provided, determines the rounding precision.
:rtype: str or None
:return: A string with the required precision, or None if the input is a string
"""
if isinstance(val, str):
return None
val = float(val)
if val == 0:
return '0'
magnitude = int(numpy.floor(numpy.log10(abs(val))))
if significant_figures:
precision = significant_figures - magnitude - 1
# Uses scientific notation if more than 4 digits can be removed in large numbers
if magnitude < -3 or (magnitude > 3 and precision < -3):
decimals = max(0, magnitude + precision)
return f'{val:.{decimals}e}'
elif precision > 0:
return f'{val:.{precision}f}'
else:
return str(round(val))
[docs]def deduplicate_xy_data(x_vals, y_vals):
"""
Remove duplicate x values by averaging the y values for them.
:param list x_vals: The x values
:param list y_vals: The y values
:rtype: list, list
:return: The deduplicated xy data
"""
all_vals = defaultdict(list)
for x_val, y_val in zip(x_vals, y_vals):
all_vals[x_val].append(y_val)
deduped_x = list(all_vals.keys())
deduped_y = [sum(all_vals[x]) / len(all_vals[x]) for x in deduped_x]
return deduped_x, deduped_y
[docs]class Interpolate1D:
"""
Creates a map between values in a source and a target axis, allowing to get
the equivalent target point for each source point.
Wrapper around `scipy.interpolate.interp1d` to allow logarithmic
interpolation or extrapolation.
"""
[docs] def __init__(self, source_vals, target_vals, log_interp=False):
"""
Create an instance.
:param tuple source_vals: The values of points in the source range
:param tuple target_vals: The values of points in the target range
:param bool log_interp: Whether the interpolation is logarithmic.
If False, linear interpolation will be used.
"""
if log_interp:
target_vals = [numpy.log10(x) for x in target_vals]
linear_interp = interpolate.interp1d(source_vals,
target_vals,
fill_value='extrapolate')
self.interp = lambda x: numpy.power(10, linear_interp(x))
else:
self.interp = interpolate.interp1d(source_vals,
target_vals,
fill_value='extrapolate')
def __call__(self, source_val):
"""
Get the equivalent target value of the passed source value
:param float source_val: The value in the source range
:rtype: float
:return: The equivalent value in the target range
"""
# The return value of interp is 'numpy.ndarray' so convert to float
return float(self.interp(source_val))
[docs]class Interpolate2D:
"""
Creates two instances of Interpolate1D to map values between two source axes
and two target axes. Example use case is mapping QGraphicsScene/QChart
XY coordinates to a XY coordinate system being displayed in the scene/chart.
The two axes need to be independent.
"""
[docs] def __init__(self,
x_source_vals,
x_target_vals,
y_source_vals,
y_target_vals,
x_log_interp=False,
y_log_interp=False):
"""
Create an instance.
:param tuple x_source_vals: The values of points in the X source range
:param tuple x_target_vals: The values of points in the X target range
:param tuple y_source_vals: The values of points in the Y source range
:param tuple y_target_vals: The values of points in the Y target range
:param bool x_log_interp: Whether the X axis interpolation is logarithmic
:param bool y_log_interp: Whether the Y axis interpolation is logarithmic
"""
self.x_interp = Interpolate1D(x_source_vals, x_target_vals,
x_log_interp)
self.y_interp = Interpolate1D(y_source_vals, y_target_vals,
y_log_interp)
def __call__(self, source_x_val, source_y_val):
"""
Get the equivalent target values of the passed source values
:param float source_x_val: The X value in the source range
:param float source_y_val: The Y value in the source range
:rtype: float, float
:return: The equivalent x, y values in the target range
"""
return (self.x_interp(source_x_val), self.y_interp(source_y_val))
[docs]def roundup(inp):
"""
Round away from zero (to +/-infinity).
:type inp: float or numpy.array
:param inp: value to be rounded
:rtype: float or numpy.array
:return: Rounded value
"""
# This is NOT default behavior of python3, see also:
# mail.python.org/pipermail/numpy-discussion/2014-October/071302.html
return numpy.trunc(inp + numpy.copysign(.5, inp))
[docs]def sig_fig_round(value, significant_figure=5):
"""
:type value: float
:param value: change the significant figures of this value
:type significant_figure: int
:param significant_figure: significant figure of the displayed value
:rtype: str
:return: str representation of a number with proper significant figures
"""
sig_fig_fmt = '%.{}g'.format(significant_figure)
return sig_fig_fmt % value
[docs]def polyfit(xdata, ydata, degree):
"""
Fit a polynomial of rang degree to data.
:param numpy.array xdata: X data
:param numpy.array ydata: Y data
:param int degree: Degree of the fitting polynomial
:rtype: numpy.array, float
:return: Fitting coefficients, coefficient of determination (R^2)
"""
coeffs = numpy.polyfit(xdata, ydata, degree)
# Get polynomial
polynomial = numpy.poly1d(coeffs)
# fit values, and mean
yhat = polynomial(xdata)
ybar = numpy.sum(ydata) / len(ydata)
ssreg = numpy.sum((yhat - ybar)**2)
sstot = numpy.sum((ydata - ybar)**2)
return coeffs, ssreg / sstot
[docs]def dbscan_cluster(objects, eps, key=None):
"""
Cluster the given objects by numbers obtained via the given key
function using a specified precision. Uses simple interface to
sklearn.cluster.DBSCAN (density based spatial clustering).
:type objects: list
:param objects: the objects to cluster
:type eps: float
:param eps: the precision that controls the size of the clusters,
for example if objects is [102, 104, 307, 309, 99, 919, 918] an
eps of 6 will produce 3 clusters, see sklearn.cluster.DBSCAN
documentation for more details
:type key: function or None
:param key: the function to get a number from the object, if None
is the identity function
:raise ValueError: if there is an issue
:rtype: dict
:return: clustered objects, keys are arbitrary cluster indices, values
are lists of objects for the given cluster
"""
if not key:
key = lambda x: x
# do not use a dict here in case objects is a list of numbers
# with degeneracies, for example [1.1, 1.1, 1.1, 4.5, 4.6, 8.9]
numbers = numpy.array([key(x) for x in objects]).reshape(-1, 1)
dist_matrix = metrics.pairwise_distances(numbers)
labels = cluster.DBSCAN(eps=eps, min_samples=1,
metric='precomputed').fit(dist_matrix).labels_
if -1 in labels:
raise ValueError('Clustering failed.')
groups = {}
for label, obj in zip(labels, objects):
groups.setdefault(label, []).append(obj)
return groups