Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
from numbers import Number | |
from typing import Optional, Union | |
import numpy as np | |
from deepscreen.utils import get_logger | |
log = get_logger(__name__) | |
# Converters from a label expressed in a given unit to p scale (-log10 of the molar value).
# The offset added after -log10 is the power of ten separating the unit from molar (e.g. 9 for nM).
# 'p' marks labels that are already in p scale and are passed through unchanged.
MOLARITY_TO_POTENCY = {
    'p': lambda x: x,
    'M': lambda x: -np.log10(x),
    'mM': lambda x: -np.log10(x) + 3,
    # Micromolar is registered under both look-alike characters, written as escapes so the two keys
    # stay distinct even if this file is ever Unicode-normalized.
    '\u03bcM': lambda x: -np.log10(x) + 6,  # U+03BC GREEK SMALL LETTER MU
    '\u00b5M': lambda x: -np.log10(x) + 6,  # U+00B5 MICRO SIGN
    'uM': lambda x: -np.log10(x) + 6,  # ASCII fallback for micromolar
    'nM': lambda x: -np.log10(x) + 9,
    'pM': lambda x: -np.log10(x) + 12,
    'fM': lambda x: -np.log10(x) + 15,
}
# TODO rewrite for swifter.apply
def molar_to_p(labels, units):
    """Convert labels given in molar-based units to p scale (-log10[M]).

    :param labels: a sequence of numeric labels
    :type labels: array_like
    :param units: either a single unit applied to every label, or a sequence holding one unit per label;
        every unit must be a key of ``MOLARITY_TO_POTENCY``
    :type units: str or array_like
    :return: a numpy array of the labels converted to p scale
    :raises ValueError: if a unit is not supported, or if labels and units differ in length
    """
    allowed_units_message = f"Allowed units: {', '.join(MOLARITY_TO_POTENCY)}."

    if isinstance(units, str):
        if units not in MOLARITY_TO_POTENCY:
            raise ValueError(allowed_units_message)
        # One unit shared by all labels: convert the whole array in a single call.
        return MOLARITY_TO_POTENCY[units](np.array(labels, dtype=float))

    units = list(units)
    if any(unit not in MOLARITY_TO_POTENCY for unit in units):
        raise ValueError(allowed_units_message)
    # zip() would silently drop the unmatched tail, so reject mismatched lengths up front.
    if len(labels) != len(units):
        raise ValueError(
            f"Got {len(labels)} labels but {len(units)} units; expected one unit per label."
        )

    # Pair each label with its own unit and apply that unit's converter.
    return np.array([MOLARITY_TO_POTENCY[unit](label) for label, unit in zip(labels, units)])
def label_discretize(labels, thresholds):
    """Map continuous labels to discrete levels using one or several thresholds.

    :param labels: continuous labels to discretize
    :type labels: array_like
    :param thresholds: a single number, or a sequence of numbers (in any order)
    :type thresholds: Number or array_like
    :return: discrete levels. With a single threshold, labels below it become 1 and all others 0.
        With several thresholds, levels are numbered from 0 for labels at or above the greatest
        threshold up to ``len(thresholds)`` for labels below the lowest one.
    """
    if not isinstance(thresholds, Number):
        # Descending bin edges make np.digitize count levels downwards from the greatest threshold.
        descending_edges = np.sort(thresholds)[::-1]
        return np.digitize(labels, descending_edges)

    # np.digitize yields 0 below the threshold and 1 from it upwards; flip so that "below" is 1.
    at_or_above = np.digitize(labels, [thresholds])
    return 1 - at_or_above
def label_transform(
        labels,
        units: Optional[list[str]],
        thresholds: Optional[Union[float, list[Number]]],
        discard_intermediate: Optional[bool]
):
    """Convert labels of all units to p scale (-log10[M]) and discretize them if specified.

    :param labels: a sequence of labels, continuous or binary values
    :type labels: array_like
    :param units: a sequence of label units, each one a key of ``MOLARITY_TO_POTENCY``;
        unit conversion is skipped when None or empty
    :type units: array_like, optional
    :param thresholds: discretization threshold(s) for affinity labels, in p scale (-log10[M]).
        A single number maps affinities below it to 1 and otherwise to 0.
        A sequence of two or more thresholds maps affinities to multiple discrete levels descendingly,
        assigning values below the lowest threshold to the highest level (e.g. 2) and values at or above
        the greatest threshold to 0. Discretization is skipped when None or otherwise falsy.
    :type thresholds: list, float, optional
    :param discard_intermediate: whether to discard the intermediate (indeterminate) level; requires an odd
        number of thresholds (>=3) and only takes effect when thresholds are given
    :type discard_intermediate: bool, optional
    :return: affinity labels in p scale (-log10[M]) or discrete labels. The result is a float array with
        NaN marking discarded labels when ``discard_intermediate`` is applied; the input is returned
        unchanged when neither units nor thresholds are given.
    :raises ValueError: if ``discard_intermediate`` is set and thresholds is not an odd number (>=3) of values
    """
    # TODO: detect labels that are already discrete (ignoring NAs) and, in that case, warn and skip
    #  unit conversion and thresholding instead of applying them.
    if _is_specified(units):
        labels = molar_to_p(labels, units)

    if _is_specified(thresholds):
        labels = label_discretize(labels, thresholds)
        if discard_intermediate:
            # A scalar threshold counts as one, so it reaches the validation error below
            # instead of failing on len().
            n_thresholds = 1 if isinstance(thresholds, Number) else len(thresholds)
            labels = _discard_intermediate_level(labels, n_thresholds)

    return labels


def _is_specified(option):
    """Return whether an optional argument was provided, without tripping on array truthiness."""
    if option is None:
        return False
    try:
        # Scalars, strings and plain sequences keep their ordinary truthiness.
        return bool(option)
    except ValueError:
        # Multi-element arrays / Series refuse bool(); treat any non-empty one as provided.
        return len(option) > 0


def _discard_intermediate_level(labels, n_thresholds):
    """Set the intermediate level to NaN and shift the levels above it down by one."""
    if n_thresholds % 2 != 1 or n_thresholds < 3:
        raise ValueError(
            "Must give an odd number of (at least 3) thresholds to discard the intermediate level."
        )
    # NOTE(review): an odd number of thresholds yields an even number of levels, so
    # `n_thresholds // 2` is not a unique middle level -- confirm the intended rule.
    intermediate_level = n_thresholds // 2
    # Integer level arrays cannot store NaN, so work on a float copy.
    labels = np.array(labels, dtype=float)
    # Make the intermediate-level labels NaN (which will be filtered out later)
    labels[labels == intermediate_level] = np.nan
    # Reduce all levels above the intermediate level by 1
    labels[labels > intermediate_level] -= 1
    return labels