DeepSEQreen_fast_build

Running on CPU Upgrade

App Files Files Community

DeepSEQreen_fast_build / deepscreen /data /utils /label.py

libokj

Upload 110 files

c0ec7e6 about 1 year ago

raw

history blame

3.75 kB

	from numbers import Number
	from typing import Optional, Union

	import numpy as np

	from deepscreen.utils import get_logger

	log = get_logger(__name__)

	MOLARITY_TO_POTENCY = {
	'p': lambda x: x,
	'M': lambda x: -np.log10(x),
	'mM': lambda x: -np.log10(x) + 3,
	'μM': lambda x: -np.log10(x) + 6,
	'uM': lambda x: -np.log10(x) + 6, # in case someone doesn't know how to type micromolar lol
	'nM': lambda x: -np.log10(x) + 9,
	'pM': lambda x: -np.log10(x) + 12,
	'fM': lambda x: -np.log10(x) + 15,
	}


	# TODO rewrite for swifter.apply
	def molar_to_p(labels, units):
	assert units in MOLARITY_TO_POTENCY, f"Allowed units: {', '.join(MOLARITY_TO_POTENCY)}."

	unit_converted_labels = []
	for label, unit in (labels, units):
	unit_converted_labels.append(MOLARITY_TO_POTENCY[unit](label))
	labels = np.array(unit_converted_labels)

	return labels


	def label_discretize(labels, thresholds):
	# if isinstance(threshold, Number):
	# labels = np.where(labels < threshold, 1, 0)
	# else:
	# labels = np.where(labels < threshold[0], 1, np.where(labels > threshold[1], 0, np.nan))
	if isinstance(thresholds, Number):
	labels = 1 - np.digitize(labels, [thresholds])
	else:
	labels = np.digitize(labels, np.sort(thresholds)[::-1])

	return labels


	def label_transform(
	labels,
	units: Optional[list[str]],
	thresholds: Optional[Union[float, list[Number]]],
	discard_intermediate: Optional[bool]
	):
	f"""Convert labels of all units to p scale (-log10[M]) and binarize them if specified.
	:param labels: a sequence of labels, continuous or binary values
	:type labels: array_like
	:param units: a sequence of label units in {', '.join(MOLARITY_TO_POTENCY)}
	:type units: array_like, optional
	:param thresholds: discretization threshold(s) for affinity labels, in p scale (-log10[M]).
	A single number maps affinities below it to 1 and otherwise to 0.
	A tuple of two or more thresholds maps affinities to multiple discrete levels descendingly, assigning values
	values below the lowest threshold to the highest level (e.g. 2) and values above the greatest threshold to 0
	:type thresholds: list, float, optional
	:param discard_intermediate: whether to discard the intermediate (indeterminate) level if provided an odd
	number of thresholds (>=3)
	:type discard_intermediate: bool
	:return: a numpy array of affinity labels in p scale (-log10[M]) or discrete labels
	"""
	# # Check if labels are already discrete (ignoring NAs).
	# discrete = labels.dropna().isin([0, 1]).all()
	#
	# if discrete:
	# assert discretize, "Cannot train a regression model with discrete labels."
	# if thresholds:
	# warn("Ignoring 'threshold' because 'Y' (labels) in the data table is already binary.")
	# if units:
	# warn("Ignoring 'units' because 'Y' (labels) in the data table is already binary.")
	# labels = labels
	if units:
	labels = molar_to_p(labels, units)

	if thresholds:
	labels = label_discretize(labels, thresholds)
	if discard_intermediate:
	assert len(thresholds) % 2 == 1 and len(thresholds) >= 3, \
	"Must give an odd number of (at least 3) thresholds to discard the intermediate level."
	intermediate_level = len(thresholds) // 2
	# Make the intermediate-level labels NaN (which will be filtered out later)
	labels[labels == intermediate_level] = np.nan
	# Reduce all levels above the intermediate level by 1
	labels[labels > intermediate_level] -= 1

	return labels