JSX_TTS / torch /include /ATen /native /quantized /AffineQuantizerBase.h

Upload 5875 files

9dd3461 over 2 years ago

1.46 kB

	#pragma once
	#include <c10/macros/Export.h>
	#include <c10/core/ScalarType.h>

	namespace at {
	namespace native {

	// Quantize a single float `value` into the quantized type T, given the
	// quantization parameters `scale` and `zero_point`.
	// Only the declaration appears here; the definition (and the set of T it is
	// instantiated for) is not visible in this header.
	// NOTE(review): presumably the usual affine mapping (value / scale +
	// zero_point, rounded and clamped to T's range) -- confirm against the
	// definition.
	template <typename T>
	TORCH_API T quantize_val(double scale, int64_t zero_point, float value);
	// ARM variant of quantize_val: quantizes a single float `value` into T.
	// Unlike quantize_val, it takes `scale` as float and `zero_point` as int32_t,
	// and this declaration carries no TORCH_API marker.
	// TODO: combine this with quantize_val once the numerics for ARM are aligned
	// with it.
	template <typename T>
	T quantize_val_arm(
	const float scale,
	const int32_t zero_point,
	const float value);
	// Array form of quantization: `src` is a read-only float array and `dst` an
	// array of T, with the same `scale` / `zero_point` parameters as the scalar
	// form. Both `count` and the `precision` template parameter default to 8.
	// NOTE(review): presumably `count` is the number of elements converted from
	// `src` into `dst`, and `precision` the bit width of the quantized values --
	// confirm against the definition, which is not visible in this header.
	template <typename T, int precision = 8>
	void quantize_vec(
	double scale,
	int64_t zero_point,
	const float* src,
	T* dst,
	size_t count = 8);
	// Convert a single quantized `value` of type T back to float, given the
	// quantization parameters `scale` and `zero_point`.
	// NOTE(review): presumably (value - zero_point) * scale -- confirm against
	// the definition, which is not visible in this header.
	template <typename T>
	TORCH_API float dequantize_val(double scale, int64_t zero_point, T value);
	// Array form of dequantization: `src` is a read-only array of quantized T
	// values and `dst` a float array; `count` defaults to 8.
	// NOTE(review): this is declared to return float even though it has a `dst`
	// output pointer (quantize_vec, its counterpart, returns void). What the
	// returned float represents cannot be determined from this header -- confirm
	// against the definition before relying on it.
	template <typename T>
	TORCH_API float dequantize_vec(
	double scale,
	int64_t zero_point,
	const T* src,
	float* dst,
	size_t count = 8);
	// Convert a quantized value `src` of type SRC_T into a quantized value of
	// type DST_T.
	// NOTE(review): the four leading parameters are unnamed in this declaration;
	// presumably they are (src_scale, src_zero_point, dst_scale, dst_zero_point)
	// in that order -- confirm against the definition.
	template <typename SRC_T, typename DST_T>
	TORCH_API DST_T requantize_val(double, int64_t, double, int64_t, SRC_T src);

	// Given a `multiplier` and a `zero_point`, requantize an int32_t computed
	// value (`src`) back to a quantized value of type DST_T.
	// `src` and `zero_point` are declared as int64_t rather than int32_t; see the
	// comment above the make_per_tensor_affine_quantizer function for the reason
	// int64_t is used.
	template <typename DST_T>
	TORCH_API DST_T
	requantize_from_int(double multiplier, int64_t zero_point, int64_t src);

	// Quantize a single float `value` to an int using floating-point
	// quantization parameters: here `zero_point` is a float, unlike the integer
	// zero_point taken by quantize_val.
	// NOTE(review): presumably `qmin` / `qmax` are the inclusive bounds the
	// result is clamped to -- confirm against the definition, which is not
	// visible in this header.
	int quantize_val_float_qparams(float scale, float zero_point, float value, int qmin, int qmax);

	} // namespace native
	} // namespace at