|
#pragma once |
|
#include <c10/macros/Export.h> |
|
#include <c10/core/ScalarType.h> |
|
|
|
namespace at { |
|
namespace native { |
|
|
|
|
|
template <typename T> |
|
TORCH_API T quantize_val(double scale, int64_t zero_point, float value); |
|
|
|
|
|
// quantize_val_arm<T>: takes a float `scale`, an int32_t `zero_point` and a
// float `value`, and returns a T. Note the narrower parameter types compared
// with quantize_val (float/int32_t instead of double/int64_t), and that this
// declaration is not marked TORCH_API.
//
// Top-level `const` on the by-value parameters is omitted in this
// declaration: it is not part of the function type, so a definition that
// declares them `const` still matches.
// NOTE(review): presumably an ARM-specific variant of quantize_val -- confirm
// against the definition and any platform guards around it.
template <typename T>
T quantize_val_arm(float scale, int32_t zero_point, float value);
|
// quantize_vec<T, precision>: takes `scale`, `zero_point`, a read-only float
// pointer `src`, a writable T pointer `dst` and an element `count`; returns
// void. `precision` defaults to 8 and `count` defaults to 8. Not marked
// TORCH_API.
// NOTE(review): presumably quantizes `count` floats from `src` into `dst`,
// with `precision` being the bit width of the quantized values -- confirm
// against the definition. Buffer-size requirements for `src`/`dst` are not
// visible from this declaration.
template <typename T, int precision = 8>
void quantize_vec(
    double scale,
    int64_t zero_point,
    const float* src,
    T* dst,
    size_t count = 8);
|
template <typename T> |
|
TORCH_API float dequantize_val(double scale, int64_t zero_point, T value); |
|
template <typename T> |
|
TORCH_API float dequantize_vec( |
|
double scale, |
|
int64_t zero_point, |
|
const T* src, |
|
float* dst, |
|
size_t count = 8); |
|
template <typename SRC_T, typename DST_T> |
|
TORCH_API DST_T requantize_val(double, int64_t, double, int64_t, SRC_T src); |
|
|
|
|
|
|
|
|
|
template <typename DST_T> |
|
TORCH_API DST_T |
|
requantize_from_int(double multiplier, int64_t zero_point, int64_t src); |
|
|
|
// quantize_val_float_qparams: non-template function taking float `scale`,
// float `zero_point`, float `value` and the int bounds `qmin`/`qmax`;
// returns int. Unlike quantize_val, the zero point is a float here.
// NOTE(review): presumably quantizes `value` with floating-point qparams and
// limits the result to [qmin, qmax] -- confirm against the definition.
// NOTE(review): not marked TORCH_API, unlike the exported templates in this
// header -- confirm whether it needs to be visible outside the library.
int quantize_val_float_qparams(
    float scale,
    float zero_point,
    float value,
    int qmin,
    int qmax);
|
|
|
} |
|
} |
|
|