/*
 * This implementation is extracted from numpy:
 * Repo: github.com/numpy/numpy
 * File: numpy/core/src/npymath/halffloat.c
 * Commit ID: 25c23f1d956104a072a95355ffaa7a38b53710b7
 * Functions are made "static inline" for performance, and
 * non-conversion functions are removed, and generation of
 * exceptions is disabled.
 *
 * Local change relative to that commit: when a float/double is converted
 * to a subnormal half, the bits discarded while aligning the significand
 * are now taken into account by the ties-to-even test, so values lying
 * just above a halfway point are rounded up instead of being mistaken
 * for an exact tie.
 */

#include <stdint.h>

typedef uint16_t npy_uint16;
typedef uint32_t npy_uint32;
typedef uint64_t npy_uint64;

/*
 * This chooses between 'ties to even' and 'ties away from zero'.
 */
#define NPY_HALF_ROUND_TIES_TO_EVEN 1

/*
 * If these are 1, the conversions try to trigger underflow,
 * overflow, and invalid exceptions in the FP system when needed.
 *
 * NOTE: the npy_set_floatstatus_*() hooks used when these are enabled
 * are not declared anywhere in this file; they must be provided by the
 * includer before any of these flags is switched to 1.
 */
#define NPY_HALF_GENERATE_OVERFLOW 0
#define NPY_HALF_GENERATE_UNDERFLOW 0
#define NPY_HALF_GENERATE_INVALID 0

/*
 ********************************************************************
 *                     BIT-LEVEL CONVERSIONS                        *
 ********************************************************************
 */

/*
 * Convert the bit pattern of a 32-bit float into the bit pattern of a
 * 16-bit half.
 *
 * f: raw bits of the float (sign: bit 31, exponent: bits 30..23,
 *    significand: bits 22..0).
 *
 * Returns the raw half bits (sign: bit 15, exponent: bits 14..10,
 * significand: bits 9..0).  Values too large for a half become signed
 * infinity, NaNs stay NaNs, values too small become a subnormal half or
 * signed zero.  Rounding follows NPY_HALF_ROUND_TIES_TO_EVEN.
 */
static inline npy_uint16
npy_floatbits_to_halfbits(npy_uint32 f)
{
    npy_uint32 f_exp, f_sig;
    npy_uint16 h_sgn, h_exp, h_sig;

    h_sgn = (npy_uint16) ((f&0x80000000u) >> 16);
    f_exp = (f&0x7f800000u);

    /* Exponent overflow/NaN converts to signed inf/NaN */
    if (f_exp >= 0x47800000u) {
        if (f_exp == 0x7f800000u) {
            /* Inf or NaN */
            f_sig = (f&0x007fffffu);
            if (f_sig != 0) {
                /* NaN - propagate the flag in the significand... */
                npy_uint16 ret = (npy_uint16) (0x7c00u + (f_sig >> 13));
                /* ...but make sure it stays a NaN */
                if (ret == 0x7c00u) {
                    ret++;
                }
                return (npy_uint16) (h_sgn + ret);
            }
            else {
                /* signed inf */
                return (npy_uint16) (h_sgn + 0x7c00u);
            }
        }
        else {
            /* overflow to signed inf */
#if NPY_HALF_GENERATE_OVERFLOW
            npy_set_floatstatus_overflow();
#endif
            return (npy_uint16) (h_sgn + 0x7c00u);
        }
    }

    /* Exponent underflow converts to a subnormal half or signed zero */
    if (f_exp <= 0x38000000u) {
        /*
         * Signed zeros, subnormal floats, and floats with small
         * exponents all convert to signed zero halfs.
         */
        if (f_exp < 0x33000000u) {
#if NPY_HALF_GENERATE_UNDERFLOW
            /* If f != 0, it underflowed to 0 */
            if ((f&0x7fffffff) != 0) {
                npy_set_floatstatus_underflow();
            }
#endif
            return h_sgn;
        }
        /* Make the subnormal significand */
        f_exp >>= 23;
        f_sig = (0x00800000u + (f&0x007fffffu));
#if NPY_HALF_GENERATE_UNDERFLOW
        /* If it's not exactly represented, it underflowed */
        if ((f_sig&(((npy_uint32)1 << (126 - f_exp)) - 1)) != 0) {
            npy_set_floatstatus_underflow();
        }
#endif
        /*
         * Besides the usual shift by 13 applied below, a subnormal needs
         * an extra alignment shift.  f_exp is in [102, 112] here, so the
         * extra shift is between 1 and 11 bits and drops that many low
         * bits of the significand.
         */
        f_sig >>= (113 - f_exp);
        /* Handle rounding by adding 1 to the bit beyond half precision */
#if NPY_HALF_ROUND_TIES_TO_EVEN
        /*
         * If the last bit in the half significand is 0 (already even), and
         * the remaining bit pattern is 1000...0, then we do not add one
         * to the bit after the half significand.  The alignment shift
         * above may have discarded up to 11 non-zero bits, in which case
         * the value is really above the halfway point; the second test
         * looks at those bits in the original input.  In all other
         * cases, we add one.
         */
        if (((f_sig&0x00003fffu) != 0x00001000u) || (f&0x000007ffu)) {
            f_sig += 0x00001000u;
        }
#else
        f_sig += 0x00001000u;
#endif
        h_sig = (npy_uint16) (f_sig >> 13);
        /*
         * If the rounding causes a bit to spill into h_exp, it will
         * increment h_exp from zero to one and h_sig will be zero.
         * This is the correct result.
         */
        return (npy_uint16) (h_sgn + h_sig);
    }

    /* Regular case with no overflow or underflow */
    h_exp = (npy_uint16) ((f_exp - 0x38000000u) >> 13);
    /* Handle rounding by adding 1 to the bit beyond half precision */
    f_sig = (f&0x007fffffu);
#if NPY_HALF_ROUND_TIES_TO_EVEN
    /*
     * If the last bit in the half significand is 0 (already even), and
     * the remaining bit pattern is 1000...0, then we do not add one
     * to the bit after the half significand.  In all other cases, we do.
     */
    if ((f_sig&0x00003fffu) != 0x00001000u) {
        f_sig += 0x00001000u;
    }
#else
    f_sig += 0x00001000u;
#endif
    h_sig = (npy_uint16) (f_sig >> 13);
    /*
     * If the rounding causes a bit to spill into h_exp, it will
     * increment h_exp by one and h_sig will be zero.  This is the
     * correct result.  h_exp may increment to 15, at greatest, in
     * which case the result overflows to a signed inf.
     */
#if NPY_HALF_GENERATE_OVERFLOW
    h_sig += h_exp;
    if (h_sig == 0x7c00u) {
        npy_set_floatstatus_overflow();
    }
    return (npy_uint16) (h_sgn + h_sig);
#else
    return (npy_uint16) (h_sgn + h_exp + h_sig);
#endif
}

/*
 * Convert the bit pattern of a 64-bit double into the bit pattern of a
 * 16-bit half.
 *
 * d: raw bits of the double (sign: bit 63, exponent: bits 62..52,
 *    significand: bits 51..0).
 *
 * Returns the raw half bits.  Overflow, NaN, underflow and rounding are
 * handled the same way as in npy_floatbits_to_halfbits().
 */
static inline npy_uint16
npy_doublebits_to_halfbits(npy_uint64 d)
{
    npy_uint64 d_exp, d_sig;
    npy_uint16 h_sgn, h_exp, h_sig;

    h_sgn = (npy_uint16) ((d&0x8000000000000000ULL) >> 48);
    d_exp = (d&0x7ff0000000000000ULL);

    /* Exponent overflow/NaN converts to signed inf/NaN */
    if (d_exp >= 0x40f0000000000000ULL) {
        if (d_exp == 0x7ff0000000000000ULL) {
            /* Inf or NaN */
            d_sig = (d&0x000fffffffffffffULL);
            if (d_sig != 0) {
                /* NaN - propagate the flag in the significand... */
                npy_uint16 ret = (npy_uint16) (0x7c00u + (d_sig >> 42));
                /* ...but make sure it stays a NaN */
                if (ret == 0x7c00u) {
                    ret++;
                }
                return (npy_uint16) (h_sgn + ret);
            }
            else {
                /* signed inf */
                return (npy_uint16) (h_sgn + 0x7c00u);
            }
        }
        else {
            /* overflow to signed inf */
#if NPY_HALF_GENERATE_OVERFLOW
            npy_set_floatstatus_overflow();
#endif
            return (npy_uint16) (h_sgn + 0x7c00u);
        }
    }

    /* Exponent underflow converts to subnormal half or signed zero */
    if (d_exp <= 0x3f00000000000000ULL) {
        /*
         * Signed zeros, subnormal floats, and floats with small
         * exponents all convert to signed zero halfs.
         */
        if (d_exp < 0x3e60000000000000ULL) {
#if NPY_HALF_GENERATE_UNDERFLOW
            /* If d != 0, it underflowed to 0 */
            if ((d&0x7fffffffffffffffULL) != 0) {
                npy_set_floatstatus_underflow();
            }
#endif
            return h_sgn;
        }
        /* Make the subnormal significand */
        d_exp >>= 52;
        d_sig = (0x0010000000000000ULL + (d&0x000fffffffffffffULL));
#if NPY_HALF_GENERATE_UNDERFLOW
        /* If it's not exactly represented, it underflowed */
        if ((d_sig&(((npy_uint64)1 << (1051 - d_exp)) - 1)) != 0) {
            npy_set_floatstatus_underflow();
        }
#endif
        /*
         * Unlike the float case, a 64-bit word has room to align the
         * significand by shifting LEFT, so no low bits are lost before
         * rounding.  d_exp is in [998, 1008] here, so the shift is at
         * most 10 bits and the 53-bit significand stays below bit 63.
         * The rounding bit is then bit 52 and the half significand
         * starts at bit 53.
         */
        d_sig <<= (d_exp - 998);
        /* Handle rounding by adding 1 to the bit beyond half precision */
#if NPY_HALF_ROUND_TIES_TO_EVEN
        /*
         * If the last bit in the half significand is 0 (already even), and
         * the remaining bit pattern is 1000...0, then we do not add one
         * to the bit after the half significand.  In all other cases, we do.
         */
        if ((d_sig&0x003fffffffffffffULL) != 0x0010000000000000ULL) {
            d_sig += 0x0010000000000000ULL;
        }
#else
        d_sig += 0x0010000000000000ULL;
#endif
        h_sig = (npy_uint16) (d_sig >> 53);
        /*
         * If the rounding causes a bit to spill into h_exp, it will
         * increment h_exp from zero to one and h_sig will be zero.
         * This is the correct result.
         */
        return (npy_uint16) (h_sgn + h_sig);
    }

    /* Regular case with no overflow or underflow */
    h_exp = (npy_uint16) ((d_exp - 0x3f00000000000000ULL) >> 42);
    /* Handle rounding by adding 1 to the bit beyond half precision */
    d_sig = (d&0x000fffffffffffffULL);
#if NPY_HALF_ROUND_TIES_TO_EVEN
    /*
     * If the last bit in the half significand is 0 (already even), and
     * the remaining bit pattern is 1000...0, then we do not add one
     * to the bit after the half significand.  In all other cases, we do.
     */
    if ((d_sig&0x000007ffffffffffULL) != 0x0000020000000000ULL) {
        d_sig += 0x0000020000000000ULL;
    }
#else
    d_sig += 0x0000020000000000ULL;
#endif
    h_sig = (npy_uint16) (d_sig >> 42);

    /*
     * If the rounding causes a bit to spill into h_exp, it will
     * increment h_exp by one and h_sig will be zero.  This is the
     * correct result.  h_exp may increment to 15, at greatest, in
     * which case the result overflows to a signed inf.
     */
#if NPY_HALF_GENERATE_OVERFLOW
    h_sig += h_exp;
    if (h_sig == 0x7c00u) {
        npy_set_floatstatus_overflow();
    }
    return (npy_uint16) (h_sgn + h_sig);
#else
    return (npy_uint16) (h_sgn + h_exp + h_sig);
#endif
}

/*
 * Convert the bit pattern of a 16-bit half into the bit pattern of a
 * 32-bit float.
 *
 * h: raw half bits.
 *
 * Returns the raw float bits.  Subnormal halfs are renormalized; inf and
 * NaN keep an all-ones exponent with the significand copied into the top
 * bits of the float significand.
 */
static inline npy_uint32
npy_halfbits_to_floatbits(npy_uint16 h)
{
    npy_uint16 h_exp, h_sig;
    npy_uint32 f_sgn, f_exp, f_sig;

    h_exp = (h&0x7c00u);
    f_sgn = ((npy_uint32)h&0x8000u) << 16;
    switch (h_exp) {
        case 0x0000u: /* 0 or subnormal */
            h_sig = (h&0x03ffu);
            /* Signed zero */
            if (h_sig == 0) {
                return f_sgn;
            }
            /* Subnormal: shift until the leading 1 reaches bit 10 */
            h_sig <<= 1;
            while ((h_sig&0x0400u) == 0) {
                h_sig <<= 1;
                /* h_exp is 0 on entry and counts the extra shifts */
                h_exp++;
            }
            f_exp = ((npy_uint32)(127 - 15 - h_exp)) << 23;
            f_sig = ((npy_uint32)(h_sig&0x03ffu)) << 13;
            return f_sgn + f_exp + f_sig;
        case 0x7c00u: /* inf or NaN */
            /* All-ones exponent and a copy of the significand */
            return f_sgn + 0x7f800000u + (((npy_uint32)(h&0x03ffu)) << 13);
        default: /* normalized */
            /* Just need to adjust the exponent and shift */
            return f_sgn + (((npy_uint32)(h&0x7fffu) + 0x1c000u) << 13);
    }
}

/*
 * Convert the bit pattern of a 16-bit half into the bit pattern of a
 * 64-bit double.
 *
 * h: raw half bits.
 *
 * Returns the raw double bits, with the same treatment of zero,
 * subnormal, inf and NaN inputs as npy_halfbits_to_floatbits().
 */
static inline npy_uint64
npy_halfbits_to_doublebits(npy_uint16 h)
{
    npy_uint16 h_exp, h_sig;
    npy_uint64 d_sgn, d_exp, d_sig;

    h_exp = (h&0x7c00u);
    d_sgn = ((npy_uint64)h&0x8000u) << 48;
    switch (h_exp) {
        case 0x0000u: /* 0 or subnormal */
            h_sig = (h&0x03ffu);
            /* Signed zero */
            if (h_sig == 0) {
                return d_sgn;
            }
            /* Subnormal: shift until the leading 1 reaches bit 10 */
            h_sig <<= 1;
            while ((h_sig&0x0400u) == 0) {
                h_sig <<= 1;
                /* h_exp is 0 on entry and counts the extra shifts */
                h_exp++;
            }
            d_exp = ((npy_uint64)(1023 - 15 - h_exp)) << 52;
            d_sig = ((npy_uint64)(h_sig&0x03ffu)) << 42;
            return d_sgn + d_exp + d_sig;
        case 0x7c00u: /* inf or NaN */
            /* All-ones exponent and a copy of the significand */
            return d_sgn + 0x7ff0000000000000ULL +
                                (((npy_uint64)(h&0x03ffu)) << 42);
        default: /* normalized */
            /* Just need to adjust the exponent and shift */
            return d_sgn + (((npy_uint64)(h&0x7fffu) + 0xfc000u) << 42);
    }
}