// Copyright 2022 Google LLC // // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. #include #ifdef _WIN32 #include #else #include #endif #include #include #include #include #include #include static struct xnn_unary_elementwise_config f16_abs_config = {0}; static struct xnn_unary_elementwise_config f16_clamp_config = {0}; static struct xnn_unary_elementwise_config f16_elu_config = {0}; static struct xnn_unary_elementwise_config f16_hswish_config = {0}; static struct xnn_unary_elementwise_config f16_lrelu_config = {0}; static struct xnn_unary_elementwise_config f16_neg_config = {0}; static struct xnn_unary_elementwise_config f16_rndd_config = {0}; static struct xnn_unary_elementwise_config f16_rndne_config = {0}; static struct xnn_unary_elementwise_config f16_rndu_config = {0}; static struct xnn_unary_elementwise_config f16_rndz_config = {0}; static struct xnn_unary_elementwise_config f16_sigmoid_config = {0}; static struct xnn_unary_elementwise_config f16_sqr_config = {0}; static struct xnn_unary_elementwise_config f16_sqrt_config = {0}; static struct xnn_unary_elementwise_config f16_tanh_config = {0}; static struct xnn_unary_elementwise_config f16_to_f32_cvt_config = {0}; static struct xnn_unary_elementwise_config f32_abs_config = {0}; static struct xnn_unary_elementwise_config f32_clamp_config = {0}; static struct xnn_unary_elementwise_config f32_elu_config = {0}; static struct xnn_unary_elementwise_config f32_hswish_config = {0}; static struct xnn_unary_elementwise_config f32_lrelu_config = {0}; static struct xnn_unary_elementwise_config f32_neg_config = {0}; static struct xnn_unary_elementwise_config f32_relu_config = {0}; static struct xnn_unary_elementwise_config f32_rndd_config = {0}; static struct xnn_unary_elementwise_config f32_rndne_config = {0}; static struct xnn_unary_elementwise_config f32_rndu_config = {0}; static struct xnn_unary_elementwise_config f32_rndz_config = {0}; static struct xnn_unary_elementwise_config f32_sigmoid_config = {0}; static struct xnn_unary_elementwise_config f32_sqr_config = {0}; static struct xnn_unary_elementwise_config f32_sqrt_config = {0}; static struct xnn_unary_elementwise_config f32_tanh_config = {0}; static struct xnn_unary_elementwise_config f32_to_f16_cvt_config = {0}; static struct xnn_unary_elementwise_config f32_to_qs8_cvt_config = {0}; static struct xnn_unary_elementwise_config f32_to_qu8_cvt_config = {0}; static struct xnn_unary_elementwise_config qs8_cvt_config = {0}; static struct xnn_unary_elementwise_config qs8_lrelu_config = {0}; static struct xnn_unary_elementwise_config qs8_to_f32_cvt_config = {0}; static struct xnn_unary_elementwise_config qs16_to_qs8_cvt_config = {0}; static struct xnn_unary_elementwise_config qu8_cvt_config = {0}; static struct xnn_unary_elementwise_config qu8_lrelu_config = {0}; static struct xnn_unary_elementwise_config qu8_to_f32_cvt_config = {0}; static struct xnn_unary_elementwise_config s8_clamp_config = {0}; static struct xnn_unary_elementwise_config u8_clamp_config = {0}; static struct xnn_unary_elementwise_config xx_copy_config = {0}; #if XNN_PLATFORM_WINDOWS static INIT_ONCE init_guard_f16_abs = INIT_ONCE_STATIC_INIT; static INIT_ONCE init_guard_f16_clamp = INIT_ONCE_STATIC_INIT; static INIT_ONCE init_guard_f16_elu = INIT_ONCE_STATIC_INIT; static INIT_ONCE init_guard_f16_hswish = INIT_ONCE_STATIC_INIT; static INIT_ONCE init_guard_f16_lrelu = INIT_ONCE_STATIC_INIT; static INIT_ONCE init_guard_f16_neg = INIT_ONCE_STATIC_INIT; static INIT_ONCE init_guard_f16_rndd = INIT_ONCE_STATIC_INIT; static INIT_ONCE init_guard_f16_rndne = INIT_ONCE_STATIC_INIT; static INIT_ONCE init_guard_f16_rndu = INIT_ONCE_STATIC_INIT; static INIT_ONCE init_guard_f16_rndz = INIT_ONCE_STATIC_INIT; static INIT_ONCE init_guard_f16_sigmoid = INIT_ONCE_STATIC_INIT; static INIT_ONCE init_guard_f16_sqr = INIT_ONCE_STATIC_INIT; static INIT_ONCE init_guard_f16_sqrt = INIT_ONCE_STATIC_INIT; static INIT_ONCE init_guard_f16_tanh = INIT_ONCE_STATIC_INIT; static INIT_ONCE init_guard_f16_to_f32_cvt = INIT_ONCE_STATIC_INIT; static INIT_ONCE init_guard_f32_abs = INIT_ONCE_STATIC_INIT; static INIT_ONCE init_guard_f32_clamp = INIT_ONCE_STATIC_INIT; static INIT_ONCE init_guard_f32_elu = INIT_ONCE_STATIC_INIT; static INIT_ONCE init_guard_f32_hswish = INIT_ONCE_STATIC_INIT; static INIT_ONCE init_guard_f32_lrelu = INIT_ONCE_STATIC_INIT; static INIT_ONCE init_guard_f32_neg = INIT_ONCE_STATIC_INIT; static INIT_ONCE init_guard_f32_relu = INIT_ONCE_STATIC_INIT; static INIT_ONCE init_guard_f32_rndd = INIT_ONCE_STATIC_INIT; static INIT_ONCE init_guard_f32_rndne = INIT_ONCE_STATIC_INIT; static INIT_ONCE init_guard_f32_rndu = INIT_ONCE_STATIC_INIT; static INIT_ONCE init_guard_f32_rndz = INIT_ONCE_STATIC_INIT; static INIT_ONCE init_guard_f32_sigmoid = INIT_ONCE_STATIC_INIT; static INIT_ONCE init_guard_f32_sqr = INIT_ONCE_STATIC_INIT; static INIT_ONCE init_guard_f32_sqrt = INIT_ONCE_STATIC_INIT; static INIT_ONCE init_guard_f32_tanh = INIT_ONCE_STATIC_INIT; static INIT_ONCE init_guard_f32_to_f16_cvt = INIT_ONCE_STATIC_INIT; static INIT_ONCE init_guard_f32_to_qs8_cvt = INIT_ONCE_STATIC_INIT; static INIT_ONCE init_guard_f32_to_qu8_cvt = INIT_ONCE_STATIC_INIT; static INIT_ONCE init_guard_qs8_cvt = INIT_ONCE_STATIC_INIT; static INIT_ONCE init_guard_qs8_lrelu = INIT_ONCE_STATIC_INIT; static INIT_ONCE init_guard_qs8_to_f32_cvt = INIT_ONCE_STATIC_INIT; static INIT_ONCE init_guard_qs16_to_qs8_cvt = INIT_ONCE_STATIC_INIT; static INIT_ONCE init_guard_qu8_cvt = INIT_ONCE_STATIC_INIT; static INIT_ONCE init_guard_qu8_lrelu = INIT_ONCE_STATIC_INIT; static INIT_ONCE init_guard_qu8_to_f32_cvt = INIT_ONCE_STATIC_INIT; static INIT_ONCE init_guard_s8_clamp = INIT_ONCE_STATIC_INIT; static INIT_ONCE init_guard_u8_clamp = INIT_ONCE_STATIC_INIT; static INIT_ONCE init_guard_xx_copy = INIT_ONCE_STATIC_INIT; #else static pthread_once_t init_guard_f16_abs = PTHREAD_ONCE_INIT; static pthread_once_t init_guard_f16_clamp = PTHREAD_ONCE_INIT; static pthread_once_t init_guard_f16_elu = PTHREAD_ONCE_INIT; static pthread_once_t init_guard_f16_hswish = PTHREAD_ONCE_INIT; static pthread_once_t init_guard_f16_lrelu = PTHREAD_ONCE_INIT; static pthread_once_t init_guard_f16_neg = PTHREAD_ONCE_INIT; static pthread_once_t init_guard_f16_rndd = PTHREAD_ONCE_INIT; static pthread_once_t init_guard_f16_rndne = PTHREAD_ONCE_INIT; static pthread_once_t init_guard_f16_rndu = PTHREAD_ONCE_INIT; static pthread_once_t init_guard_f16_rndz = PTHREAD_ONCE_INIT; static pthread_once_t init_guard_f16_sigmoid = PTHREAD_ONCE_INIT; static pthread_once_t init_guard_f16_sqr = PTHREAD_ONCE_INIT; static pthread_once_t init_guard_f16_sqrt = PTHREAD_ONCE_INIT; static pthread_once_t init_guard_f16_tanh = PTHREAD_ONCE_INIT; static pthread_once_t init_guard_f16_to_f32_cvt = PTHREAD_ONCE_INIT; static pthread_once_t init_guard_f32_abs = PTHREAD_ONCE_INIT; static pthread_once_t init_guard_f32_clamp = PTHREAD_ONCE_INIT; static pthread_once_t init_guard_f32_elu = PTHREAD_ONCE_INIT; static pthread_once_t init_guard_f32_hswish = PTHREAD_ONCE_INIT; static pthread_once_t init_guard_f32_lrelu = PTHREAD_ONCE_INIT; static pthread_once_t init_guard_f32_neg = PTHREAD_ONCE_INIT; static pthread_once_t init_guard_f32_relu = PTHREAD_ONCE_INIT; static pthread_once_t init_guard_f32_rndd = PTHREAD_ONCE_INIT; static pthread_once_t init_guard_f32_rndne = PTHREAD_ONCE_INIT; static pthread_once_t init_guard_f32_rndu = PTHREAD_ONCE_INIT; static pthread_once_t init_guard_f32_rndz = PTHREAD_ONCE_INIT; static pthread_once_t init_guard_f32_sigmoid = PTHREAD_ONCE_INIT; static pthread_once_t init_guard_f32_sqr = PTHREAD_ONCE_INIT; static pthread_once_t init_guard_f32_sqrt = PTHREAD_ONCE_INIT; static pthread_once_t init_guard_f32_tanh = PTHREAD_ONCE_INIT; static pthread_once_t init_guard_f32_to_f16_cvt = PTHREAD_ONCE_INIT; static pthread_once_t init_guard_f32_to_qs8_cvt = PTHREAD_ONCE_INIT; static pthread_once_t init_guard_f32_to_qu8_cvt = PTHREAD_ONCE_INIT; static pthread_once_t init_guard_qs8_cvt = PTHREAD_ONCE_INIT; static pthread_once_t init_guard_qs16_to_qs8_cvt = PTHREAD_ONCE_INIT; static pthread_once_t init_guard_qs8_lrelu = PTHREAD_ONCE_INIT; static pthread_once_t init_guard_qs8_to_f32_cvt = PTHREAD_ONCE_INIT; static pthread_once_t init_guard_qu8_cvt = PTHREAD_ONCE_INIT; static pthread_once_t init_guard_qu8_lrelu = PTHREAD_ONCE_INIT; static pthread_once_t init_guard_qu8_to_f32_cvt = PTHREAD_ONCE_INIT; static pthread_once_t init_guard_s8_clamp = PTHREAD_ONCE_INIT; static pthread_once_t init_guard_u8_clamp = PTHREAD_ONCE_INIT; static pthread_once_t init_guard_xx_copy = PTHREAD_ONCE_INIT; #endif static void init_f16_abs_config(void) { #if XNN_ARCH_ARM && XNN_ENABLE_ARM_FP16_VECTOR && XNN_ENABLE_ARM_FP16_SCALAR const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); assert(hardware_config != NULL); if (hardware_config->use_arm_neon_fp16_arith) { f16_abs_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f16_vabs_ukernel__neonfp16arith_x16; f16_abs_config.element_tile = 16; } #elif XNN_ARCH_ARM64 && XNN_ENABLE_ARM_FP16_VECTOR const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); assert(hardware_config != NULL); if (hardware_config->use_arm_neon_fp16_arith) { f16_abs_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f16_vabs_ukernel__neonfp16arith_x16; f16_abs_config.element_tile = 16; } #elif (XNN_ARCH_X86 || XNN_ARCH_X86_64) && !XNN_PLATFORM_MOBILE f16_abs_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f16_vabs_ukernel__sse2_x16; f16_abs_config.init.f16_abs = xnn_init_f16_abs_sse_params; f16_abs_config.element_tile = 16; #endif } static void init_f16_clamp_config(void) { #if XNN_ARCH_ARM && XNN_ENABLE_ARM_FP16_VECTOR && XNN_ENABLE_ARM_FP16_SCALAR const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); assert(hardware_config != NULL); if (hardware_config->use_arm_neon_fp16_arith) { f16_clamp_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f16_vclamp_ukernel__neonfp16arith_x16; f16_clamp_config.init.f16_minmax = xnn_init_f16_minmax_fp16arith_params; f16_clamp_config.element_tile = 16; } #elif XNN_ARCH_ARM64 && XNN_ENABLE_ARM_FP16_VECTOR const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); assert(hardware_config != NULL); if (hardware_config->use_arm_neon_fp16_arith) { f16_clamp_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f16_vclamp_ukernel__neonfp16arith_x16; f16_clamp_config.init.f16_minmax = xnn_init_f16_minmax_fp16arith_params; f16_clamp_config.element_tile = 16; } #elif (XNN_ARCH_X86 || XNN_ARCH_X86_64) && !XNN_PLATFORM_MOBILE const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); assert(hardware_config != NULL); if (hardware_config->use_x86_f16c) { f16_clamp_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f16_vclamp_ukernel__f16c_x16; f16_clamp_config.init.f16_minmax = xnn_init_f16_minmax_avx_params; f16_clamp_config.element_tile = 16; } #endif } static void init_f16_elu_config(void) { #if XNN_ARCH_ARM && XNN_ENABLE_ARM_FP16_VECTOR && XNN_ENABLE_ARM_FP16_SCALAR const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); assert(hardware_config != NULL); if (hardware_config->use_arm_neon_fp16_arith) { f16_elu_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f16_velu_ukernel__neonfp16arith_rr1_p3_x16; f16_elu_config.init.f16_elu = xnn_init_f16_elu_fp16arith_rr1_p3_params; f16_elu_config.element_tile = 16; } #elif XNN_ARCH_ARM64 && XNN_ENABLE_ARM_FP16_VECTOR const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); assert(hardware_config != NULL); if (hardware_config->use_arm_neon_fp16_arith) { f16_elu_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f16_velu_ukernel__neonfp16arith_rr1_p3_x16; f16_elu_config.init.f16_elu = xnn_init_f16_elu_fp16arith_rr1_p3_params; f16_elu_config.element_tile = 16; } #elif (XNN_ARCH_X86 || XNN_ARCH_X86_64) && !XNN_PLATFORM_MOBILE const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); assert(hardware_config != NULL); if (hardware_config->use_x86_avx2) { f16_elu_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f16_velu_ukernel__avx2_rr1_p3_x16; f16_elu_config.init.f16_elu = xnn_init_f16_elu_avx2_rr1_p3_params; f16_elu_config.element_tile = 16; } #endif } static void init_f16_hswish_config(void) { #if XNN_ARCH_ARM && XNN_ENABLE_ARM_FP16_VECTOR && XNN_ENABLE_ARM_FP16_SCALAR const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); assert(hardware_config != NULL); if (hardware_config->use_arm_neon_fp16_arith) { f16_hswish_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f16_vhswish_ukernel__neonfp16arith_x16; f16_hswish_config.init.f16_hswish = xnn_init_f16_hswish_fp16arith_params; f16_hswish_config.element_tile = 16; } #elif XNN_ARCH_ARM64 && XNN_ENABLE_ARM_FP16_VECTOR const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); assert(hardware_config != NULL); if (hardware_config->use_arm_neon_fp16_arith) { f16_hswish_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f16_vhswish_ukernel__neonfp16arith_x16; f16_hswish_config.init.f16_hswish = xnn_init_f16_hswish_fp16arith_params; f16_hswish_config.element_tile = 16; } #elif (XNN_ARCH_X86 || XNN_ARCH_X86_64) && !XNN_PLATFORM_MOBILE const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); assert(hardware_config != NULL); if (hardware_config->use_x86_f16c) { f16_hswish_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f16_vhswish_ukernel__f16c_x16; f16_hswish_config.init.f16_hswish = xnn_init_f16_hswish_avx_params; f16_hswish_config.element_tile = 16; } #endif } static void init_f16_lrelu_config(void) { #if XNN_ARCH_ARM && XNN_ENABLE_ARM_FP16_VECTOR && XNN_ENABLE_ARM_FP16_SCALAR const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); assert(hardware_config != NULL); if (hardware_config->use_arm_neon_fp16_arith) { f16_lrelu_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f16_vlrelu_ukernel__neonfp16arith_x16; f16_lrelu_config.init.f16_lrelu = xnn_init_f16_lrelu_fp16arith_params; f16_lrelu_config.element_tile = 16; } #elif XNN_ARCH_ARM64 && XNN_ENABLE_ARM_FP16_VECTOR const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); assert(hardware_config != NULL); if (hardware_config->use_arm_neon_fp16_arith) { f16_lrelu_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f16_vlrelu_ukernel__neonfp16arith_x16; f16_lrelu_config.init.f16_lrelu = xnn_init_f16_lrelu_fp16arith_params; f16_lrelu_config.element_tile = 16; } #elif (XNN_ARCH_X86 || XNN_ARCH_X86_64) && !XNN_PLATFORM_MOBILE const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); assert(hardware_config != NULL); if (hardware_config->use_x86_f16c) { f16_lrelu_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f16_vlrelu_ukernel__f16c_x16; f16_lrelu_config.init.f16_lrelu = xnn_init_f16_lrelu_avx_params; f16_lrelu_config.element_tile = 16; } #endif } static void init_f16_neg_config(void) { #if XNN_ARCH_ARM && XNN_ENABLE_ARM_FP16_VECTOR && XNN_ENABLE_ARM_FP16_SCALAR const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); assert(hardware_config != NULL); if (hardware_config->use_arm_neon_fp16_arith) { f16_neg_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f16_vneg_ukernel__neonfp16arith_x16; f16_neg_config.element_tile = 16; } #elif XNN_ARCH_ARM64 && XNN_ENABLE_ARM_FP16_VECTOR const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); assert(hardware_config != NULL); if (hardware_config->use_arm_neon_fp16_arith) { f16_neg_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f16_vneg_ukernel__neonfp16arith_x16; f16_neg_config.element_tile = 16; } #elif (XNN_ARCH_X86 || XNN_ARCH_X86_64) && !XNN_PLATFORM_MOBILE f16_neg_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f16_vneg_ukernel__sse2_x16; f16_neg_config.init.f16_neg = xnn_init_f16_neg_sse_params; f16_neg_config.element_tile = 16; #endif } static void init_f16_rndd_config(void) { #if XNN_ARCH_ARM && XNN_ENABLE_ARM_FP16_VECTOR && XNN_ENABLE_ARM_FP16_SCALAR const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); assert(hardware_config != NULL); if (hardware_config->use_arm_neon_fp16_arith) { f16_rndd_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f16_vrndd_ukernel__neonfp16arith_x16; f16_rndd_config.element_tile = 16; } #elif XNN_ARCH_ARM64 && XNN_ENABLE_ARM_FP16_VECTOR const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); assert(hardware_config != NULL); if (hardware_config->use_arm_neon_fp16_arith) { f16_rndd_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f16_vrndd_ukernel__neonfp16arith_x16; f16_rndd_config.element_tile = 16; } #elif (XNN_ARCH_X86 || XNN_ARCH_X86_64) && !XNN_PLATFORM_MOBILE const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); assert(hardware_config != NULL); if (hardware_config->use_x86_f16c) { f16_rndd_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f16_vrndd_ukernel__f16c_x16; f16_rndd_config.element_tile = 16; } #endif } static void init_f16_rndne_config(void) { #if XNN_ARCH_ARM && XNN_ENABLE_ARM_FP16_VECTOR && XNN_ENABLE_ARM_FP16_SCALAR const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); assert(hardware_config != NULL); if (hardware_config->use_arm_neon_fp16_arith) { f16_rndne_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f16_vrndne_ukernel__neonfp16arith_x16; f16_rndne_config.element_tile = 16; } #elif XNN_ARCH_ARM64 && XNN_ENABLE_ARM_FP16_VECTOR const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); assert(hardware_config != NULL); if (hardware_config->use_arm_neon_fp16_arith) { f16_rndne_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f16_vrndne_ukernel__neonfp16arith_x16; f16_rndne_config.element_tile = 16; } #elif (XNN_ARCH_X86 || XNN_ARCH_X86_64) && !XNN_PLATFORM_MOBILE const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); assert(hardware_config != NULL); if (hardware_config->use_x86_f16c) { f16_rndne_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f16_vrndne_ukernel__f16c_x16; f16_rndne_config.element_tile = 16; } #endif } static void init_f16_rndu_config(void) { #if XNN_ARCH_ARM && XNN_ENABLE_ARM_FP16_VECTOR && XNN_ENABLE_ARM_FP16_SCALAR const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); assert(hardware_config != NULL); if (hardware_config->use_arm_neon_fp16_arith) { f16_rndu_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f16_vrndu_ukernel__neonfp16arith_x16; f16_rndu_config.element_tile = 16; } #elif XNN_ARCH_ARM64 && XNN_ENABLE_ARM_FP16_VECTOR const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); assert(hardware_config != NULL); if (hardware_config->use_arm_neon_fp16_arith) { f16_rndu_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f16_vrndu_ukernel__neonfp16arith_x16; f16_rndu_config.element_tile = 16; } #elif (XNN_ARCH_X86 || XNN_ARCH_X86_64) && !XNN_PLATFORM_MOBILE const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); assert(hardware_config != NULL); if (hardware_config->use_x86_f16c) { f16_rndu_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f16_vrndu_ukernel__f16c_x16; f16_rndu_config.element_tile = 16; } #endif } static void init_f16_rndz_config(void) { #if XNN_ARCH_ARM && XNN_ENABLE_ARM_FP16_VECTOR && XNN_ENABLE_ARM_FP16_SCALAR const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); assert(hardware_config != NULL); if (hardware_config->use_arm_neon_fp16_arith) { f16_rndz_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f16_vrndz_ukernel__neonfp16arith_x16; f16_rndz_config.element_tile = 16; } #elif XNN_ARCH_ARM64 && XNN_ENABLE_ARM_FP16_VECTOR const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); assert(hardware_config != NULL); if (hardware_config->use_arm_neon_fp16_arith) { f16_rndz_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f16_vrndz_ukernel__neonfp16arith_x16; f16_rndz_config.element_tile = 16; } #elif (XNN_ARCH_X86 || XNN_ARCH_X86_64) && !XNN_PLATFORM_MOBILE const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); assert(hardware_config != NULL); if (hardware_config->use_x86_f16c) { f16_rndz_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f16_vrndz_ukernel__f16c_x16; f16_rndz_config.element_tile = 16; } #endif } static void init_f16_sigmoid_config(void) { #if XNN_ARCH_ARM && XNN_ENABLE_ARM_FP16_VECTOR && XNN_ENABLE_ARM_FP16_SCALAR const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); assert(hardware_config != NULL); if (hardware_config->use_arm_neon_fp16_arith) { f16_sigmoid_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_nr1recps_x16; f16_sigmoid_config.init.f16_sigmoid = xnn_init_f16_sigmoid_fp16arith_rr2_p2_params; f16_sigmoid_config.element_tile = 16; } #elif XNN_ARCH_ARM64 && XNN_ENABLE_ARM_FP16_VECTOR const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); assert(hardware_config != NULL); if (hardware_config->use_arm_neon_fp16_arith) { f16_sigmoid_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_nr1fma_x40; f16_sigmoid_config.init.f16_sigmoid = xnn_init_f16_sigmoid_fp16arith_rr2_p2_params; f16_sigmoid_config.element_tile = 40; } #elif (XNN_ARCH_X86 || XNN_ARCH_X86_64) && !XNN_PLATFORM_MOBILE const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); assert(hardware_config != NULL); if (hardware_config->use_x86_avx2) { f16_sigmoid_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f16_vsigmoid_ukernel__avx2_rr1_p2_rcp_x32; f16_sigmoid_config.init.f16_sigmoid = xnn_init_f16_sigmoid_avx2_rr1_p2_params; f16_sigmoid_config.element_tile = 32; } #endif } static void init_f16_sqr_config(void) { #if XNN_ARCH_ARM && XNN_ENABLE_ARM_FP16_VECTOR && XNN_ENABLE_ARM_FP16_SCALAR const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); assert(hardware_config != NULL); if (hardware_config->use_arm_neon_fp16_arith) { f16_sqr_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f16_vsqr_ukernel__neonfp16arith_x16; f16_sqr_config.element_tile = 16; } #elif XNN_ARCH_ARM64 && XNN_ENABLE_ARM_FP16_VECTOR const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); assert(hardware_config != NULL); if (hardware_config->use_arm_neon_fp16_arith) { f16_sqr_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f16_vsqr_ukernel__neonfp16arith_x16; f16_sqr_config.element_tile = 16; } #elif (XNN_ARCH_X86 || XNN_ARCH_X86_64) && !XNN_PLATFORM_MOBILE const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); assert(hardware_config != NULL); if (hardware_config->use_x86_f16c) { f16_sqr_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f16_vsqr_ukernel__f16c_x16; f16_sqr_config.element_tile = 16; } #endif } static void init_f16_sqrt_config(void) { #if XNN_ARCH_ARM && XNN_ENABLE_ARM_FP16_VECTOR && XNN_ENABLE_ARM_FP16_SCALAR const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); assert(hardware_config != NULL); if (hardware_config->use_arm_neon_fp16_arith) { f16_sqrt_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f16_vsqrt_ukernel__neonfp16arith_nr1fma1adj_x8; f16_sqrt_config.element_tile = 8; } #elif XNN_ARCH_ARM64 && XNN_ENABLE_ARM_FP16_VECTOR const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); assert(hardware_config != NULL); if (hardware_config->use_arm_neon_fp16_arith) { f16_sqrt_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f16_vsqrt_ukernel__aarch64_neonfp16arith_sqrt_x8; f16_sqrt_config.element_tile = 8; } #elif (XNN_ARCH_X86 || XNN_ARCH_X86_64) && !XNN_PLATFORM_MOBILE const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); assert(hardware_config != NULL); if (hardware_config->use_x86_f16c) { f16_sqrt_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f16_vsqrt_ukernel__f16c_sqrt_x8; f16_sqrt_config.element_tile = 8; } #endif } static void init_f16_tanh_config(void) { #if XNN_ARCH_ARM && XNN_ENABLE_ARM_FP16_VECTOR && XNN_ENABLE_ARM_FP16_SCALAR const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); assert(hardware_config != NULL); if (hardware_config->use_arm_neon_fp16_arith) { f16_tanh_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f16_vtanh_ukernel__neonfp16arith_expm1minus_rr1_p3h2ts_nr1fma_x32; f16_tanh_config.element_tile = 32; } #elif XNN_ARCH_ARM64 && XNN_ENABLE_ARM_FP16_VECTOR const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); assert(hardware_config != NULL); if (hardware_config->use_arm_neon_fp16_arith) { f16_tanh_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f16_vtanh_ukernel__aarch64_neonfp16arith_expm1minus_rr1_p3h2ts_div_x32; f16_tanh_config.element_tile = 32; } #elif (XNN_ARCH_X86 || XNN_ARCH_X86_64) && !XNN_PLATFORM_MOBILE const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); assert(hardware_config != NULL); if (hardware_config->use_x86_fma3) { f16_tanh_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f16_vtanh_ukernel__fma3_polynomial_p19h9t2_x32; f16_tanh_config.init.f16_tanh = xnn_init_f16_tanh_avx_polynomial_p19h9t2_params; f16_tanh_config.element_tile = 32; } else if (hardware_config->use_x86_f16c) { f16_tanh_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f16_vtanh_ukernel__f16c_expm1minus_rr1_p3h2ts_rcp_x72; f16_tanh_config.init.f16_tanh = xnn_init_f16_tanh_avx_expm1minus_rr1_p3h2_params; f16_tanh_config.element_tile = 72; } #endif } static void init_f16_to_f32_cvt_config(void) { #if XNN_ARCH_ARM const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); assert(hardware_config != NULL); if (hardware_config->use_arm_neon) { if (hardware_config->use_arm_neon_fp16) { f16_to_f32_cvt_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f16_f32_vcvt_ukernel__neonfp16_x16; f16_to_f32_cvt_config.element_tile = 16; } else { f16_to_f32_cvt_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f16_f32_vcvt_ukernel__neon_int16_x16; f16_to_f32_cvt_config.init.f16_f32_cvt = xnn_init_f16_f32_cvt_neon_params; f16_to_f32_cvt_config.element_tile = 16; } } else if (!XNN_PLATFORM_MOBILE) { f16_to_f32_cvt_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f16_f32_vcvt_ukernel__scalar_x4; f16_to_f32_cvt_config.init.f16_f32_cvt = xnn_init_f16_f32_cvt_scalar_params; f16_to_f32_cvt_config.element_tile = 4; } #elif XNN_ARCH_ARM64 f16_to_f32_cvt_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f16_f32_vcvt_ukernel__neonfp16_x16; f16_to_f32_cvt_config.element_tile = 16; #elif XNN_ARCH_X86 || XNN_ARCH_X86_64 const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); assert(hardware_config != NULL); if (hardware_config->use_x86_avx512skx) { f16_to_f32_cvt_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f16_f32_vcvt_ukernel__avx512skx_x16; f16_to_f32_cvt_config.element_tile = 16; } else if (hardware_config->use_x86_f16c) { f16_to_f32_cvt_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f16_f32_vcvt_ukernel__f16c_x16; f16_to_f32_cvt_config.element_tile = 16; } else if (hardware_config->use_x86_avx) { f16_to_f32_cvt_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f16_f32_vcvt_ukernel__avx_int16_x16; f16_to_f32_cvt_config.init.f16_f32_cvt = xnn_init_f16_f32_cvt_sse_int16_params; f16_to_f32_cvt_config.element_tile = 16; } else if (hardware_config->use_x86_sse4_1) { f16_to_f32_cvt_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f16_f32_vcvt_ukernel__sse41_int16_x16; f16_to_f32_cvt_config.init.f16_f32_cvt = xnn_init_f16_f32_cvt_sse_int16_params; f16_to_f32_cvt_config.element_tile = 16; } else { f16_to_f32_cvt_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f16_f32_vcvt_ukernel__sse2_int16_x32; f16_to_f32_cvt_config.init.f16_f32_cvt = xnn_init_f16_f32_cvt_sse_int16_params; f16_to_f32_cvt_config.element_tile = 32; } #elif XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD #if XNN_ARCH_WASMRELAXEDSIMD f16_to_f32_cvt_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f16_f32_vcvt_ukernel__wasmrelaxedsimd_int16_x16; f16_to_f32_cvt_config.init.f16_f32_cvt = xnn_init_f16_f32_cvt_wasmsimd_int16_params; f16_to_f32_cvt_config.element_tile = 16; #else f16_to_f32_cvt_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f16_f32_vcvt_ukernel__wasmsimd_int16_x16; f16_to_f32_cvt_config.init.f16_f32_cvt = xnn_init_f16_f32_cvt_wasmsimd_int16_params; f16_to_f32_cvt_config.element_tile = 16; #endif #elif XNN_ARCH_WASM f16_to_f32_cvt_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f16_f32_vcvt_ukernel__scalar_x1; f16_to_f32_cvt_config.init.f16_f32_cvt = xnn_init_f16_f32_cvt_scalar_params; f16_to_f32_cvt_config.element_tile = 1; #elif XNN_ARCH_RISCV f16_to_f32_cvt_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f16_f32_vcvt_ukernel__scalar_x4; f16_to_f32_cvt_config.init.f16_f32_cvt = xnn_init_f16_f32_cvt_scalar_params; f16_to_f32_cvt_config.element_tile = 4; #endif } static void init_f32_abs_config(void) { #if XNN_ARCH_ARM const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); assert(hardware_config != NULL); if (hardware_config->use_arm_neon) { f32_abs_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vabs_ukernel__neon_x8; f32_abs_config.element_tile = 8; } else if (!XNN_PLATFORM_MOBILE) { f32_abs_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vabs_ukernel__scalar_x4; f32_abs_config.element_tile = 4; } #elif XNN_ARCH_ARM64 f32_abs_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vabs_ukernel__neon_x8; f32_abs_config.element_tile = 8; #elif XNN_ARCH_X86 || XNN_ARCH_X86_64 const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); assert(hardware_config != NULL); if (!XNN_PLATFORM_MOBILE && hardware_config->use_x86_avx512f) { f32_abs_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vabs_ukernel__avx512f_x16; f32_abs_config.init.f32_abs = xnn_init_f32_abs_avx512_params; f32_abs_config.element_tile = 16; } else if (hardware_config->use_x86_avx) { f32_abs_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vabs_ukernel__avx_x16; f32_abs_config.init.f32_abs = xnn_init_f32_abs_avx_params; f32_abs_config.element_tile = 16; } else { f32_abs_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vabs_ukernel__sse_x8; f32_abs_config.init.f32_abs = xnn_init_f32_abs_sse_params; f32_abs_config.element_tile = 8; } #elif XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD f32_abs_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vabs_ukernel__wasmsimd_x8; f32_abs_config.init.f32_abs = xnn_init_f32_abs_wasmsimd_params; f32_abs_config.element_tile = 8; #elif XNN_ARCH_WASM f32_abs_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vabs_ukernel__scalar_x4; f32_abs_config.element_tile = 4; #elif XNN_ARCH_RISCV f32_abs_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vabs_ukernel__scalar_x4; f32_abs_config.element_tile = 4; #endif } static void init_f32_clamp_config(void) { #if XNN_ARCH_ARM const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); assert(hardware_config != NULL); if (hardware_config->use_arm_neon) { f32_clamp_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vclamp_ukernel__neon_x8; f32_clamp_config.init.f32_minmax = xnn_init_f32_minmax_scalar_params; f32_clamp_config.element_tile = 8; } else if (!XNN_PLATFORM_MOBILE) { f32_clamp_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vclamp_ukernel__scalar_x4; f32_clamp_config.init.f32_minmax = xnn_init_f32_minmax_scalar_params; f32_clamp_config.element_tile = 4; } #elif XNN_ARCH_ARM64 f32_clamp_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vclamp_ukernel__neon_x8; f32_clamp_config.init.f32_minmax = xnn_init_f32_minmax_scalar_params; f32_clamp_config.element_tile = 8; #elif XNN_ARCH_X86 || XNN_ARCH_X86_64 const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); assert(hardware_config != NULL); if (!XNN_PLATFORM_MOBILE && hardware_config->use_x86_avx512f) { f32_clamp_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vclamp_ukernel__avx512f_x16; f32_clamp_config.init.f32_minmax = xnn_init_f32_minmax_scalar_params; f32_clamp_config.element_tile = 16; } else if (hardware_config->use_x86_avx) { f32_clamp_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vclamp_ukernel__avx_x16; f32_clamp_config.init.f32_minmax = xnn_init_f32_minmax_avx_params; f32_clamp_config.element_tile = 16; } else { f32_clamp_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vclamp_ukernel__sse_x8; f32_clamp_config.init.f32_minmax = xnn_init_f32_minmax_sse_params; f32_clamp_config.element_tile = 8; } #elif XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); assert(hardware_config != NULL); if (hardware_config->is_x86) { f32_clamp_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vclamp_ukernel__wasmsimd_x86_x8; f32_clamp_config.init.f32_minmax = xnn_init_f32_minmax_wasmsimd_params; f32_clamp_config.element_tile = 8; } else { f32_clamp_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vclamp_ukernel__wasmsimd_arm_x8; f32_clamp_config.init.f32_minmax = xnn_init_f32_minmax_wasmsimd_params; f32_clamp_config.element_tile = 8; } #elif XNN_ARCH_WASM f32_clamp_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vclamp_ukernel__wasm_x4; f32_clamp_config.init.f32_minmax = xnn_init_f32_minmax_scalar_params; f32_clamp_config.element_tile = 4; #elif XNN_ARCH_RISCV f32_clamp_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vclamp_ukernel__scalar_x4; f32_clamp_config.init.f32_minmax = xnn_init_f32_minmax_scalar_params; f32_clamp_config.element_tile = 4; #endif } static void init_f32_elu_config(void) { #if XNN_ARCH_ARM const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); assert(hardware_config != NULL); if (hardware_config->use_arm_neon) { if (hardware_config->use_arm_neon_fma) { f32_elu_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_velu_ukernel__neonfma_rr1_p6_x8; f32_elu_config.init.f32_elu = xnn_init_f32_elu_neonfma_rr1_p6_params; f32_elu_config.element_tile = 8; } else { f32_elu_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x8; f32_elu_config.init.f32_elu = xnn_init_f32_elu_neon_rr2_lut16_p3_params; f32_elu_config.element_tile = 8; } } else if (!XNN_PLATFORM_MOBILE) { f32_elu_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x4; f32_elu_config.init.f32_elu = xnn_init_f32_elu_scalar_rr2_lut16_p3_params; f32_elu_config.element_tile = 4; } #elif XNN_ARCH_ARM64 f32_elu_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x16; f32_elu_config.init.f32_elu = xnn_init_f32_elu_neonfma_rr1_lut16_p3_params; f32_elu_config.element_tile = 16; #elif XNN_ARCH_X86 || XNN_ARCH_X86_64 const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); assert(hardware_config != NULL); if (!XNN_PLATFORM_MOBILE && hardware_config->use_x86_avx512f) { f32_elu_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x64; f32_elu_config.init.f32_elu = xnn_init_f32_elu_avx512_rr1_lut16_p3_params; f32_elu_config.element_tile = 64; } else if (hardware_config->use_x86_avx2) { f32_elu_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x56; f32_elu_config.init.f32_elu = xnn_init_f32_elu_avx2_rr1_lut4_p4_params; f32_elu_config.element_tile = 56; } else if (hardware_config->use_x86_avx) { f32_elu_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x32; f32_elu_config.init.f32_elu = xnn_init_f32_elu_avx_rr2_lut4_p4_params; f32_elu_config.element_tile = 32; } else { f32_elu_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x12; f32_elu_config.init.f32_elu = xnn_init_f32_elu_sse2_rr2_lut16_p3_params; f32_elu_config.element_tile = 12; } #elif XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD #if XNN_ARCH_WASMRELAXEDSIMD f32_elu_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_velu_ukernel__wasmrelaxedsimd_fma_rr2_p6_x24; f32_elu_config.init.f32_elu = xnn_init_f32_elu_wasmsimd_rr2_p6_params; f32_elu_config.element_tile = 24; #else const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); assert(hardware_config != NULL); if (hardware_config->is_x86) { f32_elu_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x20; f32_elu_config.init.f32_elu = xnn_init_f32_elu_wasmsimd_rr2_p6_params; f32_elu_config.element_tile = 20; } else { f32_elu_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x20; f32_elu_config.init.f32_elu = xnn_init_f32_elu_wasmsimd_rr2_p6_params; f32_elu_config.element_tile = 20; } #endif #elif XNN_ARCH_WASM const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); assert(hardware_config != NULL); if (hardware_config->is_x86) { f32_elu_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x2; f32_elu_config.init.f32_elu = xnn_init_f32_elu_scalar_rr2_lut16_p3_params; f32_elu_config.element_tile = 2; } else { f32_elu_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_velu_ukernel__wasm_rr2_p6_x6; f32_elu_config.init.f32_elu = xnn_init_f32_elu_scalar_rr2_p6_params; f32_elu_config.element_tile = 6; } #elif XNN_ARCH_RISCV f32_elu_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x4; f32_elu_config.init.f32_elu = xnn_init_f32_elu_scalar_rr2_lut16_p3_params; f32_elu_config.element_tile = 4; #endif } static void init_f32_hswish_config(void) { #if XNN_ARCH_ARM const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); assert(hardware_config != NULL); if (hardware_config->use_arm_neon) { f32_hswish_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vhswish_ukernel__neon_x16; f32_hswish_config.init.f32_hswish = xnn_init_f32_hswish_scalar_params; f32_hswish_config.element_tile = 16; } else if (!XNN_PLATFORM_MOBILE) { f32_hswish_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vhswish_ukernel__scalar_x4; f32_hswish_config.init.f32_hswish = xnn_init_f32_hswish_scalar_params; f32_hswish_config.element_tile = 4; } #elif XNN_ARCH_ARM64 f32_hswish_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vhswish_ukernel__neon_x16; f32_hswish_config.init.f32_hswish = xnn_init_f32_hswish_scalar_params; f32_hswish_config.element_tile = 16; #elif XNN_ARCH_X86 || XNN_ARCH_X86_64 const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); assert(hardware_config != NULL); if (!XNN_PLATFORM_MOBILE && hardware_config->use_x86_avx512f) { f32_hswish_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vhswish_ukernel__avx512f_x16; f32_hswish_config.init.f32_hswish = xnn_init_f32_hswish_avx512_params; f32_hswish_config.element_tile = 16; } else if (hardware_config->use_x86_fma3) { f32_hswish_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vhswish_ukernel__fma3_x16; f32_hswish_config.init.f32_hswish = xnn_init_f32_hswish_avx_params; f32_hswish_config.element_tile = 16; } else if (hardware_config->use_x86_avx) { f32_hswish_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vhswish_ukernel__avx_x16; f32_hswish_config.init.f32_hswish = xnn_init_f32_hswish_avx_params; f32_hswish_config.element_tile = 16; } else { f32_hswish_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vhswish_ukernel__sse_x8; f32_hswish_config.init.f32_hswish = xnn_init_f32_hswish_sse_params; f32_hswish_config.element_tile = 8; } #elif XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD f32_hswish_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vhswish_ukernel__wasmsimd_x16; f32_hswish_config.init.f32_hswish = xnn_init_f32_hswish_wasmsimd_params; f32_hswish_config.element_tile = 16; #elif XNN_ARCH_WASM const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); assert(hardware_config != NULL); if (hardware_config->is_x86) { f32_hswish_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vhswish_ukernel__scalar_x4; f32_hswish_config.init.f32_hswish = xnn_init_f32_hswish_scalar_params; f32_hswish_config.element_tile = 4; } else { f32_hswish_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vhswish_ukernel__wasm_x4; f32_hswish_config.init.f32_hswish = xnn_init_f32_hswish_scalar_params; f32_hswish_config.element_tile = 4; } #elif XNN_ARCH_RISCV f32_hswish_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vhswish_ukernel__scalar_x4; f32_hswish_config.init.f32_hswish = xnn_init_f32_hswish_scalar_params; f32_hswish_config.element_tile = 4; #endif } static void init_f32_lrelu_config(void) { #if XNN_ARCH_ARM const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); assert(hardware_config != NULL); if (hardware_config->use_arm_neon) { f32_lrelu_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vlrelu_ukernel__neon_x8; f32_lrelu_config.init.f32_lrelu = xnn_init_f32_lrelu_scalar_params; f32_lrelu_config.element_tile = 8; } else if (!XNN_PLATFORM_MOBILE) { f32_lrelu_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vlrelu_ukernel__scalar_x4; f32_lrelu_config.init.f32_lrelu = xnn_init_f32_lrelu_scalar_params; f32_lrelu_config.element_tile = 4; } #elif XNN_ARCH_ARM64 f32_lrelu_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vlrelu_ukernel__neon_x8; f32_lrelu_config.init.f32_lrelu = xnn_init_f32_lrelu_scalar_params; f32_lrelu_config.element_tile = 8; #elif XNN_ARCH_X86 || XNN_ARCH_X86_64 const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); assert(hardware_config != NULL); if (!XNN_PLATFORM_MOBILE && hardware_config->use_x86_avx512f) { f32_lrelu_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vlrelu_ukernel__avx512f_x16; f32_lrelu_config.init.f32_lrelu = xnn_init_f32_lrelu_scalar_params; f32_lrelu_config.element_tile = 16; } else if (hardware_config->use_x86_avx) { f32_lrelu_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vlrelu_ukernel__avx_x16; f32_lrelu_config.init.f32_lrelu = xnn_init_f32_lrelu_avx_params; f32_lrelu_config.element_tile = 16; } else if (hardware_config->use_x86_sse4_1) { f32_lrelu_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vlrelu_ukernel__sse41_x8; f32_lrelu_config.init.f32_lrelu = xnn_init_f32_lrelu_sse_params; f32_lrelu_config.element_tile = 8; } else { f32_lrelu_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vlrelu_ukernel__sse_x8; f32_lrelu_config.init.f32_lrelu = xnn_init_f32_lrelu_sse_params; f32_lrelu_config.element_tile = 8; } #elif XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); assert(hardware_config != NULL); #if XNN_ARCH_WASMRELAXEDSIMD if (hardware_config->is_x86) { f32_lrelu_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vlrelu_ukernel__wasmrelaxedsimd_iminmax_x4; f32_lrelu_config.init.f32_lrelu = xnn_init_f32_lrelu_wasmsimd_params; f32_lrelu_config.element_tile = 4; } else { f32_lrelu_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vlrelu_ukernel__wasmrelaxedsimd_laneselect_x4; f32_lrelu_config.init.f32_lrelu = xnn_init_f32_lrelu_wasmsimd_params; f32_lrelu_config.element_tile = 4; } #else if (hardware_config->is_x86) { f32_lrelu_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vlrelu_ukernel__wasmsimd_iminmax_x8; f32_lrelu_config.init.f32_lrelu = xnn_init_f32_lrelu_wasmsimd_params; f32_lrelu_config.element_tile = 8; } else { f32_lrelu_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vlrelu_ukernel__wasmsimd_laneselect_x8; f32_lrelu_config.init.f32_lrelu = xnn_init_f32_lrelu_wasmsimd_params; f32_lrelu_config.element_tile = 8; } #endif #elif XNN_ARCH_WASM f32_lrelu_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vlrelu_ukernel__scalar_x4; f32_lrelu_config.init.f32_lrelu = xnn_init_f32_lrelu_scalar_params; f32_lrelu_config.element_tile = 4; #elif XNN_ARCH_RISCV f32_lrelu_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vlrelu_ukernel__scalar_x4; f32_lrelu_config.init.f32_lrelu = xnn_init_f32_lrelu_scalar_params; f32_lrelu_config.element_tile = 4; #endif } static void init_f32_neg_config(void) { #if XNN_ARCH_ARM const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); assert(hardware_config != NULL); if (hardware_config->use_arm_neon) { f32_neg_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vneg_ukernel__neon_x8; f32_neg_config.element_tile = 8; } else if (!XNN_PLATFORM_MOBILE) { f32_neg_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vneg_ukernel__scalar_x4; f32_neg_config.element_tile = 4; } #elif XNN_ARCH_ARM64 f32_neg_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vneg_ukernel__neon_x8; f32_neg_config.element_tile = 8; #elif XNN_ARCH_X86 || XNN_ARCH_X86_64 const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); assert(hardware_config != NULL); if (!XNN_PLATFORM_MOBILE && hardware_config->use_x86_avx512f) { f32_neg_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vneg_ukernel__avx512f_x16; f32_neg_config.init.f32_neg = xnn_init_f32_neg_avx512_params; f32_neg_config.element_tile = 16; } else if (hardware_config->use_x86_avx) { f32_neg_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vneg_ukernel__avx_x16; f32_neg_config.init.f32_neg = xnn_init_f32_neg_avx_params; f32_neg_config.element_tile = 16; } else { f32_neg_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vneg_ukernel__sse_x8; f32_neg_config.init.f32_neg = xnn_init_f32_neg_sse_params; f32_neg_config.element_tile = 8; } #elif XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD f32_neg_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vneg_ukernel__wasmsimd_x8; f32_neg_config.init.f32_neg = xnn_init_f32_neg_wasmsimd_params; f32_neg_config.element_tile = 8; #elif XNN_ARCH_WASM f32_neg_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vneg_ukernel__scalar_x4; f32_neg_config.element_tile = 4; #elif XNN_ARCH_RISCV f32_neg_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vneg_ukernel__scalar_x4; f32_neg_config.element_tile = 4; #endif } static void init_f32_relu_config(void) { #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD f32_relu_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vrelu_ukernel__wasmsimd_x16; f32_relu_config.element_tile = 16; #elif XNN_ARCH_WASM const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); assert(hardware_config != NULL); if (hardware_config->is_x86) { f32_relu_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vrelu_ukernel__scalar_x8; #if XNN_ENABLE_JIT f32_relu_config.generator = xnn_generate_f32_vrelu_ukernel__jit_wasm32_shr; #endif f32_relu_config.element_tile = 8; } else { f32_relu_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vrelu_ukernel__wasm_x8; f32_relu_config.element_tile = 8; } #endif } static void init_f32_rndd_config(void) { #if XNN_ARCH_ARM const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); assert(hardware_config != NULL); if (hardware_config->use_arm_neon) { if (hardware_config->use_arm_neon_v8) { f32_rndd_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vrndd_ukernel__neonv8_x8; f32_rndd_config.element_tile = 8; } else { f32_rndd_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vrndd_ukernel__neon_x8; f32_rndd_config.element_tile = 8; } } else if (!XNN_PLATFORM_MOBILE) { f32_rndd_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vrndd_ukernel__scalar_libm_x1; f32_rndd_config.element_tile = 1; } #elif XNN_ARCH_ARM64 f32_rndd_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vrndd_ukernel__neonv8_x8; f32_rndd_config.element_tile = 8; #elif XNN_ARCH_X86 || XNN_ARCH_X86_64 const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); assert(hardware_config != NULL); if (!XNN_PLATFORM_MOBILE && hardware_config->use_x86_avx512f) { f32_rndd_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vrndd_ukernel__avx512f_x16; f32_rndd_config.element_tile = 16; } else if (hardware_config->use_x86_avx) { f32_rndd_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vrndd_ukernel__avx_x16; f32_rndd_config.init.f32_rnd = xnn_init_f32_rnd_avx_params; f32_rndd_config.element_tile = 16; } else if (hardware_config->use_x86_sse4_1) { f32_rndd_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vrndd_ukernel__sse41_x8; f32_rndd_config.element_tile = 8; } else { f32_rndd_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vrndd_ukernel__sse2_x8; f32_rndd_config.init.f32_rnd = xnn_init_f32_rnd_sse2_params; f32_rndd_config.element_tile = 8; } #elif XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD f32_rndd_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vrndd_ukernel__wasmsimd_x8; f32_rndd_config.element_tile = 8; #elif XNN_ARCH_WASM f32_rndd_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vrndd_ukernel__scalar_libm_x4; f32_rndd_config.element_tile = 4; #elif XNN_ARCH_RISCV f32_rndd_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vrndd_ukernel__scalar_libm_x1; f32_rndd_config.element_tile = 1; #endif } static void init_f32_rndne_config(void) { #if XNN_ARCH_ARM const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); assert(hardware_config != NULL); if (hardware_config->use_arm_neon) { if (hardware_config->use_arm_neon_v8) { f32_rndne_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vrndne_ukernel__neonv8_x8; f32_rndne_config.element_tile = 8; } else { f32_rndne_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vrndne_ukernel__neon_x8; f32_rndne_config.element_tile = 8; } } else if (!XNN_PLATFORM_MOBILE) { f32_rndne_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vrndne_ukernel__scalar_libm_x1; f32_rndne_config.element_tile = 1; } #elif XNN_ARCH_ARM64 f32_rndne_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vrndne_ukernel__neonv8_x8; f32_rndne_config.element_tile = 8; #elif XNN_ARCH_X86 || XNN_ARCH_X86_64 const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); assert(hardware_config != NULL); if (!XNN_PLATFORM_MOBILE && hardware_config->use_x86_avx512f) { f32_rndne_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vrndne_ukernel__avx512f_x16; f32_rndne_config.element_tile = 16; } else if (hardware_config->use_x86_avx) { f32_rndne_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vrndne_ukernel__avx_x16; f32_rndne_config.init.f32_rnd = xnn_init_f32_rnd_avx_params; f32_rndne_config.element_tile = 16; } else if (hardware_config->use_x86_sse4_1) { f32_rndne_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vrndne_ukernel__sse41_x8; f32_rndne_config.element_tile = 8; } else { f32_rndne_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vrndne_ukernel__sse2_x8; f32_rndne_config.init.f32_rnd = xnn_init_f32_rnd_sse2_params; f32_rndne_config.element_tile = 8; } #elif XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD f32_rndne_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vrndne_ukernel__wasmsimd_x8; f32_rndne_config.element_tile = 8; #elif XNN_ARCH_WASM f32_rndne_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vrndne_ukernel__scalar_libm_x4; f32_rndne_config.element_tile = 4; #elif XNN_ARCH_RISCV f32_rndne_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vrndne_ukernel__scalar_libm_x1; f32_rndne_config.element_tile = 1; #endif } static void init_f32_rndu_config(void) { #if XNN_ARCH_ARM const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); assert(hardware_config != NULL); if (hardware_config->use_arm_neon) { if (hardware_config->use_arm_neon_v8) { f32_rndu_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vrndu_ukernel__neonv8_x8; f32_rndu_config.element_tile = 8; } else { f32_rndu_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vrndu_ukernel__neon_x8; f32_rndu_config.element_tile = 8; } } else if (!XNN_PLATFORM_MOBILE) { f32_rndu_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vrndu_ukernel__scalar_libm_x1; f32_rndu_config.element_tile = 1; } #elif XNN_ARCH_ARM64 f32_rndu_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vrndu_ukernel__neonv8_x8; f32_rndu_config.element_tile = 8; #elif XNN_ARCH_X86 || XNN_ARCH_X86_64 const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); assert(hardware_config != NULL); if (!XNN_PLATFORM_MOBILE && hardware_config->use_x86_avx512f) { f32_rndu_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vrndu_ukernel__avx512f_x16; f32_rndu_config.element_tile = 16; } else if (hardware_config->use_x86_avx) { f32_rndu_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vrndu_ukernel__avx_x16; f32_rndu_config.init.f32_rnd = xnn_init_f32_rnd_avx_params; f32_rndu_config.element_tile = 16; } else if (hardware_config->use_x86_sse4_1) { f32_rndu_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vrndu_ukernel__sse41_x8; f32_rndu_config.element_tile = 8; } else { f32_rndu_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vrndu_ukernel__sse2_x8; f32_rndu_config.init.f32_rnd = xnn_init_f32_rnd_sse2_params; f32_rndu_config.element_tile = 8; } #elif XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD f32_rndu_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vrndu_ukernel__wasmsimd_x8; f32_rndu_config.element_tile = 8; #elif XNN_ARCH_WASM f32_rndu_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vrndu_ukernel__scalar_libm_x4; f32_rndu_config.element_tile = 4; #elif XNN_ARCH_RISCV f32_rndu_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vrndu_ukernel__scalar_libm_x1; f32_rndu_config.element_tile = 1; #endif } static void init_f32_rndz_config(void) { #if XNN_ARCH_ARM const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); assert(hardware_config != NULL); if (hardware_config->use_arm_neon) { if (hardware_config->use_arm_neon_v8) { f32_rndz_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vrndz_ukernel__neonv8_x8; f32_rndz_config.element_tile = 8; } else { f32_rndz_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vrndz_ukernel__neon_x8; f32_rndz_config.element_tile = 8; } } else if (!XNN_PLATFORM_MOBILE) { f32_rndz_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vrndz_ukernel__scalar_libm_x1; f32_rndz_config.element_tile = 1; } #elif XNN_ARCH_ARM64 f32_rndz_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vrndz_ukernel__neonv8_x8; f32_rndz_config.element_tile = 8; #elif XNN_ARCH_X86 || XNN_ARCH_X86_64 const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); assert(hardware_config != NULL); if (!XNN_PLATFORM_MOBILE && hardware_config->use_x86_avx512f) { f32_rndz_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vrndz_ukernel__avx512f_x16; f32_rndz_config.element_tile = 16; } else if (hardware_config->use_x86_avx) { f32_rndz_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vrndz_ukernel__avx_x16; f32_rndz_config.init.f32_rnd = xnn_init_f32_rnd_avx_params; f32_rndz_config.element_tile = 16; } else if (hardware_config->use_x86_sse4_1) { f32_rndz_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vrndz_ukernel__sse41_x8; f32_rndz_config.element_tile = 8; } else { f32_rndz_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vrndz_ukernel__sse2_x8; f32_rndz_config.init.f32_rnd = xnn_init_f32_rnd_sse2_params; f32_rndz_config.element_tile = 8; } #elif XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD f32_rndz_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vrndz_ukernel__wasmsimd_x8; f32_rndz_config.element_tile = 8; #elif XNN_ARCH_WASM f32_rndz_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vrndz_ukernel__scalar_libm_x4; f32_rndz_config.element_tile = 4; #elif XNN_ARCH_RISCV f32_rndz_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vrndz_ukernel__scalar_libm_x1; f32_rndz_config.element_tile = 1; #endif } static void init_f32_sigmoid_config(void) { #if XNN_ARCH_ARM const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); assert(hardware_config != NULL); if (hardware_config->use_arm_neon) { f32_sigmoid_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vsigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x8; f32_sigmoid_config.init.f32_sigmoid = xnn_init_f32_sigmoid_neon_rr2_lut64_p2_params; f32_sigmoid_config.element_tile = 8; } else if (!XNN_PLATFORM_MOBILE) { f32_sigmoid_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vsigmoid_ukernel__scalar_rr2_lut64_p2_div_x2; f32_sigmoid_config.init.f32_sigmoid = xnn_init_f32_sigmoid_scalar_rr2_lut64_p2_params; f32_sigmoid_config.element_tile = 2; } #elif XNN_ARCH_ARM64 f32_sigmoid_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x16; f32_sigmoid_config.init.f32_sigmoid = xnn_init_f32_sigmoid_neonfma_rr1_lut64_p2_params; f32_sigmoid_config.element_tile = 16; #elif XNN_ARCH_X86 || XNN_ARCH_X86_64 const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); assert(hardware_config != NULL); if (!XNN_PLATFORM_MOBILE && hardware_config->use_x86_avx512f) { f32_sigmoid_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x64; f32_sigmoid_config.init.f32_sigmoid = xnn_init_f32_sigmoid_avx512_rr2_lut32_p2_params; f32_sigmoid_config.element_tile = 64; } else if (hardware_config->use_x86_avx2) { f32_sigmoid_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_div_x40; f32_sigmoid_config.init.f32_sigmoid = xnn_init_f32_sigmoid_avx2_rr1_p5_params; f32_sigmoid_config.element_tile = 40; } else if (hardware_config->use_x86_avx) { f32_sigmoid_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vsigmoid_ukernel__avx_rr2_p5_nr2_x40; f32_sigmoid_config.init.f32_sigmoid = xnn_init_f32_sigmoid_avx_rr2_p5_params; f32_sigmoid_config.element_tile = 40; } else if (hardware_config->use_x86_sse4_1) { f32_sigmoid_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vsigmoid_ukernel__sse41_rr2_lut64_p2_div_x8; f32_sigmoid_config.init.f32_sigmoid = xnn_init_f32_sigmoid_sse2_rr2_lut64_p2_params; f32_sigmoid_config.element_tile = 8; } else { f32_sigmoid_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vsigmoid_ukernel__sse2_rr2_lut64_p2_div_x8; f32_sigmoid_config.init.f32_sigmoid = xnn_init_f32_sigmoid_sse2_rr2_lut64_p2_params; f32_sigmoid_config.element_tile = 8; } #elif XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD #if XNN_ARCH_WASMRELAXEDSIMD f32_sigmoid_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vsigmoid_ukernel__wasmrelaxedsimd_fma_rr2_p5_div_x24; f32_sigmoid_config.init.f32_sigmoid = xnn_init_f32_sigmoid_wasmsimd_rr2_p5_params; f32_sigmoid_config.element_tile = 24; #else f32_sigmoid_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_p5_div_x16; f32_sigmoid_config.init.f32_sigmoid = xnn_init_f32_sigmoid_wasmsimd_rr2_p5_params; f32_sigmoid_config.element_tile = 16; #endif #elif XNN_ARCH_WASM f32_sigmoid_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vsigmoid_ukernel__scalar_rr2_lut64_p2_div_x2; f32_sigmoid_config.init.f32_sigmoid = xnn_init_f32_sigmoid_scalar_rr2_lut64_p2_params; f32_sigmoid_config.element_tile = 2; #elif XNN_ARCH_RISCV f32_sigmoid_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vsigmoid_ukernel__scalar_rr2_lut64_p2_div_x2; f32_sigmoid_config.init.f32_sigmoid = xnn_init_f32_sigmoid_scalar_rr2_lut64_p2_params; f32_sigmoid_config.element_tile = 2; #endif } static void init_f32_sqr_config(void) { #if XNN_ARCH_ARM const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); assert(hardware_config != NULL); if (hardware_config->use_arm_neon) { f32_sqr_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vsqr_ukernel__neon_x8; f32_sqr_config.element_tile = 8; } else if (!XNN_PLATFORM_MOBILE) { f32_sqr_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vsqr_ukernel__scalar_x4; f32_sqr_config.element_tile = 4; } #elif XNN_ARCH_ARM64 f32_sqr_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vsqr_ukernel__neon_x8; f32_sqr_config.element_tile = 8; #elif XNN_ARCH_X86 || XNN_ARCH_X86_64 const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); assert(hardware_config != NULL); if (!XNN_PLATFORM_MOBILE && hardware_config->use_x86_avx512f) { f32_sqr_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vsqr_ukernel__avx512f_x16; f32_sqr_config.element_tile = 16; } else if (hardware_config->use_x86_avx) { f32_sqr_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vsqr_ukernel__avx_x16; f32_sqr_config.init.f32_default = xnn_init_f32_default_avx_params; f32_sqr_config.element_tile = 16; } else { f32_sqr_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vsqr_ukernel__sse_x8; f32_sqr_config.element_tile = 8; } #elif XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD f32_sqr_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vsqr_ukernel__wasmsimd_x8; f32_sqr_config.element_tile = 8; #elif XNN_ARCH_WASM f32_sqr_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vsqr_ukernel__scalar_x4; f32_sqr_config.element_tile = 4; #elif XNN_ARCH_RISCV f32_sqr_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vsqr_ukernel__scalar_x4; f32_sqr_config.element_tile = 4; #endif } static void init_f32_sqrt_config(void) { #if XNN_ARCH_ARM const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); assert(hardware_config != NULL); if (hardware_config->use_arm_neon) { f32_sqrt_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vsqrt_ukernel__scalar_sqrt_x1; f32_sqrt_config.element_tile = 1; } else if (!XNN_PLATFORM_MOBILE) { f32_sqrt_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vsqrt_ukernel__scalar_sqrt_x1; f32_sqrt_config.element_tile = 1; } #elif XNN_ARCH_ARM64 f32_sqrt_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vsqrt_ukernel__aarch64_neon_sqrt_x4; f32_sqrt_config.element_tile = 4; #elif XNN_ARCH_X86 || XNN_ARCH_X86_64 const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); assert(hardware_config != NULL); if (hardware_config->use_x86_avx) { f32_sqrt_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vsqrt_ukernel__avx_sqrt_x8; f32_sqrt_config.init.f32_sqrt = xnn_init_f32_sqrt_avx_params; f32_sqrt_config.element_tile = 8; } else { f32_sqrt_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vsqrt_ukernel__sse_sqrt_x4; f32_sqrt_config.element_tile = 4; } #elif XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD f32_sqrt_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vsqrt_ukernel__wasmsimd_sqrt_x8; f32_sqrt_config.element_tile = 8; #elif XNN_ARCH_WASM f32_sqrt_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vsqrt_ukernel__scalar_sqrt_x1; f32_sqrt_config.element_tile = 1; #elif XNN_ARCH_RISCV f32_sqrt_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vsqrt_ukernel__scalar_sqrt_x1; f32_sqrt_config.element_tile = 1; #endif } static void init_f32_tanh_config(void) { #if XNN_ARCH_ARM const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); assert(hardware_config != NULL); if (hardware_config->use_arm_neon_fma) { f32_tanh_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vtanh_ukernel__neonfma_expm1minus_rr1_p6h5ts_nr2fma_x8; f32_tanh_config.init.f32_tanh = xnn_init_f32_tanh_neon_expm1minus_rr1_p6h5_params; f32_tanh_config.element_tile = 8; } else if (hardware_config->use_arm_neon) { f32_tanh_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vtanh_ukernel__neon_expm1minus_rr1_p6h5ts_nr2recps_x8; f32_tanh_config.init.f32_tanh = xnn_init_f32_tanh_neon_expm1minus_rr1_p6h5_params; f32_tanh_config.element_tile = 8; } else if (!XNN_PLATFORM_MOBILE) { f32_tanh_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vtanh_ukernel__scalar_expm1minus_rr1_lut8_p4h3ts_div_x4; f32_tanh_config.init.f32_tanh = xnn_init_f32_tanh_scalar_expm1minus_rr1_lut8_p4h3_params; f32_tanh_config.element_tile = 4; } #elif XNN_ARCH_ARM64 f32_tanh_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vtanh_ukernel__aarch64_neonfma_expm1minus_rr1_p6h5ts_div_x16; f32_tanh_config.init.f32_tanh = xnn_init_f32_tanh_neon_expm1minus_rr1_p6h5_params; f32_tanh_config.element_tile = 16; #elif XNN_ARCH_X86 || XNN_ARCH_X86_64 const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); assert(hardware_config != NULL); if (!XNN_PLATFORM_MOBILE && hardware_config->use_x86_avx512skx) { f32_tanh_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vtanh_ukernel__avx512skx_expm1minus_rr1_lut4_p4h3ts_perm_div_x64; f32_tanh_config.init.f32_tanh = xnn_init_f32_tanh_avx512_expm1minus_rr1_lut4_p4h3_perm_params; f32_tanh_config.element_tile = 64; } else if (hardware_config->use_x86_avx2) { f32_tanh_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vtanh_ukernel__avx2_expm1minus_rr1_lut4_p4h3ts_perm_div_x32; f32_tanh_config.init.f32_tanh = xnn_init_f32_tanh_avx_expm1minus_rr1_lut4_p4h3_perm_params; f32_tanh_config.element_tile = 32; } else if (hardware_config->use_x86_fma3) { f32_tanh_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vtanh_ukernel__fma3_expm1minus_rr1_lut4_p4h3ts_perm_div_x40; f32_tanh_config.init.f32_tanh = xnn_init_f32_tanh_avx_expm1minus_rr1_lut4_p4h2_perm_params; f32_tanh_config.element_tile = 40; } else if (hardware_config->use_x86_avx) { f32_tanh_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vtanh_ukernel__avx_expm1minus_rr1_lut4_p4h2ts_perm_div_x48; f32_tanh_config.init.f32_tanh = xnn_init_f32_tanh_avx_expm1minus_rr1_lut4_p4h2_perm_params; f32_tanh_config.element_tile = 48; } else if (hardware_config->use_x86_sse4_1) { f32_tanh_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vtanh_ukernel__sse41_expm1minus_rr1_lut8_p4h3ts_div_x20; f32_tanh_config.init.f32_tanh = xnn_init_f32_tanh_sse_expm1minus_rr1_lut8_p4h3_params; f32_tanh_config.element_tile = 20; } else { f32_tanh_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vtanh_ukernel__sse2_expm1minus_rr1_lut8_p4h3ts_div_x16; f32_tanh_config.init.f32_tanh = xnn_init_f32_tanh_sse_expm1minus_rr1_lut8_p4h3_params; f32_tanh_config.element_tile = 16; } #elif XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); assert(hardware_config != NULL); if (hardware_config->is_x86) { f32_tanh_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vtanh_ukernel__wasmsimd_expm1minus_rr1_p6h5ts_div_nabs_pmax_x16; f32_tanh_config.init.f32_tanh = xnn_init_f32_tanh_wasmsimd_expm1minus_rr1_p6h5_nabs_params; f32_tanh_config.element_tile = 16; } else { f32_tanh_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vtanh_ukernel__wasmsimd_expm1minus_rr1_p6h5ts_div_abs_min_x16; f32_tanh_config.init.f32_tanh = xnn_init_f32_tanh_wasmsimd_expm1minus_rr1_p6h5_abs_params; f32_tanh_config.element_tile = 16; } #elif XNN_ARCH_WASM const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); assert(hardware_config != NULL); if (hardware_config->is_x86) { f32_tanh_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vtanh_ukernel__scalar_expm1minus_rr1_lut8_p4h3ts_div_x4; f32_tanh_config.init.f32_tanh = xnn_init_f32_tanh_scalar_expm1minus_rr1_lut8_p4h3_params; f32_tanh_config.element_tile = 4; } else { f32_tanh_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vtanh_ukernel__wasm_expm1minus_rr1_p6h5ts_div_x4; f32_tanh_config.init.f32_tanh = xnn_init_f32_tanh_scalar_expm1minus_rr1_p6h5_params; f32_tanh_config.element_tile = 4; } #elif XNN_ARCH_RISCV f32_tanh_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vtanh_ukernel__fma_expm1minus_rr1_lut8_p4h3ts_div_x4; f32_tanh_config.init.f32_tanh = xnn_init_f32_tanh_scalar_expm1minus_rr1_lut8_p4h3_params; f32_tanh_config.element_tile = 4; #endif } static void init_f32_to_f16_cvt_config(void) { #if XNN_ARCH_ARM const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); assert(hardware_config != NULL); if (hardware_config->use_arm_neon) { if (hardware_config->use_arm_neon_fp16) { f32_to_f16_cvt_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_f16_vcvt_ukernel__neonfp16_x16; f32_to_f16_cvt_config.element_tile = 16; } else { f32_to_f16_cvt_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_f16_vcvt_ukernel__neon_x8; f32_to_f16_cvt_config.init.f32_f16_cvt = xnn_init_f32_f16_cvt_neon_params; f32_to_f16_cvt_config.element_tile = 8; } } else if (!XNN_PLATFORM_MOBILE) { f32_to_f16_cvt_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_f16_vcvt_ukernel__scalar_fabsf_x2; f32_to_f16_cvt_config.init.f32_f16_cvt = xnn_init_f32_f16_cvt_scalar_fabsf_params; f32_to_f16_cvt_config.element_tile = 2; } #elif XNN_ARCH_ARM64 f32_to_f16_cvt_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_f16_vcvt_ukernel__neonfp16_x16; f32_to_f16_cvt_config.element_tile = 16; #elif XNN_ARCH_X86 || XNN_ARCH_X86_64 const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); assert(hardware_config != NULL); if (hardware_config->use_x86_avx512skx) { f32_to_f16_cvt_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_f16_vcvt_ukernel__avx512skx_x16; f32_to_f16_cvt_config.element_tile = 16; } else if (hardware_config->use_x86_f16c) { f32_to_f16_cvt_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_f16_vcvt_ukernel__f16c_x16; f32_to_f16_cvt_config.init.f32_f16_cvt = xnn_init_f32_f16_cvt_f16c_params; f32_to_f16_cvt_config.element_tile = 16; } else if (hardware_config->use_x86_avx) { f32_to_f16_cvt_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_f16_vcvt_ukernel__avx_x24; f32_to_f16_cvt_config.init.f32_f16_cvt = xnn_init_f32_f16_cvt_sse2_params; f32_to_f16_cvt_config.element_tile = 24; } else if (hardware_config->use_x86_sse4_1) { f32_to_f16_cvt_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_f16_vcvt_ukernel__sse41_x8; f32_to_f16_cvt_config.init.f32_f16_cvt = xnn_init_f32_f16_cvt_sse2_params; f32_to_f16_cvt_config.element_tile = 8; } else { f32_to_f16_cvt_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_f16_vcvt_ukernel__sse2_x16; f32_to_f16_cvt_config.init.f32_f16_cvt = xnn_init_f32_f16_cvt_sse2_params; f32_to_f16_cvt_config.element_tile = 16; } #elif XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD #if XNN_ARCH_WASMRELAXEDSIMD f32_to_f16_cvt_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_f16_vcvt_ukernel__wasmrelaxedsimd_x24; f32_to_f16_cvt_config.init.f32_f16_cvt = xnn_init_f32_f16_cvt_wasmsimd_params; f32_to_f16_cvt_config.element_tile = 24; #else f32_to_f16_cvt_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_f16_vcvt_ukernel__wasmsimd_x24; f32_to_f16_cvt_config.init.f32_f16_cvt = xnn_init_f32_f16_cvt_wasmsimd_params; f32_to_f16_cvt_config.element_tile = 24; #endif #elif XNN_ARCH_WASM f32_to_f16_cvt_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_f16_vcvt_ukernel__scalar_bitcast_x4; f32_to_f16_cvt_config.init.f32_f16_cvt = xnn_init_f32_f16_cvt_scalar_bitcast_params; f32_to_f16_cvt_config.element_tile = 4; #elif XNN_ARCH_RISCV f32_to_f16_cvt_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_f16_vcvt_ukernel__scalar_fabsf_x2; f32_to_f16_cvt_config.init.f32_f16_cvt = xnn_init_f32_f16_cvt_scalar_fabsf_params; f32_to_f16_cvt_config.element_tile = 2; #endif } static void init_f32_to_qs8_cvt_config(void) { #if XNN_ARCH_ARM const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); assert(hardware_config != NULL); if (hardware_config->use_arm_neon) { if (hardware_config->use_arm_neon_v8) { f32_to_qs8_cvt_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_qs8_vcvt_ukernel__neonv8_x32; f32_to_qs8_cvt_config.init.f32_qs8_cvt = xnn_init_f32_qs8_cvt_neonv8_params; f32_to_qs8_cvt_config.element_tile = 32; } else { f32_to_qs8_cvt_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_qs8_vcvt_ukernel__neon_x32; f32_to_qs8_cvt_config.init.f32_qs8_cvt = xnn_init_f32_qs8_cvt_neon_params; f32_to_qs8_cvt_config.element_tile = 32; } } else if (!XNN_PLATFORM_MOBILE) { f32_to_qs8_cvt_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_qs8_vcvt_ukernel__scalar_imagic_x4; f32_to_qs8_cvt_config.init.f32_qs8_cvt = xnn_init_f32_qs8_cvt_scalar_imagic_params; f32_to_qs8_cvt_config.element_tile = 4; } #elif XNN_ARCH_ARM64 f32_to_qs8_cvt_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_qs8_vcvt_ukernel__neonv8_x32; f32_to_qs8_cvt_config.init.f32_qs8_cvt = xnn_init_f32_qs8_cvt_neonv8_params; f32_to_qs8_cvt_config.element_tile = 32; #elif XNN_ARCH_X86 || XNN_ARCH_X86_64 const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); assert(hardware_config != NULL); if (hardware_config->use_x86_avx512skx) { f32_to_qs8_cvt_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_qs8_vcvt_ukernel__avx512skx_x128; f32_to_qs8_cvt_config.init.f32_qs8_cvt = xnn_init_f32_qs8_cvt_avx512_params; f32_to_qs8_cvt_config.element_tile = 128; } else if (hardware_config->use_x86_avx2) { f32_to_qs8_cvt_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_qs8_vcvt_ukernel__avx2_x64; f32_to_qs8_cvt_config.init.f32_qs8_cvt = xnn_init_f32_qs8_cvt_avx2_params; f32_to_qs8_cvt_config.element_tile = 64; } else if (hardware_config->use_x86_avx) { f32_to_qs8_cvt_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_qs8_vcvt_ukernel__avx_x32; f32_to_qs8_cvt_config.init.f32_qs8_cvt = xnn_init_f32_qs8_cvt_avx_params; f32_to_qs8_cvt_config.element_tile = 32; } else if (hardware_config->use_x86_sse4_1) { f32_to_qs8_cvt_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_qs8_vcvt_ukernel__sse41_x32; f32_to_qs8_cvt_config.init.f32_qs8_cvt = xnn_init_f32_qs8_cvt_sse4_params; f32_to_qs8_cvt_config.element_tile = 32; } else { f32_to_qs8_cvt_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_qs8_vcvt_ukernel__sse2_x32; f32_to_qs8_cvt_config.init.f32_qs8_cvt = xnn_init_f32_qs8_cvt_sse2_params; f32_to_qs8_cvt_config.element_tile = 32; } #elif XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD f32_to_qs8_cvt_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_qs8_vcvt_ukernel__wasmsimd_magic_x32; f32_to_qs8_cvt_config.init.f32_qs8_cvt = xnn_init_f32_qs8_cvt_wasmsimd_magic_params; f32_to_qs8_cvt_config.element_tile = 32; #elif XNN_ARCH_WASM const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); assert(hardware_config != NULL); if (hardware_config->is_x86) { f32_to_qs8_cvt_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_qu8_vcvt_ukernel__scalar_imagic_x1; f32_to_qs8_cvt_config.init.f32_qs8_cvt = xnn_init_f32_qs8_cvt_scalar_imagic_params; f32_to_qs8_cvt_config.element_tile = 1; } else { f32_to_qs8_cvt_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_qu8_vcvt_ukernel__wasm_fmagic_x4; f32_to_qs8_cvt_config.init.f32_qs8_cvt = xnn_init_f32_qs8_cvt_scalar_fmagic_params; f32_to_qs8_cvt_config.element_tile = 4; } #elif XNN_ARCH_RISCV f32_to_qs8_cvt_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_qs8_vcvt_ukernel__scalar_lrintf_x4; f32_to_qs8_cvt_config.init.f32_qs8_cvt = xnn_init_f32_qs8_cvt_scalar_lrintf_params; f32_to_qs8_cvt_config.element_tile = 4; #endif } static void init_f32_to_qu8_cvt_config(void) { #if XNN_ARCH_ARM const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); assert(hardware_config != NULL); if (hardware_config->use_arm_neon) { if (hardware_config->use_arm_neon_v8) { f32_to_qu8_cvt_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_qu8_vcvt_ukernel__neonv8_x32; f32_to_qu8_cvt_config.init.f32_qu8_cvt = xnn_init_f32_qu8_cvt_neonv8_params; f32_to_qu8_cvt_config.element_tile = 32; } else { f32_to_qu8_cvt_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_qu8_vcvt_ukernel__neon_x32; f32_to_qu8_cvt_config.init.f32_qu8_cvt = xnn_init_f32_qu8_cvt_neon_params; f32_to_qu8_cvt_config.element_tile = 32; } } else if (!XNN_PLATFORM_MOBILE) { f32_to_qu8_cvt_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_qu8_vcvt_ukernel__scalar_imagic_x4; f32_to_qu8_cvt_config.init.f32_qu8_cvt = xnn_init_f32_qu8_cvt_scalar_imagic_params; f32_to_qu8_cvt_config.element_tile = 4; } #elif XNN_ARCH_ARM64 f32_to_qu8_cvt_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_qu8_vcvt_ukernel__neonv8_x32; f32_to_qu8_cvt_config.init.f32_qu8_cvt = xnn_init_f32_qu8_cvt_neonv8_params; f32_to_qu8_cvt_config.element_tile = 32; #elif XNN_ARCH_X86 || XNN_ARCH_X86_64 const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); assert(hardware_config != NULL); if (hardware_config->use_x86_avx512skx) { f32_to_qu8_cvt_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_qu8_vcvt_ukernel__avx512skx_x128; f32_to_qu8_cvt_config.init.f32_qu8_cvt = xnn_init_f32_qu8_cvt_avx512_params; f32_to_qu8_cvt_config.element_tile = 128; } else if (hardware_config->use_x86_avx2) { f32_to_qu8_cvt_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_qu8_vcvt_ukernel__avx2_x64; f32_to_qu8_cvt_config.init.f32_qu8_cvt = xnn_init_f32_qu8_cvt_avx2_params; f32_to_qu8_cvt_config.element_tile = 64; } else if (hardware_config->use_x86_avx) { f32_to_qu8_cvt_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_qu8_vcvt_ukernel__avx_x32; f32_to_qu8_cvt_config.init.f32_qu8_cvt = xnn_init_f32_qu8_cvt_avx_params; f32_to_qu8_cvt_config.element_tile = 32; } else { f32_to_qu8_cvt_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_qu8_vcvt_ukernel__sse2_x32; f32_to_qu8_cvt_config.init.f32_qu8_cvt = xnn_init_f32_qu8_cvt_sse2_params; f32_to_qu8_cvt_config.element_tile = 32; } #elif XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD f32_to_qu8_cvt_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_qu8_vcvt_ukernel__wasmsimd_magic_x32; f32_to_qu8_cvt_config.init.f32_qu8_cvt = xnn_init_f32_qu8_cvt_wasmsimd_magic_params; f32_to_qu8_cvt_config.element_tile = 32; #elif XNN_ARCH_WASM const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); assert(hardware_config != NULL); if (hardware_config->is_x86) { f32_to_qu8_cvt_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_qu8_vcvt_ukernel__scalar_imagic_x1; f32_to_qu8_cvt_config.init.f32_qu8_cvt = xnn_init_f32_qu8_cvt_scalar_imagic_params; f32_to_qu8_cvt_config.element_tile = 1; } else { f32_to_qu8_cvt_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_qu8_vcvt_ukernel__wasm_fmagic_x4; f32_to_qu8_cvt_config.init.f32_qu8_cvt = xnn_init_f32_qu8_cvt_scalar_fmagic_params; f32_to_qu8_cvt_config.element_tile = 4; } #elif XNN_ARCH_RISCV f32_to_qu8_cvt_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_qu8_vcvt_ukernel__scalar_lrintf_x4; f32_to_qu8_cvt_config.init.f32_qu8_cvt = xnn_init_f32_qu8_cvt_scalar_lrintf_params; f32_to_qu8_cvt_config.element_tile = 4; #endif } static void init_qs8_cvt_config(void) { #if XNN_ARCH_ARM const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); assert(hardware_config != NULL); if (hardware_config->use_arm_neon_v8) { qs8_cvt_config.ukernel = (xnn_vunary_ukernel_fn) xnn_qs8_vcvt_ukernel__neon_x32; qs8_cvt_config.init.qs8_cvt = xnn_init_qs8_cvt_neon_params; qs8_cvt_config.element_tile = 32; } else if (!XNN_PLATFORM_MOBILE) { qs8_cvt_config.ukernel = (xnn_vunary_ukernel_fn) xnn_qs8_vcvt_ukernel__armsimd32_x8; qs8_cvt_config.init.qs8_cvt = xnn_init_qs8_cvt_armsimd32_params; qs8_cvt_config.element_tile = 8; } #elif XNN_ARCH_ARM64 qs8_cvt_config.ukernel = (xnn_vunary_ukernel_fn) xnn_qs8_vcvt_ukernel__neon_x32; qs8_cvt_config.init.qs8_cvt = xnn_init_qs8_cvt_neon_params; qs8_cvt_config.element_tile = 32; #elif XNN_ARCH_X86 || XNN_ARCH_X86_64 const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); assert(hardware_config != NULL); if (hardware_config->use_x86_avx2) { qs8_cvt_config.ukernel = (xnn_vunary_ukernel_fn) xnn_qs8_vcvt_ukernel__avx2_x32; qs8_cvt_config.init.qs8_cvt = xnn_init_qs8_cvt_avx2_params; qs8_cvt_config.element_tile = 32; } else if (hardware_config->use_x86_avx) { qs8_cvt_config.ukernel = (xnn_vunary_ukernel_fn) xnn_qs8_vcvt_ukernel__avx_x32; qs8_cvt_config.init.qs8_cvt = xnn_init_qs8_cvt_ssse3_params; qs8_cvt_config.element_tile = 32; } else if (hardware_config->use_x86_sse4_1) { qs8_cvt_config.ukernel = (xnn_vunary_ukernel_fn) xnn_qs8_vcvt_ukernel__sse41_x32; qs8_cvt_config.init.qs8_cvt = xnn_init_qs8_cvt_ssse3_params; qs8_cvt_config.element_tile = 32; } else if (hardware_config->use_x86_ssse3) { qs8_cvt_config.ukernel = (xnn_vunary_ukernel_fn) xnn_qs8_vcvt_ukernel__ssse3_x32; qs8_cvt_config.init.qs8_cvt = xnn_init_qs8_cvt_ssse3_params; qs8_cvt_config.element_tile = 32; } else { qs8_cvt_config.ukernel = (xnn_vunary_ukernel_fn) xnn_qs8_vcvt_ukernel__sse2_x32; qs8_cvt_config.init.qs8_cvt = xnn_init_qs8_cvt_sse2_params; qs8_cvt_config.element_tile = 32; } #elif XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD #if XNN_ARCH_WASMRELAXEDSIMD qs8_cvt_config.ukernel = (xnn_vunary_ukernel_fn) xnn_qs8_vcvt_ukernel__wasmrelaxedsimd_x32; qs8_cvt_config.init.qs8_cvt = xnn_init_qs8_cvt_wasmsimd_params; qs8_cvt_config.element_tile = 32; #else qs8_cvt_config.ukernel = (xnn_vunary_ukernel_fn) xnn_qs8_vcvt_ukernel__wasmsimd_x16; qs8_cvt_config.init.qs8_cvt = xnn_init_qs8_cvt_wasmsimd_params; qs8_cvt_config.element_tile = 16; #endif #elif XNN_ARCH_WASM const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); assert(hardware_config != NULL); if (hardware_config->is_x86) { qs8_cvt_config.ukernel = (xnn_vunary_ukernel_fn) xnn_qs8_vcvt_ukernel__scalar_x1; qs8_cvt_config.init.qs8_cvt = xnn_init_qs8_cvt_scalar_params; qs8_cvt_config.element_tile = 1; } else { qs8_cvt_config.ukernel = (xnn_vunary_ukernel_fn) xnn_qs8_vcvt_ukernel__scalar_x4; qs8_cvt_config.init.qs8_cvt = xnn_init_qs8_cvt_scalar_params; qs8_cvt_config.element_tile = 4; } #elif XNN_ARCH_RISCV qs8_cvt_config.ukernel = (xnn_vunary_ukernel_fn) xnn_qs8_vcvt_ukernel__scalar_x4; qs8_cvt_config.init.qs8_cvt = xnn_init_qs8_cvt_scalar_params; qs8_cvt_config.element_tile = 4; #endif } static void init_qs16_to_qs8_cvt_config(void) { #if XNN_ARCH_ARM const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); assert(hardware_config != NULL); if (hardware_config->use_arm_neon) { qs16_to_qs8_cvt_config.ukernel = (xnn_vunary_ukernel_fn) xnn_qs16_qs8_vcvt_ukernel__asm_aarch32_neon_x16; qs16_to_qs8_cvt_config.init.qs16_qs8_cvt = xnn_init_qs16_qs8_cvt_neon_params; qs16_to_qs8_cvt_config.element_tile = 16; } else if (!XNN_PLATFORM_MOBILE) { qs16_to_qs8_cvt_config.ukernel = (xnn_vunary_ukernel_fn) xnn_qs16_qs8_vcvt_ukernel__scalar_x4; qs16_to_qs8_cvt_config.init.qs16_qs8_cvt = xnn_init_qs16_qs8_cvt_scalar_params; qs16_to_qs8_cvt_config.element_tile = 4; } #elif XNN_ARCH_ARM64 qs16_to_qs8_cvt_config.ukernel = (xnn_vunary_ukernel_fn) xnn_qs16_qs8_vcvt_ukernel__neon_x32; qs16_to_qs8_cvt_config.init.qs16_qs8_cvt = xnn_init_qs16_qs8_cvt_neon_params; qs16_to_qs8_cvt_config.element_tile = 32; #elif XNN_ARCH_X86 || XNN_ARCH_X86_64 const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); assert(hardware_config != NULL); if (hardware_config->use_x86_avx) { qs16_to_qs8_cvt_config.ukernel = (xnn_vunary_ukernel_fn) xnn_qs16_qs8_vcvt_ukernel__avx_x16; qs16_to_qs8_cvt_config.init.qs16_qs8_cvt = xnn_init_qs16_qs8_cvt_sse4_params; qs16_to_qs8_cvt_config.element_tile = 16; } else if (hardware_config->use_x86_sse4_1) { qs16_to_qs8_cvt_config.ukernel = (xnn_vunary_ukernel_fn) xnn_qs16_qs8_vcvt_ukernel__sse41_x16; qs16_to_qs8_cvt_config.init.qs16_qs8_cvt = xnn_init_qs16_qs8_cvt_sse4_params; qs16_to_qs8_cvt_config.element_tile = 16; } else if (hardware_config->use_x86_ssse3) { qs16_to_qs8_cvt_config.ukernel = (xnn_vunary_ukernel_fn) xnn_qs16_qs8_vcvt_ukernel__ssse3_x16; qs16_to_qs8_cvt_config.init.qs16_qs8_cvt = xnn_init_qs16_qs8_cvt_ssse3_params; qs16_to_qs8_cvt_config.element_tile = 16; } else { qs16_to_qs8_cvt_config.ukernel = (xnn_vunary_ukernel_fn) xnn_qs16_qs8_vcvt_ukernel__sse2_x16; qs16_to_qs8_cvt_config.init.qs16_qs8_cvt = xnn_init_qs16_qs8_cvt_sse2_params; qs16_to_qs8_cvt_config.element_tile = 16; } #elif XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD qs16_to_qs8_cvt_config.ukernel = (xnn_vunary_ukernel_fn) xnn_qs16_qs8_vcvt_ukernel__wasmsimd_x16; qs16_to_qs8_cvt_config.init.qs16_qs8_cvt = xnn_init_qs16_qs8_cvt_wasmsimd_params; qs16_to_qs8_cvt_config.element_tile = 16; #else qs16_to_qs8_cvt_config.ukernel = (xnn_vunary_ukernel_fn) xnn_qs16_qs8_vcvt_ukernel__scalar_x4; qs16_to_qs8_cvt_config.init.qs16_qs8_cvt = xnn_init_qs16_qs8_cvt_scalar_params; qs16_to_qs8_cvt_config.element_tile = 4; #endif } static void init_qs8_lrelu_config(void) { #if XNN_ARCH_ARM const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); assert(hardware_config != NULL); if (hardware_config->use_arm_neon) { qs8_lrelu_config.ukernel = (xnn_vunary_ukernel_fn) xnn_qs8_vlrelu_ukernel__neon_x32; qs8_lrelu_config.init.qs8_lrelu = xnn_init_qs8_lrelu_neon_params; qs8_lrelu_config.element_tile = 32; } else if (!XNN_PLATFORM_MOBILE) { qs8_lrelu_config.ukernel = (xnn_vunary_ukernel_fn) xnn_qs8_vlrelu_ukernel__armsimd32_x4; qs8_lrelu_config.init.qs8_lrelu = xnn_init_qs8_lrelu_armsimd32_params; qs8_lrelu_config.element_tile = 4; } #elif XNN_ARCH_ARM64 qs8_lrelu_config.ukernel = (xnn_vunary_ukernel_fn) xnn_qs8_vlrelu_ukernel__neon_x32; qs8_lrelu_config.init.qs8_lrelu = xnn_init_qs8_lrelu_neon_params; qs8_lrelu_config.element_tile = 32; #elif XNN_ARCH_X86 || XNN_ARCH_X86_64 const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); assert(hardware_config != NULL); if (hardware_config->use_x86_avx2) { qs8_lrelu_config.ukernel = (xnn_vunary_ukernel_fn) xnn_qs8_vlrelu_ukernel__avx2_x32; qs8_lrelu_config.init.qs8_lrelu = xnn_init_qs8_lrelu_avx2_params; qs8_lrelu_config.element_tile = 32; } else if (hardware_config->use_x86_avx) { qs8_lrelu_config.ukernel = (xnn_vunary_ukernel_fn) xnn_qs8_vlrelu_ukernel__avx_x32; qs8_lrelu_config.init.qs8_lrelu = xnn_init_qs8_lrelu_avx_params; qs8_lrelu_config.element_tile = 32; } else if (hardware_config->use_x86_sse4_1) { qs8_lrelu_config.ukernel = (xnn_vunary_ukernel_fn) xnn_qs8_vlrelu_ukernel__sse41_x32; qs8_lrelu_config.init.qs8_lrelu = xnn_init_qs8_lrelu_sse2_params; qs8_lrelu_config.element_tile = 32; } else if (hardware_config->use_x86_sse4_1) { qs8_lrelu_config.ukernel = (xnn_vunary_ukernel_fn) xnn_qs8_vlrelu_ukernel__ssse3_x32; qs8_lrelu_config.init.qs8_lrelu = xnn_init_qs8_lrelu_sse2_params; qs8_lrelu_config.element_tile = 32; } else { qs8_lrelu_config.ukernel = (xnn_vunary_ukernel_fn) xnn_qs8_vlrelu_ukernel__sse2_x32; qs8_lrelu_config.init.qs8_lrelu = xnn_init_qs8_lrelu_sse2_params; qs8_lrelu_config.element_tile = 32; } #elif XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); assert(hardware_config != NULL); #if XNN_ARCH_WASMRELAXEDSIMD if (hardware_config->is_x86) { qs8_lrelu_config.ukernel = (xnn_vunary_ukernel_fn) xnn_qs8_vlrelu_ukernel__wasmrelaxedsimd_x86_x32; qs8_lrelu_config.init.qs8_lrelu = xnn_init_qs8_lrelu_wasmsimd_x86_params; qs8_lrelu_config.element_tile = 32; } else { qs8_lrelu_config.ukernel = (xnn_vunary_ukernel_fn) xnn_qs8_vlrelu_ukernel__wasmrelaxedsimd_arm_x32; qs8_lrelu_config.init.qs8_lrelu = xnn_init_qs8_lrelu_wasmsimd_arm_params; qs8_lrelu_config.element_tile = 32; } #else if (hardware_config->is_x86) { qs8_lrelu_config.ukernel = (xnn_vunary_ukernel_fn) xnn_qs8_vlrelu_ukernel__wasmsimd_x86_x16; qs8_lrelu_config.init.qs8_lrelu = xnn_init_qs8_lrelu_wasmsimd_x86_params; qs8_lrelu_config.element_tile = 16; } else { qs8_lrelu_config.ukernel = (xnn_vunary_ukernel_fn) xnn_qs8_vlrelu_ukernel__wasmsimd_arm_x32; qs8_lrelu_config.init.qs8_lrelu = xnn_init_qs8_lrelu_wasmsimd_arm_params; qs8_lrelu_config.element_tile = 32; } #endif #elif XNN_ARCH_WASM const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); assert(hardware_config != NULL); if (hardware_config->is_x86) { qs8_lrelu_config.ukernel = (xnn_vunary_ukernel_fn) xnn_qs8_vlrelu_ukernel__scalar_select_x4; qs8_lrelu_config.init.qs8_lrelu = xnn_init_qs8_lrelu_scalar_select_params; qs8_lrelu_config.element_tile = 4; } else { qs8_lrelu_config.ukernel = (xnn_vunary_ukernel_fn) xnn_qs8_vlrelu_ukernel__scalar_andxor_x4; qs8_lrelu_config.init.qs8_lrelu = xnn_init_qs8_lrelu_scalar_andxor_params; qs8_lrelu_config.element_tile = 4; } #elif XNN_ARCH_RISCV qs8_lrelu_config.ukernel = (xnn_vunary_ukernel_fn) xnn_qs8_vlrelu_ukernel__scalar_andxor_x4; qs8_lrelu_config.init.qs8_lrelu = xnn_init_qs8_lrelu_scalar_andxor_params; qs8_lrelu_config.element_tile = 4; #endif } static void init_qs8_to_f32_cvt_config(void) { #if XNN_ARCH_ARM const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); assert(hardware_config != NULL); if (hardware_config->use_arm_neon) { qs8_to_f32_cvt_config.ukernel = (xnn_vunary_ukernel_fn) xnn_qs8_f32_vcvt_ukernel__neon_x32; qs8_to_f32_cvt_config.init.qs8_f32_cvt = xnn_init_qs8_f32_cvt_neon_params; qs8_to_f32_cvt_config.element_tile = 32; } else if (!XNN_PLATFORM_MOBILE) { qs8_to_f32_cvt_config.ukernel = (xnn_vunary_ukernel_fn) xnn_qs8_f32_vcvt_ukernel__scalar_x4; qs8_to_f32_cvt_config.init.qs8_f32_cvt = xnn_init_qs8_f32_cvt_scalar_params; qs8_to_f32_cvt_config.element_tile = 4; } #elif XNN_ARCH_ARM64 qs8_to_f32_cvt_config.ukernel = (xnn_vunary_ukernel_fn) xnn_qs8_f32_vcvt_ukernel__neon_x32; qs8_to_f32_cvt_config.init.qs8_f32_cvt = xnn_init_qs8_f32_cvt_neon_params; qs8_to_f32_cvt_config.element_tile = 32; #elif XNN_ARCH_X86 || XNN_ARCH_X86_64 const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); assert(hardware_config != NULL); if (hardware_config->use_x86_avx512skx) { qs8_to_f32_cvt_config.ukernel = (xnn_vunary_ukernel_fn) xnn_qs8_f32_vcvt_ukernel__avx512skx_x32; qs8_to_f32_cvt_config.init.qs8_f32_cvt = xnn_init_qs8_f32_cvt_avx512_params; qs8_to_f32_cvt_config.element_tile = 32; } else if (hardware_config->use_x86_avx2) { qs8_to_f32_cvt_config.ukernel = (xnn_vunary_ukernel_fn) xnn_qs8_f32_vcvt_ukernel__avx2_x16; qs8_to_f32_cvt_config.init.qs8_f32_cvt = xnn_init_qs8_f32_cvt_avx_params; qs8_to_f32_cvt_config.element_tile = 16; } else if (hardware_config->use_x86_avx) { qs8_to_f32_cvt_config.ukernel = (xnn_vunary_ukernel_fn) xnn_qs8_f32_vcvt_ukernel__avx_x32; qs8_to_f32_cvt_config.init.qs8_f32_cvt = xnn_init_qs8_f32_cvt_avx_params; qs8_to_f32_cvt_config.element_tile = 32; } else if (hardware_config->use_x86_sse4_1) { qs8_to_f32_cvt_config.ukernel = (xnn_vunary_ukernel_fn) xnn_qs8_f32_vcvt_ukernel__sse41_x16; qs8_to_f32_cvt_config.init.qs8_f32_cvt = xnn_init_qs8_f32_cvt_sse4_params; qs8_to_f32_cvt_config.element_tile = 16; } else { qs8_to_f32_cvt_config.ukernel = (xnn_vunary_ukernel_fn) xnn_qs8_f32_vcvt_ukernel__sse2_x32; qs8_to_f32_cvt_config.init.qs8_f32_cvt = xnn_init_qs8_f32_cvt_sse2_params; qs8_to_f32_cvt_config.element_tile = 32; } #elif XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD qs8_to_f32_cvt_config.ukernel = (xnn_vunary_ukernel_fn) xnn_qs8_f32_vcvt_ukernel__wasmsimd_x32; qs8_to_f32_cvt_config.init.qs8_f32_cvt = xnn_init_qs8_f32_cvt_wasmsimd_params; qs8_to_f32_cvt_config.element_tile = 32; #elif XNN_ARCH_WASM qs8_to_f32_cvt_config.ukernel = (xnn_vunary_ukernel_fn) xnn_qs8_f32_vcvt_ukernel__scalar_x1; qs8_to_f32_cvt_config.init.qs8_f32_cvt = xnn_init_qs8_f32_cvt_scalar_params; qs8_to_f32_cvt_config.element_tile = 1; #elif XNN_ARCH_RISCV qs8_to_f32_cvt_config.ukernel = (xnn_vunary_ukernel_fn) xnn_qs8_f32_vcvt_ukernel__scalar_x4; qs8_to_f32_cvt_config.init.qs8_f32_cvt = xnn_init_qs8_f32_cvt_scalar_params; qs8_to_f32_cvt_config.element_tile = 4; #endif } static void init_qu8_cvt_config(void) { #if XNN_ARCH_ARM const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); assert(hardware_config != NULL); if (hardware_config->use_arm_neon_v8) { qu8_cvt_config.ukernel = (xnn_vunary_ukernel_fn) xnn_qu8_vcvt_ukernel__neon_x32; qu8_cvt_config.init.qu8_cvt = xnn_init_qu8_cvt_neon_params; qu8_cvt_config.element_tile = 32; } else if (!XNN_PLATFORM_MOBILE) { qu8_cvt_config.ukernel = (xnn_vunary_ukernel_fn) xnn_qu8_vcvt_ukernel__armsimd32_x8; qu8_cvt_config.init.qu8_cvt = xnn_init_qu8_cvt_armsimd32_params; qu8_cvt_config.element_tile = 8; } #elif XNN_ARCH_ARM64 qu8_cvt_config.ukernel = (xnn_vunary_ukernel_fn) xnn_qu8_vcvt_ukernel__neon_x32; qu8_cvt_config.init.qu8_cvt = xnn_init_qu8_cvt_neon_params; qu8_cvt_config.element_tile = 32; #elif XNN_ARCH_X86 || XNN_ARCH_X86_64 const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); assert(hardware_config != NULL); if (hardware_config->use_x86_avx2) { qu8_cvt_config.ukernel = (xnn_vunary_ukernel_fn) xnn_qu8_vcvt_ukernel__avx2_x32; qu8_cvt_config.init.qu8_cvt = xnn_init_qu8_cvt_avx2_params; qu8_cvt_config.element_tile = 32; } else if (hardware_config->use_x86_avx) { qu8_cvt_config.ukernel = (xnn_vunary_ukernel_fn) xnn_qu8_vcvt_ukernel__avx_x32; qu8_cvt_config.init.qu8_cvt = xnn_init_qu8_cvt_ssse3_params; qu8_cvt_config.element_tile = 32; } else if (hardware_config->use_x86_sse4_1) { qu8_cvt_config.ukernel = (xnn_vunary_ukernel_fn) xnn_qu8_vcvt_ukernel__sse41_x32; qu8_cvt_config.init.qu8_cvt = xnn_init_qu8_cvt_ssse3_params; qu8_cvt_config.element_tile = 32; } else if (hardware_config->use_x86_ssse3) { qu8_cvt_config.ukernel = (xnn_vunary_ukernel_fn) xnn_qu8_vcvt_ukernel__ssse3_x32; qu8_cvt_config.init.qu8_cvt = xnn_init_qu8_cvt_ssse3_params; qu8_cvt_config.element_tile = 32; } else { qu8_cvt_config.ukernel = (xnn_vunary_ukernel_fn) xnn_qu8_vcvt_ukernel__sse2_x32; qu8_cvt_config.init.qu8_cvt = xnn_init_qu8_cvt_sse2_params; qu8_cvt_config.element_tile = 32; } #elif XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD #if XNN_ARCH_WASMRELAXEDSIMD qu8_cvt_config.ukernel = (xnn_vunary_ukernel_fn) xnn_qu8_vcvt_ukernel__wasmrelaxedsimd_x32; qu8_cvt_config.init.qu8_cvt = xnn_init_qu8_cvt_wasmsimd_params; qu8_cvt_config.element_tile = 32; #else qu8_cvt_config.ukernel = (xnn_vunary_ukernel_fn) xnn_qu8_vcvt_ukernel__wasmsimd_x16; qu8_cvt_config.init.qu8_cvt = xnn_init_qu8_cvt_wasmsimd_params; qu8_cvt_config.element_tile = 16; #endif #elif XNN_ARCH_WASM const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); assert(hardware_config != NULL); if (hardware_config->is_x86) { qu8_cvt_config.ukernel = (xnn_vunary_ukernel_fn) xnn_qu8_vcvt_ukernel__scalar_x1; qu8_cvt_config.init.qu8_cvt = xnn_init_qu8_cvt_scalar_params; qu8_cvt_config.element_tile = 1; } else { qu8_cvt_config.ukernel = (xnn_vunary_ukernel_fn) xnn_qu8_vcvt_ukernel__scalar_x4; qu8_cvt_config.init.qu8_cvt = xnn_init_qu8_cvt_scalar_params; qu8_cvt_config.element_tile = 4; } #elif XNN_ARCH_RISCV qu8_cvt_config.ukernel = (xnn_vunary_ukernel_fn) xnn_qu8_vcvt_ukernel__scalar_x4; qu8_cvt_config.init.qu8_cvt = xnn_init_qu8_cvt_scalar_params; qu8_cvt_config.element_tile = 4; #endif } static void init_qu8_lrelu_config(void) { #if XNN_ARCH_ARM const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); assert(hardware_config != NULL); if (hardware_config->use_arm_neon) { qu8_lrelu_config.ukernel = (xnn_vunary_ukernel_fn) xnn_qu8_vlrelu_ukernel__neon_x32; qu8_lrelu_config.init.qu8_lrelu = xnn_init_qu8_lrelu_neon_params; qu8_lrelu_config.element_tile = 32; } else if (!XNN_PLATFORM_MOBILE) { qu8_lrelu_config.ukernel = (xnn_vunary_ukernel_fn) xnn_qu8_vlrelu_ukernel__armsimd32_x4; qu8_lrelu_config.init.qu8_lrelu = xnn_init_qu8_lrelu_armsimd32_params; qu8_lrelu_config.element_tile = 4; } #elif XNN_ARCH_ARM64 qu8_lrelu_config.ukernel = (xnn_vunary_ukernel_fn) xnn_qu8_vlrelu_ukernel__neon_x32; qu8_lrelu_config.init.qu8_lrelu = xnn_init_qu8_lrelu_neon_params; qu8_lrelu_config.element_tile = 32; #elif XNN_ARCH_X86 || XNN_ARCH_X86_64 const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); assert(hardware_config != NULL); if (hardware_config->use_x86_avx2) { qu8_lrelu_config.ukernel = (xnn_vunary_ukernel_fn) xnn_qu8_vlrelu_ukernel__avx2_x32; qu8_lrelu_config.init.qu8_lrelu = xnn_init_qu8_lrelu_avx2_params; qu8_lrelu_config.element_tile = 32; } else if (hardware_config->use_x86_avx) { qu8_lrelu_config.ukernel = (xnn_vunary_ukernel_fn) xnn_qu8_vlrelu_ukernel__avx_x32; qu8_lrelu_config.init.qu8_lrelu = xnn_init_qu8_lrelu_avx_params; qu8_lrelu_config.element_tile = 32; } else if (hardware_config->use_x86_sse4_1) { qu8_lrelu_config.ukernel = (xnn_vunary_ukernel_fn) xnn_qu8_vlrelu_ukernel__sse41_x32; qu8_lrelu_config.init.qu8_lrelu = xnn_init_qu8_lrelu_sse2_params; qu8_lrelu_config.element_tile = 32; } else if (hardware_config->use_x86_sse4_1) { qu8_lrelu_config.ukernel = (xnn_vunary_ukernel_fn) xnn_qu8_vlrelu_ukernel__ssse3_x32; qu8_lrelu_config.init.qu8_lrelu = xnn_init_qu8_lrelu_sse2_params; qu8_lrelu_config.element_tile = 32; } else { qu8_lrelu_config.ukernel = (xnn_vunary_ukernel_fn) xnn_qu8_vlrelu_ukernel__sse2_x32; qu8_lrelu_config.init.qu8_lrelu = xnn_init_qu8_lrelu_sse2_params; qu8_lrelu_config.element_tile = 32; } #elif XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); assert(hardware_config != NULL); #if XNN_ARCH_WASMRELAXEDSIMD if (hardware_config->is_x86) { qu8_lrelu_config.ukernel = (xnn_vunary_ukernel_fn) xnn_qu8_vlrelu_ukernel__wasmrelaxedsimd_x86_x32; qu8_lrelu_config.init.qu8_lrelu = xnn_init_qu8_lrelu_wasmsimd_x86_params; qu8_lrelu_config.element_tile = 32; } else { qu8_lrelu_config.ukernel = (xnn_vunary_ukernel_fn) xnn_qu8_vlrelu_ukernel__wasmrelaxedsimd_arm_x32; qu8_lrelu_config.init.qu8_lrelu = xnn_init_qu8_lrelu_wasmsimd_arm_params; qu8_lrelu_config.element_tile = 32; } #else if (hardware_config->is_x86) { qu8_lrelu_config.ukernel = (xnn_vunary_ukernel_fn) xnn_qu8_vlrelu_ukernel__wasmsimd_x86_x16; qu8_lrelu_config.init.qu8_lrelu = xnn_init_qu8_lrelu_wasmsimd_x86_params; qu8_lrelu_config.element_tile = 16; } else { qu8_lrelu_config.ukernel = (xnn_vunary_ukernel_fn) xnn_qu8_vlrelu_ukernel__wasmsimd_arm_x32; qu8_lrelu_config.init.qu8_lrelu = xnn_init_qu8_lrelu_wasmsimd_arm_params; qu8_lrelu_config.element_tile = 32; } #endif #elif XNN_ARCH_WASM const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); assert(hardware_config != NULL); if (hardware_config->is_x86) { qu8_lrelu_config.ukernel = (xnn_vunary_ukernel_fn) xnn_qu8_vlrelu_ukernel__scalar_select_x4; qu8_lrelu_config.init.qu8_lrelu = xnn_init_qu8_lrelu_scalar_select_params; qu8_lrelu_config.element_tile = 4; } else { qu8_lrelu_config.ukernel = (xnn_vunary_ukernel_fn) xnn_qu8_vlrelu_ukernel__scalar_andxor_x4; qu8_lrelu_config.init.qu8_lrelu = xnn_init_qu8_lrelu_scalar_andxor_params; qu8_lrelu_config.element_tile = 4; } #elif XNN_ARCH_RISCV qu8_lrelu_config.ukernel = (xnn_vunary_ukernel_fn) xnn_qu8_vlrelu_ukernel__scalar_andxor_x4; qu8_lrelu_config.init.qu8_lrelu = xnn_init_qu8_lrelu_scalar_andxor_params; qu8_lrelu_config.element_tile = 4; #endif } static void init_qu8_to_f32_cvt_config(void) { #if XNN_ARCH_ARM const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); assert(hardware_config != NULL); if (hardware_config->use_arm_neon) { qu8_to_f32_cvt_config.ukernel = (xnn_vunary_ukernel_fn) xnn_qu8_f32_vcvt_ukernel__neon_x32; qu8_to_f32_cvt_config.init.qu8_f32_cvt = xnn_init_qu8_f32_cvt_neon_params; qu8_to_f32_cvt_config.element_tile = 32; } else if (!XNN_PLATFORM_MOBILE) { qu8_to_f32_cvt_config.ukernel = (xnn_vunary_ukernel_fn) xnn_qu8_f32_vcvt_ukernel__scalar_x4; qu8_to_f32_cvt_config.init.qu8_f32_cvt = xnn_init_qu8_f32_cvt_scalar_params; qu8_to_f32_cvt_config.element_tile = 4; } #elif XNN_ARCH_ARM64 qu8_to_f32_cvt_config.ukernel = (xnn_vunary_ukernel_fn) xnn_qu8_f32_vcvt_ukernel__neon_x32; qu8_to_f32_cvt_config.init.qu8_f32_cvt = xnn_init_qu8_f32_cvt_neon_params; qu8_to_f32_cvt_config.element_tile = 32; #elif XNN_ARCH_X86 || XNN_ARCH_X86_64 const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); assert(hardware_config != NULL); if (hardware_config->use_x86_avx512skx) { qu8_to_f32_cvt_config.ukernel = (xnn_vunary_ukernel_fn) xnn_qu8_f32_vcvt_ukernel__avx512skx_x32; qu8_to_f32_cvt_config.init.qu8_f32_cvt = xnn_init_qu8_f32_cvt_avx512_params; qu8_to_f32_cvt_config.element_tile = 32; } else if (hardware_config->use_x86_avx2) { qu8_to_f32_cvt_config.ukernel = (xnn_vunary_ukernel_fn) xnn_qu8_f32_vcvt_ukernel__avx2_x16; qu8_to_f32_cvt_config.init.qu8_f32_cvt = xnn_init_qu8_f32_cvt_avx_params; qu8_to_f32_cvt_config.element_tile = 16; } else if (hardware_config->use_x86_avx) { qu8_to_f32_cvt_config.ukernel = (xnn_vunary_ukernel_fn) xnn_qu8_f32_vcvt_ukernel__avx_x32; qu8_to_f32_cvt_config.init.qu8_f32_cvt = xnn_init_qu8_f32_cvt_avx_params; qu8_to_f32_cvt_config.element_tile = 32; } else if (hardware_config->use_x86_sse4_1) { qu8_to_f32_cvt_config.ukernel = (xnn_vunary_ukernel_fn) xnn_qu8_f32_vcvt_ukernel__sse41_x16; qu8_to_f32_cvt_config.init.qu8_f32_cvt = xnn_init_qu8_f32_cvt_sse4_params; qu8_to_f32_cvt_config.element_tile = 16; } else { qu8_to_f32_cvt_config.ukernel = (xnn_vunary_ukernel_fn) xnn_qu8_f32_vcvt_ukernel__sse2_x32; qu8_to_f32_cvt_config.init.qu8_f32_cvt = xnn_init_qu8_f32_cvt_sse2_params; qu8_to_f32_cvt_config.element_tile = 32; } #elif XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD qu8_to_f32_cvt_config.ukernel = (xnn_vunary_ukernel_fn) xnn_qu8_f32_vcvt_ukernel__wasmsimd_x32; qu8_to_f32_cvt_config.init.qu8_f32_cvt = xnn_init_qu8_f32_cvt_wasmsimd_params; qu8_to_f32_cvt_config.element_tile = 32; #elif XNN_ARCH_WASM qu8_to_f32_cvt_config.ukernel = (xnn_vunary_ukernel_fn) xnn_qu8_f32_vcvt_ukernel__scalar_x1; qu8_to_f32_cvt_config.init.qu8_f32_cvt = xnn_init_qu8_f32_cvt_scalar_params; qu8_to_f32_cvt_config.element_tile = 1; #elif XNN_ARCH_RISCV qu8_to_f32_cvt_config.ukernel = (xnn_vunary_ukernel_fn) xnn_qu8_f32_vcvt_ukernel__scalar_x4; qu8_to_f32_cvt_config.init.qu8_f32_cvt = xnn_init_qu8_f32_cvt_scalar_params; qu8_to_f32_cvt_config.element_tile = 4; #endif } static void init_s8_clamp_config(void) { #if XNN_ARCH_ARM const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); assert(hardware_config != NULL); if (hardware_config->use_arm_neon) { s8_clamp_config.ukernel = (xnn_vunary_ukernel_fn) xnn_s8_vclamp_ukernel__neon_x64; s8_clamp_config.init.s8_minmax = xnn_init_s8_minmax_neon_params; s8_clamp_config.element_tile = 64; } else if (!XNN_PLATFORM_MOBILE) { s8_clamp_config.ukernel = (xnn_vunary_ukernel_fn) xnn_s8_vclamp_ukernel__scalar_x4; s8_clamp_config.init.s8_minmax = xnn_init_s8_minmax_scalar_params; s8_clamp_config.element_tile = 4; } #elif XNN_ARCH_ARM64 s8_clamp_config.ukernel = (xnn_vunary_ukernel_fn) xnn_s8_vclamp_ukernel__neon_x64; s8_clamp_config.init.s8_minmax = xnn_init_s8_minmax_neon_params; s8_clamp_config.element_tile = 64; #elif XNN_ARCH_X86 || XNN_ARCH_X86_64 const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); assert(hardware_config != NULL); if (hardware_config->use_x86_sse4_1) { s8_clamp_config.ukernel = (xnn_vunary_ukernel_fn) xnn_s8_vclamp_ukernel__sse41_x64; s8_clamp_config.init.s8_minmax = xnn_init_s8_minmax_sse4_params; s8_clamp_config.element_tile = 64; } else { s8_clamp_config.ukernel = (xnn_vunary_ukernel_fn) xnn_s8_vclamp_ukernel__sse2_x64; s8_clamp_config.init.s8_minmax = xnn_init_s8_minmax_sse2_params; s8_clamp_config.element_tile = 64; } #elif XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD s8_clamp_config.ukernel = (xnn_vunary_ukernel_fn) xnn_s8_vclamp_ukernel__wasmsimd_x64; s8_clamp_config.init.s8_minmax = xnn_init_s8_minmax_wasmsimd_params; s8_clamp_config.element_tile = 64; #elif XNN_ARCH_WASM s8_clamp_config.ukernel = (xnn_vunary_ukernel_fn) xnn_s8_vclamp_ukernel__scalar_x4; s8_clamp_config.init.s8_minmax = xnn_init_s8_minmax_scalar_params; s8_clamp_config.element_tile = 4; #elif XNN_ARCH_RISCV s8_clamp_config.ukernel = (xnn_vunary_ukernel_fn) xnn_s8_vclamp_ukernel__scalar_x4; s8_clamp_config.init.s8_minmax = xnn_init_s8_minmax_scalar_params; s8_clamp_config.element_tile = 4; #endif } static void init_u8_clamp_config(void) { #if XNN_ARCH_ARM const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); assert(hardware_config != NULL); if (hardware_config->use_arm_neon) { u8_clamp_config.ukernel = (xnn_vunary_ukernel_fn) xnn_u8_vclamp_ukernel__neon_x64; u8_clamp_config.init.u8_minmax = xnn_init_u8_minmax_neon_params; u8_clamp_config.element_tile = 64; } else if (!XNN_PLATFORM_MOBILE) { u8_clamp_config.ukernel = (xnn_vunary_ukernel_fn) xnn_u8_vclamp_ukernel__scalar_x4; u8_clamp_config.init.u8_minmax = xnn_init_u8_minmax_scalar_params; u8_clamp_config.element_tile = 4; } #elif XNN_ARCH_ARM64 u8_clamp_config.ukernel = (xnn_vunary_ukernel_fn) xnn_u8_vclamp_ukernel__neon_x64; u8_clamp_config.init.u8_minmax = xnn_init_u8_minmax_neon_params; u8_clamp_config.element_tile = 64; #elif XNN_ARCH_X86 || XNN_ARCH_X86_64 u8_clamp_config.ukernel = (xnn_vunary_ukernel_fn) xnn_u8_vclamp_ukernel__sse2_x64; u8_clamp_config.init.u8_minmax = xnn_init_u8_minmax_sse2_params; u8_clamp_config.element_tile = 64; #elif XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD u8_clamp_config.ukernel = (xnn_vunary_ukernel_fn) xnn_u8_vclamp_ukernel__wasmsimd_x64; u8_clamp_config.init.u8_minmax = xnn_init_u8_minmax_wasmsimd_params; u8_clamp_config.element_tile = 64; #elif XNN_ARCH_WASM u8_clamp_config.ukernel = (xnn_vunary_ukernel_fn) xnn_u8_vclamp_ukernel__scalar_x4; u8_clamp_config.init.u8_minmax = xnn_init_u8_minmax_scalar_params; u8_clamp_config.element_tile = 4; #elif XNN_ARCH_RISCV u8_clamp_config.ukernel = (xnn_vunary_ukernel_fn) xnn_u8_vclamp_ukernel__scalar_x4; u8_clamp_config.init.u8_minmax = xnn_init_u8_minmax_scalar_params; u8_clamp_config.element_tile = 4; #endif } static void init_xx_copy_config(void) { #if XNN_ARCH_ARM xx_copy_config.ukernel = (xnn_vunary_ukernel_fn) xnn_xx_copy_ukernel__scalar_memcpy; xx_copy_config.element_tile = 1; #elif XNN_ARCH_ARM64 xx_copy_config.ukernel = (xnn_vunary_ukernel_fn) xnn_xx_copy_ukernel__scalar_memcpy; xx_copy_config.element_tile = 1; #elif XNN_ARCH_X86 || XNN_ARCH_X86_64 xx_copy_config.ukernel = (xnn_vunary_ukernel_fn) xnn_xx_copy_ukernel__scalar_memcpy; xx_copy_config.element_tile = 1; #elif XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD xx_copy_config.ukernel = (xnn_vunary_ukernel_fn) xnn_xx_copy_ukernel__scalar_memcpy; xx_copy_config.element_tile = 1; #elif XNN_ARCH_WASM xx_copy_config.ukernel = (xnn_vunary_ukernel_fn) xnn_xx_copy_ukernel__scalar_memcpy; xx_copy_config.element_tile = 1; #elif XNN_ARCH_RISCV xx_copy_config.ukernel = (xnn_vunary_ukernel_fn) xnn_xx_copy_ukernel__scalar_memcpy; xx_copy_config.element_tile = 1; #endif } #if XNN_PLATFORM_WINDOWS static BOOL CALLBACK init_f16_abs_config_windows(PINIT_ONCE init_once, PVOID parameter, PVOID* context) { init_f16_abs_config(); return TRUE; } static BOOL CALLBACK init_f16_clamp_config_windows(PINIT_ONCE init_once, PVOID parameter, PVOID* context) { init_f16_clamp_config(); return TRUE; } static BOOL CALLBACK init_f16_elu_config_windows(PINIT_ONCE init_once, PVOID parameter, PVOID* context) { init_f16_elu_config(); return TRUE; } static BOOL CALLBACK init_f16_hswish_config_windows(PINIT_ONCE init_once, PVOID parameter, PVOID* context) { init_f16_hswish_config(); return TRUE; } static BOOL CALLBACK init_f16_lrelu_config_windows(PINIT_ONCE init_once, PVOID parameter, PVOID* context) { init_f16_lrelu_config(); return TRUE; } static BOOL CALLBACK init_f16_neg_config_windows(PINIT_ONCE init_once, PVOID parameter, PVOID* context) { init_f16_neg_config(); return TRUE; } static BOOL CALLBACK init_f16_rndd_config_windows(PINIT_ONCE init_once, PVOID parameter, PVOID* context) { init_f16_rndd_config(); return TRUE; } static BOOL CALLBACK init_f16_rndne_config_windows(PINIT_ONCE init_once, PVOID parameter, PVOID* context) { init_f16_rndne_config(); return TRUE; } static BOOL CALLBACK init_f16_rndu_config_windows(PINIT_ONCE init_once, PVOID parameter, PVOID* context) { init_f16_rndu_config(); return TRUE; } static BOOL CALLBACK init_f16_rndz_config_windows(PINIT_ONCE init_once, PVOID parameter, PVOID* context) { init_f16_rndz_config(); return TRUE; } static BOOL CALLBACK init_f16_sigmoid_config_windows(PINIT_ONCE init_once, PVOID parameter, PVOID* context) { init_f16_sigmoid_config(); return TRUE; } static BOOL CALLBACK init_f16_sqr_config_windows(PINIT_ONCE init_once, PVOID parameter, PVOID* context) { init_f16_sqr_config(); return TRUE; } static BOOL CALLBACK init_f16_sqrt_config_windows(PINIT_ONCE init_once, PVOID parameter, PVOID* context) { init_f16_sqrt_config(); return TRUE; } static BOOL CALLBACK init_f16_tanh_config_windows(PINIT_ONCE init_once, PVOID parameter, PVOID* context) { init_f16_tanh_config(); return TRUE; } static BOOL CALLBACK init_f32_abs_config_windows(PINIT_ONCE init_once, PVOID parameter, PVOID* context) { init_f32_abs_config(); return TRUE; } static BOOL CALLBACK init_f32_clamp_config_windows(PINIT_ONCE init_once, PVOID parameter, PVOID* context) { init_f32_clamp_config(); return TRUE; } static BOOL CALLBACK init_f32_elu_config_windows(PINIT_ONCE init_once, PVOID parameter, PVOID* context) { init_f32_elu_config(); return TRUE; } static BOOL CALLBACK init_f32_hswish_config_windows(PINIT_ONCE init_once, PVOID parameter, PVOID* context) { init_f32_hswish_config(); return TRUE; } static BOOL CALLBACK init_f32_lrelu_config_windows(PINIT_ONCE init_once, PVOID parameter, PVOID* context) { init_f32_lrelu_config(); return TRUE; } static BOOL CALLBACK init_f32_neg_config_windows(PINIT_ONCE init_once, PVOID parameter, PVOID* context) { init_f32_neg_config(); return TRUE; } static BOOL CALLBACK init_f32_relu_config_windows(PINIT_ONCE init_once, PVOID parameter, PVOID* context) { init_f32_relu_config(); return TRUE; } static BOOL CALLBACK init_f32_rndd_config_windows(PINIT_ONCE init_once, PVOID parameter, PVOID* context) { init_f32_rndd_config(); return TRUE; } static BOOL CALLBACK init_f32_rndne_config_windows(PINIT_ONCE init_once, PVOID parameter, PVOID* context) { init_f32_rndne_config(); return TRUE; } static BOOL CALLBACK init_f32_rndu_config_windows(PINIT_ONCE init_once, PVOID parameter, PVOID* context) { init_f32_rndu_config(); return TRUE; } static BOOL CALLBACK init_f32_rndz_config_windows(PINIT_ONCE init_once, PVOID parameter, PVOID* context) { init_f32_rndz_config(); return TRUE; } static BOOL CALLBACK init_f32_sigmoid_config_windows(PINIT_ONCE init_once, PVOID parameter, PVOID* context) { init_f32_sigmoid_config(); return TRUE; } static BOOL CALLBACK init_f32_sqr_config_windows(PINIT_ONCE init_once, PVOID parameter, PVOID* context) { init_f32_sqr_config(); return TRUE; } static BOOL CALLBACK init_f32_sqrt_config_windows(PINIT_ONCE init_once, PVOID parameter, PVOID* context) { init_f32_sqrt_config(); return TRUE; } static BOOL CALLBACK init_f32_tanh_config_windows(PINIT_ONCE init_once, PVOID parameter, PVOID* context) { init_f32_tanh_config(); return TRUE; } static BOOL CALLBACK init_qs8_lrelu_config_windows(PINIT_ONCE init_once, PVOID parameter, PVOID* context) { init_qs8_lrelu_config(); return TRUE; } static BOOL CALLBACK init_qu8_lrelu_config_windows(PINIT_ONCE init_once, PVOID parameter, PVOID* context) { init_qu8_lrelu_config(); return TRUE; } static BOOL CALLBACK init_s8_clamp_config_windows(PINIT_ONCE init_once, PVOID parameter, PVOID* context) { init_s8_clamp_config(); return TRUE; } static BOOL CALLBACK init_u8_clamp_config_windows(PINIT_ONCE init_once, PVOID parameter, PVOID* context) { init_u8_clamp_config(); return TRUE; } static BOOL CALLBACK init_f16_to_f32_cvt_config_windows(PINIT_ONCE init_once, PVOID parameter, PVOID* context) { init_f16_to_f32_cvt_config(); return TRUE; } static BOOL CALLBACK init_f32_to_f16_cvt_config_windows(PINIT_ONCE init_once, PVOID parameter, PVOID* context) { init_f32_to_f16_cvt_config(); return TRUE; } static BOOL CALLBACK init_f32_to_qs8_cvt_config_windows(PINIT_ONCE init_once, PVOID parameter, PVOID* context) { init_f32_to_qs8_cvt_config(); return TRUE; } static BOOL CALLBACK init_f32_to_qu8_cvt_config_windows(PINIT_ONCE init_once, PVOID parameter, PVOID* context) { init_f32_to_qu8_cvt_config(); return TRUE; } static BOOL CALLBACK init_qs8_cvt_config_windows(PINIT_ONCE init_once, PVOID parameter, PVOID* context) { init_qs8_cvt_config(); return TRUE; } static BOOL CALLBACK init_qs8_to_f32_cvt_config_windows(PINIT_ONCE init_once, PVOID parameter, PVOID* context) { init_qs8_to_f32_cvt_config(); return TRUE; } static BOOL CALLBACK init_qs16_to_qs8_cvt_config_windows(PINIT_ONCE init_once, PVOID parameter, PVOID* context) { init_qs16_to_qs8_cvt_config(); return TRUE; } static BOOL CALLBACK init_qu8_cvt_config_windows(PINIT_ONCE init_once, PVOID parameter, PVOID* context) { init_qu8_cvt_config(); return TRUE; } static BOOL CALLBACK init_qu8_to_f32_cvt_config_windows(PINIT_ONCE init_once, PVOID parameter, PVOID* context) { init_qu8_to_f32_cvt_config(); return TRUE; } static BOOL CALLBACK init_xx_copy_config_windows(PINIT_ONCE init_once, PVOID parameter, PVOID* context) { init_xx_copy_config(); return TRUE; } #endif const struct xnn_unary_elementwise_config* xnn_init_f16_abs_config() { const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); if (hardware_config == NULL || !xnn_is_f16_compatible_config(hardware_config)) { return NULL; } #if XNN_PLATFORM_WINDOWS InitOnceExecuteOnce(&init_guard_f16_abs, &init_f16_abs_config_windows, NULL, NULL); #else pthread_once(&init_guard_f16_abs, &init_f16_abs_config); #endif return &f16_abs_config; } const struct xnn_unary_elementwise_config* xnn_init_f16_clamp_config() { const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); if (hardware_config == NULL || !xnn_is_f16_compatible_config(hardware_config)) { return NULL; } #if XNN_PLATFORM_WINDOWS InitOnceExecuteOnce(&init_guard_f16_clamp, &init_f16_clamp_config_windows, NULL, NULL); #else pthread_once(&init_guard_f16_clamp, &init_f16_clamp_config); #endif return &f16_clamp_config; } const struct xnn_unary_elementwise_config* xnn_init_f16_elu_config() { const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); if (hardware_config == NULL || !xnn_is_f16_compatible_config(hardware_config)) { return NULL; } #if XNN_PLATFORM_WINDOWS InitOnceExecuteOnce(&init_guard_f16_elu, &init_f16_elu_config_windows, NULL, NULL); #else pthread_once(&init_guard_f16_elu, &init_f16_elu_config); #endif return &f16_elu_config; } const struct xnn_unary_elementwise_config* xnn_init_f16_hswish_config() { const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); if (hardware_config == NULL || !xnn_is_f16_compatible_config(hardware_config)) { return NULL; } #if XNN_PLATFORM_WINDOWS InitOnceExecuteOnce(&init_guard_f16_hswish, &init_f16_hswish_config_windows, NULL, NULL); #else pthread_once(&init_guard_f16_hswish, &init_f16_hswish_config); #endif return &f16_hswish_config; } const struct xnn_unary_elementwise_config* xnn_init_f16_lrelu_config() { const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); if (hardware_config == NULL || !xnn_is_f16_compatible_config(hardware_config)) { return NULL; } #if XNN_PLATFORM_WINDOWS InitOnceExecuteOnce(&init_guard_f16_lrelu, &init_f16_lrelu_config_windows, NULL, NULL); #else pthread_once(&init_guard_f16_lrelu, &init_f16_lrelu_config); #endif return &f16_lrelu_config; } const struct xnn_unary_elementwise_config* xnn_init_f16_neg_config() { const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); if (hardware_config == NULL || !xnn_is_f16_compatible_config(hardware_config)) { return NULL; } #if XNN_PLATFORM_WINDOWS InitOnceExecuteOnce(&init_guard_f16_neg, &init_f16_neg_config_windows, NULL, NULL); #else pthread_once(&init_guard_f16_neg, &init_f16_neg_config); #endif return &f16_neg_config; } const struct xnn_unary_elementwise_config* xnn_init_f16_rndd_config() { const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); if (hardware_config == NULL || !xnn_is_f16_compatible_config(hardware_config)) { return NULL; } #if XNN_PLATFORM_WINDOWS InitOnceExecuteOnce(&init_guard_f16_rndd, &init_f16_rndd_config_windows, NULL, NULL); #else pthread_once(&init_guard_f16_rndd, &init_f16_rndd_config); #endif return &f16_rndd_config; } const struct xnn_unary_elementwise_config* xnn_init_f16_rndne_config() { const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); if (hardware_config == NULL || !xnn_is_f16_compatible_config(hardware_config)) { return NULL; } #if XNN_PLATFORM_WINDOWS InitOnceExecuteOnce(&init_guard_f16_rndne, &init_f16_rndne_config_windows, NULL, NULL); #else pthread_once(&init_guard_f16_rndne, &init_f16_rndne_config); #endif return &f16_rndne_config; } const struct xnn_unary_elementwise_config* xnn_init_f16_rndu_config() { const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); if (hardware_config == NULL || !xnn_is_f16_compatible_config(hardware_config)) { return NULL; } #if XNN_PLATFORM_WINDOWS InitOnceExecuteOnce(&init_guard_f16_rndu, &init_f16_rndu_config_windows, NULL, NULL); #else pthread_once(&init_guard_f16_rndu, &init_f16_rndu_config); #endif return &f16_rndu_config; } const struct xnn_unary_elementwise_config* xnn_init_f16_rndz_config() { const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); if (hardware_config == NULL || !xnn_is_f16_compatible_config(hardware_config)) { return NULL; } #if XNN_PLATFORM_WINDOWS InitOnceExecuteOnce(&init_guard_f16_rndz, &init_f16_rndz_config_windows, NULL, NULL); #else pthread_once(&init_guard_f16_rndz, &init_f16_rndz_config); #endif return &f16_rndz_config; } const struct xnn_unary_elementwise_config* xnn_init_f16_sigmoid_config() { const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); if (hardware_config == NULL || !xnn_is_f16_compatible_config(hardware_config)) { return NULL; } #if XNN_PLATFORM_WINDOWS InitOnceExecuteOnce(&init_guard_f16_sigmoid, &init_f16_sigmoid_config_windows, NULL, NULL); #else pthread_once(&init_guard_f16_sigmoid, &init_f16_sigmoid_config); #endif return &f16_sigmoid_config; } const struct xnn_unary_elementwise_config* xnn_init_f16_sqr_config() { const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); if (hardware_config == NULL || !xnn_is_f16_compatible_config(hardware_config)) { return NULL; } #if XNN_PLATFORM_WINDOWS InitOnceExecuteOnce(&init_guard_f16_sqr, &init_f16_sqr_config_windows, NULL, NULL); #else pthread_once(&init_guard_f16_sqr, &init_f16_sqr_config); #endif return &f16_sqr_config; } const struct xnn_unary_elementwise_config* xnn_init_f16_sqrt_config() { const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); if (hardware_config == NULL || !xnn_is_f16_compatible_config(hardware_config)) { return NULL; } #if XNN_PLATFORM_WINDOWS InitOnceExecuteOnce(&init_guard_f16_sqrt, &init_f16_sqrt_config_windows, NULL, NULL); #else pthread_once(&init_guard_f16_sqrt, &init_f16_sqrt_config); #endif return &f16_sqrt_config; } const struct xnn_unary_elementwise_config* xnn_init_f16_tanh_config() { const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); if (hardware_config == NULL || !xnn_is_f16_compatible_config(hardware_config)) { return NULL; } #if XNN_PLATFORM_WINDOWS InitOnceExecuteOnce(&init_guard_f16_tanh, &init_f16_tanh_config_windows, NULL, NULL); #else pthread_once(&init_guard_f16_tanh, &init_f16_tanh_config); #endif return &f16_tanh_config; } const struct xnn_unary_elementwise_config* xnn_init_f16_to_f32_cvt_config() { const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); if (hardware_config == NULL) { return NULL; } #if XNN_PLATFORM_WINDOWS InitOnceExecuteOnce(&init_guard_f16_to_f32_cvt, &init_f16_to_f32_cvt_config_windows, NULL, NULL); #else pthread_once(&init_guard_f16_to_f32_cvt, &init_f16_to_f32_cvt_config); #endif return &f16_to_f32_cvt_config; } const struct xnn_unary_elementwise_config* xnn_init_f32_abs_config() { const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); if (hardware_config == NULL) { return NULL; } #if XNN_PLATFORM_WINDOWS InitOnceExecuteOnce(&init_guard_f32_abs, &init_f32_abs_config_windows, NULL, NULL); #else pthread_once(&init_guard_f32_abs, &init_f32_abs_config); #endif return &f32_abs_config; } const struct xnn_unary_elementwise_config* xnn_init_f32_clamp_config() { const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); if (hardware_config == NULL) { return NULL; } #if XNN_PLATFORM_WINDOWS InitOnceExecuteOnce(&init_guard_f32_clamp, &init_f32_clamp_config_windows, NULL, NULL); #else pthread_once(&init_guard_f32_clamp, &init_f32_clamp_config); #endif return &f32_clamp_config; } const struct xnn_unary_elementwise_config* xnn_init_f32_elu_config() { const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); if (hardware_config == NULL) { return NULL; } #if XNN_PLATFORM_WINDOWS InitOnceExecuteOnce(&init_guard_f32_elu, &init_f32_elu_config_windows, NULL, NULL); #else pthread_once(&init_guard_f32_elu, &init_f32_elu_config); #endif return &f32_elu_config; } const struct xnn_unary_elementwise_config* xnn_init_f32_hswish_config() { const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); if (hardware_config == NULL) { return NULL; } #if XNN_PLATFORM_WINDOWS InitOnceExecuteOnce(&init_guard_f32_hswish, &init_f32_hswish_config_windows, NULL, NULL); #else pthread_once(&init_guard_f32_hswish, &init_f32_hswish_config); #endif return &f32_hswish_config; } const struct xnn_unary_elementwise_config* xnn_init_f32_lrelu_config() { const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); if (hardware_config == NULL) { return NULL; } #if XNN_PLATFORM_WINDOWS InitOnceExecuteOnce(&init_guard_f32_lrelu, &init_f32_lrelu_config_windows, NULL, NULL); #else pthread_once(&init_guard_f32_lrelu, &init_f32_lrelu_config); #endif return &f32_lrelu_config; } const struct xnn_unary_elementwise_config* xnn_init_f32_neg_config() { const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); if (hardware_config == NULL) { return NULL; } #if XNN_PLATFORM_WINDOWS InitOnceExecuteOnce(&init_guard_f32_neg, &init_f32_neg_config_windows, NULL, NULL); #else pthread_once(&init_guard_f32_neg, &init_f32_neg_config); #endif return &f32_neg_config; } const struct xnn_unary_elementwise_config* xnn_init_f32_relu_config() { const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); if (hardware_config == NULL) { return NULL; } #if XNN_PLATFORM_WINDOWS InitOnceExecuteOnce(&init_guard_f32_relu, &init_f32_relu_config_windows, NULL, NULL); #else pthread_once(&init_guard_f32_relu, &init_f32_relu_config); #endif return &f32_relu_config; } const struct xnn_unary_elementwise_config* xnn_init_f32_rndd_config() { const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); if (hardware_config == NULL) { return NULL; } #if XNN_PLATFORM_WINDOWS InitOnceExecuteOnce(&init_guard_f32_rndd, &init_f32_rndd_config_windows, NULL, NULL); #else pthread_once(&init_guard_f32_rndd, &init_f32_rndd_config); #endif return &f32_rndd_config; } const struct xnn_unary_elementwise_config* xnn_init_f32_rndne_config() { const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); if (hardware_config == NULL) { return NULL; } #if XNN_PLATFORM_WINDOWS InitOnceExecuteOnce(&init_guard_f32_rndne, &init_f32_rndne_config_windows, NULL, NULL); #else pthread_once(&init_guard_f32_rndne, &init_f32_rndne_config); #endif return &f32_rndne_config; } const struct xnn_unary_elementwise_config* xnn_init_f32_rndu_config() { const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); if (hardware_config == NULL) { return NULL; } #if XNN_PLATFORM_WINDOWS InitOnceExecuteOnce(&init_guard_f32_rndu, &init_f32_rndu_config_windows, NULL, NULL); #else pthread_once(&init_guard_f32_rndu, &init_f32_rndu_config); #endif return &f32_rndu_config; } const struct xnn_unary_elementwise_config* xnn_init_f32_rndz_config() { const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); if (hardware_config == NULL) { return NULL; } #if XNN_PLATFORM_WINDOWS InitOnceExecuteOnce(&init_guard_f32_rndz, &init_f32_rndz_config_windows, NULL, NULL); #else pthread_once(&init_guard_f32_rndz, &init_f32_rndz_config); #endif return &f32_rndz_config; } const struct xnn_unary_elementwise_config* xnn_init_f32_sigmoid_config() { const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); if (hardware_config == NULL) { return NULL; } #if XNN_PLATFORM_WINDOWS InitOnceExecuteOnce(&init_guard_f32_sigmoid, &init_f32_sigmoid_config_windows, NULL, NULL); #else pthread_once(&init_guard_f32_sigmoid, &init_f32_sigmoid_config); #endif return &f32_sigmoid_config; } const struct xnn_unary_elementwise_config* xnn_init_f32_sqr_config() { const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); if (hardware_config == NULL) { return NULL; } #if XNN_PLATFORM_WINDOWS InitOnceExecuteOnce(&init_guard_f32_sqr, &init_f32_sqr_config_windows, NULL, NULL); #else pthread_once(&init_guard_f32_sqr, &init_f32_sqr_config); #endif return &f32_sqr_config; } const struct xnn_unary_elementwise_config* xnn_init_f32_sqrt_config() { const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); if (hardware_config == NULL) { return NULL; } #if XNN_PLATFORM_WINDOWS InitOnceExecuteOnce(&init_guard_f32_sqrt, &init_f32_sqrt_config_windows, NULL, NULL); #else pthread_once(&init_guard_f32_sqrt, &init_f32_sqrt_config); #endif return &f32_sqrt_config; } const struct xnn_unary_elementwise_config* xnn_init_f32_tanh_config() { const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); if (hardware_config == NULL) { return NULL; } #if XNN_PLATFORM_WINDOWS InitOnceExecuteOnce(&init_guard_f32_tanh, &init_f32_tanh_config_windows, NULL, NULL); #else pthread_once(&init_guard_f32_tanh, &init_f32_tanh_config); #endif return &f32_tanh_config; } const struct xnn_unary_elementwise_config* xnn_init_f32_to_f16_cvt_config() { const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); if (hardware_config == NULL) { return NULL; } #if XNN_PLATFORM_WINDOWS InitOnceExecuteOnce(&init_guard_f32_to_f16_cvt, &init_f32_to_f16_cvt_config_windows, NULL, NULL); #else pthread_once(&init_guard_f32_to_f16_cvt, &init_f32_to_f16_cvt_config); #endif return &f32_to_f16_cvt_config; } const struct xnn_unary_elementwise_config* xnn_init_f32_to_qs8_cvt_config() { const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); if (hardware_config == NULL) { return NULL; } #if XNN_PLATFORM_WINDOWS InitOnceExecuteOnce(&init_guard_f32_to_qs8_cvt, &init_f32_to_qs8_cvt_config_windows, NULL, NULL); #else pthread_once(&init_guard_f32_to_qs8_cvt, &init_f32_to_qs8_cvt_config); #endif return &f32_to_qs8_cvt_config; } const struct xnn_unary_elementwise_config* xnn_init_f32_to_qu8_cvt_config() { const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); if (hardware_config == NULL) { return NULL; } #if XNN_PLATFORM_WINDOWS InitOnceExecuteOnce(&init_guard_f32_to_qu8_cvt, &init_f32_to_qu8_cvt_config_windows, NULL, NULL); #else pthread_once(&init_guard_f32_to_qu8_cvt, &init_f32_to_qu8_cvt_config); #endif return &f32_to_qu8_cvt_config; } const struct xnn_unary_elementwise_config* xnn_init_qs8_cvt_config() { const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); if (hardware_config == NULL) { return NULL; } #if XNN_PLATFORM_WINDOWS InitOnceExecuteOnce(&init_guard_qs8_cvt, &init_qs8_cvt_config_windows, NULL, NULL); #else pthread_once(&init_guard_qs8_cvt, &init_qs8_cvt_config); #endif return &qs8_cvt_config; } const struct xnn_unary_elementwise_config* xnn_init_qs16_to_qs8_cvt_config() { const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); if (hardware_config == NULL) { return NULL; } #if XNN_PLATFORM_WINDOWS InitOnceExecuteOnce(&init_guard_qs16_to_qs8_cvt, &init_qs16_to_qs8_cvt_config_windows, NULL, NULL); #else pthread_once(&init_guard_qs16_to_qs8_cvt, &init_qs16_to_qs8_cvt_config); #endif return &qs16_to_qs8_cvt_config; } const struct xnn_unary_elementwise_config* xnn_init_qs8_lrelu_config() { const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); if (hardware_config == NULL) { return NULL; } #if XNN_PLATFORM_WINDOWS InitOnceExecuteOnce(&init_guard_qs8_lrelu, &init_qs8_lrelu_config_windows, NULL, NULL); #else pthread_once(&init_guard_qs8_lrelu, &init_qs8_lrelu_config); #endif return &qs8_lrelu_config; } const struct xnn_unary_elementwise_config* xnn_init_qs8_to_f32_cvt_config() { const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); if (hardware_config == NULL) { return NULL; } #if XNN_PLATFORM_WINDOWS InitOnceExecuteOnce(&init_guard_qs8_to_f32_cvt, &init_qs8_to_f32_cvt_config_windows, NULL, NULL); #else pthread_once(&init_guard_qs8_to_f32_cvt, &init_qs8_to_f32_cvt_config); #endif return &qs8_to_f32_cvt_config; } const struct xnn_unary_elementwise_config* xnn_init_qu8_cvt_config() { const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); if (hardware_config == NULL) { return NULL; } #if XNN_PLATFORM_WINDOWS InitOnceExecuteOnce(&init_guard_qu8_cvt, &init_qu8_cvt_config_windows, NULL, NULL); #else pthread_once(&init_guard_qu8_cvt, &init_qu8_cvt_config); #endif return &qu8_cvt_config; } const struct xnn_unary_elementwise_config* xnn_init_qu8_lrelu_config() { const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); if (hardware_config == NULL) { return NULL; } #if XNN_PLATFORM_WINDOWS InitOnceExecuteOnce(&init_guard_qu8_lrelu, &init_qu8_lrelu_config_windows, NULL, NULL); #else pthread_once(&init_guard_qu8_lrelu, &init_qu8_lrelu_config); #endif return &qu8_lrelu_config; } const struct xnn_unary_elementwise_config* xnn_init_qu8_to_f32_cvt_config() { const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); if (hardware_config == NULL) { return NULL; } #if XNN_PLATFORM_WINDOWS InitOnceExecuteOnce(&init_guard_qu8_to_f32_cvt, &init_qu8_to_f32_cvt_config_windows, NULL, NULL); #else pthread_once(&init_guard_qu8_to_f32_cvt, &init_qu8_to_f32_cvt_config); #endif return &qu8_to_f32_cvt_config; } const struct xnn_unary_elementwise_config* xnn_init_s8_clamp_config() { const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); if (hardware_config == NULL) { return NULL; } #if XNN_PLATFORM_WINDOWS InitOnceExecuteOnce(&init_guard_s8_clamp, &init_s8_clamp_config_windows, NULL, NULL); #else pthread_once(&init_guard_s8_clamp, &init_s8_clamp_config); #endif return &s8_clamp_config; } const struct xnn_unary_elementwise_config* xnn_init_u8_clamp_config() { const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); if (hardware_config == NULL) { return NULL; } #if XNN_PLATFORM_WINDOWS InitOnceExecuteOnce(&init_guard_u8_clamp, &init_u8_clamp_config_windows, NULL, NULL); #else pthread_once(&init_guard_u8_clamp, &init_u8_clamp_config); #endif return &u8_clamp_config; } const struct xnn_unary_elementwise_config* xnn_init_xx_copy_config() { const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); if (hardware_config == NULL) { return NULL; } #if XNN_PLATFORM_WINDOWS InitOnceExecuteOnce(&init_guard_xx_copy, &init_xx_copy_config_windows, NULL, NULL); #else pthread_once(&init_guard_xx_copy, &init_xx_copy_config); #endif return &xx_copy_config; }