// Copyright 2022 Google LLC // // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. $assert BATCH_TILE >= 1 $ABC = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ" #include #include #include #include #include #include void xnn_f16_vsqrt_ukernel__fp16arith_sqrt_x${BATCH_TILE}( size_t batch, const void* input, void* output, const union xnn_f16_sqrt_params params[restrict XNN_MIN_ELEMENTS(1)]) XNN_OOB_READS { assert(batch != 0); assert(batch % sizeof(float16_t) == 0); assert(input != NULL); assert(output != NULL); const float16_t* i = (const float16_t*) input; float16_t* o = (float16_t*) output; $if BATCH_TILE > 1: for (; batch >= ${BATCH_TILE} * sizeof(float16_t); batch -= ${BATCH_TILE} * sizeof(float16_t)) { $for N in range(BATCH_TILE): float16_t vacc${ABC[N]} = *i++; $for N in range(BATCH_TILE): vacc${ABC[N]} = vsqrth_f16(vacc${ABC[N]}); $for N in range(BATCH_TILE): *o++ = vacc${ABC[N]}; } if XNN_UNLIKELY(batch != 0) { $if BATCH_TILE > 2: do { float16_t vacc = *i++; vacc = vsqrth_f16(vacc); *o++ = vacc; batch -= sizeof(float16_t); } while (batch != 0); $else: float16_t vacc = *i; vacc = vsqrth_f16(vacc); *o = vacc; } $else: do { float16_t vacc = *i++; vacc = vsqrth_f16(vacc); *o++ = vacc; batch -= sizeof(float16_t); } while (batch != 0); }