// Copyright 2019 Google LLC // // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. #include #include #include void xnn_f32_gavgpool_cw_ukernel__scalar_x1( size_t elements, size_t channels, const float* input, float* output, const union xnn_f32_gavgpool_params params[restrict XNN_MIN_ELEMENTS(1)]) { assert(elements != 0); assert(elements % sizeof(float) == 0); assert(channels != 0); const float* i0 = input; const float vmultiplier = params->scalar.multiplier; const float voutput_max = params->scalar.output_max; const float voutput_min = params->scalar.output_min; while (channels != 0) { float vsum0 = 0.f; float vsum1 = 0.f; float vsum2 = 0.f; float vsum3 = 0.f; size_t n = elements; while (n >= 4 * sizeof(float)) { vsum0 += i0[0]; vsum1 += i0[1]; vsum2 += i0[2]; vsum3 += i0[3]; i0 += 4; n -= 4 * sizeof(float); } while (n != 0) { vsum0 += *i0++; n -= sizeof(float); } float vout = ( (vsum0 + vsum1) + (vsum2 + vsum3) ) * vmultiplier; vout = math_min_f32(vout, voutput_max); vout = math_max_f32(vout, voutput_min); *output++ = vout; channels -= 1; } }