// Copyright 2022 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

$assert SAMPLE_TILE >= 1
// NOTE(review): the header names after the #include directives below are
// absent in this copy of the file. The code relies on declarations of
// assert(), NULL, size_t, int16_t/int32_t, math_asr_s32() and XNN_UNLIKELY;
// restore the matching include targets before building.
#include
#include
#include

#include
#include


// Template for a scalar kernel that rewrites `data` in place, one butterfly
// (a low pair combined with a high pair) at a time. Lines starting with `$`
// and `${...}` expressions are template directives, not C; SAMPLE_TILE is the
// template parameter that sets how many butterflies the main loop handles per
// iteration and is also appended to the function name.
//
// Parameters:
//   samples - must be non-zero and even (asserted). The last pair touched is
//             located at data + samples * 2, and samples / 2 butterflies are
//             processed.
//   data    - buffer of interleaved int16_t pairs (suffixes r/i in the code,
//             presumably real/imaginary - confirm against the caller). It is
//             read and overwritten; elements data[0] .. data[samples * 2 + 1]
//             are accessed, so it must hold at least samples + 1 pairs.
//   twiddle - read-only interleaved int16_t pairs, one pair consumed per
//             butterfly (samples int16_t values in total).
//
// NOTE(review): math_asr_s32(x, n) is assumed to be an arithmetic (sign
// preserving) right shift of a 32-bit value; it is defined outside this file.
// Under that assumption `math_asr_s32(x * 16383 + 16384, 15)` scales x by
// 16383 / 32768 (just under one half) with rounding.
void xnn_cs16_fftr_ukernel__scalar_x${SAMPLE_TILE}(
    size_t samples,
    int16_t* data,
    const int16_t* twiddle) {

  assert(samples != 0);
  assert(samples % 2 == 0);
  assert(data != NULL);
  assert(twiddle != NULL);

  // dl walks upward from the first pair; dr starts at pair index `samples`
  // (one past the `samples` leading pairs) and walks downward.
  int16_t* dl = data;
  int16_t* dr = data + samples * 2;

  // The first pair is handled separately: after scaling, the sum r + i is
  // stored in the first pair and the difference r - i in the last pair, each
  // with a zero second component. No twiddle factor is used here.
  int32_t vdcr = (int32_t) dl[0];
  int32_t vdci = (int32_t) dl[1];

  vdcr = math_asr_s32(vdcr * 16383 + 16384, 15);
  vdci = math_asr_s32(vdci * 16383 + 16384, 15);

  dl[0] = vdcr + vdci;
  dl[1] = 0;
  dl += 2;
  dr[0] = vdcr - vdci;
  dr[1] = 0;

  // From here on `samples` counts butterflies, not pairs.
  samples >>= 1;

  // Main loop, emitted only when SAMPLE_TILE > 1: SAMPLE_TILE butterflies per
  // iteration. Butterfly C pairs dl pair C with dr pair (SAMPLE_TILE - 1 - C),
  // i.e. the high side is indexed in reverse so both sides move toward the
  // middle of the buffer.
  $if SAMPLE_TILE > 1:
    for (; samples >= ${SAMPLE_TILE}; samples -= ${SAMPLE_TILE}) {
      // dr is moved down before it is read, so dr[0] is the lowest pair of
      // this tile on the high side.
      dr -= ${SAMPLE_TILE} * 2;

      // All loads are issued before any store. This matters because dl and dr
      // can refer to the same pair in the final butterfly.
      $for C in range(SAMPLE_TILE):
        int32_t vilr${C} = (int32_t) dl[${C * 2 + 0}];
        int32_t vili${C} = (int32_t) dl[${C * 2 + 1}];
      $for C in range(SAMPLE_TILE):
        int32_t virr${C} = (int32_t) dr[${(SAMPLE_TILE - 1 - C) * 2 + 0}];
        int32_t viri${C} = (int32_t) dr[${(SAMPLE_TILE - 1 - C) * 2 + 1}];
      $for C in range(SAMPLE_TILE):
        const int32_t vtwr${C} = twiddle[${C * 2 + 0}];
        const int32_t vtwi${C} = twiddle[${C * 2 + 1}];
      twiddle += ${SAMPLE_TILE} * 2;

      // Pre-scale every input (same scaling as the first pair above).
      $for C in range(SAMPLE_TILE):
        vilr${C} = math_asr_s32(vilr${C} * 16383 + 16384, 15);
        vili${C} = math_asr_s32(vili${C} * 16383 + 16384, 15);
        virr${C} = math_asr_s32(virr${C} * 16383 + 16384, 15);
        viri${C} = math_asr_s32(viri${C} * 16383 + 16384, 15);

      // vacc1 = (lr + rr, li - ri); vacc2 = (lr - rr, li + ri).
      $for C in range(SAMPLE_TILE):
        const int32_t vacc1r${C} = vilr${C} + virr${C};
        const int32_t vacc1i${C} = vili${C} - viri${C};
        const int32_t vacc2r${C} = vilr${C} - virr${C};
        const int32_t vacc2i${C} = vili${C} + viri${C};

      // vacc = vacc2 * twiddle as a complex product, with 16384 added and the
      // result shifted right by 15 bits.
      $for C in range(SAMPLE_TILE):
        const int32_t vaccr${C} = math_asr_s32(vacc2r${C} * vtwr${C} - vacc2i${C} * vtwi${C} + 16384, 15);
        const int32_t vacci${C} = math_asr_s32(vacc2r${C} * vtwi${C} + vacc2i${C} * vtwr${C} + 16384, 15);

      // Low side receives (vacc1 + vacc) >> 1. The int32_t results are
      // narrowed implicitly by the assignment to int16_t.
      $for C in range(SAMPLE_TILE):
        dl[${C * 2 + 0}] = math_asr_s32(vacc1r${C} + vaccr${C}, 1);
        dl[${C * 2 + 1}] = math_asr_s32(vacc1i${C} + vacci${C}, 1);
      // High side receives (vacc1r - vaccr, vacci - vacc1i) >> 1; note the
      // second component has the opposite sign order from the first. These
      // stores come after the dl stores, so if both sides address the same
      // pair the dr value is the one that remains.
      $for C in range(SAMPLE_TILE):
        dr[${(SAMPLE_TILE - 1 - C) * 2 + 0}] = math_asr_s32(vacc1r${C} - vaccr${C}, 1);
        dr[${(SAMPLE_TILE - 1 - C) * 2 + 1}] = math_asr_s32(vacci${C} - vacc1i${C}, 1);
      dl += ${SAMPLE_TILE} * 2;
    }

  // Remainder loop: one butterfly per iteration, same arithmetic as above.
  // When the main loop is not emitted, this loop processes every butterfly.
  if XNN_UNLIKELY(samples != 0) {
    do {
      dr -= 2;
      // Load both sides and the twiddle pair before writing anything; in the
      // last iteration dl and dr point at the same pair.
      int32_t vilr = (int32_t) dl[0];
      int32_t vili = (int32_t) dl[1];
      int32_t virr = (int32_t) dr[0];
      int32_t viri = (int32_t) dr[1];
      const int32_t vtwr = twiddle[0];
      const int32_t vtwi = twiddle[1];
      twiddle += 2;

      // Pre-scale the inputs.
      vilr = math_asr_s32(vilr * 16383 + 16384, 15);
      vili = math_asr_s32(vili * 16383 + 16384, 15);
      virr = math_asr_s32(virr * 16383 + 16384, 15);
      viri = math_asr_s32(viri * 16383 + 16384, 15);

      const int32_t vacc1r = vilr + virr;
      const int32_t vacc1i = vili - viri;
      const int32_t vacc2r = vilr - virr;
      const int32_t vacc2i = vili + viri;

      // Complex product of vacc2 with the twiddle pair, rounded and shifted
      // right by 15 bits.
      const int32_t vaccr = math_asr_s32(vacc2r * vtwr - vacc2i * vtwi + 16384, 15);
      const int32_t vacci = math_asr_s32(vacc2r * vtwi + vacc2i * vtwr + 16384, 15);

      // Store order is significant: the dr store is last, so it determines
      // the final contents when dl == dr.
      dl[0] = math_asr_s32(vacc1r + vaccr, 1);
      dl[1] = math_asr_s32(vacc1i + vacci, 1);
      dr[0] = math_asr_s32(vacc1r - vaccr, 1);
      dr[1] = math_asr_s32(vacci - vacc1i, 1);
      dl += 2;
    } while (--samples != 0);
  }
}