|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#pragma once |
|
|
|
#include <stdbool.h> |
|
#include <stddef.h> |
|
#include <stdint.h> |
|
|
|
#include <pthreadpool.h> |
|
|
|
#ifdef __cplusplus |
|
extern "C" { |
|
#endif |
|
|
|
|
|
|
|
|
|
|
|
// Extra byte count. NOTE(review): presumably the number of bytes that may be
// accessed past the end of a buffer — confirm against the implementation.
#define XNN_EXTRA_BYTES 16

// Upper bound on tensor rank, judging by the name — TODO confirm where it is enforced.
#define XNN_MAX_TENSOR_DIMS 6
|
|
|
|
|
|
|
|
|
// Single-bit flag constants. NOTE(review): presumably OR-ed into the
// `uint32_t flags` arguments of the creation functions declared in this header;
// which call honours which bit is not visible here — confirm.

// Bit 0. XNN_FLAG_HINT_SPARSE_INFERENCE is an alias with the same value.
#define XNN_FLAG_SPARSE_INFERENCE 0x00000001
#define XNN_FLAG_HINT_SPARSE_INFERENCE XNN_FLAG_SPARSE_INFERENCE

// Bit 1. XNN_FLAG_HINT_FP16_INFERENCE is an alias with the same value.
#define XNN_FLAG_FP16_INFERENCE 0x00000002
#define XNN_FLAG_HINT_FP16_INFERENCE XNN_FLAG_FP16_INFERENCE

// Bit 2. Name suggests FP16 inference is forced rather than hinted — TODO confirm.
#define XNN_FLAG_FORCE_FP16_INFERENCE 0x00000004

// Bit 3. Name suggests it enables basic profiling — TODO confirm.
#define XNN_FLAG_BASIC_PROFILING 0x00000008

// Bit 4. Name suggests it enables JIT code generation — TODO confirm.
#define XNN_FLAG_JIT 0x00000010
|
|
|
|
|
// Flag constants whose numeric values overlap with each other (and with the
// flags defined earlier in this header). NOTE(review): presumably each one is
// only meaningful for a specific operator/node type, so the overlap is
// intentional — confirm before combining them.

// Bit 0.
#define XNN_FLAG_DEPTHWISE_CONVOLUTION 0x00000001

// Bit 0.
#define XNN_FLAG_TRANSPOSE_WEIGHTS 0x00000001

// Bit 1.
#define XNN_FLAG_INPUT_NHWC 0x00000002

// Bit 2.
#define XNN_FLAG_TENSORFLOW_SAME_PADDING 0x00000004

// Alias of XNN_FLAG_TRANSPOSE_WEIGHTS (bit 0).
#define XNN_FLAG_TRANSPOSE_B XNN_FLAG_TRANSPOSE_WEIGHTS

// Bit 1.
#define XNN_FLAG_TRANSPOSE_A 0x00000002

// Bit 2.
#define XNN_FLAG_TENSORFLOW_RESHAPE_2D 0x00000004

// Bit 2.
#define XNN_FLAG_TENSORFLOW_LEGACY_MODE 0x00000004

// Bit 3.
#define XNN_FLAG_FP32_STATIC_WEIGHTS 0x00000008

// Bit 3.
#define XNN_FLAG_ALIGN_CORNERS 0x00000008

// Bit 4.
#define XNN_FLAG_YIELD_WORKERS 0x00000010
|
|
|
|
|
|
|
|
|
|
|
// Extra element count. NOTE(review): presumably the number of padding entries
// required after an array of xnn_dynamic_quantization_params — confirm.
#define XNN_EXTRA_QUANTIZATION_PARAMS 8
|
|
|
// Pair of quantization parameters: an integer zero point and a float scale.
// NOTE(review): the name suggests these are computed at run time (dynamic
// quantization); how they are applied is not visible here.
struct xnn_dynamic_quantization_params {
  int32_t zero_point;
  float scale;
};
|
|
|
|
|
// Status code returned by every function declared in this header.
// The numeric values are explicit, so they are stable across reorderings.
enum xnn_status {
  // The call succeeded.
  xnn_status_success = 0,
  // Presumably: the library was not initialized before the call — confirm.
  xnn_status_uninitialized = 1,
  xnn_status_invalid_parameter = 2,
  xnn_status_invalid_state = 3,
  xnn_status_unsupported_parameter = 4,
  xnn_status_unsupported_hardware = 5,
  xnn_status_out_of_memory = 6,
};
|
|
|
// Table of user-supplied memory-management callbacks plus an opaque context.
// A pointer to this struct is accepted by xnn_initialize().
// NOTE(review): the exact contracts (alignment guarantees, NULL handling,
// whether `context` is forwarded unchanged) are not visible in this header;
// the comments below describe only what the signatures show.
struct xnn_allocator {
  // Opaque user pointer; every callback takes a `context` as its first argument.
  void* context;

  // Allocates `size` bytes; malloc-like signature.
  void* (*allocate)(void* context, size_t size);

  // Resizes the block at `pointer` to `size` bytes; realloc-like signature.
  void* (*reallocate)(void* context, void* pointer, size_t size);

  // Releases a block; free-like signature.
  void (*deallocate)(void* context, void* pointer);

  // Allocates `size` bytes with the requested `alignment`.
  void* (*aligned_allocate)(void* context, size_t alignment, size_t size);

  // Releases a block; presumably the counterpart of aligned_allocate — confirm.
  void (*aligned_deallocate)(void* context, void* pointer);
};
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// Library initialization entry point. Takes a pointer to an allocator callback
// table and returns a status code.
// NOTE(review): whether `allocator` may be NULL (to select a default allocator)
// is not visible here — confirm against the implementation.
enum xnn_status xnn_initialize(const struct xnn_allocator* allocator);
|
|
|
|
|
|
|
|
|
|
|
|
|
// Library teardown entry point; takes no arguments and returns a status code.
// Presumably the counterpart of xnn_initialize() — confirm pairing rules.
enum xnn_status xnn_deinitialize(void);
|
|
|
|
|
|
|
// Opaque handle to a subgraph; struct xnn_subgraph is not defined in this header.
typedef struct xnn_subgraph* xnn_subgraph_t;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
enum xnn_status xnn_create_subgraph( |
|
uint32_t external_value_ids, |
|
uint32_t flags, |
|
xnn_subgraph_t* subgraph_out); |
|
|
|
|
|
|
|
|
|
enum xnn_status xnn_delete_subgraph( |
|
xnn_subgraph_t subgraph); |
|
|
|
// Value flag bits. NOTE(review): presumably passed in the `flags` argument of
// the xnn_define_*tensor_value() functions — confirm.
#define XNN_VALUE_FLAG_EXTERNAL_INPUT  0x00000001
#define XNN_VALUE_FLAG_EXTERNAL_OUTPUT 0x00000002
#define XNN_VALUE_FLAG_PERSISTENT      0x00000004

// Sentinel value ID (all bits set). Requires <stdint.h> for UINT32_MAX.
#define XNN_INVALID_VALUE_ID UINT32_MAX
|
|
|
|
|
// Element type tag for tensor values. Numeric values are explicit.
// NOTE(review): the per-enumerator notes are read off the names (fp = floating
// point, q = quantized, qc = presumably per-channel quantized, u = unsigned);
// confirm the exact storage formats against the implementation.
enum xnn_datatype {
  // Invalid / unset type (value 0).
  xnn_datatype_invalid = 0,
  // 32-bit floating point.
  xnn_datatype_fp32 = 1,
  // 16-bit floating point.
  xnn_datatype_fp16 = 2,
  // Quantized signed 8-bit integer.
  xnn_datatype_qint8 = 3,
  // Quantized unsigned 8-bit integer.
  xnn_datatype_quint8 = 4,
  // Quantized signed 32-bit integer.
  xnn_datatype_qint32 = 5,
  // Quantized signed 8-bit integer, "qc" variant.
  xnn_datatype_qcint8 = 6,
  // Quantized signed 32-bit integer, "qc" variant.
  xnn_datatype_qcint32 = 7,
  // Quantized signed 4-bit integer, "qc" variant.
  xnn_datatype_qcint4 = 8,
};
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
enum xnn_status xnn_define_tensor_value( |
|
xnn_subgraph_t subgraph, |
|
enum xnn_datatype datatype, |
|
size_t num_dims, |
|
const size_t* dims, |
|
const void* data, |
|
uint32_t external_id, |
|
uint32_t flags, |
|
uint32_t* id_out); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
enum xnn_status xnn_define_quantized_tensor_value( |
|
xnn_subgraph_t subgraph, |
|
enum xnn_datatype datatype, |
|
int32_t zero_point, |
|
float scale, |
|
size_t num_dims, |
|
const size_t* dims, |
|
const void* data, |
|
uint32_t external_id, |
|
uint32_t flags, |
|
uint32_t* id_out); |
|
|
|
enum xnn_status xnn_define_channelwise_quantized_tensor_value( |
|
xnn_subgraph_t subgraph, |
|
enum xnn_datatype datatype, |
|
const float* scale, |
|
size_t num_dims, |
|
size_t channel_dim, |
|
const size_t* dims, |
|
const void* data, |
|
uint32_t external_id, |
|
uint32_t flags, |
|
uint32_t* id_out); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
enum xnn_status xnn_define_channelwise_quantized_tensor_value_v2( |
|
xnn_subgraph_t subgraph, |
|
enum xnn_datatype datatype, |
|
int32_t zero_point, |
|
const float* scale, |
|
size_t num_dims, |
|
size_t channel_dim, |
|
const size_t* dims, |
|
const void* data, |
|
uint32_t external_id, |
|
uint32_t flags, |
|
uint32_t* id_out); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
enum xnn_status xnn_define_convert( |
|
xnn_subgraph_t subgraph, |
|
uint32_t input_id, |
|
uint32_t output_id, |
|
uint32_t flags); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
enum xnn_status xnn_define_convolution_2d( |
|
xnn_subgraph_t subgraph, |
|
uint32_t input_padding_top, |
|
uint32_t input_padding_right, |
|
uint32_t input_padding_bottom, |
|
uint32_t input_padding_left, |
|
uint32_t kernel_height, |
|
uint32_t kernel_width, |
|
uint32_t subsampling_height, |
|
uint32_t subsampling_width, |
|
uint32_t dilation_height, |
|
uint32_t dilation_width, |
|
uint32_t groups, |
|
size_t group_input_channels, |
|
size_t group_output_channels, |
|
float output_min, |
|
float output_max, |
|
uint32_t input_id, |
|
uint32_t filter_id, |
|
uint32_t bias_id, |
|
uint32_t output_id, |
|
uint32_t flags); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
enum xnn_status xnn_define_deconvolution_2d( |
|
xnn_subgraph_t subgraph, |
|
uint32_t padding_top, |
|
uint32_t padding_right, |
|
uint32_t padding_bottom, |
|
uint32_t padding_left, |
|
uint32_t adjustment_height, |
|
uint32_t adjustment_width, |
|
uint32_t kernel_height, |
|
uint32_t kernel_width, |
|
uint32_t upsampling_height, |
|
uint32_t upsampling_width, |
|
uint32_t dilation_height, |
|
uint32_t dilation_width, |
|
uint32_t groups, |
|
size_t group_input_channels, |
|
size_t group_output_channels, |
|
float output_min, |
|
float output_max, |
|
uint32_t input_id, |
|
uint32_t filter_id, |
|
uint32_t bias_id, |
|
uint32_t output_id, |
|
uint32_t flags); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
enum xnn_status xnn_define_depthwise_convolution_2d( |
|
xnn_subgraph_t subgraph, |
|
uint32_t input_padding_top, |
|
uint32_t input_padding_right, |
|
uint32_t input_padding_bottom, |
|
uint32_t input_padding_left, |
|
uint32_t kernel_height, |
|
uint32_t kernel_width, |
|
uint32_t subsampling_height, |
|
uint32_t subsampling_width, |
|
uint32_t dilation_height, |
|
uint32_t dilation_width, |
|
uint32_t depth_multiplier, |
|
size_t input_channels, |
|
float output_min, |
|
float output_max, |
|
uint32_t input_id, |
|
uint32_t filter_id, |
|
uint32_t bias_id, |
|
uint32_t output_id, |
|
uint32_t flags); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
enum xnn_status xnn_define_depth_to_space( |
|
xnn_subgraph_t subgraph, |
|
uint32_t input_id, |
|
uint32_t output_id, |
|
uint32_t block_size, |
|
uint32_t flags); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
enum xnn_status xnn_define_global_average_pooling_1d( |
|
xnn_subgraph_t subgraph, |
|
float output_min, |
|
float output_max, |
|
uint32_t input_id, |
|
uint32_t output_id, |
|
uint32_t flags); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
enum xnn_status xnn_define_global_average_pooling_2d( |
|
xnn_subgraph_t subgraph, |
|
float output_min, |
|
float output_max, |
|
uint32_t input_id, |
|
uint32_t output_id, |
|
uint32_t flags); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
enum xnn_status xnn_define_global_sum_pooling_1d( |
|
xnn_subgraph_t subgraph, |
|
float output_min, |
|
float output_max, |
|
uint32_t input_id, |
|
uint32_t output_id, |
|
uint32_t flags); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
enum xnn_status xnn_define_global_sum_pooling_2d( |
|
xnn_subgraph_t subgraph, |
|
float output_min, |
|
float output_max, |
|
uint32_t input_id, |
|
uint32_t output_id, |
|
uint32_t flags); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
enum xnn_status xnn_define_average_pooling_2d( |
|
xnn_subgraph_t subgraph, |
|
uint32_t input_padding_top, |
|
uint32_t input_padding_right, |
|
uint32_t input_padding_bottom, |
|
uint32_t input_padding_left, |
|
uint32_t pooling_height, |
|
uint32_t pooling_width, |
|
uint32_t stride_height, |
|
uint32_t stride_width, |
|
float output_min, |
|
float output_max, |
|
uint32_t input_id, |
|
uint32_t output_id, |
|
uint32_t flags); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
enum xnn_status xnn_define_fully_connected( |
|
xnn_subgraph_t subgraph, |
|
float output_min, |
|
float output_max, |
|
uint32_t input_id, |
|
uint32_t filter_id, |
|
uint32_t bias_id, |
|
uint32_t output_id, |
|
uint32_t flags); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
enum xnn_status xnn_define_fully_connected_sparse( |
|
xnn_subgraph_t subgraph, |
|
float output_min, |
|
float output_max, |
|
uint32_t input_id, |
|
uint32_t filter_id, |
|
uint32_t bias_id, |
|
uint32_t output_id, |
|
uint32_t flags); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
enum xnn_status xnn_define_max_pooling_2d( |
|
xnn_subgraph_t subgraph, |
|
uint32_t input_padding_top, |
|
uint32_t input_padding_right, |
|
uint32_t input_padding_bottom, |
|
uint32_t input_padding_left, |
|
uint32_t pooling_height, |
|
uint32_t pooling_width, |
|
uint32_t stride_height, |
|
uint32_t stride_width, |
|
uint32_t dilation_height, |
|
uint32_t dilation_width, |
|
float output_min, |
|
float output_max, |
|
uint32_t input_id, |
|
uint32_t output_id, |
|
uint32_t flags); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
enum xnn_status xnn_define_argmax_pooling_2d( |
|
xnn_subgraph_t subgraph, |
|
uint32_t input_padding_top, |
|
uint32_t input_padding_right, |
|
uint32_t input_padding_bottom, |
|
uint32_t input_padding_left, |
|
uint32_t pooling_height, |
|
uint32_t pooling_width, |
|
uint32_t input_id, |
|
uint32_t output_value_id, |
|
uint32_t output_index_id, |
|
uint32_t flags); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
enum xnn_status xnn_define_unpooling_2d( |
|
xnn_subgraph_t subgraph, |
|
uint32_t padding_top, |
|
uint32_t padding_right, |
|
uint32_t padding_bottom, |
|
uint32_t padding_left, |
|
uint32_t pooling_height, |
|
uint32_t pooling_width, |
|
uint32_t input_value_id, |
|
uint32_t input_index_id, |
|
uint32_t output_id, |
|
uint32_t flags); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
enum xnn_status xnn_define_add2( |
|
xnn_subgraph_t subgraph, |
|
float output_min, |
|
float output_max, |
|
uint32_t input1_id, |
|
uint32_t input2_id, |
|
uint32_t output_id, |
|
uint32_t flags); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
enum xnn_status xnn_define_multiply2( |
|
xnn_subgraph_t subgraph, |
|
float output_min, |
|
float output_max, |
|
uint32_t input1_id, |
|
uint32_t input2_id, |
|
uint32_t output_id, |
|
uint32_t flags); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
enum xnn_status xnn_define_subtract( |
|
xnn_subgraph_t subgraph, |
|
float output_min, |
|
float output_max, |
|
uint32_t input1_id, |
|
uint32_t input2_id, |
|
uint32_t output_id, |
|
uint32_t flags); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
enum xnn_status xnn_define_divide( |
|
xnn_subgraph_t subgraph, |
|
float output_min, |
|
float output_max, |
|
uint32_t input1_id, |
|
uint32_t input2_id, |
|
uint32_t output_id, |
|
uint32_t flags); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
enum xnn_status xnn_define_maximum2( |
|
xnn_subgraph_t subgraph, |
|
uint32_t input1_id, |
|
uint32_t input2_id, |
|
uint32_t output_id, |
|
uint32_t flags); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
enum xnn_status xnn_define_minimum2( |
|
xnn_subgraph_t subgraph, |
|
uint32_t input1_id, |
|
uint32_t input2_id, |
|
uint32_t output_id, |
|
uint32_t flags); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
enum xnn_status xnn_define_squared_difference( |
|
xnn_subgraph_t subgraph, |
|
uint32_t input1_id, |
|
uint32_t input2_id, |
|
uint32_t output_id, |
|
uint32_t flags); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
enum xnn_status xnn_define_static_constant_pad( |
|
xnn_subgraph_t subgraph, |
|
const size_t* pre_paddings, |
|
const size_t* post_paddings, |
|
float padding_value, |
|
uint32_t input_id, |
|
uint32_t output_id, |
|
uint32_t flags); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
enum xnn_status xnn_define_static_mean( |
|
xnn_subgraph_t subgraph, |
|
size_t num_reduction_axes, |
|
const size_t* reduction_axes, |
|
uint32_t input_id, |
|
uint32_t output_id, |
|
uint32_t flags); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
enum xnn_status xnn_define_concatenate2( |
|
xnn_subgraph_t subgraph, |
|
size_t axis, |
|
uint32_t input1_id, |
|
uint32_t input2_id, |
|
uint32_t output_id, |
|
uint32_t flags); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
enum xnn_status xnn_define_concatenate3( |
|
xnn_subgraph_t subgraph, |
|
size_t axis, |
|
uint32_t input1_id, |
|
uint32_t input2_id, |
|
uint32_t input3_id, |
|
uint32_t output_id, |
|
uint32_t flags); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
enum xnn_status xnn_define_concatenate4( |
|
xnn_subgraph_t subgraph, |
|
size_t axis, |
|
uint32_t input1_id, |
|
uint32_t input2_id, |
|
uint32_t input3_id, |
|
uint32_t input4_id, |
|
uint32_t output_id, |
|
uint32_t flags); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
enum xnn_status xnn_define_copy( |
|
xnn_subgraph_t subgraph, |
|
uint32_t input_id, |
|
uint32_t output_id, |
|
uint32_t flags); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
enum xnn_status xnn_define_even_split2( |
|
xnn_subgraph_t subgraph, |
|
size_t split_dim, |
|
uint32_t input_id, |
|
uint32_t output1_id, |
|
uint32_t output2_id, |
|
uint32_t flags); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
enum xnn_status xnn_define_even_split3( |
|
xnn_subgraph_t subgraph, |
|
size_t split_dim, |
|
uint32_t input_id, |
|
uint32_t output1_id, |
|
uint32_t output2_id, |
|
uint32_t output3_id, |
|
uint32_t flags); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
enum xnn_status xnn_define_even_split4( |
|
xnn_subgraph_t subgraph, |
|
size_t split_dim, |
|
uint32_t input_id, |
|
uint32_t output1_id, |
|
uint32_t output2_id, |
|
uint32_t output3_id, |
|
uint32_t output4_id, |
|
uint32_t flags); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
enum xnn_status xnn_define_static_reshape( |
|
xnn_subgraph_t subgraph, |
|
size_t num_dims, |
|
const size_t* new_shape, |
|
uint32_t input_id, |
|
uint32_t output_id, |
|
uint32_t flags); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
enum xnn_status xnn_define_static_resize_bilinear_2d( |
|
xnn_subgraph_t subgraph, |
|
size_t new_height, |
|
size_t new_width, |
|
uint32_t input_id, |
|
uint32_t output_id, |
|
uint32_t flags); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
enum xnn_status xnn_define_prelu( |
|
xnn_subgraph_t subgraph, |
|
uint32_t input_id, |
|
uint32_t slope_id, |
|
uint32_t output_id, |
|
uint32_t flags); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
enum xnn_status xnn_define_rope( |
|
xnn_subgraph_t subgraph, |
|
size_t max_sequence_size, |
|
uint32_t input_id, |
|
uint32_t weights_id, |
|
uint32_t output_id, |
|
uint32_t flags); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
enum xnn_status xnn_define_abs( |
|
xnn_subgraph_t subgraph, |
|
uint32_t input_id, |
|
uint32_t output_id, |
|
uint32_t flags); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
enum xnn_status xnn_define_bankers_rounding( |
|
xnn_subgraph_t subgraph, |
|
uint32_t input_id, |
|
uint32_t output_id, |
|
uint32_t flags); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
enum xnn_status xnn_define_batch_matrix_multiply( |
|
xnn_subgraph_t subgraph, |
|
uint32_t input1_id, |
|
uint32_t input2_id, |
|
uint32_t output_id, |
|
uint32_t flags); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
enum xnn_status xnn_define_ceiling( |
|
xnn_subgraph_t subgraph, |
|
uint32_t input_id, |
|
uint32_t output_id, |
|
uint32_t flags); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
enum xnn_status xnn_define_clamp( |
|
xnn_subgraph_t subgraph, |
|
float output_min, |
|
float output_max, |
|
uint32_t input_id, |
|
uint32_t output_id, |
|
uint32_t flags); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
enum xnn_status xnn_define_elu( |
|
xnn_subgraph_t subgraph, |
|
float alpha, |
|
uint32_t input_id, |
|
uint32_t output_id, |
|
uint32_t flags); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
enum xnn_status xnn_define_floor( |
|
xnn_subgraph_t subgraph, |
|
uint32_t input_id, |
|
uint32_t output_id, |
|
uint32_t flags); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
enum xnn_status xnn_define_hardswish( |
|
xnn_subgraph_t subgraph, |
|
uint32_t input_id, |
|
uint32_t output_id, |
|
uint32_t flags); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
enum xnn_status xnn_define_leaky_relu( |
|
xnn_subgraph_t subgraph, |
|
float negative_slope, |
|
uint32_t input_id, |
|
uint32_t output_id, |
|
uint32_t flags); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
enum xnn_status xnn_define_negate( |
|
xnn_subgraph_t subgraph, |
|
uint32_t input_id, |
|
uint32_t output_id, |
|
uint32_t flags); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
enum xnn_status xnn_define_sigmoid( |
|
xnn_subgraph_t subgraph, |
|
uint32_t input_id, |
|
uint32_t output_id, |
|
uint32_t flags); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
enum xnn_status xnn_define_softmax( |
|
xnn_subgraph_t subgraph, |
|
uint32_t input_id, |
|
uint32_t output_id, |
|
uint32_t flags); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
enum xnn_status xnn_define_space_to_depth_2d( |
|
xnn_subgraph_t subgraph, |
|
uint32_t block_size, |
|
uint32_t input_id, |
|
uint32_t output_id, |
|
uint32_t flags); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
enum xnn_status xnn_define_square( |
|
xnn_subgraph_t subgraph, |
|
uint32_t input_id, |
|
uint32_t output_id, |
|
uint32_t flags); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
enum xnn_status xnn_define_square_root( |
|
xnn_subgraph_t subgraph, |
|
uint32_t input_id, |
|
uint32_t output_id, |
|
uint32_t flags); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
enum xnn_status xnn_define_static_slice( |
|
xnn_subgraph_t subgraph, |
|
size_t num_dims, |
|
const size_t* offsets, |
|
const size_t* sizes, |
|
uint32_t input_id, |
|
uint32_t output_id, |
|
uint32_t flags); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
enum xnn_status xnn_define_static_transpose( |
|
xnn_subgraph_t subgraph, |
|
size_t num_dims, |
|
const size_t* perm, |
|
uint32_t input_id, |
|
uint32_t output_id, |
|
uint32_t flags); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
enum xnn_status xnn_define_tanh( |
|
xnn_subgraph_t subgraph, |
|
uint32_t input_id, |
|
uint32_t output_id, |
|
uint32_t flags); |
|
|
|
|
|
// Opaque handle to a code cache; struct xnn_code_cache is not defined in this header.
typedef struct xnn_code_cache* xnn_code_cache_t;
|
|
|
|
|
// Opaque handle to a weights cache; struct xnn_weights_cache is not defined in this header.
typedef struct xnn_weights_cache* xnn_weights_cache_t;
|
|
|
// Creates a weights cache and stores its handle through `weights_cache_out`.
// Returns a status code.
enum xnn_status xnn_create_weights_cache(xnn_weights_cache_t* weights_cache_out);
|
|
|
|
|
|
|
|
|
|
|
|
|
// Creates a weights cache with an explicit `size` and stores its handle
// through `weights_cache_out`. NOTE(review): `size` is presumably a capacity
// in bytes — units are not visible here; confirm. Returns a status code.
enum xnn_status xnn_create_weights_cache_with_size(size_t size, xnn_weights_cache_t* weights_cache_out);
|
|
|
|
|
|
|
// Selects how xnn_finalize_weights_cache() finalizes a cache.
// Enumerators take the default values 0 and 1.
// NOTE(review): the behavioral difference between "hard" and "soft"
// finalization is not visible in this header — confirm.
enum xnn_weights_cache_finalization_kind {
  xnn_weights_cache_finalization_kind_hard,
  xnn_weights_cache_finalization_kind_soft,
};
|
|
|
|
|
|
|
|
|
enum xnn_status xnn_finalize_weights_cache( |
|
xnn_weights_cache_t weights_cache, |
|
enum xnn_weights_cache_finalization_kind finalization_kind); |
|
|
|
|
|
|
|
// Destroys a weights cache handle (presumably one obtained from
// xnn_create_weights_cache*()). Returns a status code.
enum xnn_status xnn_delete_weights_cache(xnn_weights_cache_t weights_cache);
|
|
|
// Opaque handle to a workspace; struct xnn_workspace is not defined in this header.
typedef struct xnn_workspace* xnn_workspace_t;
|
|
|
|
|
|
|
|
|
|
|
// Creates a workspace and stores its handle through `workspace_out`.
// Returns a status code.
enum xnn_status xnn_create_workspace(xnn_workspace_t* workspace_out);
|
|
|
|
|
|
|
// Releases a workspace handle. NOTE(review): "release" (rather than "delete")
// suggests reference counting — not verifiable from this header; confirm.
// Returns a status code.
enum xnn_status xnn_release_workspace(xnn_workspace_t workspace);
|
|
|
|
|
// Opaque handle to a runtime; struct xnn_runtime is not defined in this header.
typedef struct xnn_runtime* xnn_runtime_t;
|
|
|
// Selects which piece of profiling data xnn_get_runtime_profiling_info()
// returns. Enumerators take the default values 0, 1, 2.
// NOTE(review): the format of the data returned for each selector is not
// visible in this header — confirm.
enum xnn_profile_info {
  xnn_profile_info_num_operators,
  xnn_profile_info_operator_name,
  xnn_profile_info_operator_timing,
};
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// Queries profiling data identified by `param_name` from `runtime`.
// `param_value` is a caller-provided buffer of `param_value_size` bytes;
// `param_value_size_ret` presumably receives the size actually required or
// written — confirm. Returns a status code.
enum xnn_status xnn_get_runtime_profiling_info(xnn_runtime_t runtime,
                                               enum xnn_profile_info param_name,
                                               size_t param_value_size,
                                               void* param_value,
                                               size_t* param_value_size_ret);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
enum xnn_status xnn_create_runtime_v4( |
|
xnn_subgraph_t subgraph, |
|
xnn_weights_cache_t weights_cache, |
|
xnn_workspace_t workspace, |
|
pthreadpool_t threadpool, |
|
uint32_t flags, |
|
xnn_runtime_t* runtime_out); |
|
|
|
enum xnn_status xnn_create_runtime_v3( |
|
xnn_subgraph_t subgraph, |
|
xnn_weights_cache_t weights_cache, |
|
pthreadpool_t threadpool, |
|
uint32_t flags, |
|
xnn_runtime_t* runtime_out); |
|
|
|
enum xnn_status xnn_create_runtime_v2( |
|
xnn_subgraph_t subgraph, |
|
pthreadpool_t threadpool, |
|
uint32_t flags, |
|
xnn_runtime_t* runtime_out); |
|
|
|
enum xnn_status xnn_create_runtime( |
|
xnn_subgraph_t subgraph, |
|
xnn_runtime_t* runtime_out); |
|
|
|
// Binds a value ID to a data pointer; an array of these is passed to
// xnn_setup_runtime(). NOTE(review): `id` is presumably the external ID given
// when the tensor value was defined — confirm.
struct xnn_external_value {
  uint32_t id;
  void* data;
};
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
enum xnn_status xnn_setup_runtime( |
|
xnn_runtime_t runtime, |
|
size_t num_external_values, |
|
const struct xnn_external_value* external_values); |
|
|
|
|
|
|
|
|
|
enum xnn_status xnn_invoke_runtime( |
|
xnn_runtime_t runtime); |
|
|
|
|
|
|
|
|
|
enum xnn_status xnn_delete_runtime( |
|
xnn_runtime_t runtime); |
|
|
|
// Opaque handle to an operator; struct xnn_operator is not defined in this header.
typedef struct xnn_operator* xnn_operator_t;
|
|
|
enum xnn_status xnn_run_operator( |
|
xnn_operator_t op, |
|
pthreadpool_t threadpool); |
|
|
|
enum xnn_status xnn_delete_operator( |
|
xnn_operator_t op); |
|
|
|
enum xnn_status xnn_create_abs_nc_f32( |
|
size_t channels, |
|
size_t input_stride, |
|
size_t output_stride, |
|
uint32_t flags, |
|
xnn_operator_t* abs_op_out); |
|
|
|
enum xnn_status xnn_reshape_abs_nc_f32( |
|
xnn_operator_t abs_op, |
|
size_t batch_size, |
|
pthreadpool_t threadpool); |
|
|
|
enum xnn_status xnn_setup_abs_nc_f32( |
|
xnn_operator_t abs_op, |
|
const float* input, |
|
float* output); |
|
|
|
enum xnn_status xnn_run_abs_nc_f32( |
|
size_t channels, |
|
size_t input_stride, |
|
size_t output_stride, |
|
size_t batch_size, |
|
const float* input, |
|
float* output, |
|
uint32_t flags, |
|
pthreadpool_t threadpool); |
|
|
|
enum xnn_status xnn_create_add_nd_f32( |
|
float output_min, |
|
float output_max, |
|
uint32_t flags, |
|
xnn_operator_t* add_op_out); |
|
|
|
enum xnn_status xnn_reshape_add_nd_f32( |
|
xnn_operator_t add_op, |
|
size_t num_input1_dims, |
|
const size_t* input1_shape, |
|
size_t num_input2_dims, |
|
const size_t* input2_shape, |
|
pthreadpool_t threadpool); |
|
|
|
enum xnn_status xnn_setup_add_nd_f32( |
|
xnn_operator_t add_op, |
|
const float* input1, |
|
const float* input2, |
|
float* output); |
|
|
|
enum xnn_status xnn_run_add_nd_f32( |
|
size_t num_input1_dims, |
|
const size_t* input1_shape, |
|
size_t num_input2_dims, |
|
const size_t* input2_shape, |
|
const float* input1, |
|
const float* input2, |
|
float* output, |
|
float output_min, |
|
float output_max, |
|
uint32_t flags, |
|
pthreadpool_t threadpool); |
|
|
|
enum xnn_status xnn_create_argmax_pooling2d_nhwc_f32( |
|
uint32_t input_padding_top, |
|
uint32_t input_padding_right, |
|
uint32_t input_padding_bottom, |
|
uint32_t input_padding_left, |
|
uint32_t pooling_height, |
|
uint32_t pooling_width, |
|
size_t channels, |
|
size_t input_pixel_stride, |
|
size_t output_pixel_stride, |
|
uint32_t flags, |
|
xnn_operator_t* argmax_pooling_op_out); |
|
|
|
enum xnn_status xnn_reshape_argmax_pooling2d_nhwc_f32( |
|
xnn_operator_t argmax_pooling_op, |
|
size_t batch_size, |
|
size_t input_height, |
|
size_t input_width, |
|
pthreadpool_t threadpool); |
|
|
|
enum xnn_status xnn_setup_argmax_pooling2d_nhwc_f32( |
|
xnn_operator_t argmax_pooling_op, |
|
const float* input, |
|
float* output, |
|
uint32_t* index); |
|
|
|
enum xnn_status xnn_create_average_pooling2d_nhwc_f32( |
|
uint32_t input_padding_top, |
|
uint32_t input_padding_right, |
|
uint32_t input_padding_bottom, |
|
uint32_t input_padding_left, |
|
uint32_t pooling_height, |
|
uint32_t pooling_width, |
|
uint32_t stride_height, |
|
uint32_t stride_width, |
|
size_t channels, |
|
size_t input_pixel_stride, |
|
size_t output_pixel_stride, |
|
float output_min, |
|
float output_max, |
|
uint32_t flags, |
|
xnn_operator_t* average_pooling_op_out); |
|
|
|
enum xnn_status xnn_reshape_average_pooling2d_nhwc_f32( |
|
xnn_operator_t average_pooling_op, |
|
size_t batch_size, |
|
size_t input_height, |
|
size_t input_width, |
|
size_t* output_height_out, |
|
size_t* output_width_out, |
|
pthreadpool_t threadpool); |
|
|
|
enum xnn_status xnn_setup_average_pooling2d_nhwc_f32( |
|
xnn_operator_t average_pooling_op, |
|
const float* input, |
|
float* output); |
|
|
|
enum xnn_status xnn_create_bankers_rounding_nc_f32( |
|
size_t channels, |
|
size_t input_stride, |
|
size_t output_stride, |
|
uint32_t flags, |
|
xnn_operator_t* rounding_op_out); |
|
|
|
enum xnn_status xnn_reshape_bankers_rounding_nc_f32( |
|
xnn_operator_t rounding_op, |
|
size_t batch_size, |
|
pthreadpool_t threadpool); |
|
|
|
enum xnn_status xnn_setup_bankers_rounding_nc_f32( |
|
xnn_operator_t rounding_op, |
|
const float* input, |
|
float* output); |
|
|
|
enum xnn_status xnn_run_bankers_rounding_nc_f32( |
|
size_t channels, |
|
size_t input_stride, |
|
size_t output_stride, |
|
size_t batch_size, |
|
const float* input, |
|
float* output, |
|
uint32_t flags, |
|
pthreadpool_t threadpool); |
|
|
|
enum xnn_status xnn_create_batch_matrix_multiply_nc_f32( |
|
uint32_t flags, |
|
xnn_operator_t* batch_matrix_multiply_op); |
|
|
|
enum xnn_status xnn_reshape_batch_matrix_multiply_nc_f32( |
|
xnn_operator_t batch_matrix_multiply_op, |
|
size_t batch_size, |
|
size_t m, |
|
size_t k, |
|
size_t n, |
|
size_t* workspace_size, |
|
size_t* workspace_alignment, |
|
pthreadpool_t threadpool); |
|
|
|
enum xnn_status xnn_setup_batch_matrix_multiply_nc_f32( |
|
xnn_operator_t batch_matrix_multiply_op, |
|
void* workspace, |
|
const float* input1, |
|
const float* input2, |
|
float* output); |
|
|
|
enum xnn_status xnn_create_ceiling_nc_f32( |
|
size_t channels, |
|
size_t input_stride, |
|
size_t output_stride, |
|
uint32_t flags, |
|
xnn_operator_t* ceiling_op_out); |
|
|
|
enum xnn_status xnn_run_ceiling_nc_f32( |
|
size_t channels, |
|
size_t input_stride, |
|
size_t output_stride, |
|
size_t batch_size, |
|
const float* input, |
|
float* output, |
|
uint32_t flags, |
|
pthreadpool_t threadpool); |
|
|
|
enum xnn_status xnn_reshape_ceiling_nc_f32( |
|
xnn_operator_t ceiling_op, |
|
size_t batch_size, |
|
pthreadpool_t threadpool); |
|
|
|
enum xnn_status xnn_setup_ceiling_nc_f32( |
|
xnn_operator_t ceiling_op, |
|
const float* input, |
|
float* output); |
|
|
|
enum xnn_status xnn_create_clamp_nc_f32( |
|
size_t channels, |
|
size_t input_stride, |
|
size_t output_stride, |
|
float output_min, |
|
float output_max, |
|
uint32_t flags, |
|
xnn_operator_t* clamp_op_out); |
|
|
|
enum xnn_status xnn_reshape_clamp_nc_f32( |
|
xnn_operator_t clamp_op, |
|
size_t batch_size, |
|
pthreadpool_t threadpool); |
|
|
|
enum xnn_status xnn_setup_clamp_nc_f32( |
|
xnn_operator_t clamp_op, |
|
const float* input, |
|
float* output); |
|
|
|
enum xnn_status xnn_run_clamp_nc_f32( |
|
size_t channels, |
|
size_t input_stride, |
|
size_t output_stride, |
|
size_t batch_size, |
|
const float* input, |
|
float* output, |
|
float output_min, |
|
float output_max, |
|
uint32_t flags, |
|
pthreadpool_t threadpool); |
|
|
|
enum xnn_status xnn_create_convolution2d_nhwc_f32( |
|
uint32_t input_padding_top, |
|
uint32_t input_padding_right, |
|
uint32_t input_padding_bottom, |
|
uint32_t input_padding_left, |
|
uint32_t kernel_height, |
|
uint32_t kernel_width, |
|
uint32_t subsampling_height, |
|
uint32_t subsampling_width, |
|
uint32_t dilation_height, |
|
uint32_t dilation_width, |
|
uint32_t groups, |
|
size_t group_input_channels, |
|
size_t group_output_channels, |
|
size_t input_channel_stride, |
|
size_t output_channel_stride, |
|
const float* kernel, |
|
const float* bias, |
|
float output_min, |
|
float output_max, |
|
uint32_t flags, |
|
xnn_code_cache_t code_cache, |
|
xnn_weights_cache_t weights_cache, |
|
xnn_operator_t* convolution_op_out); |
|
|
|
|
|
// Forward declaration: the struct is used only through pointers in this part
// of the header; its definition is not visible here.
struct xnn_post_operation;
|
|
|
|
|
|
|
|
|
|
|
|
|
enum xnn_status xnn_create_fused_convolution2d_nhwc_f32( |
|
uint32_t input_padding_top, |
|
uint32_t input_padding_right, |
|
uint32_t input_padding_bottom, |
|
uint32_t input_padding_left, |
|
uint32_t kernel_height, |
|
uint32_t kernel_width, |
|
uint32_t subsampling_height, |
|
uint32_t subsampling_width, |
|
uint32_t dilation_height, |
|
uint32_t dilation_width, |
|
uint32_t groups, |
|
size_t group_input_channels, |
|
size_t group_output_channels, |
|
size_t input_channel_stride, |
|
size_t output_channel_stride, |
|
const float* kernel, |
|
const float* bias, |
|
size_t num_post_operations, |
|
struct xnn_post_operation* post_operations, |
|
uint32_t flags, |
|
xnn_code_cache_t code_cache, |
|
xnn_weights_cache_t weights_cache, |
|
xnn_operator_t* convolution_op_out); |
|
|
|
enum xnn_status xnn_reshape_convolution2d_nhwc_f32( |
|
xnn_operator_t convolution_op, |
|
size_t batch_size, |
|
size_t input_height, |
|
size_t input_width, |
|
size_t* output_height_out, |
|
size_t* output_width_out, |
|
pthreadpool_t threadpool); |
|
|
|
enum xnn_status xnn_setup_convolution2d_nhwc_f32( |
|
xnn_operator_t convolution_op, |
|
const float* input, |
|
float* output); |
|
|
|
enum xnn_status xnn_create_deconvolution2d_nhwc_f32( |
|
uint32_t output_padding_top, |
|
uint32_t output_padding_right, |
|
uint32_t output_padding_bottom, |
|
uint32_t output_padding_left, |
|
uint32_t kernel_height, |
|
uint32_t kernel_width, |
|
uint32_t stride_height, |
|
uint32_t stride_width, |
|
uint32_t dilation_height, |
|
uint32_t dilation_width, |
|
uint32_t groups, |
|
size_t group_input_channels, |
|
size_t group_output_channels, |
|
size_t input_pixel_stride, |
|
size_t output_pixel_stride, |
|
const float* kernel, |
|
const float* bias, |
|
float output_min, |
|
float output_max, |
|
uint32_t flags, |
|
xnn_code_cache_t code_cache, |
|
xnn_weights_cache_t weights_cache, |
|
xnn_operator_t* deconvolution_op_out); |
|
|
|
enum xnn_status xnn_reshape_deconvolution2d_nhwc_f32( |
|
xnn_operator_t deconvolution_op, |
|
size_t batch_size, |
|
size_t input_height, |
|
size_t input_width, |
|
uint32_t adjustment_height, |
|
uint32_t adjustment_width, |
|
size_t* output_height_out, |
|
size_t* output_width_out, |
|
pthreadpool_t threadpool); |
|
|
|
enum xnn_status xnn_setup_deconvolution2d_nhwc_f32( |
|
xnn_operator_t deconvolution_op, |
|
const float* input, |
|
float* output); |
|
|
|
enum xnn_status xnn_create_divide_nd_f32( |
|
float output_min, |
|
float output_max, |
|
uint32_t flags, |
|
xnn_operator_t* divide_op_out); |
|
|
|
enum xnn_status xnn_reshape_divide_nd_f32( |
|
xnn_operator_t divide_op, |
|
size_t num_input1_dims, |
|
const size_t* input1_shape, |
|
size_t num_input2_dims, |
|
const size_t* input2_shape, |
|
pthreadpool_t threadpool); |
|
|
|
enum xnn_status xnn_setup_divide_nd_f32( |
|
xnn_operator_t divide_op, |
|
const float* input1, |
|
const float* input2, |
|
float* output); |
|
|
|
enum xnn_status xnn_run_divide_nd_f32( |
|
size_t num_input1_dims, |
|
const size_t* input1_shape, |
|
size_t num_input2_dims, |
|
const size_t* input2_shape, |
|
const float* input1, |
|
const float* input2, |
|
float* output, |
|
float output_min, |
|
float output_max, |
|
uint32_t flags, |
|
pthreadpool_t threadpool); |
|
|
|
enum xnn_status xnn_create_dynamic_fully_connected_nc_f32( |
|
float output_min, |
|
float output_max, |
|
uint32_t flags, |
|
xnn_operator_t* dynamic_fully_connected_op_out); |
|
|
|
enum xnn_status xnn_reshape_dynamic_fully_connected_nc_f32( |
|
xnn_operator_t dynamic_fully_connected_op, |
|
size_t batch_size, |
|
size_t input_channels, |
|
size_t output_channels, |
|
size_t input_stride, |
|
size_t output_stride, |
|
size_t* workspace_size, |
|
size_t* workspace_alignment, |
|
pthreadpool_t threadpool); |
|
|
|
enum xnn_status xnn_setup_dynamic_fully_connected_nc_f32( |
|
xnn_operator_t dynamic_fully_connected_op, |
|
void* workspace, |
|
const float* input, |
|
const float* kernel, |
|
const float* bias, |
|
float* output); |
|
|
|
enum xnn_status xnn_create_elu_nc_f32( |
|
size_t channels, |
|
size_t input_stride, |
|
size_t output_stride, |
|
float alpha, |
|
uint32_t flags, |
|
xnn_operator_t* elu_op_out); |
|
|
|
enum xnn_status xnn_reshape_elu_nc_f32( |
|
xnn_operator_t elu_op, |
|
size_t batch_size, |
|
pthreadpool_t threadpool); |
|
|
|
enum xnn_status xnn_setup_elu_nc_f32( |
|
xnn_operator_t elu_op, |
|
const float* input, |
|
float* output); |
|
|
|
enum xnn_status xnn_run_elu_nc_f32( |
|
size_t channels, |
|
size_t input_stride, |
|
size_t output_stride, |
|
size_t batch_size, |
|
const float* input, |
|
float* output, |
|
float alpha, |
|
uint32_t flags, |
|
pthreadpool_t threadpool); |
|
|
|
enum xnn_status xnn_create_floor_nc_f32( |
|
size_t channels, |
|
size_t input_stride, |
|
size_t output_stride, |
|
uint32_t flags, |
|
xnn_operator_t* floor_op_out); |
|
|
|
enum xnn_status xnn_reshape_floor_nc_f32( |
|
xnn_operator_t floor_op, |
|
size_t batch_size, |
|
pthreadpool_t threadpool); |
|
|
|
enum xnn_status xnn_setup_floor_nc_f32( |
|
xnn_operator_t floor_op, |
|
const float* input, |
|
float* output); |
|
|
|
enum xnn_status xnn_run_floor_nc_f32( |
|
size_t channels, |
|
size_t input_stride, |
|
size_t output_stride, |
|
size_t batch_size, |
|
const float* input, |
|
float* output, |
|
uint32_t flags, |
|
pthreadpool_t threadpool); |
|
|
|
enum xnn_status xnn_create_fully_connected_nc_qd8_f32_qc8w( |
|
size_t input_channels, |
|
size_t output_channels, |
|
size_t input_stride, |
|
size_t output_stride, |
|
const float* kernel_scale, |
|
const int8_t* kernel, |
|
const float* bias, |
|
float output_min, |
|
float output_max, |
|
uint32_t flags, |
|
xnn_code_cache_t code_cache, |
|
xnn_weights_cache_t weights_cache, |
|
xnn_operator_t* fully_connected_op_out); |
|
|
|
enum xnn_status xnn_setup_fully_connected_nc_qd8_f32_qc8w( |
|
xnn_operator_t fully_connected_op, |
|
const int8_t* input, |
|
float* output, |
|
const struct xnn_dynamic_quantization_params* quantization_params); |
|
|
|
enum xnn_status xnn_reshape_fully_connected_nc_qd8_f32_qc8w( |
|
xnn_operator_t fully_connected_op, |
|
size_t batch_size, |
|
pthreadpool_t threadpool); |
|
|
|
enum xnn_status xnn_create_fully_connected_nc_f32( |
|
size_t input_channels, |
|
size_t output_channels, |
|
size_t input_stride, |
|
size_t output_stride, |
|
const float* kernel, |
|
const float* bias, |
|
float output_min, |
|
float output_max, |
|
uint32_t flags, |
|
xnn_code_cache_t code_cache, |
|
xnn_weights_cache_t weights_cache, |
|
xnn_operator_t* fully_connected_op_out); |
|
|
|
enum xnn_status xnn_reshape_fully_connected_nc_f32( |
|
xnn_operator_t fully_connected_op, |
|
size_t batch_size, |
|
pthreadpool_t threadpool); |
|
|
|
enum xnn_status xnn_setup_fully_connected_nc_f32( |
|
xnn_operator_t fully_connected_op, |
|
const float* input, |
|
float* output); |
|
|
|
enum xnn_status xnn_create_fully_connected_nc_f32_qc4w( |
|
size_t input_channels, |
|
size_t output_channels, |
|
size_t input_stride, |
|
size_t output_stride, |
|
const float* kernel_scale, |
|
const uint8_t* kernel, |
|
uint8_t kernel_zero_point, |
|
const float* bias, |
|
float output_min, |
|
float output_max, |
|
uint32_t flags, |
|
xnn_code_cache_t code_cache, |
|
xnn_weights_cache_t weights_cache, |
|
xnn_operator_t* fully_connected_op_out); |
|
|
|
enum xnn_status xnn_reshape_fully_connected_nc_f32_qc4w( |
|
xnn_operator_t fully_connected_op, |
|
size_t batch_size, |
|
pthreadpool_t threadpool); |
|
|
|
enum xnn_status xnn_setup_fully_connected_nc_f32_qc4w( |
|
xnn_operator_t fully_connected_op, |
|
const float* input, |
|
float* output); |
|
|
|
enum xnn_status xnn_create_fully_connected_nc_f32_qc8w( |
|
size_t input_channels, |
|
size_t output_channels, |
|
size_t input_stride, |
|
size_t output_stride, |
|
const float* kernel_scale, |
|
const int8_t* kernel, |
|
const float* bias, |
|
float output_min, |
|
float output_max, |
|
uint32_t flags, |
|
xnn_code_cache_t code_cache, |
|
xnn_weights_cache_t weights_cache, |
|
xnn_operator_t* fully_connected_op_out); |
|
|
|
enum xnn_status xnn_reshape_fully_connected_nc_f32_qc8w( |
|
xnn_operator_t fully_connected_op, |
|
size_t batch_size, |
|
pthreadpool_t threadpool); |
|
|
|
enum xnn_status xnn_setup_fully_connected_nc_f32_qc8w( |
|
xnn_operator_t fully_connected_op, |
|
const float* input, |
|
float* output); |
|
|
|
enum xnn_status xnn_create_global_average_pooling_nwc_f32( |
|
size_t channels, |
|
size_t input_stride, |
|
size_t output_stride, |
|
float output_min, |
|
float output_max, |
|
uint32_t flags, |
|
xnn_operator_t* global_average_pooling_op_out); |
|
|
|
enum xnn_status xnn_reshape_global_average_pooling_nwc_f32( |
|
xnn_operator_t global_average_pooling_op, |
|
size_t batch_size, |
|
size_t width, |
|
pthreadpool_t threadpool); |
|
|
|
enum xnn_status xnn_setup_global_average_pooling_nwc_f32( |
|
xnn_operator_t global_average_pooling_op, |
|
const float* input, |
|
float* output); |
|
|
|
enum xnn_status xnn_create_global_sum_pooling_nwc_f32( |
|
size_t channels, |
|
size_t input_stride, |
|
size_t output_stride, |
|
float output_min, |
|
float output_max, |
|
uint32_t flags, |
|
xnn_operator_t* global_sum_pooling_op_out); |
|
|
|
enum xnn_status xnn_reshape_global_sum_pooling_nwc_f32( |
|
xnn_operator_t global_sum_pooling_op, |
|
size_t batch_size, |
|
size_t width, |
|
pthreadpool_t threadpool); |
|
|
|
enum xnn_status xnn_setup_global_sum_pooling_nwc_f32( |
|
xnn_operator_t global_sum_pooling_op, |
|
const float* input, |
|
float* output); |
|
|
|
enum xnn_status xnn_create_hardswish_nc_f32( |
|
size_t channels, |
|
size_t input_stride, |
|
size_t output_stride, |
|
uint32_t flags, |
|
xnn_operator_t* hardswish_op_out); |
|
|
|
enum xnn_status xnn_reshape_hardswish_nc_f32( |
|
xnn_operator_t hardswish_op, |
|
size_t batch_size, |
|
pthreadpool_t threadpool); |
|
|
|
enum xnn_status xnn_setup_hardswish_nc_f32( |
|
xnn_operator_t hardswish_op, |
|
const float* input, |
|
float* output); |
|
|
|
enum xnn_status xnn_run_hardswish_nc_f32( |
|
size_t channels, |
|
size_t input_stride, |
|
size_t output_stride, |
|
size_t batch_size, |
|
const float* input, |
|
float* output, |
|
uint32_t flags, |
|
pthreadpool_t threadpool); |
|
|
|
enum xnn_status xnn_create_leaky_relu_nc_f32( |
|
size_t channels, |
|
size_t input_stride, |
|
size_t output_stride, |
|
float negative_slope, |
|
uint32_t flags, |
|
xnn_operator_t* leaky_relu_op_out); |
|
|
|
enum xnn_status xnn_reshape_leaky_relu_nc_f32( |
|
xnn_operator_t leaky_relu_op, |
|
size_t batch_size, |
|
pthreadpool_t threadpool); |
|
|
|
enum xnn_status xnn_setup_leaky_relu_nc_f32( |
|
xnn_operator_t leaky_relu_op, |
|
const float* input, |
|
float* output); |
|
|
|
enum xnn_status xnn_run_leaky_relu_nc_f32( |
|
size_t channels, |
|
size_t input_stride, |
|
size_t output_stride, |
|
size_t batch_size, |
|
const float* input, |
|
float* output, |
|
float negative_slope, |
|
uint32_t flags, |
|
pthreadpool_t threadpool); |
|
|
|
enum xnn_status xnn_create_max_pooling2d_nhwc_f32( |
|
uint32_t input_padding_top, |
|
uint32_t input_padding_right, |
|
uint32_t input_padding_bottom, |
|
uint32_t input_padding_left, |
|
uint32_t pooling_height, |
|
uint32_t pooling_width, |
|
uint32_t stride_height, |
|
uint32_t stride_width, |
|
uint32_t dilation_height, |
|
uint32_t dilation_width, |
|
size_t channels, |
|
size_t input_pixel_stride, |
|
size_t output_pixel_stride, |
|
float output_min, |
|
float output_max, |
|
uint32_t flags, |
|
xnn_operator_t* max_pooling_op_out); |
|
|
|
enum xnn_status xnn_reshape_max_pooling2d_nhwc_f32( |
|
xnn_operator_t max_pooling_op, |
|
size_t batch_size, |
|
size_t input_height, |
|
size_t input_width, |
|
size_t* output_height_out, |
|
size_t* output_width_out, |
|
pthreadpool_t threadpool); |
|
|
|
enum xnn_status xnn_setup_max_pooling2d_nhwc_f32( |
|
xnn_operator_t max_pooling_op, |
|
const float* input, |
|
float* output); |
|
|
|
enum xnn_status xnn_create_maximum_nd_f32( |
|
uint32_t flags, |
|
xnn_operator_t* maximum_op_out); |
|
|
|
enum xnn_status xnn_reshape_maximum_nd_f32( |
|
xnn_operator_t maximum_op, |
|
size_t num_input1_dims, |
|
const size_t* input1_shape, |
|
size_t num_input2_dims, |
|
const size_t* input2_shape, |
|
pthreadpool_t threadpool); |
|
|
|
enum xnn_status xnn_setup_maximum_nd_f32( |
|
xnn_operator_t maximum_op, |
|
const float* input1, |
|
const float* input2, |
|
float* output); |
|
|
|
enum xnn_status xnn_run_maximum_nd_f32( |
|
size_t num_input1_dims, |
|
const size_t* input1_shape, |
|
size_t num_input2_dims, |
|
const size_t* input2_shape, |
|
const float* input1, |
|
const float* input2, |
|
float* output, |
|
uint32_t flags, |
|
pthreadpool_t threadpool); |
|
|
|
enum xnn_status xnn_create_mean_nd_f32( |
|
uint32_t flags, |
|
xnn_operator_t* mean_op_out); |
|
|
|
enum xnn_status xnn_reshape_mean_nd_f32( |
|
xnn_operator_t mean_op, |
|
size_t num_reduction_axes, |
|
const size_t* reduction_axes, |
|
size_t num_input_dims, |
|
const size_t* input_shape, |
|
pthreadpool_t threadpool); |
|
|
|
enum xnn_status xnn_setup_mean_nd_f32( |
|
xnn_operator_t mean_op, |
|
const float* input, |
|
float* output); |
|
|
|
enum xnn_status xnn_create_minimum_nd_f32( |
|
uint32_t flags, |
|
xnn_operator_t* minimum_op_out); |
|
|
|
enum xnn_status xnn_reshape_minimum_nd_f32( |
|
xnn_operator_t minimum_op, |
|
size_t num_input1_dims, |
|
const size_t* input1_shape, |
|
size_t num_input2_dims, |
|
const size_t* input2_shape, |
|
pthreadpool_t threadpool); |
|
|
|
enum xnn_status xnn_setup_minimum_nd_f32( |
|
xnn_operator_t minimum_op, |
|
const float* input1, |
|
const float* input2, |
|
float* output); |
|
|
|
enum xnn_status xnn_run_minimum_nd_f32( |
|
size_t num_input1_dims, |
|
const size_t* input1_shape, |
|
size_t num_input2_dims, |
|
const size_t* input2_shape, |
|
const float* input1, |
|
const float* input2, |
|
float* output, |
|
uint32_t flags, |
|
pthreadpool_t threadpool); |
|
|
|
enum xnn_status xnn_create_multiply_nd_f32( |
|
float output_min, |
|
float output_max, |
|
uint32_t flags, |
|
xnn_operator_t* multiply_op_out); |
|
|
|
enum xnn_status xnn_reshape_multiply_nd_f32( |
|
xnn_operator_t multiply_op, |
|
size_t num_input1_dims, |
|
const size_t* input1_shape, |
|
size_t num_input2_dims, |
|
const size_t* input2_shape, |
|
pthreadpool_t threadpool); |
|
|
|
enum xnn_status xnn_setup_multiply_nd_f32( |
|
xnn_operator_t multiply_op, |
|
const float* input1, |
|
const float* input2, |
|
float* output); |
|
|
|
enum xnn_status xnn_run_multiply_nd_f32( |
|
size_t num_input1_dims, |
|
const size_t* input1_shape, |
|
size_t num_input2_dims, |
|
const size_t* input2_shape, |
|
const float* input1, |
|
const float* input2, |
|
float* output, |
|
float output_min, |
|
float output_max, |
|
uint32_t flags, |
|
pthreadpool_t threadpool); |
|
|
|
enum xnn_status xnn_create_negate_nc_f32( |
|
size_t channels, |
|
size_t input_stride, |
|
size_t output_stride, |
|
uint32_t flags, |
|
xnn_operator_t* negate_op_out); |
|
|
|
enum xnn_status xnn_reshape_negate_nc_f32( |
|
xnn_operator_t negate_op, |
|
size_t batch_size, |
|
pthreadpool_t threadpool); |
|
|
|
enum xnn_status xnn_setup_negate_nc_f32( |
|
xnn_operator_t negate_op, |
|
const float* input, |
|
float* output); |
|
|
|
enum xnn_status xnn_run_negate_nc_f32( |
|
size_t channels, |
|
size_t input_stride, |
|
size_t output_stride, |
|
size_t batch_size, |
|
const float* input, |
|
float* output, |
|
uint32_t flags, |
|
pthreadpool_t threadpool); |
|
|
|
enum xnn_status xnn_create_prelu_nc_f32( |
|
size_t channels, |
|
size_t input_stride, |
|
size_t output_stride, |
|
const float* negative_slope, |
|
uint32_t flags, |
|
xnn_code_cache_t code_cache, |
|
xnn_weights_cache_t weights_cache, |
|
xnn_operator_t* prelu_op_out); |
|
|
|
enum xnn_status xnn_reshape_prelu_nc_f32( |
|
xnn_operator_t prelu_op, |
|
size_t batch_size, |
|
pthreadpool_t threadpool); |
|
|
|
enum xnn_status xnn_setup_prelu_nc_f32( |
|
xnn_operator_t prelu_op, |
|
const float* input, |
|
float* output); |
|
|
|
enum xnn_status xnn_create_resize_bilinear2d_nhwc_f32( |
|
size_t channels, |
|
size_t input_pixel_stride, |
|
size_t output_pixel_stride, |
|
uint32_t flags, |
|
xnn_operator_t* resize_op_out); |
|
|
|
enum xnn_status xnn_reshape_resize_bilinear2d_nhwc_f32( |
|
xnn_operator_t resize_op, |
|
size_t batch_size, |
|
size_t input_height, |
|
size_t input_width, |
|
size_t output_height, |
|
size_t output_width, |
|
pthreadpool_t threadpool); |
|
|
|
enum xnn_status xnn_setup_resize_bilinear2d_nhwc_f32( |
|
xnn_operator_t resize_op, |
|
const float* input, |
|
float* output); |
|
|
|
enum xnn_status xnn_create_rope_nthc_f32( |
|
size_t max_sequence_size, |
|
size_t channels, |
|
const float* weights, |
|
uint32_t flags, |
|
xnn_operator_t* rope_op_out); |
|
|
|
enum xnn_status xnn_reshape_rope_nthc_f32( |
|
xnn_operator_t rope_op, |
|
size_t batch_size, |
|
size_t sequence_size, |
|
size_t heads, |
|
pthreadpool_t threadpool); |
|
|
|
enum xnn_status xnn_setup_rope_nthc_f32( |
|
xnn_operator_t rope_op, |
|
const float* input, |
|
float* output); |
|
|
|
enum xnn_status xnn_create_sigmoid_nc_f32( |
|
size_t channels, |
|
size_t input_stride, |
|
size_t output_stride, |
|
uint32_t flags, |
|
xnn_operator_t* sigmoid_op_out); |
|
|
|
enum xnn_status xnn_reshape_sigmoid_nc_f32( |
|
xnn_operator_t sigmoid_op, |
|
size_t batch_size, |
|
pthreadpool_t threadpool); |
|
|
|
enum xnn_status xnn_setup_sigmoid_nc_f32( |
|
xnn_operator_t sigmoid_op, |
|
const float* input, |
|
float* output); |
|
|
|
enum xnn_status xnn_run_sigmoid_nc_f32( |
|
size_t channels, |
|
size_t input_stride, |
|
size_t output_stride, |
|
size_t batch_size, |
|
const float* input, |
|
float* output, |
|
uint32_t flags, |
|
pthreadpool_t threadpool); |
|
|
|
enum xnn_status xnn_create_softmax_nc_f32( |
|
size_t channels, |
|
size_t input_stride, |
|
size_t output_stride, |
|
uint32_t flags, |
|
xnn_operator_t* softmax_op_out); |
|
|
|
enum xnn_status xnn_reshape_softmax_nc_f32( |
|
xnn_operator_t softmax_op, |
|
size_t batch_size, |
|
pthreadpool_t threadpool); |
|
|
|
enum xnn_status xnn_setup_softmax_nc_f32( |
|
xnn_operator_t softmax_op, |
|
const float* input, |
|
float* output); |
|
|
|
enum xnn_status xnn_create_square_nc_f32( |
|
size_t channels, |
|
size_t input_stride, |
|
size_t output_stride, |
|
uint32_t flags, |
|
xnn_operator_t* square_op_out); |
|
|
|
enum xnn_status xnn_reshape_square_nc_f32( |
|
xnn_operator_t square_op, |
|
size_t batch_size, |
|
pthreadpool_t threadpool); |
|
|
|
enum xnn_status xnn_setup_square_nc_f32( |
|
xnn_operator_t square_op, |
|
const float* input, |
|
float* output); |
|
|
|
enum xnn_status xnn_run_square_nc_f32( |
|
size_t channels, |
|
size_t input_stride, |
|
size_t output_stride, |
|
size_t batch_size, |
|
const float* input, |
|
float* output, |
|
uint32_t flags, |
|
pthreadpool_t threadpool); |
|
|
|
enum xnn_status xnn_create_square_root_nc_f32( |
|
size_t channels, |
|
size_t input_stride, |
|
size_t output_stride, |
|
uint32_t flags, |
|
xnn_operator_t* sqrt_op_out); |
|
|
|
enum xnn_status xnn_reshape_square_root_nc_f32( |
|
xnn_operator_t sqrt_op, |
|
size_t batch_size, |
|
pthreadpool_t threadpool); |
|
|
|
enum xnn_status xnn_setup_square_root_nc_f32( |
|
xnn_operator_t sqrt_op, |
|
const float* input, |
|
float* output); |
|
|
|
enum xnn_status xnn_run_square_root_nc_f32( |
|
size_t channels, |
|
size_t input_stride, |
|
size_t output_stride, |
|
size_t batch_size, |
|
const float* input, |
|
float* output, |
|
uint32_t flags, |
|
pthreadpool_t threadpool); |
|
|
|
enum xnn_status xnn_create_squared_difference_nd_f32( |
|
uint32_t flags, |
|
xnn_operator_t* squared_difference_op_out); |
|
|
|
enum xnn_status xnn_reshape_squared_difference_nd_f32( |
|
xnn_operator_t squared_difference_op, |
|
size_t num_input1_dims, |
|
const size_t* input1_shape, |
|
size_t num_input2_dims, |
|
const size_t* input2_shape, |
|
pthreadpool_t threadpool); |
|
|
|
enum xnn_status xnn_setup_squared_difference_nd_f32( |
|
xnn_operator_t squared_difference_op, |
|
const float* input1, |
|
const float* input2, |
|
float* output); |
|
|
|
enum xnn_status xnn_run_squared_difference_nd_f32( |
|
size_t num_input1_dims, |
|
const size_t* input1_shape, |
|
size_t num_input2_dims, |
|
const size_t* input2_shape, |
|
const float* input1, |
|
const float* input2, |
|
float* output, |
|
uint32_t flags, |
|
pthreadpool_t threadpool); |
|
|
|
enum xnn_status xnn_create_subtract_nd_f32( |
|
float output_min, |
|
float output_max, |
|
uint32_t flags, |
|
xnn_operator_t* subtract_op_out); |
|
|
|
enum xnn_status xnn_reshape_subtract_nd_f32( |
|
xnn_operator_t subtract_op, |
|
size_t num_input1_dims, |
|
const size_t* input1_shape, |
|
size_t num_input2_dims, |
|
const size_t* input2_shape, |
|
pthreadpool_t threadpool); |
|
|
|
enum xnn_status xnn_setup_subtract_nd_f32( |
|
xnn_operator_t subtract_op, |
|
const float* input1, |
|
const float* input2, |
|
float* output); |
|
|
|
enum xnn_status xnn_run_subtract_nd_f32( |
|
size_t num_input1_dims, |
|
const size_t* input1_shape, |
|
size_t num_input2_dims, |
|
const size_t* input2_shape, |
|
const float* input1, |
|
const float* input2, |
|
float* output, |
|
float output_min, |
|
float output_max, |
|
uint32_t flags, |
|
pthreadpool_t threadpool); |
|
|
|
enum xnn_status xnn_create_tanh_nc_f32( |
|
size_t channels, |
|
size_t input_stride, |
|
size_t output_stride, |
|
uint32_t flags, |
|
xnn_operator_t* tanh_op_out); |
|
|
|
enum xnn_status xnn_reshape_tanh_nc_f32( |
|
xnn_operator_t tanh_op, |
|
size_t batch_size, |
|
pthreadpool_t threadpool); |
|
|
|
enum xnn_status xnn_setup_tanh_nc_f32( |
|
xnn_operator_t tanh_op, |
|
const float* input, |
|
float* output); |
|
|
|
enum xnn_status xnn_run_tanh_nc_f32( |
|
size_t channels, |
|
size_t input_stride, |
|
size_t output_stride, |
|
size_t batch_size, |
|
const float* input, |
|
float* output, |
|
uint32_t flags, |
|
pthreadpool_t threadpool); |
|
|
|
enum xnn_status xnn_create_truncation_nc_f32( |
|
size_t channels, |
|
size_t input_stride, |
|
size_t output_stride, |
|
uint32_t flags, |
|
xnn_operator_t* truncation_op_out); |
|
|
|
enum xnn_status xnn_reshape_truncation_nc_f32( |
|
xnn_operator_t truncation_op, |
|
size_t batch_size, |
|
pthreadpool_t threadpool); |
|
|
|
enum xnn_status xnn_setup_truncation_nc_f32( |
|
xnn_operator_t truncation_op, |
|
const float* input, |
|
float* output); |
|
|
|
enum xnn_status xnn_run_truncation_nc_f32( |
|
size_t channels, |
|
size_t input_stride, |
|
size_t output_stride, |
|
size_t batch_size, |
|
const float* input, |
|
float* output, |
|
uint32_t flags, |
|
pthreadpool_t threadpool); |
|
|
|
enum xnn_status xnn_create_depth_to_space_nchw2nhwc_x32( |
|
size_t output_channels, |
|
size_t input_channel_stride, |
|
size_t output_channel_stride, |
|
uint32_t block_size, |
|
uint32_t flags, |
|
xnn_operator_t* depth_to_space_op_out); |
|
|
|
enum xnn_status xnn_reshape_depth_to_space_nchw2nhwc_x32( |
|
xnn_operator_t depth_to_space_op, |
|
size_t batch_size, |
|
size_t input_height, |
|
size_t input_width, |
|
size_t* output_height_out, |
|
size_t* output_width_out, |
|
size_t* output_channels_out, |
|
pthreadpool_t threadpool); |
|
|
|
enum xnn_status xnn_setup_depth_to_space_nchw2nhwc_x32( |
|
xnn_operator_t depth_to_space_op, |
|
const void* input, |
|
void* output); |
|
|
|
enum xnn_status xnn_create_convolution2d_nchw_f32( |
|
uint32_t input_padding_top, |
|
uint32_t input_padding_right, |
|
uint32_t input_padding_bottom, |
|
uint32_t input_padding_left, |
|
uint32_t kernel_height, |
|
uint32_t kernel_width, |
|
uint32_t subsampling_height, |
|
uint32_t subsampling_width, |
|
uint32_t dilation_height, |
|
uint32_t dilation_width, |
|
uint32_t groups, |
|
size_t group_input_channels, |
|
size_t group_output_channels, |
|
size_t input_channel_stride, |
|
size_t output_channel_stride, |
|
const float* kernel, |
|
const float* bias, |
|
float output_min, |
|
float output_max, |
|
uint32_t flags, |
|
xnn_code_cache_t code_cache, |
|
xnn_weights_cache_t weights_cache, |
|
xnn_operator_t* convolution_op_out); |
|
|
|
enum xnn_status xnn_reshape_convolution2d_nchw_f32( |
|
xnn_operator_t convolution_op, |
|
size_t batch_size, |
|
size_t input_height, |
|
size_t input_width, |
|
size_t* output_height_out, |
|
size_t* output_width_out, |
|
pthreadpool_t threadpool); |
|
|
|
enum xnn_status xnn_setup_convolution2d_nchw_f32( |
|
xnn_operator_t convolution_op, |
|
const float* input, |
|
float* output); |
|
|
|
enum xnn_status xnn_create_global_average_pooling_ncw_f32( |
|
size_t channels, |
|
float output_min, |
|
float output_max, |
|
uint32_t flags, |
|
xnn_operator_t* global_average_pooling_op_out); |
|
|
|
enum xnn_status xnn_reshape_global_average_pooling_ncw_f32( |
|
xnn_operator_t global_average_pooling_op, |
|
size_t batch_size, |
|
size_t width, |
|
pthreadpool_t threadpool); |
|
|
|
enum xnn_status xnn_setup_global_average_pooling_ncw_f32( |
|
xnn_operator_t global_average_pooling_op, |
|
const float* input, |
|
float* output); |
|
|
|
enum xnn_status xnn_create_resize_bilinear2d_nchw_f32( |
|
size_t channels, |
|
size_t input_pixel_stride, |
|
size_t output_pixel_stride, |
|
uint32_t flags, |
|
xnn_operator_t* resize_op_out); |
|
|
|
enum xnn_status xnn_reshape_resize_bilinear2d_nchw_f32( |
|
xnn_operator_t resize_op, |
|
size_t batch_size, |
|
size_t input_height, |
|
size_t input_width, |
|
size_t output_height, |
|
size_t output_width, |
|
pthreadpool_t threadpool); |
|
|
|
enum xnn_status xnn_setup_resize_bilinear2d_nchw_f32( |
|
xnn_operator_t resize_op, |
|
const float* input, |
|
float* output); |
|
|
|
enum xnn_status xnn_create_channel_shuffle_nc_x32( |
|
size_t groups, |
|
size_t group_channels, |
|
size_t input_stride, |
|
size_t output_stride, |
|
uint32_t flags, |
|
xnn_operator_t* channel_shuffle_op_out); |
|
|
|
enum xnn_status xnn_reshape_channel_shuffle_nc_x32( |
|
xnn_operator_t channel_shuffle_op, |
|
size_t batch_size, |
|
pthreadpool_t threadpool); |
|
|
|
enum xnn_status xnn_setup_channel_shuffle_nc_x32( |
|
xnn_operator_t channel_shuffle_op, |
|
const void* input, |
|
void* output); |
|
|
|
enum xnn_status xnn_create_constant_pad_nd_x32( |
|
const void* padding_value, |
|
uint32_t flags, |
|
xnn_operator_t* constant_pad_op_out); |
|
|
|
enum xnn_status xnn_reshape_constant_pad_nd_x32( |
|
xnn_operator_t constant_pad_op, |
|
size_t num_dims, |
|
const size_t* input_shape, |
|
const size_t* pre_padding, |
|
const size_t* post_padding, |
|
pthreadpool_t threadpool); |
|
|
|
enum xnn_status xnn_setup_constant_pad_nd_x32( |
|
xnn_operator_t constant_pad_op, |
|
const void* input, |
|
void* output); |
|
|
|
enum xnn_status xnn_run_constant_pad_nd_x32( |
|
uint32_t flags, |
|
size_t num_dims, |
|
const size_t* input_shape, |
|
const size_t* pre_paddings, |
|
const size_t* post_paddings, |
|
const void* input, |
|
void* output, |
|
const void* padding_value, |
|
pthreadpool_t threadpool); |
|
|
|
enum xnn_status xnn_create_copy_nc_x32( |
|
size_t channels, |
|
size_t input_stride, |
|
size_t output_stride, |
|
uint32_t flags, |
|
xnn_operator_t* copy_op_out); |
|
|
|
enum xnn_status xnn_reshape_copy_nc_x32( |
|
xnn_operator_t copy_op, |
|
size_t batch_size, |
|
pthreadpool_t threadpool); |
|
|
|
enum xnn_status xnn_setup_copy_nc_x32( |
|
xnn_operator_t copy_op, |
|
const void* input, |
|
void* output); |
|
|
|
enum xnn_status xnn_run_copy_nc_x32( |
|
size_t channels, |
|
size_t input_stride, |
|
size_t output_stride, |
|
size_t batch_size, |
|
const uint32_t* input, |
|
uint32_t* output, |
|
uint32_t flags, |
|
pthreadpool_t threadpool); |
|
|
|
enum xnn_status xnn_create_depth_to_space_nhwc_x32( |
|
size_t output_channels, |
|
size_t input_channel_stride, |
|
size_t output_channel_stride, |
|
uint32_t block_size, |
|
uint32_t flags, |
|
xnn_operator_t* depth_to_space_op_out); |
|
|
|
enum xnn_status xnn_reshape_depth_to_space_nhwc_x32( |
|
xnn_operator_t depth_to_space_op, |
|
size_t batch_size, |
|
size_t input_height, |
|
size_t input_width, |
|
size_t* output_height_out, |
|
size_t* output_width_out, |
|
size_t* output_channels_out, |
|
pthreadpool_t threadpool); |
|
|
|
enum xnn_status xnn_setup_depth_to_space_nhwc_x32( |
|
xnn_operator_t depth_to_space_op, |
|
const void* input, |
|
void* output); |
|
|
|
enum xnn_status xnn_create_slice_nd_x32( |
|
uint32_t flags, |
|
xnn_operator_t* slice_op_out); |
|
|
|
enum xnn_status xnn_reshape_slice_nd_x32( |
|
xnn_operator_t slice_op, |
|
size_t num_dims, |
|
const size_t* input_shape, |
|
const size_t* offsets, |
|
const size_t* sizes, |
|
pthreadpool_t threadpool); |
|
|
|
enum xnn_status xnn_setup_slice_nd_x32( |
|
xnn_operator_t slice_op, |
|
const void* input, |
|
void* output); |
|
|
|
enum xnn_status xnn_run_slice_nd_x32( |
|
size_t num_dims, |
|
const size_t* input_shape, |
|
const size_t* offsets, |
|
const size_t* sizes, |
|
const void* input, |
|
void* output, |
|
uint32_t flags, |
|
pthreadpool_t threadpool); |
|
|
|
enum xnn_status xnn_create_space_to_depth_nhwc_x32( |
|
size_t input_channels, |
|
size_t input_channel_stride, |
|
size_t output_channel_stride, |
|
uint32_t block_size, |
|
uint32_t flags, |
|
xnn_operator_t* space_to_depth_op_out); |
|
|
|
enum xnn_status xnn_reshape_space_to_depth_nhwc_x32( |
|
xnn_operator_t space_to_depth_op, |
|
size_t batch_size, |
|
size_t input_height, |
|
size_t input_width, |
|
size_t* output_height_out, |
|
size_t* output_width_out, |
|
size_t* output_channels_out, |
|
pthreadpool_t threadpool); |
|
|
|
enum xnn_status xnn_setup_space_to_depth_nhwc_x32( |
|
xnn_operator_t space_to_depth_op, |
|
const void* input, |
|
void* output); |
|
|
|
enum xnn_status xnn_create_transpose_nd_x32( |
|
uint32_t flags, |
|
xnn_operator_t* transpose_op_out); |
|
|
|
enum xnn_status xnn_reshape_transpose_nd_x32( |
|
xnn_operator_t transpose_op, |
|
size_t num_dims, |
|
const size_t* input_shape, |
|
const size_t* output_perm, |
|
pthreadpool_t threadpool); |
|
|
|
enum xnn_status xnn_setup_transpose_nd_x32( |
|
xnn_operator_t transpose_op, |
|
const void* input, |
|
void* output); |
|
|
|
enum xnn_status xnn_run_transpose_nd_x32( |
|
const void* input, |
|
void* output, |
|
size_t num_dims, |
|
const size_t* input_shape, |
|
const size_t* output_perm, |
|
uint32_t flags, |
|
pthreadpool_t threadpool); |
|
|
|
enum xnn_status xnn_create_unpooling2d_nhwc_x32( |
|
uint32_t input_padding_top, |
|
uint32_t input_padding_right, |
|
uint32_t input_padding_bottom, |
|
uint32_t input_padding_left, |
|
uint32_t pooling_height, |
|
uint32_t pooling_width, |
|
size_t channels, |
|
size_t input_pixel_stride, |
|
size_t output_pixel_stride, |
|
uint32_t flags, |
|
xnn_operator_t* unpooling_op_out); |
|
|
|
enum xnn_status xnn_reshape_unpooling2d_nhwc_x32( |
|
xnn_operator_t unpooling_op, |
|
size_t batch_size, |
|
size_t input_height, |
|
size_t input_width, |
|
size_t* output_height_out, |
|
size_t* output_width_out, |
|
pthreadpool_t threadpool); |
|
|
|
enum xnn_status xnn_setup_unpooling2d_nhwc_x32( |
|
xnn_operator_t unpooling_op, |
|
const void* input, |
|
const uint32_t* index, |
|
void* output); |
|
|
|
enum xnn_status xnn_create_abs_nc_f16( |
|
size_t channels, |
|
size_t input_stride, |
|
size_t output_stride, |
|
uint32_t flags, |
|
xnn_operator_t* abs_op_out); |
|
|
|
enum xnn_status xnn_reshape_abs_nc_f16( |
|
xnn_operator_t abs_op, |
|
size_t batch_size, |
|
pthreadpool_t threadpool); |
|
|
|
enum xnn_status xnn_setup_abs_nc_f16( |
|
xnn_operator_t abs_op, |
|
const void* input, |
|
void* output); |
|
|
|
enum xnn_status xnn_create_add_nd_f16( |
|
float output_min, |
|
float output_max, |
|
uint32_t flags, |
|
xnn_operator_t* add_op_out); |
|
|
|
enum xnn_status xnn_reshape_add_nd_f16( |
|
xnn_operator_t add_op, |
|
size_t num_input1_dims, |
|
const size_t* input1_shape, |
|
size_t num_input2_dims, |
|
const size_t* input2_shape, |
|
pthreadpool_t threadpool); |
|
|
|
enum xnn_status xnn_setup_add_nd_f16( |
|
xnn_operator_t add_op, |
|
const void* input1, |
|
const void* input2, |
|
void* output); |
|
|
|
enum xnn_status xnn_create_average_pooling2d_nhwc_f16( |
|
uint32_t input_padding_top, |
|
uint32_t input_padding_right, |
|
uint32_t input_padding_bottom, |
|
uint32_t input_padding_left, |
|
uint32_t pooling_height, |
|
uint32_t pooling_width, |
|
uint32_t stride_height, |
|
uint32_t stride_width, |
|
size_t channels, |
|
size_t input_pixel_stride, |
|
size_t output_pixel_stride, |
|
float output_min, |
|
float output_max, |
|
uint32_t flags, |
|
xnn_operator_t* average_pooling_op_out); |
|
|
|
enum xnn_status xnn_reshape_average_pooling2d_nhwc_f16( |
|
xnn_operator_t average_pooling_op, |
|
size_t batch_size, |
|
size_t input_height, |
|
size_t input_width, |
|
size_t* output_height_out, |
|
size_t* output_width_out, |
|
pthreadpool_t threadpool); |
|
|
|
enum xnn_status xnn_setup_average_pooling2d_nhwc_f16( |
|
xnn_operator_t average_pooling_op, |
|
const void* input, |
|
void* output); |
|
|
|
enum xnn_status xnn_create_bankers_rounding_nc_f16( |
|
size_t channels, |
|
size_t input_stride, |
|
size_t output_stride, |
|
uint32_t flags, |
|
xnn_operator_t* rounding_op_out); |
|
|
|
enum xnn_status xnn_reshape_bankers_rounding_nc_f16( |
|
xnn_operator_t rounding_op, |
|
size_t batch_size, |
|
pthreadpool_t threadpool); |
|
|
|
enum xnn_status xnn_setup_bankers_rounding_nc_f16( |
|
xnn_operator_t rounding_op, |
|
const void* input, |
|
void* output); |
|
|
|
enum xnn_status xnn_create_ceiling_nc_f16( |
|
size_t channels, |
|
size_t input_stride, |
|
size_t output_stride, |
|
uint32_t flags, |
|
xnn_operator_t* ceiling_op_out); |
|
|
|
enum xnn_status xnn_reshape_ceiling_nc_f16( |
|
xnn_operator_t ceiling_op, |
|
size_t batch_size, |
|
pthreadpool_t threadpool); |
|
|
|
enum xnn_status xnn_setup_ceiling_nc_f16( |
|
xnn_operator_t ceiling_op, |
|
const void* input, |
|
void* output); |
|
|
|
enum xnn_status xnn_create_clamp_nc_f16( |
|
size_t channels, |
|
size_t input_stride, |
|
size_t output_stride, |
|
float output_min, |
|
float output_max, |
|
uint32_t flags, |
|
xnn_operator_t* clamp_op_out); |
|
|
|
enum xnn_status xnn_reshape_clamp_nc_f16( |
|
xnn_operator_t clamp_op, |
|
size_t batch_size, |
|
pthreadpool_t threadpool); |
|
|
|
enum xnn_status xnn_setup_clamp_nc_f16( |
|
xnn_operator_t clamp_op, |
|
const void* input, |
|
void* output); |
|
|
|
// Prototype only: create step of the convolution2d_nhwc_f16 operator. |
// Parameters, in order: four input paddings, kernel size, subsampling, dilation, group layout, |
// input/output channel strides, const kernel and bias pointers, float output bounds, flags, and code/weights cache handles. |
// Returns enum xnn_status; convolution_op_out presumably receives the new operator handle (confirm in the definition). |
enum xnn_status xnn_create_convolution2d_nhwc_f16( |
|
uint32_t input_padding_top, |
|
uint32_t input_padding_right, |
|
uint32_t input_padding_bottom, |
|
uint32_t input_padding_left, |
|
uint32_t kernel_height, |
|
uint32_t kernel_width, |
|
uint32_t subsampling_height, |
|
uint32_t subsampling_width, |
|
uint32_t dilation_height, |
|
uint32_t dilation_width, |
|
uint32_t groups, |
|
size_t group_input_channels, |
|
size_t group_output_channels, |
|
size_t input_channel_stride, |
|
size_t output_channel_stride, |
|
const void* kernel, |
|
const void* bias, |
|
float output_min, |
|
float output_max, |
|
uint32_t flags, |
|
xnn_code_cache_t code_cache, |
|
xnn_weights_cache_t weights_cache, |
|
xnn_operator_t* convolution_op_out); |
|
|
|
// Prototype only: reshape step of the convolution2d_nhwc_f16 operator. |
// Takes the operator handle, batch size, input height/width and a pthreadpool handle; returns enum xnn_status. |
// output_height_out / output_width_out are non-const size_t pointers, presumably written with the resulting size (confirm in the definition). |
enum xnn_status xnn_reshape_convolution2d_nhwc_f16( |
|
xnn_operator_t convolution_op, |
|
size_t batch_size, |
|
size_t input_height, |
|
size_t input_width, |
|
size_t* output_height_out, |
|
size_t* output_width_out, |
|
pthreadpool_t threadpool); |
|
|
|
// Prototype only: setup step of the convolution2d_nhwc_f16 operator. |
// Takes the operator handle, a pointer to const input data and a writable output pointer; returns enum xnn_status. |
enum xnn_status xnn_setup_convolution2d_nhwc_f16( |
|
xnn_operator_t convolution_op, |
|
const void* input, |
|
void* output); |
|
|
|
// Prototype only: create step of the deconvolution2d_nhwc_f16 operator. |
// Parameters, in order: four output paddings, kernel size, stride, dilation, group layout, |
// input/output pixel strides, const kernel and bias pointers, float output bounds, flags, and code/weights cache handles. |
// Returns enum xnn_status; deconvolution_op_out presumably receives the new operator handle (confirm in the definition). |
enum xnn_status xnn_create_deconvolution2d_nhwc_f16( |
|
uint32_t output_padding_top, |
|
uint32_t output_padding_right, |
|
uint32_t output_padding_bottom, |
|
uint32_t output_padding_left, |
|
uint32_t kernel_height, |
|
uint32_t kernel_width, |
|
uint32_t stride_height, |
|
uint32_t stride_width, |
|
uint32_t dilation_height, |
|
uint32_t dilation_width, |
|
uint32_t groups, |
|
size_t group_input_channels, |
|
size_t group_output_channels, |
|
size_t input_pixel_stride, |
|
size_t output_pixel_stride, |
|
const void* kernel, |
|
const void* bias, |
|
float output_min, |
|
float output_max, |
|
uint32_t flags, |
|
xnn_code_cache_t code_cache, |
|
xnn_weights_cache_t weights_cache, |
|
xnn_operator_t* deconvolution_op_out); |
|
|
|
// Prototype only: reshape step of the deconvolution2d_nhwc_f16 operator. |
// Takes the operator handle, batch size, input height/width, two uint32_t adjustment values and a pthreadpool handle; returns enum xnn_status. |
// output_height_out / output_width_out are non-const size_t pointers, presumably written with the resulting size (confirm in the definition). |
enum xnn_status xnn_reshape_deconvolution2d_nhwc_f16( |
|
xnn_operator_t deconvolution_op, |
|
size_t batch_size, |
|
size_t input_height, |
|
size_t input_width, |
|
uint32_t adjustment_height, |
|
uint32_t adjustment_width, |
|
size_t* output_height_out, |
|
size_t* output_width_out, |
|
pthreadpool_t threadpool); |
|
|
|
// Prototype only: setup step of the deconvolution2d_nhwc_f16 operator. |
// Takes the operator handle, a pointer to const input data and a writable output pointer; returns enum xnn_status. |
enum xnn_status xnn_setup_deconvolution2d_nhwc_f16( |
|
xnn_operator_t deconvolution_op, |
|
const void* input, |
|
void* output); |
|
|
|
// Prototype only: create step of the divide_nd_f16 operator. |
// Takes float output_min/output_max bounds and flags; returns enum xnn_status. |
// divide_op_out points to an xnn_operator_t, presumably receiving the new operator handle (confirm in the definition). |
enum xnn_status xnn_create_divide_nd_f16( |
|
float output_min, |
|
float output_max, |
|
uint32_t flags, |
|
xnn_operator_t* divide_op_out); |
|
|
|
// Prototype only: reshape step of the divide_nd_f16 operator. |
// Takes the operator handle, a dimension count plus const shape array for each of the two inputs, and a pthreadpool handle. |
// Returns enum xnn_status. |
enum xnn_status xnn_reshape_divide_nd_f16( |
|
xnn_operator_t divide_op, |
|
size_t num_input1_dims, |
|
const size_t* input1_shape, |
|
size_t num_input2_dims, |
|
const size_t* input2_shape, |
|
pthreadpool_t threadpool); |
|
|
|
// Prototype only: setup step of the divide_nd_f16 operator. |
// Takes the operator handle, two pointers to const input data and a writable output pointer; returns enum xnn_status. |
enum xnn_status xnn_setup_divide_nd_f16( |
|
xnn_operator_t divide_op, |
|
const void* input1, |
|
const void* input2, |
|
void* output); |
|
|
|
// Prototype only: create step of the dynamic_fully_connected_nc_f16 operator. |
// Takes float output_min/output_max bounds and flags only; kernel and bias appear in the setup prototype instead. |
// Returns enum xnn_status; dynamic_fully_connected_op_out presumably receives the new operator handle (confirm in the definition). |
enum xnn_status xnn_create_dynamic_fully_connected_nc_f16( |
|
float output_min, |
|
float output_max, |
|
uint32_t flags, |
|
xnn_operator_t* dynamic_fully_connected_op_out); |
|
|
|
// Prototype only: reshape step of the dynamic_fully_connected_nc_f16 operator. |
// Takes the operator handle, batch size, input/output channel counts, input/output strides and a pthreadpool handle. |
// workspace_size / workspace_alignment are non-const size_t pointers, presumably written with the workspace |
// requirements for the setup call (confirm in the definition). Returns enum xnn_status. |
enum xnn_status xnn_reshape_dynamic_fully_connected_nc_f16( |
|
xnn_operator_t dynamic_fully_connected_op, |
|
size_t batch_size, |
|
size_t input_channels, |
|
size_t output_channels, |
|
size_t input_stride, |
|
size_t output_stride, |
|
size_t* workspace_size, |
|
size_t* workspace_alignment, |
|
pthreadpool_t threadpool); |
|
|
|
// Prototype only: setup step of the dynamic_fully_connected_nc_f16 operator. |
// Takes the operator handle, a writable workspace pointer, pointers to const input, kernel and bias data, and a writable output pointer. |
// Returns enum xnn_status. |
enum xnn_status xnn_setup_dynamic_fully_connected_nc_f16( |
|
xnn_operator_t dynamic_fully_connected_op, |
|
void* workspace, |
|
const void* input, |
|
const void* kernel, |
|
const void* bias, |
|
void* output); |
|
|
|
// Prototype only: create step of the elu_nc_f16 operator. |
// Takes channel count, input/output strides, a float alpha and flags; returns enum xnn_status. |
// elu_op_out points to an xnn_operator_t, presumably receiving the new operator handle (confirm in the definition). |
enum xnn_status xnn_create_elu_nc_f16( |
|
size_t channels, |
|
size_t input_stride, |
|
size_t output_stride, |
|
float alpha, |
|
uint32_t flags, |
|
xnn_operator_t* elu_op_out); |
|
|
|
// Prototype only: reshape step of the elu_nc_f16 operator. |
// Takes the operator handle, a batch size and a pthreadpool handle; returns enum xnn_status. |
enum xnn_status xnn_reshape_elu_nc_f16( |
|
xnn_operator_t elu_op, |
|
size_t batch_size, |
|
pthreadpool_t threadpool); |
|
|
|
// Prototype only: setup step of the elu_nc_f16 operator. |
// Takes the operator handle, a pointer to const input data and a writable output pointer; returns enum xnn_status. |
enum xnn_status xnn_setup_elu_nc_f16( |
|
xnn_operator_t elu_op, |
|
const void* input, |
|
void* output); |
|
|
|
// Prototype only: create step of the floor_nc_f16 operator. |
// Takes channel count, input/output strides and flags; returns enum xnn_status. |
// floor_op_out points to an xnn_operator_t, presumably receiving the new operator handle (confirm in the definition). |
enum xnn_status xnn_create_floor_nc_f16( |
|
size_t channels, |
|
size_t input_stride, |
|
size_t output_stride, |
|
uint32_t flags, |
|
xnn_operator_t* floor_op_out); |
|
|
|
// Prototype only: reshape step of the floor_nc_f16 operator. |
// Takes the operator handle, a batch size and a pthreadpool handle; returns enum xnn_status. |
enum xnn_status xnn_reshape_floor_nc_f16( |
|
xnn_operator_t floor_op, |
|
size_t batch_size, |
|
pthreadpool_t threadpool); |
|
|
|
// Prototype only: setup step of the floor_nc_f16 operator. |
// Takes the operator handle, a pointer to const input data and a writable output pointer; returns enum xnn_status. |
enum xnn_status xnn_setup_floor_nc_f16( |
|
xnn_operator_t floor_op, |
|
const void* input, |
|
void* output); |
|
|
|
// Prototype only: create step of the fully_connected_nc_f16 operator. |
// Takes input/output channel counts and strides, const kernel and bias pointers, float output bounds, flags, |
// and code/weights cache handles; returns enum xnn_status. |
// fully_connected_op_out presumably receives the new operator handle (confirm in the definition). |
enum xnn_status xnn_create_fully_connected_nc_f16( |
|
size_t input_channels, |
|
size_t output_channels, |
|
size_t input_stride, |
|
size_t output_stride, |
|
const void* kernel, |
|
const void* bias, |
|
float output_min, |
|
float output_max, |
|
uint32_t flags, |
|
xnn_code_cache_t code_cache, |
|
xnn_weights_cache_t weights_cache, |
|
xnn_operator_t* fully_connected_op_out); |
|
|
|
// Prototype only: reshape step of the fully_connected_nc_f16 operator. |
// Takes the operator handle, a batch size and a pthreadpool handle; returns enum xnn_status. |
enum xnn_status xnn_reshape_fully_connected_nc_f16( |
|
xnn_operator_t fully_connected_op, |
|
size_t batch_size, |
|
pthreadpool_t threadpool); |
|
|
|
// Prototype only: setup step of the fully_connected_nc_f16 operator. |
// Takes the operator handle, a pointer to const input data and a writable output pointer; returns enum xnn_status. |
enum xnn_status xnn_setup_fully_connected_nc_f16( |
|
xnn_operator_t fully_connected_op, |
|
const void* input, |
|
void* output); |
|
|
|
// Prototype only: create step of the global_average_pooling_nwc_f16 operator. |
// Takes channel count, input/output strides, float output_min/output_max bounds and flags; returns enum xnn_status. |
// global_average_pooling_op_out presumably receives the new operator handle (confirm in the definition). |
enum xnn_status xnn_create_global_average_pooling_nwc_f16( |
|
size_t channels, |
|
size_t input_stride, |
|
size_t output_stride, |
|
float output_min, |
|
float output_max, |
|
uint32_t flags, |
|
xnn_operator_t* global_average_pooling_op_out); |
|
|
|
// Prototype only: reshape step of the global_average_pooling_nwc_f16 operator. |
// Takes the operator handle, a batch size, a width and a pthreadpool handle; returns enum xnn_status. |
enum xnn_status xnn_reshape_global_average_pooling_nwc_f16( |
|
xnn_operator_t global_average_pooling_op, |
|
size_t batch_size, |
|
size_t width, |
|
pthreadpool_t threadpool); |
|
|
|
// Prototype only: setup step of the global_average_pooling_nwc_f16 operator. |
// Takes the operator handle, a pointer to const input data and a writable output pointer; returns enum xnn_status. |
enum xnn_status xnn_setup_global_average_pooling_nwc_f16( |
|
xnn_operator_t global_average_pooling_op, |
|
const void* input, |
|
void* output); |
|
|
|
// Prototype only: create step of the global_sum_pooling_nwc_f16 operator. |
// Takes channel count, input/output strides, float output_min/output_max bounds and flags; returns enum xnn_status. |
// global_sum_pooling_op_out presumably receives the new operator handle (confirm in the definition). |
enum xnn_status xnn_create_global_sum_pooling_nwc_f16( |
|
size_t channels, |
|
size_t input_stride, |
|
size_t output_stride, |
|
float output_min, |
|
float output_max, |
|
uint32_t flags, |
|
xnn_operator_t* global_sum_pooling_op_out); |
|
|
|
// Prototype only: reshape step of the global_sum_pooling_nwc_f16 operator. |
// Takes the operator handle, a batch size, a width and a pthreadpool handle; returns enum xnn_status. |
enum xnn_status xnn_reshape_global_sum_pooling_nwc_f16( |
|
xnn_operator_t global_sum_pooling_op, |
|
size_t batch_size, |
|
size_t width, |
|
pthreadpool_t threadpool); |
|
|
|
// Prototype only: setup step of the global_sum_pooling_nwc_f16 operator. |
// Takes the operator handle, a pointer to const input data and a writable output pointer; returns enum xnn_status. |
enum xnn_status xnn_setup_global_sum_pooling_nwc_f16( |
|
xnn_operator_t global_sum_pooling_op, |
|
const void* input, |
|
void* output); |
|
|
|
// Prototype only: create step of the hardswish_nc_f16 operator. |
// Takes channel count, input/output strides and flags; returns enum xnn_status. |
// hardswish_op_out points to an xnn_operator_t, presumably receiving the new operator handle (confirm in the definition). |
enum xnn_status xnn_create_hardswish_nc_f16( |
|
size_t channels, |
|
size_t input_stride, |
|
size_t output_stride, |
|
uint32_t flags, |
|
xnn_operator_t* hardswish_op_out); |
|
|
|
// Prototype only: reshape step of the hardswish_nc_f16 operator. |
// Takes the operator handle, a batch size and a pthreadpool handle; returns enum xnn_status. |
enum xnn_status xnn_reshape_hardswish_nc_f16( |
|
xnn_operator_t hardswish_op, |
|
size_t batch_size, |
|
pthreadpool_t threadpool); |
|
|
|
// Prototype only: setup step of the hardswish_nc_f16 operator. |
// Takes the operator handle, a pointer to const input data and a writable output pointer; returns enum xnn_status. |
enum xnn_status xnn_setup_hardswish_nc_f16( |
|
xnn_operator_t hardswish_op, |
|
const void* input, |
|
void* output); |
|
|
|
// Prototype only: create step of the leaky_relu_nc_f16 operator. |
// Takes channel count, input/output strides, a float negative_slope and flags; returns enum xnn_status. |
// leaky_relu_op_out points to an xnn_operator_t, presumably receiving the new operator handle (confirm in the definition). |
enum xnn_status xnn_create_leaky_relu_nc_f16( |
|
size_t channels, |
|
size_t input_stride, |
|
size_t output_stride, |
|
float negative_slope, |
|
uint32_t flags, |
|
xnn_operator_t* leaky_relu_op_out); |
|
|
|
// Prototype only: reshape step of the leaky_relu_nc_f16 operator. |
// Takes the operator handle, a batch size and a pthreadpool handle; returns enum xnn_status. |
enum xnn_status xnn_reshape_leaky_relu_nc_f16( |
|
xnn_operator_t leaky_relu_op, |
|
size_t batch_size, |
|
pthreadpool_t threadpool); |
|
|
|
// Prototype only: setup step of the leaky_relu_nc_f16 operator. |
// Takes the operator handle, a pointer to const input data and a writable output pointer; returns enum xnn_status. |
enum xnn_status xnn_setup_leaky_relu_nc_f16( |
|
xnn_operator_t leaky_relu_op, |
|
const void* input, |
|
void* output); |
|
|
|
// Prototype only: create step of the max_pooling2d_nhwc_f16 operator. |
// Parameters, in order: four input paddings, pooling size, stride, dilation, channel count, |
// input/output pixel strides, float output bounds and flags; returns enum xnn_status. |
// max_pooling_op_out presumably receives the new operator handle (confirm in the definition). |
enum xnn_status xnn_create_max_pooling2d_nhwc_f16( |
|
uint32_t input_padding_top, |
|
uint32_t input_padding_right, |
|
uint32_t input_padding_bottom, |
|
uint32_t input_padding_left, |
|
uint32_t pooling_height, |
|
uint32_t pooling_width, |
|
uint32_t stride_height, |
|
uint32_t stride_width, |
|
uint32_t dilation_height, |
|
uint32_t dilation_width, |
|
size_t channels, |
|
size_t input_pixel_stride, |
|
size_t output_pixel_stride, |
|
float output_min, |
|
float output_max, |
|
uint32_t flags, |
|
xnn_operator_t* max_pooling_op_out); |
|
|
|
// Prototype only: reshape step of the max_pooling2d_nhwc_f16 operator. |
// Takes the operator handle, batch size, input height/width and a pthreadpool handle; returns enum xnn_status. |
// output_height_out / output_width_out are non-const size_t pointers, presumably written with the resulting size (confirm in the definition). |
enum xnn_status xnn_reshape_max_pooling2d_nhwc_f16( |
|
xnn_operator_t max_pooling_op, |
|
size_t batch_size, |
|
size_t input_height, |
|
size_t input_width, |
|
size_t* output_height_out, |
|
size_t* output_width_out, |
|
pthreadpool_t threadpool); |
|
|
|
// Prototype only: setup step of the max_pooling2d_nhwc_f16 operator. |
// Takes the operator handle, a pointer to const input data and a writable output pointer; returns enum xnn_status. |
enum xnn_status xnn_setup_max_pooling2d_nhwc_f16( |
|
xnn_operator_t max_pooling_op, |
|
const void* input, |
|
void* output); |
|
|
|
// Prototype only: create step of the maximum_nd_f16 operator; the only configuration parameter is flags. |
// Returns enum xnn_status; maximum_op_out presumably receives the new operator handle (confirm in the definition). |
enum xnn_status xnn_create_maximum_nd_f16( |
|
uint32_t flags, |
|
xnn_operator_t* maximum_op_out); |
|
|
|
// Prototype only: reshape step of the maximum_nd_f16 operator. |
// Takes the operator handle, a dimension count plus const shape array for each of the two inputs, and a pthreadpool handle. |
// Returns enum xnn_status. |
enum xnn_status xnn_reshape_maximum_nd_f16( |
|
xnn_operator_t maximum_op, |
|
size_t num_input1_dims, |
|
const size_t* input1_shape, |
|
size_t num_input2_dims, |
|
const size_t* input2_shape, |
|
pthreadpool_t threadpool); |
|
|
|
// Prototype only: setup step of the maximum_nd_f16 operator. |
// Takes the operator handle, two pointers to const input data and a writable output pointer; returns enum xnn_status. |
enum xnn_status xnn_setup_maximum_nd_f16( |
|
xnn_operator_t maximum_op, |
|
const void* input1, |
|
const void* input2, |
|
void* output); |
|
|
|
// Prototype only: create step of the mean_nd_f16 operator; the only configuration parameter is flags. |
// Returns enum xnn_status; mean_op_out presumably receives the new operator handle (confirm in the definition). |
enum xnn_status xnn_create_mean_nd_f16( |
|
uint32_t flags, |
|
xnn_operator_t* mean_op_out); |
|
|
|
// Prototype only: reshape step of the mean_nd_f16 operator. |
// Takes the operator handle, a count plus const array of reduction axes, a dimension count plus const input shape array, |
// and a pthreadpool handle; returns enum xnn_status. |
enum xnn_status xnn_reshape_mean_nd_f16( |
|
xnn_operator_t mean_op, |
|
size_t num_reduction_axes, |
|
const size_t* reduction_axes, |
|
size_t num_input_dims, |
|
const size_t* input_shape, |
|
pthreadpool_t threadpool); |
|
|
|
// Prototype only: setup step of the mean_nd_f16 operator. |
// Takes the operator handle, a pointer to const input data and a writable output pointer; returns enum xnn_status. |
enum xnn_status xnn_setup_mean_nd_f16( |
|
xnn_operator_t mean_op, |
|
const void* input, |
|
void* output); |
|
|
|
// Prototype only: create step of the minimum_nd_f16 operator; the only configuration parameter is flags. |
// Returns enum xnn_status; minimum_op_out presumably receives the new operator handle (confirm in the definition). |
enum xnn_status xnn_create_minimum_nd_f16( |
|
uint32_t flags, |
|
xnn_operator_t* minimum_op_out); |
|
|
|
// Prototype only: reshape step of the minimum_nd_f16 operator. |
// Takes the operator handle, a dimension count plus const shape array for each of the two inputs, and a pthreadpool handle. |
// Returns enum xnn_status. |
enum xnn_status xnn_reshape_minimum_nd_f16( |
|
xnn_operator_t minimum_op, |
|
size_t num_input1_dims, |
|
const size_t* input1_shape, |
|
size_t num_input2_dims, |
|
const size_t* input2_shape, |
|
pthreadpool_t threadpool); |
|
|
|
// Prototype only: setup step of the minimum_nd_f16 operator. |
// Takes the operator handle, two pointers to const input data and a writable output pointer; returns enum xnn_status. |
enum xnn_status xnn_setup_minimum_nd_f16( |
|
xnn_operator_t minimum_op, |
|
const void* input1, |
|
const void* input2, |
|
void* output); |
|
|
|
// Prototype only: create step of the multiply_nd_f16 operator. |
// Takes float output_min/output_max bounds and flags; returns enum xnn_status. |
// multiply_op_out points to an xnn_operator_t, presumably receiving the new operator handle (confirm in the definition). |
enum xnn_status xnn_create_multiply_nd_f16( |
|
float output_min, |
|
float output_max, |
|
uint32_t flags, |
|
xnn_operator_t* multiply_op_out); |
|
|
|
// Prototype only: reshape step of the multiply_nd_f16 operator. |
// Takes the operator handle, a dimension count plus const shape array for each of the two inputs, and a pthreadpool handle. |
// Returns enum xnn_status. |
enum xnn_status xnn_reshape_multiply_nd_f16( |
|
xnn_operator_t multiply_op, |
|
size_t num_input1_dims, |
|
const size_t* input1_shape, |
|
size_t num_input2_dims, |
|
const size_t* input2_shape, |
|
pthreadpool_t threadpool); |
|
|
|
// Prototype only: setup step of the multiply_nd_f16 operator. |
// Takes the operator handle, two pointers to const input data and a writable output pointer; returns enum xnn_status. |
enum xnn_status xnn_setup_multiply_nd_f16( |
|
xnn_operator_t multiply_op, |
|
const void* input1, |
|
const void* input2, |
|
void* output); |
|
|
|
// Prototype only: create step of the negate_nc_f16 operator. |
// Takes channel count, input/output strides and flags; returns enum xnn_status. |
// negate_op_out points to an xnn_operator_t, presumably receiving the new operator handle (confirm in the definition). |
enum xnn_status xnn_create_negate_nc_f16( |
|
size_t channels, |
|
size_t input_stride, |
|
size_t output_stride, |
|
uint32_t flags, |
|
xnn_operator_t* negate_op_out); |
|
|
|
// Prototype only: reshape step of the negate_nc_f16 operator. |
// Takes the operator handle, a batch size and a pthreadpool handle; returns enum xnn_status. |
enum xnn_status xnn_reshape_negate_nc_f16( |
|
xnn_operator_t negate_op, |
|
size_t batch_size, |
|
pthreadpool_t threadpool); |
|
|
|
// Prototype only: setup step of the negate_nc_f16 operator. |
// Takes the operator handle, a pointer to const input data and a writable output pointer; returns enum xnn_status. |
enum xnn_status xnn_setup_negate_nc_f16( |
|
xnn_operator_t negate_op, |
|
const void* input, |
|
void* output); |
|
|
|
// Prototype only: create step of the prelu_nc_f16 operator. |
// Takes channel count, input/output strides, a const negative_slope pointer (a pointer here, unlike the float scalar |
// in the leaky_relu create prototype), flags, and code/weights cache handles; returns enum xnn_status. |
// prelu_op_out presumably receives the new operator handle (confirm in the definition). |
enum xnn_status xnn_create_prelu_nc_f16( |
|
size_t channels, |
|
size_t input_stride, |
|
size_t output_stride, |
|
const void* negative_slope, |
|
uint32_t flags, |
|
xnn_code_cache_t code_cache, |
|
xnn_weights_cache_t weights_cache, |
|
xnn_operator_t* prelu_op_out); |
|
|
|
// Prototype only: reshape step of the prelu_nc_f16 operator. |
// Takes the operator handle, a batch size and a pthreadpool handle; returns enum xnn_status. |
enum xnn_status xnn_reshape_prelu_nc_f16( |
|
xnn_operator_t prelu_op, |
|
size_t batch_size, |
|
pthreadpool_t threadpool); |
|
|
|
// Prototype only: setup step of the prelu_nc_f16 operator. |
// Takes the operator handle, a pointer to const input data and a writable output pointer; returns enum xnn_status. |
enum xnn_status xnn_setup_prelu_nc_f16( |
|
xnn_operator_t prelu_op, |
|
const void* input, |
|
void* output); |
|
|
|
// Prototype only: create step of the resize_bilinear2d_nhwc_f16 operator. |
// Takes channel count, input/output pixel strides and flags; returns enum xnn_status. |
// resize_op_out points to an xnn_operator_t, presumably receiving the new operator handle (confirm in the definition). |
enum xnn_status xnn_create_resize_bilinear2d_nhwc_f16( |
|
size_t channels, |
|
size_t input_pixel_stride, |
|
size_t output_pixel_stride, |
|
uint32_t flags, |
|
xnn_operator_t* resize_op_out); |
|
|
|
// Prototype only: reshape step of the resize_bilinear2d_nhwc_f16 operator. |
// Takes the operator handle, batch size, input height/width, output height/width (passed by value, i.e. supplied by the caller) |
// and a pthreadpool handle; returns enum xnn_status. |
enum xnn_status xnn_reshape_resize_bilinear2d_nhwc_f16( |
|
xnn_operator_t resize_op, |
|
size_t batch_size, |
|
size_t input_height, |
|
size_t input_width, |
|
size_t output_height, |
|
size_t output_width, |
|
pthreadpool_t threadpool); |
|
|
|
// Prototype only: setup step of the resize_bilinear2d_nhwc_f16 operator. |
// Takes the operator handle, a pointer to const input data and a writable output pointer; returns enum xnn_status. |
enum xnn_status xnn_setup_resize_bilinear2d_nhwc_f16( |
|
xnn_operator_t resize_op, |
|
const void* input, |
|
void* output); |
|
|
|
// Prototype only: create step of the sigmoid_nc_f16 operator. |
// Takes channel count, input/output strides and flags; returns enum xnn_status. |
// sigmoid_op_out points to an xnn_operator_t, presumably receiving the new operator handle (confirm in the definition). |
enum xnn_status xnn_create_sigmoid_nc_f16( |
|
size_t channels, |
|
size_t input_stride, |
|
size_t output_stride, |
|
uint32_t flags, |
|
xnn_operator_t* sigmoid_op_out); |
|
|
|
// Prototype only: reshape step of the sigmoid_nc_f16 operator. |
// Takes the operator handle, a batch size and a pthreadpool handle; returns enum xnn_status. |
enum xnn_status xnn_reshape_sigmoid_nc_f16( |
|
xnn_operator_t sigmoid_op, |
|
size_t batch_size, |
|
pthreadpool_t threadpool); |
|
|
|
// Prototype only: setup step of the sigmoid_nc_f16 operator. |
// Takes the operator handle, a pointer to const input data and a writable output pointer; returns enum xnn_status. |
enum xnn_status xnn_setup_sigmoid_nc_f16( |
|
xnn_operator_t sigmoid_op, |
|
const void* input, |
|
void* output); |
|
|
|
// Prototype only: create step of the softmax_nc_f16 operator. |
// Takes channel count, input/output strides and flags; returns enum xnn_status. |
// softmax_op_out points to an xnn_operator_t, presumably receiving the new operator handle (confirm in the definition). |
enum xnn_status xnn_create_softmax_nc_f16( |
|
size_t channels, |
|
size_t input_stride, |
|
size_t output_stride, |
|
uint32_t flags, |
|
xnn_operator_t* softmax_op_out); |
|
|
|
// Prototype only: reshape step of the softmax_nc_f16 operator. |
// Takes the operator handle, a batch size and a pthreadpool handle; returns enum xnn_status. |
enum xnn_status xnn_reshape_softmax_nc_f16( |
|
xnn_operator_t softmax_op, |
|
size_t batch_size, |
|
pthreadpool_t threadpool); |
|
|
|
// Prototype only: setup step of the softmax_nc_f16 operator. |
// Takes the operator handle, a pointer to const input data and a writable output pointer; returns enum xnn_status. |
enum xnn_status xnn_setup_softmax_nc_f16( |
|
xnn_operator_t softmax_op, |
|
const void* input, |
|
void* output); |
|
|
|
// Prototype only: create step of the square_nc_f16 operator. |
// Takes channel count, input/output strides and flags; returns enum xnn_status. |
// square_op_out points to an xnn_operator_t, presumably receiving the new operator handle (confirm in the definition). |
enum xnn_status xnn_create_square_nc_f16( |
|
size_t channels, |
|
size_t input_stride, |
|
size_t output_stride, |
|
uint32_t flags, |
|
xnn_operator_t* square_op_out); |
|
|
|
// Prototype only: reshape step of the square_nc_f16 operator. |
// Takes the operator handle, a batch size and a pthreadpool handle; returns enum xnn_status. |
enum xnn_status xnn_reshape_square_nc_f16( |
|
xnn_operator_t square_op, |
|
size_t batch_size, |
|
pthreadpool_t threadpool); |
|
|
|
// Prototype only: setup step of the square_nc_f16 operator. |
// Takes the operator handle, a pointer to const input data and a writable output pointer; returns enum xnn_status. |
enum xnn_status xnn_setup_square_nc_f16( |
|
xnn_operator_t square_op, |
|
const void* input, |
|
void* output); |
|
|
|
// Prototype only: create step of the square_root_nc_f16 operator. |
// Takes channel count, input/output strides and flags; returns enum xnn_status. |
// sqrt_op_out points to an xnn_operator_t, presumably receiving the new operator handle (confirm in the definition). |
enum xnn_status xnn_create_square_root_nc_f16( |
|
size_t channels, |
|
size_t input_stride, |
|
size_t output_stride, |
|
uint32_t flags, |
|
xnn_operator_t* sqrt_op_out); |
|
|
|
// Prototype only: reshape step of the square_root_nc_f16 operator. |
// Takes the operator handle, a batch size and a pthreadpool handle; returns enum xnn_status. |
enum xnn_status xnn_reshape_square_root_nc_f16( |
|
xnn_operator_t sqrt_op, |
|
size_t batch_size, |
|
pthreadpool_t threadpool); |
|
|
|
// Prototype only: setup step of the square_root_nc_f16 operator. |
// Takes the operator handle, a pointer to const input data and a writable output pointer; returns enum xnn_status. |
enum xnn_status xnn_setup_square_root_nc_f16( |
|
xnn_operator_t sqrt_op, |
|
const void* input, |
|
void* output); |
|
|
|
// Prototype only: create step of the squared_difference_nd_f16 operator; the only configuration parameter is flags. |
// Returns enum xnn_status; squared_difference_op_out presumably receives the new operator handle (confirm in the definition). |
enum xnn_status xnn_create_squared_difference_nd_f16( |
|
uint32_t flags, |
|
xnn_operator_t* squared_difference_op_out); |
|
|
|
// Prototype only: reshape step of the squared_difference_nd_f16 operator. |
// Takes the operator handle, a dimension count plus const shape array for each of the two inputs, and a pthreadpool handle. |
// Returns enum xnn_status. |
enum xnn_status xnn_reshape_squared_difference_nd_f16( |
|
xnn_operator_t squared_difference_op, |
|
size_t num_input1_dims, |
|
const size_t* input1_shape, |
|
size_t num_input2_dims, |
|
const size_t* input2_shape, |
|
pthreadpool_t threadpool); |
|
|
|
// Prototype only: setup step of the squared_difference_nd_f16 operator. |
// Takes the operator handle, two pointers to const input data and a writable output pointer; returns enum xnn_status. |
enum xnn_status xnn_setup_squared_difference_nd_f16( |
|
xnn_operator_t squared_difference_op, |
|
const void* input1, |
|
const void* input2, |
|
void* output); |
|
|
|
// Prototype only: create step of the subtract_nd_f16 operator. |
// Takes float output_min/output_max bounds and flags; returns enum xnn_status. |
// subtract_op_out points to an xnn_operator_t, presumably receiving the new operator handle (confirm in the definition). |
enum xnn_status xnn_create_subtract_nd_f16( |
|
float output_min, |
|
float output_max, |
|
uint32_t flags, |
|
xnn_operator_t* subtract_op_out); |
|
|
|
// Prototype only: reshape step of the subtract_nd_f16 operator. |
// Takes the operator handle, a dimension count plus const shape array for each of the two inputs, and a pthreadpool handle. |
// Returns enum xnn_status. |
enum xnn_status xnn_reshape_subtract_nd_f16( |
|
xnn_operator_t subtract_op, |
|
size_t num_input1_dims, |
|
const size_t* input1_shape, |
|
size_t num_input2_dims, |
|
const size_t* input2_shape, |
|
pthreadpool_t threadpool); |
|
|
|
// Prototype only: setup step of the subtract_nd_f16 operator. |
// Takes the operator handle, two pointers to const input data and a writable output pointer; returns enum xnn_status. |
enum xnn_status xnn_setup_subtract_nd_f16( |
|
xnn_operator_t subtract_op, |
|
const void* input1, |
|
const void* input2, |
|
void* output); |
|
|
|
// Prototype only: create step of the tanh_nc_f16 operator. |
// Takes channel count, input/output strides and flags; returns enum xnn_status. |
// tanh_op_out points to an xnn_operator_t, presumably receiving the new operator handle (confirm in the definition). |
enum xnn_status xnn_create_tanh_nc_f16( |
|
size_t channels, |
|
size_t input_stride, |
|
size_t output_stride, |
|
uint32_t flags, |
|
xnn_operator_t* tanh_op_out); |
|
|
|
// Prototype only: reshape step of the tanh_nc_f16 operator. |
// Takes the operator handle, a batch size and a pthreadpool handle; returns enum xnn_status. |
enum xnn_status xnn_reshape_tanh_nc_f16( |
|
xnn_operator_t tanh_op, |
|
size_t batch_size, |
|
pthreadpool_t threadpool); |
|
|
|
// Prototype only: setup step of the tanh_nc_f16 operator. |
// Takes the operator handle, a pointer to const input data and a writable output pointer; returns enum xnn_status. |
enum xnn_status xnn_setup_tanh_nc_f16( |
|
xnn_operator_t tanh_op, |
|
const void* input, |
|
void* output); |
|
|
|
// Prototype only: create step of the truncation_nc_f16 operator. |
// Takes channel count, input/output strides and flags; returns enum xnn_status. |
// truncation_op_out points to an xnn_operator_t, presumably receiving the new operator handle (confirm in the definition). |
enum xnn_status xnn_create_truncation_nc_f16( |
|
size_t channels, |
|
size_t input_stride, |
|
size_t output_stride, |
|
uint32_t flags, |
|
xnn_operator_t* truncation_op_out); |
|
|
|
// Prototype only: reshape step of the truncation_nc_f16 operator. |
// Takes the operator handle, a batch size and a pthreadpool handle; returns enum xnn_status. |
enum xnn_status xnn_reshape_truncation_nc_f16( |
|
xnn_operator_t truncation_op, |
|
size_t batch_size, |
|
pthreadpool_t threadpool); |
|
|
|
// Prototype only: setup step of the truncation_nc_f16 operator. |
// Takes the operator handle, a pointer to const input data and a writable output pointer; returns enum xnn_status. |
enum xnn_status xnn_setup_truncation_nc_f16( |
|
xnn_operator_t truncation_op, |
|
const void* input, |
|
void* output); |
|
|
|
// Prototype only: create step of the convolution2d_nchw_f16 operator. |
// Parameters, in order: four input paddings, kernel size, subsampling, dilation, group layout, |
// input/output channel strides, const kernel and bias pointers, float output bounds, flags, and code/weights cache handles. |
// Returns enum xnn_status; convolution_op_out presumably receives the new operator handle (confirm in the definition). |
enum xnn_status xnn_create_convolution2d_nchw_f16( |
|
uint32_t input_padding_top, |
|
uint32_t input_padding_right, |
|
uint32_t input_padding_bottom, |
|
uint32_t input_padding_left, |
|
uint32_t kernel_height, |
|
uint32_t kernel_width, |
|
uint32_t subsampling_height, |
|
uint32_t subsampling_width, |
|
uint32_t dilation_height, |
|
uint32_t dilation_width, |
|
uint32_t groups, |
|
size_t group_input_channels, |
|
size_t group_output_channels, |
|
size_t input_channel_stride, |
|
size_t output_channel_stride, |
|
const void* kernel, |
|
const void* bias, |
|
float output_min, |
|
float output_max, |
|
uint32_t flags, |
|
xnn_code_cache_t code_cache, |
|
xnn_weights_cache_t weights_cache, |
|
xnn_operator_t* convolution_op_out); |
|
|
|
// Prototype only: reshape step of the convolution2d_nchw_f16 operator. |
// Takes the operator handle, batch size, input height/width and a pthreadpool handle; returns enum xnn_status. |
// output_height_out / output_width_out are non-const size_t pointers, presumably written with the resulting size (confirm in the definition). |
enum xnn_status xnn_reshape_convolution2d_nchw_f16( |
|
xnn_operator_t convolution_op, |
|
size_t batch_size, |
|
size_t input_height, |
|
size_t input_width, |
|
size_t* output_height_out, |
|
size_t* output_width_out, |
|
pthreadpool_t threadpool); |
|
|
|
// Prototype only: setup step of the convolution2d_nchw_f16 operator. |
// Takes the operator handle, a pointer to const input data and a writable output pointer; returns enum xnn_status. |
enum xnn_status xnn_setup_convolution2d_nchw_f16( |
|
xnn_operator_t convolution_op, |
|
const void* input, |
|
void* output); |
|
|
|
// Prototype only: create step of the depth_to_space_nchw2nhwc_x16 operator. |
// Takes output channel count, input/output channel strides, a block size and flags; returns enum xnn_status. |
// depth_to_space_op_out presumably receives the new operator handle (confirm in the definition). |
enum xnn_status xnn_create_depth_to_space_nchw2nhwc_x16( |
|
size_t output_channels, |
|
size_t input_channel_stride, |
|
size_t output_channel_stride, |
|
uint32_t block_size, |
|
uint32_t flags, |
|
xnn_operator_t* depth_to_space_op_out); |
|
|
|
// Prototype only: reshape step of the depth_to_space_nchw2nhwc_x16 operator. |
// Takes the operator handle, batch size, input height/width and a pthreadpool handle; returns enum xnn_status. |
// The three *_out parameters are non-const size_t pointers, presumably written with the output height, width and |
// channel count (confirm in the definition). |
enum xnn_status xnn_reshape_depth_to_space_nchw2nhwc_x16( |
|
xnn_operator_t depth_to_space_op, |
|
size_t batch_size, |
|
size_t input_height, |
|
size_t input_width, |
|
size_t* output_height_out, |
|
size_t* output_width_out, |
|
size_t* output_channels_out, |
|
pthreadpool_t threadpool); |
|
|
|
// Prototype only: setup step of the depth_to_space_nchw2nhwc_x16 operator. |
// Takes the operator handle, a pointer to const input data and a writable output pointer; returns enum xnn_status. |
enum xnn_status xnn_setup_depth_to_space_nchw2nhwc_x16( |
|
xnn_operator_t depth_to_space_op, |
|
const void* input, |
|
void* output); |
|
|
|
// Prototype only: create step of the global_average_pooling_ncw_f16 operator. |
// Takes channel count, float output_min/output_max bounds and flags (no stride parameters, unlike the nwc variant). |
// Returns enum xnn_status; global_average_pooling_op_out presumably receives the new operator handle (confirm in the definition). |
enum xnn_status xnn_create_global_average_pooling_ncw_f16( |
|
size_t channels, |
|
float output_min, |
|
float output_max, |
|
uint32_t flags, |
|
xnn_operator_t* global_average_pooling_op_out); |
|
|
|
// Prototype only: reshape step of the global_average_pooling_ncw_f16 operator. |
// Takes the operator handle, a batch size, a width and a pthreadpool handle; returns enum xnn_status. |
enum xnn_status xnn_reshape_global_average_pooling_ncw_f16( |
|
xnn_operator_t global_average_pooling_op, |
|
size_t batch_size, |
|
size_t width, |
|
pthreadpool_t threadpool); |
|
|
|
// Prototype only: setup step of the global_average_pooling_ncw_f16 operator. |
// Takes the operator handle, a pointer to const input data and a writable output pointer; returns enum xnn_status. |
enum xnn_status xnn_setup_global_average_pooling_ncw_f16( |
|
xnn_operator_t global_average_pooling_op, |
|
const void* input, |
|
void* output); |
|
|
|
// Prototype only: create step of the resize_bilinear2d_nchw_f16 operator. |
// Takes channel count, input/output pixel strides and flags; returns enum xnn_status. |
// resize_op_out points to an xnn_operator_t, presumably receiving the new operator handle (confirm in the definition). |
enum xnn_status xnn_create_resize_bilinear2d_nchw_f16( |
|
size_t channels, |
|
size_t input_pixel_stride, |
|
size_t output_pixel_stride, |
|
uint32_t flags, |
|
xnn_operator_t* resize_op_out); |
|
|
|
// Prototype only: reshape step of the resize_bilinear2d_nchw_f16 operator. |
// Takes the operator handle, batch size, input height/width, output height/width (passed by value, i.e. supplied by the caller) |
// and a pthreadpool handle; returns enum xnn_status. |
enum xnn_status xnn_reshape_resize_bilinear2d_nchw_f16( |
|
xnn_operator_t resize_op, |
|
size_t batch_size, |
|
size_t input_height, |
|
size_t input_width, |
|
size_t output_height, |
|
size_t output_width, |
|
pthreadpool_t threadpool); |
|
|
|
// Prototype only: setup step of the resize_bilinear2d_nchw_f16 operator. |
// Takes the operator handle, a pointer to const input data and a writable output pointer; returns enum xnn_status. |
enum xnn_status xnn_setup_resize_bilinear2d_nchw_f16( |
|
xnn_operator_t resize_op, |
|
const void* input, |
|
void* output); |
|
|
|
// Prototype only: create step of the constant_pad_nd_x16 operator. |
// Takes a pointer to a const padding value and flags; returns enum xnn_status. |
// constant_pad_op_out presumably receives the new operator handle (confirm in the definition). |
enum xnn_status xnn_create_constant_pad_nd_x16( |
|
const void* padding_value, |
|
uint32_t flags, |
|
xnn_operator_t* constant_pad_op_out); |
|
|
|
// Prototype only: reshape step of the constant_pad_nd_x16 operator. |
// Takes the operator handle, a dimension count, const arrays for the input shape, pre-padding and post-padding, |
// and a pthreadpool handle; returns enum xnn_status. |
enum xnn_status xnn_reshape_constant_pad_nd_x16( |
|
xnn_operator_t constant_pad_op, |
|
size_t num_dims, |
|
const size_t* input_shape, |
|
const size_t* pre_padding, |
|
const size_t* post_padding, |
|
pthreadpool_t threadpool); |
|
|
|
// Prototype only: setup step of the constant_pad_nd_x16 operator. |
// Takes the operator handle, a pointer to const input data and a writable output pointer; returns enum xnn_status. |
enum xnn_status xnn_setup_constant_pad_nd_x16( |
|
xnn_operator_t constant_pad_op, |
|
const void* input, |
|
void* output); |
|
|
|
// Prototype only: "run" entry point for constant_pad_nd_x16; no operator handle appears in the signature, |
// so configuration, shapes and data pointers are all passed in this one call. |
// Note the padding arrays are named pre_paddings / post_paddings here versus pre_padding / post_padding in the reshape prototype. |
// Returns enum xnn_status. |
enum xnn_status xnn_run_constant_pad_nd_x16( |
|
uint32_t flags, |
|
size_t num_dims, |
|
const size_t* input_shape, |
|
const size_t* pre_paddings, |
|
const size_t* post_paddings, |
|
const void* input, |
|
void* output, |
|
const void* padding_value, |
|
pthreadpool_t threadpool); |
|
|
|
// Prototype only: create step of the copy_nc_x16 operator. |
// Takes channel count, input/output strides and flags; returns enum xnn_status. |
// copy_op_out points to an xnn_operator_t, presumably receiving the new operator handle (confirm in the definition). |
enum xnn_status xnn_create_copy_nc_x16( |
|
size_t channels, |
|
size_t input_stride, |
|
size_t output_stride, |
|
uint32_t flags, |
|
xnn_operator_t* copy_op_out); |
|
|
|
// Prototype only: reshape step of the copy_nc_x16 operator. |
// Takes the operator handle, a batch size and a pthreadpool handle; returns enum xnn_status. |
enum xnn_status xnn_reshape_copy_nc_x16( |
|
xnn_operator_t copy_op, |
|
size_t batch_size, |
|
pthreadpool_t threadpool); |
|
|
|
// Prototype only: setup step of the copy_nc_x16 operator. |
// Takes the operator handle, a pointer to const input data and a writable output pointer; returns enum xnn_status. |
enum xnn_status xnn_setup_copy_nc_x16( |
|
xnn_operator_t copy_op, |
|
const void* input, |
|
void* output); |
|
|
|
// Prototype only: create step of the depth_to_space_nhwc_x16 operator. |
// Takes output channel count, input/output channel strides, a block size and flags; returns enum xnn_status. |
// depth_to_space_op_out presumably receives the new operator handle (confirm in the definition). |
enum xnn_status xnn_create_depth_to_space_nhwc_x16( |
|
size_t output_channels, |
|
size_t input_channel_stride, |
|
size_t output_channel_stride, |
|
uint32_t block_size, |
|
uint32_t flags, |
|
xnn_operator_t* depth_to_space_op_out); |
|
|
|
// Prototype only: reshape step of the depth_to_space_nhwc_x16 operator. |
// Takes the operator handle, batch size, input height/width and a pthreadpool handle; returns enum xnn_status. |
// The three *_out parameters are non-const size_t pointers, presumably written with the output height, width and |
// channel count (confirm in the definition). |
enum xnn_status xnn_reshape_depth_to_space_nhwc_x16( |
|
xnn_operator_t depth_to_space_op, |
|
size_t batch_size, |
|
size_t input_height, |
|
size_t input_width, |
|
size_t* output_height_out, |
|
size_t* output_width_out, |
|
size_t* output_channels_out, |
|
pthreadpool_t threadpool); |
|
|
|
// Prototype only: setup step of the depth_to_space_nhwc_x16 operator. |
// Takes the operator handle, a pointer to const input data and a writable output pointer; returns enum xnn_status. |
enum xnn_status xnn_setup_depth_to_space_nhwc_x16( |
|
xnn_operator_t depth_to_space_op, |
|
const void* input, |
|
void* output); |
|
|
|
// Prototype only: create step of the slice_nd_x16 operator; the only configuration parameter is flags. |
// Returns enum xnn_status; slice_op_out presumably receives the new operator handle (confirm in the definition). |
enum xnn_status xnn_create_slice_nd_x16( |
|
uint32_t flags, |
|
xnn_operator_t* slice_op_out); |
|
|
|
// Prototype only: reshape step of the slice_nd_x16 operator. |
// Takes the operator handle, a dimension count, const arrays for the input shape, offsets and sizes, |
// and a pthreadpool handle; returns enum xnn_status. |
enum xnn_status xnn_reshape_slice_nd_x16( |
|
xnn_operator_t slice_op, |
|
size_t num_dims, |
|
const size_t* input_shape, |
|
const size_t* offsets, |
|
const size_t* sizes, |
|
pthreadpool_t threadpool); |
|
|
|
// Prototype only: setup step of the slice_nd_x16 operator. |
// Takes the operator handle, a pointer to const input data and a writable output pointer; returns enum xnn_status. |
enum xnn_status xnn_setup_slice_nd_x16( |
|
xnn_operator_t slice_op, |
|
const void* input, |
|
void* output); |
|
|
|
// Prototype only: create step of the space_to_depth_nhwc_x16 operator. |
// Takes input channel count, input/output channel strides, a block size and flags; returns enum xnn_status. |
// space_to_depth_op_out presumably receives the new operator handle (confirm in the definition). |
enum xnn_status xnn_create_space_to_depth_nhwc_x16( |
|
size_t input_channels, |
|
size_t input_channel_stride, |
|
size_t output_channel_stride, |
|
uint32_t block_size, |
|
uint32_t flags, |
|
xnn_operator_t* space_to_depth_op_out); |
|
|
|
// Prototype only: reshape step of the space_to_depth_nhwc_x16 operator. |
// Takes the operator handle, batch size, input height/width and a pthreadpool handle; returns enum xnn_status. |
// The three *_out parameters are non-const size_t pointers, presumably written with the output height, width and |
// channel count (confirm in the definition). |
enum xnn_status xnn_reshape_space_to_depth_nhwc_x16( |
|
xnn_operator_t space_to_depth_op, |
|
size_t batch_size, |
|
size_t input_height, |
|
size_t input_width, |
|
size_t* output_height_out, |
|
size_t* output_width_out, |
|
size_t* output_channels_out, |
|
pthreadpool_t threadpool); |
|
|
|
// Prototype only: setup step of the space_to_depth_nhwc_x16 operator. |
// Takes the operator handle, a pointer to const input data and a writable output pointer; returns enum xnn_status. |
enum xnn_status xnn_setup_space_to_depth_nhwc_x16( |
|
xnn_operator_t space_to_depth_op, |
|
const void* input, |
|
void* output); |
|
|
|
// Prototype only: create step of the transpose_nd_x16 operator; the only configuration parameter is flags. |
// Returns enum xnn_status; transpose_op_out presumably receives the new operator handle (confirm in the definition). |
enum xnn_status xnn_create_transpose_nd_x16( |
|
uint32_t flags, |
|
xnn_operator_t* transpose_op_out); |
|
|
|
// Prototype only: reshape step of the transpose_nd_x16 operator. |
// Takes the operator handle, a dimension count, const arrays for the input shape and the output permutation, |
// and a pthreadpool handle; returns enum xnn_status. |
enum xnn_status xnn_reshape_transpose_nd_x16( |
|
xnn_operator_t transpose_op, |
|
size_t num_dims, |
|
const size_t* input_shape, |
|
const size_t* output_perm, |
|
pthreadpool_t threadpool); |
|
|
|
// Prototype only: setup step of the transpose_nd_x16 operator. |
// Takes the operator handle, a pointer to const input data and a writable output pointer; returns enum xnn_status. |
enum xnn_status xnn_setup_transpose_nd_x16( |
|
xnn_operator_t transpose_op, |
|
const void* input, |
|
void* output); |
|
|
|
// Prototype only: "run" entry point for transpose_nd_x16; no operator handle appears in the signature, |
// so data pointers, shape, permutation and flags are all passed in this one call. |
// Unlike the other run prototypes nearby, the input/output pointers come first in the parameter list. Returns enum xnn_status. |
enum xnn_status xnn_run_transpose_nd_x16( |
|
const void* input, |
|
void* output, |
|
size_t num_dims, |
|
const size_t* input_shape, |
|
const size_t* output_perm, |
|
uint32_t flags, |
|
pthreadpool_t threadpool); |
|
|
|
// Prototype only: create step of the convolution2d_nhwc_qs8_qc8w operator. |
// Parameters, in order: four input paddings, kernel size, subsampling, dilation, group layout, input/output channel strides, |
// input zero point and scale, kernel_scale, int8 kernel and int32 bias pointers, output zero point/scale, |
// int8 output bounds, flags, and code/weights cache handles. |
// kernel_scale is a const float pointer here, whereas the plain qs8 convolution prototype takes a single float. |
// Returns enum xnn_status; convolution_op_out presumably receives the new operator handle (confirm in the definition). |
enum xnn_status xnn_create_convolution2d_nhwc_qs8_qc8w( |
|
uint32_t input_padding_top, |
|
uint32_t input_padding_right, |
|
uint32_t input_padding_bottom, |
|
uint32_t input_padding_left, |
|
uint32_t kernel_height, |
|
uint32_t kernel_width, |
|
uint32_t subsampling_height, |
|
uint32_t subsampling_width, |
|
uint32_t dilation_height, |
|
uint32_t dilation_width, |
|
uint32_t groups, |
|
size_t group_input_channels, |
|
size_t group_output_channels, |
|
size_t input_channel_stride, |
|
size_t output_channel_stride, |
|
int8_t input_zero_point, |
|
float input_scale, |
|
const float* kernel_scale, |
|
const int8_t* kernel, |
|
const int32_t* bias, |
|
int8_t output_zero_point, |
|
float output_scale, |
|
int8_t output_min, |
|
int8_t output_max, |
|
uint32_t flags, |
|
xnn_code_cache_t code_cache, |
|
xnn_weights_cache_t weights_cache, |
|
xnn_operator_t* convolution_op_out); |
|
|
|
// Prototype only: reshape step of the convolution2d_nhwc_qs8_qc8w operator. |
// Takes the operator handle, batch size, input height/width and a pthreadpool handle; returns enum xnn_status. |
// output_height_out / output_width_out are non-const size_t pointers, presumably written with the resulting size (confirm in the definition). |
enum xnn_status xnn_reshape_convolution2d_nhwc_qs8_qc8w( |
|
xnn_operator_t convolution_op, |
|
size_t batch_size, |
|
size_t input_height, |
|
size_t input_width, |
|
size_t* output_height_out, |
|
size_t* output_width_out, |
|
pthreadpool_t threadpool); |
|
|
|
// Prototype only: setup step of the convolution2d_nhwc_qs8_qc8w operator. |
// Takes the operator handle, a pointer to const int8_t input data and a writable int8_t output pointer; returns enum xnn_status. |
enum xnn_status xnn_setup_convolution2d_nhwc_qs8_qc8w( |
|
xnn_operator_t convolution_op, |
|
const int8_t* input, |
|
int8_t* output); |
|
|
|
// Prototype only: create step of the add_nd_qs8 operator. |
// Takes an int8_t zero point and float scale for each of input1, input2 and the output, int8_t output bounds and flags. |
// Returns enum xnn_status; add_op_out presumably receives the new operator handle (confirm in the definition). |
enum xnn_status xnn_create_add_nd_qs8( |
|
int8_t input1_zero_point, |
|
float input1_scale, |
|
int8_t input2_zero_point, |
|
float input2_scale, |
|
int8_t output_zero_point, |
|
float output_scale, |
|
int8_t output_min, |
|
int8_t output_max, |
|
uint32_t flags, |
|
xnn_operator_t* add_op_out); |
|
|
|
// Prototype only: reshape step of the add_nd_qs8 operator. |
// Takes the operator handle, a dimension count plus const shape array for each of the two inputs, and a pthreadpool handle. |
// Returns enum xnn_status. |
enum xnn_status xnn_reshape_add_nd_qs8( |
|
xnn_operator_t add_op, |
|
size_t num_input1_dims, |
|
const size_t* input1_shape, |
|
size_t num_input2_dims, |
|
const size_t* input2_shape, |
|
pthreadpool_t threadpool); |
|
|
|
// Prototype only: setup step of the add_nd_qs8 operator. |
// Takes the operator handle, two pointers to const int8_t input data and a writable int8_t output pointer; returns enum xnn_status. |
enum xnn_status xnn_setup_add_nd_qs8( |
|
xnn_operator_t add_op, |
|
const int8_t* input1, |
|
const int8_t* input2, |
|
int8_t* output); |
|
|
|
// Prototype only: "run" entry point for add_nd_qs8; no operator handle appears in the signature, |
// so shapes, per-tensor zero points/scales, data pointers, output bounds and flags are all passed in this one call. |
// Returns enum xnn_status. |
enum xnn_status xnn_run_add_nd_qs8( |
|
size_t num_input1_dims, |
|
const size_t* input1_shape, |
|
int8_t input1_zero_point, |
|
float input1_scale, |
|
size_t num_input2_dims, |
|
const size_t* input2_shape, |
|
int8_t input2_zero_point, |
|
float input2_scale, |
|
const int8_t* input1, |
|
const int8_t* input2, |
|
int8_t* output, |
|
int8_t output_zero_point, |
|
float output_scale, |
|
int8_t output_min, |
|
int8_t output_max, |
|
uint32_t flags, |
|
pthreadpool_t threadpool); |
|
|
|
// Prototype only: create step of the convolution2d_nhwc_qs8 operator. |
// Parameters, in order: four input paddings, kernel size, subsampling, dilation, group layout, input/output channel strides, |
// input zero point and scale, a single float kernel_scale, int8 kernel and int32 bias pointers, output zero point/scale, |
// int8 output bounds, flags, and code/weights cache handles. |
// Returns enum xnn_status; convolution_op_out presumably receives the new operator handle (confirm in the definition). |
enum xnn_status xnn_create_convolution2d_nhwc_qs8( |
|
uint32_t input_padding_top, |
|
uint32_t input_padding_right, |
|
uint32_t input_padding_bottom, |
|
uint32_t input_padding_left, |
|
uint32_t kernel_height, |
|
uint32_t kernel_width, |
|
uint32_t subsampling_height, |
|
uint32_t subsampling_width, |
|
uint32_t dilation_height, |
|
uint32_t dilation_width, |
|
uint32_t groups, |
|
size_t group_input_channels, |
|
size_t group_output_channels, |
|
size_t input_channel_stride, |
|
size_t output_channel_stride, |
|
int8_t input_zero_point, |
|
float input_scale, |
|
float kernel_scale, |
|
const int8_t* kernel, |
|
const int32_t* bias, |
|
int8_t output_zero_point, |
|
float output_scale, |
|
int8_t output_min, |
|
int8_t output_max, |
|
uint32_t flags, |
|
xnn_code_cache_t code_cache, |
|
xnn_weights_cache_t weights_cache, |
|
xnn_operator_t* convolution_op_out); |
|
|
|
// Prototype only: reshape step of the convolution2d_nhwc_qs8 operator. |
// Takes the operator handle, batch size, input height/width and a pthreadpool handle; returns enum xnn_status. |
// output_height_out / output_width_out are non-const size_t pointers, presumably written with the resulting size (confirm in the definition). |
enum xnn_status xnn_reshape_convolution2d_nhwc_qs8( |
|
xnn_operator_t convolution_op, |
|
size_t batch_size, |
|
size_t input_height, |
|
size_t input_width, |
|
size_t* output_height_out, |
|
size_t* output_width_out, |
|
pthreadpool_t threadpool); |
|
|
|
// Prototype only: setup step of the convolution2d_nhwc_qs8 operator. |
// Takes the operator handle, a pointer to const int8_t input data and a writable int8_t output pointer; returns enum xnn_status. |
enum xnn_status xnn_setup_convolution2d_nhwc_qs8( |
|
xnn_operator_t convolution_op, |
|
const int8_t* input, |
|
int8_t* output); |
|
|
|
// Prototype only: create step of the deconvolution2d_nhwc_qs8 operator. |
// Parameters, in order: four output paddings, kernel size, stride, dilation, group layout, input/output pixel strides, |
// input zero point and scale, a single float kernel_scale, int8 kernel and int32 bias pointers, output zero point/scale, |
// int8 output bounds, flags, and code/weights cache handles. |
// Returns enum xnn_status; deconvolution_op_out presumably receives the new operator handle (confirm in the definition). |
enum xnn_status xnn_create_deconvolution2d_nhwc_qs8( |
|
uint32_t output_padding_top, |
|
uint32_t output_padding_right, |
|
uint32_t output_padding_bottom, |
|
uint32_t output_padding_left, |
|
uint32_t kernel_height, |
|
uint32_t kernel_width, |
|
uint32_t stride_height, |
|
uint32_t stride_width, |
|
uint32_t dilation_height, |
|
uint32_t dilation_width, |
|
uint32_t groups, |
|
size_t group_input_channels, |
|
size_t group_output_channels, |
|
size_t input_pixel_stride, |
|
size_t output_pixel_stride, |
|
int8_t input_zero_point, |
|
float input_scale, |
|
float kernel_scale, |
|
const int8_t* kernel, |
|
const int32_t* bias, |
|
int8_t output_zero_point, |
|
float output_scale, |
|
int8_t output_min, |
|
int8_t output_max, |
|
uint32_t flags, |
|
xnn_code_cache_t code_cache, |
|
xnn_weights_cache_t weights_cache, |
|
xnn_operator_t* deconvolution_op_out); |
|
|
|
// Prototype only: reshape step of the deconvolution2d_nhwc_qs8 operator. |
// Takes the operator handle, batch size, input height/width, two uint32_t adjustment values and a pthreadpool handle; returns enum xnn_status. |
// output_height_out / output_width_out are non-const size_t pointers, presumably written with the resulting size (confirm in the definition). |
enum xnn_status xnn_reshape_deconvolution2d_nhwc_qs8( |
|
xnn_operator_t deconvolution_op, |
|
size_t batch_size, |
|
size_t input_height, |
|
size_t input_width, |
|
uint32_t adjustment_height, |
|
uint32_t adjustment_width, |
|
size_t* output_height_out, |
|
size_t* output_width_out, |
|
pthreadpool_t threadpool); |
|
|
|
// Prototype only: setup step of the deconvolution2d_nhwc_qs8 operator. |
// Takes the operator handle, a pointer to const int8_t input data and a writable int8_t output pointer; returns enum xnn_status. |
enum xnn_status xnn_setup_deconvolution2d_nhwc_qs8( |
|
xnn_operator_t deconvolution_op, |
|
const int8_t* input, |
|
int8_t* output); |
|
|
|
// Prototype only: create step of the elu_nc_qs8 operator. |
// Takes channel count, input/output strides, a float alpha, input and output zero point/scale pairs, |
// int8_t output bounds and flags; returns enum xnn_status. |
// elu_op_out presumably receives the new operator handle (confirm in the definition). |
enum xnn_status xnn_create_elu_nc_qs8( |
|
size_t channels, |
|
size_t input_stride, |
|
size_t output_stride, |
|
float alpha, |
|
int8_t input_zero_point, |
|
float input_scale, |
|
int8_t output_zero_point, |
|
float output_scale, |
|
int8_t output_min, |
|
int8_t output_max, |
|
uint32_t flags, |
|
xnn_operator_t* elu_op_out); |
|
|
|
// Prototype only: reshape step of the elu_nc_qs8 operator. |
// Takes the operator handle, a batch size and a pthreadpool handle; returns enum xnn_status. |
enum xnn_status xnn_reshape_elu_nc_qs8( |
|
xnn_operator_t elu_op, |
|
size_t batch_size, |
|
pthreadpool_t threadpool); |
|
|
|
// Prototype only: setup step of the elu_nc_qs8 operator. |
// Takes the operator handle, a pointer to const int8_t input data and a writable int8_t output pointer; returns enum xnn_status. |
enum xnn_status xnn_setup_elu_nc_qs8( |
|
xnn_operator_t elu_op, |
|
const int8_t* input, |
|
int8_t* output); |
|
|
|
// Prototype only: create step of the fully_connected_nc_qs8 operator. |
// Takes input/output channel counts and strides, input zero point and scale, a single float kernel_scale, |
// int8 kernel and int32 bias pointers, output zero point/scale, int8 output bounds, flags, and code/weights cache handles. |
// Returns enum xnn_status; fully_connected_op_out presumably receives the new operator handle (confirm in the definition). |
enum xnn_status xnn_create_fully_connected_nc_qs8( |
|
size_t input_channels, |
|
size_t output_channels, |
|
size_t input_stride, |
|
size_t output_stride, |
|
int8_t input_zero_point, |
|
float input_scale, |
|
float kernel_scale, |
|
const int8_t* kernel, |
|
const int32_t* bias, |
|
int8_t output_zero_point, |
|
float output_scale, |
|
int8_t output_min, |
|
int8_t output_max, |
|
uint32_t flags, |
|
xnn_code_cache_t code_cache, |
|
xnn_weights_cache_t weights_cache, |
|
xnn_operator_t* fully_connected_op_out); |
|
|
|
// Prototype only: reshape step of the fully_connected_nc_qs8 operator. |
// Takes the operator handle, a batch size and a pthreadpool handle; returns enum xnn_status. |
enum xnn_status xnn_reshape_fully_connected_nc_qs8( |
|
xnn_operator_t fully_connected_op, |
|
size_t batch_size, |
|
pthreadpool_t threadpool); |
|
|
|
// Prototype only: setup step of the fully_connected_nc_qs8 operator. |
// Takes the operator handle, a pointer to const int8_t input data and a writable int8_t output pointer; returns enum xnn_status. |
enum xnn_status xnn_setup_fully_connected_nc_qs8( |
|
xnn_operator_t fully_connected_op, |
|
const int8_t* input, |
|
int8_t* output); |
|
|
|
// Prototype only: create step of the global_average_pooling_nwc_qs8 operator. |
// Takes channel count, input/output strides, input and output zero point/scale pairs, int8_t output bounds and flags. |
// Returns enum xnn_status; global_average_pooling_op_out presumably receives the new operator handle (confirm in the definition). |
enum xnn_status xnn_create_global_average_pooling_nwc_qs8( |
|
size_t channels, |
|
size_t input_stride, |
|
size_t output_stride, |
|
int8_t input_zero_point, |
|
float input_scale, |
|
int8_t output_zero_point, |
|
float output_scale, |
|
int8_t output_min, |
|
int8_t output_max, |
|
uint32_t flags, |
|
xnn_operator_t* global_average_pooling_op_out); |
|
|
|
// Prototype only: reshape step of the global_average_pooling_nwc_qs8 operator. |
// Takes the operator handle, a batch size, a width and a pthreadpool handle; returns enum xnn_status. |
enum xnn_status xnn_reshape_global_average_pooling_nwc_qs8( |
|
xnn_operator_t global_average_pooling_op, |
|
size_t batch_size, |
|
size_t width, |
|
pthreadpool_t threadpool); |
|
|
|
// Prototype only: setup step of the global_average_pooling_nwc_qs8 operator. |
// Takes the operator handle, a pointer to const int8_t input data and a writable int8_t output pointer; returns enum xnn_status. |
enum xnn_status xnn_setup_global_average_pooling_nwc_qs8( |
|
xnn_operator_t global_average_pooling_op, |
|
const int8_t* input, |
|
int8_t* output); |
|
|
|
// Prototype only: create step of the multiply_nd_qs8 operator. |
// Takes an int8_t zero point and float scale for each of input1, input2 and the output, int8_t output bounds and flags. |
// Returns enum xnn_status; multiply_op_out presumably receives the new operator handle (confirm in the definition). |
enum xnn_status xnn_create_multiply_nd_qs8( |
|
int8_t input1_zero_point, |
|
float input1_scale, |
|
int8_t input2_zero_point, |
|
float input2_scale, |
|
int8_t output_zero_point, |
|
float output_scale, |
|
int8_t output_min, |
|
int8_t output_max, |
|
uint32_t flags, |
|
xnn_operator_t* multiply_op_out); |
|
|
|
// Prototype only: reshape step of the multiply_nd_qs8 operator. |
// Takes the operator handle, a dimension count plus const shape array for each of the two inputs, and a pthreadpool handle. |
// Returns enum xnn_status. |
enum xnn_status xnn_reshape_multiply_nd_qs8( |
|
xnn_operator_t multiply_op, |
|
size_t num_input1_dims, |
|
const size_t* input1_shape, |
|
size_t num_input2_dims, |
|
const size_t* input2_shape, |
|
pthreadpool_t threadpool); |
|
|
|
// Prototype only: setup step of the multiply_nd_qs8 operator. |
// Takes the operator handle, two pointers to const int8_t input data and a writable int8_t output pointer; returns enum xnn_status. |
enum xnn_status xnn_setup_multiply_nd_qs8( |
|
xnn_operator_t multiply_op, |
|
const int8_t* input1, |
|
const int8_t* input2, |
|
int8_t* output); |
|
|
|
// Prototype only: "run" entry point for multiply_nd_qs8; no operator handle appears in the signature, |
// so shapes, per-tensor zero points/scales, data pointers, output bounds and flags are all passed in this one call. |
// Returns enum xnn_status. |
enum xnn_status xnn_run_multiply_nd_qs8( |
|
size_t num_input1_dims, |
|
const size_t* input1_shape, |
|
int8_t input1_zero_point, |
|
float input1_scale, |
|
size_t num_input2_dims, |
|
const size_t* input2_shape, |
|
int8_t input2_zero_point, |
|
float input2_scale, |
|
const int8_t* input1, |
|
const int8_t* input2, |
|
int8_t* output, |
|
int8_t output_zero_point, |
|
float output_scale, |
|
int8_t output_min, |
|
int8_t output_max, |
|
uint32_t flags, |
|
pthreadpool_t threadpool); |
|
|
|
// Prototype only: create step of the leaky_relu_nc_qs8 operator. |
// Takes channel count, input/output strides, a float negative_slope, input and output zero point/scale pairs and flags |
// (no output_min/output_max parameters in this signature); returns enum xnn_status. |
// leaky_relu_op_out presumably receives the new operator handle (confirm in the definition). |
enum xnn_status xnn_create_leaky_relu_nc_qs8( |
|
size_t channels, |
|
size_t input_stride, |
|
size_t output_stride, |
|
float negative_slope, |
|
int8_t input_zero_point, |
|
float input_scale, |
|
int8_t output_zero_point, |
|
float output_scale, |
|
uint32_t flags, |
|
xnn_operator_t* leaky_relu_op_out); |
|
|
|
// Prototype only: reshape step of the leaky_relu_nc_qs8 operator. |
// Takes the operator handle, a batch size and a pthreadpool handle; returns enum xnn_status. |
enum xnn_status xnn_reshape_leaky_relu_nc_qs8( |
|
xnn_operator_t leaky_relu_op, |
|
size_t batch_size, |
|
pthreadpool_t threadpool); |
|
|
|
// Prototype only: setup step of the leaky_relu_nc_qs8 operator. |
// Takes the operator handle, a pointer to const int8_t input data and a writable int8_t output pointer; returns enum xnn_status. |
enum xnn_status xnn_setup_leaky_relu_nc_qs8( |
|
xnn_operator_t leaky_relu_op, |
|
const int8_t* input, |
|
int8_t* output); |
|
|
|
// Prototype only: create step of the sigmoid_nc_qs8 operator. |
// Takes channel count, input/output strides, input and output zero point/scale pairs, int8_t output bounds and flags. |
// Returns enum xnn_status; sigmoid_op_out presumably receives the new operator handle (confirm in the definition). |
enum xnn_status xnn_create_sigmoid_nc_qs8( |
|
size_t channels, |
|
size_t input_stride, |
|
size_t output_stride, |
|
int8_t input_zero_point, |
|
float input_scale, |
|
int8_t output_zero_point, |
|
float output_scale, |
|
int8_t output_min, |
|
int8_t output_max, |
|
uint32_t flags, |
|
xnn_operator_t* sigmoid_op_out); |
|
|
|
// Prototype only: reshape step of the sigmoid_nc_qs8 operator. |
// Takes the operator handle, a batch size and a pthreadpool handle; returns enum xnn_status. |
enum xnn_status xnn_reshape_sigmoid_nc_qs8( |
|
xnn_operator_t sigmoid_op, |
|
size_t batch_size, |
|
pthreadpool_t threadpool); |
|
|
|
// Prototype only: setup step of the sigmoid_nc_qs8 operator. |
// Takes the operator handle, a pointer to const int8_t input data and a writable int8_t output pointer; returns enum xnn_status. |
enum xnn_status xnn_setup_sigmoid_nc_qs8( |
|
xnn_operator_t sigmoid_op, |
|
const int8_t* input, |
|
int8_t* output); |
|
|
|
// Prototype only: create step of the subtract_nd_qs8 operator. |
// Takes an int8_t zero point and float scale for each of input1, input2 and the output, int8_t output bounds and flags. |
// Returns enum xnn_status; subtract_op_out presumably receives the new operator handle (confirm in the definition). |
enum xnn_status xnn_create_subtract_nd_qs8( |
|
int8_t input1_zero_point, |
|
float input1_scale, |
|
int8_t input2_zero_point, |
|
float input2_scale, |
|
int8_t output_zero_point, |
|
float output_scale, |
|
int8_t output_min, |
|
int8_t output_max, |
|
uint32_t flags, |
|
xnn_operator_t* subtract_op_out); |
|
|
|
enum xnn_status xnn_reshape_subtract_nd_qs8( |
|
xnn_operator_t subtract_op, |
|
size_t num_input1_dims, |
|
const size_t* input1_shape, |
|
size_t num_input2_dims, |
|
const size_t* input2_shape, |
|
pthreadpool_t threadpool); |
|
|
|
enum xnn_status xnn_setup_subtract_nd_qs8( |
|
xnn_operator_t subtract_op, |
|
const int8_t* input1, |
|
const int8_t* input2, |
|
int8_t* output); |
|
|
|
enum xnn_status xnn_run_subtract_nd_qs8( |
|
size_t num_input1_dims, |
|
const size_t* input1_shape, |
|
int8_t input1_zero_point, |
|
float input1_scale, |
|
size_t num_input2_dims, |
|
const size_t* input2_shape, |
|
int8_t input2_zero_point, |
|
float input2_scale, |
|
const int8_t* input1, |
|
const int8_t* input2, |
|
int8_t* output, |
|
int8_t output_zero_point, |
|
float output_scale, |
|
int8_t output_min, |
|
int8_t output_max, |
|
uint32_t flags, |
|
pthreadpool_t threadpool); |
|
|
|
enum xnn_status xnn_create_tanh_nc_qs8( |
|
size_t channels, |
|
size_t input_stride, |
|
size_t output_stride, |
|
int8_t input_zero_point, |
|
float input_scale, |
|
int8_t output_zero_point, |
|
float output_scale, |
|
int8_t output_min, |
|
int8_t output_max, |
|
uint32_t flags, |
|
xnn_operator_t* tanh_op_out); |
|
|
|
enum xnn_status xnn_reshape_tanh_nc_qs8( |
|
xnn_operator_t tanh_op, |
|
size_t batch_size, |
|
pthreadpool_t threadpool); |
|
|
|
enum xnn_status xnn_setup_tanh_nc_qs8( |
|
xnn_operator_t tanh_op, |
|
const int8_t* input, |
|
int8_t* output); |
|
|
|
enum xnn_status xnn_create_add_nd_qu8( |
|
uint8_t input1_zero_point, |
|
float input1_scale, |
|
uint8_t input2_zero_point, |
|
float input2_scale, |
|
uint8_t output_zero_point, |
|
float output_scale, |
|
uint8_t output_min, |
|
uint8_t output_max, |
|
uint32_t flags, |
|
xnn_operator_t* add_op_out); |
|
|
|
enum xnn_status xnn_reshape_add_nd_qu8( |
|
xnn_operator_t add_op, |
|
size_t num_input1_dims, |
|
const size_t* input1_shape, |
|
size_t num_input2_dims, |
|
const size_t* input2_shape, |
|
pthreadpool_t threadpool); |
|
|
|
enum xnn_status xnn_setup_add_nd_qu8( |
|
xnn_operator_t add_op, |
|
const uint8_t* input1, |
|
const uint8_t* input2, |
|
uint8_t* output); |
|
|
|
enum xnn_status xnn_run_add_nd_qu8( |
|
size_t num_input1_dims, |
|
const size_t* input1_shape, |
|
uint8_t input1_zero_point, |
|
float input1_scale, |
|
size_t num_input2_dims, |
|
const size_t* input2_shape, |
|
uint8_t input2_zero_point, |
|
float input2_scale, |
|
const uint8_t* input1, |
|
const uint8_t* input2, |
|
uint8_t* output, |
|
uint8_t output_zero_point, |
|
float output_scale, |
|
uint8_t output_min, |
|
uint8_t output_max, |
|
uint32_t flags, |
|
pthreadpool_t threadpool); |
|
|
|
enum xnn_status xnn_create_average_pooling2d_nhwc_qu8( |
|
uint32_t input_padding_top, |
|
uint32_t input_padding_right, |
|
uint32_t input_padding_bottom, |
|
uint32_t input_padding_left, |
|
uint32_t pooling_height, |
|
uint32_t pooling_width, |
|
uint32_t stride_height, |
|
uint32_t stride_width, |
|
size_t channels, |
|
size_t input_pixel_stride, |
|
size_t output_pixel_stride, |
|
uint8_t input_zero_point, |
|
float input_scale, |
|
uint8_t output_zero_point, |
|
float output_scale, |
|
uint8_t output_min, |
|
uint8_t output_max, |
|
uint32_t flags, |
|
xnn_operator_t* average_pooling_op_out); |
|
|
|
enum xnn_status xnn_reshape_average_pooling2d_nhwc_qu8( |
|
xnn_operator_t average_pooling_op, |
|
size_t batch_size, |
|
size_t input_height, |
|
size_t input_width, |
|
size_t* output_height_out, |
|
size_t* output_width_out, |
|
pthreadpool_t threadpool); |
|
|
|
enum xnn_status xnn_setup_average_pooling2d_nhwc_qu8( |
|
xnn_operator_t average_pooling_op, |
|
const uint8_t* input, |
|
uint8_t* output); |
|
|
|
enum xnn_status xnn_create_convolution2d_nhwc_qu8( |
|
uint32_t input_padding_top, |
|
uint32_t input_padding_right, |
|
uint32_t input_padding_bottom, |
|
uint32_t input_padding_left, |
|
uint32_t kernel_height, |
|
uint32_t kernel_width, |
|
uint32_t subsampling_height, |
|
uint32_t subsampling_width, |
|
uint32_t dilation_height, |
|
uint32_t dilation_width, |
|
uint32_t groups, |
|
size_t group_input_channels, |
|
size_t group_output_channels, |
|
size_t input_channel_stride, |
|
size_t output_channel_stride, |
|
uint8_t input_zero_point, |
|
float input_scale, |
|
uint8_t kernel_zero_point, |
|
float kernel_scale, |
|
const uint8_t* kernel, |
|
const int32_t* bias, |
|
uint8_t output_zero_point, |
|
float output_scale, |
|
uint8_t output_min, |
|
uint8_t output_max, |
|
uint32_t flags, |
|
xnn_code_cache_t code_cache, |
|
xnn_weights_cache_t weights_cache, |
|
xnn_operator_t* convolution_op_out); |
|
|
|
enum xnn_status xnn_reshape_convolution2d_nhwc_qu8( |
|
xnn_operator_t convolution_op, |
|
size_t batch_size, |
|
size_t input_height, |
|
size_t input_width, |
|
size_t* output_height_out, |
|
size_t* output_width_out, |
|
pthreadpool_t threadpool); |
|
|
|
enum xnn_status xnn_setup_convolution2d_nhwc_qu8( |
|
xnn_operator_t convolution_op, |
|
const uint8_t* input, |
|
uint8_t* output); |
|
|
|
enum xnn_status xnn_create_deconvolution2d_nhwc_qu8( |
|
uint32_t output_padding_top, |
|
uint32_t output_padding_right, |
|
uint32_t output_padding_bottom, |
|
uint32_t output_padding_left, |
|
uint32_t kernel_height, |
|
uint32_t kernel_width, |
|
uint32_t stride_height, |
|
uint32_t stride_width, |
|
uint32_t dilation_height, |
|
uint32_t dilation_width, |
|
uint32_t groups, |
|
size_t group_input_channels, |
|
size_t group_output_channels, |
|
size_t input_pixel_stride, |
|
size_t output_pixel_stride, |
|
uint8_t input_zero_point, |
|
float input_scale, |
|
uint8_t kernel_zero_point, |
|
float kernel_scale, |
|
const uint8_t* kernel, |
|
const int32_t* bias, |
|
uint8_t output_zero_point, |
|
float output_scale, |
|
uint8_t output_min, |
|
uint8_t output_max, |
|
uint32_t flags, |
|
xnn_code_cache_t code_cache, |
|
xnn_weights_cache_t weights_cache, |
|
xnn_operator_t* deconvolution_op_out); |
|
|
|
enum xnn_status xnn_reshape_deconvolution2d_nhwc_qu8( |
|
xnn_operator_t deconvolution_op, |
|
size_t batch_size, |
|
size_t input_height, |
|
size_t input_width, |
|
uint32_t adjustment_height, |
|
uint32_t adjustment_width, |
|
size_t* output_height_out, |
|
size_t* output_width_out, |
|
pthreadpool_t threadpool); |
|
|
|
enum xnn_status xnn_setup_deconvolution2d_nhwc_qu8( |
|
xnn_operator_t deconvolution_op, |
|
const uint8_t* input, |
|
uint8_t* output); |
|
|
|
enum xnn_status xnn_create_fully_connected_nc_qu8( |
|
size_t input_channels, |
|
size_t output_channels, |
|
size_t input_stride, |
|
size_t output_stride, |
|
uint8_t input_zero_point, |
|
float input_scale, |
|
uint8_t kernel_zero_point, |
|
float kernel_scale, |
|
const uint8_t* kernel, |
|
const int32_t* bias, |
|
uint8_t output_zero_point, |
|
float output_scale, |
|
uint8_t output_min, |
|
uint8_t output_max, |
|
uint32_t flags, |
|
xnn_code_cache_t code_cache, |
|
xnn_weights_cache_t weights_cache, |
|
xnn_operator_t* fully_connected_op_out); |
|
|
|
enum xnn_status xnn_reshape_fully_connected_nc_qu8( |
|
xnn_operator_t fully_connected_op, |
|
size_t batch_size, |
|
pthreadpool_t threadpool); |
|
|
|
enum xnn_status xnn_setup_fully_connected_nc_qu8( |
|
xnn_operator_t fully_connected_op, |
|
const uint8_t* input, |
|
uint8_t* output); |
|
|
|
enum xnn_status xnn_create_global_average_pooling_nwc_qu8( |
|
size_t channels, |
|
size_t input_stride, |
|
size_t output_stride, |
|
uint8_t input_zero_point, |
|
float input_scale, |
|
uint8_t output_zero_point, |
|
float output_scale, |
|
uint8_t output_min, |
|
uint8_t output_max, |
|
uint32_t flags, |
|
xnn_operator_t* global_average_pooling_op_out); |
|
|
|
enum xnn_status xnn_reshape_global_average_pooling_nwc_qu8( |
|
xnn_operator_t global_average_pooling_op, |
|
size_t batch_size, |
|
size_t width, |
|
pthreadpool_t threadpool); |
|
|
|
enum xnn_status xnn_setup_global_average_pooling_nwc_qu8( |
|
xnn_operator_t global_average_pooling_op, |
|
const uint8_t* input, |
|
uint8_t* output); |
|
|
|
enum xnn_status xnn_create_leaky_relu_nc_qu8( |
|
size_t channels, |
|
size_t input_stride, |
|
size_t output_stride, |
|
float negative_slope, |
|
uint8_t input_zero_point, |
|
float input_scale, |
|
uint8_t output_zero_point, |
|
float output_scale, |
|
uint32_t flags, |
|
xnn_operator_t* leaky_relu_op_out); |
|
|
|
enum xnn_status xnn_reshape_leaky_relu_nc_qu8( |
|
xnn_operator_t leaky_relu_op, |
|
size_t batch_size, |
|
pthreadpool_t threadpool); |
|
|
|
enum xnn_status xnn_setup_leaky_relu_nc_qu8( |
|
xnn_operator_t leaky_relu_op, |
|
const uint8_t* input, |
|
uint8_t* output); |
|
|
|
enum xnn_status xnn_create_multiply_nd_qu8( |
|
uint8_t input1_zero_point, |
|
float input1_scale, |
|
uint8_t input2_zero_point, |
|
float input2_scale, |
|
uint8_t output_zero_point, |
|
float output_scale, |
|
uint8_t output_min, |
|
uint8_t output_max, |
|
uint32_t flags, |
|
xnn_operator_t* multiply_op_out); |
|
|
|
enum xnn_status xnn_reshape_multiply_nd_qu8( |
|
xnn_operator_t multiply_op, |
|
size_t num_input1_dims, |
|
const size_t* input1_shape, |
|
size_t num_input2_dims, |
|
const size_t* input2_shape, |
|
pthreadpool_t threadpool); |
|
|
|
enum xnn_status xnn_setup_multiply_nd_qu8( |
|
xnn_operator_t multiply_op, |
|
const uint8_t* input1, |
|
const uint8_t* input2, |
|
uint8_t* output); |
|
|
|
enum xnn_status xnn_run_multiply_nd_qu8( |
|
size_t num_input1_dims, |
|
const size_t* input1_shape, |
|
uint8_t input1_zero_point, |
|
float input1_scale, |
|
size_t num_input2_dims, |
|
const size_t* input2_shape, |
|
uint8_t input2_zero_point, |
|
float input2_scale, |
|
const uint8_t* input1, |
|
const uint8_t* input2, |
|
uint8_t* output, |
|
uint8_t output_zero_point, |
|
float output_scale, |
|
uint8_t output_min, |
|
uint8_t output_max, |
|
uint32_t flags, |
|
pthreadpool_t threadpool); |
|
|
|
enum xnn_status xnn_create_sigmoid_nc_qu8( |
|
size_t channels, |
|
size_t input_stride, |
|
size_t output_stride, |
|
uint8_t input_zero_point, |
|
float input_scale, |
|
uint8_t output_zero_point, |
|
float output_scale, |
|
uint8_t output_min, |
|
uint8_t output_max, |
|
uint32_t flags, |
|
xnn_operator_t* sigmoid_op_out); |
|
|
|
enum xnn_status xnn_reshape_sigmoid_nc_qu8( |
|
xnn_operator_t sigmoid_op, |
|
size_t batch_size, |
|
pthreadpool_t threadpool); |
|
|
|
enum xnn_status xnn_setup_sigmoid_nc_qu8( |
|
xnn_operator_t sigmoid_op, |
|
const uint8_t* input, |
|
uint8_t* output); |
|
|
|
enum xnn_status xnn_create_softmax_nc_qu8( |
|
size_t channels, |
|
size_t input_stride, |
|
size_t output_stride, |
|
float input_scale, |
|
uint8_t output_zero_point, |
|
float output_scale, |
|
uint32_t flags, |
|
xnn_operator_t* softmax_op_out); |
|
|
|
enum xnn_status xnn_reshape_softmax_nc_qu8( |
|
xnn_operator_t softmax_op, |
|
size_t batch_size, |
|
pthreadpool_t threadpool); |
|
|
|
enum xnn_status xnn_setup_softmax_nc_qu8( |
|
xnn_operator_t softmax_op, |
|
const uint8_t* input, |
|
uint8_t* output); |
|
|
|
enum xnn_status xnn_create_subtract_nd_qu8( |
|
uint8_t input1_zero_point, |
|
float input1_scale, |
|
uint8_t input2_zero_point, |
|
float input2_scale, |
|
uint8_t output_zero_point, |
|
float output_scale, |
|
uint8_t output_min, |
|
uint8_t output_max, |
|
uint32_t flags, |
|
xnn_operator_t* subtract_op_out); |
|
|
|
enum xnn_status xnn_reshape_subtract_nd_qu8( |
|
xnn_operator_t subtract_op, |
|
size_t num_input1_dims, |
|
const size_t* input1_shape, |
|
size_t num_input2_dims, |
|
const size_t* input2_shape, |
|
pthreadpool_t threadpool); |
|
|
|
enum xnn_status xnn_setup_subtract_nd_qu8( |
|
xnn_operator_t subtract_op, |
|
const uint8_t* input1, |
|
const uint8_t* input2, |
|
uint8_t* output); |
|
|
|
enum xnn_status xnn_run_subtract_nd_qu8( |
|
size_t num_input1_dims, |
|
const size_t* input1_shape, |
|
uint8_t input1_zero_point, |
|
float input1_scale, |
|
size_t num_input2_dims, |
|
const size_t* input2_shape, |
|
uint8_t input2_zero_point, |
|
float input2_scale, |
|
const uint8_t* input1, |
|
const uint8_t* input2, |
|
uint8_t* output, |
|
uint8_t output_zero_point, |
|
float output_scale, |
|
uint8_t output_min, |
|
uint8_t output_max, |
|
uint32_t flags, |
|
pthreadpool_t threadpool); |
|
|
|
enum xnn_status xnn_create_tanh_nc_qu8( |
|
size_t channels, |
|
size_t input_stride, |
|
size_t output_stride, |
|
uint8_t input_zero_point, |
|
float input_scale, |
|
uint8_t output_zero_point, |
|
float output_scale, |
|
uint8_t output_min, |
|
uint8_t output_max, |
|
uint32_t flags, |
|
xnn_operator_t* tanh_op_out); |
|
|
|
enum xnn_status xnn_reshape_tanh_nc_qu8( |
|
xnn_operator_t tanh_op, |
|
size_t batch_size, |
|
pthreadpool_t threadpool); |
|
|
|
enum xnn_status xnn_setup_tanh_nc_qu8( |
|
xnn_operator_t tanh_op, |
|
const uint8_t* input, |
|
uint8_t* output); |
|
|
|
enum xnn_status xnn_create_clamp_nc_s8( |
|
size_t channels, |
|
size_t input_stride, |
|
size_t output_stride, |
|
int8_t output_min, |
|
int8_t output_max, |
|
uint32_t flags, |
|
xnn_operator_t* clamp_op_out); |
|
|
|
enum xnn_status xnn_reshape_clamp_nc_s8( |
|
xnn_operator_t clamp_op, |
|
size_t batch_size, |
|
pthreadpool_t threadpool); |
|
|
|
enum xnn_status xnn_setup_clamp_nc_s8( |
|
xnn_operator_t clamp_op, |
|
const int8_t* input, |
|
int8_t* output); |
|
|
|
enum xnn_status xnn_create_max_pooling2d_nhwc_s8( |
|
uint32_t input_padding_top, |
|
uint32_t input_padding_right, |
|
uint32_t input_padding_bottom, |
|
uint32_t input_padding_left, |
|
uint32_t pooling_height, |
|
uint32_t pooling_width, |
|
uint32_t stride_height, |
|
uint32_t stride_width, |
|
uint32_t dilation_height, |
|
uint32_t dilation_width, |
|
size_t channels, |
|
size_t input_pixel_stride, |
|
size_t output_pixel_stride, |
|
int8_t output_min, |
|
int8_t output_max, |
|
uint32_t flags, |
|
xnn_operator_t* max_pooling_op_out); |
|
|
|
enum xnn_status xnn_reshape_max_pooling2d_nhwc_s8( |
|
xnn_operator_t max_pooling_op, |
|
size_t batch_size, |
|
size_t input_height, |
|
size_t input_width, |
|
size_t* output_height_out, |
|
size_t* output_width_out, |
|
pthreadpool_t threadpool); |
|
|
|
enum xnn_status xnn_setup_max_pooling2d_nhwc_s8( |
|
xnn_operator_t max_pooling_op, |
|
const int8_t* input, |
|
int8_t* output); |
|
|
|
enum xnn_status xnn_create_resize_bilinear2d_nhwc_s8( |
|
size_t channels, |
|
size_t input_pixel_stride, |
|
size_t output_pixel_stride, |
|
uint32_t flags, |
|
xnn_operator_t* resize_op_out); |
|
|
|
enum xnn_status xnn_reshape_resize_bilinear2d_nhwc_s8( |
|
xnn_operator_t resize_op, |
|
size_t batch_size, |
|
size_t input_height, |
|
size_t input_width, |
|
size_t output_height, |
|
size_t output_width, |
|
pthreadpool_t threadpool); |
|
|
|
enum xnn_status xnn_setup_resize_bilinear2d_nhwc_s8( |
|
xnn_operator_t resize_op, |
|
const int8_t* input, |
|
int8_t* output); |
|
|
|
enum xnn_status xnn_create_clamp_nc_u8( |
|
size_t channels, |
|
size_t input_stride, |
|
size_t output_stride, |
|
uint8_t output_min, |
|
uint8_t output_max, |
|
uint32_t flags, |
|
xnn_operator_t* clamp_op_out); |
|
|
|
enum xnn_status xnn_reshape_clamp_nc_u8( |
|
xnn_operator_t clamp_op, |
|
size_t batch_size, |
|
pthreadpool_t threadpool); |
|
|
|
enum xnn_status xnn_setup_clamp_nc_u8( |
|
xnn_operator_t clamp_op, |
|
const uint8_t* input, |
|
uint8_t* output); |
|
|
|
enum xnn_status xnn_create_max_pooling2d_nhwc_u8( |
|
uint32_t input_padding_top, |
|
uint32_t input_padding_right, |
|
uint32_t input_padding_bottom, |
|
uint32_t input_padding_left, |
|
uint32_t pooling_height, |
|
uint32_t pooling_width, |
|
uint32_t stride_height, |
|
uint32_t stride_width, |
|
uint32_t dilation_height, |
|
uint32_t dilation_width, |
|
size_t channels, |
|
size_t input_pixel_stride, |
|
size_t output_pixel_stride, |
|
uint8_t output_min, |
|
uint8_t output_max, |
|
uint32_t flags, |
|
xnn_operator_t* max_pooling_op_out); |
|
|
|
enum xnn_status xnn_reshape_max_pooling2d_nhwc_u8( |
|
xnn_operator_t max_pooling_op, |
|
size_t batch_size, |
|
size_t input_height, |
|
size_t input_width, |
|
size_t* output_height_out, |
|
size_t* output_width_out, |
|
pthreadpool_t threadpool); |
|
|
|
enum xnn_status xnn_setup_max_pooling2d_nhwc_u8( |
|
xnn_operator_t max_pooling_op, |
|
const uint8_t* input, |
|
uint8_t* output); |
|
|
|
enum xnn_status xnn_create_resize_bilinear2d_nhwc_u8( |
|
size_t channels, |
|
size_t input_pixel_stride, |
|
size_t output_pixel_stride, |
|
uint32_t flags, |
|
xnn_operator_t* resize_op_out); |
|
|
|
enum xnn_status xnn_reshape_resize_bilinear2d_nhwc_u8( |
|
xnn_operator_t resize_op, |
|
size_t batch_size, |
|
size_t input_height, |
|
size_t input_width, |
|
size_t output_height, |
|
size_t output_width, |
|
pthreadpool_t threadpool); |
|
|
|
enum xnn_status xnn_setup_resize_bilinear2d_nhwc_u8( |
|
xnn_operator_t resize_op, |
|
const uint8_t* input, |
|
uint8_t* output); |
|
|
|
enum xnn_status xnn_create_copy_nc_x8( |
|
size_t channels, |
|
size_t input_stride, |
|
size_t output_stride, |
|
uint32_t flags, |
|
xnn_operator_t* copy_op_out); |
|
|
|
enum xnn_status xnn_reshape_copy_nc_x8( |
|
xnn_operator_t copy_op, |
|
size_t batch_size, |
|
pthreadpool_t threadpool); |
|
|
|
enum xnn_status xnn_setup_copy_nc_x8( |
|
xnn_operator_t copy_op, |
|
const void* input, |
|
void* output); |
|
|
|
enum xnn_status xnn_create_channel_shuffle_nc_x8( |
|
size_t groups, |
|
size_t group_channels, |
|
size_t input_stride, |
|
size_t output_stride, |
|
uint32_t flags, |
|
xnn_operator_t* channel_shuffle_op_out); |
|
|
|
enum xnn_status xnn_reshape_channel_shuffle_nc_x8( |
|
xnn_operator_t channel_shuffle_op, |
|
size_t batch_size, |
|
pthreadpool_t threadpool); |
|
|
|
enum xnn_status xnn_setup_channel_shuffle_nc_x8( |
|
xnn_operator_t channel_shuffle_op, |
|
const void* input, |
|
void* output); |
|
|
|
enum xnn_status xnn_create_constant_pad_nd_x8( |
|
const void* padding_value, |
|
uint32_t flags, |
|
xnn_operator_t* constant_pad_op_out); |
|
|
|
enum xnn_status xnn_reshape_constant_pad_nd_x8( |
|
xnn_operator_t constant_pad_op, |
|
size_t num_dims, |
|
const size_t* input_shape, |
|
const size_t* pre_padding, |
|
const size_t* post_padding, |
|
pthreadpool_t threadpool); |
|
|
|
enum xnn_status xnn_setup_constant_pad_nd_x8( |
|
xnn_operator_t constant_pad_op, |
|
const void* input, |
|
void* output); |
|
|
|
enum xnn_status xnn_run_constant_pad_nd_x8( |
|
uint32_t flags, |
|
size_t num_dims, |
|
const size_t* input_shape, |
|
const size_t* pre_paddings, |
|
const size_t* post_paddings, |
|
const void* input, |
|
void* output, |
|
const void* padding_value, |
|
pthreadpool_t threadpool); |
|
|
|
enum xnn_status xnn_create_depth_to_space_nhwc_x8( |
|
size_t output_channels, |
|
size_t input_channel_stride, |
|
size_t output_channel_stride, |
|
uint32_t block_size, |
|
uint32_t flags, |
|
xnn_operator_t* depth_to_space_op_out); |
|
|
|
enum xnn_status xnn_reshape_depth_to_space_nhwc_x8( |
|
xnn_operator_t depth_to_space_op, |
|
size_t batch_size, |
|
size_t input_height, |
|
size_t input_width, |
|
size_t* output_height_out, |
|
size_t* output_width_out, |
|
size_t* output_channels_out, |
|
pthreadpool_t threadpool); |
|
|
|
enum xnn_status xnn_setup_depth_to_space_nhwc_x8( |
|
xnn_operator_t depth_to_space_op, |
|
const void* input, |
|
void* output); |
|
|
|
enum xnn_status xnn_create_slice_nd_x8( |
|
uint32_t flags, |
|
xnn_operator_t* slice_op_out); |
|
|
|
enum xnn_status xnn_reshape_slice_nd_x8( |
|
xnn_operator_t slice_op, |
|
size_t num_dims, |
|
const size_t* input_shape, |
|
const size_t* offsets, |
|
const size_t* sizes, |
|
pthreadpool_t threadpool); |
|
|
|
enum xnn_status xnn_setup_slice_nd_x8( |
|
xnn_operator_t slice_op, |
|
const void* input, |
|
void* output); |
|
|
|
enum xnn_status xnn_create_space_to_depth_nhwc_x8( |
|
size_t input_channels, |
|
size_t input_channel_stride, |
|
size_t output_channel_stride, |
|
uint32_t block_size, |
|
uint32_t flags, |
|
xnn_operator_t* space_to_depth_op_out); |
|
|
|
enum xnn_status xnn_reshape_space_to_depth_nhwc_x8( |
|
xnn_operator_t space_to_depth_op, |
|
size_t batch_size, |
|
size_t input_height, |
|
size_t input_width, |
|
size_t* output_height_out, |
|
size_t* output_width_out, |
|
size_t* output_channels_out, |
|
pthreadpool_t threadpool); |
|
|
|
enum xnn_status xnn_setup_space_to_depth_nhwc_x8( |
|
xnn_operator_t space_to_depth_op, |
|
const void* input, |
|
void* output); |
|
|
|
enum xnn_status xnn_create_transpose_nd_x8( |
|
uint32_t flags, |
|
xnn_operator_t* transpose_op_out); |
|
|
|
enum xnn_status xnn_reshape_transpose_nd_x8( |
|
xnn_operator_t transpose_op, |
|
size_t num_dims, |
|
const size_t* input_shape, |
|
const size_t* output_perm, |
|
pthreadpool_t threadpool); |
|
|
|
enum xnn_status xnn_setup_transpose_nd_x8( |
|
xnn_operator_t transpose_op, |
|
const void* input, |
|
void* output); |
|
|
|
enum xnn_status xnn_run_transpose_nd_x8( |
|
const void* input, |
|
void* output, |
|
size_t num_dims, |
|
const size_t* input_shape, |
|
const size_t* output_perm, |
|
uint32_t flags, |
|
pthreadpool_t threadpool); |
|
|
|
enum xnn_status xnn_create_convert_nc_f32_qd8( |
|
size_t channels, |
|
size_t input_stride, |
|
size_t output_stride, |
|
uint32_t flags, |
|
xnn_operator_t* convert_op_out); |
|
|
|
enum xnn_status xnn_reshape_convert_nc_f32_qd8( |
|
xnn_operator_t convert_op, |
|
size_t batch_size, |
|
pthreadpool_t threadpool); |
|
|
|
|
|
enum xnn_status xnn_setup_convert_nc_f32_qd8( |
|
xnn_operator_t convert_op, |
|
const float* input, |
|
int8_t* output, |
|
struct xnn_dynamic_quantization_params* quantization_params); |
|
|
|
enum xnn_status xnn_create_convert_nc_f16_f32( |
|
size_t channels, |
|
size_t input_stride, |
|
size_t output_stride, |
|
uint32_t flags, |
|
xnn_operator_t* convert_op_out); |
|
|
|
enum xnn_status xnn_reshape_convert_nc_f16_f32( |
|
xnn_operator_t convert_op, |
|
size_t batch_size, |
|
pthreadpool_t threadpool); |
|
|
|
enum xnn_status xnn_setup_convert_nc_f16_f32( |
|
xnn_operator_t convert_op, |
|
const void* input, |
|
float* output); |
|
|
|
enum xnn_status xnn_run_convert_nc_f16_f32( |
|
size_t channels, |
|
size_t input_stride, |
|
size_t output_stride, |
|
size_t batch_size, |
|
const void* input, |
|
float* output, |
|
uint32_t flags, |
|
pthreadpool_t threadpool); |
|
|
|
enum xnn_status xnn_create_convert_nc_f32_f16( |
|
size_t channels, |
|
size_t input_stride, |
|
size_t output_stride, |
|
uint32_t flags, |
|
xnn_operator_t* convert_op_out); |
|
|
|
enum xnn_status xnn_reshape_convert_nc_f32_f16( |
|
xnn_operator_t convert_op, |
|
size_t batch_size, |
|
pthreadpool_t threadpool); |
|
|
|
enum xnn_status xnn_setup_convert_nc_f32_f16( |
|
xnn_operator_t convert_op, |
|
const float* input, |
|
void* output); |
|
|
|
enum xnn_status xnn_run_convert_nc_f32_f16( |
|
size_t channels, |
|
size_t input_stride, |
|
size_t output_stride, |
|
size_t batch_size, |
|
const float* input, |
|
void* output, |
|
uint32_t flags, |
|
pthreadpool_t threadpool); |
|
|
|
enum xnn_status xnn_create_convert_nc_f32_qs8( |
|
size_t channels, |
|
size_t input_stride, |
|
size_t output_stride, |
|
float output_scale, |
|
int8_t output_zero_point, |
|
int8_t output_min, |
|
int8_t output_max, |
|
uint32_t flags, |
|
xnn_operator_t* convert_op_out); |
|
|
|
enum xnn_status xnn_reshape_convert_nc_f32_qs8( |
|
xnn_operator_t convert_op, |
|
size_t batch_size, |
|
pthreadpool_t threadpool); |
|
|
|
enum xnn_status xnn_setup_convert_nc_f32_qs8( |
|
xnn_operator_t convert_op, |
|
const float* input, |
|
int8_t* output); |
|
|
|
enum xnn_status xnn_run_convert_nc_f32_qs8( |
|
size_t channels, |
|
size_t input_stride, |
|
size_t output_stride, |
|
size_t batch_size, |
|
const float* input, |
|
int8_t* output, |
|
float output_scale, |
|
int8_t output_zero_point, |
|
uint32_t flags, |
|
pthreadpool_t threadpool); |
|
|
|
enum xnn_status xnn_create_convert_nc_f32_qu8( |
|
size_t channels, |
|
size_t input_stride, |
|
size_t output_stride, |
|
float output_scale, |
|
uint8_t output_zero_point, |
|
uint8_t output_min, |
|
uint8_t output_max, |
|
uint32_t flags, |
|
xnn_operator_t* convert_op_out); |
|
|
|
enum xnn_status xnn_reshape_convert_nc_f32_qu8( |
|
xnn_operator_t convert_op, |
|
size_t batch_size, |
|
pthreadpool_t threadpool); |
|
|
|
enum xnn_status xnn_setup_convert_nc_f32_qu8( |
|
xnn_operator_t convert_op, |
|
const float* input, |
|
uint8_t* output); |
|
|
|
enum xnn_status xnn_run_convert_nc_f32_qu8( |
|
size_t channels, |
|
size_t input_stride, |
|
size_t output_stride, |
|
size_t batch_size, |
|
const float* input, |
|
uint8_t* output, |
|
float output_scale, |
|
uint8_t output_zero_point, |
|
uint32_t flags, |
|
pthreadpool_t threadpool); |
|
|
|
enum xnn_status xnn_create_convert_nc_qs8( |
|
size_t channels, |
|
size_t input_stride, |
|
size_t output_stride, |
|
float input_scale, |
|
int8_t input_zero_point, |
|
float output_scale, |
|
int8_t output_zero_point, |
|
uint32_t flags, |
|
xnn_operator_t* convert_op_out); |
|
|
|
enum xnn_status xnn_reshape_convert_nc_qs8( |
|
xnn_operator_t convert_op, |
|
size_t batch_size, |
|
pthreadpool_t threadpool); |
|
|
|
enum xnn_status xnn_setup_convert_nc_qs8( |
|
xnn_operator_t convert_op, |
|
const int8_t* input, |
|
int8_t* output); |
|
|
|
enum xnn_status xnn_create_convert_nc_qs8_f32( |
|
size_t channels, |
|
size_t input_stride, |
|
size_t output_stride, |
|
float input_scale, |
|
int8_t input_zero_point, |
|
uint32_t flags, |
|
xnn_operator_t* convert_op_out); |
|
|
|
enum xnn_status xnn_reshape_convert_nc_qs8_f32( |
|
xnn_operator_t convert_op, |
|
size_t batch_size, |
|
pthreadpool_t threadpool); |
|
|
|
enum xnn_status xnn_setup_convert_nc_qs8_f32( |
|
xnn_operator_t convert_op, |
|
const int8_t* input, |
|
float* output); |
|
|
|
enum xnn_status xnn_run_convert_nc_qs8_f32( |
|
size_t channels, |
|
size_t input_stride, |
|
size_t output_stride, |
|
size_t batch_size, |
|
const int8_t* input, |
|
float* output, |
|
float input_scale, |
|
int8_t input_zero_point, |
|
uint32_t flags, |
|
pthreadpool_t threadpool); |
|
|
|
enum xnn_status xnn_create_convert_nc_qs16_qs8( |
|
size_t channels, |
|
size_t input_stride, |
|
size_t output_stride, |
|
float input_scale, |
|
float output_scale, |
|
int8_t output_zero_point, |
|
uint32_t flags, |
|
xnn_operator_t* convert_op_out); |
|
|
|
enum xnn_status xnn_reshape_convert_nc_qs16_qs8( |
|
xnn_operator_t convert_op, |
|
size_t batch_size, |
|
pthreadpool_t threadpool); |
|
|
|
enum xnn_status xnn_setup_convert_nc_qs16_qs8( |
|
xnn_operator_t convert_op, |
|
const int16_t* input, |
|
int8_t* output); |
|
|
|
enum xnn_status xnn_run_convert_nc_qs16_qs8( |
|
size_t channels, |
|
size_t input_stride, |
|
size_t output_stride, |
|
size_t batch_size, |
|
const int16_t* input, |
|
int8_t* output, |
|
float input_scale, |
|
float output_scale, |
|
int8_t output_zero_point, |
|
uint32_t flags, |
|
pthreadpool_t threadpool); |
|
|
|
enum xnn_status xnn_create_convert_nc_qu8( |
|
size_t channels, |
|
size_t input_stride, |
|
size_t output_stride, |
|
float input_scale, |
|
uint8_t input_zero_point, |
|
float output_scale, |
|
uint8_t output_zero_point, |
|
uint32_t flags, |
|
xnn_operator_t* convert_op_out); |
|
|
|
enum xnn_status xnn_reshape_convert_nc_qu8( |
|
xnn_operator_t convert_op, |
|
size_t batch_size, |
|
pthreadpool_t threadpool); |
|
|
|
enum xnn_status xnn_setup_convert_nc_qu8( |
|
xnn_operator_t convert_op, |
|
const uint8_t* input, |
|
uint8_t* output); |
|
|
|
enum xnn_status xnn_create_convert_nc_qu8_f32( |
|
size_t channels, |
|
size_t input_stride, |
|
size_t output_stride, |
|
float input_scale, |
|
uint8_t input_zero_point, |
|
uint32_t flags, |
|
xnn_operator_t* convert_op_out); |
|
|
|
enum xnn_status xnn_reshape_convert_nc_qu8_f32( |
|
xnn_operator_t convert_op, |
|
size_t batch_size, |
|
pthreadpool_t threadpool); |
|
|
|
enum xnn_status xnn_setup_convert_nc_qu8_f32( |
|
xnn_operator_t convert_op, |
|
const uint8_t* input, |
|
float* output); |
|
|
|
enum xnn_status xnn_run_convert_nc_qu8_f32( |
|
size_t channels, |
|
size_t input_stride, |
|
size_t output_stride, |
|
size_t batch_size, |
|
const uint8_t* input, |
|
float* output, |
|
float input_scale, |
|
uint8_t input_zero_point, |
|
uint32_t flags, |
|
pthreadpool_t threadpool); |
|
|
|
#ifdef __cplusplus |
|
} |
|
#endif |
|
|