Add files using upload-large-folder tool

568f19a verified 3 months ago

13.9 kB

	/*******************************************************************************
	* Copyright 2024 Intel Corporation
	*
	* Licensed under the Apache License, Version 2.0 (the "License");
	* you may not use this file except in compliance with the License.
	* You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*******************************************************************************/

	/// @file
	/// ukernel C API

	#ifndef ONEAPI_DNNL_DNNL_UKERNEL_H
	#define ONEAPI_DNNL_DNNL_UKERNEL_H

	#include "oneapi/dnnl/dnnl.h"
	#include "oneapi/dnnl/dnnl_ukernel_types.h"

	#ifdef __cplusplus
	extern "C" {
	#endif

	/// @addtogroup dnnl_api
	/// @{

	/// @addtogroup dnnl_api_ukernel
	/// @{

	#ifdef DNNL_EXPERIMENTAL_UKERNEL

	/// Creates a memory storage object for ukernel attributes.
	///
	/// @param attr_params Receives the created ukernel attributes memory
	///     storage.
	/// @returns #dnnl_success on success and a status describing the error
	///     otherwise.
	dnnl_status_t DNNL_API dnnl_ukernel_attr_params_create(
	        dnnl_ukernel_attr_params_t *attr_params);

	/// Stores the post-operations arguments in an attributes memory storage.
	///
	/// @param attr_params Memory pointers storage object.
	/// @param post_ops_args Pointer to an array of pointers to the post_ops
	///     storages. The pointers are expected to be packed together.
	/// @returns #dnnl_success on success and a status describing the error
	///     otherwise.
	dnnl_status_t DNNL_API dnnl_ukernel_attr_params_set_post_ops_args(
	        dnnl_ukernel_attr_params_t attr_params,
	        const void **post_ops_args);

	/// Stores the tensor A scales argument in an attributes memory storage.
	///
	/// @param attr_params Memory pointers storage object.
	/// @param a_scales Pointer to the tensor A scales storage.
	/// @returns #dnnl_success on success and a status describing the error
	///     otherwise.
	dnnl_status_t DNNL_API dnnl_ukernel_attr_params_set_A_scales(
	        dnnl_ukernel_attr_params_t attr_params,
	        const void *a_scales);

	/// Stores the tensor B scales argument in an attributes memory storage.
	///
	/// When `dnnl_brgemm_set_B_scales` was called with a mask of 2, the storage
	/// is expected to hold at least N values of the selected data type.
	///
	/// @param attr_params Memory pointers storage object.
	/// @param b_scales Pointer to the tensor B scales storage.
	/// @returns #dnnl_success on success and a status describing the error
	///     otherwise.
	dnnl_status_t DNNL_API dnnl_ukernel_attr_params_set_B_scales(
	        dnnl_ukernel_attr_params_t attr_params,
	        const void *b_scales);

	/// Stores the tensor D scales argument in an attributes memory storage.
	///
	/// @param attr_params Memory pointers storage object.
	/// @param d_scales Pointer to the tensor D scales storage.
	/// @returns #dnnl_success on success and a status describing the error
	///     otherwise.
	dnnl_status_t DNNL_API dnnl_ukernel_attr_params_set_D_scales(
	        dnnl_ukernel_attr_params_t attr_params,
	        const void *d_scales);

	/// Destroys a memory storage object for ukernel attributes.
	///
	/// @param attr_params Memory pointers storage object to destroy.
	/// @returns #dnnl_success on success and a status describing the error
	///     otherwise.
	dnnl_status_t DNNL_API dnnl_ukernel_attr_params_destroy(
	        dnnl_ukernel_attr_params_t attr_params);

	/// @addtogroup dnnl_api_ukernel_brgemm
	/// @{

	/// Creates a BRGeMM ukernel object that operates by the formula
	/// `C = [A x B]`.
	///
	/// @param brgemm Receives the created BRGeMM ukernel object.
	/// @param M Dimension M of tensor A.
	/// @param N Dimension N of tensor B.
	/// @param K Dimension K of tensors A and B.
	/// @param batch_size Number of batches to process.
	/// @param lda Leading dimension of tensor A.
	/// @param ldb Leading dimension of tensor B.
	/// @param ldc Leading dimension of tensor C.
	/// @param a_dt Data type of tensor A.
	/// @param b_dt Data type of tensor B.
	/// @param c_dt Data type of tensor C. Must be dnnl_f32.
	/// @returns #dnnl_success on success and a status describing the error
	///     otherwise.
	dnnl_status_t DNNL_API dnnl_brgemm_create(
	        dnnl_brgemm_t *brgemm,
	        dnnl_dim_t M,
	        dnnl_dim_t N,
	        dnnl_dim_t K,
	        dnnl_dim_t batch_size,
	        dnnl_dim_t lda,
	        dnnl_dim_t ldb,
	        dnnl_dim_t ldc,
	        dnnl_data_type_t a_dt,
	        dnnl_data_type_t b_dt,
	        dnnl_data_type_t c_dt);

	/// Selects whether the intermediate result is added to the output tensor C
	/// instead of overwriting it: `C += [A x B]`.
	///
	/// @param brgemm BRGeMM ukernel object.
	/// @param add_C Addition flag: `0` skips the addition, `1` applies it.
	/// @returns #dnnl_success on success and a status describing the error
	///     otherwise.
	dnnl_status_t DNNL_API dnnl_brgemm_set_add_C(
	        dnnl_brgemm_t brgemm, int add_C);

	/// Attaches post-operations to a BRGeMM ukernel object:
	/// `D = post-operations(C)`.
	///
	/// Post-operations are applied when at least one of the following holds:
	/// * Non-empty attributes are specified.
	/// * The output data type `d_dt` differs from the accumulation data type
	///   `c_dt`.
	///
	/// When either condition holds, the final call of the accumulation chain
	/// must be `dnnl_brgemm_execute_postops`; otherwise it must be
	/// `dnnl_brgemm_execute`.
	///
	/// @param brgemm BRGeMM ukernel object.
	/// @param ldd Leading dimension of tensor D.
	/// @param d_dt Data type of tensor D.
	/// @param post_ops Primitive post-operations attribute that extends the
	///     kernel operations.
	/// @returns #dnnl_success on success and a status describing the error
	///     otherwise.
	dnnl_status_t DNNL_API dnnl_brgemm_set_post_ops(
	        dnnl_brgemm_t brgemm,
	        dnnl_dim_t ldd,
	        dnnl_data_type_t d_dt,
	        const_dnnl_post_ops_t post_ops);

	/// Sets the tensor A scales mask on a BRGeMM ukernel object.
	///
	/// For the quantization flavor, tensor A scales are applied to the
	/// accumulation buffer once C is ready.
	///
	/// @param brgemm BRGeMM ukernel object.
	/// @param a_scale_mask Tensor A scale mask. Can be `0` only.
	/// @returns #dnnl_success on success and a status describing the error
	///     otherwise.
	dnnl_status_t DNNL_API dnnl_brgemm_set_A_scales(
	        dnnl_brgemm_t brgemm,
	        int a_scale_mask);

	/// Sets the tensor B scales mask on a BRGeMM ukernel object.
	///
	/// For the quantization flavor, tensor B scales are applied to the
	/// accumulation buffer once C is ready.
	///
	/// @param brgemm BRGeMM ukernel object.
	/// @param b_scale_mask Tensor B scale mask. Can be `0` and `2` only.
	/// @returns #dnnl_success on success and a status describing the error
	///     otherwise.
	dnnl_status_t DNNL_API dnnl_brgemm_set_B_scales(
	        dnnl_brgemm_t brgemm,
	        int b_scale_mask);

	/// Sets the tensor D scales mask on a BRGeMM ukernel object.
	///
	/// For the quantization flavor, tensor D scales are applied after all
	/// post-ops have been applied.
	///
	/// @param brgemm BRGeMM ukernel object.
	/// @param d_scale_mask Tensor D scale mask. Can be `0` only.
	/// @returns #dnnl_success on success and a status describing the error
	///     otherwise.
	dnnl_status_t DNNL_API dnnl_brgemm_set_D_scales(
	        dnnl_brgemm_t brgemm,
	        int d_scale_mask);

	/// Completes the initialization of a BRGeMM ukernel object.
	///
	/// This step is mandatory before information can be queried from the
	/// object.
	///
	/// @param brgemm BRGeMM ukernel object to finalize.
	/// @returns #dnnl_success on success and a status describing the error
	///     otherwise.
	dnnl_status_t DNNL_API dnnl_brgemm_finalize(
	        dnnl_brgemm_t brgemm);

	/// Queries the packing type a BRGeMM ukernel object expects for tensor B.
	///
	/// @param brgemm BRGeMM ukernel object.
	/// @param pack_type Receives the packing type: `dnnl_brgemm_no_pack` if
	///     packing is not expected, and `dnnl_brgemm_pack_32` otherwise.
	/// @returns #dnnl_success on success and a status describing the error
	///     otherwise.
	dnnl_status_t DNNL_API dnnl_brgemm_get_B_pack_type(
	        const_dnnl_brgemm_t brgemm,
	        dnnl_pack_type_t *pack_type);

	/// Queries the size of the scratchpad memory a BRGeMM ukernel object
	/// needs.
	///
	/// @param brgemm BRGeMM ukernel object.
	/// @param size Receives the size of the buffer required for the BRGeMM
	///     ukernel object.
	/// @returns #dnnl_success on success and a status describing the error
	///     otherwise.
	dnnl_status_t DNNL_API dnnl_brgemm_get_scratchpad_size(
	        const_dnnl_brgemm_t brgemm,
	        size_t *size);

	/// Queries whether a call to `dnnl_brgemm_execute_postops` is valid for a
	/// BRGeMM ukernel object.
	///
	/// @param brgemm BRGeMM ukernel object.
	/// @param valid Receives the validity flag for the given ukernel object:
	///     `1` if `dnnl_brgemm_execute_postops` is valid, `0` otherwise.
	/// @returns #dnnl_success on success and a status describing the error
	///     otherwise.
	dnnl_status_t DNNL_API dnnl_brgemm_is_execute_postops_valid(
	        const_dnnl_brgemm_t brgemm,
	        int *valid);

	/// Initializes the hardware-specific context. Returns the success status
	/// when no initialization is required.
	///
	/// @param brgemm BRGeMM ukernel object.
	/// @returns #dnnl_success on success and a status describing the error
	///     otherwise.
	dnnl_status_t DNNL_API dnnl_brgemm_set_hw_context(
	        const_dnnl_brgemm_t brgemm);

	/// Releases the hardware-specific context. Must be used after all the
	/// execution calls to BRGeMM ukernel objects.
	///
	/// Takes no arguments.
	///
	/// @returns #dnnl_success on success and a status describing the error
	///     otherwise.
	// `(void)` makes this a true prototype for C callers: before C23 an empty
	// parameter list leaves the parameters unspecified, so stray arguments
	// would not be diagnosed. In C++ both spellings are equivalent.
	dnnl_status_t DNNL_API dnnl_brgemm_release_hw_context(void);

	/// Generates the executable part of a BRGeMM ukernel object.
	///
	/// @param brgemm BRGeMM ukernel object.
	/// @returns #dnnl_success on success and a status describing the error
	///     otherwise.
	dnnl_status_t DNNL_API dnnl_brgemm_generate(
	        dnnl_brgemm_t brgemm);

	/// Executes a BRGeMM ukernel object.
	///
	/// @param brgemm BRGeMM ukernel object.
	/// @param A_ptr Base pointer to a tensor A.
	/// @param B_ptr Base pointer to a tensor B.
	/// @param A_B_offsets Pointer to the set of tensor A and tensor B offsets
	///     for each batch; the set must be contiguous in memory. A single batch
	///     should supply offsets for both tensors A and B simultaneously. The
	///     number of batches must coincide with the `batch_size` value passed
	///     at the creation stage.
	/// @param C_ptr Pointer to a tensor C (accumulation buffer).
	/// @param scratchpad_ptr Pointer to a scratchpad buffer.
	/// @returns #dnnl_success on success and a status describing the error
	///     otherwise.
	// All buffer arguments are pointers; a named parameter of plain `void`
	// type is not valid C.
	dnnl_status_t DNNL_API dnnl_brgemm_execute(const_dnnl_brgemm_t brgemm,
	        const void *A_ptr, const void *B_ptr,
	        const dnnl_dim_t *A_B_offsets, void *C_ptr,
	        void *scratchpad_ptr);

	/// Executes a BRGeMM ukernel object with post operations.
	///
	/// @param brgemm BRGeMM ukernel object.
	/// @param A Base pointer to a tensor A.
	/// @param B Base pointer to a tensor B.
	/// @param A_B_offsets Pointer to a set of tensor A and tensor B offsets for
	///     each batch. A set must be contiguous in memory. A single batch
	///     should supply offsets for both tensors A and B simultaneously. The
	///     number of batches must coincide with the `batch_size` value passed
	///     at the creation stage.
	/// @param C_ptr Pointer to a tensor C (accumulation buffer).
	/// @param D_ptr Pointer to a tensor D (output buffer).
	/// @param scratchpad_ptr Pointer to a scratchpad buffer.
	/// @param attr_params Ukernel attributes memory storage.
	/// @returns #dnnl_success on success and a status describing the error
	///     otherwise.
	// All buffer arguments are pointers; a named parameter of plain `void`
	// type is not valid C.
	dnnl_status_t DNNL_API dnnl_brgemm_execute_postops(const_dnnl_brgemm_t brgemm,
	        const void *A, const void *B, const dnnl_dim_t *A_B_offsets,
	        const void *C_ptr, void *D_ptr, void *scratchpad_ptr,
	        const_dnnl_ukernel_attr_params_t attr_params);

	/// Destroys a BRGeMM ukernel object.
	///
	/// @param brgemm The BRGeMM ukernel object to destroy.
	/// @returns #dnnl_success on success and a status describing the error
	///     otherwise.
	dnnl_status_t DNNL_API dnnl_brgemm_destroy(
	        dnnl_brgemm_t brgemm);

	/// Creates a transform object.
	///
	/// @param transform Receives the created transform object.
	/// @param K Dimension K.
	/// @param N Dimension N.
	/// @param in_pack_type Input packing type. Must be one of
	///     `dnnl_pack_type_no_trans` or `dnnl_pack_type_trans`.
	/// @param in_ld Input leading dimension.
	/// @param out_ld Output leading dimension. When packing data, it specifies
	///     a block by N dimension.
	/// @param in_dt Input data type.
	/// @param out_dt Output data type.
	/// @returns #dnnl_success on success and a status describing the error
	///     otherwise.
	dnnl_status_t DNNL_API dnnl_transform_create(
	        dnnl_transform_t *transform,
	        dnnl_dim_t K,
	        dnnl_dim_t N,
	        dnnl_pack_type_t in_pack_type,
	        dnnl_dim_t in_ld,
	        dnnl_dim_t out_ld,
	        dnnl_data_type_t in_dt,
	        dnnl_data_type_t out_dt);

	/// Generates the executable part of a transform object.
	///
	/// @param transform Transform object.
	/// @returns #dnnl_success on success and a status describing the error
	///     otherwise.
	dnnl_status_t DNNL_API dnnl_transform_generate(
	        dnnl_transform_t transform);

	/// Executes a transform object.
	///
	/// @param transform Transform object.
	/// @param in_ptr Pointer to an input buffer.
	/// @param out_ptr Pointer to an output buffer.
	/// @returns #dnnl_success on success and a status describing the error
	///     otherwise.
	// Both buffer arguments are pointers; a named parameter of plain `void`
	// type is not valid C.
	dnnl_status_t DNNL_API dnnl_transform_execute(
	        const_dnnl_transform_t transform, const void *in_ptr,
	        void *out_ptr);

	/// Destroys a transform object.
	///
	/// @param transform The transform object to destroy.
	/// @returns #dnnl_success on success and a status describing the error
	///     otherwise.
	dnnl_status_t DNNL_API dnnl_transform_destroy(
	        dnnl_transform_t transform);

	/// @} dnnl_api_ukernel_brgemm

	#endif

	/// @} dnnl_api_ukernel

	/// @} dnnl_api

	#ifdef __cplusplus
	}
	#endif

	#endif /* ONEAPI_DNNL_DNNL_UKERNEL_H */