camenduru
/

ncnn

Model card Files Files and versions Community

ncnn / src /cpu.h

camenduru

thanks to ncnn ❤

be903e2 over 1 year ago

raw

history blame contribute delete

5.44 kB

	// Tencent is pleased to support the open source community by making ncnn available.
	//
	// Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved.
	//
	// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
	// in compliance with the License. You may obtain a copy of the License at
	//
	// https://opensource.org/licenses/BSD-3-Clause
	//
	// Unless required by applicable law or agreed to in writing, software distributed
	// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
	// CONDITIONS OF ANY KIND, either express or implied. See the License for the
	// specific language governing permissions and limitations under the License.

	#ifndef NCNN_CPU_H
	#define NCNN_CPU_H

	#include <stddef.h>

	#if (defined _WIN32 && !(defined __MINGW32__))
	#define WIN32_LEAN_AND_MEAN
	#include <windows.h>
	#endif
	// <sched.h> provides cpu_set_t, the storage CpuSet uses on Linux and Android.
	#if defined __ANDROID__ || defined __linux__
	#include <sched.h> // cpu_set_t
	#endif

	#include "platform.h"

	namespace ncnn {

	// A set of CPU indices with platform-specific backing storage.
	// It is the type taken by set_cpu_thread_affinity() and returned by
	// get_cpu_thread_affinity_mask().
	class NCNN_EXPORT CpuSet
	{
	public:
	    CpuSet();
	    // add / remove a single cpu, addressed by its index
	    void enable(int cpu);
	    void disable(int cpu);
	    // remove every cpu from the set
	    void disable_all();
	    // query whether the given cpu index is in the set
	    bool is_enabled(int cpu) const;
	    // number of cpus currently in the set
	    int num_enabled() const;

	public:
	    // Exactly which member exists depends on the target platform.
	#if (defined _WIN32 && !(defined __MINGW32__))
	    ULONG_PTR mask;
	#endif
	#if defined __ANDROID__ || defined __linux__
	    cpu_set_t cpu_set;
	#endif
	#if __APPLE__
	    unsigned int policy;
	#endif
	};

	// test optional cpu features
	//
	// ARM feature probes, one function per feature.
	// NOTE(review): every probe returns int; presumably non-zero means the
	// feature is available on the running cpu - confirm against the implementation.

	// edsp = armv7 edsp
	NCNN_EXPORT int cpu_support_arm_edsp();
	// neon = armv7 neon or aarch64 asimd
	NCNN_EXPORT int cpu_support_arm_neon();
	// vfpv4 = armv7 fp16 + fma
	NCNN_EXPORT int cpu_support_arm_vfpv4();
	// asimdhp = aarch64 asimd half precision
	NCNN_EXPORT int cpu_support_arm_asimdhp();
	// cpuid = aarch64 cpuid info
	NCNN_EXPORT int cpu_support_arm_cpuid();
	// asimddp = aarch64 asimd dot product
	NCNN_EXPORT int cpu_support_arm_asimddp();
	// asimdfhm = aarch64 asimd fhm
	NCNN_EXPORT int cpu_support_arm_asimdfhm();
	// bf16 = aarch64 bf16
	NCNN_EXPORT int cpu_support_arm_bf16();
	// i8mm = aarch64 i8mm
	NCNN_EXPORT int cpu_support_arm_i8mm();
	// sve = aarch64 sve
	NCNN_EXPORT int cpu_support_arm_sve();
	// sve2 = aarch64 sve2
	NCNN_EXPORT int cpu_support_arm_sve2();
	// svebf16 = aarch64 svebf16
	NCNN_EXPORT int cpu_support_arm_svebf16();
	// svei8mm = aarch64 svei8mm
	NCNN_EXPORT int cpu_support_arm_svei8mm();
	// svef32mm = aarch64 svef32mm
	NCNN_EXPORT int cpu_support_arm_svef32mm();

	// x86 feature probes, one function per feature.
	// NOTE(review): every probe returns int; presumably non-zero means the
	// feature is available on the running cpu - confirm against the implementation.

	// avx = x86 avx
	NCNN_EXPORT int cpu_support_x86_avx();
	// fma = x86 fma
	NCNN_EXPORT int cpu_support_x86_fma();
	// xop = x86 xop
	NCNN_EXPORT int cpu_support_x86_xop();
	// f16c = x86 f16c
	NCNN_EXPORT int cpu_support_x86_f16c();
	// avx2 = x86 avx2 + fma + f16c
	NCNN_EXPORT int cpu_support_x86_avx2();
	// avx_vnni = x86 avx vnni
	NCNN_EXPORT int cpu_support_x86_avx_vnni();
	// avx512 = x86 avx512f + avx512cd + avx512bw + avx512dq + avx512vl
	NCNN_EXPORT int cpu_support_x86_avx512();
	// avx512_vnni = x86 avx512 vnni
	NCNN_EXPORT int cpu_support_x86_avx512_vnni();
	// avx512_bf16 = x86 avx512 bf16
	NCNN_EXPORT int cpu_support_x86_avx512_bf16();
	// avx512_fp16 = x86 avx512 fp16
	NCNN_EXPORT int cpu_support_x86_avx512_fp16();

	// LoongArch feature probes.
	// NOTE(review): presumably non-zero means the feature is available - confirm.

	// lsx = loongarch lsx
	NCNN_EXPORT int cpu_support_loongarch_lsx();
	// lasx = loongarch lasx
	NCNN_EXPORT int cpu_support_loongarch_lasx();

	// MIPS / Loongson feature probes.
	// NOTE(review): presumably non-zero means the feature is available - confirm.

	// msa = mips msa
	NCNN_EXPORT int cpu_support_mips_msa();
	// mmi = loongson mmi
	NCNN_EXPORT int cpu_support_loongson_mmi();

	// RISC-V feature probes and vector length query.
	// NOTE(review): the two cpu_support_* probes presumably return non-zero when
	// the extension is available - confirm against the implementation.

	// v = riscv vector
	NCNN_EXPORT int cpu_support_riscv_v();
	// zfh = riscv half-precision float
	NCNN_EXPORT int cpu_support_riscv_zfh();
	// vlenb = riscv vector length in bytes
	// NOTE(review): the value returned when the vector extension is absent is
	// not visible from this header - confirm before relying on it.
	NCNN_EXPORT int cpu_riscv_vlenb();

	// cpu info
	// Core counts: total, little-cluster only, and big-cluster only.
	// NOTE(review): "little" / "big" presumably match the clusters selected by
	// powersave modes 1 and 2 - confirm against the implementation.
	NCNN_EXPORT int get_cpu_count();
	NCNN_EXPORT int get_little_cpu_count();
	NCNN_EXPORT int get_big_cpu_count();

	// Same three counts, for physical cpus.
	// NOTE(review): presumably these differ from the counts above only when
	// cores expose more than one hardware thread - confirm.
	NCNN_EXPORT int get_physical_cpu_count();
	NCNN_EXPORT int get_physical_little_cpu_count();
	NCNN_EXPORT int get_physical_big_cpu_count();

	// Cache size queries.
	// The cpu l2 size varies from 64k to 1M; the l3 size can be zero, so callers
	// must handle a zero result from get_cpu_level3_cache_size().
	// NOTE(review): the unit is presumably bytes - confirm against the implementation.
	NCNN_EXPORT int get_cpu_level2_cache_size();
	NCNN_EXPORT int get_cpu_level3_cache_size();

	// Powersave mode: binds all threads to the little clusters when enabled.
	// Affects HMP arch cpus such as ARM big.LITTLE.
	// Only implemented on android at the moment.
	// Switching powersave is expensive and not thread-safe.
	//   0 = all cores enabled (default)
	//   1 = only little clusters enabled
	//   2 = only big clusters enabled
	// The setter returns 0 on success.
	// NOTE(review): the getter presumably returns the current 0/1/2 mode - confirm.
	NCNN_EXPORT int get_cpu_powersave();
	NCNN_EXPORT int set_cpu_powersave(int powersave);

	// convenient wrapper
	// Returns a reference to the CpuSet associated with a powersave mode.
	// NOTE(review): powersave presumably takes the same values as
	// set_cpu_powersave (0 = all, 1 = little, 2 = big); the lifetime of the
	// referenced object is not visible from this header - confirm both.
	NCNN_EXPORT const CpuSet& get_cpu_thread_affinity_mask(int powersave);

	// set explicit thread affinity
	// thread_affinity_mask: the set of cpus the threads may run on.
	// NOTE(review): presumably returns 0 on success, like set_cpu_powersave - confirm.
	NCNN_EXPORT int set_cpu_thread_affinity(const CpuSet& thread_affinity_mask);

	// runtime thread affinity info
	// Reports whether the calling thread is currently on an a53 / a55 core.
	// NOTE(review): presumably non-zero means "yes", and a53 / a55 presumably
	// refer to ARM Cortex-A53 / Cortex-A55 - confirm against the implementation.
	NCNN_EXPORT int is_current_thread_running_on_a53_a55();

	// misc function wrapper for openmp routines
	// NOTE(review): behavior when the library is built without openmp is not
	// visible from this header - confirm before relying on the return values.

	// number of threads used by openmp
	NCNN_EXPORT int get_omp_num_threads();
	NCNN_EXPORT void set_omp_num_threads(int num_threads);

	// openmp dynamic adjustment setting
	NCNN_EXPORT int get_omp_dynamic();
	NCNN_EXPORT void set_omp_dynamic(int dynamic);

	// index of the calling thread as reported by openmp
	NCNN_EXPORT int get_omp_thread_num();

	// kmp blocktime setting; time_ms is given in milliseconds
	NCNN_EXPORT int get_kmp_blocktime();
	NCNN_EXPORT void set_kmp_blocktime(int time_ms);

	// Denormal handling mode. Flushing denormals is needed on Intel chipsets;
	// other architectures such as ARM can be added as needed.
	// The mode is one of the following DAZ / FTZ combinations:
	//   0 = DAZ OFF, FTZ OFF
	//   1 = DAZ ON , FTZ OFF
	//   2 = DAZ OFF, FTZ ON
	//   3 = DAZ ON , FTZ ON
	// NOTE(review): the setter presumably returns 0 on success - confirm.
	NCNN_EXPORT int get_flush_denormals();
	NCNN_EXPORT int set_flush_denormals(int flush_denormals);

	} // namespace ncnn

	#endif // NCNN_CPU_H