| #include "ggml-backend.h"
|
| #include "ggml-backend-impl.h"
|
| #include "ggml-cpu.h"
|
| #include "repack.h"
|
| #include "traits.h"
|
| #include "ggml-impl.h"
|
| #include "amx/amx.h"
|
|
|
#include <cctype>
#include <cstdio>
#include <cstring>
#include <new>
#include <string>
#include <vector>
|
|
|
| #ifdef GGML_USE_CPU_HBM
|
| # include "hbm.h"
|
| #endif
|
|
|
| #ifdef GGML_USE_CPU_KLEIDIAI
|
| # include "kleidiai/kleidiai.h"
|
| #endif
|
|
|
| #ifdef GGML_USE_CPU_RISCV64_SPACEMIT
|
| # include "spacemit/ime.h"
|
| #endif
|
|
|
| #if defined(_WIN32)
|
| # define WIN32_LEAN_AND_MEAN
|
| # ifndef NOMINMAX
|
| # define NOMINMAX
|
| # endif
|
| # include <windows.h>
|
| #else
|
| # include <unistd.h>
|
| #endif
|
|
|
| #if defined(__APPLE__)
|
| # include <sys/sysctl.h>
|
| # include <sys/types.h>
|
| #endif
|
|
|
|
|
|
|
| std::vector<ggml_backend_buffer_type_t> & ggml_backend_cpu_get_extra_buffer_types() {
|
| static std::vector<ggml_backend_buffer_type_t> bufts = []() {
|
| std::vector<ggml_backend_buffer_type_t> bufts;
|
|
|
| #if defined(__AMX_INT8__) && defined(__AVX512VNNI__)
|
| if (ggml_backend_amx_buffer_type()) {
|
| bufts.push_back(ggml_backend_amx_buffer_type());
|
| }
|
| #endif
|
|
|
| #ifdef GGML_USE_CPU_RISCV64_SPACEMIT
|
| if (ggml_backend_cpu_riscv64_spacemit_buffer_type()) {
|
| bufts.push_back(ggml_backend_cpu_riscv64_spacemit_buffer_type());
|
| }
|
| #endif
|
|
|
| #ifdef GGML_USE_CPU_KLEIDIAI
|
| if (ggml_backend_cpu_kleidiai_buffer_type()) {
|
| bufts.push_back(ggml_backend_cpu_kleidiai_buffer_type());
|
| }
|
| #endif
|
|
|
| #ifdef GGML_USE_CPU_REPACK
|
| if (ggml_backend_cpu_repack_buffer_type()) {
|
| bufts.push_back(ggml_backend_cpu_repack_buffer_type());
|
| }
|
| #endif
|
|
|
| return bufts;
|
| }();
|
|
|
| return bufts;
|
| }
|
|
|
| static ggml_backend_buffer_type_t * ggml_backend_cpu_device_get_extra_buffers_type(ggml_backend_dev_t device) {
|
| static std::vector<ggml_backend_buffer_type_t> extra_bufts = [] {
|
| std::vector<ggml_backend_buffer_type_t> bufts = ggml_backend_cpu_get_extra_buffer_types();
|
| bufts.push_back(nullptr);
|
| return bufts;
|
| }();
|
|
|
| return extra_bufts.data();
|
|
|
| GGML_UNUSED(device);
|
| }
|
|
|
| static bool ggml_backend_cpu_is_extra_buffer_type(ggml_backend_buffer_type_t buft) {
|
| for (auto * extra : ggml_backend_cpu_get_extra_buffer_types()) {
|
| if (extra == buft) {
|
| return true;
|
| }
|
| }
|
| return false;
|
| }
|
|
|
|
|
|
|
| struct ggml_backend_cpu_context {
|
| int n_threads;
|
| ggml_threadpool_t threadpool;
|
|
|
| uint8_t * work_data;
|
| size_t work_size;
|
|
|
| ggml_abort_callback abort_callback;
|
| void * abort_callback_data;
|
|
|
| bool use_ref;
|
| };
|
|
|
| static const char * ggml_backend_cpu_get_name(ggml_backend_t backend) {
|
| return "CPU";
|
|
|
| GGML_UNUSED(backend);
|
| }
|
|
|
| static void ggml_backend_cpu_free(ggml_backend_t backend) {
|
| struct ggml_backend_cpu_context * cpu_ctx = (struct ggml_backend_cpu_context *)backend->context;
|
| delete[] cpu_ctx->work_data;
|
| delete cpu_ctx;
|
| delete backend;
|
| }
|
|
|
| struct ggml_backend_plan_cpu {
|
| struct ggml_cplan cplan;
|
| struct ggml_cgraph cgraph;
|
| };
|
|
|
| static ggml_backend_graph_plan_t ggml_backend_cpu_graph_plan_create(ggml_backend_t backend, const struct ggml_cgraph * cgraph) {
|
| struct ggml_backend_cpu_context * cpu_ctx = (struct ggml_backend_cpu_context *)backend->context;
|
|
|
| struct ggml_backend_plan_cpu * cpu_plan = new ggml_backend_plan_cpu;
|
|
|
| cpu_plan->cplan = ggml_graph_plan(cgraph, cpu_ctx->n_threads, cpu_ctx->threadpool);
|
| cpu_plan->cgraph = *cgraph;
|
|
|
| if (cpu_plan->cplan.work_size > 0) {
|
| cpu_plan->cplan.work_data = new uint8_t[cpu_plan->cplan.work_size];
|
| if (cpu_plan->cplan.work_data == NULL) {
|
| delete cpu_plan;
|
| return NULL;
|
| }
|
| }
|
|
|
| cpu_plan->cplan.abort_callback = cpu_ctx->abort_callback;
|
| cpu_plan->cplan.abort_callback_data = cpu_ctx->abort_callback_data;
|
| cpu_plan->cplan.use_ref = cpu_ctx->use_ref;
|
|
|
| return cpu_plan;
|
| }
|
|
|
| static void ggml_backend_cpu_graph_plan_free(ggml_backend_t backend, ggml_backend_graph_plan_t plan) {
|
| struct ggml_backend_plan_cpu * cpu_plan = (struct ggml_backend_plan_cpu *)plan;
|
|
|
| delete[] cpu_plan->cplan.work_data;
|
| delete cpu_plan;
|
|
|
| GGML_UNUSED(backend);
|
| }
|
|
|
| static enum ggml_status ggml_backend_cpu_graph_plan_compute(ggml_backend_t backend, ggml_backend_graph_plan_t plan) {
|
| struct ggml_backend_plan_cpu * cpu_plan = (struct ggml_backend_plan_cpu *)plan;
|
|
|
| return ggml_graph_compute(&cpu_plan->cgraph, &cpu_plan->cplan);
|
|
|
| GGML_UNUSED(backend);
|
| }
|
|
|
| static enum ggml_status ggml_backend_cpu_graph_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
|
| struct ggml_backend_cpu_context * cpu_ctx = (struct ggml_backend_cpu_context *)backend->context;
|
|
|
| struct ggml_cplan cplan = ggml_graph_plan(cgraph, cpu_ctx->n_threads, cpu_ctx->threadpool);
|
|
|
| if (cpu_ctx->work_size < cplan.work_size) {
|
| delete[] cpu_ctx->work_data;
|
| cpu_ctx->work_data = new uint8_t[cplan.work_size];
|
| if (cpu_ctx->work_data == NULL) {
|
| cpu_ctx->work_size = 0;
|
| return GGML_STATUS_ALLOC_FAILED;
|
| }
|
| cpu_ctx->work_size = cplan.work_size;
|
| }
|
| cplan.work_data = (uint8_t *)cpu_ctx->work_data;
|
|
|
| cplan.abort_callback = cpu_ctx->abort_callback;
|
| cplan.abort_callback_data = cpu_ctx->abort_callback_data;
|
| cplan.use_ref = cpu_ctx->use_ref;
|
|
|
| return ggml_graph_compute(cgraph, &cplan);
|
| }
|
|
|
| static const struct ggml_backend_i ggml_backend_cpu_i = {
|
| ggml_backend_cpu_get_name,
|
| ggml_backend_cpu_free,
|
| NULL,
|
| NULL,
|
| NULL,
|
| NULL,
|
| ggml_backend_cpu_graph_plan_create,
|
| ggml_backend_cpu_graph_plan_free,
|
| NULL,
|
| ggml_backend_cpu_graph_plan_compute,
|
| ggml_backend_cpu_graph_compute,
|
| NULL,
|
| NULL,
|
| NULL,
|
| };
|
|
|
| static ggml_guid_t ggml_backend_cpu_guid(void) {
|
| static ggml_guid guid = { 0xaa, 0x67, 0xc7, 0x43, 0x96, 0xe6, 0xa3, 0x8a, 0xe3, 0xaf, 0xea, 0x92, 0x36, 0xbc, 0xfc, 0x89 };
|
| return &guid;
|
| }
|
|
|
| ggml_backend_t ggml_backend_cpu_init(void) {
|
|
|
| ggml_cpu_init();
|
|
|
| struct ggml_backend_cpu_context * ctx = new ggml_backend_cpu_context;
|
| if (ctx == NULL) {
|
| return NULL;
|
| }
|
|
|
| ctx->n_threads = GGML_DEFAULT_N_THREADS;
|
| ctx->threadpool = NULL;
|
| ctx->work_data = NULL;
|
| ctx->work_size = 0;
|
| ctx->abort_callback = NULL;
|
| ctx->abort_callback_data = NULL;
|
| ctx->use_ref = false;
|
|
|
| ggml_backend_t cpu_backend = new ggml_backend {
|
| ggml_backend_cpu_guid(),
|
| ggml_backend_cpu_i,
|
| ggml_backend_reg_dev_get(ggml_backend_cpu_reg(), 0),
|
| ctx,
|
| };
|
|
|
| if (cpu_backend == NULL) {
|
| delete ctx;
|
| return NULL;
|
| }
|
|
|
| return cpu_backend;
|
| }
|
|
|
| bool ggml_backend_is_cpu(ggml_backend_t backend) {
|
| return backend != NULL && ggml_guid_matches(backend->guid, ggml_backend_cpu_guid());
|
| }
|
|
|
| void ggml_backend_cpu_set_n_threads(ggml_backend_t backend_cpu, int n_threads) {
|
| GGML_ASSERT(ggml_backend_is_cpu(backend_cpu));
|
|
|
| struct ggml_backend_cpu_context * ctx = (struct ggml_backend_cpu_context *)backend_cpu->context;
|
| ctx->n_threads = n_threads;
|
| }
|
|
|
| void ggml_backend_cpu_set_threadpool(ggml_backend_t backend_cpu, ggml_threadpool_t threadpool) {
|
| GGML_ASSERT(ggml_backend_is_cpu(backend_cpu));
|
|
|
| struct ggml_backend_cpu_context * ctx = (struct ggml_backend_cpu_context *)backend_cpu->context;
|
|
|
| if (ctx->threadpool && ctx->threadpool != threadpool) {
|
|
|
| ggml_threadpool_pause(ctx->threadpool);
|
| }
|
| ctx->threadpool = threadpool;
|
| }
|
|
|
| void ggml_backend_cpu_set_abort_callback(ggml_backend_t backend_cpu, ggml_abort_callback abort_callback, void * abort_callback_data) {
|
| GGML_ASSERT(ggml_backend_is_cpu(backend_cpu));
|
|
|
| struct ggml_backend_cpu_context * ctx = (struct ggml_backend_cpu_context *)backend_cpu->context;
|
| ctx->abort_callback = abort_callback;
|
| ctx->abort_callback_data = abort_callback_data;
|
| }
|
|
|
| void ggml_backend_cpu_set_use_ref(ggml_backend_t backend_cpu, bool use_ref) {
|
| GGML_ASSERT(ggml_backend_is_cpu(backend_cpu));
|
|
|
| struct ggml_backend_cpu_context * ctx = (struct ggml_backend_cpu_context *)backend_cpu->context;
|
| ctx->use_ref = use_ref;
|
| }
|
|
|
|
|
|
|
// Context of the single CPU device: holds a human-readable description string.
//
// The constructor tries to replace the default "CPU" with the processor's brand
// name using a platform-specific query:
//   - macOS:   sysctl "machdep.cpu.brand_string"
//   - Linux:   first "model name" line of /proc/cpuinfo
//   - Windows: registry value "ProcessorNameString" of CentralProcessor\0
// If the query fails or yields an empty name, the description stays "CPU".
// The stored string never contains embedded NUL characters.
struct ggml_backend_cpu_device_context {
    std::string description = "CPU";

    ggml_backend_cpu_device_context() {
#ifdef __APPLE__
        size_t len = 0;
        if (sysctlbyname("machdep.cpu.brand_string", NULL, &len, NULL, 0) == 0 && len > 0) {
            // query into a temporary so a failed second call cannot clobber the default
            std::string brand(len, '\0');
            if (sysctlbyname("machdep.cpu.brand_string", &brand[0], &len, NULL, 0) == 0) {
                // the reported size covers the terminating NUL: cut at the first NUL
                const size_t nul = brand.find('\0');
                if (nul != std::string::npos) {
                    brand.resize(nul);
                }
                if (!brand.empty()) {
                    description = brand;
                }
            }
        }
#elif defined(__linux__)
        FILE * f = fopen("/proc/cpuinfo", "r");
        if (f) {
            char buf[1024];
            while (fgets(buf, sizeof(buf), f)) {
                if (strncmp(buf, "model name", 10) != 0) {
                    continue;
                }
                char * p = strchr(buf, ':');
                if (!p) {
                    continue;
                }
                p++;
                // <cctype> functions require a value representable as unsigned char
                while (std::isspace((unsigned char) *p)) {
                    p++;
                }
                // trim trailing whitespace (including the newline kept by fgets)
                // without ever indexing before the start of the value
                size_t n = strlen(p);
                while (n > 0 && std::isspace((unsigned char) p[n - 1])) {
                    n--;
                }
                if (n > 0) {
                    description.assign(p, n);
                }
                break;
            }
            fclose(f);
        }
#elif defined(_WIN32)
        HKEY hKey;
        if (RegOpenKeyEx(HKEY_LOCAL_MACHINE,
                         TEXT("HARDWARE\\DESCRIPTION\\System\\CentralProcessor\\0"),
                         0,
                         KEY_READ,
                         &hKey) == ERROR_SUCCESS) {
            DWORD cpu_brand_size = 0;
            // first call: ask for the required buffer size only
            if (RegQueryValueExA(hKey,
                                 "ProcessorNameString",
                                 NULL,
                                 NULL,
                                 NULL,
                                 &cpu_brand_size) == ERROR_SUCCESS && cpu_brand_size > 0) {
                // query into a temporary so a failed second call cannot clobber the default
                std::string brand(cpu_brand_size, '\0');
                if (RegQueryValueExA(hKey,
                                     "ProcessorNameString",
                                     NULL,
                                     NULL,
                                     (LPBYTE)&brand[0],
                                     &cpu_brand_size) == ERROR_SUCCESS) {
                    const size_t nul = brand.find('\0');
                    if (nul != std::string::npos) {
                        brand.resize(nul);
                    }
                    if (!brand.empty()) {
                        description = brand;
                    }
                }
            }
            RegCloseKey(hKey);
        }
#endif
    }
};
|
|
|
| static const char * ggml_backend_cpu_device_get_name(ggml_backend_dev_t dev) {
|
| return "CPU";
|
|
|
| GGML_UNUSED(dev);
|
| }
|
|
|
| static const char * ggml_backend_cpu_device_get_description(ggml_backend_dev_t dev) {
|
| struct ggml_backend_cpu_device_context * ctx = (struct ggml_backend_cpu_device_context *)dev->context;
|
|
|
| return ctx->description.c_str();
|
| }
|
|
|
| static void ggml_backend_cpu_device_get_memory(ggml_backend_dev_t dev, size_t * free, size_t * total) {
|
| #ifdef _WIN32
|
| MEMORYSTATUSEX status;
|
| status.dwLength = sizeof(status);
|
| GlobalMemoryStatusEx(&status);
|
| *total = status.ullTotalPhys;
|
| *free = status.ullAvailPhys;
|
| #else
|
| long pages = sysconf(_SC_PHYS_PAGES);
|
| long page_size = sysconf(_SC_PAGE_SIZE);
|
| *total = pages * page_size;
|
|
|
|
|
| *free = *total;
|
| #endif
|
|
|
| GGML_UNUSED(dev);
|
| }
|
|
|
| static enum ggml_backend_dev_type ggml_backend_cpu_device_get_type(ggml_backend_dev_t dev) {
|
| return GGML_BACKEND_DEVICE_TYPE_CPU;
|
|
|
| GGML_UNUSED(dev);
|
| }
|
|
|
| static void ggml_backend_cpu_device_get_props(ggml_backend_dev_t dev, struct ggml_backend_dev_props * props) {
|
| props->name = ggml_backend_cpu_device_get_name(dev);
|
| props->description = ggml_backend_cpu_device_get_description(dev);
|
| props->type = ggml_backend_cpu_device_get_type(dev);
|
| ggml_backend_cpu_device_get_memory(dev, &props->memory_free, &props->memory_total);
|
| props->caps = {
|
| false,
|
| false,
|
| true,
|
| false,
|
| };
|
| }
|
|
|
| static ggml_backend_t ggml_backend_cpu_device_init_backend(ggml_backend_dev_t dev, const char * params) {
|
| return ggml_backend_cpu_init();
|
|
|
| GGML_UNUSED(dev);
|
| GGML_UNUSED(params);
|
| }
|
|
|
| static ggml_backend_buffer_type_t ggml_backend_cpu_device_get_buffer_type(ggml_backend_dev_t dev) {
|
| return ggml_backend_cpu_buffer_type();
|
|
|
| GGML_UNUSED(dev);
|
| }
|
|
|
| static ggml_backend_buffer_t ggml_backend_cpu_device_buffer_from_host_ptr(ggml_backend_dev_t dev, void * ptr, size_t size, size_t max_tensor_size) {
|
| return ggml_backend_cpu_buffer_from_ptr(ptr, size);
|
|
|
| GGML_UNUSED(dev);
|
| GGML_UNUSED(max_tensor_size);
|
| }
|
|
|
| static bool ggml_backend_cpu_device_supports_op(ggml_backend_dev_t dev, const struct ggml_tensor * op) {
|
| const struct ggml_tensor * src0 = op->src[0];
|
| const struct ggml_tensor * src1 = op->src[1];
|
|
|
| if (op->op == GGML_OP_NONE || op->op == GGML_OP_RESHAPE || op->op == GGML_OP_VIEW || op->op == GGML_OP_PERMUTE || op->op == GGML_OP_TRANSPOSE) {
|
| return true;
|
| }
|
|
|
|
|
|
|
| for (int i = 0; i < 4; i++) {
|
| if (op->src[i] && op->src[i]->buffer &&
|
| ggml_backend_cpu_is_extra_buffer_type(op->src[i]->buffer->buft)) {
|
| auto * buf_extra = (ggml::cpu::extra_buffer_type *) op->src[i]->buffer->buft->context;
|
| return buf_extra->supports_op(dev, op);
|
| }
|
| }
|
|
|
| switch (op->op) {
|
| case GGML_OP_CPY:
|
| case GGML_OP_SET_ROWS:
|
| return
|
| op->type != GGML_TYPE_IQ3_XXS &&
|
| op->type != GGML_TYPE_IQ3_S &&
|
| op->type != GGML_TYPE_IQ2_XXS &&
|
| op->type != GGML_TYPE_IQ2_XS &&
|
| op->type != GGML_TYPE_IQ2_S &&
|
| op->type != GGML_TYPE_IQ1_S &&
|
| op->type != GGML_TYPE_IQ1_M;
|
| case GGML_OP_MUL_MAT:
|
| return src1->type == GGML_TYPE_F32 || src1->type == ggml_get_type_traits_cpu(src0->type)->vec_dot_type;
|
| case GGML_OP_SOFT_MAX_BACK: {
|
| if (op->src[0]->type != GGML_TYPE_F32 || op->src[1]->type != GGML_TYPE_F32) {
|
| return false;
|
| }
|
| float max_bias = 0.0f;
|
|
|
| memcpy(&max_bias, (const float *) op->op_params + 1, sizeof(float));
|
|
|
| return max_bias == 0.0f;
|
| }
|
| case GGML_OP_IM2COL_BACK:
|
| return src0->type == GGML_TYPE_F32 && src1->type == GGML_TYPE_F32;
|
| case GGML_OP_GET_ROWS_BACK:
|
| return src0->type == GGML_TYPE_F32 || src0->type == GGML_TYPE_F16;
|
| case GGML_OP_OUT_PROD:
|
| return (src0->type == GGML_TYPE_F32 || (ggml_is_quantized(src0->type) && src0->ne[2] == src1->ne[2] && src0->ne[3] == src1->ne[3])) &&
|
| src1->type == GGML_TYPE_F32 && op->type == GGML_TYPE_F32;
|
| default:
|
| return true;
|
| }
|
| }
|
|
|
| static bool ggml_backend_cpu_device_supports_buft(ggml_backend_dev_t dev, ggml_backend_buffer_type_t buft) {
|
| return ggml_backend_buft_is_host(buft) || ggml_backend_cpu_is_extra_buffer_type(buft);
|
| GGML_UNUSED(dev);
|
| }
|
|
|
| static const struct ggml_backend_device_i ggml_backend_cpu_device_i = {
|
| ggml_backend_cpu_device_get_name,
|
| ggml_backend_cpu_device_get_description,
|
| ggml_backend_cpu_device_get_memory,
|
| ggml_backend_cpu_device_get_type,
|
| ggml_backend_cpu_device_get_props,
|
| ggml_backend_cpu_device_init_backend,
|
| ggml_backend_cpu_device_get_buffer_type,
|
| NULL,
|
| ggml_backend_cpu_device_buffer_from_host_ptr,
|
| ggml_backend_cpu_device_supports_op,
|
| ggml_backend_cpu_device_supports_buft,
|
| NULL,
|
| NULL,
|
| NULL,
|
| NULL,
|
| };
|
|
|
|
|
|
|
| static const char * ggml_backend_cpu_reg_get_name(ggml_backend_reg_t reg) {
|
| return "CPU";
|
|
|
| GGML_UNUSED(reg);
|
| }
|
|
|
| static size_t ggml_backend_cpu_reg_get_device_count(ggml_backend_reg_t reg) {
|
| return 1;
|
|
|
| GGML_UNUSED(reg);
|
| }
|
|
|
| static ggml_backend_dev_t ggml_backend_cpu_reg_get_device(ggml_backend_reg_t reg, size_t index) {
|
| GGML_ASSERT(index == 0);
|
|
|
| static ggml_backend_cpu_device_context ctx;
|
| static ggml_backend_device ggml_backend_cpu_device = {
|
| ggml_backend_cpu_device_i,
|
| reg,
|
| &ctx,
|
| };
|
|
|
| return &ggml_backend_cpu_device;
|
| }
|
|
|
|
|
|
|
| static ggml_backend_feature * ggml_backend_cpu_get_features(ggml_backend_reg_t reg) {
|
| static std::vector<ggml_backend_feature> features = []() {
|
| ggml_cpu_init();
|
|
|
| std::vector<ggml_backend_feature> features;
|
| if (ggml_cpu_has_sse3()) {
|
| features.push_back({ "SSE3", "1" });
|
| }
|
| if (ggml_cpu_has_ssse3()) {
|
| features.push_back({ "SSSE3", "1" });
|
| }
|
| if (ggml_cpu_has_avx()) {
|
| features.push_back({ "AVX", "1" });
|
| }
|
| if (ggml_cpu_has_avx_vnni()) {
|
| features.push_back({ "AVX_VNNI", "1" });
|
| }
|
| if (ggml_cpu_has_avx2()) {
|
| features.push_back({ "AVX2", "1" });
|
| }
|
| if (ggml_cpu_has_f16c()) {
|
| features.push_back({ "F16C", "1" });
|
| }
|
| if (ggml_cpu_has_fma()) {
|
| features.push_back({ "FMA", "1" });
|
| }
|
| if (ggml_cpu_has_bmi2()) {
|
| features.push_back({ "BMI2", "1" });
|
| }
|
| if (ggml_cpu_has_avx512()) {
|
| features.push_back({ "AVX512", "1" });
|
| }
|
| if (ggml_cpu_has_avx512_vbmi()) {
|
| features.push_back({ "AVX512_VBMI", "1" });
|
| }
|
| if (ggml_cpu_has_avx512_vnni()) {
|
| features.push_back({ "AVX512_VNNI", "1" });
|
| }
|
| if (ggml_cpu_has_avx512_bf16()) {
|
| features.push_back({ "AVX512_BF16", "1" });
|
| }
|
| if (ggml_cpu_has_amx_int8()) {
|
| features.push_back({ "AMX_INT8", "1" });
|
| }
|
| if (ggml_cpu_has_neon()) {
|
| features.push_back({ "NEON", "1" });
|
| }
|
| if (ggml_cpu_has_arm_fma()) {
|
| features.push_back({ "ARM_FMA", "1" });
|
| }
|
| if (ggml_cpu_has_fp16_va()) {
|
| features.push_back({ "FP16_VA", "1" });
|
| }
|
| if (ggml_cpu_has_matmul_int8()) {
|
| features.push_back({ "MATMUL_INT8", "1" });
|
| }
|
| if (ggml_cpu_has_sve()) {
|
| features.push_back({ "SVE", "1" });
|
| }
|
| if (ggml_cpu_has_dotprod()) {
|
| features.push_back({ "DOTPROD", "1" });
|
| }
|
| if (ggml_cpu_get_sve_cnt() > 0) {
|
| static std::string sve_cnt = std::to_string(ggml_cpu_get_sve_cnt());
|
| features.push_back({ "SVE_CNT", sve_cnt.c_str() });
|
| }
|
| if (ggml_cpu_has_sme()) {
|
| features.push_back({ "SME", "1" });
|
| }
|
| if (ggml_cpu_has_riscv_v()) {
|
| features.push_back({ "RISCV_V", "1" });
|
| }
|
| if (ggml_cpu_get_rvv_vlen() > 0) {
|
| static std::string rvv_vlen = std::to_string(ggml_cpu_get_rvv_vlen());
|
| features.push_back({ "RVV_VLEN", rvv_vlen.c_str() });
|
| }
|
| if (ggml_cpu_has_vsx()) {
|
| features.push_back({ "VSX", "1" });
|
| }
|
| if (ggml_cpu_has_vxe()) {
|
| features.push_back({ "VXE", "1" });
|
| }
|
| if (ggml_cpu_has_wasm_simd()) {
|
| features.push_back({ "WASM_SIMD", "1" });
|
| }
|
| if (ggml_cpu_has_llamafile()) {
|
| features.push_back({ "LLAMAFILE", "1" });
|
| }
|
| #ifdef GGML_USE_ACCELERATE
|
| features.push_back({ "ACCELERATE", "1" });
|
| #endif
|
| #ifdef GGML_USE_CPU_HBM
|
| features.push_back({ "CPU_HBM", "1" });
|
| #endif
|
| #ifdef GGML_USE_OPENMP
|
| features.push_back({ "OPENMP", "1" });
|
| #endif
|
| #ifdef GGML_USE_CPU_KLEIDIAI
|
| features.push_back({ "KLEIDIAI", "1" });
|
| #endif
|
| #ifdef GGML_USE_CPU_REPACK
|
| features.push_back({ "REPACK", "1" });
|
| #endif
|
|
|
| features.push_back({ nullptr, nullptr });
|
|
|
| return features;
|
| }();
|
|
|
| return features.data();
|
|
|
| GGML_UNUSED(reg);
|
| }
|
|
|
| static void * ggml_backend_cpu_get_proc_address(ggml_backend_reg_t reg, const char * name) {
|
| if (strcmp(name, "ggml_backend_set_n_threads") == 0) {
|
| ggml_backend_set_n_threads_t fct = ggml_backend_cpu_set_n_threads;
|
| return (void *)fct;
|
| }
|
| if (strcmp(name, "ggml_backend_dev_get_extra_bufts") == 0) {
|
| ggml_backend_dev_get_extra_bufts_t fct = ggml_backend_cpu_device_get_extra_buffers_type;
|
| return (void *)fct;
|
| }
|
| if (strcmp(name, "ggml_backend_get_features") == 0) {
|
| return (void *)ggml_backend_cpu_get_features;
|
| }
|
| if (strcmp(name, "ggml_backend_set_abort_callback") == 0) {
|
| return (void *)ggml_backend_cpu_set_abort_callback;
|
| }
|
| if (strcmp(name, "ggml_backend_cpu_numa_init") == 0) {
|
| return (void *)ggml_numa_init;
|
| }
|
| if (strcmp(name, "ggml_backend_cpu_is_numa") == 0) {
|
| return (void *)ggml_is_numa;
|
| }
|
| if (strcmp(name, "ggml_backend_cpu_set_use_ref") == 0) {
|
| return (void *)ggml_backend_cpu_set_use_ref;
|
| }
|
|
|
|
|
| if (strcmp(name, "ggml_threadpool_new") == 0) {
|
| return (void *)ggml_threadpool_new;
|
| }
|
| if (strcmp(name, "ggml_threadpool_free") == 0) {
|
| return (void *)ggml_threadpool_free;
|
| }
|
| if (strcmp(name, "ggml_backend_cpu_set_threadpool") == 0) {
|
| return (void *)ggml_backend_cpu_set_threadpool;
|
| }
|
|
|
| return NULL;
|
|
|
| GGML_UNUSED(reg);
|
| }
|
|
|
| static const struct ggml_backend_reg_i ggml_backend_cpu_reg_i = {
|
| ggml_backend_cpu_reg_get_name,
|
| ggml_backend_cpu_reg_get_device_count,
|
| ggml_backend_cpu_reg_get_device,
|
| ggml_backend_cpu_get_proc_address,
|
| };
|
|
|
| ggml_backend_reg_t ggml_backend_cpu_reg(void) {
|
|
|
| ggml_cpu_init();
|
|
|
| static struct ggml_backend_reg ggml_backend_cpu_reg = {
|
| GGML_BACKEND_API_VERSION,
|
| ggml_backend_cpu_reg_i,
|
| NULL,
|
| };
|
|
|
| return &ggml_backend_cpu_reg;
|
| }
|
|
|
| GGML_BACKEND_DL_IMPL(ggml_backend_cpu_reg)
|
|
|