|
#pragma once |
|
|
|
#include "ggml.h" |
|
#include "traits.h" |
|
#include "ggml-cpu-impl.h" |
|
#include "ggml-impl.h" |
|
#include "simd-mappings.h" |
|
|
|
#ifdef __cplusplus |
|
|
|
#include <utility> |
|
|
|
|
|
|
|
// Converts a 32-bit float to ggml_fp16_t by delegating to
// GGML_CPU_FP32_TO_FP16.
//
// Exposing the conversion as a real function gives it an address, so it can
// be stored as a function pointer (see type_conversion_table<ggml_fp16_t>).
static inline ggml_fp16_t f32_to_f16(float x) {
    return GGML_CPU_FP32_TO_FP16(x);
}
|
|
|
// Converts a ggml_fp16_t value to a 32-bit float by delegating to
// GGML_CPU_FP16_TO_FP32.
//
// Exposing the conversion as a real function gives it an address, so it can
// be stored as a function pointer (see type_conversion_table<ggml_fp16_t>).
static inline float f16_to_f32(ggml_fp16_t x) {
    return GGML_CPU_FP16_TO_FP32(x);
}
|
|
|
// Converts a 32-bit float to ggml_bf16_t by delegating to
// GGML_FP32_TO_BF16.
//
// Exposing the conversion as a real function gives it an address, so it can
// be stored as a function pointer (see type_conversion_table<ggml_bf16_t>).
static inline ggml_bf16_t f32_to_bf16(float x) {
    return GGML_FP32_TO_BF16(x);
}
|
|
|
// Converts a ggml_bf16_t value to a 32-bit float by delegating to
// GGML_BF16_TO_FP32.
//
// Exposing the conversion as a real function gives it an address, so it can
// be stored as a function pointer (see type_conversion_table<ggml_bf16_t>).
static inline float bf16_to_f32(ggml_bf16_t x) {
    return GGML_BF16_TO_FP32(x);
}
|
|
|
// Identity "conversion" for float: returns x unchanged.
//
// It has the same shape as the other conversion helpers so that
// type_conversion_table<float> can expose to_f32/from_f32 like every other
// element type.
static inline float f32_to_f32(float x) {
    return x;
}
|
|
|
|
|
// Maps an element type T to its float conversion functions.
//
// The primary template is only declared here; the explicit specializations
// provide two static function pointers each:
//   to_f32   : T     -> float
//   from_f32 : float -> T
template <class T>
struct type_conversion_table;
|
|
|
template <> |
|
struct type_conversion_table<ggml_fp16_t> { |
|
static constexpr float (*to_f32)(ggml_fp16_t) = f16_to_f32; |
|
static constexpr ggml_fp16_t (*from_f32)(float) = f32_to_f16; |
|
}; |
|
|
|
template <> |
|
struct type_conversion_table<float> { |
|
static constexpr float (*to_f32)(float) = f32_to_f32; |
|
static constexpr float (*from_f32)(float) = f32_to_f32; |
|
}; |
|
|
|
template <> |
|
struct type_conversion_table<ggml_bf16_t> { |
|
static constexpr float (*to_f32)(ggml_bf16_t) = bf16_to_f32; |
|
static constexpr ggml_bf16_t (*from_f32)(float) = f32_to_bf16; |
|
}; |
|
|
|
static std::pair<int64_t, int64_t> get_thread_range(const struct ggml_compute_params * params, const struct ggml_tensor * src0) { |
|
const int64_t ith = params->ith; |
|
const int64_t nth = params->nth; |
|
|
|
const int64_t nr = ggml_nrows(src0); |
|
|
|
|
|
const int64_t dr = (nr + nth - 1)/nth; |
|
|
|
|
|
const int64_t ir0 = dr*ith; |
|
const int64_t ir1 = MIN(ir0 + dr, nr); |
|
|
|
return {ir0, ir1}; |
|
} |
|
|
|
#endif |
|
|