llama.cpp/ggml/src/ggml-cpu/common.h · kaisser/LLM-Maroc at main

LLM-Maroc / llama.cpp /ggml /src /ggml-cpu /common.h

Upload folder using huggingface_hub

305a42c verified 3 months ago

1.85 kB

	#pragma once

	#include "ggml.h"
	#include "traits.h"
	#include "ggml-cpu-impl.h"
	#include "ggml-impl.h"
	#include "simd-mappings.h"

	#ifdef __cplusplus

	#include <utility>

	// convenience functions/macros for use in template calls
	// note: these won't be required after the 'traits' lookup table is used.
	// Converts one float to ggml_fp16_t by expanding GGML_CPU_FP32_TO_FP16.
	// Unlike the macro, this is a real function, so its address can be taken
	// (see type_conversion_table<ggml_fp16_t>::from_f32).
	static inline ggml_fp16_t f32_to_f16(float x) {
	    const ggml_fp16_t half_value = GGML_CPU_FP32_TO_FP16(x);
	    return half_value;
	}

	// Converts one ggml_fp16_t to float by expanding GGML_CPU_FP16_TO_FP32.
	// Unlike the macro, this is a real function, so its address can be taken
	// (see type_conversion_table<ggml_fp16_t>::to_f32).
	static inline float f16_to_f32(ggml_fp16_t x) {
	    const float single_value = GGML_CPU_FP16_TO_FP32(x);
	    return single_value;
	}

	// Converts one float to ggml_bf16_t by expanding GGML_FP32_TO_BF16.
	// Unlike the macro, this is a real function, so its address can be taken
	// (see type_conversion_table<ggml_bf16_t>::from_f32).
	static inline ggml_bf16_t f32_to_bf16(float x) {
	    const ggml_bf16_t brain_value = GGML_FP32_TO_BF16(x);
	    return brain_value;
	}

	// Converts one ggml_bf16_t to float by expanding GGML_BF16_TO_FP32.
	// Unlike the macro, this is a real function, so its address can be taken
	// (see type_conversion_table<ggml_bf16_t>::to_f32).
	static inline float bf16_to_f32(ggml_bf16_t x) {
	    const float single_value = GGML_BF16_TO_FP32(x);
	    return single_value;
	}

	// Identity "conversion" for float: returns its argument unchanged.
	// It exists so that float has the same to_f32/from_f32 function shape as the
	// other element types (see type_conversion_table<float>).
	static inline float f32_to_f32(float x) {
	    const float unchanged = x;
	    return unchanged;
	}

	// Primary template: declared only, never defined. Each supported element
	// type provides an explicit specialization exposing to_f32 / from_f32.
	// TODO: fold this into the traits table once conversions are row-based.
	template <typename T>
	struct type_conversion_table;

	// Scalar conversion functions for ggml_fp16_t elements:
	//   from_f32 : float       -> ggml_fp16_t  (f32_to_f16)
	//   to_f32   : ggml_fp16_t -> float        (f16_to_f32)
	template <>
	struct type_conversion_table<ggml_fp16_t> {
	    static constexpr ggml_fp16_t (*from_f32)(float) = f32_to_f16;
	    static constexpr float (*to_f32)(ggml_fp16_t) = f16_to_f32;
	};

	// Scalar conversion functions for float elements. Both directions are the
	// identity function f32_to_f32, so no value is changed.
	template <>
	struct type_conversion_table<float> {
	    static constexpr float (*from_f32)(float) = f32_to_f32;
	    static constexpr float (*to_f32)(float) = f32_to_f32;
	};

	// Scalar conversion functions for ggml_bf16_t elements:
	//   from_f32 : float       -> ggml_bf16_t  (f32_to_bf16)
	//   to_f32   : ggml_bf16_t -> float        (bf16_to_f32)
	template <>
	struct type_conversion_table<ggml_bf16_t> {
	    static constexpr ggml_bf16_t (*from_f32)(float) = f32_to_bf16;
	    static constexpr float (*to_f32)(ggml_bf16_t) = bf16_to_f32;
	};

	// Returns the row range of src0 assigned to the calling thread.
	//
	// The ggml_nrows(src0) rows are cut into params->nth consecutive chunks of
	// ceil(rows / nth) rows each; thread params->ith gets the ith chunk.
	//
	// Returns {first_row, end_row}, where end_row is clamped to the row count
	// and is an exclusive bound (it can equal the row count itself).
	// Note: first_row is NOT clamped, so when there are more threads than
	// chunks needed, first_row can be greater than end_row; such a range should
	// be treated as empty.
	static std::pair<int64_t, int64_t> get_thread_range(const struct ggml_compute_params * params, const struct ggml_tensor * src0) {
	    const int64_t thread_index = params->ith;
	    const int64_t thread_count = params->nth;
	    const int64_t total_rows   = ggml_nrows(src0);

	    // chunk size: row count divided by thread count, rounded up
	    const int64_t chunk_rows = (total_rows + thread_count - 1)/thread_count;

	    const int64_t first_row = chunk_rows*thread_index;

	    // the last chunk may be short: never run past the final row
	    int64_t end_row = first_row + chunk_rows;
	    if (!(end_row < total_rows)) {
	        end_row = total_rows;
	    }

	    return std::pair<int64_t, int64_t>(first_row, end_row);
	}

	#endif