| | #ifndef MTMD_H |
| | #define MTMD_H |
| |
|
| | #include "ggml.h" |
| | #include "llama.h" |
| |
|
| | #include <stddef.h> |
| | #include <stdint.h> |
| | #include <stdbool.h> |
| |
|
| | #ifdef __cplusplus |
| | #include <string> |
| | #include <vector> |
| | #include <cinttypes> |
| | #include <memory> |
| | #endif |
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| |
|
// MTMD_API: export/visibility annotation placed in front of every public
// declaration in this header.
//   - LLAMA_SHARED not defined: expands to nothing.
//   - LLAMA_SHARED on Windows (non-MinGW): dllexport when LLAMA_BUILD is
//     defined, dllimport otherwise.
//   - LLAMA_SHARED elsewhere: default symbol visibility.
#if defined(LLAMA_SHARED)
#    if defined(_WIN32) && !defined(__MINGW32__)
#        if defined(LLAMA_BUILD)
#            define MTMD_API __declspec(dllexport)
#        else
#            define MTMD_API __declspec(dllimport)
#        endif
#    else
#        define MTMD_API __attribute__ ((visibility ("default")))
#    endif
#else
#    define MTMD_API
#endif
| |
|
| | |
// String literal "<__image__>".
// NOTE(review): this header also declares mtmd_default_marker(); whether this
// macro is kept only for backward compatibility is not visible here - confirm.
#define MTMD_DEFAULT_IMAGE_MARKER "<__image__>"
| |
|
| | #ifdef __cplusplus |
| | extern "C" { |
| | #endif |
| |
|
// Kind of content carried by a mtmd_input_chunk, as reported by
// mtmd_input_chunk_get_type(). Values are spelled out explicitly; they match
// the implicit numbering (0, 1, 2).
enum mtmd_input_chunk_type {
    MTMD_INPUT_CHUNK_TYPE_TEXT  = 0,
    MTMD_INPUT_CHUNK_TYPE_IMAGE = 1,
    MTMD_INPUT_CHUNK_TYPE_AUDIO = 2,
};
| |
|
| | |
// Forward declarations only: none of these structs is defined in this header,
// so callers can only hold pointers to them and must go through the functions
// declared below.
struct mtmd_context;
struct mtmd_bitmap;
struct mtmd_image_tokens;
struct mtmd_input_chunk;
struct mtmd_input_chunks;
| |
|
// Text part of a tokenization request; mtmd_tokenize() receives it by pointer.
struct mtmd_input_text {
    // Prompt text.
    // NOTE(review): presumably NUL-terminated and expected to contain the
    // media marker once per bitmap - confirm against the implementation.
    const char * text;
    // NOTE(review): presumably forwarded to the text tokenizer as
    // "add special tokens" - confirm.
    bool add_special;
    // NOTE(review): presumably forwarded to the text tokenizer as
    // "parse special tokens found in the text" - confirm.
    bool parse_special;
};
| |
|
| | |
| | |
| | |
| |
|
// Typedefs so that the struct types can be written without the `struct`
// keyword, which C (unlike C++) otherwise requires.
typedef struct mtmd_context mtmd_context;
typedef struct mtmd_bitmap mtmd_bitmap;
typedef struct mtmd_image_tokens mtmd_image_tokens;
typedef struct mtmd_input_chunk mtmd_input_chunk;
typedef struct mtmd_input_chunks mtmd_input_chunks;
typedef struct mtmd_input_text mtmd_input_text;
| |
|
// Settings passed by value to mtmd_init_from_file().
// mtmd_context_params_default() returns an instance of this struct; the
// default values themselves are set by the implementation and are not
// visible in this header.
struct mtmd_context_params {
    bool use_gpu;        // NOTE(review): presumably enables GPU offload for the encoder - confirm
    bool print_timings;  // NOTE(review): presumably enables timing output - confirm
    int n_threads;       // NOTE(review): presumably the CPU thread count used while encoding - confirm
    enum ggml_log_level verbosity; // log level, expressed with ggml's log-level enum
    // Two marker strings are accepted.
    // NOTE(review): it is not visible here which one takes precedence or
    // whether image_marker is legacy - confirm before relying on either.
    const char * image_marker;
    const char * media_marker;
};
| |
|
// Returns the default marker string.
// NOTE(review): presumably the placeholder looked for in the prompt text and
// the default for mtmd_context_params::media_marker - confirm.
MTMD_API const char * mtmd_default_marker(void);

// Returns a mtmd_context_params instance by value. The defaults are chosen by
// the implementation and are not visible in this header.
MTMD_API struct mtmd_context_params mtmd_context_params_default(void);
| |
|
| | |
| | |
// Creates a mtmd_context from the file named by `mmproj_fname`, for use with
// `text_model`, configured by `ctx_params` (taken by value).
// NOTE(review): presumably returns NULL on failure - confirm.
// The returned context is released with mtmd_free().
MTMD_API mtmd_context * mtmd_init_from_file(const char * mmproj_fname,
                                            const struct llama_model * text_model,
                                            const struct mtmd_context_params ctx_params);

// Releases a context. This is the function the C++ context_ptr deleter in
// this header calls.
MTMD_API void mtmd_free(mtmd_context * ctx);
| |
|
| | |
// Boolean query on the context.
// NOTE(review): the name suggests it reports whether a non-causal attention
// mask must be used when decoding the encoded chunk - confirm.
MTMD_API bool mtmd_decode_use_non_causal(mtmd_context * ctx);

// Boolean query on the context.
// NOTE(review): the name suggests it reports whether the model uses M-RoPE
// positions when decoding - confirm.
MTMD_API bool mtmd_decode_use_mrope(mtmd_context * ctx);

// Boolean query on the context.
// NOTE(review): presumably true when the loaded model accepts image input - confirm.
MTMD_API bool mtmd_support_vision(mtmd_context * ctx);

// Boolean query on the context.
// NOTE(review): presumably true when the loaded model accepts audio input - confirm.
MTMD_API bool mtmd_support_audio(mtmd_context * ctx);

// Returns an integer audio bitrate for the context.
// NOTE(review): the unit (presumably Hz) and the value returned when audio is
// unsupported are not visible here - confirm.
MTMD_API int mtmd_get_audio_bitrate(mtmd_context * ctx);
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
// mtmd_bitmap: opaque container for raw media, built either from image data
// (mtmd_bitmap_init) or from audio samples (mtmd_bitmap_init_from_audio).
// NOTE(review): the required length and layout of `data` (presumably packed
// pixel data sized from nx and ny for images, n_samples floats for audio) and
// whether the data is copied are defined by the implementation - confirm.
// Handles are released with mtmd_bitmap_free().
MTMD_API mtmd_bitmap *         mtmd_bitmap_init           (uint32_t nx, uint32_t ny, const unsigned char * data);
MTMD_API mtmd_bitmap *         mtmd_bitmap_init_from_audio(size_t n_samples, const float * data);
MTMD_API uint32_t              mtmd_bitmap_get_nx     (const mtmd_bitmap * bitmap);
MTMD_API uint32_t              mtmd_bitmap_get_ny     (const mtmd_bitmap * bitmap);
MTMD_API const unsigned char * mtmd_bitmap_get_data   (const mtmd_bitmap * bitmap);
MTMD_API size_t                mtmd_bitmap_get_n_bytes(const mtmd_bitmap * bitmap);
MTMD_API bool                  mtmd_bitmap_is_audio   (const mtmd_bitmap * bitmap);
MTMD_API void                  mtmd_bitmap_free       (mtmd_bitmap * bitmap);

// String identifier attached to a bitmap.
// NOTE(review): presumably optional and caller-defined (for example a content
// hash); the lifetime of the returned pointer is not visible here - confirm.
MTMD_API const char * mtmd_bitmap_get_id(const mtmd_bitmap * bitmap);
MTMD_API void         mtmd_bitmap_set_id(mtmd_bitmap * bitmap, const char * id);
| |
|
| |
|
| | |
| | |
| | |
| | |
// mtmd_input_chunks: opaque, indexable collection of mtmd_input_chunk.
// It is the output argument of mtmd_tokenize(); only read accessors (size and
// get) are declared here. Release it with mtmd_input_chunks_free().
// NOTE(review): behaviour of _get for idx >= size is not visible here - confirm.
MTMD_API mtmd_input_chunks *      mtmd_input_chunks_init(void);
MTMD_API size_t                   mtmd_input_chunks_size(const mtmd_input_chunks * chunks);
MTMD_API const mtmd_input_chunk * mtmd_input_chunks_get (const mtmd_input_chunks * chunks, size_t idx);
MTMD_API void                     mtmd_input_chunks_free(mtmd_input_chunks * chunks);
| |
|
| | |
| | |
| | |
| | |
// Read accessors for a single mtmd_input_chunk.
// NOTE(review): presumably _get_tokens_text writes the token count through
// `n_tokens_output` and is meaningful only for text chunks, while
// _get_tokens_image is meaningful only for non-text chunks - confirm.
MTMD_API enum mtmd_input_chunk_type mtmd_input_chunk_get_type        (const mtmd_input_chunk * chunk);
MTMD_API const llama_token *        mtmd_input_chunk_get_tokens_text (const mtmd_input_chunk * chunk, size_t * n_tokens_output);
MTMD_API const mtmd_image_tokens *  mtmd_input_chunk_get_tokens_image(const mtmd_input_chunk * chunk);
MTMD_API size_t                     mtmd_input_chunk_get_n_tokens    (const mtmd_input_chunk * chunk);
// NOTE(review): the value returned for chunks that carry no id (for example
// text chunks) is not visible here; treat it as possibly NULL - confirm.
MTMD_API const char *               mtmd_input_chunk_get_id          (const mtmd_input_chunk * chunk);
// Returns a llama_pos count. NOTE(review): presumably the number of positions
// the chunk occupies, which may differ from its token count - confirm.
MTMD_API llama_pos                  mtmd_input_chunk_get_n_pos       (const mtmd_input_chunk * chunk);
| |
|
| | |
| | |
| | |
// Returns a non-const chunk built from `chunk`; mtmd_input_chunk_free() is the
// matching release function (it is what the C++ input_chunk_ptr deleter calls).
// NOTE(review): presumably intended for callers that need a chunk to outlive
// its parent mtmd_input_chunks; whether the copy is deep is not visible - confirm.
MTMD_API mtmd_input_chunk * mtmd_input_chunk_copy(const mtmd_input_chunk * chunk);
MTMD_API void               mtmd_input_chunk_free(mtmd_input_chunk * chunk);
| |
|
| |
|
| | |
| | |
| | |
| | |
// Read accessors for mtmd_image_tokens, obtained from
// mtmd_input_chunk_get_tokens_image(). No create/free function is declared for
// this type in this header.
// NOTE(review): presumably the object is owned by its chunk, and nx/ny
// describe the token grid rather than pixel dimensions - confirm.
MTMD_API size_t       mtmd_image_tokens_get_n_tokens(const mtmd_image_tokens * image_tokens);
MTMD_API size_t       mtmd_image_tokens_get_nx      (const mtmd_image_tokens * image_tokens);
MTMD_API size_t       mtmd_image_tokens_get_ny      (const mtmd_image_tokens * image_tokens);
MTMD_API const char * mtmd_image_tokens_get_id      (const mtmd_image_tokens * image_tokens);
// Returns a llama_pos count. NOTE(review): presumably the number of positions
// occupied, which may differ from n_tokens - confirm.
MTMD_API llama_pos    mtmd_image_tokens_get_n_pos   (const mtmd_image_tokens * image_tokens);
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
// Tokenizes `text` together with the `n_bitmaps` entries of `bitmaps` and
// stores the resulting chunks in `output`.
// NOTE(review): presumably each media marker found in the text is paired, in
// order, with one bitmap, and the return value is 0 on success with non-zero
// codes for a marker/bitmap count mismatch or a preprocessing failure. The
// exact codes are not visible in this header - confirm.
MTMD_API int32_t mtmd_tokenize(mtmd_context * ctx,
                               mtmd_input_chunks * output,
                               const mtmd_input_text * text,
                               const mtmd_bitmap ** bitmaps,
                               size_t n_bitmaps);
| |
|
| | |
| | |
// Runs the encoder on `image_tokens`.
// NOTE(review): presumably returns 0 on success - confirm.
MTMD_API int32_t mtmd_encode(mtmd_context * ctx,
                             const mtmd_image_tokens * image_tokens);

// Same operation, taking a whole chunk instead of its image tokens.
// NOTE(review): presumably returns 0 on success; behaviour for text chunks is
// not visible here - confirm.
MTMD_API int32_t mtmd_encode_chunk(mtmd_context * ctx,
                                   const mtmd_input_chunk * chunk);
| |
|
| | |
| | |
| | |
// Returns a pointer to float output embeddings held by the context.
// NOTE(review): presumably the result of the most recent encode call, owned by
// `ctx`; the number of floats available is not expressed in this signature -
// confirm the size contract before reading.
MTMD_API float * mtmd_get_output_embd(mtmd_context * ctx);
| |
|
| | |
| |
|
| | |
// Returns a mtmd_input_chunks instance without needing a context.
// NOTE(review): the `mtmd_test_` prefix suggests a test-only helper that is not
// meant for application code - confirm.
MTMD_API mtmd_input_chunks * mtmd_test_create_input_chunks(void);
| |
|
| | #ifdef __cplusplus |
| | } |
| | #endif |
| |
|
| | |
| | |
| | |
| |
|
| | #ifdef __cplusplus |
| |
|
| | namespace mtmd { |
| |
|
| | struct mtmd_context_deleter { |
| | void operator()(mtmd_context * val) { mtmd_free(val); } |
| | }; |
| | using context_ptr = std::unique_ptr<mtmd_context, mtmd_context_deleter>; |
| |
|
| | struct mtmd_bitmap_deleter { |
| | void operator()(mtmd_bitmap * val) { mtmd_bitmap_free(val); } |
| | }; |
| | using bitmap_ptr = std::unique_ptr<mtmd_bitmap, mtmd_bitmap_deleter>; |
| |
|
| | struct mtmd_input_chunks_deleter { |
| | void operator()(mtmd_input_chunks * val) { mtmd_input_chunks_free(val); } |
| | }; |
| | using input_chunks_ptr = std::unique_ptr<mtmd_input_chunks, mtmd_input_chunks_deleter>; |
| |
|
| | struct mtmd_input_chunk_deleter { |
| | void operator()(mtmd_input_chunk * val) { mtmd_input_chunk_free(val); } |
| | }; |
| | using input_chunk_ptr = std::unique_ptr<mtmd_input_chunk, mtmd_input_chunk_deleter>; |
| |
|
| | struct bitmap { |
| | bitmap_ptr ptr; |
| | bitmap() : ptr(nullptr) {} |
| | bitmap(mtmd_bitmap * bitmap) : ptr(bitmap) {} |
| | bitmap(bitmap && other) noexcept : ptr(std::move(other.ptr)) {} |
| | bitmap(uint32_t nx, uint32_t ny, const unsigned char * data) { |
| | ptr.reset(mtmd_bitmap_init(nx, ny, data)); |
| | } |
| | ~bitmap() = default; |
| | uint32_t nx() { return mtmd_bitmap_get_nx(ptr.get()); } |
| | uint32_t ny() { return mtmd_bitmap_get_ny(ptr.get()); } |
| | const unsigned char * data() { return mtmd_bitmap_get_data(ptr.get()); } |
| | size_t n_bytes() { return mtmd_bitmap_get_n_bytes(ptr.get()); } |
| | std::string id() { return mtmd_bitmap_get_id(ptr.get()); } |
| | void set_id(const char * id) { mtmd_bitmap_set_id(ptr.get(), id); } |
| | }; |
| |
|
| | struct bitmaps { |
| | std::vector<bitmap> entries; |
| | ~bitmaps() = default; |
| | |
| | |
| | |
| | |
| | std::vector<const mtmd_bitmap *> c_ptr() { |
| | std::vector<const mtmd_bitmap *> res(entries.size()); |
| | for (size_t i = 0; i < entries.size(); i++) { |
| | res[i] = entries[i].ptr.get(); |
| | } |
| | return res; |
| | } |
| | }; |
| |
|
| | struct input_chunks { |
| | input_chunks_ptr ptr; |
| | input_chunks() = default; |
| | input_chunks(mtmd_input_chunks * chunks) : ptr(chunks) {} |
| | ~input_chunks() = default; |
| | size_t size() { return mtmd_input_chunks_size(ptr.get()); } |
| | const mtmd_input_chunk * operator[](size_t idx) { |
| | return mtmd_input_chunks_get(ptr.get(), idx); |
| | } |
| | }; |
| |
|
| | } |
| |
|
| | #endif |
| |
|
| | #endif |
| |
|