| from __future__ import annotations |
|
|
| import os |
| from ctypes import ( |
| c_bool, |
| c_char_p, |
| c_int, |
| c_uint8, |
| c_uint32, |
| c_float, |
| c_void_p, |
| c_size_t, |
| POINTER, |
| _Pointer, |
| Structure, |
| byref, |
| ) |
| import pathlib |
| from typing import ( |
| Union, |
| NewType, |
| Optional, |
| TYPE_CHECKING, |
| ) |
|
|
| import llama_cpp.llama_cpp as llama_cpp |
|
|
| from llama_cpp._ctypes_extensions import ( |
| load_shared_library, |
| ctypes_function_for_shared_library, |
| ) |
|
|
| if TYPE_CHECKING: |
| from llama_cpp._ctypes_extensions import ( |
| CtypesArray, |
| ) |
|
|
|
|
| |
def _libmtmd_resolve_base_path(override: Optional[str]) -> pathlib.Path:
    """Return the directory that is searched for the ``mtmd`` shared library.

    Args:
        override: Value of the ``MTMD_CPP_LIB`` environment variable, or
            ``None`` when the variable is unset.

    Returns:
        The ``lib`` directory next to this module when there is no override.
        Otherwise the override path itself, or its parent directory when the
        override names an existing file. An empty override resolves to the
        current directory.
    """
    if override is None:
        return pathlib.Path(os.path.abspath(os.path.dirname(__file__))) / "lib"
    candidate = pathlib.Path(override)
    # The loader is given a directory plus a base name, so a path that points
    # at the library file itself is reduced to the directory containing it.
    return candidate.parent if candidate.is_file() else candidate


# Base name of the shared library; platform prefix/suffix are left to the loader.
_libmtmd_base_name = "mtmd"
# Optional user-supplied location of the library.
_libmtmd_override_path = os.environ.get("MTMD_CPP_LIB")
# Previously any override collapsed to the current directory and the value of
# the environment variable was ignored; it is now honoured.
_libmtmd_base_path = _libmtmd_resolve_base_path(_libmtmd_override_path)
|
|
| |
# Load the shared library identified by the base name from the base path
# computed above.
_libmtmd = load_shared_library(
    _libmtmd_base_name,
    _libmtmd_base_path,
)

# Decorator factory bound to the loaded library; the function declarations in
# this module use it to attach a C symbol name, argument types and return type.
ctypes_function = ctypes_function_for_shared_library(_libmtmd)
|
|
| |
| |
| |
|
|
| |
# Opaque handles exchanged with the native library. Each handle has a
# NewType over ``int`` for static typing and a ``c_void_p`` alias that is used
# in the ctypes signatures.

# Handle used for the multimodal context.
mtmd_context_p = NewType("mtmd_context_p", int)
mtmd_context_p_ctypes = c_void_p

# Handle used for a bitmap.
mtmd_bitmap_p = NewType("mtmd_bitmap_p", int)
mtmd_bitmap_p_ctypes = c_void_p

# Handle used for image tokens.
mtmd_image_tokens_p = NewType("mtmd_image_tokens_p", int)
mtmd_image_tokens_p_ctypes = c_void_p

# Handle used for a single input chunk.
mtmd_input_chunk_p = NewType("mtmd_input_chunk_p", int)
mtmd_input_chunk_p_ctypes = c_void_p

# Handle used for a collection of input chunks.
mtmd_input_chunks_p = NewType("mtmd_input_chunks_p", int)
mtmd_input_chunks_p_ctypes = c_void_p
|
|
| |
# Integer codes for the kind of an input chunk.
# NOTE(review): presumably mirrors the chunk-type enum of the native library
# and is what ``mtmd_input_chunk_get_type`` returns; confirm against the
# header.
MTMD_INPUT_CHUNK_TYPE_TEXT = 0
MTMD_INPUT_CHUNK_TYPE_IMAGE = 1
MTMD_INPUT_CHUNK_TYPE_AUDIO = 2
|
|
| |
class mtmd_context_params(Structure):
    """ctypes layout of the parameter struct for creating an mtmd context.

    Instances are returned by ``mtmd_context_params_default`` and passed by
    value to ``mtmd_init_from_file``. Field order and types must match the C
    declaration; the meaning of each field is defined by the native library.
    """

    _fields_ = [
        # Two boolean switches.
        ("use_gpu", c_bool),
        ("print_timings", c_bool),
        # Two C ints.
        ("n_threads", c_int),
        ("verbosity", c_int),
        # Two C strings (``char *``), exposed to Python as ``bytes`` or None.
        ("image_marker", c_char_p),
        ("media_marker", c_char_p),
    ]
|
|
class mtmd_input_text(Structure):
    """ctypes layout of the text input struct.

    A pointer to an instance is the ``text`` argument of ``mtmd_tokenize``.
    Field order and types must match the C declaration.
    """

    _fields_ = [
        # C string (``char *``) holding the text.
        ("text", c_char_p),
        # Boolean flags; their exact effect is defined by the native library.
        ("add_special", c_bool),
        ("parse_special", c_bool),
    ]
|
|
| |
| |
| |
|
|
| |
@ctypes_function(
    "mtmd_default_marker",
    [],
    c_char_p,
)
def mtmd_default_marker() -> bytes:
    """Binding for the C function ``mtmd_default_marker``.

    Takes no arguments.

    Returns:
        The C string returned by the library, as ``bytes``.
        NOTE(review): presumably the default media marker; confirm against
        the header.
    """
    ...
|
|
| |
@ctypes_function(
    "mtmd_context_params_default",
    [],
    mtmd_context_params,
)
def mtmd_context_params_default() -> mtmd_context_params:
    """Binding for the C function ``mtmd_context_params_default``.

    Takes no arguments.

    Returns:
        A ``mtmd_context_params`` struct, returned by value.
    """
    ...
|
|
| |
| |
| |
@ctypes_function(
    "mtmd_init_from_file",
    [
        c_char_p,
        llama_cpp.llama_model_p_ctypes,
        mtmd_context_params,
    ],
    mtmd_context_p_ctypes,
)
def mtmd_init_from_file(
    mmproj_fname: bytes,
    text_model: llama_cpp.llama_model_p,
    ctx_params: mtmd_context_params,
    /,
) -> Optional[mtmd_context_p]:
    """Binding for the C function ``mtmd_init_from_file``.

    Args:
        mmproj_fname: File name passed as a C string.
            NOTE(review): presumably the multimodal projector file; confirm.
        text_model: Handle of the llama model.
        ctx_params: Context parameters, passed by value.

    Returns:
        An opaque context handle, or ``None`` when the library returns NULL.
    """
    ...
|
|
| |
@ctypes_function(
    "mtmd_free",
    [mtmd_context_p_ctypes],
    None,
)
def mtmd_free(ctx: mtmd_context_p, /):
    """Binding for the C function ``mtmd_free``; the C function returns void.

    Args:
        ctx: Opaque context handle.
    """
    ...
|
|
| |
@ctypes_function(
    "mtmd_support_vision",
    [mtmd_context_p_ctypes],
    c_bool,
)
def mtmd_support_vision(ctx: mtmd_context_p, /) -> bool:
    """Binding for the C function ``mtmd_support_vision``.

    Args:
        ctx: Opaque context handle.

    Returns:
        The boolean reported by the library.
        NOTE(review): presumably whether the context supports image input;
        confirm against the header.
    """
    ...
|
|
| |
@ctypes_function(
    "mtmd_bitmap_init",
    [
        c_uint32,
        c_uint32,
        POINTER(c_uint8),
    ],
    mtmd_bitmap_p_ctypes,
)
def mtmd_bitmap_init(
    nx: Union[c_uint32, int],
    ny: Union[c_uint32, int],
    data: CtypesArray[c_uint8],
    /,
) -> Optional[mtmd_bitmap_p]:
    """Binding for the C function ``mtmd_bitmap_init``.

    Args:
        nx: Unsigned 32-bit integer.
        ny: Unsigned 32-bit integer.
        data: Buffer of unsigned bytes, passed as a pointer.
            NOTE(review): ``nx``/``ny`` presumably are width/height and the
            expected pixel layout of ``data`` is not visible here; confirm
            against the header.

    Returns:
        An opaque bitmap handle, or ``None`` when the library returns NULL.
    """
    ...
|
|
| |
@ctypes_function(
    "mtmd_bitmap_free",
    [mtmd_bitmap_p_ctypes],
    None,
)
def mtmd_bitmap_free(bitmap: mtmd_bitmap_p, /):
    """Binding for the C function ``mtmd_bitmap_free``; the C function returns void.

    Args:
        bitmap: Opaque bitmap handle.
    """
    ...
|
|
| |
@ctypes_function(
    "mtmd_input_chunks_init",
    [],
    mtmd_input_chunks_p_ctypes,
)
def mtmd_input_chunks_init() -> Optional[mtmd_input_chunks_p]:
    """Binding for the C function ``mtmd_input_chunks_init``.

    Takes no arguments.

    Returns:
        An opaque handle to a chunk collection, or ``None`` when the library
        returns NULL.
    """
    ...
|
|
| |
@ctypes_function(
    "mtmd_input_chunks_free",
    [mtmd_input_chunks_p_ctypes],
    None,
)
def mtmd_input_chunks_free(chunks: mtmd_input_chunks_p, /):
    """Binding for the C function ``mtmd_input_chunks_free``; the C function returns void.

    Args:
        chunks: Opaque handle to a chunk collection.
    """
    ...
|
|
| |
@ctypes_function(
    "mtmd_input_chunks_size",
    [mtmd_input_chunks_p_ctypes],
    c_size_t,
)
def mtmd_input_chunks_size(chunks: mtmd_input_chunks_p, /) -> int:
    """Binding for the C function ``mtmd_input_chunks_size``.

    Args:
        chunks: Opaque handle to a chunk collection.

    Returns:
        The ``size_t`` value reported by the library, as ``int``.
    """
    ...
|
|
| |
@ctypes_function(
    "mtmd_input_chunks_get",
    [
        mtmd_input_chunks_p_ctypes,
        c_size_t,
    ],
    mtmd_input_chunk_p_ctypes,
)
def mtmd_input_chunks_get(
    chunks: mtmd_input_chunks_p,
    idx: Union[c_size_t, int],
    /,
) -> Optional[mtmd_input_chunk_p]:
    """Binding for the C function ``mtmd_input_chunks_get``.

    Args:
        chunks: Opaque handle to a chunk collection.
        idx: Index, passed as ``size_t``.

    Returns:
        An opaque handle to a single chunk, or ``None`` when the library
        returns NULL.
    """
    ...
|
|
| |
| |
| |
| |
| |
@ctypes_function(
    "mtmd_tokenize",
    [
        mtmd_context_p_ctypes,
        mtmd_input_chunks_p_ctypes,
        POINTER(mtmd_input_text),
        POINTER(mtmd_bitmap_p_ctypes),
        c_size_t,
    ],
    c_int,
)
def mtmd_tokenize(
    ctx: mtmd_context_p,
    output: mtmd_input_chunks_p,
    text: "_Pointer[mtmd_input_text]",
    bitmaps: CtypesArray[mtmd_bitmap_p_ctypes],
    n_bitmaps: Union[c_size_t, int],
    /,
) -> int:
    """Binding for the C function ``mtmd_tokenize``.

    Args:
        ctx: Opaque context handle.
        output: Opaque handle to a chunk collection.
            NOTE(review): presumably filled by the call; confirm.
        text: Pointer to a ``mtmd_input_text`` struct.
        bitmaps: Array of bitmap handles, passed as a pointer.
        n_bitmaps: ``size_t`` count accompanying ``bitmaps``.

    Returns:
        The C ``int`` result of the call. The meaning of individual values is
        not visible in this module; see the native header.
    """
    ...
|
|
| |
@ctypes_function(
    "mtmd_input_chunk_get_n_tokens",
    [mtmd_input_chunk_p_ctypes],
    c_size_t,
)
def mtmd_input_chunk_get_n_tokens(chunk: mtmd_input_chunk_p, /) -> int:
    """Binding for the C function ``mtmd_input_chunk_get_n_tokens``.

    Args:
        chunk: Opaque handle to a single chunk.

    Returns:
        The ``size_t`` value reported by the library, as ``int``.
    """
    ...
|
|
| |
@ctypes_function(
    "mtmd_input_chunk_get_type",
    [mtmd_input_chunk_p_ctypes],
    c_int,
)
def mtmd_input_chunk_get_type(chunk: mtmd_input_chunk_p, /) -> int:
    """Binding for the C function ``mtmd_input_chunk_get_type``.

    Args:
        chunk: Opaque handle to a single chunk.

    Returns:
        A C ``int``.
        NOTE(review): presumably one of the ``MTMD_INPUT_CHUNK_TYPE_*``
        constants; confirm against the header.
    """
    ...
|
|
| |
@ctypes_function(
    "mtmd_input_chunk_get_tokens_text",
    [
        mtmd_input_chunk_p_ctypes,
        POINTER(c_size_t),
    ],
    POINTER(llama_cpp.llama_token),
)
def mtmd_input_chunk_get_tokens_text(
    chunk: mtmd_input_chunk_p,
    n_tokens_output: "_Pointer[c_size_t]",
    /,
) -> Optional["_Pointer[llama_cpp.llama_token]"]:
    """Binding for the C function ``mtmd_input_chunk_get_tokens_text``.

    Args:
        chunk: Opaque handle to a single chunk.
        n_tokens_output: Pointer to a ``size_t``.
            NOTE(review): presumably receives the number of tokens behind
            the returned pointer; confirm.

    Returns:
        A pointer to ``llama_token`` values.
        NOTE(review): ownership and lifetime of the pointed-to memory are not
        visible here; confirm against the header before retaining it.
    """
    ...
|
|
| |
| |
| |
|
|
| |
@ctypes_function(
    "mtmd_helper_bitmap_init_from_buf",
    [
        mtmd_context_p_ctypes,
        POINTER(c_uint8),
        c_size_t,
    ],
    mtmd_bitmap_p_ctypes,
)
def mtmd_helper_bitmap_init_from_buf(
    ctx: mtmd_context_p,
    buf: CtypesArray[c_uint8],
    length: Union[c_size_t, int],
    /,
) -> Optional[mtmd_bitmap_p]:
    """Binding for the C function ``mtmd_helper_bitmap_init_from_buf``.

    Args:
        ctx: Opaque context handle.
        buf: Buffer of unsigned bytes, passed as a pointer.
        length: ``size_t`` accompanying ``buf``.
            NOTE(review): presumably the byte length of an encoded media
            file held in ``buf``; confirm supported formats in the header.

    Returns:
        An opaque bitmap handle, or ``None`` when the library returns NULL.
    """
    ...
|
|
| |
@ctypes_function(
    "mtmd_helper_get_n_tokens",
    [mtmd_input_chunks_p_ctypes],
    c_size_t,
)
def mtmd_helper_get_n_tokens(chunks: mtmd_input_chunks_p, /) -> int:
    """Binding for the C function ``mtmd_helper_get_n_tokens``.

    Args:
        chunks: Opaque handle to a chunk collection.

    Returns:
        The ``size_t`` value reported by the library, as ``int``.
    """
    ...
|
|
| |
| |
| |
| |
| |
| |
| |
| |
@ctypes_function(
    "mtmd_helper_eval_chunk_single",
    [
        mtmd_context_p_ctypes,             # ctx
        llama_cpp.llama_context_p_ctypes,  # lctx
        mtmd_input_chunk_p_ctypes,         # chunk
        llama_cpp.llama_pos,               # n_past
        llama_cpp.llama_seq_id,            # seq_id
        c_int,                             # n_batch
        c_bool,                            # logits_last
        POINTER(llama_cpp.llama_pos),      # new_n_past
    ],
    c_int,
)
def mtmd_helper_eval_chunk_single(
    ctx: mtmd_context_p,
    lctx: llama_cpp.llama_context_p,
    chunk: mtmd_input_chunk_p,
    n_past: llama_cpp.llama_pos,
    seq_id: llama_cpp.llama_seq_id,
    n_batch: Union[c_int, int],
    logits_last: Union[c_bool, bool],
    new_n_past: "_Pointer[llama_cpp.llama_pos]",
    /,
) -> int:
    """Binding for the C function ``mtmd_helper_eval_chunk_single``.

    Args:
        ctx: Opaque mtmd context handle.
        lctx: Handle of the llama context.
        chunk: Opaque handle to a single chunk.
        n_past: Position value, passed as ``llama_pos``.
        seq_id: Sequence id, passed as ``llama_seq_id``.
        n_batch: C ``int``.
        logits_last: C ``bool``.
        new_n_past: Pointer to a ``llama_pos``.
            NOTE(review): presumably receives the updated position after the
            call; confirm.

    Returns:
        The C ``int`` result of the call. The meaning of individual values is
        not visible in this module; see the native header.
    """
    ...
|
|