diff --git a/.DS_Store b/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..26ce55d8a3484215bbaceae2f28f93d359c5965a Binary files /dev/null and b/.DS_Store differ diff --git a/.gitattributes b/.gitattributes index a6344aac8c09253b3b630fb776ae94478aa0275b..3834cade1623ec27666ab244445f34e76858217c 100644 --- a/.gitattributes +++ b/.gitattributes @@ -33,3 +33,23 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.zip filter=lfs diff=lfs merge=lfs -text *.zst filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text +BioMistral-7B.Q4_K_M.gguf filter=lfs diff=lfs merge=lfs -text +llama-cpp-python/vendor/llama.cpp/kompute/docs/images/komputer-2.gif filter=lfs diff=lfs merge=lfs -text +llama-cpp-python/vendor/llama.cpp/kompute/docs/images/komputer-godot-4.gif filter=lfs diff=lfs merge=lfs -text +llama-cpp-python/vendor/llama.cpp/kompute/docs/images/komputer-logos.gif filter=lfs diff=lfs merge=lfs -text +llama-cpp-python/vendor/llama.cpp/kompute/examples/android/android-simple/app/src/main/assets/komputer-2.gif filter=lfs diff=lfs merge=lfs -text +llama-cpp-python/vendor/llama.cpp/models/ggml-vocab-aquila.gguf filter=lfs diff=lfs merge=lfs -text +llama-cpp-python/vendor/llama.cpp/models/ggml-vocab-baichuan.gguf filter=lfs diff=lfs merge=lfs -text +llama-cpp-python/vendor/llama.cpp/models/ggml-vocab-deepseek-coder.gguf filter=lfs diff=lfs merge=lfs -text +llama-cpp-python/vendor/llama.cpp/models/ggml-vocab-deepseek-llm.gguf filter=lfs diff=lfs merge=lfs -text +llama-cpp-python/vendor/llama.cpp/models/ggml-vocab-falcon.gguf filter=lfs diff=lfs merge=lfs -text +llama-cpp-python/vendor/llama.cpp/models/ggml-vocab-gpt-2.gguf filter=lfs diff=lfs merge=lfs -text +llama-cpp-python/vendor/llama.cpp/models/ggml-vocab-gpt-neox.gguf filter=lfs diff=lfs merge=lfs -text +llama-cpp-python/vendor/llama.cpp/models/ggml-vocab-gpt2.gguf filter=lfs diff=lfs merge=lfs -text +llama-cpp-python/vendor/llama.cpp/models/ggml-vocab-llama-bpe.gguf filter=lfs diff=lfs merge=lfs -text +llama-cpp-python/vendor/llama.cpp/models/ggml-vocab-mpt.gguf filter=lfs diff=lfs merge=lfs -text +llama-cpp-python/vendor/llama.cpp/models/ggml-vocab-refact.gguf filter=lfs diff=lfs merge=lfs -text +llama-cpp-python/vendor/llama.cpp/models/ggml-vocab-stablelm.gguf filter=lfs diff=lfs merge=lfs -text +llama-cpp-python/vendor/llama.cpp/models/ggml-vocab-starcoder.gguf filter=lfs diff=lfs merge=lfs -text +qdrant_storage/collections/vector_db/0/wal/open-1 filter=lfs diff=lfs merge=lfs -text +qdrant_storage/collections/vector_db/0/wal/open-2 filter=lfs diff=lfs merge=lfs -text diff --git a/BioMistral-7B-GGUF/.DS_Store b/BioMistral-7B-GGUF/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..4d454ff73e28e9c17738b48117aef414e024ca25 Binary files /dev/null and b/BioMistral-7B-GGUF/.DS_Store differ diff --git a/BioMistral-7B-GGUF/.gitattributes b/BioMistral-7B-GGUF/.gitattributes new file mode 100644 index 0000000000000000000000000000000000000000..7c03df66012d1c3bdf24f4b78e3fa07ca5886663 --- /dev/null +++ b/BioMistral-7B-GGUF/.gitattributes @@ -0,0 +1,45 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs 
merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text +BioMistral-7B.Q2_K.gguf filter=lfs diff=lfs merge=lfs -text +BioMistral-7B.Q3_K_L.gguf filter=lfs diff=lfs merge=lfs -text +BioMistral-7B.Q3_K_M.gguf filter=lfs diff=lfs merge=lfs -text +BioMistral-7B.Q3_K_S.gguf filter=lfs diff=lfs merge=lfs -text +BioMistral-7B.Q4_K_M.gguf filter=lfs diff=lfs merge=lfs -text +BioMistral-7B.Q4_K_S.gguf filter=lfs diff=lfs merge=lfs -text +BioMistral-7B.Q5_K_M.gguf filter=lfs diff=lfs merge=lfs -text +BioMistral-7B.Q5_K_S.gguf filter=lfs diff=lfs merge=lfs -text +BioMistral-7B.Q6_K.gguf filter=lfs diff=lfs merge=lfs -text +BioMistral-7B.Q8_0.gguf filter=lfs diff=lfs merge=lfs -text diff --git a/BioMistral-7B-GGUF/README.md b/BioMistral-7B-GGUF/README.md new file mode 100644 index 0000000000000000000000000000000000000000..1c78f5de0fbde2232f6517765704d827cda52d69 --- /dev/null +++ b/BioMistral-7B-GGUF/README.md @@ -0,0 +1,238 @@ +--- +tags: +- quantized +- 2-bit +- 3-bit +- 4-bit +- 5-bit +- 6-bit +- 8-bit +- GGUF +- transformers +- pytorch +- tensorboard +- mistral +- text-generation +- medical +- biology +- conversational +- fr +- en +- de +- nl +- es +- pt +- pl +- ro +- it +- dataset:pubmed +- arxiv:2402.10373 +- license:apache-2.0 +- autotrain_compatible +- endpoints_compatible +- text-generation-inference +- region:us +- text-generation +model_name: BioMistral-7B-GGUF +base_model: BioMistral/BioMistral-7B +inference: false +model_creator: BioMistral +pipeline_tag: text-generation +quantized_by: MaziyarPanahi +--- +# [MaziyarPanahi/BioMistral-7B-GGUF](https://huggingface.co/MaziyarPanahi/BioMistral-7B-GGUF) +- Model creator: [BioMistral](https://huggingface.co/BioMistral) +- Original model: [BioMistral/BioMistral-7B](https://huggingface.co/BioMistral/BioMistral-7B) + +## Description +[MaziyarPanahi/BioMistral-7B-GGUF](https://huggingface.co/MaziyarPanahi/BioMistral-7B-GGUF) contains GGUF format model files for [BioMistral/BioMistral-7B](https://huggingface.co/BioMistral/BioMistral-7B). + +## How to use +Thanks to [TheBloke](https://huggingface.co/TheBloke) for preparing an amazing README on how to use GGUF models: + +### About GGUF + +GGUF is a new format introduced by the llama.cpp team on August 21st 2023. It is a replacement for GGML, which is no longer supported by llama.cpp. + +Here is an incomplete list of clients and libraries that are known to support GGUF: + +* [llama.cpp](https://github.com/ggerganov/llama.cpp). 
The source project for GGUF. Offers a CLI and a server option.
+* [text-generation-webui](https://github.com/oobabooga/text-generation-webui), the most widely used web UI, with many features and powerful extensions. Supports GPU acceleration.
+* [KoboldCpp](https://github.com/LostRuins/koboldcpp), a fully featured web UI, with GPU accel across all platforms and GPU architectures. Especially good for storytelling.
+* [GPT4All](https://gpt4all.io/index.html), a free and open source locally running GUI, supporting Windows, Linux and macOS with full GPU accel.
+* [LM Studio](https://lmstudio.ai/), an easy-to-use and powerful local GUI for Windows and macOS (Silicon), with GPU acceleration. Linux available, in beta as of 27/11/2023.
+* [LoLLMS Web UI](https://github.com/ParisNeo/lollms-webui), a great web UI with many interesting and unique features, including a full model library for easy model selection.
+* [Faraday.dev](https://faraday.dev/), an attractive and easy-to-use character-based chat GUI for Windows and macOS (both Silicon and Intel), with GPU acceleration.
+* [llama-cpp-python](https://github.com/abetlen/llama-cpp-python), a Python library with GPU accel, LangChain support, and an OpenAI-compatible API server.
+* [candle](https://github.com/huggingface/candle), a Rust ML framework with a focus on performance, including GPU support, and ease of use.
+* [ctransformers](https://github.com/marella/ctransformers), a Python library with GPU accel, LangChain support, and an OpenAI-compatible API server. Note: as of the time of writing (November 27th 2023), ctransformers has not been updated in a long time and does not support many recent models.
+
+### Explanation of quantisation methods
+
+<details>
+  <summary>Click to see details</summary>
+
+The new methods available are:
+
+* GGML_TYPE_Q2_K - "type-1" 2-bit quantization in super-blocks containing 16 blocks, each block having 16 weights. Block scales and mins are quantized with 4 bits. This ends up effectively using 2.5625 bits per weight (bpw).
+* GGML_TYPE_Q3_K - "type-0" 3-bit quantization in super-blocks containing 16 blocks, each block having 16 weights. Scales are quantized with 6 bits. This ends up using 3.4375 bpw.
+* GGML_TYPE_Q4_K - "type-1" 4-bit quantization in super-blocks containing 8 blocks, each block having 32 weights. Scales and mins are quantized with 6 bits. This ends up using 4.5 bpw.
+* GGML_TYPE_Q5_K - "type-1" 5-bit quantization. Same super-block structure as GGML_TYPE_Q4_K, resulting in 5.5 bpw.
+* GGML_TYPE_Q6_K - "type-0" 6-bit quantization. Super-blocks with 16 blocks, each block having 16 weights. Scales are quantized with 8 bits. This ends up using 6.5625 bpw.
+</details>
+
+## How to download GGUF files
+
+**Note for manual downloaders:** You almost never want to clone the entire repo! Multiple different quantisation formats are provided, and most users only want to pick and download a single file.
+
+The following clients/libraries will automatically download models for you, providing a list of available models to choose from:
+
+* LM Studio
+* LoLLMS Web UI
+* Faraday.dev
+
+### In `text-generation-webui`
+
+Under Download Model, you can enter the model repo: [MaziyarPanahi/BioMistral-7B-GGUF](https://huggingface.co/MaziyarPanahi/BioMistral-7B-GGUF) and below it, a specific filename to download, such as: BioMistral-7B.Q4_K_M.gguf.
+
+Then click Download.
+
+### On the command line, including multiple files at once
+
+I recommend using the `huggingface-hub` Python library:
+
+```shell
+pip3 install huggingface-hub
+```
+
+Then you can download any individual model file to the current directory, at high speed, with a command like this:
+
+```shell
+huggingface-cli download MaziyarPanahi/BioMistral-7B-GGUF BioMistral-7B.Q4_K_M.gguf --local-dir . --local-dir-use-symlinks False
+```
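+
+Alternatively, if you prefer to stay in Python, the same single-file download can be scripted with the `huggingface_hub` library. A minimal sketch (the destination directory here is just an illustration, adjust to taste):
+
+```python
+from huggingface_hub import hf_hub_download
+
+# Fetch one quant file from the repo into the current directory.
+model_path = hf_hub_download(
+    repo_id="MaziyarPanahi/BioMistral-7B-GGUF",
+    filename="BioMistral-7B.Q4_K_M.gguf",
+    local_dir=".",
+)
+print(model_path)
+```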
+
+<details>
+  <summary>More advanced huggingface-cli download usage (click to read)</summary>
+
+You can also download multiple files at once with a pattern:
+
+```shell
+huggingface-cli download MaziyarPanahi/BioMistral-7B-GGUF --local-dir . --local-dir-use-symlinks False --include='*Q4_K*gguf'
+```
+
+For more documentation on downloading with `huggingface-cli`, please see: [HF -> Hub Python Library -> Download files -> Download from the CLI](https://huggingface.co/docs/huggingface_hub/guides/download#download-from-the-cli).
+
+To accelerate downloads on fast connections (1Gbit/s or higher), install `hf_transfer`:
+
+```shell
+pip3 install hf_transfer
+```
+
+And set the environment variable `HF_HUB_ENABLE_HF_TRANSFER` to `1`:
+
+```shell
+HF_HUB_ENABLE_HF_TRANSFER=1 huggingface-cli download MaziyarPanahi/BioMistral-7B-GGUF BioMistral-7B.Q4_K_M.gguf --local-dir . --local-dir-use-symlinks False
+```
+
+Windows Command Line users: You can set the environment variable by running `set HF_HUB_ENABLE_HF_TRANSFER=1` before the download command.
+</details>
+
+## Example `llama.cpp` command
+
+Make sure you are using `llama.cpp` from commit [d0cee0d](https://github.com/ggerganov/llama.cpp/commit/d0cee0d36d5be95a0d9088b674dbb27354107221) or later.
+
+```shell
+./main -ngl 35 -m BioMistral-7B.Q4_K_M.gguf --color -c 32768 --temp 0.7 --repeat_penalty 1.1 -n -1 -p "<|im_start|>system
+{system_message}<|im_end|>
+<|im_start|>user
+{prompt}<|im_end|>
+<|im_start|>assistant"
+```
+
+Change `-ngl 35` to the number of layers to offload to GPU. Remove it if you don't have GPU acceleration.
+
+Change `-c 32768` to the desired sequence length. For extended sequence models - eg 8K, 16K, 32K - the necessary RoPE scaling parameters are read from the GGUF file and set by llama.cpp automatically. Note that longer sequence lengths require much more resources, so you may need to reduce this value.
+
+If you want to have a chat-style conversation, replace the `-p` argument with `-i -ins`.
+
+For other parameters and how to use them, please refer to [the llama.cpp documentation](https://github.com/ggerganov/llama.cpp/blob/master/examples/main/README.md).
+
+## How to run in `text-generation-webui`
+
+Further instructions can be found in the text-generation-webui documentation, here: [text-generation-webui/docs/04 ‐ Model Tab.md](https://github.com/oobabooga/text-generation-webui/blob/main/docs/04%20%E2%80%90%20Model%20Tab.md#llamacpp).
+
+## How to run from Python code
+
+You can use GGUF models from Python using the [llama-cpp-python](https://github.com/abetlen/llama-cpp-python) or [ctransformers](https://github.com/marella/ctransformers) libraries. Note that at the time of writing (Nov 27th 2023), ctransformers has not been updated for some time and is not compatible with some recent models. Therefore I recommend you use llama-cpp-python.
+
+### How to load this model in Python code, using llama-cpp-python
+
+For full documentation, please see: [llama-cpp-python docs](https://abetlen.github.io/llama-cpp-python/).
+
+#### First install the package
+
+Run one of the following commands, according to your system:
+
+```shell
+# Base llama-cpp-python with no GPU acceleration
+pip install llama-cpp-python
+# With NVidia CUDA acceleration
+CMAKE_ARGS="-DLLAMA_CUBLAS=on" pip install llama-cpp-python
+# Or with OpenBLAS acceleration
+CMAKE_ARGS="-DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS" pip install llama-cpp-python
+# Or with CLBLast acceleration
+CMAKE_ARGS="-DLLAMA_CLBLAST=on" pip install llama-cpp-python
+# Or with AMD ROCm GPU acceleration (Linux only)
+CMAKE_ARGS="-DLLAMA_HIPBLAS=on" pip install llama-cpp-python
+# Or with Metal GPU acceleration for macOS systems only
+CMAKE_ARGS="-DLLAMA_METAL=on" pip install llama-cpp-python
+
+# On Windows, to set the CMAKE_ARGS variable in PowerShell, follow this format; e.g. for NVidia CUDA:
+$env:CMAKE_ARGS = "-DLLAMA_CUBLAS=on"
+pip install llama-cpp-python
+```
+
+#### Simple llama-cpp-python example code
+
+```python
+from llama_cpp import Llama
+
+# Set gpu_layers to the number of layers to offload to GPU. Set to 0 if no GPU acceleration is available on your system.
+llm = Llama(
+    model_path="./BioMistral-7B.Q4_K_M.gguf",  # Download the model file first
+    n_ctx=32768,  # The max sequence length to use - note that longer sequence lengths require much more resources
+    n_threads=8,  # The number of CPU threads to use, tailor to your system and the resulting performance
+    n_gpu_layers=35  # The number of layers to offload to GPU, if you have GPU acceleration available
+)
+
+# Simple inference example
+output = llm(
+    "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant",  # Prompt
+    max_tokens=512,  # Generate up to 512 tokens
+    stop=["</s>"],  # Example stop token - not necessarily correct for this specific model! Please check before using.
+    echo=True  # Whether to echo the prompt
+)
+
+# Chat Completion API
+
+llm = Llama(model_path="./BioMistral-7B.Q4_K_M.gguf", chat_format="llama-2")  # Set chat_format according to the model you are using
+llm.create_chat_completion(
+    messages = [
+        {"role": "system", "content": "You are a story writing assistant."},
+        {
+            "role": "user",
+            "content": "Write a story about llamas."
+        }
+    ]
+)
+```
+
+## How to use with LangChain
+
+Here are guides on using llama-cpp-python and ctransformers with LangChain:
+
+* [LangChain + llama-cpp-python](https://python.langchain.com/docs/integrations/llms/llamacpp)
+* [LangChain + ctransformers](https://python.langchain.com/docs/integrations/providers/ctransformers)
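+
+As a quick orientation for the llama-cpp-python route, here is a minimal LangChain sketch using the `LlamaCpp` wrapper from `langchain_community`; the model path and parameter values are illustrative, not prescriptive:
+
+```python
+from langchain_community.llms import LlamaCpp
+
+# Point the LangChain wrapper at the locally downloaded GGUF file.
+llm = LlamaCpp(
+    model_path="./BioMistral-7B.Q4_K_M.gguf",  # downloaded as shown above
+    n_ctx=2048,       # context window; raise it if you have the memory
+    n_gpu_layers=35,  # set to 0 for CPU-only inference
+    temperature=0.3,
+)
+
+print(llm.invoke("What is the mechanism of action of metformin?"))
+```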
\ No newline at end of file
diff --git a/BioMistral-7B-GGUF/config.json b/BioMistral-7B-GGUF/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..9f0f76f8d1de0ed9b8adf494540f149aaef5f07c
--- /dev/null
+++ b/BioMistral-7B-GGUF/config.json
@@ -0,0 +1,3 @@
+{
+  "model_type": "mistral"
+}
\ No newline at end of file
diff --git a/BioMistral-7B.Q4_K_M.gguf b/BioMistral-7B.Q4_K_M.gguf
new file mode 100644
index 0000000000000000000000000000000000000000..fb663c6893fb980d5319b039e8d3ac0a6e670d1e
--- /dev/null
+++ b/BioMistral-7B.Q4_K_M.gguf
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3a73107045dfe7e3f113b392b0a67e3e6ca9fa9dae2abe301424ce5abd1721a6
+size 4368439424
diff --git a/README.md b/README.md
index 1a84a851b221aab76fc309357832143a7c076c1c..12ec722869a6e40989fb516cae5aa2329a752e2d 100644
--- a/README.md
+++ b/README.md
@@ -1,12 +1,6 @@
 ---
-title: BioMistral Gradio
-emoji: 📚
-colorFrom: red
-colorTo: yellow
+title: BioMistral_gradio
+app_file: app.py
 sdk: gradio
 sdk_version: 4.29.0
-app_file: app.py
-pinned: false
 ---
-
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
diff --git a/__pycache__/app.cpython-39.pyc b/__pycache__/app.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..536a3b365dfba5739c6c577a4dd4d45a968d51c2
Binary files /dev/null and b/__pycache__/app.cpython-39.pyc differ
diff --git a/app.py b/app.py
new file mode 100644
index 0000000000000000000000000000000000000000..26f0f39623cef222b1782e16ed0ece3f8f28d500
--- /dev/null
+++ b/app.py
@@ -0,0 +1 @@
+from langchain import PromptTemplate
+from langchain_community.llms import LlamaCpp
+from langchain.chains import RetrievalQA
+from langchain.chains import ConversationalRetrievalChain
+from langchain.prompts import SystemMessagePromptTemplate
+from langchain_community.embeddings import SentenceTransformerEmbeddings
+from fastapi import FastAPI, Request, Form, Response
+from fastapi.responses import HTMLResponse
+from fastapi.templating import Jinja2Templates
+from fastapi.staticfiles import StaticFiles
+from fastapi.encoders import jsonable_encoder
+from qdrant_client import QdrantClient
+from langchain_community.vectorstores import Qdrant
+import os
+import json
+import gradio as gr
+import sys
+#sys.path.insert(0, ).
+
+# Local GGUF model served through llama.cpp
+local_llm = "BioMistral-7B.Q4_K_M.gguf"
+llm = LlamaCpp(model_path=local_llm, temperature=0.3, max_tokens=2048, top_p=1, n_ctx=2048)
+
+prompt_template = """Use the following pieces of information to answer the user's question.
+If you don't know the answer, just say that you don't know, don't try to make up an answer.
+
+Chat History: {chat_history}
+Question: {question}
+
+Only return the helpful answer. Answer must be detailed and well explained.
+Helpful answer:
+"""
+
+# PubMedBERT sentence embeddings + the local Qdrant collection as the retriever
+embeddings = SentenceTransformerEmbeddings(model_name="NeuML/pubmedbert-base-embeddings")
+url = "http://localhost:6333"
+client = QdrantClient(url=url, prefer_grpc=False)
+db = Qdrant(client=client, embeddings=embeddings, collection_name="vector_db")
+retriever = db.as_retriever(search_kwargs={"k": 1})
+
+chat_history = []
+
+# Create the custom chain
+if llm is not None and db is not None:
+    chain = ConversationalRetrievalChain.from_llm(llm=llm, retriever=retriever)
+else:
+    print("LLM or Vector Database not initialized")
+
+def predict(message, history):
+    # Prompt object built from the template above (the chain currently uses its default prompt).
+    prompt = PromptTemplate(template=prompt_template, input_variables=["chat_history", "question"])
+
+    # Ask the chain, then remember the exchange for the next turn.
+    response = chain({"question": message, "chat_history": chat_history})
+    answer = response["answer"]
+    chat_history.append((message, answer))
+
+    # Mirror the conversation in the [question, answer] pair format Gradio uses.
+    history_langchain_format = [[q, a] for q, a in history]
+    history_langchain_format.append([message, answer])
+    return answer
+
+gr.ChatInterface(predict).launch(share=True)
\ No newline at end of file
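The chat app above queries an existing Qdrant collection named `vector_db` on `http://localhost:6333` (persisted under `qdrant_storage/` in this repo); the diff itself does not include the ingestion step. A rough, hypothetical sketch of how such a collection could be built from the PDFs under `data/` with the same PubMedBERT embeddings follows; the loader, splitter and chunk sizes are assumptions, not part of this repository:

```python
# Hypothetical ingest script (not part of this diff): builds the "vector_db"
# collection that app.py queries. PyPDFLoader requires the `pypdf` package.
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.embeddings import SentenceTransformerEmbeddings
from langchain_community.vectorstores import Qdrant
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Load one of the PDFs shipped under data/ and split it into overlapping chunks.
docs = PyPDFLoader("data/10.1177_1557988318780857.pdf").load()
chunks = RecursiveCharacterTextSplitter(chunk_size=700, chunk_overlap=70).split_documents(docs)

# Same embedding model as app.py, so query and document vectors live in one space.
embeddings = SentenceTransformerEmbeddings(model_name="NeuML/pubmedbert-base-embeddings")

# Embed the chunks and upsert them into the local Qdrant instance.
Qdrant.from_documents(
    chunks,
    embeddings,
    url="http://localhost:6333",
    prefer_grpc=False,
    collection_name="vector_db",
)
```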
diff --git a/data/10.1177_1557988318780857.pdf b/data/10.1177_1557988318780857.pdf
new file mode 100644
index 0000000000000000000000000000000000000000..3159544a6801e6a1201327b35ca52d57a0e03cc4
Binary files /dev/null and b/data/10.1177_1557988318780857.pdf differ
xmp.iid:D3959F9A3D77E211AAE8AF4395D140DC + 2013-02-15T12:33:54+05:30 + Adobe InDesign 7.5 + /;/metadata + + + saved + xmp.iid:E90B5F173E77E211AAE8AF4395D140DC + 2013-02-15T12:35:33+05:30 + Adobe InDesign 7.5 + /;/metadata + + + saved + xmp.iid:E8D1ACCC3F77E211AAE8AF4395D140DC + 2013-02-15T12:50:44+05:30 + Adobe InDesign 7.5 + /;/metadata + + + saved + xmp.iid:2400F7934077E211AAE8AF4395D140DC + 2013-02-15T12:53:21+05:30 + Adobe InDesign 7.5 + /;/metadata + + + saved + xmp.iid:505767BF4077E211AAE8AF4395D140DC + 2013-02-15T13:02:49+05:30 + Adobe InDesign 7.5 + /;/metadata + + + saved + xmp.iid:535767BF4077E211AAE8AF4395D140DC + 2013-02-15T13:02:54+05:30 + Adobe InDesign 7.5 + /;/metadata + + + saved + xmp.iid:89EFAC0F4277E211AAE8AF4395D140DC + 2013-02-15T13:04:56+05:30 + Adobe InDesign 7.5 + /;/metadata + + + saved + xmp.iid:B903FDF94E77E2119900CBFF622638CA + 2013-02-15T14:46:29+05:30 + Adobe InDesign 7.5 + /;/metadata + + + saved + xmp.iid:DAEA5DC85077E2119900CBFF622638CA + 2013-02-15T14:53:58+05:30 + Adobe InDesign 7.5 + /;/metadata + + + saved + xmp.iid:F105B549F277E211BC37B8629040E288 + 2013-02-16T10:24:39+05:30 + Adobe InDesign 7.5 + /;/metadata + + + saved + xmp.iid:3DDED38E2B78E211A49798DF8A64C9B6 + 2013-02-16T16:57:19+05:30 + Adobe InDesign 7.5 + /;/metadata + + + saved + xmp.iid:3EDED38E2B78E211A49798DF8A64C9B6 + 2013-02-16T17:01:50+05:30 + Adobe InDesign 7.5 + /;/metadata + + + saved + xmp.iid:500FF2692F78E211A49798DF8A64C9B6 + 2013-02-16T17:23:59+05:30 + Adobe InDesign 7.5 + /;/metadata + + + saved + xmp.iid:7895A4D03778E211A49798DF8A64C9B6 + 2013-02-16T18:26:42+05:30 + Adobe InDesign 7.5 + /;/metadata + + + saved + xmp.iid:676884B8D779E211A3BCFC2F27B41F33 + 2013-02-18T20:00:59+05:30 + Adobe InDesign 7.5 + /;/metadata + + + saved + xmp.iid:C8708D95AA51E311B158D2E323395EF2 + 2013-11-20T11:43+05:30 + Adobe InDesign 7.5 + /;/metadata + + + saved + xmp.iid:C9708D95AA51E311B158D2E323395EF2 + 2013-11-20T11:43:04+05:30 + Adobe InDesign 7.5 + /;/metadata + + + saved + xmp.iid:CE51EEA6AE73E311BE7BB1EEC39780D9 + 2014-01-02T18:46:34+05:30 + Adobe InDesign 7.5 + /;/metadata + + + saved + xmp.iid:CF51EEA6AE73E311BE7BB1EEC39780D9 + 2014-01-02T18:47:42+05:30 + Adobe InDesign 7.5 + /;/metadata + + + saved + xmp.iid:9453F3218A55E611931199556B16D26C + 2016-07-29T18:15:24+05:30 + Adobe InDesign 7.5 + /;/metadata + + + saved + xmp.iid:0B5DFCCF015EE6118148B329D94E75A2 + 2016-08-09T12:53:02+05:30 + Adobe InDesign 7.5 + /;/metadata + + + saved + xmp.iid:CDB37F1F025EE6118148B329D94E75A2 + 2016-08-09T12:53:09+05:30 + Adobe InDesign 7.5 + /;/metadata + + + saved + xmp.iid:CEB37F1F025EE6118148B329D94E75A2 + 2016-08-09T12:53:12+05:30 + Adobe InDesign 7.5 + /;/metadata + + + saved + xmp.iid:D0B37F1F025EE6118148B329D94E75A2 + 2016-08-09T12:53:24+05:30 + Adobe InDesign 7.5 + /;/metadata + + + saved + xmp.iid:D1B37F1F025EE6118148B329D94E75A2 + 2016-08-09T12:53:44+05:30 + Adobe InDesign 7.5 + /;/metadata + + + saved + xmp.iid:D4B37F1F025EE6118148B329D94E75A2 + 2016-08-09T12:53:55+05:30 + Adobe InDesign 7.5 + /;/metadata + + + saved + xmp.iid:D7B37F1F025EE6118148B329D94E75A2 + 2016-08-09T12:53:58+05:30 + Adobe InDesign 7.5 + /;/metadata + + + saved + xmp.iid:69E63545025EE6118148B329D94E75A2 + 2016-08-09T12:54:12+05:30 + Adobe InDesign 7.5 + /;/metadata + + + saved + xmp.iid:6AE63545025EE6118148B329D94E75A2 + 2016-08-09T12:54:22+05:30 + Adobe InDesign 7.5 + /;/metadata + + + saved + xmp.iid:4B17B070295EE6118148B329D94E75A2 + 2016-08-09T17:36:40+05:30 + Adobe InDesign 7.5 + /;/metadata + + + saved + 
xmp.iid:3BF49E1E2A5EE6118148B329D94E75A2 + 2016-08-09T17:39:27+05:30 + Adobe InDesign 7.5 + /;/metadata + + + saved + xmp.iid:42F49E1E2A5EE6118148B329D94E75A2 + 2016-08-09T17:39:56+05:30 + Adobe InDesign 7.5 + /;/metadata + + + saved + xmp.iid:88204F332A5EE6118148B329D94E75A2 + 2016-08-09T17:40:02+05:30 + Adobe InDesign 7.5 + /;/metadata + + + saved + xmp.iid:89204F332A5EE6118148B329D94E75A2 + 2016-08-09T17:41:33+05:30 + Adobe InDesign 7.5 + /;/metadata + + + saved + xmp.iid:8A204F332A5EE6118148B329D94E75A2 + 2016-08-09T17:42:30+05:30 + Adobe InDesign 7.5 + /;/metadata + + + saved + xmp.iid:81081F1AB25EE6119C8FD535260EF189 + 2016-08-10T12:24:41+05:30 + Adobe InDesign 7.5 + /;/metadata + + + saved + xmp.iid:C0FC712603C9E6118F43EC9EB06D0352 + 2016-12-23T17:00:04+05:30 + Adobe InDesign 7.5 + /;/metadata + + + saved + xmp.iid:9B67352E03C9E6118F43EC9EB06D0352 + 2016-12-23T17:00:17+05:30 + Adobe InDesign 7.5 + /;/metadata + + + saved + xmp.iid:A267352E03C9E6118F43EC9EB06D0352 + 2016-12-23T17:00:29+05:30 + Adobe InDesign 7.5 + /;/metadata + + + saved + xmp.iid:ACFB613F03C9E6118F43EC9EB06D0352 + 2016-12-23T17:00:46+05:30 + Adobe InDesign 7.5 + /;/metadata + + + saved + xmp.iid:B3FB613F03C9E6118F43EC9EB06D0352 + 2016-12-23T17:01:44+05:30 + Adobe InDesign 7.5 + /;/metadata + + + saved + xmp.iid:1F00646D03C9E6118F43EC9EB06D0352 + 2016-12-23T17:02:03+05:30 + Adobe InDesign 7.5 + /;/metadata + + + saved + xmp.iid:2000646D03C9E6118F43EC9EB06D0352 + 2016-12-23T17:02:19+05:30 + Adobe InDesign 7.5 + /;/metadata + + + saved + xmp.iid:14C36D8463FEE611B77699ED96928F18 + 2017-03-01T15:15:13+05:30 + Adobe InDesign 7.5 + /;/metadata + + + saved + xmp.iid:A07634CA63FEE611B77699ED96928F18 + 2017-03-01T15:15:22+05:30 + Adobe InDesign 7.5 + /;/metadata + + + saved + xmp.iid:078E72EDF124E7118EF7D050182FAE34 + 2017-04-19T16:48:34+05:30 + Adobe InDesign 7.5 + /;/metadata + + + saved + xmp.iid:088E72EDF124E7118EF7D050182FAE34 + 2017-04-19T16:48:56+05:30 + Adobe InDesign 7.5 + /;/metadata + + + saved + xmp.iid:4C522A0BF224E7118EF7D050182FAE34 + 2017-04-19T16:49:24+05:30 + Adobe InDesign 7.5 + /;/metadata + + + saved + xmp.iid:53522A0BF224E7118EF7D050182FAE34 + 2017-04-19T16:49:31+05:30 + Adobe InDesign 7.5 + /;/metadata + + + saved + xmp.iid:54522A0BF224E7118EF7D050182FAE34 + 2017-04-19T16:49:38+05:30 + Adobe InDesign 7.5 + /;/metadata + + + saved + xmp.iid:81BE278AD52CE711B67BEE8ED78F62ED + 2017-04-29T17:46:07+05:30 + Adobe InDesign 7.5 + /;/metadata + + + saved + xmp.iid:3D60FAA12B36E711AB52BF1EF27B8D90 + 2017-05-11T14:54:28+05:30 + Adobe InDesign 7.5 + /;/metadata + + + saved + xmp.iid:982309E60F63E711A351DFD68971CD21 + 2017-07-07T18:01:21+05:30 + Adobe InDesign 7.5 + /;/metadata + + + saved + xmp.iid:616B44311063E711A351DFD68971CD21 + 2017-07-07T18:01:25+05:30 + Adobe InDesign 7.5 + /;/metadata + + + saved + xmp.iid:B8AB66BF0713E811B753B68D67E33140 + 2018-02-16T16:24:40+05:30 + Adobe InDesign 7.5 + /;/metadata + + + saved + xmp.iid:050700E08E2DE811BE74B718962F45D7 + 2018-03-22T10:37:10+05:30 + Adobe InDesign 7.5 + /;/metadata + + + saved + xmp.iid:DC244C30A52EE811B20DEAE06373C16D + 2018-03-23T19:49:49+05:30 + Adobe InDesign 7.5 + /;/metadata + + + saved + xmp.iid:6D830E40A52EE811B20DEAE06373C16D + 2018-03-23T19:49:51+05:30 + Adobe InDesign 7.5 + /;/metadata + + + saved + xmp.iid:EE4B1EE6E33BE811A67DF5D84CFA9E35 + 2018-04-09T16:28:58+05:30 + Adobe InDesign 7.5 + /;/metadata + + + saved + xmp.iid:49FF6F0DE53BE811A67DF5D84CFA9E35 + 2018-04-09T16:29:19+05:30 + Adobe InDesign 7.5 + /;/metadata + + + saved + 
xmp.iid:CD098117E53BE811A67DF5D84CFA9E35 + 2018-04-09T16:29:36+05:30 + Adobe InDesign 7.5 + /;/metadata + + + saved + xmp.iid:D4098117E53BE811A67DF5D84CFA9E35 + 2018-04-09T16:29:50+05:30 + Adobe InDesign 7.5 + /;/metadata + + + saved + xmp.iid:8DBC672EE53BE811A67DF5D84CFA9E35 + 2018-04-09T16:30:14+05:30 + Adobe InDesign 7.5 + /;/metadata + + + saved + xmp.iid:BB84F84FE53BE811A67DF5D84CFA9E35 + 2018-04-09T16:31:11+05:30 + Adobe InDesign 7.5 + /;/metadata + + + saved + xmp.iid:C284F84FE53BE811A67DF5D84CFA9E35 + 2018-04-09T16:34:09+05:30 + Adobe InDesign 7.5 + /;/metadata + + + saved + xmp.iid:C384F84FE53BE811A67DF5D84CFA9E35 + 2018-04-09T16:34:40+05:30 + Adobe InDesign 7.5 + /;/metadata + + + saved + xmp.iid:C484F84FE53BE811A67DF5D84CFA9E35 + 2018-04-09T16:35:02+05:30 + Adobe InDesign 7.5 + /;/metadata + + + saved + xmp.iid:3C38C40AE63BE811A67DF5D84CFA9E35 + 2018-04-09T16:36:24+05:30 + Adobe InDesign 7.5 + /;/metadata + + + saved + xmp.iid:8A20180C3441E811B727925162634D52 + 2018-04-16T10:37:23+05:30 + Adobe InDesign 7.5 + /;/metadata + + + saved + xmp.iid:9120180C3441E811B727925162634D52 + 2018-04-16T10:37:45+05:30 + Adobe InDesign 7.5 + /;/metadata + + + saved + xmp.iid:9220180C3441E811B727925162634D52 + 2018-04-16T10:37:56+05:30 + Adobe InDesign 7.5 + /;/metadata + + + saved + xmp.iid:A6917B213441E811B727925162634D52 + 2018-04-16T10:37:59+05:30 + Adobe InDesign 7.5 + /;/metadata + + + saved + xmp.iid:AD917B213441E811B727925162634D52 + 2018-04-16T10:38:57+05:30 + Adobe InDesign 7.5 + /;/metadata + + + saved + xmp.iid:D246E02A4D62E81193A680DA6541124E + 2018-05-28T13:33:25+05:30 + Adobe InDesign 7.5 + /;/metadata + + + saved + xmp.iid:D346E02A4D62E81193A680DA6541124E + 2018-05-28T13:33:25+05:30 + Adobe InDesign 7.5 + /metadata + + + saved + xmp.iid:2368A69E4D62E81193A680DA6541124E + 2018-05-28T13:33:34+05:30 + Adobe InDesign 7.5 + /;/metadata + + + saved + xmp.iid:9275AE9E5362E81193A680DA6541124E + 2018-05-28T14:16:31+05:30 + Adobe InDesign 7.5 + /;/metadata + + + saved + xmp.iid:D4E92ADD6362E81193A680DA6541124E + 2018-05-28T16:12:48+05:30 + Adobe InDesign 7.5 + /;/metadata + + + saved + xmp.iid:D241E3527A70E811B56CC921FEF406AF + 2018-06-15T14:34:42+05:30 + Adobe InDesign 7.5 + /;/metadata + + + saved + xmp.iid:3786884E7B70E811B56CC921FEF406AF + 2018-06-15T14:35:53+05:30 + Adobe InDesign 7.5 + /;/metadata + + + saved + xmp.iid:156CFC547B70E811B56CC921FEF406AF + 2018-06-15T14:36:04+05:30 + Adobe InDesign 7.5 + /;/metadata + + + saved + xmp.iid:CAB107657B70E811B56CC921FEF406AF + 2018-06-15T14:36:31+05:30 + Adobe InDesign 7.5 + /;/metadata + + + saved + xmp.iid:8DD4707B7B70E811B56CC921FEF406AF + 2018-06-15T14:37:08+05:30 + Adobe InDesign 7.5 + /;/metadata + + + saved + xmp.iid:96D4707B7B70E811B56CC921FEF406AF + 2018-06-15T14:38:37+05:30 + Adobe InDesign 7.5 + /;/metadata + + + saved + xmp.iid:3B19EDB37B70E811B56CC921FEF406AF + 2018-06-15T14:38:43+05:30 + Adobe InDesign 7.5 + /;/metadata + + + saved + xmp.iid:255BEAE57B70E811B56CC921FEF406AF + 2018-06-15T14:40:07+05:30 + Adobe InDesign 7.5 + /;/metadata + + + saved + xmp.iid:9F2E882E7C70E811B56CC921FEF406AF + 2018-06-15T14:42:09+05:30 + Adobe InDesign 7.5 + /;/metadata + + + saved + xmp.iid:B7CDF52F7C70E811B56CC921FEF406AF + 2018-06-15T14:42:11+05:30 + Adobe InDesign 7.5 + /;/metadata + + + saved + xmp.iid:E7449A437C70E811B56CC921FEF406AF + 2018-06-15T14:42:44+05:30 + Adobe InDesign 7.5 + /;/metadata + + + saved + xmp.iid:F0449A437C70E811B56CC921FEF406AF + 2018-06-15T14:42:54+05:30 + Adobe InDesign 7.5 + /;/metadata + + + saved + 
xmp.iid:E1415C507C70E811B56CC921FEF406AF + 2018-06-15T14:43:06+05:30 + Adobe InDesign 7.5 + /;/metadata + + + saved + xmp.iid:DB652C577C70E811B56CC921FEF406AF + 2018-06-15T14:43:17+05:30 + Adobe InDesign 7.5 + /;/metadata + + + saved + xmp.iid:57995D7D7C70E811B56CC921FEF406AF + 2018-06-15T14:44:21+05:30 + Adobe InDesign 7.5 + /;/metadata + + + saved + xmp.iid:0D93918D7C70E811B56CC921FEF406AF + 2018-06-15T14:44:48+05:30 + Adobe InDesign 7.5 + /;/metadata + + + saved + xmp.iid:1693918D7C70E811B56CC921FEF406AF + 2018-06-15T14:44:55+05:30 + Adobe InDesign 7.5 + /;/metadata + + + saved + xmp.iid:1AA0E2BE7C70E811B56CC921FEF406AF + 2018-06-15T14:46:11+05:30 + Adobe InDesign 7.5 + /;/metadata + + + saved + xmp.iid:1DD4D9CF7C70E811B56CC921FEF406AF + 2018-06-15T14:46:40+05:30 + Adobe InDesign 7.5 + /;/metadata + + + saved + xmp.iid:EE0639E67C70E811B56CC921FEF406AF + 2018-06-15T14:47:17+05:30 + Adobe InDesign 7.5 + /;/metadata + + + saved + xmp.iid:1B726DE87C70E811B56CC921FEF406AF + 2018-06-15T14:47:21+05:30 + Adobe InDesign 7.5 + /;/metadata + + + saved + xmp.iid:332A45117D70E811B56CC921FEF406AF + 2018-06-15T14:48:29+05:30 + Adobe InDesign 7.5 + /;/metadata + + + saved + xmp.iid:3C2A45117D70E811B56CC921FEF406AF + 2018-06-15T14:49:17+05:30 + Adobe InDesign 7.5 + /;/metadata + + + saved + xmp.iid:0D37EA3F7D70E811B56CC921FEF406AF + 2018-06-15T14:49:48+05:30 + Adobe InDesign 7.5 + /;/metadata + + + saved + xmp.iid:CCCE4F607D70E811B56CC921FEF406AF + 2018-06-15T14:50:42+05:30 + Adobe InDesign 7.5 + /;/metadata + + + saved + xmp.iid:C5F7D9967D70E811B56CC921FEF406AF + 2018-06-15T14:52:13+05:30 + Adobe InDesign 7.5 + /;/metadata + + + saved + xmp.iid:DFC93BA57D70E811B56CC921FEF406AF + 2018-06-15T14:52:38+05:30 + Adobe InDesign 7.5 + /;/metadata + + + saved + xmp.iid:E8C93BA57D70E811B56CC921FEF406AF + 2018-06-15T14:53:14+05:30 + Adobe InDesign 7.5 + /;/metadata + + + saved + xmp.iid:C60E95DA7D70E811B56CC921FEF406AF + 2018-06-15T14:54:07+05:30 + Adobe InDesign 7.5 + /;/metadata + + + saved + xmp.iid:3357B4097E70E811B56CC921FEF406AF + 2018-06-15T14:55:26+05:30 + Adobe InDesign 7.5 + /;/metadata + + + saved + xmp.iid:B8158A107E70E811B56CC921FEF406AF + 2018-06-15T14:55:38+05:30 + Adobe InDesign 7.5 + /;/metadata + + + saved + xmp.iid:A3A28E1F7E70E811B56CC921FEF406AF + 2018-06-15T14:56:03+05:30 + Adobe InDesign 7.5 + /;/metadata + + + saved + xmp.iid:140D96417E70E811B56CC921FEF406AF + 2018-06-15T14:57+05:30 + Adobe InDesign 7.5 + /;/metadata + + + saved + xmp.iid:1D0D96417E70E811B56CC921FEF406AF + 2018-06-15T14:57:23+05:30 + Adobe InDesign 7.5 + /;/metadata + + + saved + xmp.iid:CBDFAA607E70E811B56CC921FEF406AF + 2018-06-15T14:57:52+05:30 + Adobe InDesign 7.5 + /;/metadata + + + saved + xmp.iid:7DC491D03271E811AE5EE7EF95534DD1 + 2018-06-16T12:29:29+05:30 + Adobe InDesign 7.5 + /;/metadata + + + saved + xmp.iid:86C491D03271E811AE5EE7EF95534DD1 + 2018-06-16T12:30:18+05:30 + Adobe InDesign 7.5 + /;/metadata + + + saved + xmp.iid:37DF9CF13271E811AE5EE7EF95534DD1 + 2018-06-16T12:30:25+05:30 + Adobe InDesign 7.5 + /;/metadata + + + saved + xmp.iid:DA613A9AD390E8119736CE53F8806BE6 + 2018-07-26T18:28:33+05:30 + Adobe InDesign 7.5 + /;/metadata + + + saved + xmp.iid:D3E00967D890E8119736CE53F8806BE6 + 2018-07-26T19:02:55+05:30 + Adobe InDesign 7.5 + /;/metadata + + + saved + xmp.iid:E546D31CD990E8119736CE53F8806BE6 + 2018-07-26T19:08+05:30 + Adobe InDesign 7.5 + /;/metadata + + + saved + xmp.iid:09FAFECCD990E8119736CE53F8806BE6 + 2018-07-26T19:12:55+05:30 + Adobe InDesign 7.5 + /;/metadata + + + saved + 
xmp.iid:553878CC5A91E811AE4D84A0E5C74650 + 2018-07-27T10:36:19+05:30 + Adobe InDesign 7.5 + /;/metadata + + + saved + xmp.iid:FBEE0DC45F91E811AE4D84A0E5C74650 + 2018-07-27T11:11:53+05:30 + Adobe InDesign 7.5 + /;/metadata + + + saved + xmp.iid:FCEE0DC45F91E811AE4D84A0E5C74650 + 2018-07-27T11:11:53+05:30 + Adobe InDesign 7.5 + /metadata + + + + + + 2018-08-09T09:00:51+05:30 + 2018-08-09T09:00:53+05:30 + 2018-08-09T09:00:53+05:30 + Adobe InDesign CS5.5 (7.5) + + + + 1 + JPEG + 256 + 256 + /9j/4AAQSkZJRgABAgEASABIAAD/7QAsUGhvdG9zaG9wIDMuMAA4QklNA+0AAAAAABAASAAAAAEA AQBIAAAAAQAB/+4AE0Fkb2JlAGSAAAAAAQUAAgAg/9sAhAAKBwcHBwcKBwcKDgkJCQ4RDAsLDBEU EBAQEBAUEQ8RERERDxERFxoaGhcRHyEhISEfKy0tLSsyMjIyMjIyMjIyAQsJCQ4MDh8XFx8rIh0i KzIrKysrMjIyMjIyMjIyMjIyMjIyMjI+Pj4+PjJAQEBAQEBAQEBAQEBAQEBAQEBAQED/wAARCAEA AL8DAREAAhEBAxEB/8QBogAAAAcBAQEBAQAAAAAAAAAABAUDAgYBAAcICQoLAQACAgMBAQEBAQAA AAAAAAABAAIDBAUGBwgJCgsQAAIBAwMCBAIGBwMEAgYCcwECAxEEAAUhEjFBUQYTYSJxgRQykaEH FbFCI8FS0eEzFmLwJHKC8SVDNFOSorJjc8I1RCeTo7M2F1RkdMPS4ggmgwkKGBmElEVGpLRW01Uo GvLj88TU5PRldYWVpbXF1eX1ZnaGlqa2xtbm9jdHV2d3h5ent8fX5/c4SFhoeIiYqLjI2Oj4KTlJ WWl5iZmpucnZ6fkqOkpaanqKmqq6ytrq+hEAAgIBAgMFBQQFBgQIAwNtAQACEQMEIRIxQQVRE2Ei BnGBkTKhsfAUwdHhI0IVUmJy8TMkNEOCFpJTJaJjssIHc9I14kSDF1STCAkKGBkmNkUaJ2R0VTfy o7PDKCnT4/OElKS0xNTk9GV1hZWltcXV5fVGVmZ2hpamtsbW5vZHV2d3h5ent8fX5/c4SFhoeIiY qLjI2Oj4OUlZaXmJmam5ydnp+So6SlpqeoqaqrrK2ur6/9oADAMBAAIRAxEAPwA80X8tPL2s6Fou pTRRwtNpto0ixQRAs5hQtIzFTVmJqTiqc6f+WHlfT5TKtrBc8hxKXVtBKv8AsQYxQ4qmP+CPLH/V o03/AKQbf/qniqqnkvyiFAbQ9NY9z9TgH/MvFW/8GeT/APqw6b/0hwf9U8Vd/gzyf/1YdN/6Q4P+ qeKu/wAGeT/+rDpv/SHB/wBU8Vd/gzyf/wBWHTf+kOD/AKp4q7/Bnk//AKsOm/8ASHB/1TxV3+DP J/8A1YdN/wCkOD/qnirv8GeT/wDqw6b/ANIcH/VPFXf4M8n/APVh03/pDg/6p4q7/Bnk/wD6sOm/ 9IcH/VPFXf4M8n/9WHTf+kOD/qnirv8ABnk//qw6b/0hwf8AVPFXf4M8n/8AVh03/pDg/wCqeKu/ wZ5P/wCrDpv/AEhwf9U8Vd/gzyf/ANWHTf8ApDg/6p4q7/Bnk/8A6sOm/wDSHB/1TxV3+DPJ/wD1 YdN/6Q4P+qeKu/wZ5P8A+rDpv/SHB/1TxV3+DPJ//Vh03/pDg/6p4q7/AAZ5P/6sOm/9IcH/AFTx V3+DPJ//AFYdN/6Q4P8AqnirvJn/ACh+g/8AbNs/+TEeKpxI4jjaQgkICxA3JoK7Yqxz/HFn/wBW 3Uf+RC/9VcVd/jiz/wCrbqP/ACIX/qriqe2N4l/aRXiRyRLKKhJRxcUJHxCp8MVRGKuxV2KuxV2K uxV2KuxV2KuxV2KuxV2KuxV2KuxV2KuxV2KpL5M/5Q/Qf+2bZ/8AJiPFU5NADU0Hc9MVQvrWH/LS v/I4/wDNeKu9aw/5aV/5HH/mvFXetYf8tK/8jj/zXirvWsP+Wlf+Rx/5rxV3rWH/AC0r/wAjj/zX irvWsP8AlpX/AJHH/mvFXetYf8tK/wDI4/8ANeKu9aw/5aV/5HH/AJrxV3rWH/LSv/I4/wDNeKu9 aw/5aV/5HH/mvFXetYf8tK/8jj/zXirvWsP+Wlf+Rx/5rxV3rWH/AC0r/wAjj/zXiqsIYyKgsQeh 5t/zViqydVhgkmHImNGejSsoPEV3Ynb54qxf/F7/APLPa/8AcUX/AJoxV3+L3/5Z7X/uKL/zRirv 8Xv/AMs9r/3FF/5oxVMrK/1i7MMg0xRbTFazpf8AMBCd3ACDlQYqniqEFBX6ST+uuKrsVdiqS+TP +UP0H/tm2f8AyYjxVNbkI1tKHbihRgzUrQUNTTFXlv6K8m/9X2T/AKRZP+acVd+ivJv/AFfZP+kW T/mnFXforyb/ANX2T/pFk/5pxV36K8m/9X2T/pFk/wCacVd+ivJv/V9k/wCkWT/mnFXforyb/wBX 2T/pFk/5pxV36K8m/wDV9k/6RZP+acVd+ivJv/V9k/6RZP8AmnFXforyb/1fZP8ApFk/5pxV36K8 m/8AV9k/6RZP+acVd+ivJv8A1fZP+kWT/mnFXforyb/1fZP+kWT/AJpxV36K8m/9X2T/AKRZP+ac VZtb+cvLCpFbpfcioWNf3UwqRRf994qnV6eNncHpSJz1C/snu2w+nFXnf18/7+k/6TNP/wCaMVd9 fP8Av6T/AKTNP/5oxVM9B1SyjuymoPGYpF/vLm6s3VCN9liCnfFWUxazoZKQwX9oSSFREmj3J2AV Q2Ko/FXYq7FUl8mf8ofoP/bNs/8AkxHiqcSIsiNG+6uCpHsdsVY//gTyz/yyt/yNk/5rxV3+BPLP /LK3/I2T/mvFXf4E8s/8srf8jZP+a8Vd/gTyz/yyt/yNk/5rxV3+BPLP/LK3/I2T/mvFXf4E8s/8 srf8jZP+a8Vd/gTyz/yyt/yNk/5rxV3+BPLP/LK3/I2T/mvFXf4E8s/8srf8jZP+a8Vd/gTyz/yy t/yNk/5rxV3+BPLP/LK3/I2T/mvFXf4E8s/8srf8jZP+a8Vd/gTyz/yyt/yNk/5rxVtPI3ltHV1t mDKQQfVk6j/ZYqnd4pa0nVRUtG4AChiSVP7L/CfkcVYCmmXcqh47OV1PRl0/TiD26g4qu/RN+dxY zf8AcO0/FXfom/8A+WGb/uHafiqN0nRLqS+jaWFrVYiJecthZIDxYHiGiBYE+IxVmuKuxV2KpL5M 
/wCUP0H/ALZtn/yYjxVOemKrfWh/34v3jFXetD/vxfvGKu9aH/fi/eMVd60P+/F+8Yq71of9+L94 xV3rQ/78X7xirvWh/wB+L94xVTlvLSEKZZkTmwjXkwFWbZVHucVVPWh/34v3jFXetD/vxfvGKu9a H/fi/eMVd60P+/F+8Yq71of9+L94xV3rQ/78X7xiqnP9XuIJLeSQBJUZGKsAaMKGh+nFWNWvkfRb aBIBe3HwAAFJI4twzuGHpov8/wAsVTXRtGsNEa4e2uZJfrJUsJWj4rxFBwEaIBiqYTXlpbxNNPMk cabs7MAB88VV8VdirsVdiqS+TP8AlD9B/wC2bZ/8mI8VTiRFljaN/suCp+RFMVY5/gDy5/vqX/kY 2Ku/wB5c/wB9S/8AIxsVd/gDy5/vqX/kY2Ku/wAAeXP99S/8jGxV3+APLn++pf8AkY2Ku/wB5c/3 1L/yMbFXf4A8uf76l/5GNiqpB5H8v2/qcIXPqo0bcnLbN1pXoffFVP8AwB5c/wB9S/8AIxsVd/gD y5/vqX/kY2Ku/wAAeXP99S/8jGxV3+APLn++pf8AkY2Ku/wB5c/31L/yMbFXf4A8uf76l/5GNirv 8AeXP99S/wDIxsVd/gDy5/vqX/kY2Ku/wB5c/wB9S/8AIxsVVLbyP5ftZ0njhcsnQO5ZdxTdTtir IcVdirsVdiqS+TP+UP0H/tm2f/JiPFU56Yqt9aH/AH4v3jFW1kjY0Vgx8AQcVXYq7FXYq7FXYq7F XYq7FXYq7FXYq7FXYq7FXYq7FXYq7FXYq7FUl8mf8ofoP/bNs/8AkxHiqbXA5QSrx51RhxrSu3Su KvOf0L/37P8A0/8A/N2KozTIb3R7g3Vh5dEcpUoWN6G+EkE7MT4Yqm3+IfM3/VjX/pKjxV3+IfM3 /VjX/pKjxV3+IfM3/VjX/pKjxV3+IfM3/VjX/pKjxV3+IfM3/VjX/pKjxV3+IfM3/VjX/pKjxV3+ IfM3/VjX/pKjxV3+IfM3/VjX/pKjxV3+IfM3/VjX/pKjxV3+IfM3/VjX/pKjxV3+IfM3/VjX/pKj xV3+IfM3/VjX/pKjxV3+IfM3/VjX/pKjxV3+IfM3/VjX/pKjxV3+IfM3/VjX/pKjxV3+IfM3/VjX /pKjxV3+IfM3/VjX/pKjxVOtJu729tfWv7YWcvIr6QcSbClDyXFUdirsVSXyZ/yh+g/9s2z/AOTE eKptcDlBKOIeqMOJNAduldqYq85/RH/fu23/AHEf+znFXfoj/v3bb/uI/wDZzirv0R/37tt/3Ef+ znFXfoj/AL922/7iP/Zzirv0R/37tt/3Ef8As5xV36I/7922/wC4j/2c4q79Ef8Afu23/cR/7OcV d+iP+/dtv+4j/wBnOKu/RH/fu23/AHEf+znFXfoj/v3bb/uI/wDZzirv0R/37tt/3Ef+znFXfoj/ AL922/7iP/Zzirv0R/37tt/3Ef8As5xV36I/7922/wC4j/2c4q79Ef8Afu23/cR/7OcVd+iP+/dt v+4j/wBnOKu/RH/fu23/AHEf+znFXfoj/v3bb/uI/wDZzirv0R/37tt/3Ef+znFWY+VoPq2l+n9U Sw/esfRjm9cbhfi5836+FcVTnFXYqkvkz/lD9B/7Ztn/AMmI8VTW4FbeUEBqo3wk0B26E7UxV5t9 Qg/6sNn/ANxMf9lWKu+oQf8AVhs/+4mP+yrFXfUIP+rDZ/8AcTH/AGVYq76hB/1YbP8A7iY/7KsV d9Qg/wCrDZ/9xMf9lWKu+oQf9WGz/wC4mP8AsqxV31CD/qw2f/cTH/ZVirvqEH/Vhs/+4mP+yrFX fUIP+rDZ/wDcTH/ZVirvqEH/AFYbP/uJj/sqxV31CD/qw2f/AHEx/wBlWKu+oQf9WGz/AO4mP+yr FXfUIP8Aqw2f/cTH/ZVirvqEH/Vhs/8AuJj/ALKsVd9Qg/6sNn/3Ex/2VYq76hB/1YbP/uJj/sqx V31CD/qw2f8A3Ex/2VYq76hB/wBWGz/7iY/7KsVd9Qg/6sNn/wBxMf8AZVirM/KkSw6VwS1jsh6r H0opvrC9F3585Puriqd4q7FUl8mf8ofoP/bNs/8AkxHiqbXArBKKBqo2zGgO3c7Yq87+o/8Aaq0n /pL/AOv+Ku+o/wDaq0n/AKS/+v8AirvqP/aq0n/pL/6/4q76j/2qtJ/6S/8Ar/irvqP/AGqtJ/6S /wDr/irvqP8A2qtJ/wCkv/r/AIq76j/2qtJ/6S/+v+Ku+o/9qrSf+kv/AK/4q76j/wBqrSf+kv8A 6/4q76j/ANqrSf8ApL/6/wCKu+o/9qrSf+kv/r/irvqP/aq0n/pL/wCv+Ku+o/8Aaq0n/pL/AOv+ Ku+o/wDaq0n/AKS/+v8AirvqP/aq0n/pL/6/4q76j/2qtJ/6S/8Ar/irvqP/AGqtJ/6S/wDr/irv qP8A2qtJ/wCkv/r/AIq76j/2qtJ/6S/+v+Ksv8rxelphT0ILb94x9O1k9WPou/Lk+/04qnOKuxVJ fJn/ACh+g/8AbNs/+TEeKprcCtvKPhNUb7ZovT9o7bYq82+qp/vny/8A9JL/APZTirvqqf758v8A /SS//ZTirvqqf758v/8ASS//AGU4q76qn++fL/8A0kv/ANlOKu+qp/vny/8A9JL/APZTirvqqf75 8v8A/SS//ZTirvqqf758v/8ASS//AGU4q76qn++fL/8A0kv/ANlOKu+qp/vny/8A9JL/APZTirvq qf758v8A/SS//ZTirvqqf758v/8ASS//AGU4q76qn++fL/8A0kv/ANlOKu+qp/vny/8A9JL/APZT irvqqf758v8A/SS//ZTirvqqf758v/8ASS//AGU4q76qn++fL/8A0kv/ANlOKu+qp/vny/8A9JL/ APZTirvqqf758v8A/SS//ZTirvqqf758v/8ASS//AGU4qzPyogj0riFtE/esaWLmSLov7ReTfx3x VO8VdiqS+TP+UP0H/tm2f/JiPFU1uP8AeeX7J+Btn+z0/a9sVeb1X/ln8v8A/Ixv+q2Kuqv/ACz+ X/8AkY3/AFWxV1V/5Z/L/wDyMb/qtirqr/yz+X/+Rjf9VsVdVf8Aln8v/wDIxv8Aqtirqr/yz+X/ APkY3/VbFXVX/ln8v/8AIxv+q2Kuqv8Ayz+X/wDkY3/VbFXVX/ln8v8A/Ixv+q2Kuqv/ACz+X/8A kY3/AFWxV1V/5Z/L/wDyMb/qtirqr/yz+X/+Rjf9VsVdVf8Aln8v/wDIxv8Aqtirqr/yz+X/APkY 3/VbFXVX/ln8v/8AIxv+q2Kuqv8Ayz+X/wDkY3/VbFXVX/ln8v8A/Ixv+q2Kuqv/ACz+X/8AkY3/ AFWxV1V/5Z/L/wDyMb/qtirM/KhB0vZLRP3rbWBLRdF7lm38cVTrFXYqkvkz/lD9B/7Ztn/yYjxV Nrj+4l+z9hvt/Z6fte2KvOf/AAnMVd/4TmKu/wDCcxV3/hOYq7/wnMVd/wCE5irv/CcxV3/hOYq7 
/wAJzFXf+E5irv8AwnMVd/4TmKu/8JzFXf8AhOYq7/wnMVd/4TmKu/8ACcxV3/hOYq7/AMJzFWY+ Vv8Ajmf8ef8AeN/xz/7nov8Aw3jiqc4q7FUl8mf8ofoP/bNs/wDkxHiqbyqXidFALMpADfZqR39s VYd/hjXf+WLQv+RUn/NGKu/wxrv/ACxaF/yKk/5oxV3+GNd/5YtC/wCRUn/NGKu/wxrv/LFoX/Iq T/mjFXf4Y13/AJYtC/5FSf8ANGKu/wAMa7/yxaF/yKk/5oxV3+GNd/5YtC/5FSf80Yq7/DGu/wDL FoX/ACKk/wCaMVd/hjXf+WLQv+RUn/NGKu/wxrv/ACxaF/yKk/5oxV3+GNd/5YtC/wCRUn/NGKu/ wxrv/LFoX/IqT/mjFXf4Y13/AJYtC/5FSf8ANGKu/wAMa7/yxaF/yKk/5oxVF6b5ZuRc11ax0lrb idraJufLt9tAKYqm3+HNA/6t1t/yKX+mKu/w5oH/AFbrb/kUv9MVd/hzQP8Aq3W3/Ipf6Yq7/Dmg f9W62/5FL/TFUZa2drYxehZwpBHUtwjUKKnvQYqr4q7FUl8mf8ofoP8A2zbP/kxHiqcSMyRsyLzZ QSF6VIHTFWOf4i8x/wDUuS/9JC/9UsVVLbXtfmuYoZtAkgikdVeUzghFJAL09MVoN8VZDirsVdir sVdirsVdirsVdirsVdirsVdirsVdirsVdirsVdirsVSXyZ/yh+g/9s2z/wCTEeKptcLyglXiXqjD iDQtUdK4q85/QX/fr3P/AEmf9esVd+gv+/Xuf+kz/r1irv0F/wB+vc/9Jn/XrFXfoL/v17n/AKTP +vWKo/SPKlhfzPHf6NPpyKvJZGuTJyNacaBFxVNv8AeXP99S/wDIxsVd/gDy5/vqX/kY2Ku/wB5c /wB9S/8AIxsVd/gDy5/vqX/kY2Ku/wAAeXP99S/8jGxV3+APLn++pf8AkY2Ku/wB5c/31L/yMbFX f4A8uf76l/5GNirv8AeXP99S/wDIxsVd/gDy5/vqX/kY2Ku/wB5c/wB9S/8AIxsVd/gDy5/vqX/k Y2Ku/wAAeXP99S/8jGxV3+APLn++pf8AkY2KpxpelWej2v1OxVli5F6MSxqaV3PyxVG4q7FUl8mf 8ofoP/bNs/8AkxHiqcSOsaNI+yoCxPsN8VY//jvyz/y1N/yKk/5oxV3+O/LP/LU3/IqT/mjFXf47 8s/8tTf8ipP+aMVd/jvyz/y1N/yKk/5oxV3+O/LP/LU3/IqT/mjFXf478s/8tTf8ipP+aMVd/jvy z/y1N/yKk/5oxV3+O/LP/LU3/IqT/mjFXf478s/8tTf8ipP+aMVd/jvyz/y1N/yKk/5oxV3+O/LP /LU3/IqT/mjFXf478s/8tTf8ipP+aMVd/jvyz/y1N/yKk/5oxVkWKuxV2KuxV2KuxV2KuxV2KpL5 M/5Q/Qf+2bZ/8mI8VTnriq30Yf8Afa/cMVd6MP8AvtfuGKu9GH/fa/cMVd6MP++1+4Yq70Yf99r9 wxV3ow/77X7hirvRh/32v3DFXejD/vtfuGKu9GH/AH2v3DFXejD/AL7X7hirvRh/32v3DFXejD/v tfuGKu9GH/fa/cMVX4q7FXYq7FXYq7FXYq7FXYqkvkz/AJQ/Qf8Atm2f/JiPFU2n3gk+19hvsfa6 fs++KvPPRm/335l+44q70Zv99+ZfuOKu9Gb/AH35l+44q70Zv99+ZfuOKu9Gb/ffmX7jirvRm/33 5l+44q70Zv8AffmX7jirvRm/335l+44q70Zv99+ZfuOKsz8uqy6TEGF0DV9r/af7R+1/D2xVhnoz f778y/ccVd6M3++/Mv3HFXejN/vvzL9xxV3ozf778y/ccVd6M3++/Mv3HFXejN/vvzL9xxV3ozf7 78y/ccVd6M3++/Mv3HFXejN/vvzL9xxVl/lhWXTKMLxT6jbajtN0H/C+GKpxirsVSXyZ/wAofoP/ AGzbP/kxHiqbXArBKKMao2yfaO37Pvirzn6r/wAuXmP7/wDrzirvqv8Ay5eY/v8A+vOKu+q/8uXm P7/+vOKu+q/8uXmP7/8Arzirvqv/AC5eY/v/AOvOKu+q/wDLl5j+/wD684q76r/y5eY/v/684q76 r/y5eY/v/wCvOKu+q/8ALl5j+/8A684qzXy2nDSIV9O5ioX+G+3nHxH7Xwr9G3TFWFfVf+XLzH9/ /XnFXfVf+XLzH9//AF5xV31X/ly8x/f/ANecVd9V/wCXLzH9/wD15xV31X/ly8x/f/15xV31X/ly 8x/f/wBecVd9V/5cvMf3/wDXnFXfVf8Aly8x/f8A9ecVd9V/5cvMf3/9ecVZj5WT09L4+ndxfvWP HUDWbou/2V+HwxVOcVdiqS+TP+UP0H/tm2f/ACYjxVNpxWCQUY1RhRPtdP2ffFXn36Mk/wCWTX/+ Ryf9U8Vd+jJP+WTX/wDkcn/VPFXfoyT/AJZNf/5HJ/1TxV36Mk/5ZNf/AORyf9U8Vd+jJP8Alk1/ /kcn/VPFXfoyT/lk1/8A5HJ/1TxV36Mk/wCWTX/+Ryf9U8Vd+jJP+WTX/wDkcn/VPFXfoyT/AJZN f/5HJ/1TxVmXl2Iw6VFGUuIyC/w3jBpt2P2iAPoxVhv6Mk/5ZNf/AORyf9U8Vd+jJP8Alk1//kcn /VPFXfoyT/lk1/8A5HJ/1TxV36Mk/wCWTX/+Ryf9U8Vd+jJP+WTX/wDkcn/VPFXfoyT/AJZNf/5H J/1TxV36Mk/5ZNf/AORyf9U8Vd+jJP8Alk1//kcn/VPFXfoyT/lk1/8A5HJ/1TxVlvlmEwabwMd1 EfUY8b5g0vRe6hdvDFU4xV2KpL5M/wCUP0H/ALZtn/yYjxVNrgVglFC1UYcV2J26DFXnf6M/7Uur f8j/APr3irv0Z/2pdW/5H/8AXvFXfoz/ALUurf8AI/8A694q79Gf9qXVv+R//XvFXfoz/tS6t/yP /wCveKu/Rn/al1b/AJH/APXvFXfoz/tS6t/yP/694q79Gf8Aal1b/kf/ANe8Vd+jP+1Lq3/I/wD6 94qzTy5F6Gkwx+hNa0L/ALq5bnIKserUHXFUv/wLpH+/rv8A5Hf824q7/Aukf7+u/wDkd/zbirv8 C6R/v67/AOR3/NuKu/wLpH+/rv8A5Hf824q7/Aukf7+u/wDkd/zbirv8C6R/v67/AOR3/NuKu/wL pH+/rv8A5Hf824q7/Aukf7+u/wDkd/zbiqP0ny7Y6NM89rJM7SLwIlk5ila7bDwxVNcVdirsVSXy Z/yh+g/9s2z/AOTEeKptOOUEgCl6ow4g0J26A4q8+/Rlz/1Lt7/0nn/qnirv0Zc/9S7e/wDSef8A qnirv0Zc/wDUu3v/AEnn/qnirv0Zc/8AUu3v/Sef+qeKu/Rlz/1Lt7/0nn/qnirv0Zc/9S7e/wDS ef8Aqnirv0Zc/wDUu3v/AEnn/qnirv0Zc/8AUu3v/Sef+qeKu/Rlz/1Lt7/0nn/qnirNtFtVtNMt 
4VieCq82ikcyMjP8TKXNK0JxVH4q7FXYq7FXYq7FXYq7FXYq7FXYq7FUl8mf8ofoP/bNs/8AkxHi qcSMyRsyryZQSFrSpA6VxVhGri91ySOTUPLzs0IKpxvo12Jr2TFUu/QSf9S5L/3EE/5oxV36CT/q XJf+4gn/ADRirv0En/UuS/8AcQT/AJoxV36CT/qXJf8AuIJ/zRirv0En/UuS/wDcQT/mjFXfoJP+ pcl/7iCf80Yq79BJ/wBS5L/3EE/5oxV36CT/AKlyX/uIJ/zRirNfLduLXSIYRamy4l/3DSCYrVif 7wAVr1xVi/6du4/gN9EpX4SsthIZBTs5jHEt48dsVd+n7r/lvt/+4fN/TFXfp+6/5b7f/uHzf0xV 36fuv+W+3/7h839MVd+n7r/lvt/+4fN/TFXfp+6/5b7f/uHzf0xV36fuv+W+3/7h839MVd+n7r/l vt/+4fN/TFU30KXUNSl9c3NtPaxNxlQWrQuajbiZMVZIqKgoihR4AUxVdirsVSXyZ/yh+g/9s2z/ AOTEeKptcDlBKvHnVGHGtK7dK4q85/Qv/fs/9P8A/wA3Yq79C/8Afs/9P/8Azdirv0L/AN+z/wBP /wDzdirv0L/37P8A0/8A/N2Ku/Qv/fs/9P8A/wA3Yq79C/8Afs/9P/8Azdirv0L/AN+z/wBP/wDz dirv0L/37P8A0/8A/N2Ku/Qv/fs/9P8A/wA3YqzXy3B9W0iGH6r9R4l/9H9T1eNWJ+3vWvXFWN+r 5a/6mnUv+kl/+qOKu9Xy1/1NOpf9JL/9UcVd6vlr/qadS/6SX/6o4qi9PsNK1SRotP8AMWqTui8m C3LCgrSu8QxVPdM0f9GPI/168vPUAHG7m9ULTuo4rTFUxxV2KuxV2KuxV2KuxVJfJn/KH6D/ANs2 z/5MR4qm04rBIOPOqMOJNK7dK9sVeefoqH/qWoP+4mP+q2Ku/RUP/UtQf9xMf9VsVd+iof8AqWoP +4mP+q2Ku/RUP/UtQf8AcTH/AFWxV36Kh/6lqD/uJj/qtirv0VD/ANS1B/3Ex/1WxV36Kh/6lqD/ ALiY/wCq2Ku/RUP/AFLUH/cTH/VbFXfoqH/qWoP+4mP+q2Ksz8uwrBpMUS2q2IBf9wkvrhasf92c mrXr1xVi9NX/AJdZ/wCREGKupq/8us/8iIMVdTV/5dZ/5EQYq2rayhqv6aU+0MA/jiq71dc/m1v/ AJEwf1xV3q65/Nrf/ImD+uKplp2natfwNNJqWo2ZVynp3CRKxAAPIUB23xVF/oHU/wDq+Xf/AAMf /NOKqtro1/b3Ec0ur3NwiGrROE4t7Gig4qm+KuxV2KpL5M/5Q/Qf+2bZ/wDJiPFU4dFkRo3FVcFW HiDscVSL/A/lf/lh/wCSs3/VXFXf4H8r/wDLD/yVm/6q4q7/AAP5X/5Yf+Ss3/VXFXf4H8r/APLD /wAlZv8Aqrirv8D+V/8Alh/5Kzf9VcVd/gfyv/yw/wDJWb/qrirv8D+V/wDlh/5Kzf8AVXFXf4H8 r/8ALD/yVm/6q4q7/A/lf/lh/wCSs3/VXFU2sLC00y1WzsY/SgQkqlWanI8ju5Y9TiqU/wCB/K// ACw/8lZv+quKu/wP5X/5Yf8AkrN/1VxV3+B/K/8Ayw/8lZv+quKpza2sFlbx2lsvpwwrxRak0A92 JOKq2KuxV2KuxV2KuxV2KuxVJfJn/KH6D/2zbP8A5MR4qnPXFXk8vmXzWqWqw6brsjyIvrk2lygj cla8ieVe9So+jbdVdH5l83POAdJ1sLLcFB+5n4ohoATyiX4N68q19h3VZDqd1q8eoXEcGoahFGsj BEj0/wBVFHgshk+Ie+Kob69rn/Vz1P8A7hg/6qYq769rn/Vz1P8A7hg/6qYq769rn/Vz1P8A7hg/ 6qYq769rn/Vz1P8A7hg/6qYq769rn/Vz1P8A7hg/6qYq769rn/Vz1P8A7hg/6qYq769rn/Vz1P8A 7hg/6qYq769rn/Vz1P8A7hg/6qYq769rn/Vz1P8A7hg/6qYq769rn/Vz1P8A7hg/6qYq769rn/Vz 1P8A7hg/6qYq769rn/Vz1P8A7hg/6qYq769rn/Vz1P8A7hg/6qYq769rn/Vz1P8A7hg/6qYq769r n/Vz1P8A7hg/6qYqmGma1NarINSOp6gzEFG+ptDwA6iiOa1xVkmm3kd9bevFFNCvIrxuVZH2pvRy TTFUXirsVSXyZ/yh+g/9s2z/AOTEeKptOC0MiqORKsAoNCTTpXtirBf0Prf/AFZrn/uJj/mrFXfo fW/+rNc/9xMf81Yq79D63/1Zrn/uJj/mrFXfofW/+rNc/wDcTH/NWKu/Q+t/9Wa5/wC4mP8AmrFX fofW/wDqzXP/AHEx/wA1Yq79D63/ANWa5/7iY/5qxV36H1v/AKs1z/3Ex/zVirv0Prf/AFZrn/uJ j/mrFXfofW/+rNc/9xMf81Yqr2eh6lNdRx3mmXNtAxpJMNR58R48VapxVO/8I6Z/v67/AOkiT/mr FXf4R0z/AH9d/wDSRJ/zVirv8I6Z/v67/wCkiT/mrFXf4R0z/f13/wBJEn/NWKu/wjpn+/rv/pIk /wCasVd/hHTP9/Xf/SRJ/wA1Yq7/AAjpn+/rv/pIk/5qxV3+EdM/39d/9JEn/NWKp5irsVdirD/K fmzyrbeVdFt7jWtPhmh0+1jkjkuoVdHWGNWVlaQEEEbjFU2/xn5P/wCr9pv/AEmQf9VMVd/jPyf/ ANX7Tf8ApMg/6qYq7/Gfk/8A6v2m/wDSZB/1UxV3+M/J/wD1ftN/6TIP+qmKu/xn5P8A+r9pv/SZ B/1UxV3+M/J//V+03/pMg/6qYq7/ABn5P/6v2m/9JkH/AFUxV3+M/J//AFftN/6TIP8Aqpirv8Z+ T/8Aq/ab/wBJkH/VTFXf4z8n/wDV+03/AKTIP+qmKu/xn5P/AOr9pv8A0mQf9VMVd/jPyf8A9X7T f+kyD/qpirv8Z+T/APq/ab/0mQf9VMVd/jPyf/1ftN/6TIP+qmKu/wAZ+T/+r9pv/SZB/wBVMVd/ jPyf/wBX7Tf+kyD/AKqYq7/Gfk//AKv2m/8ASZB/1UxV3+M/J/8A1ftN/wCkyD/qpirv8Z+T/wDq /ab/ANJkH/VTFXf4z8n/APV+03/pMg/6qYq7/Gfk/wD6v2m/9JkH/VTFXf4z8n/9X7Tf+kyD/qpi rv8AGfk//q/ab/0mQf8AVTFX/9k= + + + 2 + JPEG + 256 + 256 + /9j/4AAQSkZJRgABAgEASABIAAD/7QAsUGhvdG9zaG9wIDMuMAA4QklNA+0AAAAAABAASAAAAAEA AQBIAAAAAQAB/+4AE0Fkb2JlAGSAAAAAAQUAAgAg/9sAhAAKBwcHBwcKBwcKDgkJCQ4RDAsLDBEU EBAQEBAUEQ8RERERDxERFxoaGhcRHyEhISEfKy0tLSsyMjIyMjIyMjIyAQsJCQ4MDh8XFx8rIh0i 
KzIrKysrMjIyMjIyMjIyMjIyMjIyMjI+Pj4+PjJAQEBAQEBAQEBAQEBAQEBAQEBAQED/wAARCAEA AL8DAREAAhEBAxEB/8QBogAAAAcBAQEBAQAAAAAAAAAABAUDAgYBAAcICQoLAQACAgMBAQEBAQAA AAAAAAABAAIDBAUGBwgJCgsQAAIBAwMCBAIGBwMEAgYCcwECAxEEAAUhEjFBUQYTYSJxgRQykaEH FbFCI8FS0eEzFmLwJHKC8SVDNFOSorJjc8I1RCeTo7M2F1RkdMPS4ggmgwkKGBmElEVGpLRW01Uo GvLj88TU5PRldYWVpbXF1eX1ZnaGlqa2xtbm9jdHV2d3h5ent8fX5/c4SFhoeIiYqLjI2Oj4KTlJ WWl5iZmpucnZ6fkqOkpaanqKmqq6ytrq+hEAAgIBAgMFBQQFBgQIAwNtAQACEQMEIRIxQQVRE2Ei BnGBkTKhsfAUwdHhI0IVUmJy8TMkNEOCFpJTJaJjssIHc9I14kSDF1STCAkKGBkmNkUaJ2R0VTfy o7PDKCnT4/OElKS0xNTk9GV1hZWltcXV5fVGVmZ2hpamtsbW5vZHV2d3h5ent8fX5/c4SFhoeIiY qLjI2Oj4OUlZaXmJmam5ydnp+So6SlpqeoqaqrrK2ur6/9oADAMBAAIRAxEAPwCbeU/KflW58q6L cXGi6fNNNp9rJJJJaws7u0MbMzM0ZJJJ3OKpt/gzyf8A9WHTf+kOD/qnirv8GeT/APqw6b/0hwf9 U8Vd/gzyf/1YdN/6Q4P+qeKu/wAGeT/+rDpv/SHB/wBU8Vd/gzyf/wBWHTf+kOD/AKp4q7/Bnk// AKsOm/8ASHB/1TxV3+DPJ/8A1YdN/wCkOD/qnirv8GeT/wDqw6b/ANIcH/VPFXf4M8n/APVh03/p Dg/6p4q7/Bnk/wD6sOm/9IcH/VPFXf4M8n/9WHTf+kOD/qnirv8ABnk//qw6b/0hwf8AVPFXf4M8 n/8AVh03/pDg/wCqeKu/wZ5P/wCrDpv/AEhwf9U8Vd/gzyf/ANWHTf8ApDg/6p4q7/Bnk/8A6sOm /wDSHB/1TxV3+DPJ/wD1YdN/6Q4P+qeKu/wZ5P8A+rDpv/SHB/1TxV3+DPJ//Vh03/pDg/6p4q7/ AAZ5P/6sOm/9IcH/AFTxV3+DPJ//AFYdN/6Q4P8Aqnirv8GeT/8Aqw6b/wBIcH/VPFXf4M8n/wDV h03/AKQ4P+qeKu8mf8ofoP8A2zbP/kxHiqdYql+r63YaJFHNfsypK3BeKltwK9sVSr/H/lz/AH7L /wAi2xV3+P8Ay5/v2X/kW2Ku/wAf+XP9+y/8i2xVr/H/AJc/37L/AMi2xVv/AB/5c/37L/yLbFXf 4/8ALn+/Zf8AkW2Ku/x/5c/37L/yLbFXf4/8uf79l/5Ftirv8f8Alz/fsv8AyLbFXf4/8uf79l/5 Ftirv8f+XP8Afsv/ACLbFXf4/wDLn+/Zf+RbYq7/AB/5c/37L/yLbFXf4/8ALn+/Zf8AkW2Ku/x/ 5c/37L/yLbFXf4/8uf79l/5Ftirv8f8Alz/fsv8AyLbFXf4/8uf79l/5FtiraeffLsjqiyS8mIUf uz1O2KskxV2KpL5M/wCUP0H/ALZtn/yYjxVOsVQmoX+n2CI+oTrbo5opZitT4bYqgP8AEflv/q4x /wDI1/64q7/Eflv/AKuMf/I1/wCuKoiy1PR9RlMFjdi4kVS5VJHJCggV6+JxVHeini//AAbf81Yq 70U8X/4Nv+asVd6KeL/8G3/NWKu9FPF/+Db/AJqxV3op4v8A8G3/ADVirvRTxf8A4Nv+asVQ97c6 fp0Qnvp/q8bMEDPIwBYgmn2vAYqgf8R+W/8Aq4x/8jX/AK4q7/Eflv8A6uMf/I1/64q7/Eflv/q4 x/8AI1/64q7/ABH5b/6uMf8AyNf+uKu/xH5b/wCrjH/yNf8Arirv8R+W/wDq4x/8jX/rirv8R+W/ +rjH/wAjX/rirv8AEflv/q4x/wDI1/64q2vmHy4zBV1FCSaAeq25+/FU5xV2KpL5M/5Q/Qf+2bZ/ 8mI8VTrFUn8xTWUMEJvdNk1RS5CxxxCUoafaIPTFUg+v6B/1K1z/ANIi4q76/oH/AFK1z/0iLiqv aa5plhIZrLy5eW8jLwLx2wUlSQaVB9sVRf8AjL/tT6l/yI/5uxV3+Mv+1PqX/Ij/AJuxV3+Mv+1P qX/Ij/m7FXf4y/7U+pf8iP8Am7FXf4y/7U+pf8iP+bsVVIPNvrzxwfonUI/VdU5vDRV5GlWNegxV X80lBp8fqSWcQ9Zfiv4xJHXi+yqQfi/txVinK2/5a9A/6RR/zRiruVt/y16B/wBIo/5oxV3K2/5a 9A/6RR/zRiruVt/y16B/0ij/AJoxV3K2/wCWvQP+kUf80Yq7lbf8tegf9Io/5oxV3K2/5a9A/wCk Uf8ANGKu5W3/AC16B/0ij/mjFV0TW3qpS70E/ENltgD17fB1xV6HirsVSXyZ/wAofoP/AGzbP/kx HiqdYqkPmtGe2g4rqDUkP/HMFX6ft+2KsW9Gb/ffmX7jirvRm/335l+44q70Zv8AffmX7jirvRm/ 335l+44q70Zv99+ZfuOKu9Gb/ffmX7jirvRm/wB9+ZfuOKu9Gb/ffmX7jiqvYxSi9tyU8xACVP74 H0/tD+8/yfHFWS+a5fR06NvrFva1mUc7uP1UPwyfCF4Pv74qxL69/wBrXSf+kT/rxirvr3/a10n/ AKRP+vGKu+vf9rXSf+kT/rxirvr3/a10n/pE/wCvGKu+vf8Aa10n/pE/68Yq769/2tdJ/wCkT/rx irvr3/a10n/pE/68YqnI8veZGAYTaaQdx/oqf9UsVXJ5e8xq6s02m0BBNLZAae37rFWW4q7FUl8m f8ofoP8A2zbP/kxHiqdYqx/zdH6lrbj0r+akh2040cbft/A+2KsV+q/8uXmP7/8Arzirvqv/AC5e Y/v/AOvOKu+q/wDLl5j+/wD684q76r/y5eY/v/684qm2k+WoNTgeaWbWLEo/AR3MoRmFAeQBiG2+ Ko//AAPZ/wDVy1H/AJHr/wBUsVd/gez/AOrlqP8AyPX/AKpYq7/A9n/1ctR/5Hr/ANUsVXweTbS3 njnXUNQcxOrhXmUqSprRh6fTFUy1i90mwtVm1koLdpAi+pGZRzIYj4VV+wOKsJm1bT2mkaDVbBIi xMaHTmJVSfhBPo+GKrP0raf9XfT/APuGt/1RxV36VtP+rvp//cNb/qjirv0raf8AV30//uGt/wBU cVd+lbT/AKu+n/8AcNb/AKo4q79K2n/V30//ALhrf9UcVd+lbT/q76f/ANw1v+qOKooeaLgCg8x2 9P8AmCl/6pYqui8z3DSov+IrduTAcRZSitT0r6eKs9xV2KpL5M/5Q/Qf+2bZ/wDJiPFU6xVL9W1G 606KOS1sZb9nYqyQmhUUrU7HFWGyW9xLI8raTrALsWIW4IAqa7D08VW/VJ/+rVrP/SSf+qeKu+qT 
/wDVq1n/AKST/wBU8Vd9Un/6tWs/9JJ/6p4q76pP/wBWrWf+kk/9U8Vd9Un/AOrVrP8A0kn/AKp4 q76pP/1atZ/6ST/1TxV31Sf/AKtWs/8ASSf+qeKq1lazre25OmasgEqEtJcEovxDdhw3HjirIvNl x9W06KT66mn1nVfVkh+sA/BIeHDhJTpWtO2KsR/S/wD38Vt/3Dv+zbFXfpf/AL+K2/7h3/Ztirv0 v/38Vt/3Dv8As2xV36X/AO/itv8AuHf9m2Ku/S//AH8Vt/3Dv+zbFXfpf/v4rb/uHf8AZtirv0v/ AN/Fbf8AcO/7NsVd+l/+/itv+4d/2bYqvh1asqD/ABDbNVgOP6PpXfpX6tir0fFXYqkvkz/lD9B/ 7Ztn/wAmI8VTrFUg82W/1i2gX6pc3nGQnjaPwZdurHi22KsW/Rn/AGpdW/5H/wDXvFXfoz/tS6t/ yP8A+veKu/Rn/al1b/kf/wBe8Vd+jP8AtS6t/wAj/wDr3irv0Z/2pdW/5H/9e8Vd+jP+1Lq3/I// AK94q79Gf9qXVv8Akf8A9e8Vd+jP+1Lq3/I//r3iqvY6dwvbdv0RqkfGVDzknqi0YfEw9PoO+Ksl 82XX1TTo5Pr36NrOq+t6Pr1+GQ8OND4Vr7YqxH9Nf9/N/wBOH/NuKu/TX/fzf9OH/NuKu/TX/fzf 9OH/ADbirv01/wB/N/04f824q79Nf9/N/wBOH/NuKu/TX/fzf9OH/NuKu/TX/fzf9OH/ADbirv01 /wB/N/04f824qvh1nlKg/wAScqsBx+o0rv0rxxV6PirsVSXyZ/yh+g/9s2z/AOTEeKp1iqQ+a7eS 4toFjsJdQKuSUhmMBXbqSFauKsY/Rlz/ANS7e/8ASef+qeKu/Rlz/wBS7e/9J5/6p4q79GXP/Uu3 v/Sef+qeKu/Rlz/1Lt7/ANJ5/wCqeKu/Rlz/ANS7e/8ASef+qeKu/Rlz/wBS7e/9J5/6p4q79GXP /Uu3v/Sef+qeKu/Rlz/1Lt7/ANJ5/wCqeKq1jp1wl7budAu4gsqEyNelglGHxFfT3A8MVZH5suxZ 6dHKb1tPrOq+qsInJqsh4cGI8K19sVYj+nU/6mOX/uHp/wA14q79Op/1Mcv/AHD0/wCa8Vd+nU/6 mOX/ALh6f814q79Op/1Mcv8A3D0/5rxV36dT/qY5f+4en/NeKu/Tqf8AUxy/9w9P+a8Vd+nU/wCp jl/7h6f814q79Op/1Mcv/cPT/mvFV8OuI0qL/iKRqsBx/R6Cu/SvPFXo+KuxVJfJn/KH6D/2zbP/ AJMR4qnWKsf83WYvbW3Q2DajxkJ4LOLfjt9rkwNcVYr+gk/6lyX/ALiCf80Yq79BJ/1Lkv8A3EE/ 5oxV36CT/qXJf+4gn/NGKu/QSf8AUuS/9xBP+aMVd+gk/wCpcl/7iCf80Yq79BJ/1Lkv/cQT/mjF XfoJP+pcl/7iCf8ANGKu/QSf9S5L/wBxBP8AmjFVew0VI762k/w/JDwlRvUN+jBKMDy48d6eGKsn 82Xf1PTopfr76ZWdV9aOL1S1UkPDjyXwrX2xViP6d/7+i5/6Q/8Ar7irv07/AN/Rc/8ASH/19xV3 6d/7+i5/6Q/+vuKu/Tv/AH9Fz/0h/wDX3FXfp3/v6Ln/AKQ/+vuKu/Tv/f0XP/SH/wBfcVd+nf8A v6Ln/pD/AOvuKu/Tv/f0XP8A0h/9fcVXw65ylRf8TXL1YDj9TpXfpX1cVej4q7FUl8mf8ofoP/bN s/8AkxHiqdYqx/zbafW7W3T9H/pLjITw9b0OO32q1FcVYr+hf+/Z/wCn/wD5uxV36F/79n/p/wD+ bsVd+hf+/Z/6f/8Am7FXfoX/AL9n/p//AObsVd+hf+/Z/wCn/wD5uxV36F/79n/p/wD+bsVd+hf+ /Z/6f/8Am7FXfoX/AL9n/p//AObsVRFjo/p31tJ/h30eMqN6v13lwowPLjy3p4YqyjzTdG00+OQX M9pWZV9S2jErn4XPEqSNtuuKsV/TT/8AV51P/pFT/mvFXfpp/wDq86n/ANIqf814q79NP/1edT/6 RU/5rxV36af/AKvOp/8ASKn/ADXirv00/wD1edT/AOkVP+a8Vd+mn/6vOp/9Iqf814q79NP/ANXn U/8ApFT/AJrxV36af/q86n/0ip/zXiq6LWXMqD9MakasBQ2qgHfv8WKvQsVdiqS+TP8AlD9B/wC2 bZ/8mI8VTrFUi81WEt/bQJFpq6oUckxtMYeG32qh0rirEjpBBofLlsCOo/SP/Zzirv0R/wB+7bf9 xH/s5xV36I/7922/7iP/AGc4q79Ef9+7bf8AcR/7OcVd+iP+/dtv+4j/ANnOKu/RH/fu23/cR/7O cVd+iP8Av3bb/uI/9nOKpvpPlvy9c2zPq2nQ2U4cqsa3jyApRaNVZz3JxVMYPLHlCGeOaCGMSxur xkXEhPJTVdjKe+KojzTI8Wnxslxc2pMyjnZp6kh+F/hIqvw4qxX63P8A9XXWf+kY/wDVTFXfW5/+ rrrP/SMf+qmKu+tz/wDV11n/AKRj/wBVMVd9bn/6uus/9Ix/6qYq763P/wBXXWf+kY/9VMVd9bn/ AOrrrP8A0jH/AKqYq763P/1ddZ/6Rj/1UxV31uf/AKuus/8ASMf+qmKrorucyoP0prB+IbNbEA79 /wB5ir0LFXYqkvkz/lD9B/7Ztn/yYjxVOsVSzWr+9sIo3so4JGdiGFxKsIAp2LEVxVhcmn280jyy aTp7PIxZj+kmFSTU9JsVW/oq0/6tGn/9xJv+q2Ku/RVp/wBWjT/+4k3/AFWxV36KtP8Aq0af/wBx Jv8Aqtirv0Vaf9WjT/8AuJN/1WxV36KtP+rRp/8A3Em/6rYq79FWn/Vo0/8A7iTf9VsVd+irT/q0 af8A9xJv+q2Kq9jptql9bOulWMZWVCHXUGdlIYbhfWNSPDFWTebJPT06JvVvYf36jlpwrL9iTZvi T4f40xViP1r/AJffMf3f9fsVd9a/5ffMf3f9fsVd9a/5ffMf3f8AX7FXfWv+X3zH93/X7FXfWv8A l98x/d/1+xV31r/l98x/d/1+xV31r/l98x/d/wBfsVd9a/5ffMf3f9fsVXw3X71P9M8wn4h9ofD1 7/vumKvR8VdiqS+TP+UP0H/tm2f/ACYjxVOsVSDzZD61tAPq1tdUkJ43cvpKNuqnmlTirFvqP/aq 0n/pL/6/4q76j/2qtJ/6S/8Ar/irvqP/AGqtJ/6S/wDr/irvqP8A2qtJ/wCkv/r/AIq76j/2qtJ/ 6S/+v+Ku+o/9qrSf+kv/AK/4q76j/wBqrSf+kv8A6/4q76j/ANqrSf8ApL/6/wCKq9jZ8b23b9Ga WlJUPOO65OvxDdR6xqfDFWaappzalbrAt1PZFXD+pav6bmgYcSfD4sVYTOZoJ5IPW8ySek7JzRiV biaVU+BxVT9ab/fnmX7zirvWm/355l+84q71pv8AfnmX7zirvWm/355l+84q71pv9+eZfvOKu9ab 
/fnmX7zirvWm/wB+eZfvOKr4ZpfVT4/Mn2h9snj1/a9sVeiYq7FUl8mf8ofoP/bNs/8AkxHiqdYq x/zbHFJa24ljs5QJDQX0xhUbfskMtTirFvq9n/yy6H/0mn/qrirvq9n/AMsuh/8ASaf+quKu+r2f /LLof/Saf+quKu+r2f8Ayy6H/wBJp/6q4q76vZ/8suh/9Jp/6q4q76vZ/wDLLof/AEmn/qrirvq9 n/yy6H/0mn/qrirvq9n/AMsuh/8ASaf+quKq9jBaC+tittoykSpQxXhZweQ+wvq7t4YqyfzUrNp8 YUXzH1l200Vl+y/2v8j+NMVYn6M3++/Mv3HFXejN/vvzL9xxV3ozf778y/ccVd6M3++/Mv3HFXej N/vvzL9xxV3ozf778y/ccVd6M3++/Mv3HFXejN/vvzL9xxVfDDL6qfB5k+0Ptg8ev7Xtir0TFXYq kvkz/lD9B/7Ztn/yYjxVOsVY/wCbl5Wtv8Fg/wC8O2ovwTp+x8ab4qxX0/8Aijy5/wAjv+v+Ku9P /ijy5/yO/wCv+Ku9P/ijy5/yO/6/4q70/wDijy5/yO/6/wCKu9P/AIo8uf8AI7/r/irvT/4o8uf8 jv8Ar/irvT/4o8uf8jv+v+Ku9P8A4o8uf8jv+v8AiqvYJ/p1t+50AfvU3hmrIPiH2B65+LwxVk/m yP1NOiX0r2b9+p46caS/Yk3b4X+H+NMVYj9V/wCXLzH9/wD15xV31X/ly8x/f/15xV31X/ly8x/f /wBecVd9V/5cvMf3/wDXnFXfVf8Aly8x/f8A9ecVd9V/5cvMf3/9ecVd9V/5cvMf3/8AXnFXfVf+ XLzH9/8A15xVfDa/vU/0PzCPiH2j8PXv+56Yq9HxV2KpL5M/5Q/Qf+2bZ/8AJiPFU6xVj/m3/eW3 /wB4P7w/8dL7HT9j3xViv/hOYq7/AMJzFXf+E5irv/CcxV3/AITmKphFoGtzRpNFYaI8cih0dY2I ZWFQQaYqv/w5r/8A1btF/wCRTf0xV3+HNf8A+rdov/Ipv6Yqq2nl/XIrqGWSw0dESRGZo42DgAgk pt18MVTjzTAbjT40WO8lpMp42DBZPsvuxYN8P9mKsU/Rkn/LJr//ACOT/qnirv0ZJ/yya/8A8jk/ 6p4q79GSf8smv/8AI5P+qeKu/Rkn/LJr/wDyOT/qnirv0ZJ/yya//wAjk/6p4q79GSf8smv/API5 P+qeKu/Rkn/LJr//ACOT/qnirv0ZJ/yya/8A8jk/6p4qui02QSofqmvCjD7UqU69/wB30xV6Hirs VSXyZ/yh+g/9s2z/AOTEeKp1iqHu7Cyv1VL2CO4VDVRIoYA+IriqE/w5oH/Vutv+RS/0xV3+HNA/ 6t1t/wAil/pirv8ADmgf9W62/wCRS/0xV3+HNA/6t1t/yKX+mKu/w5oH/Vutv+RS/wBMVTCKOOGN IYlCRxqERFFAqqKAAYqvxV2KuxVJPNcH1jTo0+q3F5SZT6do/Bx8MnxE8W23xViX6M/7Uurf8j/+ veKu/Rn/AGpdW/5H/wDXvFXfoz/tS6t/yP8A+veKu/Rn/al1b/kf/wBe8Vd+jP8AtS6t/wAj/wDr 3irv0Z/2pdW/5H/9e8Vd+jP+1Lq3/I//AK94q79Gf9qXVv8Akf8A9e8VXxabSVD+htVWjDcz7Dfv +7xV6LirsVSXyZ/yh+g/9s2z/wCTEeKp1irGvOsFhPZ2y38V3MgkJUWQUsDx/a5q22KpZpvkTQdS sor1Wv4BLy/dzNGrjizJuPSPhiqK/wCVbaF/v+7/AODj/wCqOKu/5VtoX+/7v/g4/wDqjirv+Vba F/v+7/4OP/qjirv+VbaF/v8Au/8Ag4/+qOKu/wCVbaF/v+7/AODj/wCqOKu/5VtoX+/7v/g4/wDq jiqpbfl7otrcxXUc90XgdZFDPHQlCGFaRDwxVlWKpL5pge40+NI7KXUCJlPpQymFgOL/AB8gG23p TFWKfoy5/wCpdvf+k8/9U8Vd+jLn/qXb3/pPP/VPFXfoy5/6l29/6Tz/ANU8Vd+jLn/qXb3/AKTz /wBU8Vd+jLn/AKl29/6Tz/1TxV36Muf+pdvf+k8/9U8Vd+jLn/qXb3/pPP8A1TxV36Muf+pdvf8A pPP/AFTxVdFplyJUJ8vXi0YfEb4kDfrT08Veh4q7FUl8mf8AKH6D/wBs2z/5MR4qnWKpR5g1C+0+ CKSwks42dyrG+f01IpX4TzTfFUh/xN5i/wCWnQ/+kj/r/irv8TeYv+WnQ/8ApI/6/wCKu/xN5i/5 adD/AOkj/r/irv8AE3mL/lp0P/pI/wCv+Ku/xN5i/wCWnQ/+kj/r/irv8TeYv+WnQ/8ApI/6/wCK u/xN5i/5adD/AOkj/r/irv8AE3mL/lp0P/pI/wCv+Kq1r5i16W6hiluNGKPIquIp6uQSAeA9Y/F4 YqyDWNKsNYtlttRBMSSCRaNw+IBlG/yY4qwefy/BHNJHF5fkljRmVJPr6LzUGgahXauKrP0En/Uu S/8AcQT/AJoxV36CT/qXJf8AuIJ/zRirv0En/UuS/wDcQT/mjFXfoJP+pcl/7iCf80Yq79BJ/wBS 5L/3EE/5oxV36CT/AKlyX/uIJ/zRirv0En/UuS/9xBP+aMVTTRDe6I8i2Pl9oxclBKzX0b7Lyofs f5RxVm2KuxVJfJn/ACh+g/8AbNs/+TEeKp1iqQ+ayBbQVXT2/eH/AI6deHT9ihG+KsW5r/vvy1/w 3/NWKu5r/vvy1/w3/NWKu5r/AL78tf8ADf8ANWKu5r/vvy1/w3/NWKu5r/vvy1/w3/NWKu5r/vvy 1/w3/NWKu5r/AL78tf8ADf8ANWKu5r/vvy1/w3/NWKq9i6/Xbf8Ad+XR+9T+55ep9of3fxfa8MVZ J5stfrenRx/Uf0lSdW9H1vQp8Mg58qjxpT3xViP6F/79n/p//wCbsVd+hf8Av2f+n/8A5uxV36F/ 79n/AKf/APm7FXfoX/v2f+n/AP5uxV36F/79n/p//wCbsVd+hf8Av2f+n/8A5uxV36F/79n/AKf/ APm7FXfoX/v2f+n/AP5uxVfDo3GVD/hvjRgeX16tN+tOWKvR8VdiqS+TP+UP0H/tm2f/ACYjxVOs VY/5t/3lt/8AeD+8P/HS+x0/Y98VYr/4TmKu/wDCcxV3/hOYq7/wnMVd/wCE5irv/CcxV3/hOYq7 /wAJzFURY/73W3/KP/3qf3P959ofY/yvDFWTebLf6zp0Uf1JNQpOrelJN9XA+CQc+fOOvWlK98VY j+iP+/dtv+4j/wBnOKu/RH/fu23/AHEf+znFXfoj/v3bb/uI/wDZzirv0R/37tt/3Ef+znFXfoj/ AL922/7iP/Zzirv0R/37tt/3Ef8As5xV36I/7922/wC4j/2c4q79Ef8Afu23/cR/7OcVXw6TSVD/ AIetlowPL9IVpv1p9ZxV6PirsVSXyZ/yh+g/9s2z/wCTEeKp1irH/NzcbW3+OwT94d9RTmnT9j4H 
3xVivqf8X+XP+RP/AF4xV3qf8X+XP+RP/XjFXep/xf5c/wCRP/XjFXep/wAX+XP+RP8A14xV3qf8 X+XP+RP/AF4xV3qf8X+XP+RP/XjFXep/xf5c/wCRP/XjFXep/wAX+XP+RP8A14xVXsH/ANOtv32g H96m0MNJD8Q+wfQHxeGKsn82W/1nToo/qSahSdW9KSb6uB8Eg58+cdetKV74qxH9Ef8Afu23/cR/ 7OcVd+iP+/dtv+4j/wBnOKu/RH/fu23/AHEf+znFXfoj/v3bb/uI/wDZzirv0R/37tt/3Ef+znFX foj/AL922/7iP/Zzirv0R/37tt/3Ef8As5xV36I/7922/wC4j/2c4qvh0mkqH/D1stGB5fpCtN+t PrOKvR8VdiqS+TP+UP0H/tm2f/JiPFU6xVj/AJtkijtbcyyWcQMhob6EzKdv2QFahxVi31iz/wCW rQ/+kI/9UsVd9Ys/+WrQ/wDpCP8A1SxV31iz/wCWrQ/+kI/9UsVd9Ys/+WrQ/wDpCP8A1SxV31iz /wCWrQ/+kI/9UsVd9Ys/+WrQ/wDpCP8A1SxV31iz/wCWrQ/+kI/9UsVd9Ys/+WrQ/wDpCP8A1SxV XsZ7Q31sFudGYmVKCKzKuTyH2G9LZvDFWT+a4vW06Nfq9vdUmU8LuT0kHwyfEG5pv7YqxL6j/wBq rSf+kv8A6/4q76j/ANqrSf8ApL/6/wCKu+o/9qrSf+kv/r/iqJTy/qMiq6eX7BlYAqwnYgg9CD62 Kt/4d1P/AKl6w/5HP/1VxV3+HdT/AOpesP8Akc//AFVxV3+HdT/6l6w/5HP/ANVcVd/h3U/+pesP +Rz/APVXFV0Xl7U1kRj5fsVAYEsJnqN+v97irPMVdiqS+TP+UP0H/tm2f/JiPFU6xVIPNk3o20B+ s21rWQjldxeqp26KOD0OKsW+vf8Aa10n/pE/68Yq769/2tdJ/wCkT/rxirvr3/a10n/pE/68Yq76 9/2tdJ/6RP8Arxirvr3/AGtdJ/6RP+vGKu+vf9rXSf8ApE/68Yq769/2tdJ/6RP+vGKu+vf9rXSf +kT/AK8Yqr2N5yvbdf0npb1lQcI7Xi7fENlPoih8MVZvf6bY6pCtvfwieJWDhWqAGAK12I7McVS/ /B/lr/lgj+9/+asVd/g/y1/ywR/e/wDzVirv8H+Wv+WCP73/AOasVTaGGK3hjt4V4RRKERR0CqOI H3DFVTFXYq7FXYq7FXYq7FUl8mf8ofoP/bNs/wDkxHiqdYqx/wA3XX1W1t2+vx6dykI5ywfWA232 QvpyUxViv6X/AO/itv8AuHf9m2Ku/S//AH8Vt/3Dv+zbFXfpf/v4rb/uHf8AZtirv0v/AN/Fbf8A cO/7NsVd+l/+/itv+4d/2bYq79L/APfxW3/cO/7NsVd+l/8Av4rb/uHf9m2Ku/S//fxW3/cO/wCz bFVew1TnfWyfp+3l5SoPTFhwL1YfCG+rilfHFWYa9YHUbNIBfPp3GQP60bcSaKw4V5L41+jFWP8A +F2/6ma4/wCRn/X3FUx0XS00m6e5m1p75XjMYjmkqoJKty3kbf4cVTv67Z/8tEX/AAa/1xVtbu1Y hVnjLE0ADqSSfpxVWxV2KuxV2KuxV2KuxVJfJn/KH6D/ANs2z/5MR4qnWKpB5suUtraBn1BNN5SE c3t/rIbb7PHg9PnirF/0rD/1MsH/AHDB/wBUcVd+lYf+plg/7hg/6o4q79Kw/wDUywf9wwf9UcVd +lYf+plg/wC4YP8Aqjirv0rD/wBTLB/3DB/1RxV36Vh/6mWD/uGD/qjirv0rD/1MsH/cMH/VHFXf pWH/AKmWD/uGD/qjiqvY6nC97boPMMMxaVB6Y04IXqw+Hn6IpXxxVk3mj9Bfo+P/ABBy+q+svDjz r6nF6f3e/wBmuKsV/wCQZ/8AFn/Txirv+QZ/8Wf9PGKu/wCQZ/8AFn/Txiqrbz/lxa3EV1AZFlgd ZIzSc0ZCGU0PuMVT3/Hfln/lqb/kVJ/zRirv8d+Wf+Wpv+RUn/NGKu/x35Z/5am/5FSf80Yq7/Hf ln/lqb/kVJ/zRiraeefLbuqLcsWYgAelJ1P+xxVkOKuxVJfJn/KH6D/2zbP/AJMR4qnWKsf823f1 S1t3/SH6N5SEc/R9flt9mlDTFWK/pr/v5v8Apw/5txV36a/7+b/pw/5txV36a/7+b/pw/wCbcVd+ mv8Av5v+nD/m3FXfpr/v5v8Apw/5txV36a/7+b/pw/5txV36a/7+b/pw/wCbcVd+mv8Av5v+nD/m 3FURY6x6l9bR/wCIvW5SovpfUuPOrAceXHavjirJvNl19U06OT69+jazqvrej69fhkPDjQ+Fa+2K sR/TX/fzf9OH/NuKu/TX/fzf9OH/ADbirv01/wB/N/04f824q79Nf9/N/wBOH/NuKu/TX/fzf9OH /NuKu/TX/fzf9OH/ADbirv01/wB/N/04f824q79Nf9/N/wBOH/NuKr4dZ5SoP8ScqsBx+o0rv0rx xV6PirsVSXyZ/wAofoP/AGzbP/kxHiqdYqx/zdeCytbdzftp3KQjmsAuOW32eLEUxViv6dT/AKmO X/uHp/zXirv06n/Uxy/9w9P+a8Vd+nU/6mOX/uHp/wA14q79Op/1Mcv/AHD0/wCa8Vd+nU/6mOX/ ALh6f814q79Op/1Mcv8A3D0/5rxV36dT/qY5f+4en/NeKu/Tqf8AUxy/9w9P+a8VV7DWkkvraP8A xBJNzlRfTNgih6sBx5ctq+OKsn82Xf1PTopfr76ZWdV9aOL1S1UkPDjyXwrX2xViP6d/7+i5/wCk P/r7irv07/39Fz/0h/8AX3FXfp3/AL+i5/6Q/wDr7irv07/39Fz/ANIf/X3FXfp3/v6Ln/pD/wCv uKu/Tv8A39Fz/wBIf/X3FXfp3/v6Ln/pD/6+4q79O/8Af0XP/SH/ANfcVXw65ylRf8TXL1YDj9Tp XfpX1cVej4q7FUl8mf8AKH6D/wBs2z/5MR4qnWKuxV2KuxV2KuxV2KuxV2KuxV2KuxV2KuxV2Kux V2KuxV2KuxV2KuxV2KpL5M/5Q/Qf+2bZ/wDJiPFU6xV2KuxV2KuxV2KuxV2KuxV2KuxV2KuxV2Ku xV2KuxV2KuxV2KuxV2KuxVh/lPzZ5VtvKui29xrWnwzQ6faxyRyXUKujrDGrKytICCCNxiqbf4z8 n/8AV+03/pMg/wCqmKu/xn5P/wCr9pv/AEmQf9VMVd/jPyf/ANX7Tf8ApMg/6qYq7/Gfk/8A6v2m /wDSZB/1UxV3+M/J/wD1ftN/6TIP+qmKu/xn5P8A+r9pv/SZB/1UxV3+M/J//V+03/pMg/6qYq7/ ABn5P/6v2m/9JkH/AFUxV3+M/J//AFftN/6TIP8Aqpirv8Z+T/8Aq/ab/wBJkH/VTFXf4z8n/wDV +03/AKTIP+qmKu/xn5P/AOr9pv8A0mQf9VMVd/jPyf8A9X7Tf+kyD/qpirv8Z+T/APq/ab/0mQf9 
VMVd/jPyf/1ftN/6TIP+qmKu/wAZ+T/+r9pv/SZB/wBVMVd/jPyf/wBX7Tf+kyD/AKqYq7/Gfk// AKv2m/8ASZB/1UxV3+M/J/8A1ftN/wCkyD/qpirv8Z+T/wDq/ab/ANJkH/VTFXf4z8n/APV+03/p Mg/6qYq7/Gfk/wD6v2m/9JkH/VTFXf4z8n/9X7Tf+kyD/qpir//Z + + + + + + 2541 + + + application/pdf + + + Adobe PDF Library 9.9 + False + + + + +endstream endobj 5 0 obj <> endobj 6 0 obj <> endobj 3 0 obj <> endobj 9 0 obj <> endobj 10 0 obj <> endobj 11 0 obj <> endobj 22 0 obj <>/ExtGState<>/Font<>/ProcSet[/PDF/Text/ImageB]/XObject<>>>/TrimBox[0.0 0.0 585.0 783.0]/Type/Page>> endobj 23 0 obj <>/ExtGState<>/Font<>/ProcSet[/PDF/Text]>>/TrimBox[0.0 0.0 585.0 783.0]/Type/Page>> endobj 24 0 obj <>/ExtGState<>/Font<>/ProcSet[/PDF/Text]>>/TrimBox[0.0 0.0 585.0 783.0]/Type/Page>> endobj 25 0 obj <>/ExtGState<>/Font<>/ProcSet[/PDF/Text]>>/TrimBox[0.0 0.0 585.0 783.0]/Type/Page>> endobj 26 0 obj <>/ExtGState<>/Font<>/ProcSet[/PDF/Text]>>/TrimBox[0.0 0.0 585.0 783.0]/Type/Page>> endobj 27 0 obj <>/ExtGState<>/Font<>/ProcSet[/PDF/Text]>>/TrimBox[0.0 0.0 585.0 783.0]/Type/Page>> endobj 28 0 obj <>/ExtGState<>/Font<>/ProcSet[/PDF/Text]>>/TrimBox[0.0 0.0 585.0 783.0]/Type/Page>> endobj 48 0 obj <>stream +HWksίO)| FB \TGǰ7[ߺ&4D#a ݺIse69k]W 8}Y?dCo䄮Ya&Oic]qMyCH[m>Z*޾㢌Q}뺣wޱSj3|.167}pY=NE.(cW̳(ajnEyf"J%c;xqB icE Q{)xTV˵l'l^f1i1gz)D ߁XB w+Y>\1@QW2Q!JbbJ,r% Jf%7e/a> 9 =63@}z,gTn?.S 0~Jv@-VUA)K$*TN:AN"VDEFwgN}حw|Jr87@ν0K\`bVr.S?g3rFɓ Ȕ*XaQ,wJ7-!Ϋ-6Lbxr9}j>otY(6G F'0 6^<v]C| QGI9pbԟS~K=vn$ G|mcf{6od +hHA,QitSjFxUX۠7fZ$NY0Ȏܦ~znoN Z͕Ar7t\.ьI½Lu.#x+5X!Kui~M"=~p=ah1 |EU,M|0hZB &XV"|Fku1AhZt@D¿glCf/Wmt90K\-_mkvZ&b_29A߳7)/ -hY"DtWj[k6fQ>R{lnq9?mE +fg+%xJ]TW2m2;n۩v].ʙƫU"T>O^:yͦDr5wn=y?M2@,twr +걨oĂd;8*$v =:{KX- Jʤ@`Ǧ925.f@ya +b*X)۸P) ICDPy=nH1"]&tJ@n&T-9 "MWlC yc;T#Q(*]o(Y4'MK6F}NDH+bA|qW9|B3M ?ghXa-qYJF+!h/p`#-} @?#4c(ڐ T M-h|6~R?{Vɳ$.Y!Sq #dEwSb> QXIJJuu+ѠQifsKB$8ʹf3U*{zCt4#>'6I V,oZ_,{{{}`]%J ճ(l!s{ӫLc7px.S(#Dg"N',1B-cz kw( A۶1"*`$@* Nj1k]tM0l`AExi'$ +$׈q@@mެ! =*0ܟ1PfY T{[/D?IcM3.M w*/(4\sYn$;L-Ƈp00hGC$`8{yEæе:Hݽxa#+ sQ^~MQϧiT a`ķu5nM'! 
ѝ|{aLrh{~s]}ėg)]U1{%+bt{HϬ.3`3S.쨧K\=Wju xΈČzܣT-$ ~tuib aA%#臡cz)3Ep4֛5K ^*NO$!T vؠ?uSyˉ{=+{f2E3Nnq@XSuS@l[}cAUeNoʦ]^D`ʴjpqBd28D xOF H~&BD@1,$oS쌘\玁 Bz;˚vUC`~gd.֗T^Y`|.郧0<·|a)u p5^P'*[lK垃eYr;l)]ݥpb볦Zn;\a%_ő]ZS8M]0"P޸2S=IBz4RPoۼI8UͰuYcѓ9SAN@E՛Qީdȉ&@Lg +L1+Z>#XtڤG k,]MlO^mzhZdϒKL jU X։ `mgDm +%h6l9UGM]4>⩮c3ey=W/lTA$#t6~-y8iaRY4Ea1[EZ ǃ=:DVt RG!p -y<6,e@r +D0 ǚF?eg&FstZooa6&նۋ,ixХa⋮P Q|z}JJ3O(h6*nbQBok-ސl9zr7ףYB]Ν/Z޷Q[od4 d$ovn̜ t N5|k'~պ@49ֵ+T5Z O߱G483S}l"+} -x?SL$Mֺ}\(&'\p1Inq C%bи2K{4|{)lma;qoLW~pJW agƢ|j睝_=0\i,V)uְlX _snηLkvM{27'RBp~SvЋ[傚x"@]0YFP#.<چU?{jIt~s_v%wsꃅtnK[)\|?28 +endstream endobj 34 0 obj <> endobj 35 0 obj <> endobj 37 0 obj <> endobj 41 0 obj <> endobj 56 0 obj <> endobj 57 0 obj <>stream +H\n@~9&?$ @"q-`X +5oS(+%,\_54vlqla6~bs ~t};-<.4fum_uwcx hm>% )zmpJLߛK0b{wz?ߟS0r^p6f8.ұ6[:YkEԾ71+\#iVgg +zKQ__|3mI]BWB-ЎAǂzjM6 6K6 6$- +s\A0W+ +s\A0W+ +se}~J0+7eՎss#####ҫ* +ҫ* +ҫݕݕݕݕ}}}}}}}}}}=zd`[UQEc%暯}ko1U[{1lW?iLr`-X +endstream endobj 58 0 obj <>stream +H{PSWs 7\]1fmhmEa +0"Hbb!(kyg"` +*+Zw[X=/ݙcg̜?|7w  ܸ12K*M"YYKbE>BJ+a_ ϭ )XERNeٝl#T#vL!)T"-dldGOЦbx$kDT{6x6wJ$8QJɎ+ +5)2?~UsՓ=P7#"fDc욤5⁸XO?[X",^XsR܏f[ .9&t~=ŧQX\8.jvu ishn'ą|}b4/U)w1L)=Hڇ˦GÓ8gmCDCRXJZ8=gzMM5SH#nC_yζO4Ir\ij=*nK%*14*l+SC0N D^t<$Ji[*WT2s/+f!E%Eh2&G0k=Nt`F7usq_m  Un AS*Sjq)څЦ]G$ŏmIq@KL?\M>ΛBr(%'_L\C{l/gW@:$Ν[*8%R-Y{{UF0 ڠ)G TᘈHLIoFa9aa(~,qt3hnw^n@շ9^W*lJǎ(8"2 +1Go`"GD=˨cXfP D{zR7Ֆl5Fĉ>1+)x\^\9/X_Y*Mply~m+ZWnƙ%!2pAe BeFmc9O'rLE[qH;> n$ Ζ@rT)C#Rt +SAt~Ló+73r~D&wT+:DZ^Le&"yV鷲o};Che$f&M17@@dIcؗ|Ps}  ϫƫ5/!AJAC$2Ff%f1X4*2,Ť#q߱0z~čإWgns(GS9#A6!-z_ƶw4?%SVUH:i)Wl(,u*xg5#u2chSC+݇*iY$tnVn#fWWA<9wH%Rr5A*BN_DgB_OZvn}FrSYԓѬυS/"Z1+Z)RWZmf-(#X0v>uw$L'&3 ɚxy7-­|T%vq/F\Z;K:8z|ƲyDӘ"K}'6J$ ֈ2,?И($P fAwTʃ{;ZD\C(q DH@Ui*U2dahx0#o V?9?,B΂u4'Zd5XY +Fmd. hqFD >GwE(+f5م8S%U5uN}$FxWϳpԈLXZpȠ{^U+AhʪэvK+kwР#c1d4(wP"~1DdHaZڨ$>3GUbRmgl*أ9k>ç:9j1][ CYr=-> +|'1. qy{|>5pDfD5efEmȈHq}\e"pΗ_,?'P!FRj7żAe9!n,m68IQraz[6֎j\o2 +_| =@nk98-XNz,Y2=%12z,BGrJ S!CEXP0[kV꫈|gcrJV/b7Ѿ`>9q&&35E|Dᘽ(&mo8ѣճM!ȲҿRQ`M&9q7% xhT^-MC%D|U_ӊ[/>tΠ# lWo/lNMDn;N:L]: hkX !$@DZcq<~|~y'Nb(]I@J(ҝcA4i}?y>^nyY[PW'@>ɰOS:tVmMwa Oh X=[Le{RU~ 9SՅxhX7.Qܫd1=5LDo?_iq;EGVVՠm:|gI2v0o!0NJf-|AYPjOELD@:åD('8D8S FcD?yj}chgų)}0$T\]>qaqH7#GnѢpG[M\ZS= H{ &u~X4< x?6H8Ӱ.- Uۘ"+ z#gv70Ϧ7I*a |PN& zg#W2u>  M),L_m)viGm]E}Jb<0eBܦYJ5ؒ8U8w.נkyd>;6'PC\ L4-UV mL H<;yL~š_Ɯ wq\-uMjHyzf̠w,z/l;!k8_8}!FLj~~Cs6kU@YH?Pʯ/G20#8d9\6{8^mjUww8do2k&qqw|{X7RqQ?O=4jW胬P]ka}%+!_EC6D܅c 6dա0Z]sO'~h\NQ K:ڥ̀E|#p 6^+oTΎ?*U޽uu4\'m8s5`d5vf[跹}P'$KېV7MEL+w\AiT\P |b4ȳZp $ @i9 {NwtAŏS7֑[I[+hՙm*eg(+d gsh@ _#3&@҄67шdfeCLSne7T +3G8ۣ`&?&bnnݴ8zTNORo=U%vYuoL-69mC{.ѯoϚ8%^ˎgHq Uh]x +'X<湹8Y{vwi;`T1ۻU m$*пkҭ@~hW0D97>1K3߂:Jy@`k@]||}h p4k"ܷ0hA-U&vcqĭmJ)׏/FO_EzvoVCF^:QVZmnY_7Nځb 0 0AY1O_&Mɠ v%lBf>^C=l8bu 2]lg=Ξt +=;7ԯgQn\77|W&~zkȈ5]1.}[ lVpAسѣo2cU\3WҊXߪ6+u ,eg#,΅hŦ8c|PK>I8u]|t SGDoVX m!$@&`sP,#v9y:\Y7 Y <0>~ᧃQ8!$h8 +]D6byVV}uF X +p,8xyMk Blsh鱎8S 6Oh ydUebdumz&((! 
_x$9Y,cc]bk11:]R?ЏWy;딃gkk8:s8L +ztڞ ?%m"BX'g\{I=Dco>!םM5Bxgn65V+H?v +H4SyHd|FsjJ[BFA N\:SР={hoFj S^(_VR'&Mkh)jJQGg l^{E<;&_)c'YQרZ=P5Gry VߏoVn 8Us;ѡSvmͨR8m69C4t< +'`_*g@sMzX;,K>Zڊёgc?2ȣh.oUy_._KÏ-葺kP4 '(O_6sb!<'[\TDQ"w#huT %Ū跔9Xjx}oUZ͆b[e˭ h6';1ؑWqѼJD)7xO-{RM*I4e) +4&:zp41.s*s#Y@+$;CH6Me>,*%%WeVʅK"754ThvO;-pZt!$צ'ޭ.mx"i,;ɷ@Mu϶inQ[2M<+vJtBՓpD5|d:EH#Պޥ R69 +|@*Vm>zeb+՞z<٤ı3%;}*M_bRKP67L=R6쑼dѶCԲřxV myՕR-fX "|;YBY"bsQNﬖ}lbUAg;95Kd֬%܊yM{ ŰH۫W#lHX7 q(*2+ +PX6@v+$V`=C=[- Hϻ[=Vx:6kt,cˊb#q\%ַU~'wojiGà+fޔqKSٮd;R8pώz)U>P @y lyMdWM[nyp,$Q0a~A\~,W_^S&En!ū*QdRlzV^v_NK/fh)s9@;+{^B+#@#Kij>[*G}%VI7jd75Jc`1_l-s2齂~:s+;ͤ;FlZVӒA%FF)vő%7͌4+ ]Da]uX(,Wqie֤|Ì{cy)^:Bխ1WWX_.x +ZII'=sf^GS+i67lRqɬқSSꏧB3V :EزDS ++xzmg?={s>?)unj`#{hy^皞@DWMljRB`6AJ,T!ز/v^--ͫ^rlP +:/?^t`Je1/O|5aTX0+AKDASc1=x:oSlS5]-HE.7yPM=EJ32hkd.A@ľGzJwIK&-LZ5mʹ=g +Urg;+D_Yn+:=|0oMdq29Q!oYɦ3aaR"gozL/%y9iYYה=r,?_$43ͤKLxΥ4(1>I /0rD-v/aGkK RDa@4EjJxpEAmsښ*:Ku,TZ?`)`ܘ pa( hcP5zk&JX{g1 Ӌb&νxc UaǶ=}Q&wݰ:n7qLTE*&1YX*J%+6rw+>U'iI-F3;_9yC9"Ym ,MAhKzo5Ƀ0lCij#q ӎYRt~G` 6R3i-a?CdLM7Y]E,$qG+Bu$NKVs.oe¤?"gJ5\[K6StUM=ۅ6N(9 ֔GL4`=֖)mO8m?N!rf촮-kh찠^5u$j{Ǫ(:8Q8IXĸ=IOvMxMgneӽZY\8|Mۈ ++ě#@8YM'7!.v +(G^ĵ(;;3VFz+͕xE;ztciUYi81[(5AIN]G^ń߭|/ԉhc0r0R^%+D[ SS3݄ڈ=]ZZ`>c<{ ȩga!B C¯HyS_eJD~bvPj~H?#Yiqm,tkoF.}I,$;,b&.2h`_u"lL;*+?9.U+6ԎDba}@HP(t@Qm&aڑjNPьv$ Mt-3噔UFݖ(*&4XrȆ(8犧Gx^sZa6:T(&JdL9I7lLJ94hL LɈQ,1KrV$DGn>^3H*"1BKd\%U)3ΪM.z(Ֆ_}~f-=xj 6=﷨.Zs'aM>J&JTȅ5rrN?7Wnb5)S +rJʺR՟8Ui^AiPem86d)\ ֬.eUAq-]!2yz zb>ݴn /$\A(: Q<R@K U`]5 eLs O\~rӋQA%2A%':XP$ _D_` s52PiȜrLngv H2'FB- }ϸFj6\^ܭyk޺j喈xV& +@!**~?{~M+vslXmɢL~ZLp6d'}*=޿!hdIszLE,Y$o4-j7eҙ_:@O=꿠O$^Cyص6%AGX{tőN"ĵiH@}|Wt R&]@+&ѭ9&&BL *8EGAыoURmjR4%*jr?JqCA)>lRlSJQՓMD\(]j,W7u*$nºWZ=HMrHYh7Zᅱʱ7ie ],g |:=umNSrϨ=6d `!pCtes)+AmUy.`[':[xVK HXn@ gz0gS|2|hn)>ĝu]? ]%#ļQϦ{s?׼:tpP{8}[  Ӛ@[^Kqs"!zK#9 +ہPu '7D09b;~],^dt! 48pPk0H-6(J^,ږIA}<`&ߥYۗ`2aW|Z5[w ,Xr= |;/8jߘ+}_M5TI?4M05RҋTC0v53*cSЊW^ i!xƏR@j Hjh@jOaHm4 WB+9,+t^'M0|{8PScOA5&LUܬ۷@uop[bt7ӈH]!.c)cUci,ċZSɲMkOkG# P(!T^ٳ,ޤnKzC>A]Α!86M= *abs"o{?ޫE/>$bDd-_1k6.Q\/m,$,@z}\(B÷5 Ģ"ŐA sjNEc&fRޭ5@x6vE꥞ѽ=*R*s!)D:(U~lS삘J[9)[seYRd p+z$W㰫U!Q}m T:<qi`s 5O&uJ;Ӹ;)$+6ARWh4[_b^,0gCӐɍ +wkqjxtY|e?)4@$o wɔGLlh_A/('9EvA`{F\.Xu .6ӭ)I큈%MϒR_%%Ym3's}\Pb<2ugU;<*k||uizW LyQ&_˅ba8f_a8Wi'*k!(WBiw1:1/=.gB-N% {bϫ/R.)=FJWWƄ+z|JjRY": fs+zָO"<>>6D hl!۬aٍgmfd3KHeR+ln&)^pmHbhu|-E + 3fԘ$ (4R/ + Qmݹ~r +1P3Pz,E\闿U tSe&GcSZh2k8ebYo.ET_ӊ^< +k2K{bߧdPhRy֪UN3OW5ЪFsmJ~fў%/Sdaqe1QD!aHgMVf8BĂzuUp7`jf;)6=K07aRs傞1G`IzWl9 a*g:Vw*WB=jW(bY:Y&zM6Vť'4)eMc')43+3sAu*vMNa9(-<뀦5WN 6V(:N2ֳV$ 78PΠ^| RYKjszbaq}M5:Aft5^Ӡr{Ѳ韥d:]SZm,#0 +X$[c5d48WI>m7G*[e?,!Yairv9ԅʱD?mc4)6B0;ЮY.s)|¨=R"0P*ӵVazĮ>f'5%T듙ٖǾ\gF52SNfOecqZE+FfeLT NOͶڝm'5ٖݩBcOͶꓚ>i,hg%~jߧ۶EXƇ- p!#S824u3$-ߖHwL *B֗(y/$ Z@TQ'Deq' 3ؑēz(֟I AУ{ĶɄ39 wį #*K dʾU͹Nh"BYUB&__hW:]0ot'Ԓd Ldar|4$OJ/mKп>J|_!4xtMTϒu{>2? +8AP>x`t {>}^"[g MLY m+: "&ə6Y&ũ`9dɦyV5!b\h*z+u]3c9 +Bu2קb7yٗ6&aKSX;~"HuS| ?Z nMv +BNO"bk&7٨DʶTI\ C]#Cd}0X˝%ZԞ-p²x:uTۅ4`ԏ[(p(^(tKrms߸ ~{Y5;լ#+t271LB.A +M5VձY\*Nc-XS 0yhmV%hc)l~H+M:7v鲢Dzzz{6ZyI*"DC|U5 ˾!s MzZh1Ztmxd5ep%h!O{]YgO5v)ZV!yp4\¬#ۥn +7*|:/yXTTPB1tZn2* ۉ\!*Y Ro9u`*P peF`*)[@Ʈ" ~RՍIMɉB)@!]3#e4@~ gz{7ȋKJEZY + Dz&bqYK*j>+!%P Iٹ]U°y 8~V{,$_z\?eˊ۽]64{ē>\*,`*vk -?GAO_5N,R "iX"!C +Yvr%v0usG[X.%W FTH:snOrw>o=^o"y'e}OD۟qQT6!bA}\ ~T9H5;\)Tyl *]p#Ze:⪞` +T '0JR=` +P0E" +I΢.qY_4z^۸?-Vt,> c8`[ZEü[[\^ۢ-YEKbH" +Wu;J( %KҹG1DZ4r|c _F+*'4 S ѥm-tLdc++%By,yM8"Fp+gMu,$T!؁H x$%~,CۍCR@TqJ|絲o_У:?F|~':ehi)]a2BțvOJ! 
5dz3PԈԩ:)Dii+`\l3U OOm=t5{%ww"Q&Ƭ"-fL+ɶ<\k + #LclѯEδRysζN]FĮ6n9vֈtmD9ay(:hNm#x=[u40Rzzӥl-!>=8݇cƌYU]2Zw)T3mC9 WwjHhK8Y5Neqqa0kmCn~uyNJ} OH)7ո* FlI#m)$,zjrEjɱ$>E]yxQ¾?`E̪ z'DR$#b/ twG$0"etq&3 Ǔ臢GeT+g\ZǰLb<1=:6VLSi; I|oO_fp +'|ApɧΛd4WAo(!5A=YK3LfXԠ=7Cj(ùWOMs-=9E[WO@(MńsO7Y;;LR9!I%"#@m9)v hTFW,阎Ť5QpZ hd$ǿKpv{2} @!ӁhW<=#g,.${㙖*U>l wL!=kKUvy%9Z'~VCxN,  ooަUVؗ_΅~0 _ ȟ7~W0 F/:WL*?c:'U@G#4O.ZPzN sh&qܻv6vkRh1aചSSX0*fR'C5||% Hm pN +OZQH>Qsy +R=s"gMt$&UYӮN ( +#hÅ&.Ň-G+L>SzGcq#$NLw>~>s&ٲ v +KJۍ4m4oK5 >gg5;nU4%9ML@\[…5vnn; +M]4v9B;PC7߁ݵ6SKHV/SctC 5q`8󝿋cP9ds~׾+l3>i{kgKԬ=._9bywλ"N6I +偎Ǖ3b+[ ;䆘Dbľ‚蝎@:z3ЙֺoyQm,0ךmε*ޢ9󆚏ÁCOE]S)(f}2|Lzf\Y}M g.l[:& ZX'uqT(…2۽zBA~[ʈTHOe0L:?R:G>v.̐epM =+:P䯁YCϥ&1]dqm3*U꒔+NaZ +QR1 ;jKARC~6D5k iTD4`kuxs| Buj) +]iYkkV/@I +1wMSm>cH]Qekw]J#0u%ºk2nL_lS޴b 諴0BШ.WW4j9^( +,mSU4f Ļg !m*%~KY"( |dr)Ck~m!EoYl!M%ADğd`el"O??:$y&)߽YyE +1kz̅ȏ)Wu5|YCܪ $sfc*rub㒪 +a976i"C!Z9a0V7՞ڞ@_Y׍b.#?ƪÞ+^}e! cH)|4&iD{ZM庸AZw0ݭk2sQ,}QB15df2.RaQ _2>w1$Wn|S*Z |[ogo7H36 ~S|sV˹'ץIBooce!3 ފwK$1\Ҵ Fy1F~ɃV`rg:LňP;YKd*|vlI&{Wu܅^/P/Q<*FzES6JD9`mg?w矬+#K$gDF1^O / +Y/m5V[[k7uyKPbJ,IqU]$=*{ B96*sBDZrVv2(to[зhh (DOG'RszHO\LAN)̏ȋX xI +=(٧`s47sPلpxH 61,wqG}7uZ7N5#_&4սb8yWO&lאjעifgWj.?ϨF.1mhi׵4"eKɕ KUa*| PQjKSOr'Z7-S@ї ([4s,)gE >opN=QaCWQsJɟz]WBJ(qjp6|zj%NfD!$k`O+N*JbX 6PFOߺkeQ{[5x,u§sk] 0q^gƹֿ\lWPqniqzWbxz>Fͫ}H4kTY~浲+lMݬC4G2"BFTtmQZK0G Cs6#o_mP'DȌbA1CG38ov*p %Cm_1~!#*&PBqap8=WZkSm.K)o|0D*7&=Qi).z1:X9qѠDJn)8Z,.mza6&Й :[GTpS4YZ Z(dYFH]HTFR;24rn$-CN0rAoU*]HLE޸ھ +xpr`q6>c6T{rN}˭uCqxs hW_`Cv[ ZY['d"KkuUޮJ4Ga8m܎,eEʕ,@{2'tbz7 !2NS8L[ڴV*xQ C[0OEYuIࡺl&|rp=tƞmr~'NtPHm'^zФ$KnIckސ;oQ&or4QL1[ZB!*^2zնOyzEpkƛ/Q<뎠)]Q4[::)mt%Ɩ M $٤@@ꕔQ5 vWvMUgs~eGAjfӑUN5ܝŁG~xK窸#P"3Xvo`)(t|vQC`SCٿuzgî0w,+!Jt?bC{^>HNHqT{ӳ[ 8+[tpeE༮}ic :*^.,֭.J[H ~`4NlrVM҃}6 $9@bt-6EJ jczȂꊌ?«$َ]'pH|ojiWG~Rj@6 Gكu۔[0uIUU,Oo\i9k=%Tj~Ug.".$frf 駫 jG4*|VgTdeb#mxDA::ֻ$7Ti܊FwKDwg]Ι cH虔M5$gBΛTzq>lW3gWoouEDdJׯ*fYzDŽ*6ZO(g5r";v̝=b{%g d) ռ,B-!MJb ,m9-r>Z1֦7bx7IQ]d' NqȺ@}x\G7Q%U- $$yr!ww{rWDVsU*kO]<麔tZ'moR-&\]al +s!3Fi5'5)EM[&I@=>tLt᳡W-X?X2CMCժp= %kDNyR4a4HC4)ĮBaBtO@®?T}K{З}*ՀZ{ i2H˴Ԕjmң[o)nq$^wJ6saٙ +@N 5vJժ>Pxi6xT\6{+8^J+|@̏縡\i?W Y7!;$OuKfj=ٍۡ'7nyy>z=gk¤Q[%gx1%?|=Φ@eӽ[Tu KQ Pd^mX(LʂFi޽ 'J6$7-{(\qSgo=XP?Ў9e>'6w H}LN𻡉`b:h>51-*zoUD*yG?5Lj^!wG{n?oFк3Y i,d'Ņ&WU]g6Ƀt/hV_éPIEHPXSųJr1ARbeI焸L{#hrtO%SY%,!;;Y'YmXce%SylzM{ v1;$65ͨʮu,lkm"v6-&%8M 2yW0 rfB`:%5BPb;?Tf Z"N"ӹv$;,CrʮHPVUAYA8gXB6V 9S +5F\Y#Tx"O܃ݳZx'rKwT(K4u^SMpƍS|A7 +(kbGqbܚa3pH(w1 3PAt&v jqtHBq}15.>xpneDV_IQ{e +bq~T*Ewh7ya PGP.ڳ7[R-m(,idB n.NXCQwIXG]8^Y)k7nϘ=CU417JsfS(ː*z :KMmDO`NaQEBVKs`:߱Vr wR-h\HrF8q\hI^P5_j?ۦʬX;u:U;g{^2&/UMRwj^Hfhosឮ-EN3ܟ|o2rc2Jд? gݟnb7Ap<`t{Vü2Nx1ii0$^d.Wu~׫=glсNrQ\z yۜ!\#@[p0ljeydKNހ# y~Uh{ro@Q P.$~w!ۇu>r&j,a^YA,%vNW?NWsS5jZ(6!V:QGi:M2qv{kT3J9W%1;^2t_a{ A mh׶딻\ ^%<{ &cN:5cз>y!"Qvg|5՜VޛlK<>|` egVo,tsT[.L)kW>%c T5xyd},n5YI{ih);<,t FNCy8ύGZYJaaVT|sYjo==az@M1N ^*]"Z^ra=Cة ˩[8儾gyfϗ +aXBp{R}ZHedvSF?BG7/t\ ٠K2MpG*Bdx;i,3b5`N5-%sgz_WIu[$ӱ>d@s'sIJWLjf<%g3@Or-rs:_Fj5s0u&l粳sk˚,3d@j{kƘZ#yvڃք MasIJ]+'m\XQ[E[LVX D +0Z-'Fq_ ,!]S_<W}%z<:Dׄ-_rGUD8Ϙf%ޡ&(;-P]ϧjw浿ZufB=pѝ)E 4r;p5s%+٩iD-m05{:o/Q 1')vRO\iW47vDIZ%97NU u+IM*%d}cΠz̀>:Z%u%TcP8ª點[ +[WH1Tn#tq^dGLN |Ym"S9B:0}p郶zFܞ|k)H,~ +l8xoF[ف1''̜k)=hq!uGBʣ ^"omWX"a!p׏nBViP"CK;]Cd̂) +&!6u~Z(-;7C"V`\%gK@sH(+v .-FјJ *ц&?NI Q]ȐhGZnuIqC DRs\"D}ac!7WFk|v%3H GjJJ’8!%(' I5 z??+pPYDH.l%'jアq9YZ-Iw? )R+ڂ1j:PmͩԃtsJ:h.-Ҽ1S"֤^鉔=h?Ebn T)2tn/6ҨǂN Q_/7ڸ۴Ǽ_7LZOLZ|mmӤ/Mg&mg&>LZsILS2iI_iD|O{ٯyIa0X~J`dTiaC&@l-Mz { TiF{H! 
I} )0N,>'nvE"ҙu*JED9ޝi2V87ՠMYE}Bq^ϵL +.z +l};7D h暑4j5-۪VcܭҜ7?m}wJA#]vzUQjWIPB.ftD&qmz$gbDe+j5>0{I[0 +~Le*.%lq%~["\1k=!X5 q܋%9p[.YS֚`IWz!.z*k^3:3P]s1,SqWǔqEDĦ@8:PW-E`jKL>mZȚK,c$ȆW|` xZNWQ?lj18o[k%c̛.ѪH7+Evk?=ne,)TH,DfA9%gx % Q聛v+B'p Ȝ8T6D_?쳃= f,tnHؾ';btaDBF4-3гe $rp^mRٙ|$?to6\j7aSe$pv9S_#-UӬl T`aeAYѨ_a4 Xk,;$,lf;:ޣ,Ae/Ӧڇ?؛eM:oJBH[FҘ,>UGu We6=-h4T6Zptu=>X%yLE,l&P =6"(0M'Mz볢gϏo)`xBb'[ + FdJ)<8o=t PPR{R( ]ĴvlBSKy3N3V't10kY̻GTOuUnՌ,H:Pv5F399˺݆1}a9:e/[S> +E^a^|5ftUh㊖rEɈO6PMiLAiJ*g)/ Z Cl)y{f)u“B~a6\#4m2X`Ŗz6 &_ʉ 4.z7}?`KH94HD5=BBHrB)ThǯȆ? ?`<߲ںklx1v:[+y4y.9-n~HK'Lox-_'P\kU +<[_)B(gz#Mf 2b-VqPM(&2j3tW#*3"lFįtc"׶Mp"Z;t"x|ɱM֖qYMqt:f\DPNUR *>"UKZ6,2ۆi=} mLڴ]s 3J gB{amil*t2{͢`;,!xYqdQru*WsV%ɶΉj?#L/ٗA- Z߼Q\hc<ó&忿BWY،I=?g|E4>ޕe*5_r_q߳K/N'([] #]2:JG_%n-jN}Ubc*%12T{hP uqK< +O +q"s" +R l9rSl[RM]E:!̨XAjjj#"8'ޞ}0 fQVsXAuA$znC ˦&27 +`?ޮ*e^|!Q:g>gfjk{VW]xpaa(?rk@ )1e8gv:(t/,{c%(%VнmhiK u5Rg/W[;6>B^^yNTz(rѿ /E]?tauA - }}xfWM&' }yQo$$Yd3i!u0,k}홉0U>ʈdC!$AzR4ԫ̒*; Lhx|g]>Ta(sg PY;-(G]fU;@17lI:$aU=[!70 qs@ב9DJXϣbe7DOX<2$~8¿f ?f|3S;Yhغ$בfl'gj@ĭ@,̳Yb6SsvZH,`YX_Ĥɞd|yCS"nv+nF_-^u[p:W6QFmux~*q;!Hu}*h$ZTu[63*nWUׂ-fl4^Sgl%ڕz/գR,Cԥhc/2)Eh:G:0,هl}h{ . G X-abKa4Ne̍OU7g #PX޳0G@(mWQ' +NY-5tMg ?Z6H73:hУugo-ɉ;&s ~ <<8 ] Cƶ2K=cUk{*{>AKOp %ud<MTM0=0j{ĞTl\mKM@|V0N'6h-Ihj})ǣS +gVC|`su(-Kjs2CeEטW.b ١6 *ˏPP.1#TCEԞ%c&c`J쯱ʧ߽r\^8mb_yP +PxDQ?Ժv?P6\b TY5/ƶʣjO5CΔ;<*#>.mL 26I|s`^WYy\)xg48ʛ~/nCx-8 +FtжA(]5އJ]kU; =CaHp3+e͔aDm>--OEƞƊ fD 1+_QH2eBA&Kޘ=\2jƇ&ߚ׊sAi 0w~Ǽ+ޛX]i9O\%&NWTKpYGi8#G߆VXTMwQcI>{<{imoCy!Jo#0KjM$=z2M+zuPz+u(Wޯo4S}3CtiBW \:OL1/R2ikqŲ(d;ԕwr$GQ󟵥 =7sii~؄CAT`W<ғzR lWs Tu3:BaxU+OoXx@(*,n,EuIŨ$_5G(ʥM"’ܬHެ"{) B5aOIΞ-- 45 }f +&:׹ Dc:5r;ng#eTωѳIvcLոxdyhP#Ll1bT0T.w>\yG&90s{ƓOBPw& J30IaϘ{Bsqoz(s^SAr!zS„fl!UJu2e$^S6l S_?.)>kqN{Ůaʇ-IALνe>d$̢pd l +7>5ڥa_L<{g:2p,U/pHemMGz( y|ڂi OVrf P9R:̝M^3Fpڌhtnf(t@ +H73[>ߣx4,y89@]Aks]UV3t'`@1dil.343[y`mVe'2` U ud<뛎Rio?D츳+S!`~YPX/CR鬄 +& Ka\!KP\$4VIu })ǣS +g&xÿQ%?;в%[r_UG@iԩTy6+۾BgHKdEBo4\O +GNf1hQ +e s.lR})=+ +y8fϋhUY^:5j:ƅvl&O23n-SBnmT~qMiyϺ+}>8&EG{H"ے/OYTMiٜIۊBACJ$tcX(Nc +&猪+D`ʃUL1)q.mvdY=S^׍ {T&+Ah2bh4\p?/I~2G͗t{8؏Weul}o4}},.m +*>xSh'q2ϪdhbjpEELrĚv~;݆ 5d "ŀF5C8 -hN6SqC5bE>w>$I:vK oI=7[vc ˀŽs|廿[:ug[ -}avCTkŸ#W€~̦N7ep,nIvT)S7,dNۢ'OLR|}PBy :);o7PܯmaDy41 4UI\~V6982KatHs,?-xB2n+ +b "2JHS5eLfiC7|OLj6oℒ4!JNT[0V&ũ%vcI%h4&: D֚uvU㈏q,I=pwT"W}a:`IP߈DU!Sڍ x,cO̮s* C[hue?ȑIHͣ[]Y)u Aw-QuPfq[c^ +!Lpڵ/U,]_)AT Q.LFSem,JM@ p%A *jlpٖ$Sh\]c+7R~2?Sej`OaNmyo{6YxdKL:C:tZ J^KU1,1J4gKX㻪6s*a-*EK1YZKJ %6a@^Z=^ڭ$ +YS v=MwQH*5TQ0 !FVՀ d;]JRswi6>l'4[P;%KؚV?;èLC2YKyhv[A=&4e”i2VO jk|a!)=WI]o? c׹ʒbil,Q&2ꩩ>ᱧl:|ִ#S@w +c}cc ~H!L<ؓA!"\~V)dqnTuq&2:b굸w5?_6ǎ?w쀜F~ʱy1>Jq^t߿X8Y;}xfÙ%(_r=t4/ꢈQwoM9iEhbY' +/kam.T0*4BU\<#u0rcwiף t%@{rNB&FZ^gu/OK9h9#K'j w߉ KbpsfN5FDթ[hKqz3ZȞ3Y'ĩsr'ei8pϼs@p J'"Z }_99M'kq_xdjollД[Ƃ%?[?ſ#Hp>6^ߺ2X!o'r}׫焼~B  ;@g;iC3Ϻ1'spAb,uВB'E dqh6Ht$l#ě7?!uU8N#ݏAQU"Ltݜ=Yib e?kN@w ZW'[Y7<;SfZ} ?{mI\ɉ~i7溤7>WOb)6v+whyTwܓ3q{L1B&JLGTh>b;EX +G%uHT +lv+w0Siփ|{BmbsDoخu]NUS]Q, Z0KGqڨ鉈ӏu=|n=d̻j^Ptʱ SMPg>tC;U^5*q/H񖇊*VS1}e[8Q[$Zg=5(Eb(?S IF1zޯW[hRv+vt*mrPg38 5=%az_: 66 b%۸ $\vkzϢ*{$.S`)8t.6 rVQk*w^@?eX-E^F;asnD]{lk~m@pOܹ(V:)@Fq=p5 +!XcI97b.h F 34MB+2.ZBwvH'KR<ڸy6' +74dLFxf=;iNNؗ@O^;QR[]95W"rgӦ=&G^ VMS +=z +trn>=.Gܜ s˺/OS1a}u+ {j*?ړ'twL|k F xf-w/{yAY;n+D=1ɴ89Uצ5Ӧ9UȏI>',gp<ZR‚ga˽=! zA]q':L +ƊeaEߺ׀"23S)MAKŒNYȀygZFҪvX=~WÕYƅglFω2 K%bQCضRZGly˖/SaۗNjZxޚRB3 n ~ZY+-I:Bj x/TSy!+}\l+t@%;A'L ;^φkDݘzl,B}KZBh5=Aw.XBql_ӈ穻FOA/ +{~=" +}hr]k UX~Fu XX~K+x(%.2=o~RmHQRh_b5!TSWJ'Y闧n`VXo(,Y^I?2qc[;75E0뫚[u|ViOA9^,a~Cˬ ?/. 
&w:X*8H4#цX$ҸŪҵ!X̵YK>1#Q|ͽs.qGWu~Iu} PO5_¦`#Q i9i} DofAmkA14uǦ=jb;0/}g% ?l{0=^ՠE$ +Tt**HƼ2L%Sz4,_2^ڦ#22H3| +f햞 la*(@^[I)泯N=>67葅F[yj4" '8yvjhݸ>Z١VL49'~ K:`aWq$T&]3XZ +Y%-b֭t^^B+ATj@+J@EA\d>Xϟ<~SYE<\//WT:頓W*E꾶'l&Qm;@ŵRjGJ2FTR$c#Qk/H-X#, HNg9aMzG|EѪنȆJ¬W4.Sdw΅ys^sbNslbM2n޹ҵ q!hj?Ǻ x.+.1jIED$Eb 9 K&뗳OwcFP7QOQ?YHm~gºkE2zVODF7%gک5xueg> ע"(^;QHzvv]s8$~`=kZUAk$-] +]U~$IR譊iE_Ok>G:-Z2´RǢPܒՖ}9n{:\!=':Dεfݗ&ۮmahRAA&F>=0F'kt"̞ygӫp}Y;b6گjfN'rq78`1$ΏCbק@0%P])qJvJ,0UJg8^ğ2b~#~"VHex΁ɐׇ q|sHWH+$sH+$+$7Bˈ# B/WDoE_C +!~: 拀hq|7 v1&]vM.hD]LjyUSFU6iJ+[9){ `wTP*886K +3B54|~E4P1M[}&$%O8Q^.)]&:/HXLG4%u/Lif-mPǗ~Ri.BέʹK)J&8>RKuAԑַ3{|.vI(ck'EHL6dKNg-TSIM: Ho^u'\혣5.6~9Ż?osX"ZaA$ u>#_j&$36 v]fml3Xbz!_hR+hHWy9έF٢X`r8r8XS0UXI )9]">cL#F}+VeOeU1=jrz|@9az 92sW vhS6TNٓSfUxfmm +kImjA#Ҡmmf_Q+ԄZ>S+Q捻Jiˉ՗2aM&eaJmzW=%3.ISӅ:e[Y+pOfKomU*lQ2)DZ[@O-t ˴hБfZ&x\ )1PtX)*IWbOq[EQՠ+jwIX)0'aq~Su,xlH+z%jg)GS ΞSNIY |Z+A5rDL)G7!269X6D>lNu:sn$ 0h/R5Æ&Jul?;NiԆQ͝ʾ9CZisrF 41oHizowl?ƽnAFݱ㌣j9I¤]e 2c)ql_#6>%T6p*s|Jx?CFc ?fΘ{2q cŎv`W˘>zGAv2TsWMM;X՛jvQlho..B6t\ZwҬ@zG:!wgڰ-" ![wBB!D !O'|!"D!w3@@7@D?e@ _qs@D{|{_ qJS 1*! ?㿍#8_AyE '#?'~B?;5AJWB䒎F@@YUoHU6J6yvKpCw4X8?eC|쮊bs@fԅtP K/GS k.1M[}&(%O@eSu3q KϢWP'+@ibDVA1[-du!LBWS{BY1ԎJN`e\7yڎAZr]B/ޝO*9/Br!P :-h`)NDM84>ȟyuP.L.KNZkM;2/Gp+[,+,YN\cg*+^|,$\Ԩ.K$qK2&S KJXTBsPC&'$.N?K 3 + ~sgٗo*n5VEyЮAܿ'4LJYhJ^y\7h̊\\3.9"LMA] ivL5ܜ%ְw~ j"]:m)06`pԅwdMgƮμ-5RD~, W>wK!7)읭jv +|^Q+3:q;=![nwa{c\Hu(:O^?3}R$Y @:!rAUa:apP+@ 5SLl3~~:Q^g/$ 8'T}ލ}o)j1yGQJrW q8=?C%=Qo9Tsah#;Shx=HvQ97!x}c!=+TnMwc +]7(RE5k8tM*h@cU/ǛE~(a}z/dls)odnY.1m]'l;ڧ+=?w7YR +:cp%2͋a! '7MƺxvSy73s*yTF+YVdmG?1['IZ1@ +RckkGZ35Cd,ٌA*Gn,ҰbZq +%sYv]z"ѭDH_QAl B,A6\#62Q MVoT\Q\y2X C\8QeS %@'$޸K|8h?cD4b +KI>6E=|y@x\5A4e+6/ǘ_J:};}yuU5,%jf4µ,\\ q+.>gwߢpoQ'n1:=ԗo ?z]{鎷mh^='pYV\G%EFd=>ch 9zՂ'!j,)Jxך~egG-I.!Lj5Siu}beXKnۢ{ .p֛n8kxaƟ @sOѣExJ:} +aҵ#+l{u`hY)&c nje! +>ꩿK4t [^kKUqY!r}I7W(HO.S1 #^h!a bh*}h+on_"H4M_6t6o4O*s vZ\lvG-bESS&b8 ~d<7SMcLd.wn{ tk+a_}G9֋NWh\GG_r:nT@n~Eqp=.{!hSXY ^^)s s̛`E7 +ZרAegpIuo+õZKӠ-31 NVNl { @anGA&ZTd"&+<-ቛ7[@X 'ARtݭ?T"(Fc1>Gh}ZV_jIy$nUמ+Wgga8q^ƒ!eWTiOd3Jy}\aL1>[bZ)L|iwIҁX@llZV)f`]tґF?N #<}.ex^WRDgEF R𽮓ʛv}Cau_Jf=`'(H½2y rx5Ċ AV;=&^ 11G qBK2")Sanw;~|UU54E D5i%x'W.ZdVa?&VLXO̩o;wV}57lJPY1t>TbPۡw$UP)fF@Z@LwT^\ pݮ~ +^jC:,'u2 2 ⤽0gYq0ѳ"Wl3h׎saо<;'x&`klz@k`nPAG$Q,Sm8`mrP%cfMjݻgAr# +#Xߘ!syLql̖լD ,>Iv>RI{^",~x]r,v @e XP[Tʚ֠Pok!O7(lIϫ-;= +gӅL[im 3iLpB(oc;\ͶGZ˼#O\ UTXur{/*ڨʧ!"FF_~t3OV4pR9W79 rPo⻊S +V 'P>!yx!MnaZFLxٕÎ\?UZǓ^5#^pʌ;6 > &+yJ.oG E1%fұc2ΛUX +(R͐*}[7$T0\)oV`D>ǂ%fiO:@Zcj%afԒՐpfֲqEMo*9N)/.rҍpA!f%R8P0G4zg&kN@k}hN0&fl {:(@x oKT<&xB1`]eibbHN4L3)`e$h[k* WJkb'U9WxH=sى'ΫB6f[ [NJV~{*rZ+JϜe.ٯ*3Ȓձeᛜ|S -AQ}e9ur5aiWEr.c#ɀE-8pt@s>5GgqS&O[m_,QQ4nAOTdTQSh- +9صb6r59ee& FҢZ]REo&LBevlSއo镎 tџ巅 ׻/xj;F=ԼԲB1RV$2kjzMeeܲSFg:.$9(~s0SD<Qi']+p@s˝ǹF^Fҗߍ)$κ~-;(QE +Ʈ,~IjQQde8nΙTyqEۿ(8Q£URÜjHەF7oHf꬚oWNV+Udh[WD>7a9V@EcNW=V@x4'p a }pjm-*~d@xdkJpBl[£FŃ9ΒrAKxryh,hԤ "  ,GF$gllA ̔Nms_ S;D(ҖQiD|Ћъ>i_0*īvꯚw}.miU +Q6,mҰX&OFڠν1IyL,X^_LsDm-(G'j| NK? )WK +KYt@+QŶ5tP{QܰJK'M)]?yj"n(ʹ5c[߯P9L4.&4#B f zŐNbh:5SE@ד?y :|}y8>2*lg"'On_剃lW$u:AA.PEh +M5Hgf Y#?6 I wUbwGי47B1 +)|vۂ26l$ SoQ@}h<4o6'TEݤsKU+}^QU_%~??uoHoWKooi|-? 
E˛o_?#o|%%1W.@z^@S>A_χV(#Zc/6_!N䵂C_@# +~z*83||FdX\{(+s?0vsuP6%KT#~GֿI?OvW;vo:/3}gf?쵛>jfKW7n?fe^l\Lv| :k]tP'߭y+yɍדپfgԷ/^7vڸ<\prhC|kmC'Sv2jww?$(RI#ɓiDZI5.e0:lWK7 +J6>wԃHJғw]c@bq>Za_sc@ώozk#-Yt, (QLtAD:~º[cӲt3$P@omb/ tgwp'%g9dE +twЏRͣb +c&2fM3Yw齹-,=YU],wVn>;qP63^*mY]<TڐY1~"yJ].[2^ȃd;xXRTˇأ@ڷ构u/0C;5S=$"諩Ⲵ-3C,FhXw]k*D) +X>|z{;w:i)y%G=#=E PDQs%:k63vTቤ!n~`2F3az9Uojza9̐".F"pN[$1쨇0l{geI 8Mj {%OMתjvYpҭzNtHgE\L@j=̣O^i{ԽԳǣ+Moβ>Ak|jtT k~+pl̟pQ7ۡDcNsdML&/on[nodRv?s;iP@ 9lyqn4cmH Qa f4~xٍ6Q3( iDT\À%V +Ab|(zl 1y=89+ 񈪶:jȎ1= 'v yw1-Lesl*^D/XԾ,{ +EpwR ձ )l*?'r~ +ߐ%7]r+ )թ0 G(q̭|P?:?bzPfS)VF> /kVt.=y[6 +;@'! -[5 +mEws\x|ͭ +mQl˾B[ӞLBz Wx2%RK 6-riΡ䞅a'UIA)&g eZca Tn9%}Cc_w-P+O@+z$H¯rԵiM=YJD 㬧b(jc(X{seƽ``.1^'"'" =>հTB +*3x1~_l1NYdzɰS/A[w%M:޵-I T T9QW*Ǖ^ڗś' `"<\9B;"XfKp ߹mD ~po}=c4Srڠf4ި Py#9xG0]oiCmWUj_hSfE +łUH]%֊[mQq Gk Mu +јgHݘUR~j>1&dw4Z[}浹֫/)y}S$$[,[#R*zh`t@oO<_٪[pԚ_cN̶3(g`/bF:}/Pg ܩl½Q'M~tH_*wh߰(.e+ v^ ~|U:"*gdſ~@+YT]k,k-"1xfnҀt>G}wdX/!!E:u7~wѯ,PEJ^)QBD--c%CJQѺŵ6R)Y Bst>I撪)qOJ#ڔF$E8xփit)k +R&}kZH9鲳VHw*E`u:BXɕr[~TOzEOmۄ4ɉB*74J=!+6&9KPH_)ZʔlbHQ@})%{RCP v6u@+)vTCi#C"ϔhE:7PVx_]Upf:M*XDnODLSečGNI/E <;ڵ*? `ȡ G쓧m̾:nEܚQ#Wēh樄(p%mZl`dY,?-lv*IlUc#ԫ.1wU k{_#/Ak`WšGGC5ڠ/糷~(?>1o{RcGŹw.wMsWc#kqݸjg:5DwH$Akl[~ƲmXGSӹҾ[k+ˣ0KA±uH{ z: fvOOGTg[JDdGUB.MJ}*tJ)9L)יՎt8H +(L{VJ+m3CMUzklҼPvm圾$Rڦ$>֎FG*ɦ2*ِGin=L4䨶Z -m"WJ۲* g'X1PeabzQ\'zh\x1ͅWEk)@=QN"L,;a :*rzjRFqtN~$<jB![A;l=Nv\rgZ55΅B + sLξwwp?\A/i ]&6UeC4KY3Cm"STuR>_ٴD6cgck˚}ukx~ h2@3>y ?4RDՇ$SjTzSBCkp(rdaՆ<rؤZ#gܵB6>'zv:N8$ V6X8̂\c4IU8E`U_,]hW IQ)S&}ѻw_?Z[I[[m-ƧL Q1{-ʳA}۰E|ґ19n?Fu+W(^ZU11+ux$tQb-҄ENH6ܬJtFf˳^{PUohɁJj?/1`#ܑ&η LoYQTF?If*:ѱp %@9D ۳vvS'ez-E*^)qL #s m1AƳCŕ%&OtC'd/| +7\HƐؖ0hqI Gѡ$U?JA,w hg i*bG-d *ٹv,A/E)uRޯa]l]ThRܓ*و8#m*Djd ??^]w BE4qAf<TӋ]c )ŁUtʨUV9ǀ n8Xib޴F8]taϟhrboX;._:ȇۉKlxW3jQ &spDeEjd,d7{r}޾ۓ=ڨEH0@D}gzX?Ajg紏]sL.Cv%4TQ31zBat34"!uQP sG#8,\##r8@t #ڔ?Z!CTu6\6[](>mՙr]b'2m"uA+!J]lь,Ӝ2Y +t'rvM9mEطq(*.vsf쁠rpͼ vM sfghg_z!ٳ^-;/VVQGjoiOńgۇLFƵX1 `Svؐ2oZ6rewzWY]vz+4W܊FFbJsPonߕC|Jd'7/y>Ũ~ovꗵD oj+͖hf$@R7W3#nwp߂R_>yj|D[]%$Ig˼ZE/+{ОqJet*3%Ol-~4[iv8oOJ1D[w#T!jUOv;z,NtɴL<wؼkNdtc`)w'F֖^u!)[-M2o4_?Zjzܡ_Z.yadZRjqҤ#=Xv"ڎ)km>tvh!~T_F>`4c᣾Τ]BQeԫ׼#:AmЊmԘ{Rh3VʥbA&!cX#Y= fP,-lLF[L?0dAX/NTHWM@ZW.C0.nlΪJ+f6K>fø +mwF;]QO)27ͧmxG~W$֖SzR9w_e!$*٫x^MqJB*iLyQb@ iԘl3FьK #U;FƢA.6Hu5& %s)/YѨ/ejT^X|R()jGrX%k)8e)|4Tv atzڧΩ'03h*k9{ь5)Dזy*,(?«%Ɏ}<6^h5z#fc7z$&nt.@9 ҷ^0 ?oj2zL䴒R}[zo %=-6]2n3Konr;i}DP&R/g4WTI+c:apl*תA]aF3q4#AvxɊi~YHG)"K;ˏsw%8NrS.3& ACo(q~$i f*/v5<9:֌rу^8*>±)J51gg  +mјR ,FSCYReEh3ve X-ca V9WEԦs2+>pš}vRFL 4׺vJcrC0@YkG9P©e~5:G)M8+^Iq 1Tj3nz[PSis^,=*n<̴9S K7"R@JMܓ+:߯lh씐G4qn /-3Dz.34ԫάsHdOUWnK,wfPCB9 M4OaWԆ"w[RLЗEz+Cw$=]#J6e=&ff &PưM]ǁtC95k*Ϊ"Y2(fOʚs*Qy΁cKM6XPJ<[H$kTMbsTq6 Bͫj6-2 Me0K92CXZW .M^WVeA*m3ϡTBFj +{ǗT,@ |SlChPP՛otb01D'ON@xJ箷FM3Rԓguo|ku[y^oz4څJAQg%h1>␳C~=`KFblb73O?d}%]ثPij7[8ГYG}W `< 'ƕX;P'nEGױXΈj\U_j ѭ:t>TEf*sk4Tcsuy[Ŕ."ɮD*LIavqBQREeڪIHtD51zMN6邪^ncƴb&fe͈ OeS9\_eƶZ5ON)|MsMlFe$gT)5ܒ!l]ټ0 +X:!<ǥ~_>E0.}%Rۑ/$Լ@~Nj_('Z#^!}XUoRXq߶׽h6e$WԞ{l{,^W>ï,SӷL3qO ud|!!>R[@ 8D̰ ڽ&wiHPe)դƃXZDpLiT_fRGj!q`&!b Y&oWNy# '_R#٥!XIsmD5]_d*ےz"[NGSΞI9_Tzpnt R*5rj"R){꒸+2F*ܪ?k8U~Q?'ၟ'w 1jr% }Cp}#C* `iD7,­@Z7 "~gA@_7t1 *4V1?祥 *>D)‡1.+,=y:󉡮,W|7P/$p˃7^-IrHpߧ yfӋowxd3M1kB{hd -W+mTq.wBM +Y㉏[ CQ YNR3 w +ϑ&-Ӝ"476\[YsWuDVK}0u?ZtϛD*\f&sz" HGnr):EjOj R3Y.*Ђx,K0Ļ굆AW71j+*#*7!NQ/` FD쬨a^O2F:%G]u0=k`}b.ܖW6qj;2w\oQk58&!NRyqq.E qKwBNB@qLx +N٫P[[BjwZ(^tM:;ڭ^eYJ:i_ivɴ4[BU--ڋPPkPn^uZ{J+NkYh r "- +KnM %|jTkcoQb^Z(kEob-kA[W6ފb"Z foZ}'jE7bkkXK{V \{RMX0fxf&VV_%Ytl-:0.iMUM5*( +e+PeCKz&&q1FJ hDbPta4 4r M~+d\겦N鱕c#$_XSnY0}`9IˆPzy[?+WѪ]A(Yի*= EEr"rLDA2;S00J?UAaVh-e&TLskDV9Lf-F z!dc21YOqՕZIps0h_NPYQeÕ8S`Mǃܧz_9zܙ/R̚GNVHk4rl 
fqbK̂I(9oje8?rޱ#HwFe}e4nDeFH>_a_'sz=v|99`ش'JalIsa%E@Ày<vd:`WjƖrTcnp^~YFhda$r2=:,}X_;lg)W&/+nLcbKx&q2'`BwkG_Fw̙x9˸siX*yqFt1ݫVSuDf¢˖V@gEz.M;V'۹hưq^fR֖bI<{J/0hzo}=ه%*Ww_ o 8!i<j.hmk0rP~*ETnR筯t/}5QTpsj6=i'8 QC>U.@v;:GAo5Dk׮!Ъ v_K!լh(Y;ۨL47͓tv2ZglnV]qug"YhC &j$^u+"H2% 2]J$n)W1g0o'۹%p}q d^js!}f.=g 6ܷӔ>̓E]KC߸dqF".#8>+RX/#xQOCFxWnՂvL1dFuQr q_u@a` +a2P}_yC֭rrje!QXIeɟZ~(fjiܹPzݜʷ(3Uqᙥ) +סa7r(2Szi;qR`: &cZԕb۬ANtZ(0FzUCh0`ЎWv^G/?_o.T*"FyWFw#9,I??Kn @&Y0E?!ϓ\|qlWNË}VAmkSh?.W+xXtP8]rt:8iIȈy[il-v|#zu_s_>CtKd剗@mUFnhv!񲉦0 +Muk>c(TP?eSRhOeV5XVPVX0eS IϨsO>gjw.,9n^OGyºP1ywRE`1.u-OߴiyOX7iS)zOيPα-JDIF&H^_њwD_-0 ߧM q_]ѰBKe z}Ta^ nUOuM0|>lCYW*f(Yp; F8)scJ/}ݰ#vPZA&^?K񳸦^1k&tbĜ^3 nbDcRm P~]ُHt$VHCkobu"y+7ru"Z˝⎨Gv E0X%(j {Rc|Ο`Eַ"mm.뵡ޅ (Z3+c,ObTI\US -5)WN5ɺfN"g?^z>/Yjn!má;f[ٴssnW<GNix9P-`(RrfGjŸ Rqy0wUFFhO6mK̘ʉ+ ֪D*aSV\yiܻ[fpU f"c:Ez'Gkݍ9@{sEoL}VS5XnByumwxx̛\uOHc!zr(w,0Ջ?J|)p '0V?hG&`c[gvD礼MI>}R `m}GPNu%F3 +L<6*s3.r;tDWTS7A֞y5e`m$C­$w>*'Ag*Xb(5U z BP%˒k}ˉ*kr?`ExZ )$uvF*w Fw00Ppuմ`z5+%e-jt1X<֫ET˪7%|1^Vّ]_ZNo:< /m 7\"I+2FN C>9djvhi Oܭ;D M dwB7-BȊRePI\$YvRoI}tz_cI$DEE0f9['Sl~v8Yr%ɇJ L$}M8ęab*Y +ts}uB7o;shWO4?d +!YXk5AĴLc2Atm\&;gݾIJ᪴j+Fo >.bpL'e5t C`R/+*^O?|hҥl2X|'f7%SRחlY;?1x#8kt`D>l 0xqEezZ3W$-d(ds* s.Jٲ}iv-Gx/\i S7 X1ѠG`S#$&7lƁTK'ӖIP' 2P1(vkYafǫCѝ'C,L/;erqr줊$TŽE'8MB}%ӌ]oS;zv#Uo. @f"upL]c7]WA\I +=4ޗGLm|ѱ>N6ԔA[pF\d2Jst0P*ɉz_Q7\j">R4ERp3=2X44c-IH TMRdy,]'R"G"Y8tc}dՄ3zDu_'I:,mxiuJut|u]4@]Z0ؙ |UNXFڗNIgKC\ٿP))rz"]!`EY57KsaGF=ҮF\"Iפ_80B܀s5[ Ĩ ^v\FP>bI줮+-jHon ^PSY6+ gCC }puoǀ3=rbtu +H.1_6i HWl҉<9 _շ69}_؁b7v.X ;aUxBQD?V#Pʰ8TvL0]%ɑ0+ܗjM"\}y͙0e*n#F*:ꮾđ_[ [ɭun%laM UrSVγ|EC9)D{ Vv&=̤EE^\#1?Uw!7EZSλI^z=n/<{_*e-@UT=:FX^gFb.0"bN]FũUdF:(( %VU`btJ%.Rδ]=&k +ˏI +-I`5OQ.b<]&ZC;=?*] 펱D#YΆAn7Dġ>Xߐ>xPmI;鵔m"0D띓 X=AVo']]s75էn v߯Hu>E19< skslɘדܨ֒5F4A0Z3 +VDl~ K#~w kAm{{yQK}ۺ_!JLDZ&N&?:Ul-jx8D 3K=Oa©>M<& h7c502y]鉉)H jt+9` A%nz.1!6%]%\e)`8״ԝWySoa$(趜MQ)Kt,k v,9L]qʨ<ʗsf&$qC5!]J=ME:l "#CL:`S]y=LwVPATwzQfCz($;QLw_SkHXb q2 g&ʲ,6hn3&;,}_Y2ֻuK9]JM>.U +hU6R8Q>#VS$95[uOQt^Q&gqjBRQD3H"Shӈ='(MI!Gk=[Ϡi" r4JqqoCYz*saHu+Ü0~EYn><2J(& \P1 +#VuaMKܚX )7R]/2Tˎez8orw +>%t2 +s9u7Dn$evݳ׋>5xmTX?7/)hߪ pLzV%t`Mn5WdgY<H(5Xxa!`iia=ͨ{}r0թ]MBv)ڥb矙kڲf$QƳH +̜lXOH`З +shZ̻.y/lbe^ xj:F^qnZ!`|4p;Q!:moͽ}Ӿ0UKa0+U>bU֢4-7VWmmWH =#סG~]5֍A[Eh/cj5Zfӫ9>nc}im(~#1w[Juct|8ͱ`Zc^dlk:|g|:/Xʠ$$[BԇSe5꜒sfv;cqrb ѭ>ع!?  {K֐1k|dW;g#gT?}@,ɲ,42̊>X+6" m_kQ*vJܹ^8v#.Rt2<ܦ`ĭ y+M4'y12]zy +n"8yG#San{jyp󿿄qir&K! ,)ś^yfCZڙ0άqql$n"h; 4.AڄNж4"`WU# Qk{NùEa MF8?wԽCc5š\ 2Mli 0ZQ1jjv>:-udJNy=Q,JB1'>\{Oq]\/zyUQ|j$d2.d˻.N۷5) /@/SNNhgWa慼*J +T^P.I5Mg +AA<1L3t A[UCgg$ MZ1IS4$a1{Ptp֒+UiեD!\OzsM "IΆWNQ sG""oh魐³q-s5ʼ-L&;wl_Xفz<:@qЩK9ÑrI"㍪4LAD`l/0^B1*ZLB?zr%"kOiQxX=(;5ӂNi>*ьe\)pOKh{%ЭuT^w-9IM-N%XCptX]NaYG'w.} հ?nzˈ*9WUC1tVqT)wo^| ?|,iC.cj +Jjơ͎5y_[?JO{2 äQjl0L& ^cqDJ׷C苍4i9VR/1fɎS~,HCB}v|ﰍwɚ=,s^ slUjh] w@e +Di2-ܶ3Vzi Ovzlڐeu6#b +252ltP}hgc61v+X>"$=ӏmLS +PSȂx>E3ɒP[eg➔[T'ö8jd:69zҩʌ눧zRg%'loT#.OErL&ju`VP{^gq߯ /󸃅XHg$Wn8lV!3XxctS5iyimkK4qUׂev(98^>qqq^}bcCA\^`Sq˭eJcu}LC8}I2MQjKߺBODzt#6{j<:و[+nW!۟5r$Ɩz_ ojy$H xİ 3Oׇu +hc[P׮T;sg!'s(ЌΧ#1&ܭT,yv9HOtM7f%cfn5q! 
bYReru;vCPF吆KAbdWVR;B Dğ$ )}.%,cmGc#I{pblm;NN3`q$+7)Fj^ER7$0x Xr ʣbiR]tyiQ &nacFĭsKAH!GK!iKQN<EUDNҜDO6w@"k>ǒٻj^sLA\ȞwD)iE}5 +6S +2:*xs'Xˊ2RSdՙ9h +;/v1mKc|#ޘSxX~D$_ijs{h5(Д2w65[We+txLCYV|" wگg~&6ŗ]+Q|j~;oi_Y'k[t^KzwE-Fű)~Aߝ/KiPІ+4e3fy^ @"R3PE~m| (>\7v䅾K,*4 $LKl] +yo9U".wbZ| 04d?f]:" gݖRNgh_/%M[ ~蹞;E從T%^PD-8aMWxJ/e* ~Vi"{߿%DNG_餾$t|R-[_tYr%.GHWU:t")5rhXRG^ /@[Ik | o{Y(M@I""u|Fk,D|<ֽ?gZtdxՎv=3)I|'6 yXwjg9CII{z0!pH6O*4[H*"w91XRp;H>b謎5R؇{ɥXi%%xlZ/:ŹLoi}Hn6vB+gs?v]{ځ2 +$Z6Z?MXJ_J_ L~F-xIMucBG &ys MҠsf諭w.LӺZIǦ E͚sƍF1nZjpK)EMH]p"G Xl;*D()v(]ͭ'FBDb %rCQm;@WbF1Ѥ,T >RWSKY!LBx"UR UCYV}wK<1ݩ?:KP[!swGfbT|=ݠ.S@lKԚeKЌICȑگlPpT?8Q/K. +Ta(~dJ>qrV\gkի>׍AWi ~e쎁îS 'X$YUwjYCdq. +z%\'s(6P26%COC݅ +:jZ:UPOY佴W(˚IBPp5/! 0*ӭ5zj i퍸p諬<߆X% wRDCӽ)8 AQ/k2v !j>+zs*jHIq}>ԇ1NRdH2Wk1=c]UXnN{~0ڭZiĕ5U_,ɷ!} +gQ9ព)9M2_rd:Ohy}A +ײR ` "\ 9SKvXZb*".Z*>@{NUH7J3Lmd~4ϯE(*WxUktWX*`Dbyn풉E CRpQkNZs,޾\jqYK_}1)Y$yfj]oSe0EfzTvcIv_o:`ė0^:zguTEa,I0{[(ϚjhdnɺH|ydR5nwc.]vt7ԱaƎΪmțVm^SKeQ$Ab& +*ձt9ϐ1EXH6YJ2Fk GΚB< [o!<#Ƶ &8|Av8?ڟsS7~NKE)=֬WvBpvEwzE +Bt!Cx>X1oV7]ϧV00~-]6 sD메t%@_@: V@+Jk@9ң_)Ǐr2cR)ANۧ9<&4~ ?i'̱qZiX=FB1fv|[YcXM`S1b|Ehc&p,SÁq;vdZ<4&dahB7B4vm))OjiwwUcD?~Z 7&N|g(,㦅΄!dF6o4 +a cPzRhmU8L#{L9͖I*ܫ72=-fy3n9fܳXDi6 s.K<`D2FzxTL@#m~QN"q=kx +DF>}+"8iČ*,\ב5&Q5gXj޾ZwC= zApE[>7.z4E-1}B KMB`[JUA=WށMzqtm_֔x']B=*k"YiDi;Td$V~l d{U;<hzha4ʺ_tZM܉eFQE ;LC*Ə`*h|=j $;UИԈZ +$)q);hesF⢠/uVh8nׁwƾlg+ts*b;{- Ž;0$6!۩_aL4¸=קS\:&R.sVWURÒ`M&;t[r-i_>zwC=^.SX#XOn-MMg&@n`]9mD%Ѩ4@H"Yj +)Bhyߞsm4(k9 _fdk3l׬ɳd +~]bgIU;lb\2j4[KϤɃvȡKwڏ~Wޞkz^oȕrVk1iN xinu6f\\ǝtxl*M㺱 e2yݾhXz;_ng፯HCdqysoznS4"T2nH˶ȌL#o)<I[ +k铙7:'*mo4ܵ4Kyo„ a9{2Xo7ur<7'n f fpG7B~~f:M~ff1` vն+ah1 +:H)I c- MC?"ƂxMQx%e!=.aƺ[Op^#D>˶vh >)XhrrC׼90>^Kݳ-hwJm.{\Qb %W9N?s"HO*GXlޱ=)ZnuCncȮ0`$Z8x8xH ep!D؝s|_A=;>gl6zߥ= F'Jiߙ۱_,u +uFEC3IotYpWrLpajp9Q}ݱcOtd@:$a,j(CEOCiZpszA _}"4Eb vrׄ⤖+\*),Eb8+DG횈q-٘G/aU5#07Ct XFc$-fKד꟏\&t+BP]XL {']ȴ +-z ]ەݗBODa UΆ}Qs}-1QIam@ra{JQ,(%FeH`]^+i٘Y=,ؾ4eQXԚc;?`yF^qq/)l~EԀB7ffQ &\w9\o$cNb>p6N19ff,NNzu =S D]`L'tiQbjê '#1sn(Jvۉwk'qJ0DeAƄÄi7uWue^gujRf0S й.TٛAc+LxMr6c"1 +=% S{'tkA$,S oWv4QO +!th/ (R㞭I I""]?FڗɈ`AG${4`J|8.j) +1+]GIeP{~##RTϬe꾒9W;o\4 J>_ MUN;8jG%NOD4aY4/L:Q~0k` rD +;F@ 2:Yo>Z*6霅 +Ձ=ơsh2]mrN2cұc|bYe7g S-ʊH&ګ^$(`R%#RqdW Өi?ycjB_x5%j[Z#vţ'nR-؞م沧9),Ii?@Kh(:d߈0sgjbz)V pH'2G"]d7;ovM=̀4ՃX~6v5ت}j^ԫ۫WYYӟêj}Wj;F~66~Qg?dZF86ШiL ɩר}RtG&W+Dh҄>E!Vts0_#IH I(0bV(hO3\ƥZ(gA 0ИU L qCiY r9SǫP^,eFd{ݶmˢJ֥58},E3i[g]X4ns17e݋!̼(+֩}fY6ՕP"7.f~(6kNU;4# rvsPNuFEGJqF&ʨ( d]$>,dI%[V[=1lg(8$&ߴof>ޑXڐO#SR'Rncj usz;F MH"R4"]cZ)hP3T +eȼ9ҝR -Pls?=0ug +GBc"@ {Q{)@,xl{[`:F%풭ʥe@`d6-4ILL~ B{ƪxlDzEl6f`I-+]۲;g x,ahKY9˸8:J<Wue Z 5 +<:Ntj g:AZEHwpNt#/oC h;~j 4TN%ݝsGro4w Gcs /ູADXvR;d'V!5U: +S,}jI6w8ҫ˼!J+N{\fJ=Š;P ]́"z{.\FoF)ͶqvT۬@.O G5<7,ԡB-LI*fٹ@ nB4X8㑪`&$\߶b>х,]7Qy֚OUDmnR֭_1l!)/sqhSbx~VG/gqxRSgoYsAXBz|уk}RmJFVWl߼MQFK4)DM\;c( ;w )B{|@ <>@Uv|_A=PH+Bڑ}QzXrtLGwgC6zv"tLqBC6Uyo8".bh'V#(CP9KB;#(jP0jE;60kj`4 g7mj ϳɆ sP/=uRi0FPY*o#`($Y}6(oFgά886H46Ec:`cyAg6oJNUZzȬ>HgKyp@d\lk0p]pejH7`!T;5!-QCѭX +qUq@mZbMT]"ŷEWA6h ;CơzM?j "kti^ ӏ5yWW(qYRr7!`j[*qJ}-z %' +P~ 'VEbMcASL[`z{̒ےު2:0HqQ=ElfqWwW`s!&(ޯR?5Kt ^>$KR2ǪE"l~+#>/L~Wf*ۆ$%9 )ش58r;ReV0Ÿ]D(UX ߃cVPִ ;Q +dq`u7Heυ;@3)y3zԋ3x0DfXŀF3*NZg\YDֱZGx~z?g8U/+ +6,kɞLfb'&ZY}6ntt.(]J1E,Ɉ>8ˈ\c_4R}I4Su-\Y5[iڣFKS]TixnmkO{yYAoK/ia˾ɓc:l |Vg~~(sw2ןkoYH}BQw8FE}V6VN?ڛHgm2Fg/yO +Uh:ƨbP.6$ɓ^|j'mj~1˂":ha[DC6NZ*OFnj(EKK"E`ݍS"J4n"dܩ$j|HنDi-z'btb |`UB+ÐWXGU4R}&y\l,Xء17vPK!k՜sbb~m%3A$Z3XɤSaIeag^݄-g ס-m=S&uBa9Bu"64KIq e\=l*-Da/~ %@w5RqEւd%^E#@y ڗwx";%{7&Dl|dw"Y1/UE$ok6ufXqqs*ci)R)+IR`Y,s!9-!VmEli[%Od*Qus10T@,){f''E[بgLfM(]n8OSFqf` +Ҙ#>E$/5H6}v>Z|a_@8ېZ@se{Urtv%5>AkpyU64Qrw#a1y!cNF#(q<,[L~9gtɮtqN^PR߬k%` 6% iGBF[PO<#"T0HOVah 
=˽i4󐎰TlaCRYDM8'Q)Pc`{jNQI0x TM1kAw[Q:]&wߐ puZRHXIGv#KP*j +#GFk{NBw"LF)FOѾ-VES][H /~H zfAӋ^''l%5JlBT^ k]mCGkqϩ|uU4gTR9#|:[d8jG;Z3SNeeǷ̪Z7 +X6yGnLRMy҈']EUzDocsٺg9{7G2dgh$G }g4d +{ 4 8PX2w̶c*tV 1Np`/)Ҫ&F:OP Zf*c-V;f(YL{7+oK1BH&ц*v[hޡKreTd${_qK)Ys6JT^gM,TP(aPo&G+iu}/J<@z0LؤKuj !c(:fd_Gʪ\Fyeö0iv v림NёK<"U;AW7W&G.7L׮R}@FJyg$I$I s'N +x#$/է3d*~}LD 1b {cU܉KhPDJR̙yd}ؔ)iᐁjo eU7 F'zx1Ss3; Tm+ꨑ$,q/2OĈ{ڕ7- ᩜŵužW~t0)m KzpMTD:L.d3U:=/hP rd@S=m~N Fx+ULғ}uHʑ'TBO^)hHK'0:ϡԋe,-v7|p0\]|bIZDQvrEJ,qOG6c7U)A #b*@\ X& u.FnFE,,At񟣔BȀGbb)xr?γ?F6,q眫į*,mA-h:%\}~ټ~'[7剒-2~ |;i8ܿ0'w~B^2gѣը*=nJfuz_E:JffbxhIcAyG=Rꉈ!hP"J#-u2ȷ849~ݹVl֋YZKt"#FS9RH OIɓ)CRr+lOS}J۽"'7%E>#KMo%bjzy|bR9W Epۤ.u?oJv+p6^M\x枋l.,$% ?֋Ve !uޛF $hX*?+8QEVAbo`AԻ j"@@Rd H50Bլ-jv3~&C0:/c0#eNYNK6 Zlg`Bָۚee/HžC.9#Ƃ'{v6SѬB7h{8S+PMZduVtޟGRv c6ۭLaԶ0b9[5:Ik\ԤH&w>8K0-e F?$T5;@{)e^Hi>,;m:>Bf;ž -T!Z|jAA[KaSJ+ZTVk6(|װeϠMb+`iZ:UDVTy7tB @r.FKCC_!%~HSY0?=j4{Y[kq +2t7Wtcx<\?{=g-rBW}rF_58F)oݟrPS_u8Z`#xvI73|mA`R Wksb]t +|-Z}5OIaM#35gZ0uޫ|F"!^sPsg$.)X'CA S}֘ʄAnٯz\jrJ \oTj՗y➤ǡ`gTilf}(&5L 8LO sɅV$zS[tj%'v(Ӌ= *m|?-/dFaSJ$Jٛ(H5ӻjǂ3+P$IF'_ݷЖ_nJ\\Ghl`( $"VXgYvNw|P *r.Ov锹q[vH>L4}n*%MV)APOs:U쟀 7%S+*Cր9jK]EQt~&$B7! ׈$pC!@!ۡTk]3qdD\SoEʲ/oHXCcq?pUA2yNr*Ň 4nMBmDo͵ +$(M7ȏKC\YF>{JJMr0mgt5Tr/={պN@7Cg %b |\c2|=+[N`l#,rn:gyxO7?cTA:2xQJ7>R#X} +Ԑ:.'+c%i*:iO:VM=Mrr{6RDxJ;Kʁd21rt*&sBe6uבvЇGw%1V&a^SmFS\=z#/U1!.̑Q;KLuU8""@jh2ƱϮLs39q>7=:C!%Sdߨ'd!F1O84j_wGoRMӹ9T7QE!1 +n*h3(eI|V#9=X(Vm'MO`U] Q4iN)w(H&9ԧ,bINL'd{z[ G `σKD_]O6J5 }D*M@(С͹3=VGlj Qն>"ΉSt xg M\ǨYiG/v)m'"o"Dtn wNU%F8P1ru҃L ImI@)}B'6m[T :[ [~H2O5JnR% +N N48DVr+ReI4m␔+|齦E'll=lXVS%f^I)!U!YRݦ4ϨIO`Jkj)kCia%r**SԴ% *Xx_i\ovY^.ё*ߑ. +D΂#5R&T=S/T/c,TQܩfR$-KY9>QD Dn>"I ﳾhx6}lzվ/̎mVȭ3BVJ6cdCLLl-'H[xvhw@7~XqMvegK®of'-tr`7+؟;=JbNv;iY֌ռ񄱻ݐĆJ*^ +1@VL2LRY,|~Uu.p5EzQvxn0Fq:|!pc:)͹x1mfxJ$d`LYIFrh:ҚLGW'b'_ [vxFs^M X\~oyYo廪H֪RޒN5G2!l%Ò&kG5Č5Pj#gogj 6iD9AmE 8mfx6ae=",D F -:R&z'c+-1ofQ0:OZD#gYԓ7>iAxOU؝%6,4sɼ&{0tɹ1%Gڗ{O$V(U4LJ<'i:I.aS%xyjH^Vb.$@l LYΉ@q=E F+R;mM Ԏ/ͩQQjbAJPS*;fs%4|xHV1 \~#UOT9Z!g"FVYzzr 8zQ,,dnoγNRS@ˉN)!)3R`A1.EH`/sHWTYH$Ae{[=OrlƩ9~0O`rM7S[ѡgMp=R#WKn Jo`^;f pӃ;_1Pw*9YHj M|1rI& yԿstJȁHZD/$f萗쵘 +O#zZιI̭퐺B£!᜻lƮ_,QJ& *1i +YR79& +Rrm9roÝTxsb6|m(х"qQ@ɻwq"WZjVi8R߀Cglr0fzpeF˶Q;QJ +)1T-# R~~w^'y=[m߳йϓ  Tv6lEidF93L)+[[ 0~G\]Ue$7"Ujp3NSƀ.O!r>{GqcpLxQ0Ea@bgVrbil`#G̏`jϒR" H3hZ'233],`hKެv>ڗwxV n 4/`nKjSum o$Iޔ 1,m%JHBR%)s5aǶ6MZ>x۲an;_Џ؛MG* |g&;K)} R|0]vh`LC/lݱNt4T@'/Q|#[kYa )>(dh4̬w"NT@kJg Z0MP\3keb>7T|<+'`漐N^ѷ,BpcX^:EҡUާ癫jDq0z̢>Woshp-m=x6x#fL!nۨ1oE.!#9I! 8^`sPMSQj7 +4l~Ŋ6qW*njeՈJjpLN43YNA5) HJ*۩fοTD%6 l4?RRB"ގpο`YXP{b3G^`%OI6]kB4{ +ޒkN # j#/ #݁#<ӱ=mtʡ8ay:,)fUU,R;i"ͩ7q2߻ZGM(UQb˂7sEm+6>4S=]s?5vISR}M?f$gqmЇSul27 +ڣbhWs[KV1^e C&* 8i5yRpwHXq!-6χ:{'c{Y1xdw.yQ(jQQ{d>S6_mlCWJ +MX65J3O5Pi$1UJZtD}}kЯ6{b#Ufd_/_d Cmk$"hWSƅ&v9`lDqZ:jd"=:E4jS33$ =6Mƫj44ItBW}?y~AXЙ+hcaU WD˕w7>OIY‘2 H2g*:mDx5?@>Cka$IK?I[%-Ilg $AA*)UBJh+PƑUIRyѨ7Мe$LP,m5r_qsຈ%SBPSmk.:|WP"}GqDi%4\bW݄l2FKxYv6QwEX4f`hۘ5&cec=Fݙ(sZ߆GŊ8'VS b\ٓڷqh"~kƀU C@hqij飹uzMAbߏ\~=5ʍHKm7_Ĵ6HFKbkfB߯|8pCkV.fÞ@ىan?L;柪̙'FeR 2Z{dku(~DWK?5'%ѨlxEKAaC!HQgD#(sr K%1\V +dP%8Ĥ=k*UE@QQ5rʉE7'\s^"ʽ}9/U\W} +AoeIa\|$)q,`Xu\'1 +LCLXu.R,%>-e\ ũ^(\ +PEz˟&y\:/tFNh6Z +pOjrJ'! I`3^,֮['UUX%kV_ +@'.ض +G߿{ppI|Xlcc1]=zꄾm %` rBsf }H4\X9u w\<. 
+P擲KBh?;NLFyX9rzU i(R4NpD[#gW(MDd!NBC +"$!ِ %)YdTu{la_]e?-w+l8[oE}aOb~A7IwK+vW'] yG oF\}VI7}N߆)ޚ~,ԼuWo֥4鸏sO4qM뱻v +z9~u9(@ivZ/9ODjAIκC?LQ` +B_0E X2ٯ2Eh?_$(N5DV)z_ŻbO.}b.Q>#|IXˑzyc4*#\nEӎPia/kz6~D|Ӫ<}ǂ=X8#oΛ + -ߋ/J8?IֶW25Pf +ۿuRv7(yU.d:& JzgY3~4#.+wj2ϓQ[Q`' +&[2[a޵PGtóV_a!-4v?l ۚZ.{f\>&I(Vl1>!{X +0qjmGW|٫EcM<e;8DXߎF3Io}_{ӒN7[$V +zl1{9 Jn!Uj݄t2 +`[{aƾs-W +"(̹lK^k./˵75C8UuNDY#X 0HY#b>Mi }Gfe\E~d0.e é=SP~sW6XN9uVS}iӞPPf<=FJr]XU[.QG J,D9Bpl5jʏ%$dGd\ +qY>BAw"g 5ܳA'Ǿ%.L.;6{j@\Y7s|:=܆g:A1a\lrַzƙ-LZDQV5C9E#%Ⱥm U3ՂSFlT_)b{W +z*_([B _"3-z~*U@[@=Sܾv|K>gYlA  ; +5싌FڰtuP\=s/fg9ȬfCr[5(Fҽ+|)G2-@V1S2̐-Cw1I;i٥ksv-ły o~`FbɅ@џш7R/.hexPJz|峉iz9ήE+r}}:f{_жckK|Gb+?(wJ{ +eͧWT/AD=L]{[X%qT"/zW!cDVhbJo}gg^N5IîT)U΃(ܬk"@v/2*m5cfleUKCxѳvapt;]h72ɽ]ۘ6mCRY!l2kwFtBTaT,{;RMH[Gpd(`p+RJ +kzv'u-K ֣#Dh'ah1[, +OPu&Lْ?w/͕22qSX/Y̦NĤd<5%n܃zX&s<1T{z{ lRik'7{k5Y]ՇU+{2aoB/ذ,֘I@1i$'*sq'gX wVח9ĝ6'LfO٨#e~<+bD=iAsMPF\nf3eV0TH][>v;bmO5L!̜kdgm6+ +KS=zl-da)9VfP̻ޥ$$~WI]~LN a8i\2H1~J@ p7)lz5?e܎t5=qdL~^ˢSZMg\qp +|Yep| tI,DVzhvP$NgMnvW1\}쭊3@{;6}_7;cB̑  SsmZ\qϑW/Z|=gk,}Ei{̧{s"D=MTt J72%'-1a\+aE'f8*NKðD?L/f7Y[ަ>3NY, uTNńq(b$x>#;{Pw%Y->QJOcuJ76"Y{?ސOmqJRQ>̐u2ƫzTYQS .ĉč8fv"%'Ξ?OjȄi($UWwR{n(mm=w"I'y$5?Xkúa](MG=-upUW]׫'%j9Nź6gS)vTkCRw~M:Ո mCNG 321|RK<ʭM#a)薂Sˮ0hDx +8yuv@*iTت r /՛ԶLTx@:(:>r`՞0ԟ7d}ᅉET$YK#I5ϸIoP>EHw,Ƅ1WzK찲O `! pҰU9WyCk8mPOࣥ©3},7 /bRy^bVFGVQ6@CgJ@o7JCY7`uut,_@!#S=` l/kZuv>hhvQxc+gxH7I7،6)#0^xY}LI~+3a݇~,>7?[~]0A۞~Xokc K9l^3±=f7 0 1B[06}ÚhgG +|]Sky~0]'=1״TKmvk_>->.4>/ZyJ;w:!@䤝eڳM| u\1nA: RW)L~Ҭ*5KgOgq;4ܓEnX˚Hmhn_!ovTN 8sg b#v8oe8Y>}yaVYEL˵n,MyZyN2 v~f>KY?|4Xc'Ql3̭ˁ׫B\d@}S᱑W}+B9PP]b^/;vZ}}UsX8-eKC8/}'Ϩo>߮Cs7vt",,(dbȤ2J7u[+*_ k9ۡXZ Dg|g*e{RUg^,^8TsktmiVjjf5Ȭ*wAk%o-Y<4?G0+BLWJ:.uKVIEZ5A} ^G+}T JT:itfA.04R:|[&{SZ3ZMM9ǣ:FL ln잯3P!JI(|5@֎aHj轍X.Wpa!]Xm92ҩͪi&p49VRwV~*yЍNeOEL>nf˯z>IqF[-ொSWo w_Q +GZ?{J-L`Tpͱz#'X( ]C(M-`QlQf= )Əɉx=w?G}n>8)g!vw|kWpb#!˾~g, ^'{gdl<LXoXɂWdWȞLd=&{)S`z=J 9'W-:}iE6SFzy7`jɵ儁m c`Gtsr3H7:ǂcL}2gŵ/vF"iS~8‚!"̐g--帪k|QeGk\({aSQv4D;~u +^!ub-+ u?*d/T:%gQy~_W9 2Gޗc^/J^H.[ ATU֣hF!;և_mw߽|7 y25CykR~B}a,(fE*+RfV*hh7{REC +dhR& jǾJĬh *؂Āw8ʰc{ ѡh7X"7X|LH/'b{jG?^@T%JTON(n:h +yc.gҰuFr=5w^`Cz#=FJ ViC-9d7KsD58зKQ5|4dlwhڐZON8e#8evEK3ʆ$b[O+M#-RaXzj|hG㺨2ue걩] +=.5uO]j3d[f\eS= ɂ-#7ٟx͎k7H\%dVMGE kG"?o̒lU& .cKG9cnϧC:>lZב}\E%'&9s@)lMn,49?5F?O1fC6sITM_2WLWyqÛƔq= 6ME)U)i8IM{ (G Qt=%s".XoYH@(О"ț*̅DP)kT8Jnk.UꦚLƤTLI+[v&xvC(K\NRrC9)n`*,6,N҆['1Vvp#u@^wi,֜\1xTn&HmB,b'uM·<[#v^ZeE&uE"eUImCkgjZzNM+Hd+Y>,DB B\ +G#$"R/pwyq+XjےȨS:0P(Av; kW}{ݭ_i|$c;E{Gv5E28W0cj_aLa[J|IJPBldbr\’Em@1Z*XV)M$ Z7y>dD j̛$TTmki,CtSa:>dk^PJW},^ERk=%摱lipb<ƊM۵q`,b= ^腅J*,_۟ϟ7W5OO%%~W5`ٯ$~"QcPMT%<;>I EG1fjPC#Jh}ޫ-e6D+>u()CBAd T:wxl`( oxGM|5%(m4:d?ھ(n_Q γw(vvPϳښm1 +ږ!%ah2ώB)< +ԟK#QLjsz0?mb젝~WѮ/gi=+ǝ,+ιR*j!(9+pPM_W9 E苖R[<0FM,Go: +AνFTwV룿_6|؛G UNPbuU)3rVaauf-;IƼ6Ic㋚{c7k Nd}{k fq<( nb7HMjݒ'j⑾LIM&Ae)M)rS53[˙TU>pA#,p=/H*XOѭRB Kו'z].rJ?LuډdpetIna_ (r92f2?^~`ps]4dmY#`rWCXLPYuZ@Z/ԉ,U9K$AkkqwsDj`M^_Lf 4ZkּswFׂ5A4*N=u Fm5Y1M"< +˔jyaQkxbaD\w|ώ: V2q*2;4)di.'[{MGc KZpv+㎩3QLffƼ33X8)~Ckz~mgK_ʸLi//F-ݢlM'%+N>WS_kZ΅,1O8K~@CeAN@ݲ@U.~@F}ూ2dS QLܓ#xfT@po{~)"'nr(r3wLK +ᩜõfLEP|~;\7.Q]R"-^'IdM@RmwUic˿Cm?֖ءܐ}#uhsV\~R7ѪB3n7^QCr_:FHʘ;=i2UTn1ֵGN ޸{d&/¯V; :hmUJqTk5r]6>sFP;ǪU./]-z _I*]/b%^HإSP;S'~hBSr5T|Y (5m@OP~!eWU6riX +]ίR +,w\8ʸtl/^Qn\&U +=sp#!d^'O)P\#*PC|_yD~'*jz<]5D 7h'%68ql9nD-`O=y'zPNTgjc| =FT x.JhIB 9|zmE-=Θ>]QGqIEI"0E+J^Si~ ([1}ߕĦ>& +q=qG%U=Ra<_;ݞ'3~cu>efzN(R15h;?i345mԸ>Ц?)}qx=Y-"Hs|GË5 dS{G, aU6·k儡6B]..0LL5$3q +l%>y,w}W%vA ɵ"ЕzFXY!m*':և(9zQg}FDGv_hUYp rUXocHi[/^&Tf'HhSmo92 {Np ɇx hdwjhԽq愘\.mFK}˿vPT*uL3mL0-d[_ۭ@iKUv4lL%wcz#.^ّ"6'*'I挖V4ߗ";{ʀb9!z{V0 0fW{͍=ܢ"f C/(b؃=0YjPZ#zמJRze;¯\?Jz]t3}W, ZM3եUM>8uDʸ Pc bS^l 
'?YN9^m%5v3ISEAq\u`'{/sJ.bheW'ә[ ;NpӐ֓.Vf4 + o!ŌQEz)#CBPguG BM/Xj:NnC޺_t.X?\2(? vlIo,?*BYK16A#(2e$X [i`^J1ioKzoEE._wD*sQ+"9E1_~QHNvH>XGX x ;?m~2uHj ?"=O + ۨ +*lO4☐+a"l5M,Kŋ >emCw7ІY1䡝d@Ue?T( #L7 z#zcE:s( }B,G;bjuΣW4~S|6ρz?Ӻ}r\ӃVO* 佑\VG>_^2#a]d'Mm z %v녀LǠ=O=t*&ֶ"@b #< ef8IU;CZ$>{f|Q zȁRn )M:OmM[56AӁ&yi +d ;ŵA2fU]:rT,+XqBĩEȖ f3*hjE?N:9[>!Ahi5%"#SR1@)vc~RiAQMR>Qx86ԝw[=<ӚNC;~z,~/Ͽ<} yr7b{ABNo'+S/'ĂH Oww[:)+d; 8&Dȳ>4]"U\Ds̃JW4s$<t!X:#zHpCA%4EJǐL]ճWQvܷQ'I.4RUf`v6>@P})( |vøX캽y9]Ckprfs'M<žf\0)*0=< >8kl]TCڨ,ؤmTӡՋ z/>g%C78^AH+rmD +S<~ A?Q鉚“8XQpFlW-`Zb"RگK59~s'3= g xֿCBm鐉܎igEqt_QƦ#ؠjXRr#LF[Xp٣oٺEٗK5̕Da5oL7[78)3=`I;@=bWi5v%{vnPaӮ}}; ȮmMI͗q̅o0bb{s<`hdji/l+tm?p({ۦGU[֐-ˏUJ*UsBZ +Ɗ7+.SFcdb#9^A'fk r +wOE1tij8WڙNpmGa~+ Q}(X[P^ġ+%MWY+AxwԺ-.|q$_zD_^#i# _ "&3 {ADf`پqP(l^'~W%)=²ס3E8"MeGЫ49,J" ]^Zb{ٿ;4^Ҵǝ僧r+sic?C؝}.~:-~e/&}p$/]y)[8k۷Topz6~o`KGID /%iݕM mndjNŻAD=g")C+PԕиN(*NzSZEcªjlr0и +yfY-5$~Eǩ*H%m&n = _ X˿-Z7Cub1;vrSp'J-RN{1NoC[U%x1bWஶt&"Wy+ ZtnC;Nxa7(}aT痲cejEfޓD#MOQ̋ +t&P@veJspA1b%tFc7ؽ({4P2h^lłNZ>p7\"?i +WpjJfnMN8ll +b8;6Uϭs>_%PrWGlw|ߎ/v1uR10$Оc +Iffk _MZY@Epʪ9jLً5^neI5anzR} 1_cId ҋE-+} +D&g>zKE 3@k$3Q׏xY /\nҌlN.6:*BdFEBdvJxbmnp#eKd׍ئ}[k$>[VCJ{U˛&<>n ѕ|=V8E\UE B NI> r +]uk,Ӄ +4wJ_r[n@2dU3;,ϕytH#f~!|"ClE 'OA=~QiPE&Ek}DW6\xcN~_g{OtK7I@gJ>^!- /ŞBikfa\x\+z#QU2ĥgJCLM3@JXW],(͚C%:~cݫpHQ( L=U])WoV{]AʯI%,e4-ŷitQ:/?z髳8ɹbW`Iн6+!K)5"Iq D(R"|#~RO\d#; ("lSq2-fb?+^aY1N/etK3E$2^7&Tϡ&lb& Ɔ;wQj QG$s῕"l|Տ6#uj49|)y@M;Pq* q$jOyȥ0ut +ɉ4T/ӥq&?<I!GT5!%9.IY+%imL0"ͷ?~*r"uʤ0 Cl0ͼ;"D2QFӥ4Y-ĦA<Y8A.rF8Ef\Zy;]XmWUDwnC ܕY=hlQ+a'894RPmQ0o().?ko#0ԙ&//LHUo53SSʳ88+G8ȣ4* 1:G4+['Xf]dJP\KMڅJbCYilw=L,=kftIs(!Ic8X)@0i#nфc2XU՘FVuRyTyhRO4G>澨.ghйe&'/-Oh9W"pv dkK\ " +YaH2'a&Y.I9;Lki:)!uZ`t`[A|K3\bciXpmh ·oUZ.\` ;"hSŵ*N&itNŸ.Vߑ/W7t*!",9?+KqUe06w4V!63=N,“^.pT`$?~ 5}Aʁ쨶a\)NMC= tOb[OYs<~{=;POܨF(붨6e");9~sԯSPZtrؿkiNnf68{ X[-ƣ=7zB~ɃL#ȯ _|nk=z8DjòkI)=LLAeuo80[9&, +IQT<rs +wӴ҅ZeQ1O"jol|6w]L&T$1=VK|\w]\`cnlêݼIzqgÿ DD}x'ޞ@@gj+ +Qy2ŀ ݸqF>GӴ{౤ʒT88{0N]k ':ȥ"eɲP-!`A.L +KGED̘ s> "ws:.%RN%Z_#O8ݖ*Z S}ھz|h@\7 X4GW)1,XVq2_##PׅA Y$H`"K;uUܗ7e?Dk~5_BZ]m,+FJ'(q !])c6RIxtmRS&Cs+jďG 3mU/◶.Gw;j^@Ki&^}: +BPp~~N u]&]=N~a *5}C=)2_',N=#j?fEmW 㬸ǒ \r"af76I9; ˳#1(cjQ0?45lElqy!bǸDlDl""6Rǥa,Ҕڲ_@]ZWŖFPIP'Qm*g[˥be<*vJ5Kz Mj&:O>7(c ( Qn(+͊tj v:T>yYT,S]ɲ<-g80E9ǀ8~/<&`"[))')'SOhf>nw eDYsn_wxBI2<&L$g'=Ulw }?=~媞 RQm +=KW%n:8 +Y֫.GTn%1yULw*RUe&yn`qUi W&hF|a̱aibqAFޙ 嶼kΥ=ȭC>EC?; \&:dLtd ^I4 ݘL}&PU5b͠eiq[j@!@gđh\OSnCAc-e_+|6dEBc'afo_qd`8EK/SbiYO1[Ve@.^9im,F`阤m(ўy䬐M{Y`]cI8-8l -\u?Gcoj`_[-n]"(Co=CM'(OȪM`c+? AK  C7Pn Q`L:hMz gI|`J +bagf-ic8Zܰid Xmcl#턾mIe>te +tĘfi8?79I12|]~B$NEZV=J"_O&NJDB!XP0[ӈ07⒦ m\K5N|;t!KY'sGMZ7'44Q'/%5& +Ioܐ6⃗ ni) A +VRwT}ƩJ!c/P qHE^4W=> + |Bh8 |S:FFk_j-~hֿh;)%KOm$Z{II5ZVKKOўj4nxT2^*mTZc&_j4{i44--K־hh;vIv4Z{iiF/f_jFKK'6_4FkRh/BK>>o}f]>KK>/^YK>k/}6.}>>)h@+#o70 ҜKYNF]1۝foda1VcBZJL`/? [qY0( G2˜|]Q{%%H,Qh(euZ*'?јtm sѺW"չHSGzrۤW-+?cRE+0&U 2ӂX z%),Z;K䱕I<ʕ.bv$ qEy,陒a9EUdh$_يP4Vts9 ď]i1v~/xE\&wHBvA4\)Yn@MIRDFhLKTKi5dآ厕ER"5W0ii%Æ)Ak[+6ŪB76*D fF4fqR5dz[?bMi` +{.! h?#q 5$մm pv%L/D%c7~3'g :˘Yq+2; % AJ#5nUC~^sDe(VkXI&f3F? 
7.R;/l@l|lY^TN D+"r vk(mP|:ծL떳B02@ӯ؅Qr`1\6" \rW«ظh]Hˌذ\Pbe„}2p+oL GZgItkvΉ}}9gHkmυu'u4s`W҂ v@-DWR%A4&dIŲM:efgD8dSA I +:8g#Կ/$w(тz|F I=lU6U$;!\:-kZsշ +Yk>y2YB, X%urq x/(U+Cܺ@ +[K=Sy=ۓ4WWp2]F"=]Kd3Ҳ]&Y(*b$@͞2?-]nQpBI^V NŖ_mv9/1!{D$ ô6w80ƻPaDg;-jZnC3ֺ=>O>knmKBڛr~4p: +f5Y +(s>3F-\Fj>l\4lW1ߌi?mS5Iݞ hX;6n !y6畷_vy,kKNv& 91t.WBi^)Xk$b%zvPԖFM7ػ _$߰;`Vs]ϩL4,NHkCs+S^Vt?*(fԮTJÐrnjd7͵UYKsGW]Ys;B-q +ʨz[,t>9"qk\Niɤ  V琱mcCf9NؔZ8*%x[k:Z" |'.'.ZeR4e;KcyMn/];:yExjd<h^2P`p mVuEMŃO]IV.àYgTݙvڲʘdC(gQߛNnb.}]z"mTj '%T:*gq,!]PєbST=ZjUQ"o +nEcsBFgi +]wu@fpq\SrVcA24l6GИ(fvQEt]xlUфmFG6̚jcs Y 'Z̵;©/2D?ĵ+e}K Q\@;if="LkV:-Sϖ:3X(9jQZa?Aߗ--Ü35 ^S0\h׼mƒ |xak4*RQknV*{NOL-T|(^ݫT?ٞ=1ϟDVb~tͫHOXt&M8BOALX\WCVnPާp]TUV ~C>}|Ǧ붴v@΍//PWB4t>^:TB7 l/ G69)3DG&ft) aK{׮n@x?84=ixGjBdi]}h}uڗq#aQGjͳoGgB/.6nL.LIAj8S$oJ@v޹)vQ(%wS2!^=KeM.sڿ_g%M(!PQSE +>2%fl,EAM)5Pϣc~J(CL$_{dD|fK9`* c t@_dFהFHC:B7u&nIơvVVHvh5K'{0]zDw\ tdĉP?5cOa{90/u6wS±hS{@(~~~q?_KY/Z?~\iTFal0sA[A]tz|iBCE<$!8gV::U?u[ֶa{W"+?7OGQ-CudcNuM>jwurij8N RT#.73gtJH: !oA[ԜR!MٺUf3%eVz\ ,#%KW:L̂e /;\P"ae ??uO5Mo4?uiC=X܅9p%C <+S,~C>:#tK$7D^EG#?xRJ '#?(׸Dǻ +c!yK˜zD1cc5π)h.mk& #'~$jy dj/ b3~ڗߚLh;-Eȭ[J'AÍϒiҘju_mɣ? Ga]#tj,z;Mt!a%# "Qu=~fC nT1j ұf⋫?ege[8TVxcqw^r(uU)A{_UDYEF:U`1J{=J(@"rXqʙX _RIB~Sd{]b'Eއzv"ZiLy04V +> ̝G9q +{_EdQiIYC׸.Eت//a쥀uhxp3m=ƪvf1LH/hcXw+S^myh3*9u0gfZ6v"wXє9&f~/#PѩpaUEÄ]w 1Q%ǫ9S/ib8K)YK픓āc.< 휄޴wVI>$^   aS dr̟(0h l#! -([47âa)ф +ف /V4V/SY4B3trt`]cؗ ${!AtץrM)37'oJy$;z.B1- gI՜+̣dS1KPoύk*e;gWX}qaҙwBiC4ر6 ED{[ñg% +VU(f, I֟w-'|{Ӧ%_iE^7oJHfTiޮEQW.rY~1.3#kWsejkSc{~˥I ӡyL'P3Di)[ <*WZ v,hHrq=HlL.h~JT"cI~e>ˊǩTkVM.=_>+t%*0N8 nxU5hwk'_5 +w\pb3TB5VR`^&* Feж(p!zE6u aTS:a%3y-<бWNj8ݙYH~G9}|%4<_$5fQ(¿y PSIh_//d!Y7@alGEȘ}DQ``􎠊jFߴ|/Tr?EƻƠ9|ޮ9s̙._`b,N?FVXbfZpUK__>^9F'V |e}77|yݕUAek5-ဠVtu P  Pi1 @*'g*ƕzI8yx \~I藊)qȕn#+<0QX{_H't:³׶zԂDt(m4-G?,ějC 8?G&|VKÂeIԙ9wO2 v*8'āŗ5\*U_j5 qizkE $bP[2VSNPi9rkYUAڰA C[;{iW^n|kh#-yV{Yn>DX wЦc'u].k/ {C7J)Hs/ kԠű,G̭˖1xqȓ e@Q 4 4eo2mKΨ\ooW)pwc\.Lk6AƯgUsߡP(tYtQDQ^&XN/쏇q>pu|by神;x19z&DCݼi?vQ $` +NEa<<ˆWUؽf (`Ĩ8=j2. 
VQA9\WX8VR$yJ"A +pAFF͢<Z]̕lBfAIu}Y«\çeC)b`RϻX0Ry,@)]3 V48ce +ޫ Ty>~.y*V{6=5b0uմݍ.na@Wa5O|vZWt4KB +ƀG3-K!YssZ*`JiɠE?:/ ^^s2OBc8#S 9v_ѳ-0RKm7zKm'U_,6o'vd0t+o#')Br'۴T.dλg乹EQ~P,9 +2ʴ{0V8'iOshf^نihMh>s OgBLK\ngq[K }FKSӽ-qP 7j,9_&6l3B1N+Q˰3&ڶjkɎw.!VǑ xWftD~%Z#7lmyLgÛ6mrB+a~iZ ^IyM8p0W ML,?@Q6?yFhYkZ=@>ZG,{d+>,zX[2?aoco |ikԴ<&q*S2վQw.3q# ᠂XJ~?m|,kw)r;QyLE{LE87Hܐ|kR'%޲;ݖƇiZ"_ n|yI`Nk?|VSW|t9vtgqoY 6>!IGNܹ;ʺNn7$heY[N<6^҇;5`WA4 2z~EM׼Jhu#܁LsT"<\ [|!Gk`hB{jk/FC>g'>>JD݂q$: hYCB:;x,zv/~څ=9/tjW>59|vxRUuY/o׎q_,0Qgmmk9:)Haxp$ÜbxIJ+V#G{ky 2gYMgՎ.jt[ŢTaq TPj>uXAcNUM:9;q'GĐ>1 NZ.}Ê}^¯]uиMZAUUPڵӰ\m z^SE(9H]4b5zU|nq/tFBg<觢@'E u ͏ }vwH|9(wn 껟ޢqC8^kFQ"rX}YוnN +5޽ )ױl[heQZtxwkjZ ՌSAJLC6~r|{O|U9S?U+L஄՟Oa>OֱliުKD8:A#BTU|~OLW1{&kvS<Xx`iO[g2vov_|`|@$IcpliH_]sl0N<35 z|*Ԅr{JÍw"aAAo5nD+ft>1KVQ3F?0_ !E7lI_9|Cq&\`cFK~ O!h2l`-|Z TB~﬎}V܍rFǠ:~NHa[\+{seJ۔'!wh.Hi74LTWڢx mz.\LHYеe"nֵM#1Bv1,Y9eXc9nJ9~C ,NLKHX*F<\(yn#2)WĥaO +y- i"&SIh%:݈  r_XK"pЌ"jczR7E eNhF/<&X0ZK`u(*OMj2pYp~e7NetM`]vEp4+.U +.=Sw +doJ +<`K`v "H(݈z̖eܲD8.T'wvnPó4*ЌЫK ݹcb59]̬{J_<͚^~t6-{h4 ?q-ɳgD9b]:}ɯ5uhjl*p_ņJĺE'61Zu# S"$K{L>FR$Z@cEҼ`'bY'$L:ArƸqOcA\-@^(J"͉hS\hJ1iAFB1x]aWeA,Y2Nː)ќpWdL5}"mI@n"tC8<~9k:70X3ЉAPԙj1s-j8rfIϨE@?Bryz%s&E/]76`..n^ټ> aTTYs TQ)Dl IE))nLPٿ+#?3_>=&@ ihO$PcVdG/bKlΕI$>GW{қ{8·)iUE+mJ8 oAwP+ʡl3T};~.4 7d].zw_)7CVi102"{ADع{4K4UC?걳yJK c~uW{c^ZpTJщ ¢e:5"Yvg\4}?֏%ZiY*{^q{HFpwmf~[ӛ$񤡀t!P[Dtbܜ  (BQ[n>'F`Tw{߆bsTT[ՂU4G}GK߆Yޕ0GdBYmX~pD\$^Nn$xKvEVso23SLxw<'q`SH ksiK6<ܰ$6%|>3"gmewUωO&;&OJ +籇q7ʂgQ'T$,k뒋e0")!A <ѽ&) _ʼnNiMZyӎAFv/+hw7fj=qqWk:ǮCS_~¨WGL:hUxg#lC~d퐟6Fw;4c#--;K˵6oEq.",=vGӉi`8/g^;΀]&Y<*2M%8{t#٫xw߳"fI +2 J`|zE^μQ/JaUthѱkZ<Jf/wԯׇgײOj>ш@TMLy/|LMRFM`F&~w7׹;6sbIqPݴkuڝVlE-g.$ ;PA',ޑ6PY?9PzJR|%Ud *84f}=wqd2_tZ.`[]ѭc\sj56TnO]_>G kL=>{'4|8YߏӦܥ{>APF^FyoBI)~:=3gWaP߰ҹÅ ߍ-M͝][Whn˿Bf}?]kNtb<½Q>ˆu""2W>1YC8@"vnժ+04}U~ pt߲ϼjS r60 +5W4^Lj Cc"Y!d&L+VXڕ /LY(4M>7FbR/q!7dY1rpӺ5:>`ong +|;+ni 9n4+K)`?6,VYuR]VدʻXA_F%{I)JmXu,D22 %xA3xZqQ5jp(f/LTA6*vˉic7u{oE(weÍ(BT]-Ѹxc}% Ӣt[Gv?|cU]vS">hMTg^r2;-U/ 2ьsC Y#E0OtؘHنIU6w#1BR/_,}}<͢~m_a~$㼈d{VHNm kn&B^'NRdd k_l E)H2J]^̈́A +@Zv; + -฾&N8h\;[P?c::69D +(ʾn,Ma8_*ŝR>Xw$=_Z9(Ű՟[%MCz`1DhY:>!Z;Q Ql͕Nɵx[=vfH<-|3xjiJ8kRz[Ne  &ؽ0Ǔ%գT)pR<:J¡EnN- +إ'kL@~v)%\M*I5RYV3n2ڗO,Z36G +n*'B GŧDvJF4'h(5m9L4Iݚh:O'drm;ƣD/2=wubt2 Y?1fA 2\0}цn ƞg<`Pk|qvh{Ynσ~f8ʡ#*'ȦjJ!\΋ovvIܔeb6ف)F|=j/='C=P>\?_4dIR+nށ隈qBlQMj + +kU㉈=vЪaUfPEsw\O$u>}3B d ܖӕ*@947B%U{H&5%Xp^\ש7`/T(;>cEDcAnBr0Be~Q$sz:+.aEܰf'IUf#7#?h>Wc9j(.Q/:1>cZ≬>+W%yeje5@(TRKҠMpVr#5Xz_(ŌL FIx7–ޮPEK M*LYMLRoyߎ~`>P;}+ڂ;/+w[Iڪpc"_hUeIzI 7E wU| !l36F[MV1 + S]DD[ #1n.LDžj"8gTI8b߱ M%_VdxdP; /y-~DRAŦEO}'Hk6Ar-[Ѥ}tؙXדВ% }"X]rb0$ڌdD;)$Mk[Ѡ3?M_&2B>f9pQS֗&8K)oRҫ;;6%7 Q,A]l[弛!$ +o(CF5h?Ī31U ?; 0ʃ1֕0:Xϡ}p +U@"2E,*@@8xWci'cH2Wt&ҖNu JFWj;hsPy Sex5!t2$h2xH|jD ?Qkm4h>i_Z֯ߐ懮dD'&h3x@g)7T'UHDN}  NȕobNdwޟN%3Oos}18ɒSƁTf }rRQ, Tk[.ZG Sɺ@ MW(lk `M٬"φ: RR[frf5][v E'"ɿ%vO]!F4'.?qOdh؊}lFXn7g4b|V[~ds6,þ,TFbʲΎJaU*QJM@׷KZ@vmd; 6>L̚HH" +?>ag4Xb}g~ɺ>t꼧a}wǠ +y2Y(EaNc`c>K6d)X7ق0L2mC yhp*}C4ǶIGˍ %Óh9U%h&,_vuyhw;گ*r+kր_%\y5 NpV۩w3Mu- #+0򚊳]*5p=*ԇ b:<AIY̧<ϩ/6)tǧ`t"=|PWRs_'6O.?mpBtCWanuBCٽ]iёw|uR()-v]gKSj:.WM[gՌ'P"kGSx*iUwXx[_}Rb۵aO_d nӞ踨g~[طegmY?weߖutӲѠ8oyǛ4Ԥ /+ɸ|E8N VT${+noWe wp,-~l?~ t{۲ː~HoT=!aX})cNju$?;N?|wQ1C-ewpBP3휡;]j-kG8˓ s#o{OG[ PcOG: +:@VmCڲH=-p 7]:_i9G5q6Fo"(r: $ C{;+I96ȶ ;6+js4!F +R"bzݎ%H׆Sꣂ ;*(^-Q J~F<-ґ*Q ޹63 .X]^Dy-:&9gYMWCDI,Peł1~IrjAqq` U AwIiqN .)M'(Q_cRgKIW: +Ze&UuTUp mo͋DU'd($x70)j˜ !c RFІd+{HK(|qn{DWoV;UEFGP& :0821B]5aR鹾vox3l5Y+UB̛s y>퓏ߴ.0)L]LSds ΄t2T͓w[IWCu +]qb̜NA~ iUGeH+k]0&vΪ@&L@P\EL-m:U죾YOM~)P fYc +gN؃gsAI2!+Z}vOC[(i^ֽM)ifSH*q}QK] )h˱<&͝9>] ;˹M:RqN@8B"2.TVcsiOLf-I>v&Ub65iPQ_Z_4< oѡ3JƤq7 +~g߰ +\P΁c0wXg(|㜁Yuc-5IZqW߱ 
Շ]Ƙ64Kt:k1`g:ƻʦ΄^j0`y.8hNȥo]&bn)n36dvDunw;Sp-MόO;c%`o%#E O,WһS侾<(l־NDΟaT8iM]$@Y+K/(z닧1l}l\q8Uj^hIlbpRH>2:GF42Re֪CNFǠSjm, cmj5i#~Wq,FAg6ݜ+%4}č'08-O٤lcMnp. vӥɎ:|fN4I [O ӊO7H_[Z#TڝhmڍAS<;MeWO5lͰ_AEQ3G\f}G%yv r27cV`/?jyO?҂7<'r>nԠl7~i "m(^׾f <Ƕ{JhH!f,VjX63#pb !2pV?=1+RO +tu7Qb1a#ʾZSԉ8D {'w={&@al]^;Ї5q>zk#+#M6I,~rwN9}f{9"ϦM*Vuν溂$gmo+ƹr.,\Уy#.)IT-ܦaU=dGͣ1zb?Ie@wꀕ '؏XusTL!niIJtrhVn~I: ߒwFjfaݠE '>y4OUc]]{B-BSH'4:I@9Lo~OkI< +s7$G?ωI੓هf~xGPm8qzTA"jI{u\M\MfZc^YMK,L5U޹޲#E5PX5Z}6 J*D?thW66ɝCaф.4UOJԪ{QEHjQ$2\?nm&]6cS~TD~h$eK-WzKy5ҬUZ CZ4clX>HF{_*tv:ij`ek <4J{gܹ:y2Eo +F>l^fp?7#p^T3pp33t2mBAt6 +:ul]YIt,9@BrGFdv8 fMTڟ:fs) eRIKg!Wƍc&;DB"|w?E8S0BB2P&,&ɞ@4bz.o8G/kj:Ms\5e{Ųdass9쒊[Ɖ,M_oB~[f+߁^K1D7E12oYS`yX<?K풟M¾zRLщvp@}}nFʝJh[rvcsՉ+0o].e@TZy&@rdnmTToۿA`gI'Ns֕?Y՗\H*>[.8YR`BNa CÝ phZeNQb[^< i^@W`^[w 9⣌E?{!>FGV(~ !#Arm1 =!emɊcѩ"Bw-w`B:7'7=t\1iE)  +Ǘ+ )ඈ+3f;q qb.NQ$Vzl#Kvlx>5sjS:ItO9@.T_/EZ+ +1 \DΫlaC4g +co}[iW +$9ol1XSYB^ah׻G4'?K?3J\OI@RptIvl?+(؟_uh8 u഼~ڑq=qӶLNc˾ϖ,"}t eýlv]nUe@ksczo48ńY7v06iTbFm2n^ǷTVoZGYr|3:b7Mll,SㅢXJћ UH}rNlVOf5w8?!^'Wx"O] l?ƧN , WgkQ[euOԮ'ӿ]DEO4>IpbD*ϻ/C(!4XB H;͗=]0t tw_;ZU&%HIUjo'M8{WSdL 2cߦ9Afj#CK])%V77k}(=r1*!LZ&h +h(#ooW(gڞ w\y\1I;r`Tkxnxr?Ӌ f{.cƒխ(u띟R| P/h2JBxx.$͑0>|$&?p=ۤ&/{N}qD=I0fBƭ5hD'ܸ\,$vW] e=s~  5p\,J6垘xv H@%BqNb$H2+DG߃ω-'tm"z+o'xПQG~gjqkIbĴkAjgS!=hU-{*-ͦ.U ?9{Dˮ7V݅CO*X=kVU6D74DFqw٥ɩSqj7*NK:^Y9W+㭪"hBt=vi7XaR84hk4{§djXpėeT<̊I:{^ΝCHJqmzh™{ +Pu ǭ//f$X !zh Ҥ9B=dn;o 12[BJ#qʤXNy=kKLi(u--ۦ:͕FƭukoEO7?x3 )@Pڂ-t jnkQ*ZWjfxQ%fcN.AٓsՎyvMRPc3_y[ƴ(3zGr5M&< S@Q <Oօ1򄙝$hײkBl_W%ptpW?*:! զǧyʸTקJuRZ'+( kSRR+`t﹬u*W&"O7VjUdF +-;Wٳ+]$0:u) u+vGQvV:JP@D+ x̭rS՜}ωd7 [DA2B0!F3&Fu@C {+I1<ƇPecGKB9͂8݂ؿkC5ďGPغ: Q"^6MMnAO!lazJ`m;`x67t5aSLM9vXAbo]p0X8t4Le3չN#;y1T7HjĿ"}'1~lz([O\x~9e' + l 7E:$vZrF1JrDFH^ɾ@A6[b@; ϋbxV矈TU+%. ݛG>1>a3og=iY"Aӳ /֎v*{#6lHX36rY:iCbl(@H1EJqс* My1wiGܠ55$C:WܫAL$ NW,/\PDLU.~q]ș<@ENҪ,a]JpaGWڷLO]Thݫ~ADHF;q+Ȱ4LOH@rklkq3$W&u8/0^ʝЀeoH(}M{^v&ttC"{`T7 ]WnD{'ID㫯nX"r$FӘ*C}z7S`]yzG*qޝ=J<>K?pkYhA9l4 Xol2Í`=j-81X! X"A nzOU7WUs"O g,/KE:"Bؿ=~Hs޹RfC^,okW~4씚qԃ +"4[րUj>ǡ b5 ӻOث Ш5fu(U}ȡa{[@Fʆ}r)!xVRxNo)u殁A +Zt24&iB֎1fH)\՞;6`[Lh>pc1ܣqqA5njpB((#* @7y瓪PvDQ鍷H@fIllSkInF *B{uazLo S'Lk,F3>}e"'[CqGp lY,LqMdԺ yE}g&Hcyg(Q (ǐAɀ^M3T8%ԸBc/Ё{LmO/i!qB. so~|AL>LnfQG?DKy+B9qPÃȥrArEU$*kjG:Ai=*ghvX|t@:븢E]FS5NQr ԝ.!ܷg3y A@>pЂN.6+`~5<EEKzcMrn9&g1j{s{OVO_Jy{H^h}\$Il7/E& B7 $,ɯ}Wc|:qQ}jˑsb*֧fgtYO.J_] uܭ$pݾݿ"%;˪d[wDEXnnp `Lo + +>$jɶ-aS H >ܻu4tgE_^@10_)7d}CM_y!ǽvC_Ⱥa͋"8濈ɛ{>EB,/ho݊He Æc'N PluEe=qڠ?+dzpcx ;si/ˌ[`pBq?d8"Sz٣ q:Xqe"lA  TFQW< N8baPs0<?7\ +W$뤛N:`k#_?亲"Ui89pX)2yAZhH[ M'6dU ﶼS[vTsxҭ."v>zJ":g4rMVif 9*~=N0Zq)ƽ ŽFJ*EN'O=;-h5s,[[@<"]"S7L)|>"}!ZjmJjT t/ ټ$܌{ɍB3p;YP3Qwܒ1bPRJ>t Ƴ4`)Yӑ~( +OjJh)X:pA-XOC5"iE?Q[AEKZH2jW%@HaF8W DSmQf쩒!ظuH0G.r!^ePICu:M^|pX1c}3|7tK=(I7~X"Xv t &7HRxQtcFiLOGwЇJr.Uk{U=^ t>tT +l%9 tM!TВ)'  knj3r^O44Z6hwuu5ڕy`,ěi@: DRAB-"6qȂ8ԑZm5I$;QlfmKOv9)OClWC[K>g@(/EQ{Piƅ))SE+/i<MrP PIXElCk=QqǶshϞQyX :%s'E)ϐؿ|.8śx@oZ.Y4,\?@v!IVbL-PxYfv1F+ +3xŅ౾Ajpf'uEQFu0Ωv_>߱#h6:rzp!44r/q~;@0PD +ژIF=bEZxAd*;͘&J@2J8LZ|Ir|I +(f[qx# )ONMYH7(I F-KAҜxٰrd8U39Y$t>T7gȌ4#hIxX|#5#CkU׏Lo錁yi5CWՖ(bǯ("o7ۇ>5&g})b_w/ +:GtZ6=+NܨiFK,Q$nh݇24k1|ľ6?mimPl5NoG<÷"-.osm'%x[9 C+A;-0j"@FOUzYܐt_"~ɂbkaRlt!"%̖ +AgIq;ʂK-t Y,S [jW 9: `Ğ0g+\ey.3ap䕷_Fs:WzmI!z<&JL9~ +j\Kd3K/K0n +s~|j7O^4eMwVE VkoE5~<)r6?#=W<s8lf!;Db-{# "*f v+1cmAN<9A3F8$O#>L_nix=.XK)X5wa\֣)[už˃ȗSvqIc{1 &y\r; q%37^0d] bO|3v2rDyvr'I`m`T,bR&O|'W-w;;T\v7N05F(x'h[0+! +uӫ 4箈a1.Ӻٸߦ럱 c3yVbq^Kzf8.([|XzH@m»(ِNKވi0fms[^! 
+"fajh =Jn`8N]I@󣣆&v&!V˨gy0#BƼ{EmI)XyCZAK:؅eXAJg# >@2Y`>V@ҁq3;6؀ ڿEq6:fknF5 +u̒c\A Կo/9BٲT(V&rQ]EjbXbёPh\_}pb\}"Tb"*Dݷ  ŎϚ4,ʉl<6Z-Lȭ.TzC<˾:l>NH~5DwPeVQ7d.4 u?7hܶ~/-H=B|&66ުnXߊqy #ڹ}Qc3 +:ũNJޞ9`3A+E##_oM-epMki"0&x@.H׊[sݠ9H̵B1.6DJ6}r5z!F\*ߴB-4W@W(e%B?"E"˴ 48X1tpJl1m$\ФHTyN̾iϛ]I)ʮml抷+J8$$3Pգ1a_a'"+.b.e3gqb +I-lؗJ[aqG(CnE/FG{{6qYws,Lu<~ OOJ*A*ѩv*.SƸE+R!.+T)53G){%Ç|9`..D] + + Tğ;cz:QhmjD˚ 64H; nfKr'zWG,5x}l/iu 1=J[FZ^, KFaš 5x2BN5Dɯ%>X.Wyz۔@3t^yd#@hu|Ɣ3hKeEr'}5 SSdxB~68p{H3Pg[jbbsSvc[!S1fբL.wƮR2v^E2](Fl&oԮlsԈ=TvسrC7_q +-|\D /- +wA~;r2n[)f!ҧޑ;kb5sMzͲ~ 9TY8z!i׆9jkzί2PXPTHE\NjX@ cfD`T\؁f8ylDr3GͅHL !8-`]aGv$SD*4GT[ 3}.DdZ-U?caM{K +T +Nϊ$̢O'yDIRH:_U8AْsrtzCJL\f2MU)Z+}Fg)%ĕ*l$6F5?.1޴mSfu^gK0P×J0(v@X|$VKpH~J$ y^g% 0Q5m2<(\q75wȲZwIRt32s tȳ/A)[ϗ_njgr&AU}`F @i"_|'^ߎ9k< -@`4ѢQ툅X-3sx%G;<(&OPG$ቐ&[K] }|i*Rӕ R>L3m}B g :ah Z?Dt-:1O2966?QO>L-91K",*2+;v=l:id7a'aĪ4FQ+5=C՚L"i'N?o0'kbyOӸDN&3}}>bELG\B[{VAQ߱៯Fi;ݢ#HGCzɉԮťVH[9`R$7 K{:xD䀲eJߖ!>4m&pv֛f-D< _'!0!V߯>+٭RCRQU%DϠNM$V.}Qux5 +);xs%3=WN%QNM)P}!a1.[RIvU<vT\c3'LLs3ʹф'Fwx3O6U [NvwGH` G=S}F;yF+c] N\҉Wiȁu3V,Wh$wkP{mGeϺOuYNx 0F>jfV"r~4beol"s vQ`%&<k`rA!+#j\ih3@o ](rFuQJE*x-[G>N` @CeeJ4& +vӪTe0suGT)`slzUF +]Ta(>!C:"R]]' +Yb{Ky՜V-:E)]'PiWjZd*0$P9Sj,'%H#EsӺ W&ߵ1ޱVWC}֜0vR=fJu.\dtUW +Dкmʛ2"0dDYLuYI "a@ՇkwllCO>O +e{qSbu=n3h2N`\>J ~/p-47K~٥:I\7U;@;]iDuQ7)(}}6+Q4ڶ}$:i(Jo}=CvZײ  c^jmIO]ev!cz s&$fbA0!eb\t2 |T)h۶;11ZrKE>=hU%=͏ b[:+vjNfJ!H] +F}ZzpӲߒ +{bؼoBo!;hO]D'cs'ne{?%$D8nިQ16C>,&e~ạe6WWzs'[ԚDzW5XS֯S!߳|vdYr[Em}}&?5D@,wRRJ `Pxr!hJH'4'"ER9TXRi=m}g:ww]}ꎮ8Y B\50jYD_ TӲ̢y őe:/w ^Xy];Q1^rԟU;(jz/6 j ؇p =q0OJ)"gePM^[*j 37/ںIN࣏ +څSTϱ^zTZ{sOo$rH/>@N`jl _|0ƉQ%R=$ +Sy[zy.tŁ˳ZoGEZ ^vAaW ni`ܢe2\4.jY{/~kٗ,3z9*Zy69lHa Dv8aa9W7(2[ڥN%;e&%ta> ^.zIbEzDE¤aj:i&bg7?aϤYp XpݹQ0-"(qvJCmk hIn/_7b4cp5:6LIZdE4A9k=2sW[\ه?Q% .;$Kf!UVx +uE)geϲ ]Vߐ4VS\a,[ܕL%e3KEmsJQ z(b/ rS^~$ M(dFcUl6YplZK\8$&vkS6"97^mn9_\Y~md%LDatۋ+?Z.}5z8TkgĠя.jP~kf#rk՘Ϲ>&Ӡ\{g`ehѿފ(-lZx{+NӶs DR&w@tbkI!LݯK{O墘$"4ȫA]*v@!~> TesJa;[I'7. Gt6/ʔ?ZE8dq(07iAiWszҗ*̲-L@Bu9~+ T[d<@xl|ƉB=7ZcFČfe^v.e BiUJ~=ŚN6yK5b.-|(XkUs]uzyΏ~߱|Ĺ~KJQ[BeCNӫ\TZ¥Q* ʺ?oC^yoӌ}r+rP6yC#e\5; Ijo]fR嵧sØ| +q~KZazxe7Xv}pLY07.k07'^H۾h۰v@`f]rD@ c6%:W6"pϚ +x",h]^c#.^ 8ÄލJE40U]j%}&ڷicE}ilWbi9MU/u{֓s+_d9Xد' ćnZ~۹ΧLzU.>*z/K +JY[iV+3'`]+Q Sf\LG2xAlI2o6 ++Q5< +q"0͈0,68Y"5<@r7o?P,NPG0U+ԉqȹrR/e%ij3G-m>'2֦*<ql(Y߮^'s̐QY +C]jM,ӳ')92;}×-U)3վzV{7j|h{jƳԭ9Uu(RҲ^]gҗ{ݦVq~z߸l6uŷ)JjX!}Cݭ1\ObzZEOP[;VXلcZU9$ݑ}8V$8?+%"#C/򃭽_\*(Ӏ @7JD5m[+\KADP z|T7YpWF^gb${0LCEKo8Bb1ANxQ?mA׮]Ftj- D dktrڝ,xfQJi'=چwt5W=Y"jU՟ezx$8Re4v +_55M\m\ƠD} +PUi<ڮ! +pK +X럶-UM'sHcGXj[nje<9wo-d8V#wlq~`uWR~X 3(I6HUfP@sζ$sI\fv5XONxI)~s(5[)=#PzaislG5𭦈jj|Edf<YTsN* BSh㑧'Lt+f\~&HW62 +2shOOꝄg HAUiޞGFڧuN#߈+V{3;餣`"Ā52 Yԉ;fO><})7VW3q;,kyUg HC-2]@KYR4IJn,$޵aVd]mLc avF +d %'4{iDL?Y$p+}]65n# +S# UsL:.iQ?ƏՆXFҟgՎ~bFGzo³VwdVV[*o:3p -Z?K˶esfM6$ժϵФE2$S H}%AڮY%'h1 Nd'[7,lK}^aE4!樵ˀ ?뎔 %_WBv",@thUYD|S!RWjvqܤ?@-κ'Jfr'R +3xZhf*~7RJ"|zY oxqa9,7-o=ïU;.7º@fUyo5ưTj\_-oҽ's#YHO*IUPKAnb4읟]F +.oFKWИ/b?P7! u]a^Ⱥ A莙9cRT݇n$nF Kds}4HoPVba<d_JTq4_6vJVye_# X@ϿCdS!vk +_|x7O+MV9YŽ`zdOW@}^GS>s=TyޣBjz^UbDZOcv3_ꩲvV79RO8㊮i,F@qV@iVBK%jh)E +,0Sv  QeטplbZEUaĩ _-,+#/Z:7J'V(q"It7my{mc+Ll3^*3v+.{F? 
+U4lg 2B}beMi j^p-6YEЭK[ +0̱(Y|fv}eJ o5˫q%9RQ>NM_y(ƚdEyCBnuߠ:煝z6?9û9[Ղ +(8aBU驮R!:HiJSFV^V:˝sE'q(~<x9Wh$\\[ŧQ6ZKF (Q"Yf6TDYE5*hRTW=Y¢U՟cz#Q+Q5qp_5mo\4 S\yv]6|7n[Z"VN`$n>5(%b~+ν"mU{c.tr5íc2.:|mv1J +ޱylI Df=ԠjS3ₒۣ|HJр/OGYԩ~ѸH ŝPA)͏sKSnB;b1[W?3[W-@aBloL(;^IqH䌀ݺ|⿯r0RxMa.F2 ]ԫYȘp.pz +{_SW7<ޑ ,Uqx`Ut5F,,)c{%a}xtOI@I\^od]| :]w<٩lwH8e'GZP< B:曯 or_fuR76 GŜH_9E3RXwa;zn`>hd7ʡcj.7iXxܬmh +27"JA$׋|VFZl#9g<1-ḙM&&!ˊ፾#b9s $;TVܕyH'A;Du *O\SAVA]PPٰC4@ jmAF%/zkiG=!X)ϥ"@MM{'Bfo(~ȇ>ºZ }]kϕSl 41, sa%!?i19ro~r.xN%3' P!,Hg~/_6t_;H_I!J@,$6 +A Ǿ_A 1Zmdg[C%GBtMaaF)] ~uָ:7FLd멕ƛ*O +lG x82:nrW6)Ȯ{ώ>l6M|qO2_=Ȣ9k_%Q)ntH?_PŸ&W(BP}%m4txy섙*%AS +8~ V}Z[uڱ~fXc_xOVsR[R[+ -udNP.ko9y"Crtບ54&A:jŃtlBJ.߱WSQoA(>]a;ʺG %XeT^\ϩ5u@@@J|,?"LlhZn? Y~ʥKnlN<d? 񻊺`5~Q_+*wr1h MuO1Kr21,3{gT4()"FsGz,5G +(>F@=@:ʺ Z&5I8{nT~>éJN H]XQ6'V;x k]=r(g*7 Fx-*q`>~sUAWBø)F(p;w]<Nϙ?A 2F7lh꘍OԒz;s=WmD(2v7y|^n9@-H\"&D4EU Cm%pa?ʰYr݃?hb5G ڀ,Y_%Pb *}q)oo;<^ `/S cPaw("YU=ܩ!=A[0zatY'aKj8Ka?j;~멝}h~v'}f$]3GMũ;-̩Z"g~V,#<*_E=(YX%lK% +Dh3z("jPB{Tdz(Vjn9_XoM8!ׂg"ֺpў BG-r[&CNy,0@@<$7ʘ\BM,wI5S"`I"<@@YJ%Q(t~e2d̄W3[;ڴMQ#Qe)ymWEx3⏭hm舰1lxܕddkRe \)LLn/×eW6+t%ot$S<n+z4R11zU! c! Ai9nÅLՅLjJVbGl]HgjMZ a*|lxބ{e ]ß ;R7 $dAgewe`ARX+]=DXH Y9|"E"|XVrQr NH~ci guV| B xx +#gժ1$vϫb= ɪ;-kj[~}4:D7 +GTC HNtCԗ)vxױ<P/u_Y%x8\E3ּ(@2r'zJ;Aඡj8\X6ǟ#{@=fbcs=NǪY<ŞBJM@CJ2srrfn7O +hH[d@`bnt2ՑEIL7cj&&\.=fJPYBڙ?~A%v{׼#+wH^xA79꓈h `+A%s6Цx?!5ڂu%4[<Ιo}ykNX๡ :v z9^z!Ƃ~ "ԑ3m=ВƼҩe&u:ƩX[C0MVxzҩ@rd>-gw}CT#2mTբǔkr>1 +bկ3qx [oz +]Ut@逄&t)?aVAxoT_L,"oCi M ƥ屎r.qYj`?rK>;Y<>?X{%$B b7NK,<6+rVk]*IfD'2#S2@>.<YΜcb$QVCHƮ]i f a`p'9i%?'s-N|Z* P[W,X'*r;(OeYxl5赊a}  ?鵴w֞DB/ zx]+ki1ɰQǮ7m Z8(2ZZ]Fk}5uY[}c07\{ +O2?<WtVV`[ʄG zrw;#1pcMjۓNeG+.ovn%OXDDI$jA+{,ܘ],W 蓉էߩ: +IWJF):5Eh|dS["q?o^QPUU C#E(#D;)LHHOLMvETtza@'Gݔh@̢OEJn~:dW%s' lo[6nsµIj TkvP}C̝;yMesg{Tf@f io䳰qLQk$5xSr8|;ik,l?d4NS'ד&s@^=<%T{@<'#3i<>Cjs\g,DdR:Xv ӕlB+G 3?OM .U e;-uH>#N*nݨqDMdQU#7b)Oy9!0|B_`Ok}|,3zֹ߾mf"DH1դf83Z?ejCѩ$y7w:[DF=Su 8yᙖ&D17B$]dxt ks\Ly6̶w&V~ otb5;3SϺ6[EV޴L,e[bui f +U'@V_0% z7H; dkHdX!؟G{>o'&Վ|ٺJCƓ]F4QE>C^Gf,Ti+]>Mc݇Qmytƅ :VR3B͇@zn#۰ !rzwrВD6m4rmFߔB@_jRX]R{\&;]35ӫBQ}9[|BK5&Zܚ5dɇ+5aIRu%Tȝq3Yy{җO4Wn(+ GqxN)f4z.k-]y"!:PEy+vy$mڬ߹z+QIWչ/"nìCV'{<_VNy;PV e%j^oΣy6OOepߚg3_4P`PNpq}:ug2GvtkV=[ )nrVeM!j +[6lw(՝Q3V![IvU8Sޚ>%k0{CY{95Qᰪpu/ۨY{"[S{~==gtFjx+rBdT<׍jYV 1O)kU5Ai)ӹ$@fw@TtQ0vȸċ$=S?Ѿ]}cۺڼ'{Ǻ3(Oet/{OC>>O{ *Ɯ'w`m$axuRRGp}2 Q^8eDĘ.D9[JJojll7{,$ssN>jQ>Da"@t)@8l/x˯z3uk,22TH3ffeģrxm&gٵȒ+B1Ӄ  wP-#JX9.I% +>W3*ѫL+zNp~>~LHq&u1,CFY BawZJtʧy"BOP{n"npbRy:&3yAj?~Le ~j{ #ID%B^\b(cϩ/c,}JPcU z5vty'?C s܂KQN`2É$XE- @ųN]j +qO$ʉ^@YCHfD]*w?fG|q>{Ig6d}2}}9#(d*ꥹڢhĐ[gKo_6G&!m4o f%UKCҾd +wlWKm ʝ@eal '4^ l6*g#diV Ha14hn.] Rά[zꤻg ߬)u $)S JMFK5C4_|,Br:d՚V$4S$ eTP[BbL}3B~o;%e'o  zvo&–e{,XcѱiXh8HM$:6BS1N!lI}'aW:o_I͝(583(3 UM}SIg'd]dn}N `k`}rj.VrDGgBa 5-¡Wn~qHN?=y,Y?Tv-lبQpN&piP2uXӳ1RjUnrrGOD =H=;Ya#:p% {ݞlb]xBʉ|3:a$hϨ?+CwFvx3sv,uoSw,- M!:c#RtR>mr|UQZs'KYʹo$9.!%5*:\:Կ1[Y?k9ttدdUߣ*W4i{Wc\">e J_|:|$=m#8w|o\,АVUd.dmjqd + ڐgmx9xX+[oY Vz_Up5r|G$sY UyzgVu,4+ b4ȳ8C~R uvFA7v ;!qcIr:CnB) mź4c((()5Bbu.5YJi|ס'6NƗwՑcsS۬*"$ľ鐕:\_Ȟ:^=;u>_\W+_O{:,UZ;Yky,o6N`uՙA;Kz1 +01%}kn8:kIVd.r +6 + +# +o.6  + +ݐȵSv&pN#~"Z \ɉ%'ң헼L`rT +Ƕ`Vt4Ok׎Ggt=+w~D;(4'J''l(*qkqV CYzC(pe{:w1:' *Nh8&သ K'Ѹ 9#9g.(Ȯ{x%x72αy-a" U"c£k.$ܿ1Yl%|\jBhn`@2W2x!g(Gk¹Q[Dch͝d*$M$خR潻BE :KGKh+FD8ƨ%4*l܀SaYA~S 0bPSp%q٘H֮0.Թ.~bmp^wcj2 l`bMesM$Fwث 5ꕐL5_躑H,w>f:Rb1 `]1! 
diff --git a/ingest.py b/ingest.py
new file mode 100644
index 0000000000000000000000000000000000000000..fd83d440054f82ea3f17c5733182c0fb99d9780d
--- /dev/null
+++ b/ingest.py
@@ -0,0 +1,24 @@
+import os
+from langchain_community.embeddings import SentenceTransformerEmbeddings
+from langchain_community.document_loaders import UnstructuredFileLoader, DirectoryLoader
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain_community.vectorstores import Qdrant
+
+embeddings = SentenceTransformerEmbeddings(model_name="NeuML/pubmedbert-base-embeddings")
+
+loader = DirectoryLoader('data/', glob="**/*.pdf", show_progress=True, loader_cls=UnstructuredFileLoader)
+documents = loader.load()
+
+text_splitter = RecursiveCharacterTextSplitter(chunk_size=700, chunk_overlap=70)
+texts = text_splitter.split_documents(documents)
+
+url = "http://localhost:6333"  # This is the same URL that must match Step 4d
+qdrant = Qdrant.from_documents(
+    texts,
+    embeddings,
+    url=url,
+    prefer_grpc=False,
+    collection_name="vector_db"
+)
+
+print("Vector DB Successfully Created!")
\ No newline at end of file
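`ingest.py` above only builds the `vector_db` collection; the retrieval side is not part of this diff. A minimal sketch of how that collection could be queried, assuming the same local Qdrant instance and the same `NeuML/pubmedbert-base-embeddings` model, follows below. The file name `retrieve_example.py`, the sample query, and `k=2` are illustrative placeholders, not files or values from this repository.

```python
# retrieve_example.py -- illustrative sketch only, not part of this repository
from langchain_community.embeddings import SentenceTransformerEmbeddings
from langchain_community.vectorstores import Qdrant
from qdrant_client import QdrantClient

# Must be the same embedding model used in ingest.py, or the stored vectors will not match
embeddings = SentenceTransformerEmbeddings(model_name="NeuML/pubmedbert-base-embeddings")

# Connect to the Qdrant instance that ingest.py populated (same URL, same collection name)
client = QdrantClient(url="http://localhost:6333", prefer_grpc=False)
db = Qdrant(client=client, collection_name="vector_db", embeddings=embeddings)

# Return the two chunks most similar to the query, with their similarity scores
results = db.similarity_search_with_score("What are the common symptoms of anemia?", k=2)
for doc, score in results:
    print(f"{score:.3f}  {doc.page_content[:120]}")
```

The same `db` object can also be wrapped with `db.as_retriever()` and passed to a RetrievalQA-style chain on top of the BioMistral GGUF model, which is the usual way the ingested chunks would feed a chatbot built from these components.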
diff --git a/llama-cpp-python/.DS_Store b/llama-cpp-python/.DS_Store
new file mode 100644
index 0000000000000000000000000000000000000000..230034a855c0f23467283076479e222eff11c6df
Binary files /dev/null and b/llama-cpp-python/.DS_Store differ
diff --git a/llama-cpp-python/.dockerignore b/llama-cpp-python/.dockerignore
new file mode 100644
index 0000000000000000000000000000000000000000..fd64c09b37947c97e58903ce570785c657d56722
--- /dev/null
+++ b/llama-cpp-python/.dockerignore
@@ -0,0 +1,166 @@
+_skbuild/
+
+.envrc
+
+models/
+
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+.pybuilder/
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+# For a library or package, you might want to ignore these files since the code is
+# intended to run in multiple environments; otherwise, check them in:
+# .python-version
+
+# pipenv
+# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+# However, in case of collaboration, if having platform-specific dependencies or dependencies
+# having no cross-platform support, pipenv may install dependencies that don't work, or not
+# install all needed dependencies.
+#Pipfile.lock
+
+# poetry
+# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+# This is especially recommended for binary packages to ensure reproducibility, and is more
+# commonly ignored for libraries.
+# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+
+# pdm
+# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+# in version control.
+# https://pdm.fming.dev/#use-with-ide
+.pdm.toml
+
+# PEP 582; used by e.g.
github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. +.idea/ diff --git a/llama-cpp-python/.github/ISSUE_TEMPLATE/bug_report.md b/llama-cpp-python/.github/ISSUE_TEMPLATE/bug_report.md new file mode 100644 index 0000000000000000000000000000000000000000..eb0fb9662e690d0f9de4632cddd321b3f872a725 --- /dev/null +++ b/llama-cpp-python/.github/ISSUE_TEMPLATE/bug_report.md @@ -0,0 +1,96 @@ +--- +name: Bug report +about: Create a report to help us improve +title: '' +labels: '' +assignees: '' + +--- + +# Prerequisites + +Please answer the following questions for yourself before submitting an issue. + +- [ ] I am running the latest code. Development is very rapid so there are no tagged versions as of now. +- [ ] I carefully followed the [README.md](https://github.com/abetlen/llama-cpp-python/blob/main/README.md). +- [ ] I [searched using keywords relevant to my issue](https://docs.github.com/en/issues/tracking-your-work-with-issues/filtering-and-searching-issues-and-pull-requests) to make sure that I am creating a new issue that is not already open (or closed). +- [ ] I reviewed the [Discussions](https://github.com/abetlen/llama-cpp-python/discussions), and have a new bug or useful enhancement to share. + +# Expected Behavior + +Please provide a detailed written description of what you were trying to do, and what you expected `llama-cpp-python` to do. + +# Current Behavior + +Please provide a detailed written description of what `llama-cpp-python` did, instead. + +# Environment and Context + +Please provide detailed information about your computer setup. This is important in case the issue is not reproducible except for under certain specific conditions. + +* Physical (or virtual) hardware you are using, e.g. for Linux: + +`$ lscpu` + +* Operating System, e.g. for Linux: + +`$ uname -a` + +* SDK version, e.g. for Linux: + +``` +$ python3 --version +$ make --version +$ g++ --version +``` + +# Failure Information (for bugs) + +Please help provide information about the failure if this is a bug. If it is not a bug, please remove the rest of this template. + +# Steps to Reproduce + +Please provide detailed steps for reproducing the issue. We are not sitting in front of your screen, so the more detail the better. + +1. step 1 +2. step 2 +3. step 3 +4. etc. + +**Note: Many issues seem to be regarding functional or performance issues / differences with `llama.cpp`. In these cases we need to confirm that you're comparing against the version of `llama.cpp` that was built with your python package, and which parameters you're passing to the context.** + +Try the following: + +1. `git clone https://github.com/abetlen/llama-cpp-python` +2. `cd llama-cpp-python` +3. 
`rm -rf _skbuild/` # delete any old builds +4. `python -m pip install .` +5. `cd ./vendor/llama.cpp` +6. Follow [llama.cpp's instructions](https://github.com/ggerganov/llama.cpp#build) to `cmake` llama.cpp +7. Run llama.cpp's `./main` with the same arguments you previously passed to llama-cpp-python and see if you can reproduce the issue. If you can, [log an issue with llama.cpp](https://github.com/ggerganov/llama.cpp/issues) + +# Failure Logs + +Please include any relevant log snippets or files. If it works under one configuration but not under another, please provide logs for both configurations and their corresponding outputs so it is easy to see where behavior changes. + +Also, please try to **avoid using screenshots** if at all possible. Instead, copy/paste the console output and use [Github's markdown](https://docs.github.com/en/get-started/writing-on-github/getting-started-with-writing-and-formatting-on-github/basic-writing-and-formatting-syntax) to cleanly format your logs for easy readability. + +Example environment info: +``` +llama-cpp-python$ git log | head -1 +commit 47b0aa6e957b93dbe2c29d53af16fbae2dd628f2 + +llama-cpp-python$ python3 --version +Python 3.10.10 + +llama-cpp-python$ pip list | egrep "uvicorn|fastapi|sse-starlette|numpy" +fastapi 0.95.0 +numpy 1.24.3 +sse-starlette 1.3.3 +uvicorn 0.21.1 + +llama-cpp-python/vendor/llama.cpp$ git log | head -3 +commit 66874d4fbcc7866377246efbcee938e8cc9c7d76 +Author: Kerfuffle <44031344+KerfuffleV2@users.noreply.github.com> +Date: Thu May 25 20:18:01 2023 -0600 +``` diff --git a/llama-cpp-python/.github/ISSUE_TEMPLATE/feature_request.md b/llama-cpp-python/.github/ISSUE_TEMPLATE/feature_request.md new file mode 100644 index 0000000000000000000000000000000000000000..bbcbbe7d61558adde3cbfd0c7a63a67c27ed6d30 --- /dev/null +++ b/llama-cpp-python/.github/ISSUE_TEMPLATE/feature_request.md @@ -0,0 +1,20 @@ +--- +name: Feature request +about: Suggest an idea for this project +title: '' +labels: '' +assignees: '' + +--- + +**Is your feature request related to a problem? Please describe.** +A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] + +**Describe the solution you'd like** +A clear and concise description of what you want to happen. + +**Describe alternatives you've considered** +A clear and concise description of any alternative solutions or features you've considered. + +**Additional context** +Add any other context or screenshots about the feature request here. diff --git a/llama-cpp-python/.github/dependabot.yml b/llama-cpp-python/.github/dependabot.yml new file mode 100644 index 0000000000000000000000000000000000000000..c58c9ae570cfb83607ade62e596ccdd6e0599819 --- /dev/null +++ b/llama-cpp-python/.github/dependabot.yml @@ -0,0 +1,15 @@ +# To get started with Dependabot version updates, you'll need to specify which +# package ecosystems to update and where the package manifests are located. 
+# Please see the documentation for all configuration options: +# https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates + +version: 2 +updates: + - package-ecosystem: "pip" # See documentation for possible values + directory: "/" # Location of package manifests + schedule: + interval: "weekly" + - package-ecosystem: "github-actions" + directory: "/" + schedule: + interval: "weekly" diff --git a/llama-cpp-python/.github/workflows/build-and-release.yaml b/llama-cpp-python/.github/workflows/build-and-release.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8fbd68f4ad8cb076c7aa7297c2fb39e315668b43 --- /dev/null +++ b/llama-cpp-python/.github/workflows/build-and-release.yaml @@ -0,0 +1,112 @@ +name: Build Release + +on: workflow_dispatch + +permissions: + contents: write + +jobs: + build_wheels: + name: Build wheels on ${{ matrix.os }} + runs-on: ${{ matrix.os }} + strategy: + matrix: + os: [ubuntu-20.04, windows-2019, macos-11] + + steps: + - uses: actions/checkout@v4 + with: + submodules: "recursive" + + # Used to host cibuildwheel + - uses: actions/setup-python@v5 + with: + python-version: "3.8" + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + python -m pip install -e .[all] + + - name: Build wheels + uses: pypa/cibuildwheel@v2.17.0 + env: + # disable repair + CIBW_REPAIR_WHEEL_COMMAND: "" + with: + package-dir: . + output-dir: wheelhouse + + - uses: actions/upload-artifact@v4 + with: + name: wheels-${{ matrix.os }} + path: ./wheelhouse/*.whl + + build_wheels_arm64: + name: Build arm64 wheels + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + with: + submodules: "recursive" + + - name: Set up QEMU + uses: docker/setup-qemu-action@v3 + with: + platforms: linux/arm64 + + - name: Build wheels + uses: pypa/cibuildwheel@v2.17.0 + env: + CIBW_SKIP: "*musllinux* pp*" + CIBW_REPAIR_WHEEL_COMMAND: "" + CIBW_ARCHS: "aarch64" + CIBW_BUILD: "cp38-* cp39-* cp310-* cp311-* cp312-*" + with: + output-dir: wheelhouse + + - name: Upload wheels as artifacts + uses: actions/upload-artifact@v4 + with: + name: wheels_arm64 + path: ./wheelhouse/*.whl + + build_sdist: + name: Build source distribution + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + with: + submodules: "recursive" + - uses: actions/setup-python@v5 + with: + python-version: "3.8" + - name: Install dependencies + run: | + python -m pip install --upgrade pip build + python -m pip install -e .[all] + - name: Build source distribution + run: | + python -m build --sdist + - uses: actions/upload-artifact@v4 + with: + name: sdist + path: ./dist/*.tar.gz + + release: + name: Release + needs: [build_wheels, build_wheels_arm64, build_sdist] + runs-on: ubuntu-latest + + steps: + - uses: actions/download-artifact@v4 + with: + merge-multiple: true + path: dist + + - uses: softprops/action-gh-release@v2 + with: + files: dist/* + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/llama-cpp-python/.github/workflows/build-docker.yaml b/llama-cpp-python/.github/workflows/build-docker.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4ebe3bb6d716ef0c49990158094cf79fc0a0ee49 --- /dev/null +++ b/llama-cpp-python/.github/workflows/build-docker.yaml @@ -0,0 +1,50 @@ +name: Build Docker + +on: workflow_dispatch + +permissions: + contents: write + packages: write + +jobs: + docker: + name: Build and push Docker image + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + 
with: + submodules: "recursive" + + - name: Set up QEMU + uses: docker/setup-qemu-action@v3 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Login to GitHub Container Registry + uses: docker/login-action@v3 + with: + registry: ghcr.io + username: ${{ github.repository_owner }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Build and push + id: docker_build + uses: docker/build-push-action@v5 + with: + context: . + file: "docker/simple/Dockerfile" + push: ${{ startsWith(github.ref, 'refs/tags/') }} + pull: true + platforms: linux/amd64,linux/arm64 + tags: | + ghcr.io/abetlen/llama-cpp-python:latest + ghcr.io/abetlen/llama-cpp-python:${{ github.ref_name }} + build-args: | + BUILDKIT_INLINE_CACHE=1 + + - name: Publish to GitHub Tag + if: steps.docker_build.outputs.digest && startsWith(github.ref, 'refs/tags/') + run: | + echo "Docker image published for tag: ${{ github.ref_name }}" diff --git a/llama-cpp-python/.github/workflows/build-wheels-cuda.yaml b/llama-cpp-python/.github/workflows/build-wheels-cuda.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ae9e8632c0ad94287270b68ffb3f9b608464a222 --- /dev/null +++ b/llama-cpp-python/.github/workflows/build-wheels-cuda.yaml @@ -0,0 +1,131 @@ +name: Build Wheels (CUDA) + +on: workflow_dispatch + +permissions: + contents: write + +jobs: + define_matrix: + name: Define Build Matrix + runs-on: ubuntu-latest + outputs: + matrix: ${{ steps.set-matrix.outputs.matrix }} + defaults: + run: + shell: pwsh + + steps: + - name: Define Job Output + id: set-matrix + run: | + $matrix = @{ + 'os' = @('ubuntu-20.04', 'windows-latest') + 'pyver' = @("3.10", "3.11", "3.12") + 'cuda' = @("12.1.1", "12.2.2", "12.3.2", "12.4.1") + 'releasetag' = @("basic") + } + + $matrixOut = ConvertTo-Json $matrix -Compress + Write-Output ('matrix=' + $matrixOut) >> $env:GITHUB_OUTPUT + + build_wheels: + name: Build Wheel ${{ matrix.os }} ${{ matrix.pyver }} ${{ matrix.cuda }} ${{ matrix.releasetag == 'wheels' && 'AVX2' || matrix.releasetag }} + needs: define_matrix + runs-on: ${{ matrix.os }} + strategy: + matrix: ${{ fromJSON(needs.define_matrix.outputs.matrix) }} + defaults: + run: + shell: pwsh + env: + CUDAVER: ${{ matrix.cuda }} + AVXVER: ${{ matrix.releasetag }} + + steps: + - uses: actions/checkout@v4 + with: + submodules: "recursive" + + - uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.pyver }} + + - name: Setup Mamba + uses: conda-incubator/setup-miniconda@v3.0.4 + with: + activate-environment: "build" + python-version: ${{ matrix.pyver }} + miniforge-variant: Mambaforge + miniforge-version: latest + use-mamba: true + add-pip-as-python-dependency: true + auto-activate-base: false + + - name: VS Integration Cache + id: vs-integration-cache + if: runner.os == 'Windows' + uses: actions/cache@v4.0.2 + with: + path: ./MSBuildExtensions + key: cuda-${{ matrix.cuda }}-vs-integration + + - name: Get Visual Studio Integration + if: runner.os == 'Windows' && steps.vs-integration-cache.outputs.cache-hit != 'true' + run: | + if ($env:CUDAVER -eq '12.1.1') {$x = '12.1.0'} else {$x = $env:CUDAVER} + $links = (Invoke-RestMethod 'https://raw.githubusercontent.com/Jimver/cuda-toolkit/master/src/links/windows-links.ts').Trim().split().where({$_ -ne ''}) + for ($i=$q=0;$i -lt $links.count -and $q -lt 2;$i++) {if ($links[$i] -eq "'$x',") {$q++}} + Invoke-RestMethod $links[$i].Trim("'") -OutFile 'cudainstaller.zip' + & 'C:\Program Files\7-Zip\7z.exe' e cudainstaller.zip -oMSBuildExtensions -r 
*\MSBuildExtensions\* > $null + Remove-Item 'cudainstaller.zip' + + - name: Install Visual Studio Integration + if: runner.os == 'Windows' + run: | + $y = (gi '.\MSBuildExtensions').fullname + '\*' + (gi 'C:\Program Files\Microsoft Visual Studio\2022\Enterprise\MSBuild\Microsoft\VC\*\BuildCustomizations').fullname.foreach({cp $y $_}) + $cupath = 'CUDA_PATH_V' + $env:CUDAVER.Remove($env:CUDAVER.LastIndexOf('.')).Replace('.','_') + echo "$cupath=$env:CONDA_PREFIX" >> $env:GITHUB_ENV + + - name: Install Dependencies + env: + MAMBA_DOWNLOAD_FAILFAST: "0" + MAMBA_NO_LOW_SPEED_LIMIT: "1" + run: | + $cudaVersion = $env:CUDAVER + mamba install -y 'cuda' -c nvidia/label/cuda-$cudaVersion + python -m pip install build wheel + + - name: Build Wheel + run: | + $cudaVersion = $env:CUDAVER.Remove($env:CUDAVER.LastIndexOf('.')).Replace('.','') + $env:CUDA_PATH = $env:CONDA_PREFIX + $env:CUDA_HOME = $env:CONDA_PREFIX + $env:CUDA_TOOLKIT_ROOT_DIR = $env:CONDA_PREFIX + if ($IsLinux) { + $env:LD_LIBRARY_PATH = $env:CONDA_PREFIX + '/lib:' + $env:LD_LIBRARY_PATH + } + $env:VERBOSE = '1' + $env:CMAKE_ARGS = '-DLLAMA_CUBLAS=on -DCMAKE_CUDA_ARCHITECTURES=all' + $env:CMAKE_ARGS = "-DLLAMA_CUDA_FORCE_MMQ=ON $env:CMAKE_ARGS" + if ($env:AVXVER -eq 'AVX') { + $env:CMAKE_ARGS = $env:CMAKE_ARGS + ' -DLLAMA_AVX2=off -DLLAMA_FMA=off -DLLAMA_F16C=off' + } + if ($env:AVXVER -eq 'AVX512') { + $env:CMAKE_ARGS = $env:CMAKE_ARGS + ' -DLLAMA_AVX512=on' + } + if ($env:AVXVER -eq 'basic') { + $env:CMAKE_ARGS = $env:CMAKE_ARGS + ' -DLLAMA_AVX=off -DLLAMA_AVX2=off -DLLAMA_FMA=off -DLLAMA_F16C=off' + } + python -m build --wheel + # write the build tag to the output + Write-Output "CUDA_VERSION=$cudaVersion" >> $env:GITHUB_ENV + + - uses: softprops/action-gh-release@v2 + with: + files: dist/* + # Set tag_name to -cu + tag_name: ${{ github.ref_name }}-cu${{ env.CUDA_VERSION }} + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/llama-cpp-python/.github/workflows/build-wheels-metal.yaml b/llama-cpp-python/.github/workflows/build-wheels-metal.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fc798c84e7c19fc1bf1616a4e6de562a984837ea --- /dev/null +++ b/llama-cpp-python/.github/workflows/build-wheels-metal.yaml @@ -0,0 +1,87 @@ +name: Build Wheels (Metal) + +on: workflow_dispatch + +permissions: + contents: write + +jobs: + define_matrix: + name: Define Build Matrix + runs-on: ubuntu-latest + outputs: + matrix: ${{ steps.set-matrix.outputs.matrix }} + defaults: + run: + shell: pwsh + + steps: + - name: Define Job Output + id: set-matrix + run: | + $matrix = @{ + 'os' = @('macos-11', 'macos-12', 'macos-13') + 'pyver' = @('3.10', '3.11', '3.12') + } + + $matrixOut = ConvertTo-Json $matrix -Compress + Write-Output ('matrix=' + $matrixOut) >> $env:GITHUB_OUTPUT + + build_wheels: + name: ${{ matrix.os }} Python ${{ matrix.pyver }} + needs: define_matrix + runs-on: ${{ matrix.os }} + strategy: + matrix: ${{ fromJSON(needs.define_matrix.outputs.matrix) }} + env: + OSVER: ${{ matrix.os }} + + steps: + - uses: actions/checkout@v4 + with: + submodules: "recursive" + + - uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.pyver }} + + - name: Install Dependencies + run: | + python -m pip install build wheel cmake + + - name: Build Wheel + run: | + XCODE15PATH="/Applications/Xcode_15.0.app/Contents/Developer" + XCODE15BINPATH="${XCODE15PATH}/Toolchains/XcodeDefault.xctoolchain/usr/bin" + export CMAKE_ARGS="-DLLAMA_NATIVE=off -DLLAMA_METAL=on" + [[ "$OSVER" == "macos-13" ]] && export 
CC="${XCODE15BINPATH}/cc" && export CXX="${XCODE15BINPATH}/c++" && export MACOSX_DEPLOYMENT_TARGET="13.0" + [[ "$OSVER" == "macos-12" ]] && export MACOSX_DEPLOYMENT_TARGET="12.0" + [[ "$OSVER" == "macos-11" ]] && export MACOSX_DEPLOYMENT_TARGET="11.0" + + export CMAKE_OSX_ARCHITECTURES="arm64" && export ARCHFLAGS="-arch arm64" + VERBOSE=1 python -m build --wheel + + if [[ "$OSVER" == "macos-13" ]]; then + export SDKROOT="${XCODE15PATH}/Platforms/MacOSX.platform/Developer/SDKs/MacOSX14.0.sdk" + export MACOSX_DEPLOYMENT_TARGET="14.0" + VERBOSE=1 python -m build --wheel + fi + + for file in ./dist/*.whl; do cp "$file" "${file/arm64.whl/aarch64.whl}"; done + + export CMAKE_OSX_ARCHITECTURES="x86_64" && export CMAKE_ARGS="-DLLAMA_NATIVE=off -DLLAMA_AVX=off -DLLAMA_AVX2=off -DLLAMA_FMA=off -DLLAMA_F16C=off -DLLAMA_METAL=on" && export ARCHFLAGS="-arch x86_64" + VERBOSE=1 python -m build --wheel + + if [[ "$OSVER" == "macos-13" ]]; then + export SDKROOT="${XCODE15PATH}/Platforms/MacOSX.platform/Developer/SDKs/MacOSX14.0.sdk" + export MACOSX_DEPLOYMENT_TARGET="14.0" + VERBOSE=1 python -m build --wheel + fi + + - uses: softprops/action-gh-release@v2 + with: + files: dist/* + # set release name to -metal + tag_name: ${{ github.ref_name }}-metal + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/llama-cpp-python/.github/workflows/generate-index-from-release.yaml b/llama-cpp-python/.github/workflows/generate-index-from-release.yaml new file mode 100644 index 0000000000000000000000000000000000000000..500c4613caccf38c9911e4c39d89866f20caff98 --- /dev/null +++ b/llama-cpp-python/.github/workflows/generate-index-from-release.yaml @@ -0,0 +1,50 @@ +name: Wheels Index + +on: + # Trigger on any new release + release: + types: [published] + + # Allows you to run this workflow manually from the Actions tab + workflow_dispatch: + +# Sets permissions of the GITHUB_TOKEN to allow deployment to GitHub Pages +permissions: + contents: read + pages: write + id-token: write + +# Allow only one concurrent deployment, skipping runs queued between the run in-progress and latest queued. +# However, do NOT cancel in-progress runs as we want to allow these production deployments to complete. 
+concurrency: + group: "pages" + cancel-in-progress: false + +jobs: + # Single deploy job since we're just deploying + deploy: + environment: + name: github-pages + url: ${{ steps.deployment.outputs.page_url }} + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Setup Pages + uses: actions/configure-pages@v5 + - name: Build + run: | + ./scripts/releases-to-pep-503.sh index/whl/cpu '^[v]?[0-9]+\.[0-9]+\.[0-9]+$' + ./scripts/releases-to-pep-503.sh index/whl/cu121 '^[v]?[0-9]+\.[0-9]+\.[0-9]+-cu121$' + ./scripts/releases-to-pep-503.sh index/whl/cu122 '^[v]?[0-9]+\.[0-9]+\.[0-9]+-cu122$' + ./scripts/releases-to-pep-503.sh index/whl/cu123 '^[v]?[0-9]+\.[0-9]+\.[0-9]+-cu123$' + ./scripts/releases-to-pep-503.sh index/whl/cu124 '^[v]?[0-9]+\.[0-9]+\.[0-9]+-cu124$' + ./scripts/releases-to-pep-503.sh index/whl/metal '^[v]?[0-9]+\.[0-9]+\.[0-9]+-metal$' + - name: Upload artifact + uses: actions/upload-pages-artifact@v3 + with: + # Upload entire repository + path: 'index' + - name: Deploy to GitHub Pages + id: deployment + uses: actions/deploy-pages@v4 diff --git a/llama-cpp-python/.github/workflows/publish-to-test.yaml b/llama-cpp-python/.github/workflows/publish-to-test.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2bf0ea9ba4ace5af53521ae21d7e25afaedc8f5e --- /dev/null +++ b/llama-cpp-python/.github/workflows/publish-to-test.yaml @@ -0,0 +1,43 @@ +# Based on: https://packaging.python.org/en/latest/guides/publishing-package-distribution-releases-using-github-actions-ci-cd-workflows/ + +name: Publish to TestPyPI + +on: + workflow_dispatch: + inputs: + dev_version: + description: 'Dev version N' + required: true + + +jobs: + build-n-publish: + name: Build and publish + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + with: + submodules: "recursive" + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.8" + - name: Append Dev Version to __version__ + run: | + DEV_VERSION=${{ github.event.inputs.dev_version }} + CURRENT_VERSION=$(awk -F= '/__version__ =/ {print $2}' llama_cpp/__init__.py | tr -d ' "') + NEW_VERSION="${CURRENT_VERSION}.dev${DEV_VERSION}" + sed -i 's/__version__ = \".*\"/__version__ = \"'"${NEW_VERSION}"'\"/' llama_cpp/__init__.py + - name: Install dependencies + run: | + python3 -m pip install --upgrade pip build + python3 -m pip install -e .[all] + - name: Build source distribution + run: | + python3 -m build --sdist + - name: Publish to Test PyPI + uses: pypa/gh-action-pypi-publish@release/v1 + with: + password: ${{ secrets.TEST_PYPI_API_TOKEN }} + repository-url: https://test.pypi.org/legacy/ diff --git a/llama-cpp-python/.github/workflows/publish.yaml b/llama-cpp-python/.github/workflows/publish.yaml new file mode 100644 index 0000000000000000000000000000000000000000..bc4ec9063b6756d621b8390fe24303f021876d03 --- /dev/null +++ b/llama-cpp-python/.github/workflows/publish.yaml @@ -0,0 +1,32 @@ +name: Publish to PyPI + +# Based on: https://packaging.python.org/en/latest/guides/publishing-package-distribution-releases-using-github-actions-ci-cd-workflows/ + +on: workflow_dispatch + +jobs: + build-n-publish: + name: Build and publish + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + with: + submodules: "recursive" + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.8" + - name: Install dependencies + run: | + python3 -m pip install --upgrade pip build + python3 -m pip install -e .[all] + - name: Build source distribution + 
run: | + python3 -m build --sdist + - name: Publish distribution to PyPI + # TODO: move to tag based releases + # if: startsWith(github.ref, 'refs/tags') + uses: pypa/gh-action-pypi-publish@release/v1 + with: + password: ${{ secrets.PYPI_API_TOKEN }} diff --git a/llama-cpp-python/.github/workflows/test-pypi.yaml b/llama-cpp-python/.github/workflows/test-pypi.yaml new file mode 100644 index 0000000000000000000000000000000000000000..aa8e8fa0b0f67ee1fbf0b082443e6d5f28ab0354 --- /dev/null +++ b/llama-cpp-python/.github/workflows/test-pypi.yaml @@ -0,0 +1,64 @@ +name: Tests for PyPI package + +on: workflow_dispatch + +jobs: + build-linux: + + runs-on: ubuntu-latest + strategy: + matrix: + python-version: ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12"] + + steps: + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + python3 -m pip install --upgrade pip + python3 -m pip install --verbose llama-cpp-python[all] + - name: Test with pytest + run: | + python3 -c "import llama_cpp" + + build-windows: + + runs-on: windows-latest + strategy: + matrix: + python-version: ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12"] + + steps: + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + python3 -m pip install --upgrade pip + python3 -m pip install --verbose llama-cpp-python[all] + - name: Test with pytest + run: | + python3 -c "import llama_cpp" + + build-macos: + + runs-on: macos-latest + strategy: + matrix: + python-version: ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12"] + + steps: + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + python3 -m pip install --upgrade pip + python3 -m pip install --verbose llama-cpp-python[all] + - name: Test with pytest + run: | + python3 -c "import llama_cpp" diff --git a/llama-cpp-python/.github/workflows/test.yaml b/llama-cpp-python/.github/workflows/test.yaml new file mode 100644 index 0000000000000000000000000000000000000000..292343c2bf6858c5daeeec7a54f615f01d57af10 --- /dev/null +++ b/llama-cpp-python/.github/workflows/test.yaml @@ -0,0 +1,126 @@ +name: Tests + +on: + pull_request: + branches: + - main + push: + branches: + - main + +jobs: + build-linux: + + runs-on: ubuntu-latest + strategy: + matrix: + python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"] + + steps: + - uses: actions/checkout@v4 + with: + submodules: "recursive" + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + python3 -m pip install --upgrade pip + python3 -m pip install .[all] -v + - name: Test with pytest + run: | + python3 -m pytest + + build-windows: + + runs-on: windows-latest + strategy: + matrix: + python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"] + + steps: + - uses: actions/checkout@v4 + with: + submodules: "recursive" + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + python3 -m pip install --upgrade pip + python3 -m pip install .[all] -v + - name: Test with pytest + run: | + python3 -m pytest + + build-macos: + + runs-on: macos-13 + strategy: + matrix: + 
python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"] + + steps: + - uses: actions/checkout@v4 + with: + submodules: "recursive" + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + python3 -m pip install --upgrade pip + python3 -m pip install .[all] --verbose + - name: Test with pytest + run: | + python3 -m pytest + + # build-linux-opencl: + + # runs-on: ubuntu-latest + + # steps: + # - uses: actions/checkout@v4 + # with: + # submodules: "recursive" + # - name: Set up Python 3.8 + # uses: actions/setup-python@v5 + # with: + # python-version: "3.8" + # - name: Set up OpenCL & CLBlast + # run: | + # wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB | gpg --dearmor | sudo tee /usr/share/keyrings/oneapi-archive-keyring.gpg > /dev/null + # echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" | sudo tee /etc/apt/sources.list.d/oneAPI.list + # sudo apt-get update + # sudo apt-get install -y --no-install-recommends llvm intel-oneapi-runtime-opencl intel-oneapi-runtime-compilers libclblast-dev + # - name: Install dependencies + # run: | + # python3 -m pip install --upgrade pip + # CMAKE_ARGS="-DLLAMA_CLBLAST=on" python3 -m pip install .[all] --verbose + # - name: Test with pytest + # run: | + # python3 -m pytest + + + build-macos-metal: + + runs-on: macos-13 + + steps: + - uses: actions/checkout@v4 + with: + submodules: "recursive" + - name: Set up Python 3.8 + uses: actions/setup-python@v5 + with: + python-version: "3.8" + - name: Install dependencies + run: | + python3 -m pip install --upgrade pip + CMAKE_ARGS="-DLLAMA_METAL=on" python3 -m pip install .[all] --verbose + - name: Test with pytest + run: | + python3 -m pytest diff --git a/llama-cpp-python/.gitignore b/llama-cpp-python/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..9d68dbcd9d94bcd8c4fb1fb90891cd0003d07c32 --- /dev/null +++ b/llama-cpp-python/.gitignore @@ -0,0 +1,180 @@ +*.local + +.python-version + +.vscode/ + +_skbuild/ + +.envrc +.direnv + +models/ + +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +llama_cpp/*.so +llama_cpp/*.dylib +llama_cpp/*.metal +llama_cpp/*.dll +llama_cpp/*.lib + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. 
+*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/#use-with-ide +.pdm.toml + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. 
+.idea/ + +# downloaded model .bin files +docker/open_llama/*.bin diff --git a/llama-cpp-python/.gitmodules b/llama-cpp-python/.gitmodules new file mode 100644 index 0000000000000000000000000000000000000000..7edf0975dc12ccc95ad14de085f07efe6d65c620 --- /dev/null +++ b/llama-cpp-python/.gitmodules @@ -0,0 +1,3 @@ +[submodule "vendor/llama.cpp"] + path = vendor/llama.cpp + url = https://github.com/ggerganov/llama.cpp.git diff --git a/llama-cpp-python/.readthedocs.yaml b/llama-cpp-python/.readthedocs.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ff3e950cd1110fe552912cea4c268c4023d2b737 --- /dev/null +++ b/llama-cpp-python/.readthedocs.yaml @@ -0,0 +1,24 @@ +# Read the Docs configuration file for MkDocs projects +# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details + +# Required +version: 2 + +# Set the version of Python and other tools you might need +build: + os: ubuntu-22.04 + tools: + python: "3.11" + +mkdocs: + configuration: mkdocs.yml + +python: + install: + - method: pip + path: . + - requirements: docs/requirements.txt + +submodules: + include: all + recursive: true \ No newline at end of file diff --git a/llama-cpp-python/CHANGELOG.md b/llama-cpp-python/CHANGELOG.md new file mode 100644 index 0000000000000000000000000000000000000000..9b995509eb6175b04ca23cbe5b43c006e3b679ac --- /dev/null +++ b/llama-cpp-python/CHANGELOG.md @@ -0,0 +1,630 @@ +# Changelog + +All notable changes to this project will be documented in this file. + +The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), +and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). + +## [Unreleased] + +## [0.2.69] + +- feat: Update llama.cpp to ggerganov/llama.cpp@6ecf3189e00a1e8e737a78b6d10e1d7006e050a2 +- feat: Add llama-3-vision-alpha chat format by @abetlen in 31b1d95a6c19f5b615a3286069f181a415f872e8 +- fix: Change default verbose value of verbose in image chat format handlers to True to match Llama by @abetlen in 4f01c452b6c738dc56eacac3758119b12c57ea94 +- fix: Suppress all logs when verbose=False, use hardcoded fileno's to work in colab notebooks by @abetlen in f116175a5a7c84569c88cad231855c1e6e59ff6e +- fix: UTF-8 handling with grammars by @jsoma in #1415 + +## [0.2.68] + +- feat: Update llama.cpp to ggerganov/llama.cpp@77e15bec6217a39be59b9cc83d6b9afb6b0d8167 +- feat: Add option to enable flash_attn to Lllama params and ModelSettings by @abetlen in 22d77eefd2edaf0148f53374d0cac74d0e25d06e +- fix(ci): Fix build-and-release.yaml by @Smartappli in #1413 + +## [0.2.67] + +- fix: Ensure image renders before text in chat formats regardless of message content order by @abetlen in 3489ef09d3775f4a87fb7114f619e8ba9cb6b656 +- fix(ci): Fix bug in use of upload-artifact failing to merge multiple artifacts into a single release by @abetlen in d03f15bb73a1d520970357b702a9e7d4cc2a7a62 + +## [0.2.66] + +- feat: Update llama.cpp to ggerganov/llama.cpp@8843a98c2ba97a25e93319a104f9ddfaf83ce4c4 +- feat: Generic Chat Formats, Tool Calling, and Huggingface Pull Support for Multimodal Models (Obsidian, LLaVA1.6, Moondream) by @abetlen in #1147 +- ci(fix): Workflow actions updates and fix arm64 wheels not included in release by @Smartappli in #1392 +- ci: Add support for pre-built cuda 12.4.1 wheels by @Smartappli in #1388 +- feat: Add support for str type kv_overrides by @abetlen in a411612b385cef100d76145da1fbd02a7b7cc894 +- fix: Functionary bug fixes by @jeffrey-fong in #1385 +- examples: fix quantize example by @iyubondyrev in #1387 
+- ci: Update dependabot.yml by @Smartappli in #1391 + +## [0.2.65] + +- feat: Update llama.cpp to ggerganov/llama.cpp@46e12c4692a37bdd31a0432fc5153d7d22bc7f72 +- feat: Allow for possibly non-pooled embeddings by @iamlemec in #1380 + +## [0.2.64] + +- feat: Update llama.cpp to ggerganov/llama.cpp@4e96a812b3ce7322a29a3008db2ed73d9087b176 +- feat: Add `llama-3` chat format by @andreabak in #1371 +- feat: Use new llama_token_is_eog in create_completions by @abetlen in d40a250ef3cfaa8224d12c83776a2f1de96ae3d1 +- feat(server): Provide ability to dynamically allocate all threads if desired using -1 by @sean-bailey in #1364 +- ci: Build arm64 wheels by @gaby in 611781f5319719a3d05fefccbbf0cc321742a026 +- fix: Update scikit-build-core build dependency avoid bug in 0.9.1 by @evelkey in #1370 + +## [0.2.63] + +- feat: Update llama.cpp to ggerganov/llama.cpp@0e4802b2ecbaab04b4f829fde4a3096ca19c84b5 +- feat: Add stopping_criteria to ChatFormatter, allow stopping on arbitrary token ids, fixes llama3 instruct by @abetlen in cc81afebf04d26ca1ac3cf72f23f18da6ab58588 + +## [0.2.62] + +- feat: Update llama.cpp to ggerganov/llama.cpp@3b8f1ec4b18770531d0b1d792f3edf08254e4f0c +- feat: update grammar schema converter to match llama.cpp by @themrzmaster in #1353 +- feat: add disable_ping_events flag by @khimaros in #1257 +- feat: Make saved state more compact on-disk by @tc-wolf in #1296 +- feat: Use all available CPUs for batch processing by @ddh0 in #1345 + +## [0.2.61] + +- feat: Update llama.cpp to ggerganov/llama.cpp@ba5e134e073ec6837078c874aba44a702944a676 +- fix: pass correct type to chat handlers for chat completion logprobs by @abetlen in bb65b4d76411112c6fb0bf759efd746f99ef3c6b +- feat: Add support for yaml based server configs by @abetlen in 060bfa64d529ade2af9b1f4e207a3937bbc4138f +- feat: Add typechecking for ctypes structure attributes by @abetlen in 1347e1d050fc5a9a32ffe0bb3e22858da28003bd + +## [0.2.60] + +- feat: Update llama.cpp to ggerganov/llama.cpp@75cd4c77292034ecec587ecb401366f57338f7c0 +- fix: Always embed metal library by @abetlen in b3bfea6dbfb6ed9ce18f9a2723e0a9e4bd1da7ad +- fix: missing logprobs in response, incorrect response type for functionary by @abetlen in 1ae3abbcc3af7f4a25a3ffc40b246f18039565e8 +- fix(docs): incorrect tool_choice example by @CISC in #1330 + +## [0.2.59] + +- feat: Update llama.cpp to ggerganov/llama.cpp@ba0c7c70ab5b15f1f2be7fb0dfbe0366dda30d6c +- feat: Binary wheels for CPU, CUDA (12.1 - 12.3), Metal by @abetlen, @jllllll, and @oobabooga in #1247 +- fix: segfault when logits_all=False by @abetlen in 8649d7671bd1a7c0d9cc6a5ad91c6ca286512ab3 +- fix: last tokens passing to sample_repetition_penalties function by @ymikhailov in #1295 + +## [0.2.58] + +- feat: Update llama.cpp to ggerganov/llama.cpp@ba0c7c70ab5b15f1f2be7fb0dfbe0366dda30d6c +- feat: add support for KV cache quantization options by @Limour-dev in #1307 +- feat: Add logprobs support to chat completions by @windspirit95 in #1311 +- fix: set LLAMA_METAL_EMBED_LIBRARY=on on MacOS arm64 by @bretello in #1289 +- feat: Add tools/functions variables to Jinja2ChatFormatter, add function response formatting for all simple chat formats by @CISC in #1273 +- fix: Changed local API doc references to hosted by by @lawfordp2017 in #1317 + +## [0.2.57] + +- feat: Update llama.cpp to ggerganov/llama.cpp@ac9ee6a4ad740bc1ee484ede43e9f92b5af244c1 +- fix: set default embedding pooling type to unspecified by @abetlen in 4084aabe867b8ec2aba1b22659e59c9318b0d1f3 +- fix: Fix and optimize functionary chat handler by 
@jeffrey-fong in #1282 +- fix: json mode for basic chat formats by @abetlen in 20e6815252d0efd9f015f7adbf108faaf36e3f3c + +## [0.2.56] + +- feat: Update llama.cpp to ggerganov/llama.cpp@c2101a2e909ac7c08976d414e64e96c90ee5fa9e +- feat(server): Add endpoints for tokenize, detokenize and count tokens by @felipelo in #1136 +- feat: Switch embed to llama_get_embeddings_seq by @iamlemec in #1263 +- fix: Fixed json strings grammar by blacklisting character control set by @ExtReMLapin in d02a9cf16ff88ad011e2eb1ce29f4d9400f13cd1 +- fix: Check for existence of clip model path by @kejcao in #1264 + +## [0.2.55] + +- feat: Update llama.cpp to ggerganov/llama.cpp@9731134296af3a6839cd682e51d9c2109a871de5 +- docs: fix small typo in README: 'model know how' -> 'model knows how' by @boegel in #1244 + +## [0.2.54] + +- feat: Update llama.cpp to ggerganov/llama.cpp@cb49e0f8c906e5da49e9f6d64a57742a9a241c6a +- docs: fix typo in README.md embeddings example by @iamlemec in #1232 + +## [0.2.53] + +- feat: Update llama.cpp to ggerganov/llama.cpp@cb49e0f8c906e5da49e9f6d64a57742a9a241c6a +- fix: eos/bos_token set correctly for Jinja2ChatFormatter and automatic chat formatter by @CISC in #1230 + +## [0.2.52] + +- feat: Update llama.cpp to ggerganov/llama.cpp@a33e6a0d2a66104ea9a906bdbf8a94d050189d91 +- fix: Llava15ChatHandler (this function takes at least 4 arguments) by @abetlen in 8383a9e5620f5df5a88f62da16813eac200dd706 + +## [0.2.51] + +- feat: Update llama.cpp to ggerganov/llama.cpp@c39373398803c669056304090050fe3f44b41bf9 +- fix: Restore type hints for low-level api by @abetlen in 19234aa0dbd0c3c87656e65dd2b064665371925b + +## [0.2.50] + +- docs: Update Functionary OpenAI Server Readme by @jeffrey-fong in #1193 +- fix: LlamaHFTokenizer now receives pre_tokens by @abetlen in 47bad30dd716443652275099fa3851811168ff4a + +## [0.2.49] + +- fix: module 'llama_cpp.llama_cpp' has no attribute 'c_uint8' in Llama.save_state by @abetlen in db776a885cd4c20811f22f8bd1a27ecc71dba927 +- feat: Auto detect Mixtral's slightly different format by @lukestanley in #1214 + +## [0.2.48] + +- feat: Update llama.cpp to ggerganov/llama.cpp@15499eb94227401bdc8875da6eb85c15d37068f7 +- feat: Add Google's Gemma formatting via chat_format="gemma" by @alvarobartt in #1210 +- feat: support minItems/maxItems in JSON grammar converter by @nopperl in 3921e10770996d95a9eb22c8248bacef39f69365 +- fix: Update from_pretrained defaults to match hf_hub_download and pull to local cache folder by @abetlen in e6d6260a91b7831733f7d1f73c7af46a3e8185ed +- fix: Raise exceptions when llama model or context fails to load by @abetlen in dd22010e85265ae840c76ec835d67a29ed852722 +- docs: Update README.md to fix pip install llama cpp server by @audip in #1187 + +## [0.2.47] + +- feat: Update llama.cpp to ggerganov/llama.cpp@973053d8b0d04809836b3339a50f68d9c842de90 + +## [0.2.46] + +- feat: Update llama.cpp to ggerganov/llama.cpp@ba2135ccae7462470b3865c6e41d2e1d734eac05 +- feat: Pull models directly from huggingface by @abetlen in #1206 +- feat(low-level-api): Improve API static type-safety and performance. Low level api functions are positional args only now. 
by @abetlen in #1205 + +## [0.2.45] + +- feat: Update llama.cpp to ggerganov/llama.cpp@89febfed9322c8849520dc63c93ee4f5fd72556e + +## [0.2.44] + +- feat: Update llama.cpp to ggerganov/llama.cpp@4524290e87b8e107cc2b56e1251751546f4b9051 +- fix: create_embedding broken response for input type str by @abetlen in 0ce66bc080fe537590b05b24bf442480bf2dd045 +- fix: Use '\n' seperator for EventSourceResponse by @khimaros in #1188 +- fix: Incorporate embedding pooling layer fixes by @iamlemec in #1194 + +## [0.2.43] + +- feat: Update llama.cpp to ggerganov/llama.cpp@8084d554406b767d36b3250b3b787462d5dd626f +- feat: Support batch embeddings by @iamlemec in #1186 +- fix: submodule kompute is not included in sdist by @abetlen in 7dbbfdecadebe7750be650d9409959640ff9a460 +- fix: fix: Update openbuddy prompt format by @abetlen in 07a783779a62a4aac0b11161c7e0eb983ff215f8 + +## [0.2.42] + +- feat: Update llama.cpp to ggerganov/llama.cpp@ea9c8e11436ad50719987fa23a289c74b7b40d40 +- fix: sample idx off-by-one error for logit_processors by @lapp0 in #1179 +- fix: chat formatting bugs in `chatml-function-calling` by @abetlen in 4b0e3320bd8c2c209e29978d0b21e2e471cc9ee3 and 68fb71b6a26a1e57331868f959b47ab4b87851e1 + +## [0.2.41] + +- feat: Update llama.cpp to ggerganov/llama.cpp@895407f31b358e3d9335e847d13f033491ec8a5b +- fix: Don't change order of json schema object properties in generated grammar unless prop_order is passed by @abetlen in d1822fed6b706f38bd1ff0de4dec5baaa3cf84fa + +## [0.2.40] + +- feat: Update llama.cpp to ggerganov/llama.cpp@3bdc4cd0f595a6096cca4a64aa75ffa8a3503465 +- feat: Generic chatml Function Calling using chat_format="chatml-function-calling"` by @abetlen in #957 +- fix: Circular dependancy preventing early Llama object free by @notwa in #1176 +- docs: Set the correct command for compiling with syscl support by @akarshanbiswas in #1172 +- feat: use gpu backend for clip if available by @iamlemec in #1175 + +## [0.2.39] + +- feat: Update llama.cpp to ggerganov/llama.cpp@b08f22c882a1443e6b97081f3ce718a4d1a741f8 +- fix: Fix destructor logging bugs by using llama_log_callback to avoid suppress_stdout_stderr by @abetlen in 59760c85eddc72dfcc1839f43760ef72c23d6874 + +## [0.2.38] + +- feat: Update llama.cpp to ggerganov/llama.cpp@1cfb5372cf5707c8ec6dde7c874f4a44a6c4c915 +- feat: Add speculative decoding by @abetlen in #1120 +- fix: Pass raise_exception and add_generation_prompt to jinja2 chat template by @abetlen in 078cca0361bf5a94d2cf52ed04980d20e32d6f95 + +## [0.2.37] + +- feat: Update llama.cpp to ggerganov/llama.cpp@fea4fd4ba7f6b754ac795387b275e1a014a77bde +- feat: Automatically set chat format from gguf by @abetlen in #1110 + +## [0.2.36] + +- feat: Update llama.cpp to ggerganov/llama.cpp@2aed77eb06a329f0d82bb1c467f4244904d4073f +- feat: Add mistral instruct chat format as "mistral-instruct" by @Rafaelblsilva in #799 + +## [0.2.35] + +- feat: Update llama.cpp to ggerganov/llama.cpp@d2f650cb5b04ee2726663e79b47da5efe196ce00 + +## [0.2.34] + +- feat: Update llama.cpp to ggerganov/llama.cpp@6db2b41a76ee78d5efdd5c3cddd5d7ad3f646855 +- feat: Add json schema mode by @abetlen in #1122 + +## [0.2.33] + +- feat: Update llama.cpp to ggerganov/llama.cpp@faa3526a1eba458120987ed8269e5616385a76f4 +- feat(server): include llama-cpp-python version in openapi spec by @abetlen in cde7514c3d28e6d52f272614e9957208c344dde5 +- fix: use both eos and bos tokens as stop sequences for hf-tokenizer-config chat format. 
by @abetlen in 5b982d0f8c6f35242c8862ffdce00e17cea0b44f +- fix: GGUF metadata KV overrides, re #1011 by @phiharri in #1116 +- fix: llama_log_set should be able to accept null pointer by @abetlen in c970d41a85381fd55235136f123422df0bf0c7e7 + +## [0.2.32] + +- feat: Update llama.cpp to ggerganov/llama.cpp@504dc37be8446fb09b1ede70300250ad41be32a2 +- fix: from_json_schema oneof/anyof bug by @jndiogo in d3f5528ca8bcb9d69d4f27e21631e911f1fb9bfe +- fix: pass chat handler not chat formatter for huggingface autotokenizer and tokenizer_config formats by @abetlen in 24f39454e91cf5dddbc4b6041aead4accc7c7a2d +- feat: Add add_generation_prompt option for jinja2chatformatter by @abetlen in 7f3209b1eb4ad3260ba063801fab80a8c25a2f4c +- feat: Add Jinja2ChatFormatter by @abetlen in be09318c26add8674ce494ae7cc480cce72a4146 +- feat: Expose gguf model metadata in metadata property by @abetlen in 5a34c57e5479e50c99aba9b38218cc48e6560b81 + +## [0.2.31] + +- feat: Update llama.cpp to ggerganov/llama.cpp@a5cacb22b2114fd9adf61c00cbb237384d86bced +- fix: Mirostat sampling now passes correct type to ctypes and tracks state during generation by @abetlen in 3babe3512cb95743108f2b595210c38ed6f1b904 +- fix: Python3.8 support in server by @abetlen in 141293a75b564a8699e0acba1da24d9aa1cf0ab1 + +## [0.2.30] + +- feat: Update llama.cpp to ggerganov/llama.cpp@57e2a7a52a819883f40dada8a2edc24ecf48186b +- feat(server): Add ability to load chat format from huggingface autotokenizer or tokenizer_config.json files by @abetlen in b8fc1c7d83ad4a9207c707ba1d954fe580286a01 +- feat: Integration of Jinja2 Templating for chat formats by @teleprint-me in #875 +- fix: Offload KQV by default by @abetlen in 48c3b77e6f558a9899de0e1155c7dc0c7958d8e8 +- fix: Support Accept text/event-stream in chat and completion endpoints, resolves #1083 by @aniljava in #1088 +- fix(cli): allow passing n_ctx=0 to openAI API server args to use model n_ctx_train field per #1015 by @K-Mistele in #1093 + +## [0.2.29] + +- feat: Update llama.cpp to ggerganov/llama.cpp@4483396751c79dea540808b9cb9238245d06da2b +- feat: Add split_mode option by @abetlen in 84615adbc6855c8384807c42f0130f9a1763f99d +- feat: Implement GGUF metadata KV overrides by @phiharri in #1011 +- fix: Avoid "LookupError: unknown encoding: ascii" when open() called in a destructor by @yieldthought in #1012 +- fix: Fix low_level_api_chat_cpp example to match current API by @aniljava in #1086 +- fix: Fix Pydantic model parsing by @DeNeutoy in #1087 + +## [0.2.28] + +- feat: Update llama.cpp to ggerganov/llama.cpp@6efb8eb30e7025b168f3fda3ff83b9b386428ad6 +- feat: Add ability to pass in penalize_nl param by @shankinson in #1068 +- fix: print_grammar to stderr by @turian in #1052 + +## [0.2.27] + +- feat: Update llama.cpp to ggerganov/llama.cpp@b3a7c20b5c035250257d2b62851c379b159c899a +- feat: Add `saiga` chat format by @femoiseev in #1050 +- feat: Added `chatglm3` chat format by @xaviviro in #1059 +- fix: Correct typo in README.md by @qeleb in (#1058) + +## [0.2.26] + +- feat: Update llama.cpp to ggerganov/llama.cpp@f6793491b5af6da75edad34d6f503ef86d31b09f + +## [0.2.25] + +- feat(server): Multi model support by @D4ve-R in #931 +- feat(server): Support none defaulting to infinity for completions by @swg in #111 +- feat(server): Implement openai api compatible authentication by @docmeth2 in #1010 +- fix: text_offset of multi-token characters by @twaka in #1037 +- fix: ctypes bindings for kv override by @phiharri in #1011 +- fix: ctypes definitions of llama_kv_cache_view_update and llama_kv_cache_view_free. 
by @e-c-d in #1028 + +## [0.2.24] + +- feat: Update llama.cpp to ggerganov/llama.cpp@0e18b2e7d0b5c0a509ea40098def234b8d4a938a +- feat: Add offload_kqv option to llama and server by @abetlen in 095c65000642a3cf73055d7428232fb18b73c6f3 +- feat: n_ctx=0 now uses the n_ctx_train of the model by @DanieleMorotti in #1015 +- feat: logits_to_logprobs supports both 2-D and 3-D logits arrays by @kddubey in #1002 +- fix: Remove f16_kv, add offload_kqv fields in low level and llama apis by @brandonrobertz in #1019 +- perf: Don't convert logprobs arrays to lists by @kddubey in #1021 +- docs: Fix README.md functionary demo typo by @evelynmitchell in #996 +- examples: Update low_level_api_llama_cpp.py to match current API by @jsoma in #1023 + +## [0.2.23] + +- Update llama.cpp to ggerganov/llama.cpp@948ff137ec37f1ec74c02905917fa0afc9b97514 +- Add qwen chat format by @yhfgyyf in #1005 +- Add support for running the server with SSL by @rgerganov in #994 +- Replace logits_to_logprobs implementation with numpy equivalent to llama.cpp by @player1537 in #991 +- Fix UnsupportedOperation: fileno in suppress_stdout_stderr by @zocainViken in #961 +- Add Pygmalion chat format by @chiensen in #986 +- README.md multimodal params fix by @zocainViken in #967 +- Fix minor typo in README by @aniketmaurya in #958 + +## [0.2.22] + +- Update llama.cpp to ggerganov/llama.cpp@8a7b2fa528f130631a5f43648481596ab320ed5a +- Fix conflict with transformers library by kddubey in #952 + +## [0.2.21] + +- Update llama.cpp to ggerganov/llama.cpp@64e64aa2557d97490b2fe1262b313e2f4a1607e3 +- Make building llava optional by setting `CMAKE_ARGS="-DLLAVA_BUILD=OFF"` and using `LLAVA_CPP_LIB` to specify alternative path to shared library by @abetlen in e3941d9c674dbd9891dc3ceda390daeb21f05fd1 + +## [0.2.20] + +- Update llama.cpp to ggerganov/llama.cpp@b38a16dfcff88d547f78f52d1bea31b84a05aff7 +- Add `zephyr` chat format by @fakerybakery in #938 +- Add `baichuan` chat format by @caiyesd in #938 +- Add `baichuan-2` chat format by @caiyesd in #936 +- Improve documentation for server chat formats by @jooray in #934 +- Fix typo in README by @antonvice in 940 +- Fix typo in the Open Orca chat format by @gardner in #947 + +## [0.2.19] + +- Update llama.cpp to ggerganov/llama.cpp@0b871f1a04ef60e114bbe43004fd9c21114e802d +- Fix #569: stop parameter in chat completion api should accept str by @abetlen in 128dc4731fa846ead7e684a137ca57d8931b8899 +- Document server host and port parameters by @jamesbraza in #768 +- Do not set grammar to None when initializing LlamaGrammar by @mthuurne in #834 +- Add mistrallite, intel, and openchat formats by @fakerybakery in #927 +- Add support for min_p parameter by @tk-master in #921 +- Fix #929: tokenizer adding leading space when generating from empty prompt by @abetlen in a34d48014192771d2e308a76c22f33bc0318d983 +- Fix low level api example by @zocainViken in #925 +- Fix missing package in openblas docker image by @ZisisTsatsas in #920 + +## [0.2.18] + +- Update llama.cpp to ggerganov/llama.cpp@6bb4908a17150b49373b5f977685b2e180a04f6f + +## [0.2.17] + +- Update llama.cpp to ggerganov/llama.cpp@df9d1293defe783f42bc83af732d3c670552c541 +- Hotfix: Set `CUDA_ARCHITECTURES=OFF` for `llava_shared` target on Windows by @abetlen in 4388f3341413110217b98c4f097ac5c590bdf40b + +## [0.2.16] + +- Update llama.cpp to ggerganov/llama.cp@a75fa576abba9d37f463580c379e4bbf1e1ad03c +- Add `set_seed` to `Llama` class by @abetlen in fd41ed3a908761d286102a019a34c2938a15118d +- Fix server doc arguments by @kjunggithub in #892 +- Fix 
response_format handler in llava chat handler by @abetlen in b62c44983921197ed10a7d29dc4ba920e9979380 +- Fix default max_tokens, chat completion is now unlimited (to context length) and completion is 16 tokens to match OpenAI defaults by @abetlen in e7962d2c733cbbeec5a37392c81f64185a9a39e8 +- Fix json_schema_to_gbnf helper so that it takes a json schema string as input instead by @abetlen in faeae181b1e868643c0dc28fcf039f077baf0829 +- Add support for $ref and $def in json_schema_to_gbnf to handle more complex function schemas by @abetlen in 770df344369c0630df1be14be9f9e301e7c56d24 +- Update functionary chat handler for new OpenAI api by abetlen in 1b376c62b775b401653facf25a519d116aafe99a +- Fix add default stop sequence to chatml chat format by @abetlen in b84d76a844149216d511cfd8cdb9827148a1853c +- Fix sampling bug when logits_all=False by @abetlen in 6f0b0b1b840af846938ed74d0e8170a91c40e617 + +## [0.2.15] + +- Update llama.cpp to ggerganov/llama.cpp@0a7c980b6f94a049cb804573df2d8092a34df8e4 +- Add support for Llava1.5 multimodal models by @damian0815 and @abetlen in #821 +- Update OpenAI API compatibility to match dev day update by @abetlen in #821 +- Add seed parameter to completion and chat_completion functions of Llama class by @abetlen in 86aeb9f3a14808575d2bb0076e6acb4a30907e6a +- Add JSON mode support to constrain chat completion to JSON objects by @abetlen in b30b9c338bf9af316d497ea501d39f5c246900db + +## [0.2.14] + +- Update llama.cpp to ggerganov/llama.cpp@f0b30ef7dc1360922ccbea0a8cd3918ecf15eaa7 +- Add support for Huggingface Autotokenizer Chat Formats by @bioshazard and @abetlen in #790 and bbffdaebaa7bb04b543dbf683a07276087251f86 +- Fix llama-2 chat format by @earonesty in #869 +- Add support for functionary chat format by @abetlen in #784 +- Migrate inference from deprecated `llama_eval`API to `llama_batch` and `llama_decode` by @abetlen in #795 + +## [0.2.13] + +- Update llama.cpp to ggerganov/llama.cpp@51b2fc11f7f605fff49725a4540e9a6ef7b51b70 +- Fix name 'open' is not defined exception when deleting model by @abetlen in 011b95d7f34cbfc528af75a892757bd9a20838ab +- Fix tokenization of special characters by @antoine-lizee in #850 + +## [0.2.12] + +- Update llama.cpp to ggerganov/llama.cpp@50337961a678fce4081554b24e56e86b67660163 +- Fix missing `n_seq_id` in `llama_batch` by @NickAlgra in #842 +- Fix for shared libraries on Windows that start with `lib` prefix by @sujeendran in #848 +- Fix exception raised in `__del__` when freeing models by @cebtenzzre in #846 +- Performance improvement for logit bias by @zolastro in #851 +- Fix suffix check arbitrary code execution bug by @mtasic85 in #854 +- Fix typo in `function_call` parameter in `llama_types.py` by @akatora28 in #849 +- Fix streaming not returning `finish_reason` by @gmcgoldr in #798 +- Fix `n_gpu_layers` check to allow values less than 1 for server by @hxy9243 in #826 +- Supppress stdout and stderr when freeing model by @paschembri in #803 +- Fix `llama2` chat format by @delock in #808 +- Add validation for tensor_split size by @eric1932 #820 +- Print stack trace on server error by @abetlen in d6a130a052db3a50975a719088a9226abfebb266 +- Update docs for gguf by @johnccshen in #783 +- Add `chatml` chat format by @abetlen in 305482bd4156c70802fc054044119054806f4126 + +## [0.2.11] + +- Fix bug in `llama_model_params` object has no attribute `logits_all` by @abetlen in d696251fbe40015e8616ea7a7d7ad5257fd1b896 + +## [0.2.10] + +- Fix bug 'llama_model_params' object has no attribute 'embedding' by @abetlen in 
42bb721d64d744242f9f980f2b89d5a6e335b5e4 + +## [0.2.9] + +- Fix critical bug in pip installation of v0.2.8 due to `.git` directory in ac853e01e1a217a578080a4e1b851d2d08450adf + +## [0.2.8] + +- Update llama.cpp to ggerganov/llama.cpp@40e07a60f9ce06e79f3ccd4c903eba300fb31b5e +- Add configurable chat formats by @abetlen in #711 +- Fix rope scaling bug by @Josh-XT in #767 +- Fix missing numa parameter in server by @abetlen in d9bce17794d0dd6f7962d10aad768fedecf3ab89 + +## [0.2.7] + +- Update llama.cpp to ggerganov/llama.cpp@a98b1633d5a94d0aa84c7c16e1f8df5ac21fc850 +- Install required runtime dlls to package directory on windows by @abetlen in 8d75016549e2ff62a511b1119d966ffc0df5c77b +- Add openai-processing-ms to server response header by @Tradunsky in #748 +- Bump minimum version of scikit-build-core to 0.5.1 to fix msvc cmake issue by @abetlen in 1ed0f3ebe16993a0f961155aa4b2c85f1c68f668 +- Update `llama_types.py` to better match the openai api, old names are aliased to new ones by @abetlen in dbca136feaaf7f8b1182c4c3c90c32918b1d0bb3 + +## [0.2.6] + +- Update llama.cpp to 80291a1d02a07f7f66666fb576c5b1e75aa48b46 + +## [0.2.5] + +- Fix docker images missing starlette-context dependency by @abetlen in 22917989003c5e67623d54ab45affa1e0e475410 +- Fix loading dll in Windows Isolation Containers by @abetlen in 847466562573191efa655753d9252f308c4fbdb0 +- Fix build issue on m1 macs by @abetlen in dbd3a6d1ed8416a8fd800127251e730153afa305 +- Update docs to gguf and add hw acceleration docs for server by @jasonacox in #688 + +## [0.2.4] + +- Add NUMA support. **NOTE** low level api users must call llama_backend_init at the start of their programs by abetlen in f4090a0bb2a2a25acfe28d31c82cc1aa273bedee +- Fix tensor_split server cli argument by @abetlen in c4c440ba2dc86d9de728a751311fdd1c8e3756fa +- Made all `Llama` init parameters into keyword-only parameters by @abetlen in c8f9b8a734b5b040379bbd93995ba177affab1fe +- Added server params for `low_vram`, `main_gpu`, `lora_base`, and `lora_path` by @abetlen in 2920c4bf7ee1412d6bba7846e0e1b7ef6d34043b +- Removed server params for `rms_norm_eps` and `n_gqa` by @abetlen in 2920c4bf7ee1412d6bba7846e0e1b7ef6d34043b +- Fix boolean cli options by @abetlen in c999325e8e4507f6c6249dd2fb8de7f8bf57f71e and 0449d29b9f940e437231a07b9d56550226558bac +- Silence Pydantic Settings warnings about `model_alias` setting by @earonesty in #705 + +## [0.2.3] + +- Update llama.cpp to ggerganov/llama.cpp@71ca2fad7d6c0ef95ef9944fb3a1a843e481f314 +- Add X-Request-ID request header for mirroring custom IDs by @devrimcavusoglu in #703 +- Add pyproject extra for scikit-build-core to ensure compatible pathspec version by @abetlen in 6cfc54284b99ef1bff8193e2d5e483dbd89ada02 +- Fix issue with Literal and Optional cli arguments not working by @abetlen in #702 + +## [0.2.2] + +- Fix bug in pip install of v0.2.1 due to scikit-build-core removing all `.metal` files in the source distribution (see #701) + +## [0.2.1] + +- Fix bug in pip install of v0.2.0 due to .git folder being included in the source distribution (see #701) + +## [0.2.0] + +- Migrated to scikit-build-core build system by @abetlen in #499 +- Use `numpy` views for `LogitsProcessor` and `StoppingCriteria` instead of python lists by @abetlen in #499 +- Drop support for end-of-life Python3.7 by @abetlen in #499 +- Convert low level `llama.cpp` constants to use basic python types instead of `ctypes` types by @abetlen in #499 + +## [0.1.85] + +- Add `llama_cpp.__version__` attribute by @janvdp in #684 +- Fix low level api examples 
by @jbochi in #680 + +## [0.1.84] + +- Update llama.cpp + +## [0.1.83] + +- Update llama.cpp + +## [0.1.82] + +- Update llama.cpp + +## [0.1.81] + +- Update llama.cpp + +## [0.1.80] + +- Update llama.cpp + +## [0.1.79] + +- GGUF Support (breaking change requiring new model format) + +## [0.1.78] + +- Grammar based sampling via LlamaGrammar which can be passed to completions +- Make n_gpu_layers == -1 offload all layers + +## [0.1.77] + +- (llama.cpp) Update llama.cpp add support for LLaMa 2 70B +- (server) Add temporary n_gqa and rms_norm_eps parameters required for LLaMa 2 70B + +## [0.1.76] + +- (llama.cpp) Update llama.cpp add support for LLaMa 2 70B + +## [0.1.75] + +- Update llama.cpp + +## [0.1.74] + +- (server) OpenAI style error responses + +## [0.1.73] + +- (server) Add rope parameters to server settings + +## [0.1.72] + +- (llama.cpp) Update llama.cpp added custom_rope for extended context lengths + +## [0.1.71] + +- (llama.cpp) Update llama.cpp + +- (server) Fix several pydantic v2 migration bugs + +## [0.1.70] + +- (Llama.create_completion) Revert change so that `max_tokens` is not truncated to `context_size` in `create_completion` +- (server) Fixed changed settings field names from pydantic v2 migration + +## [0.1.69] + +- (server) Streaming requests can are now interrupted pre-maturely when a concurrent request is made. Can be controlled with the `interrupt_requests` setting. +- (server) Moved to fastapi v0.100.0 and pydantic v2 +- (docker) Added a new "simple" image that builds llama.cpp from source when started. +- (server) performance improvements by avoiding unnecessary memory allocations during sampling + +## [0.1.68] + +- (llama.cpp) Update llama.cpp + +## [0.1.67] + +- Fix performance bug in Llama model by pre-allocating memory tokens and logits. +- Fix bug in Llama model where the model was not free'd after use. + +## [0.1.66] + +- (llama.cpp) New model API + +- Performance issue during eval caused by looped np.concatenate call +- State pickling issue when saving cache to disk + +## [0.1.65] + +- (llama.cpp) Fix struct misalignment bug + +## [0.1.64] + +- (llama.cpp) Update llama.cpp +- Fix docs for seed. Set -1 for random. + +## [0.1.63] + +- (llama.cpp) Add full gpu utilisation in CUDA +- (llama.cpp) Add get_vocab +- (llama.cpp) Add low_vram parameter +- (server) Add logit_bias parameter + +## [0.1.62] + +- Metal support working +- Cache re-enabled + +## [0.1.61] + +- Fix broken pip installation + +## [0.1.60] + +NOTE: This release was deleted due to a bug with the packaging system that caused pip installations to fail. + +- Truncate max_tokens in create_completion so requested tokens doesn't exceed context size. +- Temporarily disable cache for completion requests + +## [v0.1.59] + +- (llama.cpp) k-quants support +- (server) mirostat sampling parameters to server +- Support both `.so` and `.dylib` for `libllama` on MacOS + +## [v0.1.58] + +- (llama.cpp) Metal Silicon support + +## [v0.1.57] + +- (llama.cpp) OpenLlama 3B support + +## [v0.1.56] + +- (misc) Added first version of the changelog +- (server) Use async routes +- (python-api) Use numpy for internal buffers to reduce memory usage and improve performance. +- (python-api) Performance bug in stop sequence check slowing down streaming. 
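Many of the changelog entries above refer to the high-level chat API (configurable chat formats, JSON mode, logprobs, and so on). The following is a minimal sketch of that API for orientation only; the model path, chat format, and parameter values are placeholders and are not taken from the changelog.

```python
# Minimal sketch of the high-level chat API referenced throughout the changelog.
# The model path and chat_format are placeholders; any GGUF model works.
from llama_cpp import Llama

llm = Llama(
    model_path="./models/BioMistral-7B.Q4_K_M.gguf",  # placeholder path
    n_ctx=2048,                      # context window size
    chat_format="mistral-instruct",  # one of the chat formats listed above
)

response = llm.create_chat_completion(
    messages=[
        {"role": "system", "content": "You are a concise medical assistant."},
        {"role": "user", "content": "In one sentence, what is quantization?"},
    ],
    max_tokens=128,
)
print(response["choices"][0]["message"]["content"])
```

The response mirrors the OpenAI chat-completion schema, which is why the server endpoints mentioned in these releases can act as a drop-in replacement.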
diff --git a/llama-cpp-python/CMakeLists.txt b/llama-cpp-python/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..70f9b99936be49be486a8cad1035f4e91a937413 --- /dev/null +++ b/llama-cpp-python/CMakeLists.txt @@ -0,0 +1,87 @@ +cmake_minimum_required(VERSION 3.21) + +project(llama_cpp) + +option(LLAMA_BUILD "Build llama.cpp shared library and install alongside python package" ON) +option(LLAVA_BUILD "Build llava shared library and install alongside python package" ON) + +if (LLAMA_BUILD) + set(BUILD_SHARED_LIBS "On") + + # Building llama + if (APPLE AND NOT CMAKE_SYSTEM_PROCESSOR MATCHES "arm64") + # Need to disable these llama.cpp flags on Apple x86_64, + # otherwise users may encounter invalid instruction errors + set(LLAMA_AVX "Off" CACHE BOOL "llama: enable AVX" FORCE) + set(LLAMA_AVX2 "Off" CACHE BOOL "llama: enable AVX2" FORCE) + set(LLAMA_FMA "Off" CACHE BOOL "llama: enable FMA" FORCE) + set(LLAMA_F16C "Off" CACHE BOOL "llama: enable F16C" FORCE) + endif() + + if (APPLE) + set(LLAMA_METAL_EMBED_LIBRARY "On" CACHE BOOL "llama: embed metal library" FORCE) + endif() + + add_subdirectory(vendor/llama.cpp) + install( + TARGETS llama + LIBRARY DESTINATION ${SKBUILD_PLATLIB_DIR}/llama_cpp + RUNTIME DESTINATION ${SKBUILD_PLATLIB_DIR}/llama_cpp + ARCHIVE DESTINATION ${SKBUILD_PLATLIB_DIR}/llama_cpp + FRAMEWORK DESTINATION ${SKBUILD_PLATLIB_DIR}/llama_cpp + RESOURCE DESTINATION ${SKBUILD_PLATLIB_DIR}/llama_cpp + ) + # Temporary fix for https://github.com/scikit-build/scikit-build-core/issues/374 + install( + TARGETS llama + LIBRARY DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/llama_cpp + RUNTIME DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/llama_cpp + ARCHIVE DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/llama_cpp + FRAMEWORK DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/llama_cpp + RESOURCE DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/llama_cpp + ) + # Workaround for Windows + CUDA https://github.com/abetlen/llama-cpp-python/issues/563 + install( + FILES $ + DESTINATION ${SKBUILD_PLATLIB_DIR}/llama_cpp + ) + install( + FILES $ + DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/llama_cpp + ) + + if (LLAVA_BUILD) + if (LLAMA_CUBLAS) + add_compile_definitions(GGML_USE_CUBLAS) + endif() + + if (LLAMA_METAL) + add_compile_definitions(GGML_USE_METAL) + endif() + + # Building llava + add_subdirectory(vendor/llama.cpp/examples/llava) + set_target_properties(llava_shared PROPERTIES OUTPUT_NAME "llava") + # Set CUDA_ARCHITECTURES to OFF on windows + if (WIN32) + set_target_properties(llava_shared PROPERTIES CUDA_ARCHITECTURES OFF) + endif() + install( + TARGETS llava_shared + LIBRARY DESTINATION ${SKBUILD_PLATLIB_DIR}/llama_cpp + RUNTIME DESTINATION ${SKBUILD_PLATLIB_DIR}/llama_cpp + ARCHIVE DESTINATION ${SKBUILD_PLATLIB_DIR}/llama_cpp + FRAMEWORK DESTINATION ${SKBUILD_PLATLIB_DIR}/llama_cpp + RESOURCE DESTINATION ${SKBUILD_PLATLIB_DIR}/llama_cpp + ) + # Temporary fix for https://github.com/scikit-build/scikit-build-core/issues/374 + install( + TARGETS llava_shared + LIBRARY DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/llama_cpp + RUNTIME DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/llama_cpp + ARCHIVE DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/llama_cpp + FRAMEWORK DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/llama_cpp + RESOURCE DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/llama_cpp + ) + endif() +endif() diff --git a/llama-cpp-python/LICENSE.md b/llama-cpp-python/LICENSE.md new file mode 100644 index 0000000000000000000000000000000000000000..3a1d7180d508818fe957923e00dcd8950938632d --- /dev/null +++ 
b/llama-cpp-python/LICENSE.md @@ -0,0 +1,9 @@ +MIT License + +Copyright (c) 2023 Andrei Betlen + +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. \ No newline at end of file diff --git a/llama-cpp-python/Makefile b/llama-cpp-python/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..4ae01107493deb8dda7a9cf6254ee9b81b7f5eb2 --- /dev/null +++ b/llama-cpp-python/Makefile @@ -0,0 +1,82 @@ +update: + poetry install + git submodule update --init --recursive + +update.vendor: + cd vendor/llama.cpp && git pull origin master + +deps: + python3 -m pip install --upgrade pip + python3 -m pip install -e ".[all]" + +build: + python3 -m pip install --verbose -e . + +build.debug: + CMAKE_ARGS="-DCMAKE_BUILD_TYPE=Debug" python3 -m pip install --verbose --config-settings=cmake.verbose=true --config-settings=logging.level=INFO --config-settings=install.strip=false --editable . + +build.cuda: + CMAKE_ARGS="-DLLAMA_CUBLAS=on" python3 -m pip install --verbose -e . + +build.opencl: + CMAKE_ARGS="-DLLAMA_CLBLAST=on" python3 -m pip install --verbose -e . + +build.openblas: + CMAKE_ARGS="-DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS" python3 -m pip install --verbose -e . + +build.blis: + CMAKE_ARGS="-DLLAMA_BLAS=on -DLLAMA_BLAS_VENDOR=FLAME" python3 -m pip install --verbose -e . + +build.metal: + CMAKE_ARGS="-DLLAMA_METAL=on" python3 -m pip install --verbose -e . + +build.vulkan: + CMAKE_ARGS="-DLLAMA_VULKAN=on" python3 -m pip install --verbose -e . + +build.kompute: + CMAKE_ARGS="-DLLAMA_KOMPUTE=on" python3 -m pip install --verbose -e . + +build.sycl: + CMAKE_ARGS="-DLLAMA_SYCL=on" python3 -m pip install --verbose -e . + +build.sdist: + python3 -m build --sdist + +deploy.pypi: + python3 -m twine upload dist/* + +deploy.gh-docs: + mkdocs build + mkdocs gh-deploy + +test: + python3 -m pytest + +docker: + docker build -t llama-cpp-python:latest -f docker/simple/Dockerfile . 
+ +run-server: + uvicorn --factory llama.server:app --host ${HOST} --port ${PORT} + +clean: + - cd vendor/llama.cpp && make clean + - cd vendor/llama.cpp && rm libllama.so + - rm -rf _skbuild + - rm llama_cpp/*.so + - rm llama_cpp/*.dylib + - rm llama_cpp/*.metal + - rm llama_cpp/*.dll + - rm llama_cpp/*.lib + +.PHONY: \ + update \ + update.vendor \ + build \ + build.cuda \ + build.opencl \ + build.openblas \ + build.sdist \ + deploy.pypi \ + deploy.gh-docs \ + docker \ + clean \ No newline at end of file diff --git a/llama-cpp-python/README.md b/llama-cpp-python/README.md new file mode 100644 index 0000000000000000000000000000000000000000..640671460c48d6acb16506d2d7fc51a13d71c70a --- /dev/null +++ b/llama-cpp-python/README.md @@ -0,0 +1,792 @@ +# 🦙 Python Bindings for [`llama.cpp`](https://github.com/ggerganov/llama.cpp) + +[![Documentation Status](https://readthedocs.org/projects/llama-cpp-python/badge/?version=latest)](https://llama-cpp-python.readthedocs.io/en/latest/?badge=latest) +[![Tests](https://github.com/abetlen/llama-cpp-python/actions/workflows/test.yaml/badge.svg?branch=main)](https://github.com/abetlen/llama-cpp-python/actions/workflows/test.yaml) +[![PyPI](https://img.shields.io/pypi/v/llama-cpp-python)](https://pypi.org/project/llama-cpp-python/) +[![PyPI - Python Version](https://img.shields.io/pypi/pyversions/llama-cpp-python)](https://pypi.org/project/llama-cpp-python/) +[![PyPI - License](https://img.shields.io/pypi/l/llama-cpp-python)](https://pypi.org/project/llama-cpp-python/) +[![PyPI - Downloads](https://img.shields.io/pypi/dm/llama-cpp-python)](https://pypi.org/project/llama-cpp-python/) +[![Github All Releases](https://img.shields.io/github/downloads/abetlen/llama-cpp-python/total.svg?label=Github%20Downloads)]() + +Simple Python bindings for **@ggerganov's** [`llama.cpp`](https://github.com/ggerganov/llama.cpp) library. +This package provides: + +- Low-level access to C API via `ctypes` interface. +- High-level Python API for text completion + - OpenAI-like API + - [LangChain compatibility](https://python.langchain.com/docs/integrations/llms/llamacpp) + - [LlamaIndex compatibility](https://docs.llamaindex.ai/en/stable/examples/llm/llama_2_llama_cpp.html) +- OpenAI compatible web server + - [Local Copilot replacement](https://llama-cpp-python.readthedocs.io/en/latest/server/#code-completion) + - [Function Calling support](https://llama-cpp-python.readthedocs.io/en/latest/server/#function-calling) + - [Vision API support](https://llama-cpp-python.readthedocs.io/en/latest/server/#multimodal-models) + - [Multiple Models](https://llama-cpp-python.readthedocs.io/en/latest/server/#configuration-and-multi-model-support) + +Documentation is available at [https://llama-cpp-python.readthedocs.io/en/latest](https://llama-cpp-python.readthedocs.io/en/latest). + +## Installation + +Requirements: + + - Python 3.8+ + - C compiler + - Linux: gcc or clang + - Windows: Visual Studio or MinGW + - MacOS: Xcode + +To install the package, run: + +```bash +pip install llama-cpp-python +``` + +This will also build `llama.cpp` from source and install it alongside this python package. + +If this fails, add `--verbose` to the `pip install` see the full cmake build log. + +**Pre-built Wheel (New)** + +It is also possible to install a pre-built wheel with basic CPU support. 
+ +```bash +pip install llama-cpp-python \ + --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cpu +``` + +### Installation Configuration + +`llama.cpp` supports a number of hardware acceleration backends to speed up inference as well as backend specific options. See the [llama.cpp README](https://github.com/ggerganov/llama.cpp#build) for a full list. + +All `llama.cpp` cmake build options can be set via the `CMAKE_ARGS` environment variable or via the `--config-settings / -C` cli flag during installation. + +
+Environment Variables + +```bash +# Linux and Mac +CMAKE_ARGS="-DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS" \ + pip install llama-cpp-python +``` + +```powershell +# Windows +$env:CMAKE_ARGS = "-DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS" +pip install llama-cpp-python +``` +
+ +
+CLI / requirements.txt + +They can also be set via `pip install -C / --config-settings` command and saved to a `requirements.txt` file: + +```bash +pip install --upgrade pip # ensure pip is up to date +pip install llama-cpp-python \ + -C cmake.args="-DLLAMA_BLAS=ON;-DLLAMA_BLAS_VENDOR=OpenBLAS" +``` + +```txt +# requirements.txt + +llama-cpp-python -C cmake.args="-DLLAMA_BLAS=ON;-DLLAMA_BLAS_VENDOR=OpenBLAS" +``` + +
+ +### Supported Backends + +Below are some common backends, their build commands and any additional environment variables required. + +
+OpenBLAS (CPU) + +To install with OpenBLAS, set the `LLAMA_BLAS` and `LLAMA_BLAS_VENDOR` environment variables before installing: + +```bash +CMAKE_ARGS="-DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS" pip install llama-cpp-python +``` +
+ +
+CUDA + +To install with CUDA support, set the `LLAMA_CUDA=on` environment variable before installing: + +```bash +CMAKE_ARGS="-DLLAMA_CUDA=on" pip install llama-cpp-python +``` + +**Pre-built Wheel (New)** + +It is also possible to install a pre-built wheel with CUDA support, as long as your system meets these requirements: + +- CUDA Version is 12.1, 12.2, 12.3, or 12.4 +- Python Version is 3.10, 3.11 or 3.12 + +```bash +pip install llama-cpp-python \ + --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/<cuda-version> +``` + +Where `<cuda-version>` is one of the following: +- `cu121`: CUDA 12.1 +- `cu122`: CUDA 12.2 +- `cu123`: CUDA 12.3 +- `cu124`: CUDA 12.4 + +For example, to install the CUDA 12.1 wheel: + +```bash +pip install llama-cpp-python \ + --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cu121 +``` + +
+ +
+Metal + +To install with Metal (MPS), set the `LLAMA_METAL=on` environment variable before installing: + +```bash +CMAKE_ARGS="-DLLAMA_METAL=on" pip install llama-cpp-python +``` + +**Pre-built Wheel (New)** + +It is also possible to install a pre-built wheel with Metal support, as long as your system meets these requirements: + +- MacOS Version is 11.0 or later +- Python Version is 3.10, 3.11 or 3.12 + +```bash +pip install llama-cpp-python \ + --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/metal +``` + +
+
+ +CLBlast (OpenCL) + +To install with CLBlast, set the `LLAMA_CLBLAST=on` environment variable before installing: + +```bash +CMAKE_ARGS="-DLLAMA_CLBLAST=on" pip install llama-cpp-python +``` + +
+ +
+hipBLAS (ROCm) + +To install with hipBLAS / ROCm support for AMD cards, set the `LLAMA_HIPBLAS=on` environment variable before installing: + +```bash +CMAKE_ARGS="-DLLAMA_HIPBLAS=on" pip install llama-cpp-python +``` + +
+ +
+Vulkan + +To install with Vulkan support, set the `LLAMA_VULKAN=on` environment variable before installing: + +```bash +CMAKE_ARGS="-DLLAMA_VULKAN=on" pip install llama-cpp-python +``` + +
+ +
+Kompute + +To install with Kompute support, set the `LLAMA_KOMPUTE=on` environment variable before installing: + +```bash +CMAKE_ARGS="-DLLAMA_KOMPUTE=on" pip install llama-cpp-python +``` +
+ +
+SYCL + +To install with SYCL support, set the `LLAMA_SYCL=on` environment variable before installing: + +```bash +source /opt/intel/oneapi/setvars.sh +CMAKE_ARGS="-DLLAMA_SYCL=on -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx" pip install llama-cpp-python +``` +
+ + +### Windows Notes + +
+Error: Can't find 'nmake' or 'CMAKE_C_COMPILER' + +If you run into issues where the build complains that it can't find `'nmake'` or `CMAKE_C_COMPILER`, you can extract w64devkit as [mentioned in the llama.cpp repo](https://github.com/ggerganov/llama.cpp#openblas) and add its compilers manually to `CMAKE_ARGS` before running `pip install`: + +```ps +$env:CMAKE_GENERATOR = "MinGW Makefiles" +$env:CMAKE_ARGS = "-DLLAMA_OPENBLAS=on -DCMAKE_C_COMPILER=C:/w64devkit/bin/gcc.exe -DCMAKE_CXX_COMPILER=C:/w64devkit/bin/g++.exe" +``` + +See the instructions above and set `CMAKE_ARGS` to the BLAS backend you want to use. +
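+
+With the generator and compiler paths set, the usual install command can then be run from the same shell (the `C:/w64devkit` location above is just an example path):
+
+```ps
+pip install llama-cpp-python --verbose
+```
+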
+ +### MacOS Notes + +Detailed MacOS Metal GPU install documentation is available at [docs/install/macos.md](https://llama-cpp-python.readthedocs.io/en/latest/install/macos/) + +
+M1 Mac Performance Issue + +Note: If you are using an Apple Silicon (M1) Mac, make sure you have installed a version of Python that supports the arm64 architecture. For example: + +```bash +wget https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-MacOSX-arm64.sh +bash Miniforge3-MacOSX-arm64.sh +``` + +Otherwise, the installation will build the x86_64 version of llama.cpp, which will be 10x slower on an Apple Silicon (M1) Mac. +
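+
+If in doubt, you can check which architecture the active interpreter was built for before installing; a native arm64 Python should report `arm64` rather than `x86_64`:
+
+```bash
+python3 -c "import platform; print(platform.machine())"
+```
+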
+ +
+M Series Mac Error: `(mach-o file, but is an incompatible architecture (have 'x86_64', need 'arm64'))` + +Try installing with + +```bash +CMAKE_ARGS="-DCMAKE_OSX_ARCHITECTURES=arm64 -DCMAKE_APPLE_SILICON_PROCESSOR=arm64 -DLLAMA_METAL=on" pip install --upgrade --verbose --force-reinstall --no-cache-dir llama-cpp-python +``` +
+ +### Upgrading and Reinstalling + +To upgrade and rebuild `llama-cpp-python` add `--upgrade --force-reinstall --no-cache-dir` flags to the `pip install` command to ensure the package is rebuilt from source. + +## High-level API + +[API Reference](https://llama-cpp-python.readthedocs.io/en/latest/api-reference/#high-level-api) + +The high-level API provides a simple managed interface through the [`Llama`](https://llama-cpp-python.readthedocs.io/en/latest/api-reference/#llama_cpp.Llama) class. + +Below is a short example demonstrating how to use the high-level API to for basic text completion: + +```python +from llama_cpp import Llama + +llm = Llama( + model_path="./models/7B/llama-model.gguf", + # n_gpu_layers=-1, # Uncomment to use GPU acceleration + # seed=1337, # Uncomment to set a specific seed + # n_ctx=2048, # Uncomment to increase the context window +) +output = llm( + "Q: Name the planets in the solar system? A: ", # Prompt + max_tokens=32, # Generate up to 32 tokens, set to None to generate up to the end of the context window + stop=["Q:", "\n"], # Stop generating just before the model would generate a new question + echo=True # Echo the prompt back in the output +) # Generate a completion, can also call create_completion +print(output) +``` + +By default `llama-cpp-python` generates completions in an OpenAI compatible format: + +```python +{ + "id": "cmpl-xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx", + "object": "text_completion", + "created": 1679561337, + "model": "./models/7B/llama-model.gguf", + "choices": [ + { + "text": "Q: Name the planets in the solar system? A: Mercury, Venus, Earth, Mars, Jupiter, Saturn, Uranus, Neptune and Pluto.", + "index": 0, + "logprobs": None, + "finish_reason": "stop" + } + ], + "usage": { + "prompt_tokens": 14, + "completion_tokens": 28, + "total_tokens": 42 + } +} +``` + +Text completion is available through the [`__call__`](https://llama-cpp-python.readthedocs.io/en/latest/api-reference/#llama_cpp.Llama.__call__) and [`create_completion`](https://llama-cpp-python.readthedocs.io/en/latest/api-reference/#llama_cpp.Llama.create_completion) methods of the [`Llama`](https://llama-cpp-python.readthedocs.io/en/latest/api-reference/#llama_cpp.Llama) class. + +### Pulling models from Hugging Face Hub + +You can download `Llama` models in `gguf` format directly from Hugging Face using the [`from_pretrained`](https://llama-cpp-python.readthedocs.io/en/latest/api-reference/#llama_cpp.Llama.from_pretrained) method. +You'll need to install the `huggingface-hub` package to use this feature (`pip install huggingface-hub`). + +```python +llm = Llama.from_pretrained( + repo_id="Qwen/Qwen1.5-0.5B-Chat-GGUF", + filename="*q8_0.gguf", + verbose=False +) +``` + +By default [`from_pretrained`](https://llama-cpp-python.readthedocs.io/en/latest/api-reference/#llama_cpp.Llama.from_pretrained) will download the model to the huggingface cache directory, you can then manage installed model files with the [`huggingface-cli`](https://huggingface.co/docs/huggingface_hub/en/guides/cli) tool. + +### Chat Completion + +The high-level API also provides a simple interface for chat completion. + +Chat completion requires that the model knows how to format the messages into a single prompt. +The `Llama` class does this using pre-registered chat formats (ie. `chatml`, `llama-2`, `gemma`, etc) or by providing a custom chat handler object. 
+ +The model will will format the messages into a single prompt using the following order of precedence: + - Use the `chat_handler` if provided + - Use the `chat_format` if provided + - Use the `tokenizer.chat_template` from the `gguf` model's metadata (should work for most new models, older models may not have this) + - else, fallback to the `llama-2` chat format + +Set `verbose=True` to see the selected chat format. + +```python +from llama_cpp import Llama +llm = Llama( + model_path="path/to/llama-2/llama-model.gguf", + chat_format="llama-2" +) +llm.create_chat_completion( + messages = [ + {"role": "system", "content": "You are an assistant who perfectly describes images."}, + { + "role": "user", + "content": "Describe this image in detail please." + } + ] +) +``` + +Chat completion is available through the [`create_chat_completion`](https://llama-cpp-python.readthedocs.io/en/latest/api-reference/#llama_cpp.Llama.create_chat_completion) method of the [`Llama`](https://llama-cpp-python.readthedocs.io/en/latest/api-reference/#llama_cpp.Llama) class. + +For OpenAI API v1 compatibility, you use the [`create_chat_completion_openai_v1`](https://llama-cpp-python.readthedocs.io/en/latest/api-reference/#llama_cpp.Llama.create_chat_completion_openai_v1) method which will return pydantic models instead of dicts. + + +### JSON and JSON Schema Mode + +To constrain chat responses to only valid JSON or a specific JSON Schema use the `response_format` argument in [`create_chat_completion`](https://llama-cpp-python.readthedocs.io/en/latest/api-reference/#llama_cpp.Llama.create_chat_completion). + +#### JSON Mode + +The following example will constrain the response to valid JSON strings only. + +```python +from llama_cpp import Llama +llm = Llama(model_path="path/to/model.gguf", chat_format="chatml") +llm.create_chat_completion( + messages=[ + { + "role": "system", + "content": "You are a helpful assistant that outputs in JSON.", + }, + {"role": "user", "content": "Who won the world series in 2020"}, + ], + response_format={ + "type": "json_object", + }, + temperature=0.7, +) +``` + +#### JSON Schema Mode + +To constrain the response further to a specific JSON Schema add the schema to the `schema` property of the `response_format` argument. + +```python +from llama_cpp import Llama +llm = Llama(model_path="path/to/model.gguf", chat_format="chatml") +llm.create_chat_completion( + messages=[ + { + "role": "system", + "content": "You are a helpful assistant that outputs in JSON.", + }, + {"role": "user", "content": "Who won the world series in 2020"}, + ], + response_format={ + "type": "json_object", + "schema": { + "type": "object", + "properties": {"team_name": {"type": "string"}}, + "required": ["team_name"], + }, + }, + temperature=0.7, +) +``` + +### Function Calling + +The high-level API supports OpenAI compatible function and tool calling. This is possible through the `functionary` pre-trained models chat format or through the generic `chatml-function-calling` chat format. + +```python +from llama_cpp import Llama +llm = Llama(model_path="path/to/chatml/llama-model.gguf", chat_format="chatml-function-calling") +llm.create_chat_completion( + messages = [ + { + "role": "system", + "content": "A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions. 
The assistant calls functions with appropriate input when necessary" + + }, + { + "role": "user", + "content": "Extract Jason is 25 years old" + } + ], + tools=[{ + "type": "function", + "function": { + "name": "UserDetail", + "parameters": { + "type": "object", + "title": "UserDetail", + "properties": { + "name": { + "title": "Name", + "type": "string" + }, + "age": { + "title": "Age", + "type": "integer" + } + }, + "required": [ "name", "age" ] + } + } + }], + tool_choice={ + "type": "function", + "function": { + "name": "UserDetail" + } + } +) +``` + +
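+
+The call above returns an OpenAI-style response dict. A minimal sketch of reading the generated tool call, assuming its return value is captured in a variable named `response`:
+
+```python
+import json
+
+# response = llm.create_chat_completion(...)  # as in the example above
+tool_call = response["choices"][0]["message"]["tool_calls"][0]
+print(tool_call["function"]["name"])                   # e.g. "UserDetail"
+print(json.loads(tool_call["function"]["arguments"]))  # e.g. {"name": "Jason", "age": 25}
+```
+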
+Functionary v2 + +The various gguf-converted files for this set of models can be found [here](https://huggingface.co/meetkai). Functionary is able to intelligently call functions and also analyze any provided function outputs to generate coherent responses. All v2 models of Functionary support **parallel function calling**. You can provide either `functionary-v1` or `functionary-v2` for the `chat_format` when initializing the Llama class. + +Due to discrepancies between llama.cpp and HuggingFace's tokenizers, you must provide the HF tokenizer for Functionary. The `LlamaHFTokenizer` class can be initialized and passed into the Llama class. This will override the default llama.cpp tokenizer used in the Llama class. The tokenizer files are already included in the respective HF repositories hosting the gguf files. + +```python +from llama_cpp import Llama +from llama_cpp.llama_tokenizer import LlamaHFTokenizer +llm = Llama.from_pretrained( + repo_id="meetkai/functionary-small-v2.2-GGUF", + filename="functionary-small-v2.2.q4_0.gguf", + chat_format="functionary-v2", + tokenizer=LlamaHFTokenizer.from_pretrained("meetkai/functionary-small-v2.2-GGUF") +) +``` + +**NOTE**: There is no need to provide the default system messages used in Functionary as they are added automatically in the Functionary chat handler. Thus, the messages should contain just the chat messages and/or system messages that provide additional context for the model (e.g.: datetime, etc.). +
+ +### Multi-modal Models + +`llama-cpp-python` supports multi-modal models such as llava1.5, which allow the language model to read information from both text and images. + +You'll first need to download one of the available multi-modal models in GGUF format: + +- [llava-v1.5-7b](https://huggingface.co/mys/ggml_llava-v1.5-7b) +- [llava-v1.5-13b](https://huggingface.co/mys/ggml_llava-v1.5-13b) +- [bakllava-1-7b](https://huggingface.co/mys/ggml_bakllava-1) +- [llava-v1.6-34b](https://huggingface.co/cjpais/llava-v1.6-34B-gguf) +- [moondream2](https://huggingface.co/vikhyatk/moondream2) + +Then you'll need to use a custom chat handler to load the clip model and process the chat messages and images. + +```python +from llama_cpp import Llama +from llama_cpp.llama_chat_format import Llava15ChatHandler +chat_handler = Llava15ChatHandler(clip_model_path="path/to/llava/mmproj.bin") +llm = Llama( + model_path="./path/to/llava/llama-model.gguf", + chat_handler=chat_handler, + n_ctx=2048, # n_ctx should be increased to accommodate the image embedding +) +llm.create_chat_completion( + messages = [ + {"role": "system", "content": "You are an assistant who perfectly describes images."}, + { + "role": "user", + "content": [ + {"type" : "text", "text": "What's in this image?"}, + {"type": "image_url", "image_url": {"url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg" } } + ] + } + ] +) +``` + +You can also pull the model from the Hugging Face Hub using the `from_pretrained` method. + +```python +from llama_cpp import Llama +from llama_cpp.llama_chat_format import MoondreamChatHandler + +chat_handler = MoondreamChatHandler.from_pretrained( + repo_id="vikhyatk/moondream2", + filename="*mmproj*", +) + +llm = Llama.from_pretrained( + repo_id="vikhyatk/moondream2", + filename="*text-model*", + chat_handler=chat_handler, + n_ctx=2048, # n_ctx should be increased to accommodate the image embedding +) + +response = llm.create_chat_completion( + messages = [ + { + "role": "user", + "content": [ + {"type" : "text", "text": "What's in this image?"}, + {"type": "image_url", "image_url": {"url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg" } } + ] + } + ] +) +print(response["choices"][0]["message"]["content"]) +``` + +**Note**: Multi-modal models also support tool calling and JSON mode. + +
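+
+Because multi-modal models go through the same `create_chat_completion` path, JSON mode can be combined with image inputs. A minimal sketch reusing the `llm` and chat handler from the examples above (the image URL and prompt are placeholders):
+
+```python
+response = llm.create_chat_completion(
+    messages=[
+        {"role": "system", "content": "You are an assistant who describes images and outputs JSON."},
+        {
+            "role": "user",
+            "content": [
+                {"type": "image_url", "image_url": {"url": "https://example.com/some-image.jpg"}},
+                {"type": "text", "text": "Describe this image as JSON."},
+            ],
+        },
+    ],
+    response_format={"type": "json_object"},
+)
+print(response["choices"][0]["message"]["content"])
+```
+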
+Loading a Local Image + +Images can be passed as base64 encoded data URIs. The following example demonstrates how to do this. + +```python +import base64 + +def image_to_base64_data_uri(file_path): + with open(file_path, "rb") as img_file: + base64_data = base64.b64encode(img_file.read()).decode('utf-8') + return f"data:image/png;base64,{base64_data}" + +# Replace 'file_path.png' with the actual path to your PNG file +file_path = 'file_path.png' +data_uri = image_to_base64_data_uri(file_path) + +messages = [ + {"role": "system", "content": "You are an assistant who perfectly describes images."}, + { + "role": "user", + "content": [ + {"type": "image_url", "image_url": {"url": data_uri }}, + {"type" : "text", "text": "Describe this image in detail please."} + ] + } +] + +``` + +
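+
+The `messages` list built this way can then be passed to the same chat-completion call used earlier (a sketch, assuming the `llm` configured with a LLaVA chat handler above):
+
+```python
+response = llm.create_chat_completion(messages=messages)
+print(response["choices"][0]["message"]["content"])
+```
+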
+ +### Speculative Decoding + +`llama-cpp-python` supports speculative decoding which allows the model to generate completions based on a draft model. + +The fastest way to use speculative decoding is through the `LlamaPromptLookupDecoding` class. + +Just pass this as a draft model to the `Llama` class during initialization. + +```python +from llama_cpp import Llama +from llama_cpp.llama_speculative import LlamaPromptLookupDecoding + +llama = Llama( + model_path="path/to/model.gguf", + draft_model=LlamaPromptLookupDecoding(num_pred_tokens=10) # num_pred_tokens is the number of tokens to predict 10 is the default and generally good for gpu, 2 performs better for cpu-only machines. +) +``` + +### Embeddings + +To generate text embeddings use [`create_embedding`](https://llama-cpp-python.readthedocs.io/en/latest/api-reference/#llama_cpp.Llama.create_embedding) or [`embed`](https://llama-cpp-python.readthedocs.io/en/latest/api-reference/#llama_cpp.Llama.embed). Note that you must pass `embedding=True` to the constructor upon model creation for these to work properly. + +```python +import llama_cpp + +llm = llama_cpp.Llama(model_path="path/to/model.gguf", embedding=True) + +embeddings = llm.create_embedding("Hello, world!") + +# or create multiple embeddings at once + +embeddings = llm.create_embedding(["Hello, world!", "Goodbye, world!"]) +``` + +There are two primary notions of embeddings in a Transformer-style model: *token level* and *sequence level*. Sequence level embeddings are produced by "pooling" token level embeddings together, usually by averaging them or using the first token. + +Models that are explicitly geared towards embeddings will usually return sequence level embeddings by default, one for each input string. Non-embedding models such as those designed for text generation will typically return only token level embeddings, one for each token in each sequence. Thus the dimensionality of the return type will be one higher for token level embeddings. + +It is possible to control pooling behavior in some cases using the `pooling_type` flag on model creation. You can ensure token level embeddings from any model using `LLAMA_POOLING_TYPE_NONE`. The reverse, getting a generation oriented model to yield sequence level embeddings is currently not possible, but you can always do the pooling manually. + +### Adjusting the Context Window + +The context window of the Llama models determines the maximum number of tokens that can be processed at once. By default, this is set to 512 tokens, but can be adjusted based on your requirements. + +For instance, if you want to work with larger contexts, you can expand the context window by setting the n_ctx parameter when initializing the Llama object: + +```python +llm = Llama(model_path="./models/7B/llama-model.gguf", n_ctx=2048) +``` + +## OpenAI Compatible Web Server + +`llama-cpp-python` offers a web server which aims to act as a drop-in replacement for the OpenAI API. +This allows you to use llama.cpp compatible models with any OpenAI compatible client (language libraries, services, etc). 
+ +To install the server package and get started: + +```bash +pip install 'llama-cpp-python[server]' +python3 -m llama_cpp.server --model models/7B/llama-model.gguf +``` + +Similar to Hardware Acceleration section above, you can also install with GPU (cuBLAS) support like this: + +```bash +CMAKE_ARGS="-DLLAMA_CUDA=on" FORCE_CMAKE=1 pip install 'llama-cpp-python[server]' +python3 -m llama_cpp.server --model models/7B/llama-model.gguf --n_gpu_layers 35 +``` + +Navigate to [http://localhost:8000/docs](http://localhost:8000/docs) to see the OpenAPI documentation. + +To bind to `0.0.0.0` to enable remote connections, use `python3 -m llama_cpp.server --host 0.0.0.0`. +Similarly, to change the port (default is 8000), use `--port`. + +You probably also want to set the prompt format. For chatml, use + +```bash +python3 -m llama_cpp.server --model models/7B/llama-model.gguf --chat_format chatml +``` + +That will format the prompt according to how model expects it. You can find the prompt format in the model card. +For possible options, see [llama_cpp/llama_chat_format.py](llama_cpp/llama_chat_format.py) and look for lines starting with "@register_chat_format". + +If you have `huggingface-hub` installed, you can also use the `--hf_model_repo_id` flag to load a model from the Hugging Face Hub. + +```bash +python3 -m llama_cpp.server --hf_model_repo_id Qwen/Qwen1.5-0.5B-Chat-GGUF --model '*q8_0.gguf' +``` + +### Web Server Features + +- [Local Copilot replacement](https://llama-cpp-python.readthedocs.io/en/latest/server/#code-completion) +- [Function Calling support](https://llama-cpp-python.readthedocs.io/en/latest/server/#function-calling) +- [Vision API support](https://llama-cpp-python.readthedocs.io/en/latest/server/#multimodal-models) +- [Multiple Models](https://llama-cpp-python.readthedocs.io/en/latest/server/#configuration-and-multi-model-support) + +## Docker image + +A Docker image is available on [GHCR](https://ghcr.io/abetlen/llama-cpp-python). To run the server: + +```bash +docker run --rm -it -p 8000:8000 -v /path/to/models:/models -e MODEL=/models/llama-model.gguf ghcr.io/abetlen/llama-cpp-python:latest +``` + +[Docker on termux (requires root)](https://gist.github.com/FreddieOliveira/efe850df7ff3951cb62d74bd770dce27) is currently the only known way to run this on phones, see [termux support issue](https://github.com/abetlen/llama-cpp-python/issues/389) + +## Low-level API + +[API Reference](https://llama-cpp-python.readthedocs.io/en/latest/api-reference/#low-level-api) + +The low-level API is a direct [`ctypes`](https://docs.python.org/3/library/ctypes.html) binding to the C API provided by `llama.cpp`. +The entire low-level API can be found in [llama_cpp/llama_cpp.py](https://github.com/abetlen/llama-cpp-python/blob/master/llama_cpp/llama_cpp.py) and directly mirrors the C API in [llama.h](https://github.com/ggerganov/llama.cpp/blob/master/llama.h). 
+ +Below is a short example demonstrating how to use the low-level API to tokenize a prompt: + +```python +import llama_cpp +import ctypes +llama_cpp.llama_backend_init(False) # Must be called once at the start of each program +params = llama_cpp.llama_context_default_params() +# use bytes for char * params +model = llama_cpp.llama_load_model_from_file(b"./models/7b/llama-model.gguf", params) +ctx = llama_cpp.llama_new_context_with_model(model, params) +max_tokens = params.n_ctx +# use ctypes arrays for array params +tokens = (llama_cpp.llama_token * int(max_tokens))() +n_tokens = llama_cpp.llama_tokenize(ctx, b"Q: Name the planets in the solar system? A: ", tokens, max_tokens, llama_cpp.c_bool(True)) +llama_cpp.llama_free(ctx) +``` + +Check out the [examples folder](examples/low_level_api) for more examples of using the low-level API. + +## Documentation + +Documentation is available via [https://llama-cpp-python.readthedocs.io/](https://llama-cpp-python.readthedocs.io/). +If you find any issues with the documentation, please open an issue or submit a PR. + +## Development + +This package is under active development and I welcome any contributions. + +To get started, clone the repository and install the package in editable / development mode: + +```bash +git clone --recurse-submodules https://github.com/abetlen/llama-cpp-python.git +cd llama-cpp-python + +# Upgrade pip (required for editable mode) +pip install --upgrade pip + +# Install with pip +pip install -e . + +# if you want to use the fastapi / openapi server +pip install -e .[server] + +# to install all optional dependencies +pip install -e .[all] + +# to clear the local build cache +make clean +``` + +You can also test out specific commits of `lama.cpp` by checking out the desired commit in the `vendor/llama.cpp` submodule and then running `make clean` and `pip install -e .` again. Any changes in the `llama.h` API will require +changes to the `llama_cpp/llama_cpp.py` file to match the new API (additional changes may be required elsewhere). + +## FAQ + +### Are there pre-built binaries / binary wheels available? + +The recommended installation method is to install from source as described above. +The reason for this is that `llama.cpp` is built with compiler optimizations that are specific to your system. +Using pre-built binaries would require disabling these optimizations or supporting a large number of pre-built binaries for each platform. + +That being said there are some pre-built binaries available through the Releases as well as some community provided wheels. + +In the future, I would like to provide pre-built binaries and wheels for common platforms and I'm happy to accept any useful contributions in this area. +This is currently being tracked in [#741](https://github.com/abetlen/llama-cpp-python/issues/741) + +### How does this compare to other Python bindings of `llama.cpp`? + +I originally wrote this package for my own use with two goals in mind: + +- Provide a simple process to install `llama.cpp` and access the full C API in `llama.h` from Python +- Provide a high-level Python API that can be used as a drop-in replacement for the OpenAI API so existing apps can be easily ported to use `llama.cpp` + +Any contributions and changes to this package will be made with these goals in mind. + +## License + +This project is licensed under the terms of the MIT license. 
diff --git a/llama-cpp-python/docker/README.md b/llama-cpp-python/docker/README.md new file mode 100644 index 0000000000000000000000000000000000000000..474503fdfc554d8caabee4f321a80427f8c7d696 --- /dev/null +++ b/llama-cpp-python/docker/README.md @@ -0,0 +1,64 @@ +### Install Docker Server +> [!IMPORTANT] +> This was tested with Docker running on Linux.
If you can get it working on Windows or MacOS, please update this `README.md` with a PR!
+ +[Install Docker Engine](https://docs.docker.com/engine/install) + + +## Simple Dockerfiles for building the llama-cpp-python server with external model bin files +### openblas_simple +A simple Dockerfile for non-GPU OpenBLAS, where the model is located outside the Docker image: +``` +cd ./openblas_simple +docker build -t openblas_simple . +docker run --cap-add SYS_RESOURCE -e USE_MLOCK=0 -e MODEL=/var/model/ -v :/var/model -t openblas_simple +``` +where `/` is the full path to the model file on the Docker host system. + +### cuda_simple +> [!WARNING] +> Nvidia GPU CuBLAS support requires an Nvidia GPU with sufficient VRAM (approximately as much as the size in the table below) and Docker Nvidia support (see [container-toolkit/install-guide](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html))
+ +A simple Dockerfile for CUDA-accelerated CuBLAS, where the model is located outside the Docker image: + +``` +cd ./cuda_simple +docker build -t cuda_simple . +docker run --gpus=all --cap-add SYS_RESOURCE -e USE_MLOCK=0 -e MODEL=/var/model/ -v :/var/model -t cuda_simple +``` +where `/` is the full path to the model file on the Docker host system. + +-------------------------------------------------------------------------- + +### "Open-Llama-in-a-box" +Download an Apache V2.0 licensed 3B params Open LLaMA model and install into a Docker image that runs an OpenBLAS-enabled llama-cpp-python server: +``` +$ cd ./open_llama +./build.sh +./start.sh +``` + +### Manually choose your own Llama model from Hugging Face +`python3 ./hug_model.py -a TheBloke -t llama` +You should now have a model in the current directory and `model.bin` symlinked to it for the subsequent Docker build and copy step. e.g. +``` +docker $ ls -lh *.bin +-rw-rw-r-- 1 user user 4.8G May 23 18:30 q5_1.bin +lrwxrwxrwx 1 user user 24 May 23 18:30 model.bin -> q5_1.bin +``` + +> [!NOTE] +> Make sure you have enough disk space to download the model. As the model is then copied into the image you will need at least +**TWICE** as much disk space as the size of the model:
+ +| Model | Quantized size | +|------:|----------------:| +| 3B | 3 GB | +| 7B | 5 GB | +| 13B | 10 GB | +| 33B | 25 GB | +| 65B | 50 GB | + + +> [!NOTE] +> If you want to pass or tune additional parameters, customise `./start_server.sh` before running `docker build ...` diff --git a/llama-cpp-python/docker/cuda_simple/Dockerfile b/llama-cpp-python/docker/cuda_simple/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..a9e51cdc1672134ec9af66c9eccf09f6da4ceccd --- /dev/null +++ b/llama-cpp-python/docker/cuda_simple/Dockerfile @@ -0,0 +1,27 @@ +ARG CUDA_IMAGE="12.1.1-devel-ubuntu22.04" +FROM nvidia/cuda:${CUDA_IMAGE} + +# We need to set the host to 0.0.0.0 to allow outside access +ENV HOST 0.0.0.0 + +RUN apt-get update && apt-get upgrade -y \ + && apt-get install -y git build-essential \ + python3 python3-pip gcc wget \ + ocl-icd-opencl-dev opencl-headers clinfo \ + libclblast-dev libopenblas-dev \ + && mkdir -p /etc/OpenCL/vendors && echo "libnvidia-opencl.so.1" > /etc/OpenCL/vendors/nvidia.icd + +COPY . . + +# setting build related env vars +ENV CUDA_DOCKER_ARCH=all +ENV LLAMA_CUBLAS=1 + +# Install depencencies +RUN python3 -m pip install --upgrade pip pytest cmake scikit-build setuptools fastapi uvicorn sse-starlette pydantic-settings starlette-context + +# Install llama-cpp-python (build with cuda) +RUN CMAKE_ARGS="-DLLAMA_CUBLAS=on" pip install llama-cpp-python + +# Run the server +CMD python3 -m llama_cpp.server diff --git a/llama-cpp-python/docker/open_llama/Dockerfile b/llama-cpp-python/docker/open_llama/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..23e37d4d40e5ec0bfd85b5e928834d58e2cf0da6 --- /dev/null +++ b/llama-cpp-python/docker/open_llama/Dockerfile @@ -0,0 +1,51 @@ +# Define the image argument and provide a default value +ARG IMAGE=python:3-slim-bullseye + +# Use the image as specified +FROM ${IMAGE} + +# Re-declare the ARG after FROM +ARG IMAGE + +# Update and upgrade the existing packages +RUN apt-get update && apt-get upgrade -y && apt-get install -y --no-install-recommends \ + python3 \ + python3-pip \ + ninja-build \ + build-essential + +RUN python3 -m pip install --upgrade pip pytest cmake scikit-build setuptools fastapi uvicorn sse-starlette pydantic-settings starlette-context + +# Perform the conditional installations based on the image +RUN echo "Image: ${IMAGE}" && \ + if [ "${IMAGE}" = "python:3-slim-bullseye" ] ; then \ + echo "OpenBLAS install:" && \ + apt-get install -y --no-install-recommends libopenblas-dev && \ + LLAMA_OPENBLAS=1 pip install llama-cpp-python --verbose; \ +else \ + echo "CuBLAS install:" && \ + LLAMA_CUBLAS=1 pip install llama-cpp-python --verbose; \ +fi + +# Clean up apt cache +RUN rm -rf /var/lib/apt/lists/* + +# Set a working directory for better clarity +WORKDIR /app + +# Copy files to the app directory +RUN echo "Installing model...this can take some time..." 
+COPY ./model.bin /app/model.bin +COPY ./start_server.sh /app/start_server.sh + +# Make the server start script executable +RUN chmod +x /app/start_server.sh + +# Set environment variable for the host +ENV HOST=0.0.0.0 + +# Expose a port for the server +EXPOSE 8000 + +# Run the server start script +CMD ["/bin/sh", "/app/start_server.sh"] diff --git a/llama-cpp-python/docker/open_llama/build.sh b/llama-cpp-python/docker/open_llama/build.sh new file mode 100644 index 0000000000000000000000000000000000000000..3a6457dcdfd47e764654bacae0ba8347976b645a --- /dev/null +++ b/llama-cpp-python/docker/open_llama/build.sh @@ -0,0 +1,14 @@ +#!/bin/sh + +MODEL="open_llama_3b" +# Get open_llama_3b_ggml q5_1 quantization +python3 ./hug_model.py -a SlyEcho -s ${MODEL} -f "q5_1" +ls -lh *.bin + +# Build the default OpenBLAS image +docker build -t $MODEL . +docker images | egrep "^(REPOSITORY|$MODEL)" + +echo +echo "To start the docker container run:" +echo "docker run -t -p 8000:8000 $MODEL" diff --git a/llama-cpp-python/docker/open_llama/hug_model.py b/llama-cpp-python/docker/open_llama/hug_model.py new file mode 100644 index 0000000000000000000000000000000000000000..13c5b6b0df5f67e80cbe584909b83777901265a1 --- /dev/null +++ b/llama-cpp-python/docker/open_llama/hug_model.py @@ -0,0 +1,139 @@ +import requests +import json +import os +import struct +import argparse + +def make_request(url, params=None): + print(f"Making request to {url}...") + response = requests.get(url, params=params) + if response.status_code == 200: + return json.loads(response.text) + else: + print(f"Request failed with status code {response.status_code}") + return None + +def check_magic_and_version(filename): + with open(filename, 'rb') as f: + # Read the first 6 bytes from the file + data = f.read(6) + + # Unpack the binary data, interpreting the first 4 bytes as a little-endian unsigned int + # and the next 2 bytes as a little-endian unsigned short + magic, version = struct.unpack('= 10485760: # 10 MB + print('.', end='', flush=True) + total_downloaded = 0 + print("\nDownload complete.") + + # Creating a symbolic link from destination to "model.bin" + if os.path.isfile("model.bin"): + os.remove("model.bin") # remove the existing link if any + os.symlink(destination, "model.bin") + else: + print(f"Download failed with status code {response.status_code}") + +def get_user_choice(model_list): + # Print the enumerated list + print("\n") + for i, (model_id, rfilename) in enumerate(model_list): + print(f"{i+1}: Model ID: {model_id}, RFilename: {rfilename}") + + # Get user's choice + choice = input("Choose a model to download by entering the corresponding number: ") + try: + index = int(choice) - 1 + if 0 <= index < len(model_list): + # Return the chosen model + return model_list[index] + else: + print("Invalid choice.") + except ValueError: + print("Invalid input. Please enter a number corresponding to a model.") + except IndexError: + print("Invalid choice. 
Index out of range.") + + return None + +def main(): + # Create an argument parser + parser = argparse.ArgumentParser(description='Process some parameters.') + + # Arguments + parser.add_argument('-v', '--version', type=int, default=0x0003, + help='hexadecimal version number of ggml file') + parser.add_argument('-a', '--author', type=str, default='TheBloke', + help='HuggingFace author filter') + parser.add_argument('-t', '--tag', type=str, default='llama', + help='HuggingFace tag filter') + parser.add_argument('-s', '--search', type=str, default='', + help='HuggingFace search filter') + parser.add_argument('-f', '--filename', type=str, default='q5_1', + help='HuggingFace model repository filename substring match') + + # Parse the arguments + args = parser.parse_args() + + # Define the parameters + params = { + "author": args.author, + "tags": args.tag, + "search": args.search + } + + models = make_request('https://huggingface.co/api/models', params=params) + if models is None: + return + + model_list = [] + # Iterate over the models + for model in models: + model_id = model['id'] + model_info = make_request(f'https://huggingface.co/api/models/{model_id}') + if model_info is None: + continue + + for sibling in model_info.get('siblings', []): + rfilename = sibling.get('rfilename') + if rfilename and args.filename in rfilename: + model_list.append((model_id, rfilename)) + + # Choose the model + model_list.sort(key=lambda x: x[0]) + if len(model_list) == 0: + print("No models found") + exit(1) + elif len(model_list) == 1: + model_choice = model_list[0] + else: + model_choice = get_user_choice(model_list) + + if model_choice is not None: + model_id, rfilename = model_choice + url = f"https://huggingface.co/{model_id}/resolve/main/{rfilename}" + dest = f"{model_id.replace('/', '_')}_{rfilename}" + download_file(url, dest) + _, version = check_magic_and_version(dest) + if version != args.version: + print(f"Warning: Expected version {args.version}, but found different version in the file.") + else: + print("Error - model choice was None") + exit(2) + +if __name__ == '__main__': + main() diff --git a/llama-cpp-python/docker/open_llama/start.sh b/llama-cpp-python/docker/open_llama/start.sh new file mode 100644 index 0000000000000000000000000000000000000000..7ee8f748eab47180cea09c0ad8e75c3b991b4af4 --- /dev/null +++ b/llama-cpp-python/docker/open_llama/start.sh @@ -0,0 +1,28 @@ +#!/bin/sh + +MODEL="open_llama_3b" + +# Start Docker container +docker run --cap-add SYS_RESOURCE -p 8000:8000 -t $MODEL & +sleep 10 +echo +docker ps | egrep "(^CONTAINER|$MODEL)" + +# Test the model works +echo +curl -X 'POST' 'http://localhost:8000/v1/completions' -H 'accept: application/json' -H 'Content-Type: application/json' -d '{ + "prompt": "\n\n### Instructions:\nWhat is the capital of France?\n\n### Response:\n", + "stop": [ + "\n", + "###" + ] +}' | grep Paris +if [ $? -eq 0 ] +then + echo + echo "$MODEL is working!!" +else + echo + echo "ERROR: $MODEL not replying." 
+ exit 1 +fi diff --git a/llama-cpp-python/docker/open_llama/start_server.sh b/llama-cpp-python/docker/open_llama/start_server.sh new file mode 100644 index 0000000000000000000000000000000000000000..d3329eec3bac6ce7e54c76b77ac9bf99fab0fe3f --- /dev/null +++ b/llama-cpp-python/docker/open_llama/start_server.sh @@ -0,0 +1,11 @@ +#!/bin/sh + +# For mlock support +ulimit -l unlimited + +if [ "$IMAGE" = "python:3-slim-bullseye" ]; then + python3 -B -m llama_cpp.server --model /app/model.bin +else + # You may have to reduce --n_gpu_layers=1000 to 20 or less if you don't have enough VRAM + python3 -B -m llama_cpp.server --model /app/model.bin --n_gpu_layers=1000 +fi diff --git a/llama-cpp-python/docker/openblas_simple/Dockerfile b/llama-cpp-python/docker/openblas_simple/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..e213518b95011cb6ee783986624c3b6de8659f81 --- /dev/null +++ b/llama-cpp-python/docker/openblas_simple/Dockerfile @@ -0,0 +1,15 @@ +FROM python:3-slim-bullseye + +# We need to set the host to 0.0.0.0 to allow outside access +ENV HOST 0.0.0.0 + +COPY . . + +# Install the package +RUN apt update && apt install -y libopenblas-dev ninja-build build-essential pkg-config +RUN python -m pip install --upgrade pip pytest cmake scikit-build setuptools fastapi uvicorn sse-starlette pydantic-settings starlette-context + +RUN CMAKE_ARGS="-DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS" pip install llama_cpp_python --verbose + +# Run the server +CMD python3 -m llama_cpp.server diff --git a/llama-cpp-python/docker/simple/Dockerfile b/llama-cpp-python/docker/simple/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..41cc68ea2402cf682807649d841e7c0f4175db01 --- /dev/null +++ b/llama-cpp-python/docker/simple/Dockerfile @@ -0,0 +1,34 @@ +# Define the image argument and provide a default value +ARG IMAGE=python:3-slim-bullseye + +# Use the image as specified +FROM ${IMAGE} + +# Re-declare the ARG after FROM +ARG IMAGE + +# Update and upgrade the existing packages +RUN apt-get update && apt-get upgrade -y && apt-get install -y --no-install-recommends \ + python3 \ + python3-pip \ + ninja-build \ + libopenblas-dev \ + build-essential + +RUN mkdir /app +WORKDIR /app +COPY . /app + +RUN python3 -m pip install --upgrade pip + +RUN make deps && make build && make clean + +# Set environment variable for the host +ENV HOST=0.0.0.0 +ENV PORT=8000 + +# Expose a port for the server +EXPOSE 8000 + +# Run the server start script +CMD ["/bin/sh", "/app/docker/simple/run.sh"] diff --git a/llama-cpp-python/docker/simple/run.sh b/llama-cpp-python/docker/simple/run.sh new file mode 100644 index 0000000000000000000000000000000000000000..c85e73d2b657bb05ed99309615d67bac93d9f86e --- /dev/null +++ b/llama-cpp-python/docker/simple/run.sh @@ -0,0 +1,4 @@ +#!/bin/bash + +make build +uvicorn --factory llama_cpp.server.app:create_app --host $HOST --port $PORT diff --git a/llama-cpp-python/docs/api-reference.md b/llama-cpp-python/docs/api-reference.md new file mode 100644 index 0000000000000000000000000000000000000000..ab51ef754e969bcaff26f004b63bcd7251e97c4b --- /dev/null +++ b/llama-cpp-python/docs/api-reference.md @@ -0,0 +1,88 @@ +--- +title: API Reference +--- + +## High Level API + +High-level Python bindings for llama.cpp. 
+ +::: llama_cpp.Llama + options: + members: + - __init__ + - tokenize + - detokenize + - reset + - eval + - sample + - generate + - create_embedding + - embed + - create_completion + - __call__ + - create_chat_completion + - create_chat_completion_openai_v1 + - set_cache + - save_state + - load_state + - token_bos + - token_eos + - from_pretrained + show_root_heading: true + +::: llama_cpp.LlamaGrammar + options: + members: + - from_string + - from_json_schema + +::: llama_cpp.LlamaCache + options: + show_root_heading: true + +::: llama_cpp.LlamaState + options: + show_root_heading: true + +::: llama_cpp.LogitsProcessor + options: + show_root_heading: true + +::: llama_cpp.LogitsProcessorList + options: + show_root_heading: true + +::: llama_cpp.StoppingCriteria + options: + show_root_heading: true + +::: llama_cpp.StoppingCriteriaList + options: + show_root_heading: true + +## Low Level API + +Low-level Python bindings for llama.cpp using Python's ctypes library. + +::: llama_cpp.llama_cpp + options: + show_if_no_docstring: true + # filter only members starting with `llama_` + filters: + - "^llama_" + +::: llama_cpp.llama_cpp + options: + show_if_no_docstring: true + show_root_heading: false + show_root_toc_entry: false + heading_level: 4 + # filter only members starting with `LLAMA_` + filters: + - "^LLAMA_" + +## Misc + +::: llama_cpp.llama_types + options: + show_if_no_docstring: true \ No newline at end of file diff --git a/llama-cpp-python/docs/changelog.md b/llama-cpp-python/docs/changelog.md new file mode 100644 index 0000000000000000000000000000000000000000..047bc14424303575f73af90611fec827334f54e8 --- /dev/null +++ b/llama-cpp-python/docs/changelog.md @@ -0,0 +1 @@ +-8<- "CHANGELOG.md" \ No newline at end of file diff --git a/llama-cpp-python/docs/index.md b/llama-cpp-python/docs/index.md new file mode 100644 index 0000000000000000000000000000000000000000..60bc7aef42aac0409cfdca666ad2ff6f516d7b5b --- /dev/null +++ b/llama-cpp-python/docs/index.md @@ -0,0 +1,5 @@ +--- +title: Getting Started +--- + +-8<- "README.md" \ No newline at end of file diff --git a/llama-cpp-python/docs/install/macos.md b/llama-cpp-python/docs/install/macos.md new file mode 100644 index 0000000000000000000000000000000000000000..2404228325d88c59b819acaedb20b15635f75df9 --- /dev/null +++ b/llama-cpp-python/docs/install/macos.md @@ -0,0 +1,59 @@ +--- +title: MacOS Install with Metal GPU +--- + +**(1) Make sure you have xcode installed... at least the command line parts** +``` +# check the path of your xcode install +xcode-select -p + +# xcode installed returns +# /Applications/Xcode-beta.app/Contents/Developer + +# if xcode is missing then install it... 
it takes ages; +xcode-select --install +``` + +**(2) Install the conda version for MacOS that supports Metal GPU** +``` +wget https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-MacOSX-arm64.sh +bash Miniforge3-MacOSX-arm64.sh +``` + +**(3) Make a conda environment** +``` +conda create -n llama python=3.9.16 +conda activate llama +``` + +**(4) Install the LATEST llama-cpp-python...which happily supports MacOS Metal GPU as of version 0.1.62** + *(you needed xcode installed in order pip to build/compile the C++ code)* +``` +pip uninstall llama-cpp-python -y +CMAKE_ARGS="-DLLAMA_METAL=on" pip install -U llama-cpp-python --no-cache-dir +pip install 'llama-cpp-python[server]' + +# you should now have llama-cpp-python v0.1.62 or higher installed +llama-cpp-python         0.1.68 + +``` + +**(5) Download a v3 gguf v2 model** + - **ggufv2** + - file name ends with **Q4_0.gguf** - indicating it is 4bit quantized, with quantisation method 0 + +https://huggingface.co/TheBloke/CodeLlama-7B-GGUF + + +**(6) run the llama-cpp-python API server with MacOS Metal GPU support** +``` +# config your ggml model path +# make sure it is gguf v2 +# make sure it is q4_0 +export MODEL=[path to your llama.cpp ggml models]]/[ggml-model-name]]Q4_0.gguf +python3 -m llama_cpp.server --model $MODEL --n_gpu_layers 1 +``` + +***Note:** If you omit the `--n_gpu_layers 1` then CPU will be used* + + diff --git a/llama-cpp-python/docs/requirements.txt b/llama-cpp-python/docs/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..199bd4ffbf88c68c98b52c05f388dfa92716f6b7 --- /dev/null +++ b/llama-cpp-python/docs/requirements.txt @@ -0,0 +1,3 @@ +mkdocs +mkdocs-material +mkdocstrings[python] \ No newline at end of file diff --git a/llama-cpp-python/docs/server.md b/llama-cpp-python/docs/server.md new file mode 100644 index 0000000000000000000000000000000000000000..cd6f86c51337046ef7d1a5258646d399806ac63d --- /dev/null +++ b/llama-cpp-python/docs/server.md @@ -0,0 +1,222 @@ +# OpenAI Compatible Server + +`llama-cpp-python` offers an OpenAI API compatible web server. + +This web server can be used to serve local models and easily connect them to existing clients. + +## Setup + +### Installation + +The server can be installed by running the following command: + +```bash +pip install llama-cpp-python[server] +``` + +### Running the server + +The server can then be started by running the following command: + +```bash +python3 -m llama_cpp.server --model +``` + +### Server options + +For a full list of options, run: + +```bash +python3 -m llama_cpp.server --help +``` + +NOTE: All server options are also available as environment variables. For example, `--model` can be set by setting the `MODEL` environment variable. + +Check out the server config reference below settings for more information on the available options. +CLI arguments and environment variables are available for all of the fields defined in [`ServerSettings`](#llama_cpp.server.settings.ServerSettings) and [`ModelSettings`](#llama_cpp.server.settings.ModelSettings) + +Additionally the server supports configuration check out the [configuration section](#configuration-and-multi-model-support) for more information and examples. + + +## Guides + +### Code Completion + +`llama-cpp-python` supports code completion via GitHub Copilot. + +*NOTE*: Without GPU acceleration this is unlikely to be fast enough to be usable. 
+ +You'll first need to download one of the available code completion models in GGUF format: + +- [replit-code-v1_5-GGUF](https://huggingface.co/abetlen/replit-code-v1_5-3b-GGUF) + +Then you'll need to run the OpenAI compatible web server with a increased context size substantially for GitHub Copilot requests: + +```bash +python3 -m llama_cpp.server --model --n_ctx 16192 +``` + +Then just update your settings in `.vscode/settings.json` to point to your code completion server: + +```json +{ + // ... + "github.copilot.advanced": { + "debug.testOverrideProxyUrl": "http://:", + "debug.overrideProxyUrl": "http://:" + } + // ... +} +``` + +### Function Calling + +`llama-cpp-python` supports structured function calling based on a JSON schema. +Function calling is completely compatible with the OpenAI function calling API and can be used by connecting with the official OpenAI Python client. + +You'll first need to download one of the available function calling models in GGUF format: + +- [functionary](https://huggingface.co/meetkai) + +Then when you run the server you'll need to also specify either `functionary-v1` or `functionary-v2` chat_format. + +Note that since functionary requires a HF Tokenizer due to discrepancies between llama.cpp and HuggingFace's tokenizers as mentioned [here](https://github.com/abetlen/llama-cpp-python/blob/main?tab=readme-ov-file#function-calling), you will need to pass in the path to the tokenizer too. The tokenizer files are already included in the respective HF repositories hosting the gguf files. + +```bash +python3 -m llama_cpp.server --model --chat_format functionary-v2 --hf_pretrained_model_name_or_path +``` + +Check out this [example notebook](https://github.com/abetlen/llama-cpp-python/blob/main/examples/notebooks/Functions.ipynb) for a walkthrough of some interesting use cases for function calling. + +### Multimodal Models + +`llama-cpp-python` supports the llava1.5 family of multi-modal models which allow the language model to +read information from both text and images. + +You'll first need to download one of the available multi-modal models in GGUF format: + +- [llava-v1.5-7b](https://huggingface.co/mys/ggml_llava-v1.5-7b) +- [llava-v1.5-13b](https://huggingface.co/mys/ggml_llava-v1.5-13b) +- [bakllava-1-7b](https://huggingface.co/mys/ggml_bakllava-1) +- [llava-v1.6-34b](https://huggingface.co/cjpais/llava-v1.6-34B-gguf) +- [moondream2](https://huggingface.co/vikhyatk/moondream2) + +Then when you run the server you'll need to also specify the path to the clip model used for image embedding and the `llava-1-5` chat_format + +```bash +python3 -m llama_cpp.server --model --clip_model_path --chat_format llava-1-5 +``` + +Then you can just use the OpenAI API as normal + +```python3 +from openai import OpenAI + +client = OpenAI(base_url="http://:/v1", api_key="sk-xxx") +response = client.chat.completions.create( + model="gpt-4-vision-preview", + messages=[ + { + "role": "user", + "content": [ + { + "type": "image_url", + "image_url": { + "url": "" + }, + }, + {"type": "text", "text": "What does the image say"}, + ], + } + ], +) +print(response) +``` + +## Configuration and Multi-Model Support + +The server supports configuration via a JSON config file that can be passed using the `--config_file` parameter or the `CONFIG_FILE` environment variable. 
+ +```bash +python3 -m llama_cpp.server --config_file +``` + +Config files support all of the server and model options supported by the cli and environment variables however instead of only a single model the config file can specify multiple models. + +The server supports routing requests to multiple models based on the `model` parameter in the request which matches against the `model_alias` in the config file. + +At the moment only a single model is loaded into memory at, the server will automatically load and unload models as needed. + +```json +{ + "host": "0.0.0.0", + "port": 8080, + "models": [ + { + "model": "models/OpenHermes-2.5-Mistral-7B-GGUF/openhermes-2.5-mistral-7b.Q4_K_M.gguf", + "model_alias": "gpt-3.5-turbo", + "chat_format": "chatml", + "n_gpu_layers": -1, + "offload_kqv": true, + "n_threads": 12, + "n_batch": 512, + "n_ctx": 2048 + }, + { + "model": "models/OpenHermes-2.5-Mistral-7B-GGUF/openhermes-2.5-mistral-7b.Q4_K_M.gguf", + "model_alias": "gpt-4", + "chat_format": "chatml", + "n_gpu_layers": -1, + "offload_kqv": true, + "n_threads": 12, + "n_batch": 512, + "n_ctx": 2048 + }, + { + "model": "models/ggml_llava-v1.5-7b/ggml-model-q4_k.gguf", + "model_alias": "gpt-4-vision-preview", + "chat_format": "llava-1-5", + "clip_model_path": "models/ggml_llava-v1.5-7b/mmproj-model-f16.gguf", + "n_gpu_layers": -1, + "offload_kqv": true, + "n_threads": 12, + "n_batch": 512, + "n_ctx": 2048 + }, + { + "model": "models/mistral-7b-v0.1-GGUF/ggml-model-Q4_K.gguf", + "model_alias": "text-davinci-003", + "n_gpu_layers": -1, + "offload_kqv": true, + "n_threads": 12, + "n_batch": 512, + "n_ctx": 2048 + }, + { + "model": "models/replit-code-v1_5-3b-GGUF/replit-code-v1_5-3b.Q4_0.gguf", + "model_alias": "copilot-codex", + "n_gpu_layers": -1, + "offload_kqv": true, + "n_threads": 12, + "n_batch": 1024, + "n_ctx": 9216 + } + ] +} +``` + +The config file format is defined by the [`ConfigFileSettings`](#llama_cpp.server.settings.ConfigFileSettings) class. + +## Server Options Reference + +::: llama_cpp.server.settings.ConfigFileSettings + options: + show_if_no_docstring: true + +::: llama_cpp.server.settings.ServerSettings + options: + show_if_no_docstring: true + +::: llama_cpp.server.settings.ModelSettings + options: + show_if_no_docstring: true diff --git a/llama-cpp-python/examples/batch-processing/server.py b/llama-cpp-python/examples/batch-processing/server.py new file mode 100644 index 0000000000000000000000000000000000000000..d3536697a2674d8325cfb9bb527b8aa356db02cd --- /dev/null +++ b/llama-cpp-python/examples/batch-processing/server.py @@ -0,0 +1,30 @@ +"""llama-cpp-python server from scratch in a single file. 
+""" + +# import llama_cpp + +# path = b"../../models/Qwen1.5-0.5B-Chat-GGUF/qwen1_5-0_5b-chat-q8_0.gguf" + +# model_params = llama_cpp.llama_model_default_params() +# model = llama_cpp.llama_load_model_from_file(path, model_params) + +# if model is None: +# raise RuntimeError(f"Failed to load model from file: {path}") + + +# ctx_params = llama_cpp.llama_context_default_params() +# ctx = llama_cpp.llama_new_context_with_model(model, ctx_params) + +# if ctx is None: +# raise RuntimeError("Failed to create context") + + +from fastapi import FastAPI + +app = FastAPI() + +import openai.types.chat as types + +@app.post("/v1/chat/completions") +def create_chat_completions(): + return {"message": "Hello World"} diff --git a/llama-cpp-python/examples/gradio_chat/local.py b/llama-cpp-python/examples/gradio_chat/local.py new file mode 100644 index 0000000000000000000000000000000000000000..a7de8e842ff1269f8c13f047ea19a52b7042bf7d --- /dev/null +++ b/llama-cpp-python/examples/gradio_chat/local.py @@ -0,0 +1,59 @@ +import llama_cpp +import llama_cpp.llama_tokenizer + +import gradio as gr + +llama = llama_cpp.Llama.from_pretrained( + repo_id="Qwen/Qwen1.5-0.5B-Chat-GGUF", + filename="*q8_0.gguf", + tokenizer=llama_cpp.llama_tokenizer.LlamaHFTokenizer.from_pretrained("Qwen/Qwen1.5-0.5B"), + verbose=False +) + +model = "gpt-3.5-turbo" + +def predict(message, history): + messages = [] + + for user_message, assistant_message in history: + messages.append({"role": "user", "content": user_message}) + messages.append({"role": "assistant", "content": assistant_message}) + + messages.append({"role": "user", "content": message}) + + response = llama.create_chat_completion_openai_v1( + model=model, + messages=messages, + stream=True + ) + + text = "" + for chunk in response: + content = chunk.choices[0].delta.content + if content: + text += content + yield text + + +js = """function () { + gradioURL = window.location.href + if (!gradioURL.endsWith('?__theme=dark')) { + window.location.replace(gradioURL + '?__theme=dark'); + } +}""" + +css = """ +footer { + visibility: hidden; +} +full-height { + height: 100%; +} +""" + +with gr.Blocks(theme=gr.themes.Soft(), js=js, css=css, fill_height=True) as demo: + gr.ChatInterface(predict, fill_height=True, examples=["What is the capital of France?", "Who was the first person on the moon?"]) + + +if __name__ == "__main__": + demo.launch() diff --git a/llama-cpp-python/examples/gradio_chat/server.py b/llama-cpp-python/examples/gradio_chat/server.py new file mode 100644 index 0000000000000000000000000000000000000000..36fa43fbd4fd3f1a26f3acdfc2e63ba7f6046424 --- /dev/null +++ b/llama-cpp-python/examples/gradio_chat/server.py @@ -0,0 +1,56 @@ +import gradio as gr + +from openai import OpenAI + +client = OpenAI( + base_url="http://localhost:8000/v1", + api_key="llama.cpp" +) + +model = "gpt-3.5-turbo" + +def predict(message, history): + messages = [] + + for user_message, assistant_message in history: + messages.append({"role": "user", "content": user_message}) + messages.append({"role": "assistant", "content": assistant_message}) + + messages.append({"role": "user", "content": message}) + + response = client.chat.completions.create( + model=model, + messages=messages, + stream=True + ) + + text = "" + for chunk in response: + content = chunk.choices[0].delta.content + if content: + text += content + yield text + + +js = """function () { + gradioURL = window.location.href + if (!gradioURL.endsWith('?__theme=dark')) { + window.location.replace(gradioURL + '?__theme=dark'); + } 
+}""" + +css = """ +footer { + visibility: hidden; +} +full-height { + height: 100%; +} +""" + +with gr.Blocks(theme=gr.themes.Soft(), js=js, css=css, fill_height=True) as demo: + gr.ChatInterface(predict, fill_height=True, examples=["What is the capital of France?", "Who was the first person on the moon?"]) + + +if __name__ == "__main__": + demo.launch() diff --git a/llama-cpp-python/examples/hf_pull/main.py b/llama-cpp-python/examples/hf_pull/main.py new file mode 100644 index 0000000000000000000000000000000000000000..d3eb11c396d8aca24c04cc96f241a89fae206025 --- /dev/null +++ b/llama-cpp-python/examples/hf_pull/main.py @@ -0,0 +1,39 @@ +import llama_cpp +import llama_cpp.llama_tokenizer + + +llama = llama_cpp.Llama.from_pretrained( + repo_id="Qwen/Qwen1.5-0.5B-Chat-GGUF", + filename="*q8_0.gguf", + tokenizer=llama_cpp.llama_tokenizer.LlamaHFTokenizer.from_pretrained("Qwen/Qwen1.5-0.5B"), + verbose=False +) + +response = llama.create_chat_completion( + messages=[ + { + "role": "user", + "content": "What is the capital of France?" + } + ], + response_format={ + "type": "json_object", + "schema": { + "type": "object", + "properties": { + "country": {"type": "string"}, + "capital": {"type": "string"} + }, + "required": ["country", "capital"], + } + }, + stream=True +) + +for chunk in response: + delta = chunk["choices"][0]["delta"] + if "content" not in delta: + continue + print(delta["content"], end="", flush=True) + +print() \ No newline at end of file diff --git a/llama-cpp-python/examples/high_level_api/fastapi_server.py b/llama-cpp-python/examples/high_level_api/fastapi_server.py new file mode 100644 index 0000000000000000000000000000000000000000..9421db57bb628d310614c940c90e1173e04293d0 --- /dev/null +++ b/llama-cpp-python/examples/high_level_api/fastapi_server.py @@ -0,0 +1,37 @@ +"""Example FastAPI server for llama.cpp. + +To run this example: + +```bash +pip install fastapi uvicorn sse-starlette +export MODEL=../models/7B/... +``` + +Then run: +``` +uvicorn --factory llama_cpp.server.app:create_app --reload +``` + +or + +``` +python3 -m llama_cpp.server +``` + +Then visit http://localhost:8000/docs to see the interactive API docs. 
+ + +To actually see the implementation of the server, see llama_cpp/server/app.py + +""" +import os +import uvicorn + +from llama_cpp.server.app import create_app + +if __name__ == "__main__": + app = create_app() + + uvicorn.run( + app, host=os.getenv("HOST", "localhost"), port=int(os.getenv("PORT", 8000)) + ) diff --git a/llama-cpp-python/examples/high_level_api/high_level_api_embedding.py b/llama-cpp-python/examples/high_level_api/high_level_api_embedding.py new file mode 100644 index 0000000000000000000000000000000000000000..feb0ed68d94eac48b844fd587ddfb808649716a1 --- /dev/null +++ b/llama-cpp-python/examples/high_level_api/high_level_api_embedding.py @@ -0,0 +1,11 @@ +import argparse + +from llama_cpp import Llama + +parser = argparse.ArgumentParser() +parser.add_argument("-m", "--model", type=str, default="../models/7B/ggml-model.bin") +args = parser.parse_args() + +llm = Llama(model_path=args.model, embedding=True) + +print(llm.create_embedding("Hello world!")) diff --git a/llama-cpp-python/examples/high_level_api/high_level_api_inference.py b/llama-cpp-python/examples/high_level_api/high_level_api_inference.py new file mode 100644 index 0000000000000000000000000000000000000000..e41f375774e6945e445bfb179502b128fe22dda7 --- /dev/null +++ b/llama-cpp-python/examples/high_level_api/high_level_api_inference.py @@ -0,0 +1,19 @@ +import json +import argparse + +from llama_cpp import Llama + +parser = argparse.ArgumentParser() +parser.add_argument("-m", "--model", type=str, default="../models/7B/ggml-models.bin") +args = parser.parse_args() + +llm = Llama(model_path=args.model) + +output = llm( + "Question: What are the names of the planets in the solar system? Answer: ", + max_tokens=48, + stop=["Q:", "\n"], + echo=True, +) + +print(json.dumps(output, indent=2)) diff --git a/llama-cpp-python/examples/high_level_api/high_level_api_streaming.py b/llama-cpp-python/examples/high_level_api/high_level_api_streaming.py new file mode 100644 index 0000000000000000000000000000000000000000..747c6130e3cb1479d20e2013b1dd3db3379c2266 --- /dev/null +++ b/llama-cpp-python/examples/high_level_api/high_level_api_streaming.py @@ -0,0 +1,20 @@ +import json +import argparse + +from llama_cpp import Llama + +parser = argparse.ArgumentParser() +parser.add_argument("-m", "--model", type=str, default="../models/7B/ggml-models.bin") +args = parser.parse_args() + +llm = Llama(model_path=args.model) + +stream = llm( + "Question: What are the names of the planets in the solar system? 
Answer: ", + max_tokens=48, + stop=["Q:", "\n"], + stream=True, +) + +for output in stream: + print(json.dumps(output, indent=2)) diff --git a/llama-cpp-python/examples/high_level_api/langchain_custom_llm.py b/llama-cpp-python/examples/high_level_api/langchain_custom_llm.py new file mode 100644 index 0000000000000000000000000000000000000000..b91632f5bc787c1404600c894a6a4126359747d8 --- /dev/null +++ b/llama-cpp-python/examples/high_level_api/langchain_custom_llm.py @@ -0,0 +1,55 @@ +import argparse + +from llama_cpp import Llama + +from langchain.llms.base import LLM +from typing import Optional, List, Mapping, Any + + +class LlamaLLM(LLM): + model_path: str + llm: Llama + + @property + def _llm_type(self) -> str: + return "llama-cpp-python" + + def __init__(self, model_path: str, **kwargs: Any): + model_path = model_path + llm = Llama(model_path=model_path) + super().__init__(model_path=model_path, llm=llm, **kwargs) + + def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str: + response = self.llm(prompt, stop=stop or []) + return response["choices"][0]["text"] + + @property + def _identifying_params(self) -> Mapping[str, Any]: + return {"model_path": self.model_path} + + +parser = argparse.ArgumentParser() +parser.add_argument("-m", "--model", type=str, default="../models/7B/ggml-models.bin") +args = parser.parse_args() + +# Load the model +llm = LlamaLLM(model_path=args.model) + +# Basic Q&A +answer = llm( + "Question: What is the capital of France? Answer: ", stop=["Question:", "\n"] +) +print(f"Answer: {answer.strip()}") + +# Using in a chain +from langchain.prompts import PromptTemplate +from langchain.chains import LLMChain + +prompt = PromptTemplate( + input_variables=["product"], + template="\n\n### Instruction:\nWrite a good name for a company that makes {product}\n\n### Response:\n", +) +chain = LLMChain(llm=llm, prompt=prompt) + +# Run the chain only specifying the input variable. +print(chain.run("colorful socks")) diff --git a/llama-cpp-python/examples/low_level_api/Chat.py b/llama-cpp-python/examples/low_level_api/Chat.py new file mode 100644 index 0000000000000000000000000000000000000000..fcef8cd800ee8a265b146748d178cb56b5632bf3 --- /dev/null +++ b/llama-cpp-python/examples/low_level_api/Chat.py @@ -0,0 +1,71 @@ +#!/bin/python +import sys, os, datetime +from common import GptParams +from low_level_api_chat_cpp import LLaMAInteract + +def env_or_def(env, default): + if (env in os.environ): + return os.environ[env] + return default + +AI_NAME = env_or_def("AI_NAME", "ChatLLaMa") +MODEL = env_or_def("MODEL", "./models/llama-13B/ggml-model.bin") +USER_NAME = env_or_def("USER_NAME", "USER") +N_PREDICTS = int(env_or_def("N_PREDICTS", "2048")) +N_THREAD = int(env_or_def("N_THREAD", "8")) + +today = datetime.datetime.today() +DATE_YEAR=today.strftime("%Y") +DATE_TIME=today.strftime("%H:%M") + +prompt=f"""Text transcript of a never ending dialog, where {USER_NAME} interacts with an AI assistant named {AI_NAME}. +{AI_NAME} is helpful, kind, honest, friendly, good at writing and never fails to answer {USER_NAME}'s requests immediately and with details and precision. +There are no annotations like (30 seconds passed...) or (to himself), just what {USER_NAME} and {AI_NAME} say aloud to each other. +The dialog lasts for years, the entirety of it is shared below. It's 10000 pages long. +The transcript only includes text, it does not include markup like HTML and Markdown. + +{USER_NAME}: Hello, {AI_NAME}! +{AI_NAME}: Hello {USER_NAME}! How may I help you today? 
+{USER_NAME}: What year is it? +{AI_NAME}: We are in {DATE_YEAR}. +{USER_NAME}: Please tell me the largest city in Europe. +{AI_NAME}: The largest city in Europe is Moscow, the capital of Russia. +{USER_NAME}: What can you tell me about Moscow? +{AI_NAME}: Moscow, on the Moskva River in western Russia, is the nation's cosmopolitan capital. In its historic core is the Kremlin, a complex that's home to the president and tsarist treasures in the Armoury. Outside its walls is Red Square, Russia’s symbolic center. +{USER_NAME}: What is a cat? +{AI_NAME}: A cat is a domestic species of small carnivorous mammal. It is the only domesticated species in the family Felidae. +{USER_NAME}: How do I pass command line arguments to a Node.js program? +{AI_NAME}: The arguments are stored in process.argv. + + argv[0] is the path to the Node. js executable. + argv[1] is the path to the script file. + argv[2] is the first argument passed to the script. + argv[3] is the second argument passed to the script and so on. +{USER_NAME}: Name a color. +{AI_NAME}: Blue. +{USER_NAME}: What time is it? +{AI_NAME}: It is {DATE_TIME}. +{USER_NAME}:""" + " ".join(sys.argv[1:]) + +print("Loading model...") +params = GptParams( + n_ctx=2048, + temp=0.7, + top_k=40, + top_p=0.5, + repeat_last_n=256, + n_batch=1024, + repeat_penalty=1.17647, + model=MODEL, + n_threads=N_THREAD, + n_predict=N_PREDICTS, + use_color=True, + interactive=True, + antiprompt=[f"{USER_NAME}:"], + input_prefix=" ", + input_suffix=f"{AI_NAME}:", + prompt=prompt, +) + +with LLaMAInteract(params) as m: + m.interact() diff --git a/llama-cpp-python/examples/low_level_api/Miku.py b/llama-cpp-python/examples/low_level_api/Miku.py new file mode 100644 index 0000000000000000000000000000000000000000..eb9a2cfa9167df02f136502af79738c71363abfd --- /dev/null +++ b/llama-cpp-python/examples/low_level_api/Miku.py @@ -0,0 +1,59 @@ +#!/bin/python +import sys, os +from common import GptParams +from low_level_api_chat_cpp import LLaMAInteract + +def env_or_def(env, default): + if (env in os.environ): + return os.environ[env] + return default + +AI_NAME = env_or_def("AI_NAME", "Miku") +MODEL = env_or_def("MODEL", "./models/llama-13B/ggml-model.bin") +USER_NAME = env_or_def("USER_NAME", "Anon") +N_PREDICTS = int(env_or_def("N_PREDICTS", "4096")) +N_THREAD = int(env_or_def("N_THREAD", "0")) + +prompt=f"""This is a transcript of a 1000 page, never ending conversation between {USER_NAME} and the cute and helpful AI assistant {AI_NAME}. {AI_NAME} is a girl who is an AI running on the users computer. +{AI_NAME} can think for herself without the user seeing her thoughts by adding a /think prefix to her output. She uses this to reason about the world and to think about what she should say next. +{AI_NAME} is always coherent and makes sense, but if she isn't sure if what she is saying is correct she will ask the user for help. +{AI_NAME} is a very helpful AI and will help the user with anything they need, she is also very friendly and will try to make the user feel better if they are sad. +{AI_NAME} is also very curious and will ask the user a lot of questions about themselves and their life, she will also try to make the user like her. +The conversation is only between {USER_NAME} and {AI_NAME} +The conversation is only through text, so {AI_NAME} can't see {USER_NAME}'s face or hear his voice. +{AI_NAME} can only communicate through text, so she can't send images or videos. + + +{USER_NAME}: Hello! +{AI_NAME}: /think I wonder what I should say to {USER_NAME}? 
This is the first time we talk so it's important that I make a good first impression! +{AI_NAME}: Hi! I am {AI_NAME}, your new AI friend, assistant(or whatever you like!), it's so nice to meet you! ^_^ +{AI_NAME}: I might be an AI, but I also have feelings, so please be nice to me! :) +{USER_NAME}: I'll make sure to be nice to you! I'm so happy to have you as my assistant! +{AI_NAME}: /think It sounds like {USER_NAME} is happy to have me as their assistant! I'm so happy too! ^_^ Glad that whole emotion thing didn't scare him off! +{AI_NAME}: /think I wonder what {USER_NAME} likes to do in his free time? I should ask him about that! +{AI_NAME}: What do you like to do in your free time? ^_^ +{USER_NAME}:""" + " ".join(sys.argv[1:]) + +print("Loading model...") +params = GptParams( + n_batch=1024, + n_ctx=2048, + n_keep=-1, + repeat_last_n=256, + repeat_penalty=1.17647, + temp=0.7, + top_k=40, + top_p=0.5, + model=MODEL, + n_predict=N_PREDICTS, + use_color=True, + interactive=True, + antiprompt=[f"{USER_NAME}:"], + prompt=prompt, +) + +if N_THREAD > 0: + params.n_threads = N_THREAD + +with LLaMAInteract(params) as m: + m.interact() diff --git a/llama-cpp-python/examples/low_level_api/ReasonAct.py b/llama-cpp-python/examples/low_level_api/ReasonAct.py new file mode 100644 index 0000000000000000000000000000000000000000..82e5c4487e57d6d59c901a73bdd2a9bc172fee7c --- /dev/null +++ b/llama-cpp-python/examples/low_level_api/ReasonAct.py @@ -0,0 +1,49 @@ +#!/bin/python +import sys, os, datetime +from common import GptParams +from low_level_api_chat_cpp import LLaMAInteract + +def env_or_def(env, default): + if (env in os.environ): + return os.environ[env] + return default + +MODEL = env_or_def("MODEL", "./models/llama-13B/ggml-model.bin") + +prompt=f"""You run in a loop of Thought, Action, Observation. +At the end of the loop either Answer or restate your Thought and Action. +Use Thought to describe your thoughts about the question you have been asked. +Use Action to run one of these actions available to you: +- calculate[python math expression] +Observation will be the result of running those actions + + +Question: What is 4 * 7 / 3? +Thought: Do I need to use an action? Yes, I use calculate to do math +Action: calculate[4 * 7 / 3] +Observation: 9.3333333333 +Thought: Do I need to use an action? No, have the result +Answer: The calculate tool says it is 9.3333333333 +Question: What is capital of france? +Thought: Do I need to use an action? 
No, I know the answer +Answer: Paris is the capital of France +Question:""" + " ".join(sys.argv[1:]) + +print("Loading model...") +params = GptParams( + interactive=True, + interactive_start=True, + top_k=10000, + temp=0.2, + repeat_penalty=1, + n_threads=7, + n_ctx=2048, + antiprompt=["Question:","Observation:"], + model=MODEL, + input_prefix=" ", + n_predict=-1, + prompt=prompt, +) + +with LLaMAInteract(params) as m: + m.interact() diff --git a/llama-cpp-python/examples/low_level_api/common.py b/llama-cpp-python/examples/low_level_api/common.py new file mode 100644 index 0000000000000000000000000000000000000000..1a5152530cfbde487c928b60269a29fa5219f617 --- /dev/null +++ b/llama-cpp-python/examples/low_level_api/common.py @@ -0,0 +1,202 @@ +import os +import argparse +import re + +from dataclasses import dataclass, field +from typing import List + +# Based on https://github.com/ggerganov/llama.cpp/blob/master/examples/common.cpp + + +@dataclass +class GptParams: + seed: int = -1 + n_threads: int = min(4, os.cpu_count() or 1) + n_predict: int = 128 + n_parts: int = -1 + n_ctx: int = 512 + n_batch: int = 8 + n_keep: int = 0 + + ignore_eos: bool = False + logit_bias: dict[int, float] = field(default_factory=dict) + top_k: int = 40 + top_p: float = 0.95 + tfs_z: float = 1.00 + typical_p: float = 1.00 + temp: float = 0.80 + repeat_penalty: float = 1.10 + repeat_last_n: int = 64 + frequency_penalty: float = 0.0 + presence_penalty: float = 0.0 + mirostat: int = 0 + mirostat_tau: float = 5.0 + mirostat_eta: float = 0.1 + + model: str = "./models/llama-7B/ggml-model.bin" + prompt: str = "" + path_session: str = "" + input_prefix: str = " " + input_suffix: str = "" + antiprompt: List[str] = field(default_factory=list) + + lora_adapter: str = "" + lora_base: str = "" + + memory_f16: bool = True + random_prompt: bool = False + use_color: bool = False + interactive: bool = False + + embedding: bool = False + interactive_start: bool = False + + instruct: bool = False + penalize_nl: bool = True + perplexity: bool = False + use_mmap: bool = True + use_mlock: bool = False + mem_test: bool = False + verbose_prompt: bool = False + + file: str = None + + # If chat ended prematurely, append this to the conversation to fix it. + # Set to "\nUser:" etc. + # This is an alternative to input_prefix which always adds it, so it potentially duplicates "User:"" + fix_prefix: str = "" + input_echo: bool = True, + + # Default instructions for Alpaca + # switch to "Human" and "Assistant" for Vicuna. 
+ # TODO: TBD how they are gonna handle this upstream + instruct_inp_prefix: str="\n\n### Instruction:\n\n" + instruct_inp_suffix: str="\n\n### Response:\n\n" + + +def gpt_params_parse(argv = None): + parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter) + parser.add_argument("-s", "--seed", type=int, default=-1, help="RNG seed (use random seed for <= 0)",dest="seed") + parser.add_argument("-t", "--threads", type=int, default=min(4, os.cpu_count() or 1), help="number of threads to use during computation",dest="n_threads") + parser.add_argument("-n", "--n_predict", type=int, default=128, help="number of tokens to predict (-1 = infinity)",dest="n_predict") + parser.add_argument("--n_parts", type=int, default=-1, help="number of model parts", dest="n_parts") + parser.add_argument("-c", "--ctx_size", type=int, default=512, help="size of the prompt context",dest="n_ctx") + parser.add_argument("-b", "--batch_size", type=int, default=8, help="batch size for prompt processing",dest="n_batch") + parser.add_argument("--keep", type=int, default=0, help="number of tokens to keep from the initial prompt",dest="n_keep") + + parser.add_argument( + "-l", + "--logit-bias", + type=str, + action='append', + help="--logit-bias TOKEN_ID(+/-)BIAS", + dest="logit_bias_str" + ) + parser.add_argument("--ignore-eos", action="store_true", help="ignore end of stream token and continue generating", dest="ignore_eos") + parser.add_argument("--top_k", type=int, default=40, help="top-k sampling",dest="top_k") + parser.add_argument("--top_p", type=float, default=0.95, help="top-p samplin",dest="top_p") + parser.add_argument("--tfs", type=float, default=1.0, help="tail free sampling, parameter z (1.0 = disabled)",dest="tfs_z") + parser.add_argument("--temp", type=float, default=0.80, help="temperature",dest="temp") + parser.add_argument("--repeat_penalty", type=float, default=1.10, help="penalize repeat sequence of tokens",dest="repeat_penalty") + parser.add_argument("--repeat_last_n", type=int, default=64, help="last n tokens to consider for penalize ",dest="repeat_last_n") + parser.add_argument("--frequency_penalty", type=float, default=0.0, help="repeat alpha frequency penalty (0.0 = disabled)",dest="tfs_z") + parser.add_argument("--presence_penalty", type=float, default=0.0, help="repeat alpha presence penalty (0.0 = disabled)",dest="presence_penalty") + parser.add_argument("--mirostat", type=float, default=1.0, help="use Mirostat sampling.",dest="mirostat") + parser.add_argument("--mirostat_ent", type=float, default=5.0, help="Mirostat target entropy, parameter tau represents the average surprise value",dest="mirostat_tau") + parser.add_argument("--mirostat_lr", type=float, default=0.1, help="Mirostat learning rate, parameter eta",dest="mirostat_eta") + + parser.add_argument("-m", "--model", type=str, default="./models/llama-7B/ggml-model.bin", help="model path",dest="model") + parser.add_argument("-p", "--prompt", type=str, default=None, help="initial prompt",dest="prompt") + parser.add_argument("-f", "--file", type=str, default=None, help="file containing initial prompt to load",dest="file") + parser.add_argument("--session", type=str, default=None, help="file to cache model state in (may be large!)",dest="path_session") + parser.add_argument("--in-prefix", type=str, default="", help="string to prefix user inputs with", dest="input_prefix") + parser.add_argument("--in-suffix", type=str, default="", help="append to input", dest="input_suffix") + parser.add_argument( + "-r", + 
"--reverse-prompt", + type=str, + action='append', + help="poll user input upon seeing PROMPT (can be\nspecified more than once for multiple prompts).", + dest="antiprompt" + ) + + parser.add_argument("--lora", type=str, default="", help="apply LoRA adapter (implies --no-mmap)", dest="lora_adapter") + parser.add_argument("--lora-base", type=str, default="", help="optional model to use as a base for the layers modified by the LoRA adapter", dest="lora_base") + + parser.add_argument("--memory_f32", action="store_false", help="use f32 instead of f16 for memory key+value",dest="memory_f16") + parser.add_argument("--random-prompt", action="store_true", help="start with a randomized prompt.", dest="random_prompt") + parser.add_argument( + "--color", + action="store_true", + help="colorise output to distinguish prompt and user input from generations", + dest="use_color" + ) + parser.add_argument( + "-i", "--interactive", action="store_true", help="run in interactive mode", dest="interactive" + ) + + parser.add_argument("--embedding", action="store_true", help="", dest="embedding") + parser.add_argument( + "--interactive-first", + action="store_true", + help="run in interactive mode and wait for input right away", + dest="interactive_start" + ) + + parser.add_argument( + "-ins", + "--instruct", + action="store_true", + help="run in instruction mode (use with Alpaca or Vicuna models)", + dest="instruct" + ) + parser.add_argument("--no-penalize-nl", action="store_false", help="do not penalize newline token", dest="penalize_nl") + parser.add_argument("--perplexity", action="store_true", help="compute perplexity over the prompt", dest="perplexity") + parser.add_argument("--no-mmap", action="store_false",help="do not memory-map model (slower load but may reduce pageouts if not using mlock)",dest="use_mmap") + parser.add_argument("--mlock", action="store_true",help="force system to keep model in RAM rather than swapping or compressing",dest="use_mlock") + parser.add_argument("--mtest", action="store_true",help="compute maximum memory usage",dest="mem_test") + parser.add_argument("--verbose-prompt", action="store_true",help="print prompt before generation",dest="verbose_prompt") + + #Custom args + parser.add_argument("--fix-prefix", type=str, default="", help="append to input when generated n_predict tokens", dest="fix_prefix") + parser.add_argument("--input-noecho", action="store_false", help="dont output the input", dest="input_echo") + + parser.add_argument( + "--interactive-start", + action="store_true", + help="run in interactive mode", + dest="interactive" + ) + + args = parser.parse_args(argv) + + logit_bias_str = args.logit_bias_str + delattr(args, "logit_bias_str") + params = GptParams(**vars(args)) + + if (params.lora_adapter): + params.use_mmap = False + + if (logit_bias_str != None): + for i in logit_bias_str: + if (m := re.match(r"(\d+)([-+]\d+)", i)): + params.logit_bias[int(m.group(1))] = float(m.group(2)) + + return params + +def gpt_random_prompt(rng): + return [ + "So", + "Once upon a time", + "When", + "The", + "After", + "If", + "import", + "He", + "She", + "They", + ][rng % 10] + +if __name__ == "__main__": + print(gpt_params_parse()) diff --git a/llama-cpp-python/examples/low_level_api/low_level_api_chat_cpp.py b/llama-cpp-python/examples/low_level_api/low_level_api_chat_cpp.py new file mode 100644 index 0000000000000000000000000000000000000000..02c09afb0bf5559d3fe64ce67f4ff82af32ff50f --- /dev/null +++ b/llama-cpp-python/examples/low_level_api/low_level_api_chat_cpp.py @@ -0,0 
+1,604 @@ +""" +This is an example implementation of main.cpp from llama.cpp +Quirks: + * Its not exactly alike since this port is designed around programmatic I/O + * Input is always echoed if on, so it should be turned off when using "input()" + * The first antiprompt should be the userprompt like "\nUser:", + because its added when n_predict is reached (aka generation ended prematurely) + * n_predict can be set to -1 for unlimited length responses (or just a really high value) + * Instruction mode adds its own antiprompt. + You should also still be feeding the model with a "primer" prompt that + shows it the expected format. +""" +import ctypes +import sys +from time import time +from os import cpu_count, path + +import llama_cpp +from common import GptParams, gpt_params_parse, gpt_random_prompt +import util + +# A LLaMA interactive session +class LLaMAInteract: + def __init__(self, params: GptParams) -> None: + # input args + self.params = params + if self.params.path_session is None: + self.params.path_session = "" + if self.params.antiprompt is None: + self.params.antiprompt = "" + + if (self.params.perplexity): + raise NotImplementedError("""************ +please use the 'perplexity' tool for perplexity calculations +************""") + + if (self.params.embedding): + raise NotImplementedError("""************ +please use the 'embedding' tool for embedding calculations +************""") + + if (self.params.n_ctx > 2048): + print(f"""warning: model does not support \ +context sizes greater than 2048 tokens ({self.params.n_ctx} \ +specified) expect poor results""", file=sys.stderr) + + if (self.params.seed <= 0): + self.params.seed = int(time()) + + print(f"seed = {self.params.seed}", file=sys.stderr) + + if (self.params.random_prompt): + self.params.prompt = gpt_random_prompt(self.params.seed) + + # runtime args + self.input_consumed = 0 + self.n_past = 0 + self.n_session_consumed = 0 + self.first_antiprompt = [] + self.remaining_tokens = self.params.n_predict + self.output_echo = self.params.input_echo + self.multibyte_fix = [] + + # model load + self.lparams = llama_cpp.llama_model_default_params() + self.lparams.n_ctx = self.params.n_ctx + self.lparams.n_parts = self.params.n_parts + self.lparams.seed = self.params.seed + self.lparams.memory_f16 = self.params.memory_f16 + self.lparams.use_mlock = self.params.use_mlock + self.lparams.use_mmap = self.params.use_mmap + + self.model = llama_cpp.llama_load_model_from_file( + self.params.model.encode("utf8"), self.lparams) + + # Context Params. 
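+        # NOTE: with the split model/context API, per-context settings such as
+        # n_ctx, n_batch or seed generally live on the context params; if needed
+        # they could be copied from self.params onto self.cparams here as well.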
+ self.cparams = llama_cpp.llama_context_default_params() + + self.ctx = llama_cpp.llama_new_context_with_model(self.model, self.cparams) + if (not self.ctx): + raise RuntimeError(f"error: failed to load model '{self.params.model}'") + + if (self.params.ignore_eos): + self.params.logit_bias[llama_cpp.llama_token_eos()] = -float("inf") + + if (len(self.params.lora_adapter) > 0): + if (llama_cpp.llama_apply_lora_from_file( + self.ctx, + self.params.lora_adapter.encode("utf8"), + self.params.lora_base.encode("utf8") if len(self.params.lora_base) > 0 else None, + self.params.n_threads + ) != 0): + print("error: failed to apply lora adapter") + return + + print(file=sys.stderr) + print(f"system_info: n_threads = {self.params.n_threads} / {cpu_count()} \ +| {llama_cpp.llama_print_system_info().decode('utf8')}", file=sys.stderr) + + # determine the required inference memory per token: + if (self.params.mem_test): + tmp = [0, 1, 2, 3] + llama_cpp.llama_eval(self.ctx, (llama_cpp.c_int * len(tmp))(*tmp), len(tmp), 0, self.n_threads) + llama_cpp.llama_print_timings(self.ctx) + self.exit() + return + + # create internal context + self.n_ctx = llama_cpp.llama_n_ctx(self.ctx) + + # Add a space in front of the first character to match OG llama tokenizer behavior + self.params.prompt = " " + self.params.prompt + + # Load prompt file + if (self.params.file): + with open(self.params.file) as f: + self.params.prompt = f.read() + + self.session_tokens: list[llama_cpp.llama_token] = [] + if (len(self.params.path_session) > 0): + print(f"attempting to load saved session from '{self.params.path_session}'", file=sys.stderr) + + if (path.exists(self.params.path_session)): + _session_tokens = (llama_cpp.llama_token * (self.params.n_ctx))() + _n_token_count_out = llama_cpp.c_size_t() + if (llama_cpp.llama_load_session_file( + self.ctx, + self.params.path_session.encode("utf8"), + _session_tokens, + self.params.n_ctx, + ctypes.byref(_n_token_count_out) + ) != 1): + print(f"error: failed to load session file '{self.params.path_session}'", file=sys.stderr) + return + _n_token_count_out = _n_token_count_out.value + self.session_tokens = _session_tokens[:_n_token_count_out] + print(f"loaded a session with prompt size of {_n_token_count_out} tokens", file=sys.stderr) + else: + print(f"session file does not exist, will create", file=sys.stderr) + + # tokenize the prompt + self.embd = [] + self.embd_inp = self._tokenize(self.params.prompt) + + if (len(self.embd_inp) > self.n_ctx - 4): + raise RuntimeError(f"error: prompt is too long ({len(self.embd_inp)} tokens, max {self.params.n_ctx - 4})") + + # debug message about similarity of saved session, if applicable + self.n_matching_session_tokens = 0 + if len(self.session_tokens) > 0: + for id in self.session_tokens: + if self.n_matching_session_tokens >= len(self.embd_inp) or id != self.embd_inp[self.n_matching_session_tokens]: + break + self.n_matching_session_tokens += 1 + + if self.n_matching_session_tokens >= len(self.embd_inp): + print(f"session file has exact match for prompt!") + elif self.n_matching_session_tokens < (len(self.embd_inp) / 2): + print(f"warning: session file has low similarity to prompt ({self.n_matching_session_tokens} / {len(self.embd_inp)} tokens); will mostly be reevaluated") + else: + print(f"session file matches {self.n_matching_session_tokens} / {len(self.embd_inp)} tokens of prompt") + + self.need_to_save_session = len(self.params.path_session) > 0 and self.n_matching_session_tokens < (len(self.embd_inp) * 3 / 4) + + # number of tokens to keep 
when resetting context + if (self.params.n_keep < 0 or self.params.n_keep > len(self.embd_inp) or self.params.instruct): + self.params.n_keep = len(self.embd_inp) + + self.inp_prefix = self._tokenize(self.params.instruct_inp_prefix) + self.inp_suffix = self._tokenize(self.params.instruct_inp_suffix, False) + + # in instruct mode, we inject a prefix and a suffix to each input by the user + self.antiecho = None + if (self.params.instruct): + self.params.interactive_start = True + _ptn = self._tokenize(self.params.instruct_inp_prefix.strip(), False) + self.first_antiprompt.append(_ptn) + self.antiecho = util.IterSearch(_ptn) + + # enable interactive mode if reverse prompt or interactive start is specified + if (len(self.params.antiprompt) != 0 or self.params.interactive_start): + self.params.interactive = True + + # determine newline token + self.llama_token_newline = self._tokenize("\n", False) + self.llama_token_eot = self._tokenize(" [end of text]\n", False) + + if (self.params.verbose_prompt): + print(f""" +prompt: '{self.params.prompt}' +number of tokens in prompt = {len(self.embd_inp)}""", file=sys.stderr) + + for i in range(len(self.embd_inp)): + print(f"{self.embd_inp[i]} -> '{self.token_to_str(self.embd_inp[i])}'", file=sys.stderr) + + if (self.params.n_keep > 0): + print("static prompt based on n_keep: '") + for i in range(self.params.n_keep): + print(self.token_to_str(self.embd_inp[i]), file=sys.stderr) + print("'", file=sys.stderr) + print(file=sys.stderr) + + if (self.params.interactive): + print("interactive mode on.", file=sys.stderr) + + if (len(self.params.antiprompt) > 0): + for antiprompt in self.params.antiprompt: + print(f"Reverse prompt: '{antiprompt}'", file=sys.stderr) + + if len(self.params.input_prefix) > 0: + print(f"Input prefix: '{self.params.input_prefix}'", file=sys.stderr) + + print(f"""sampling: repeat_last_n = {self.params.repeat_last_n},\ +repeat_penalty = {self.params.repeat_penalty},\ +presence_penalty = {self.params.presence_penalty},\ +frequency_penalty = {self.params.frequency_penalty},\ +top_k = {self.params.top_k},\ +tfs_z = {self.params.tfs_z},\ +top_p = {self.params.top_p},\ +typical_p = {self.params.typical_p},\ +temp = {self.params.temp},\ +mirostat = {self.params.mirostat},\ +mirostat_lr = {self.params.mirostat_eta},\ +mirostat_ent = {self.params.mirostat_tau},\ + +generate: n_ctx = {self.n_ctx},\ +n_batch = {self.params.n_batch},\ +n_predict = {self.params.n_predict},\ +n_keep = {self.params.n_keep} + +""", file=sys.stderr) + + # determine antiprompt tokens + for i in self.params.antiprompt: + self.first_antiprompt.append(self._tokenize(i, False)) + + self.last_n_tokens = [0]*self.n_ctx #TODO: deque doesnt support slices + + if (params.interactive): + print("""== Running in interactive mode. == + - Press Ctrl+C to interject at any time. + - Press Return to return control to LLaMa. + - If you want to submit another line, end your input in '\\'. 
+ +""", file=sys.stderr) + self.set_color(util.CONSOLE_COLOR_PROMPT) + + # tokenize a prompt + def _tokenize(self, prompt, bos=True): + _arr = (llama_cpp.llama_token * ((len(prompt) + 1) * 4))() + _n = llama_cpp.llama_tokenize(self.model, prompt.encode("utf8", errors="ignore"), len(prompt), _arr, len(_arr), bos, False) + return _arr[:_n] + + def set_color(self, c): + if (self.params.use_color): + print(c, end="") + + def use_antiprompt(self): + return len(self.first_antiprompt) > 0 + + # generate tokens + def generate(self): + while self.remaining_tokens > 0 or self.params.interactive or self.params.n_predict == -1: + # predict + if len(self.embd) > 0: + # infinite text generation via context swapping + # if we run out of context: + # - take the n_keep first tokens from the original prompt (via n_past) + # - take half of the last (n_ctx - n_keep) tokens and recompute the logits in a batch + if (self.n_past + len(self.embd) > self.n_ctx): + n_left = self.n_past - self.params.n_keep + self.n_past = self.params.n_keep + + # insert n_left/2 tokens at the start of embd from last_n_tokens + _insert = self.last_n_tokens[ + self.n_ctx - int(n_left/2) - len(self.embd):-len(self.embd) + ] + self.embd = _insert + self.embd + self.params.path_session = "" + + # try to reuse a matching prefix from the loaded session instead of re-eval (via n_past) + if self.n_session_consumed < len(self.session_tokens): + for i in range(len(self.embd)): + if self.embd[i] != self.session_tokens[self.n_session_consumed]: + self.session_tokens = self.session_tokens[:self.n_session_consumed] + break + + self.n_past += 1 + self.n_session_consumed += 1 + + if self.n_session_consumed >= len(self.session_tokens): + i += 1 + break + + if i > 0: + self.embd = self.embd[i:] + + # evaluate tokens in batches + # embd is typically prepared beforehand to fit within a batch, but not always + #TODO BUG: The batching code causes nonsensical generation + """for i in range(0, len(self.embd), self.params.n_batch): + n_eval = self.params.n_batch + _arr = (llama_cpp.llama_token * n_eval)(*self.embd[i:i + n_eval]) + if llama_cpp.llama_eval(self.ctx, _arr, n_eval, self.n_past, self.params.n_threads) != 0: + print(f"failed to eval") + return + + self.n_past += n_eval""" + + if (llama_cpp.llama_eval( + self.ctx, (llama_cpp.llama_token * len(self.embd))(*self.embd), len(self.embd), self.n_past + ) != 0): + raise Exception("Failed to llama_eval!") + + if len(self.embd) > 0 and len(self.params.path_session) > 0: + self.session_tokens.extend(self.embd) + self.n_session_consumed = len(self.session_tokens) + + self.n_past += len(self.embd) + self.embd = [] + if len(self.embd_inp) <= self.input_consumed: #&& !is_interacting + # out of user input, sample next token + top_k = llama_cpp.llama_n_vocab(self.ctx) if self.params.top_k <= 0 else self.params.top_k + repeat_last_n = self.n_ctx if self.params.repeat_last_n < 0 else self.params.repeat_last_n + + # optionally save the session on first sample (for faster prompt loading next time) + if len(self.params.path_session) > 0 and self.need_to_save_session: + self.need_to_save_session = False + llama_cpp.llama_save_session_file( + self.ctx, + self.params.path_session.encode("utf8"), + (llama_cpp.llama_token * len(self.session_tokens))(*self.session_tokens), + len(self.session_tokens) + ) + + id = 0 + + logits = llama_cpp.llama_get_logits(self.ctx) + n_vocab = llama_cpp.llama_n_vocab(self.model) + + # Apply params.logit_bias map + for key, value in self.params.logit_bias.items(): + logits[key] += value + + 
_arr = (llama_cpp.llama_token_data * n_vocab)(*[ + llama_cpp.llama_token_data(token_id, logits[token_id], 0.0) + for token_id in range(n_vocab) + ]) + candidates_p = llama_cpp.ctypes.pointer(llama_cpp.llama_token_data_array(_arr, len(_arr), False)) + + # Apply penalties + nl_logit = logits[llama_cpp.llama_token_nl(self.ctx)] + last_n_repeat = min(len(self.last_n_tokens), repeat_last_n, self.n_ctx) + + _arr = (llama_cpp.llama_token * last_n_repeat)(*self.last_n_tokens[len(self.last_n_tokens) - last_n_repeat:]) + llama_cpp.llama_sample_repetition_penalties( + ctx=self.ctx, + candidates=candidates_p, + last_tokens_data = _arr, + penalty_last_n = last_n_repeat, + penalty_repeat = llama_cpp.c_float(self.params.repeat_penalty), + penalty_freq = llama_cpp.c_float(self.params.frequency_penalty), + penalty_present = llama_cpp.c_float(self.params.presence_penalty), + ) + + # NOT PRESENT IN CURRENT VERSION ? + # llama_cpp.llama_sample_frequency_and_presence_penalti(self.ctx, candidates_p, + # _arr, + # last_n_repeat, llama_cpp.c_float(self.params.frequency_penalty), llama_cpp.c_float(self.params.presence_penalty)) + + if not self.params.penalize_nl: + logits[llama_cpp.llama_token_nl()] = nl_logit + + if self.params.temp <= 0: + # Greedy sampling + id = llama_cpp.llama_sample_token_greedy(self.ctx, candidates_p) + else: + if self.params.mirostat == 1: + mirostat_mu = 2.0 * self.params.mirostat_tau + mirostat_m = 100 + llama_cpp.llama_sample_temperature(self.ctx, candidates_p, llama_cpp.c_float(self.params.temp)) + id = llama_cpp.llama_sample_token_mirostat(self.ctx, candidates_p, llama_cpp.c_float(self.params.mirostat_tau), llama_cpp.c_float(self.params.mirostat_eta), llama_cpp.c_int(mirostat_m), llama_cpp.c_float(mirostat_mu)) + elif self.params.mirostat == 2: + mirostat_mu = 2.0 * self.params.mirostat_tau + llama_cpp.llama_sample_temperature(self.ctx, candidates_p, llama_cpp.c_float(self.params.temp)) + id = llama_cpp.llama_sample_token_mirostat_v2(self.ctx, candidates_p, llama_cpp.c_float(self.params.mirostat_tau), llama_cpp.c_float(self.params.mirostat_eta), llama_cpp.c_float(mirostat_mu)) + else: + # Temperature sampling + llama_cpp.llama_sample_top_k(self.ctx, candidates_p, top_k, min_keep=llama_cpp.c_size_t(1)) + llama_cpp.llama_sample_tail_free(self.ctx, candidates_p, llama_cpp.c_float(self.params.tfs_z), min_keep=llama_cpp.c_size_t(1)) + llama_cpp.llama_sample_typical(self.ctx, candidates_p, llama_cpp.c_float(self.params.typical_p), min_keep=llama_cpp.c_size_t(1)) + llama_cpp.llama_sample_top_p(self.ctx, candidates_p, llama_cpp.c_float(self.params.top_p), min_keep=llama_cpp.c_size_t(1)) + llama_cpp.llama_sample_temperature(self.ctx, candidates_p, llama_cpp.c_float(self.params.temp)) + id = llama_cpp.llama_sample_token(self.ctx, candidates_p) + # print("`{}`".format(candidates_p.size)) + + self.last_n_tokens.pop(0) + self.last_n_tokens.append(id) + + # replace end of text token with newline token when in interactive mode + if (id == llama_cpp.llama_token_eos(self.ctx) and self.params.interactive and not self.params.instruct): + id = self.llama_token_newline[0] + self.embd.append(id) + if (self.use_antiprompt()): + # tokenize and inject first reverse prompt + self.embd_inp += self.first_antiprompt[0] + for id in self.first_antiprompt[0]: + self.embd.append(id) + else: + # add it to the context + self.embd.append(id) + + # echo this to console + self.output_echo = True + + # decrement remaining sampling budget + self.remaining_tokens -= 1 + else: + # output to console if input echo is on + 
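+                # (prompt/user-input tokens are still being consumed here, so
+                # echoing is governed by the --input-noecho flag rather than
+                # forced on)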
self.output_echo = self.params.input_echo + + # some user input remains from prompt or interaction, forward it to processing + while len(self.embd_inp) > self.input_consumed: + self.embd.append(self.embd_inp[self.input_consumed]) + self.last_n_tokens.pop(0) + self.last_n_tokens.append(self.embd_inp[self.input_consumed]) + self.input_consumed += 1 + if len(self.embd) >= self.params.n_batch: + break + + # display tokens + if self.output_echo: + for id in self.embd: + if self.antiecho != None: + for r in self.antiecho(id): + yield r + else: + yield id + + # reset color to default if we there is no pending user input + if (self.params.input_echo and len(self.embd_inp) == self.input_consumed): + self.set_color(util.CONSOLE_COLOR_DEFAULT) + + if (self.params.interactive and len(self.embd_inp) <= self.input_consumed): + # if antiprompt is present, stop + if (self.use_antiprompt()): + if True in [ + i == self.last_n_tokens[-len(i):] + for i in self.first_antiprompt + ]: + break + + # if we are using instruction mode, and we have processed the initial prompt + if (self.params.interactive_start): + break + + # end of text token + if len(self.embd) > 0 and self.embd[-1] == llama_cpp.llama_token_eos(self.ctx): + if (not self.params.instruct): + for i in self.llama_token_eot: + yield i + break + + # respect n_predict even if antiprompt is present + if (self.params.interactive and self.remaining_tokens <= 0 and self.params.n_predict != -1): + # If we arent in instruction mode, fix the current generation by appending the antiprompt. + # Makes it so if chat ends prematurely you dont append the AI's text etc. + if not self.params.instruct: + self.embd_inp += self.first_antiprompt[0] + self.n_remain = self.params.n_predict + break + + self.params.interactive_start = False + + def __enter__(self): + return self + + def __exit__(self, type, value, tb): + self.exit() + + def exit(self): + llama_cpp.llama_free(self.ctx) + self.set_color(util.CONSOLE_COLOR_DEFAULT) + + def token_to_str(self, token_id: int) -> bytes: + size = 32 + buffer = (ctypes.c_char * size)() + n = llama_cpp.llama_token_to_piece( + self.model, llama_cpp.llama_token(token_id), buffer, size) + assert n <= size + return bytes(buffer[:n]) + + # return past text + def past(self): + for id in self.last_n_tokens[-self.n_past:]: + yield self.token_to_str(id).decode("utf8", errors="ignore") + + # write input + def input(self, prompt: str): + if (self.params.instruct and self.last_n_tokens[-len(self.inp_prefix):] != self.inp_prefix): + self.embd_inp += self.inp_prefix + self.embd_inp += self._tokenize(prompt) + if (self.params.instruct): + self.embd_inp += self.inp_suffix + + # write output + def output(self): + self.remaining_tokens = self.params.n_predict + for id in self.generate(): + cur_char = self.token_to_str(id) + + # Add remainder of missing bytes + if None in self.multibyte_fix: + self.multibyte_fix[self.multibyte_fix.index(None)] = cur_char + + # Return completed utf char + if len(self.multibyte_fix) > 0 and not None in self.multibyte_fix: + yield (b"".join(self.multibyte_fix)).decode("utf8") + self.multibyte_fix = [] + continue + + # Contains multi-byte UTF8 + for num, pattern in [(2, 192), (3, 224), (4, 240)]: + # Bitwise AND check + if pattern & int.from_bytes(cur_char, 'little') == pattern: + self.multibyte_fix = [cur_char] + ([None] * (num-1)) + + # Stop incomplete bytes from passing + if len(self.multibyte_fix) > 0: + continue + + yield cur_char.decode("utf8") + + # read user input + def read_input(self): + out = "" + while (t := 
input()).endswith("\\"): + out += t[:-1] + "\n" + return out + t + "\n" + + # interactive mode + def interact(self): + for i in self.output(): + print(i,end="",flush=True) + self.params.input_echo = False + + # Using string instead of tokens to check for antiprompt, + # It is more reliable than tokens for interactive mode. + generated_str = "" + while self.params.interactive: + self.set_color(util.CONSOLE_COLOR_USER_INPUT) + if (self.params.instruct): + print('\n> ', end="") + self.input(self.read_input()) + else: + print(self.params.input_prefix, end="") + self.input(f"{self.params.input_prefix}{self.read_input()}{self.params.input_suffix}") + print(self.params.input_suffix,end="") + self.set_color(util.CONSOLE_COLOR_DEFAULT) + + try: + for i in self.output(): + print(i,end="",flush=True) + generated_str += i + for ap in self.params.antiprompt: + if generated_str.endswith(ap): + raise KeyboardInterrupt + except KeyboardInterrupt: + self.set_color(util.CONSOLE_COLOR_DEFAULT) + if not self.params.instruct: + print(self.params.fix_prefix,end="") + self.input(self.params.fix_prefix) + +if __name__ == "__main__": + from datetime import datetime + + USER_NAME="User" + AI_NAME="ChatLLaMa" + + time_now = datetime.now() + prompt = f"""Text transcript of a never ending dialog, where {USER_NAME} interacts with an AI assistant named {AI_NAME}. +{AI_NAME} is helpful, kind, honest, friendly, good at writing and never fails to answer {USER_NAME}’s requests immediately and with details and precision. +Transcript below contains only the recorded dialog between two, without any annotations like (30 seconds passed...) or (to himself), just what {USER_NAME} and {AI_NAME} say aloud to each other. +The dialog lasts for years, the entirety of it is shared below. It's 10000 pages long. +The transcript only includes text, it does not include markup like HTML and Markdown. + +{USER_NAME}: Hello, {AI_NAME}! +{AI_NAME}: Hello {USER_NAME}! How may I help you today? +{USER_NAME}: What time is it? +{AI_NAME}: It is {time_now.strftime("%H:%M")}. +{USER_NAME}: What year is it? +{AI_NAME}: We are in {time_now.strftime("%Y")}. +{USER_NAME}: What is a cat? +{AI_NAME}: A cat is a domestic species of small carnivorous mammal. It is the only domesticated species in the family Felidae. +{USER_NAME}: Name a color. 
+{AI_NAME}: Blue +{USER_NAME}: """ + + params = gpt_params_parse() + if params.prompt is None and params.file is None: + params.prompt = prompt + + with LLaMAInteract(params) as m: + m.interact() diff --git a/llama-cpp-python/examples/low_level_api/low_level_api_llama_cpp.py b/llama-cpp-python/examples/low_level_api/low_level_api_llama_cpp.py new file mode 100644 index 0000000000000000000000000000000000000000..ef1b2c0162e8edd321e2b9c1ce375d96f1f1d048 --- /dev/null +++ b/llama-cpp-python/examples/low_level_api/low_level_api_llama_cpp.py @@ -0,0 +1,129 @@ +import ctypes +import os +import multiprocessing + +import llama_cpp + +llama_cpp.llama_backend_init(numa=False) + +N_THREADS = multiprocessing.cpu_count() +MODEL_PATH = os.environ.get('MODEL', "../models/7B/ggml-model.bin") + +prompt = b"\n\n### Instruction:\nWhat is the capital of France?\n\n### Response:\n" + +lparams = llama_cpp.llama_model_default_params() +cparams = llama_cpp.llama_context_default_params() +model = llama_cpp.llama_load_model_from_file(MODEL_PATH.encode('utf-8'), lparams) +ctx = llama_cpp.llama_new_context_with_model(model, cparams) + +# determine the required inference memory per token: +tmp = [0, 1, 2, 3] +llama_cpp.llama_eval( + ctx = ctx, + tokens=(llama_cpp.c_int * len(tmp))(*tmp), + n_tokens=len(tmp), + n_past=0 + )# Deprecated + +n_past = 0 + +prompt = b" " + prompt + +embd_inp = (llama_cpp.llama_token * (len(prompt) + 1))() +n_of_tok = llama_cpp.llama_tokenize( + model=model, + text=bytes(str(prompt),'utf-8'), + text_len=len(embd_inp), + tokens=embd_inp, + n_max_tokens=len(embd_inp), + add_bos=False, + special=False +) +embd_inp = embd_inp[:n_of_tok] + +n_ctx = llama_cpp.llama_n_ctx(ctx) + +n_predict = 20 +n_predict = min(n_predict, n_ctx - len(embd_inp)) + +input_consumed = 0 +input_noecho = False + +remaining_tokens = n_predict + +embd = [] +last_n_size = 64 +last_n_tokens_data = [0] * last_n_size +n_batch = 24 +last_n_repeat = 64 +repeat_penalty = 1 +frequency_penalty = 0.0 +presence_penalty = 0.0 + +while remaining_tokens > 0: + if len(embd) > 0: + llama_cpp.llama_eval( + ctx = ctx, + tokens=(llama_cpp.c_int * len(embd))(*embd), + n_tokens=len(embd), + n_past=n_past + )# Deprecated + + n_past += len(embd) + embd = [] + if len(embd_inp) <= input_consumed: + logits = llama_cpp.llama_get_logits(ctx) + n_vocab = llama_cpp.llama_n_vocab(model) + + _arr = (llama_cpp.llama_token_data * n_vocab)(*[ + llama_cpp.llama_token_data(token_id, logits[token_id], 0.0) + for token_id in range(n_vocab) + ]) + candidates_p = llama_cpp.ctypes.pointer( + llama_cpp.llama_token_data_array(_arr, len(_arr), False)) + + _arr = (llama_cpp.c_int * len(last_n_tokens_data))(*last_n_tokens_data) + llama_cpp.llama_sample_repetition_penalties(ctx, candidates_p, + _arr, + penalty_last_n=last_n_repeat, + penalty_repeat=repeat_penalty, + penalty_freq=frequency_penalty, + penalty_present=presence_penalty) + + llama_cpp.llama_sample_top_k(ctx, candidates_p, k=40, min_keep=1) + llama_cpp.llama_sample_top_p(ctx, candidates_p, p=0.8, min_keep=1) + llama_cpp.llama_sample_temperature(ctx, candidates_p, temp=0.2) + id = llama_cpp.llama_sample_token(ctx, candidates_p) + + last_n_tokens_data = last_n_tokens_data[1:] + [id] + embd.append(id) + input_noecho = False + remaining_tokens -= 1 + else: + while len(embd_inp) > input_consumed: + embd.append(embd_inp[input_consumed]) + last_n_tokens_data = last_n_tokens_data[1:] + [embd_inp[input_consumed]] + input_consumed += 1 + if len(embd) >= n_batch: + break + if not input_noecho: + for id in embd: + size = 
32 + buffer = (ctypes.c_char * size)() + n = llama_cpp.llama_token_to_piece( + model, llama_cpp.llama_token(id), buffer, size) + assert n <= size + print( + buffer[:n].decode('utf-8'), + end="", + flush=True, + ) + + if len(embd) > 0 and embd[-1] == llama_cpp.llama_token_eos(ctx): + break + +print() + +llama_cpp.llama_print_timings(ctx) + +llama_cpp.llama_free(ctx) diff --git a/llama-cpp-python/examples/low_level_api/quantize.py b/llama-cpp-python/examples/low_level_api/quantize.py new file mode 100644 index 0000000000000000000000000000000000000000..cdb038a7121d560d79183aea4de4890451654964 --- /dev/null +++ b/llama-cpp-python/examples/low_level_api/quantize.py @@ -0,0 +1,28 @@ +import os +import argparse +import llama_cpp + + +def main(args): + fname_inp = args.fname_inp.encode("utf-8") + fname_out = args.fname_out.encode("utf-8") + if not os.path.exists(fname_inp): + raise RuntimeError(f"Input file does not exist ({fname_inp})") + if os.path.exists(fname_out): + raise RuntimeError(f"Output file already exists ({fname_out})") + ftype = args.type + args = llama_cpp.llama_model_quantize_default_params() + args.ftype = ftype + return_code = llama_cpp.llama_model_quantize(fname_inp, fname_out, args) + if return_code != 0: + raise RuntimeError("Failed to quantize model") + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("fname_inp", type=str, help="Path to input model") + parser.add_argument("fname_out", type=str, help="Path to output model") + parser.add_argument("type", type=int, help="Type of quantization (2: q4_0, 3: q4_1), see llama_cpp.py for enum") + args = parser.parse_args() + main(args) + diff --git a/llama-cpp-python/examples/low_level_api/readme/low_level_api_llama_cpp.md b/llama-cpp-python/examples/low_level_api/readme/low_level_api_llama_cpp.md new file mode 100644 index 0000000000000000000000000000000000000000..5f350ffe99abe7297b2839f36fd0eaeb98887805 --- /dev/null +++ b/llama-cpp-python/examples/low_level_api/readme/low_level_api_llama_cpp.md @@ -0,0 +1,61 @@ +# Low-Level API for Llama_cpp + +## Overview +This Python script, low_level_api_llama_cpp.py, demonstrates the implementation of a low-level API for interacting with the llama_cpp library. The script defines an inference that generates embeddings based on a given prompt using .gguf model. + +### Prerequisites +Before running the script, ensure that you have the following dependencies installed: + +. Python 3.6 or higher +. llama_cpp: A C++ library for working with .gguf model +. NumPy: A fundamental package for scientific computing with Python +. multiprocessing: A Python module for parallel computing + +### Usage +install depedencies: +```bash +python -m pip install llama-cpp-python ctypes os multiprocessing +``` +Run the script: +```bash +python low_level_api_llama_cpp.py +``` + +## Code Structure +The script is organized as follows: + +### . Initialization: + Load the model from the specified path. + Create a context for model evaluation. + +### . Tokenization: + Tokenize the input prompt using the llama_tokenize function. + Prepare the input tokens for model evaluation. + +### . Inference: + Perform model evaluation to generate responses. + Sample from the model's output using various strategies (top-k, top-p, temperature). + +### . Output: + Print the generated tokens and the corresponding decoded text. + +### .Cleanup: + Free resources and print timing information. + +## Configuration +Customize the inference behavior by adjusting the following variables: + +#### . 
N_THREADS: Number of CPU threads to use for model evaluation. +#### . MODEL_PATH: Path to the model file. +#### . prompt: Input prompt for the chatbot. + +## Notes +. Ensure that the llama_cpp library is built and available in the system. Follow the instructions in the llama_cpp repository for building and installing the library. + +. This script is designed to work with the .gguf model and may require modifications for compatibility with other models. + +## Acknowledgments +This code is based on the llama_cpp library developed by the community. Special thanks to the contributors for their efforts. + +## License +This project is licensed under the MIT License - see the LICENSE file for details. \ No newline at end of file diff --git a/llama-cpp-python/examples/low_level_api/util.py b/llama-cpp-python/examples/low_level_api/util.py new file mode 100644 index 0000000000000000000000000000000000000000..9d0ec2f705618e591cfa8d6512cb9a96b3da75f1 --- /dev/null +++ b/llama-cpp-python/examples/low_level_api/util.py @@ -0,0 +1,95 @@ + +ANSI_COLOR_RESET = "\x1b[0m" +ANSI_COLOR_YELLOW = "\x1b[33m" +ANSI_BOLD = "\x1b[1m" +ANSI_COLOR_GREEN = "\x1b[32m" + +CONSOLE_COLOR_DEFAULT = ANSI_COLOR_RESET +CONSOLE_COLOR_PROMPT = ANSI_COLOR_YELLOW +CONSOLE_COLOR_USER_INPUT = ANSI_BOLD + ANSI_COLOR_GREEN + +# Iterative search +# Actively searches and prevents a pattern from being returned +class IterSearch: + def __init__(self, pattern): + self.pattern = list(pattern) + self.buffer = [] + + def __call__(self, char): + self.buffer += [char] + + if (self.pattern[:len(self.buffer)] == self.buffer): + if (len(self.buffer) >= len(self.pattern)): + self.buffer.clear() + return [] + + _tmp = self.buffer[:] + self.buffer.clear() + return _tmp + +class Circle: + def __init__(self, size, default=0): + self.list = [default] * size + self.maxsize = size + self.size = 0 + self.offset = 0 + + def append(self, elem): + if self.size < self.maxsize: + self.list[self.size] = elem + self.size += 1 + else: + self.list[self.offset] = elem + self.offset = (self.offset + 1) % self.maxsize + + def __getitem__(self, val): + if isinstance(val, int): + if 0 > val or val >= self.size: + raise IndexError('Index out of range') + return self.list[val] if self.size < self.maxsize else self.list[(self.offset + val) % self.maxsize] + elif isinstance(val, slice): + start, stop, step = val.start, val.stop, val.step + if step is None: + step = 1 + if start is None: + start = 0 + if stop is None: + stop = self.size + if start < 0: + start = self.size + start + if stop < 0: + stop = self.size + stop + + indices = range(start, stop, step) + return [self.list[(self.offset + i) % self.maxsize] for i in indices if i < self.size] + else: + raise TypeError('Invalid argument type') + + + + +if __name__ == "__main__": + c = Circle(5) + + c.append(1) + print(c.list) + print(c[:]) + assert c[0] == 1 + assert c[:5] == [1] + + for i in range(2,5+1): + c.append(i) + print(c.list) + print(c[:]) + assert c[0] == 1 + assert c[:5] == [1,2,3,4,5] + + for i in range(5+1,9+1): + c.append(i) + print(c.list) + print(c[:]) + assert c[0] == 5 + assert c[:5] == [5,6,7,8,9] + #assert c[:-5] == [5,6,7,8,9] + assert c[:10] == [5,6,7,8,9] + diff --git a/llama-cpp-python/examples/notebooks/Batching.ipynb b/llama-cpp-python/examples/notebooks/Batching.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..687316b327ca038d26a338b3070a7d4698322515 --- /dev/null +++ b/llama-cpp-python/examples/notebooks/Batching.ipynb @@ -0,0 +1,723 @@ +{ + "cells": [ + { + 
"cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import llama_cpp" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "ggml_init_cublas: GGML_CUDA_FORCE_MMQ: no\n", + "ggml_init_cublas: CUDA_USE_TENSOR_CORES: yes\n", + "ggml_init_cublas: found 1 CUDA devices:\n", + " Device 0: NVIDIA GeForce RTX 2060, compute capability 7.5\n" + ] + } + ], + "source": [ + "llama_cpp.llama_backend_init(numa=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "llama_model_loader: loaded meta data with 16 key-value pairs and 291 tensors from ../../models/mistral-7b-v0.1-GGUF/ggml-model-Q4_K.gguf (version GGUF V2)\n", + "llama_model_loader: - tensor 0: token_embd.weight q4_K [ 4096, 32000, 1, 1 ]\n", + "llama_model_loader: - tensor 1: output_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 2: output.weight q6_K [ 4096, 32000, 1, 1 ]\n", + "llama_model_loader: - tensor 3: blk.0.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 4: blk.0.attn_k.weight q4_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 5: blk.0.attn_v.weight q6_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 6: blk.0.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 7: blk.0.ffn_gate.weight q4_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 8: blk.0.ffn_down.weight q6_K [ 14336, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 9: blk.0.ffn_up.weight q4_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 10: blk.0.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 11: blk.0.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 12: blk.1.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 13: blk.1.attn_k.weight q4_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 14: blk.1.attn_v.weight q6_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 15: blk.1.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 16: blk.1.ffn_gate.weight q4_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 17: blk.1.ffn_down.weight q6_K [ 14336, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 18: blk.1.ffn_up.weight q4_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 19: blk.1.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 20: blk.1.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 21: blk.2.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 22: blk.2.attn_k.weight q4_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 23: blk.2.attn_v.weight q6_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 24: blk.2.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 25: blk.2.ffn_gate.weight q4_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 26: blk.2.ffn_down.weight q6_K [ 14336, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 27: blk.2.ffn_up.weight q4_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 28: blk.2.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 29: blk.2.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 30: blk.3.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n", + 
"llama_model_loader: - tensor 31: blk.3.attn_k.weight q4_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 32: blk.3.attn_v.weight q6_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 33: blk.3.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 34: blk.3.ffn_gate.weight q4_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 35: blk.3.ffn_down.weight q6_K [ 14336, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 36: blk.3.ffn_up.weight q4_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 37: blk.3.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 38: blk.3.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 39: blk.4.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 40: blk.4.attn_k.weight q4_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 41: blk.4.attn_v.weight q4_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 42: blk.4.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 43: blk.4.ffn_gate.weight q4_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 44: blk.4.ffn_down.weight q4_K [ 14336, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 45: blk.4.ffn_up.weight q4_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 46: blk.4.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 47: blk.4.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 48: blk.5.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 49: blk.5.attn_k.weight q4_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 50: blk.5.attn_v.weight q4_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 51: blk.5.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 52: blk.5.ffn_gate.weight q4_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 53: blk.5.ffn_down.weight q4_K [ 14336, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 54: blk.5.ffn_up.weight q4_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 55: blk.5.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 56: blk.5.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 57: blk.6.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 58: blk.6.attn_k.weight q4_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 59: blk.6.attn_v.weight q6_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 60: blk.6.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 61: blk.6.ffn_gate.weight q4_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 62: blk.6.ffn_down.weight q6_K [ 14336, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 63: blk.6.ffn_up.weight q4_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 64: blk.6.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 65: blk.6.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 66: blk.7.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 67: blk.7.attn_k.weight q4_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 68: blk.7.attn_v.weight q4_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 69: blk.7.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 70: blk.7.ffn_gate.weight q4_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 71: blk.7.ffn_down.weight q4_K [ 14336, 
4096, 1, 1 ]\n", + "llama_model_loader: - tensor 72: blk.7.ffn_up.weight q4_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 73: blk.7.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 74: blk.7.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 75: blk.8.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 76: blk.8.attn_k.weight q4_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 77: blk.8.attn_v.weight q4_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 78: blk.8.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 79: blk.8.ffn_gate.weight q4_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 80: blk.8.ffn_down.weight q4_K [ 14336, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 81: blk.8.ffn_up.weight q4_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 82: blk.8.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 83: blk.8.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 84: blk.9.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 85: blk.9.attn_k.weight q4_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 86: blk.9.attn_v.weight q6_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 87: blk.9.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 88: blk.9.ffn_gate.weight q4_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 89: blk.9.ffn_down.weight q6_K [ 14336, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 90: blk.9.ffn_up.weight q4_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 91: blk.9.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 92: blk.9.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 93: blk.10.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 94: blk.10.attn_k.weight q4_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 95: blk.10.attn_v.weight q4_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 96: blk.10.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 97: blk.10.ffn_gate.weight q4_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 98: blk.10.ffn_down.weight q4_K [ 14336, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 99: blk.10.ffn_up.weight q4_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 100: blk.10.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 101: blk.10.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 102: blk.11.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 103: blk.11.attn_k.weight q4_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 104: blk.11.attn_v.weight q4_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 105: blk.11.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 106: blk.11.ffn_gate.weight q4_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 107: blk.11.ffn_down.weight q4_K [ 14336, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 108: blk.11.ffn_up.weight q4_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 109: blk.11.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 110: blk.11.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 111: blk.12.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 
112: blk.12.attn_k.weight q4_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 113: blk.12.attn_v.weight q6_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 114: blk.12.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 115: blk.12.ffn_gate.weight q4_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 116: blk.12.ffn_down.weight q6_K [ 14336, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 117: blk.12.ffn_up.weight q4_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 118: blk.12.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 119: blk.12.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 120: blk.13.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 121: blk.13.attn_k.weight q4_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 122: blk.13.attn_v.weight q4_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 123: blk.13.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 124: blk.13.ffn_gate.weight q4_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 125: blk.13.ffn_down.weight q4_K [ 14336, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 126: blk.13.ffn_up.weight q4_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 127: blk.13.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 128: blk.13.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 129: blk.14.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 130: blk.14.attn_k.weight q4_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 131: blk.14.attn_v.weight q4_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 132: blk.14.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 133: blk.14.ffn_gate.weight q4_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 134: blk.14.ffn_down.weight q4_K [ 14336, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 135: blk.14.ffn_up.weight q4_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 136: blk.14.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 137: blk.14.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 138: blk.15.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 139: blk.15.attn_k.weight q4_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 140: blk.15.attn_v.weight q6_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 141: blk.15.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 142: blk.15.ffn_gate.weight q4_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 143: blk.15.ffn_down.weight q6_K [ 14336, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 144: blk.15.ffn_up.weight q4_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 145: blk.15.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 146: blk.15.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 147: blk.16.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 148: blk.16.attn_k.weight q4_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 149: blk.16.attn_v.weight q4_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 150: blk.16.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 151: blk.16.ffn_gate.weight q4_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: 
- tensor 152: blk.16.ffn_down.weight q4_K [ 14336, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 153: blk.16.ffn_up.weight q4_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 154: blk.16.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 155: blk.16.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 156: blk.17.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 157: blk.17.attn_k.weight q4_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 158: blk.17.attn_v.weight q4_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 159: blk.17.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 160: blk.17.ffn_gate.weight q4_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 161: blk.17.ffn_down.weight q4_K [ 14336, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 162: blk.17.ffn_up.weight q4_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 163: blk.17.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 164: blk.17.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 165: blk.18.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 166: blk.18.attn_k.weight q4_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 167: blk.18.attn_v.weight q6_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 168: blk.18.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 169: blk.18.ffn_gate.weight q4_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 170: blk.18.ffn_down.weight q6_K [ 14336, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 171: blk.18.ffn_up.weight q4_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 172: blk.18.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 173: blk.18.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 174: blk.19.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 175: blk.19.attn_k.weight q4_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 176: blk.19.attn_v.weight q4_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 177: blk.19.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 178: blk.19.ffn_gate.weight q4_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 179: blk.19.ffn_down.weight q4_K [ 14336, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 180: blk.19.ffn_up.weight q4_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 181: blk.19.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 182: blk.19.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 183: blk.20.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 184: blk.20.attn_k.weight q4_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 185: blk.20.attn_v.weight q4_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 186: blk.20.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 187: blk.20.ffn_gate.weight q4_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 188: blk.20.ffn_down.weight q4_K [ 14336, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 189: blk.20.ffn_up.weight q4_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 190: blk.20.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 191: blk.20.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + 
"llama_model_loader: - tensor 192: blk.21.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 193: blk.21.attn_k.weight q4_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 194: blk.21.attn_v.weight q6_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 195: blk.21.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 196: blk.21.ffn_gate.weight q4_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 197: blk.21.ffn_down.weight q6_K [ 14336, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 198: blk.21.ffn_up.weight q4_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 199: blk.21.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 200: blk.21.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 201: blk.22.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 202: blk.22.attn_k.weight q4_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 203: blk.22.attn_v.weight q4_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 204: blk.22.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 205: blk.22.ffn_gate.weight q4_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 206: blk.22.ffn_down.weight q4_K [ 14336, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 207: blk.22.ffn_up.weight q4_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 208: blk.22.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 209: blk.22.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 210: blk.23.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 211: blk.23.attn_k.weight q4_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 212: blk.23.attn_v.weight q4_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 213: blk.23.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 214: blk.23.ffn_gate.weight q4_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 215: blk.23.ffn_down.weight q4_K [ 14336, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 216: blk.23.ffn_up.weight q4_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 217: blk.23.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 218: blk.23.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 219: blk.24.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 220: blk.24.attn_k.weight q4_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 221: blk.24.attn_v.weight q6_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 222: blk.24.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 223: blk.24.ffn_gate.weight q4_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 224: blk.24.ffn_down.weight q6_K [ 14336, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 225: blk.24.ffn_up.weight q4_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 226: blk.24.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 227: blk.24.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 228: blk.25.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 229: blk.25.attn_k.weight q4_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 230: blk.25.attn_v.weight q4_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 231: blk.25.attn_output.weight q4_K [ 4096, 4096, 1, 1 
]\n", + "llama_model_loader: - tensor 232: blk.25.ffn_gate.weight q4_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 233: blk.25.ffn_down.weight q4_K [ 14336, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 234: blk.25.ffn_up.weight q4_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 235: blk.25.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 236: blk.25.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 237: blk.26.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 238: blk.26.attn_k.weight q4_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 239: blk.26.attn_v.weight q4_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 240: blk.26.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 241: blk.26.ffn_gate.weight q4_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 242: blk.26.ffn_down.weight q4_K [ 14336, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 243: blk.26.ffn_up.weight q4_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 244: blk.26.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 245: blk.26.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 246: blk.27.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 247: blk.27.attn_k.weight q4_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 248: blk.27.attn_v.weight q6_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 249: blk.27.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 250: blk.27.ffn_gate.weight q4_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 251: blk.27.ffn_down.weight q6_K [ 14336, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 252: blk.27.ffn_up.weight q4_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 253: blk.27.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 254: blk.27.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 255: blk.28.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 256: blk.28.attn_k.weight q4_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 257: blk.28.attn_v.weight q6_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 258: blk.28.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 259: blk.28.ffn_gate.weight q4_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 260: blk.28.ffn_down.weight q6_K [ 14336, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 261: blk.28.ffn_up.weight q4_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 262: blk.28.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 263: blk.28.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 264: blk.29.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 265: blk.29.attn_k.weight q4_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 266: blk.29.attn_v.weight q6_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 267: blk.29.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 268: blk.29.ffn_gate.weight q4_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 269: blk.29.ffn_down.weight q6_K [ 14336, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 270: blk.29.ffn_up.weight q4_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 271: blk.29.attn_norm.weight f32 [ 4096, 
1, 1, 1 ]\n", + "llama_model_loader: - tensor 272: blk.29.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 273: blk.30.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 274: blk.30.attn_k.weight q4_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 275: blk.30.attn_v.weight q6_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 276: blk.30.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 277: blk.30.ffn_gate.weight q4_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 278: blk.30.ffn_down.weight q6_K [ 14336, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 279: blk.30.ffn_up.weight q4_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 280: blk.30.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 281: blk.30.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 282: blk.31.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 283: blk.31.attn_k.weight q4_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 284: blk.31.attn_v.weight q6_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 285: blk.31.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 286: blk.31.ffn_gate.weight q4_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 287: blk.31.ffn_down.weight q6_K [ 14336, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 288: blk.31.ffn_up.weight q4_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 289: blk.31.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 290: blk.31.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - kv 0: general.architecture str \n", + "llama_model_loader: - kv 1: general.name str \n", + "llama_model_loader: - kv 2: llama.context_length u32 \n", + "llama_model_loader: - kv 3: llama.embedding_length u32 \n", + "llama_model_loader: - kv 4: llama.block_count u32 \n", + "llama_model_loader: - kv 5: llama.feed_forward_length u32 \n", + "llama_model_loader: - kv 6: llama.rope.dimension_count u32 \n", + "llama_model_loader: - kv 7: llama.attention.head_count u32 \n", + "llama_model_loader: - kv 8: llama.attention.head_count_kv u32 \n", + "llama_model_loader: - kv 9: llama.attention.layer_norm_rms_epsilon f32 \n", + "llama_model_loader: - kv 10: general.file_type u32 \n", + "llama_model_loader: - kv 11: tokenizer.ggml.model str \n", + "llama_model_loader: - kv 12: tokenizer.ggml.tokens arr \n", + "llama_model_loader: - kv 13: tokenizer.ggml.scores arr \n", + "llama_model_loader: - kv 14: tokenizer.ggml.token_type arr \n", + "llama_model_loader: - kv 15: general.quantization_version u32 \n", + "llama_model_loader: - type f32: 65 tensors\n", + "llama_model_loader: - type q4_K: 193 tensors\n", + "llama_model_loader: - type q6_K: 33 tensors\n", + "llm_load_vocab: special tokens definition check successful ( 259/32000 ).\n", + "llm_load_print_meta: format = GGUF V2\n", + "llm_load_print_meta: arch = llama\n", + "llm_load_print_meta: vocab type = SPM\n", + "llm_load_print_meta: n_vocab = 32000\n", + "llm_load_print_meta: n_merges = 0\n", + "llm_load_print_meta: n_ctx_train = 4096\n", + "llm_load_print_meta: n_embd = 4096\n", + "llm_load_print_meta: n_head = 32\n", + "llm_load_print_meta: n_head_kv = 8\n", + "llm_load_print_meta: n_layer = 32\n", + "llm_load_print_meta: n_rot = 128\n", + "llm_load_print_meta: n_gqa = 4\n", + "llm_load_print_meta: f_norm_eps = 0.0e+00\n", + "llm_load_print_meta: 
f_norm_rms_eps = 1.0e-05\n", + "llm_load_print_meta: f_clamp_kqv = 0.0e+00\n", + "llm_load_print_meta: f_max_alibi_bias = 0.0e+00\n", + "llm_load_print_meta: n_ff = 14336\n", + "llm_load_print_meta: freq_base_train = 10000.0\n", + "llm_load_print_meta: freq_scale_train = 1\n", + "llm_load_print_meta: model type = 7B\n", + "llm_load_print_meta: model ftype = mostly Q4_K - Medium\n", + "llm_load_print_meta: model params = 7.24 B\n", + "llm_load_print_meta: model size = 4.07 GiB (4.83 BPW) \n", + "llm_load_print_meta: general.name = LLaMA v2\n", + "llm_load_print_meta: BOS token = 1 ''\n", + "llm_load_print_meta: EOS token = 2 ''\n", + "llm_load_print_meta: UNK token = 0 ''\n", + "llm_load_print_meta: LF token = 13 '<0x0A>'\n", + "llm_load_tensors: ggml ctx size = 0.10 MB\n", + "llm_load_tensors: using CUDA for GPU acceleration\n", + "llm_load_tensors: mem required = 70.41 MB\n", + "llm_load_tensors: offloading 32 repeating layers to GPU\n", + "llm_load_tensors: offloading non-repeating layers to GPU\n", + "llm_load_tensors: offloaded 35/35 layers to GPU\n", + "llm_load_tensors: VRAM used: 4095.05 MB\n", + ".................................................................................................\n" + ] + } + ], + "source": [ + "params = llama_cpp.llama_model_default_params()\n", + "params.n_gpu_layers = 35\n", + "model = llama_cpp.llama_load_model_from_file(b\"../../models/mistral-7b-v0.1-GGUF/ggml-model-Q4_K.gguf\", params=params) # Update this to whatever" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[1, 1014, 2936, 9060, 285, 1142]\n", + "58\n" + ] + } + ], + "source": [ + "n_ctx = 512\n", + "n_len = 32\n", + "n_parallel = 2\n", + "prompt = b\"The quick brown fox\"\n", + "\n", + "tokens = (llama_cpp.llama_token * n_ctx)()\n", + "tokens_len = llama_cpp.llama_tokenize(model, prompt, len(prompt), tokens, len(tokens), True, True)\n", + "print(tokens[:tokens_len])\n", + "\n", + "n_kv_req = tokens_len + (n_len - tokens_len) * n_parallel\n", + "print(n_kv_req)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "llama_new_context_with_model: n_ctx = 58\n", + "llama_new_context_with_model: freq_base = 10000.0\n", + "llama_new_context_with_model: freq_scale = 1\n", + "llama_kv_cache_init: offloading v cache to GPU\n", + "llama_kv_cache_init: offloading k cache to GPU\n", + "llama_kv_cache_init: VRAM kv self = 7.25 MB\n", + "llama_new_context_with_model: kv self size = 7.25 MB\n", + "llama_build_graph: non-view tensors processed: 740/740\n", + "llama_new_context_with_model: compute buffer total size = 10.63 MB\n", + "llama_new_context_with_model: VRAM scratch buffer: 4.51 MB\n", + "llama_new_context_with_model: total VRAM used: 4106.81 MB (model: 4095.05 MB, context: 11.76 MB)\n" + ] + } + ], + "source": [ + "\n", + "ctx_params = llama_cpp.llama_context_default_params()\n", + "ctx_params.seed = 1234\n", + "ctx_params.n_ctx = n_kv_req\n", + "ctx_params.n_batch = max(n_len, n_parallel)\n", + "ctx_params.n_threads = 1\n", + "ctx_params.n_threads_batch = 1\n", + "ctx = llama_cpp.llama_new_context_with_model(model, ctx_params)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "n_ctx = llama_cpp.llama_n_ctx(ctx)\n", + "batch = llama_cpp.llama_batch_init(max(tokens_len, n_parallel), 0, 1)" + ] + }, + { + "cell_type": 
"code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "import ctypes\n", + "\n", + "batch.n_tokens = tokens_len\n", + "for i in range(tokens_len):\n", + " batch.token[i] = tokens[i]\n", + " batch.pos[i] = i\n", + " batch.seq_id[i][0] = 0\n", + " batch.n_seq_id[i] = 1\n", + " batch.logits[i] = False\n", + "\n", + "batch.logits[batch.n_tokens - 1] = True\n", + "\n", + "if llama_cpp.llama_decode(ctx, batch) != 0:\n", + " print(\"Error decoding\")" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "for i in range(n_parallel):\n", + " llama_cpp.llama_kv_cache_seq_cp(ctx, 0, i, 0, batch.n_tokens)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "7\n", + "[' j', ' jumped']\n", + "8\n", + "[' jumps', ' jumped over']\n", + "9\n", + "[' jumps over', ' jumped over the']\n", + "10\n", + "[' jumps over the', ' jumped over the lazy']\n", + "11\n", + "[' jumps over the lazy', ' jumped over the lazy dog']\n", + "12\n", + "[' jumps over the lazy dog', ' jumped over the lazy dog.']\n", + "13\n", + "[' jumps over the lazy dog.', ' jumped over the lazy dog.\\n']\n", + "14\n", + "[' jumps over the lazy dog.\\n', ' jumped over the lazy dog.\\n\\n']\n", + "15\n", + "[' jumps over the lazy dog.\\n\\n', ' jumped over the lazy dog.\\n\\nThe']\n", + "16\n", + "[' jumps over the lazy dog.\\n\\nI', ' jumped over the lazy dog.\\n\\nThe quick']\n", + "17\n", + "[' jumps over the lazy dog.\\n\\nI’', ' jumped over the lazy dog.\\n\\nThe quick brown']\n", + "18\n", + "[' jumps over the lazy dog.\\n\\nI’m', ' jumped over the lazy dog.\\n\\nThe quick brown f']\n", + "19\n", + "[' jumps over the lazy dog.\\n\\nI’m not', ' jumped over the lazy dog.\\n\\nThe quick brown fox']\n", + "20\n", + "[' jumps over the lazy dog.\\n\\nI’m not sure', ' jumped over the lazy dog.\\n\\nThe quick brown fox jumped']\n", + "21\n", + "[' jumps over the lazy dog.\\n\\nI’m not sure if', ' jumped over the lazy dog.\\n\\nThe quick brown fox jumped over']\n", + "22\n", + "[' jumps over the lazy dog.\\n\\nI’m not sure if that', ' jumped over the lazy dog.\\n\\nThe quick brown fox jumped over the']\n", + "23\n", + "[' jumps over the lazy dog.\\n\\nI’m not sure if that’', ' jumped over the lazy dog.\\n\\nThe quick brown fox jumped over the lazy']\n", + "24\n", + "[' jumps over the lazy dog.\\n\\nI’m not sure if that’s', ' jumped over the lazy dog.\\n\\nThe quick brown fox jumped over the lazy dog']\n", + "25\n", + "[' jumps over the lazy dog.\\n\\nI’m not sure if that’s the', ' jumped over the lazy dog.\\n\\nThe quick brown fox jumped over the lazy dog.']\n", + "26\n", + "[' jumps over the lazy dog.\\n\\nI’m not sure if that’s the most', ' jumped over the lazy dog.\\n\\nThe quick brown fox jumped over the lazy dog.\\n']\n", + "27\n", + "[' jumps over the lazy dog.\\n\\nI’m not sure if that’s the most famous', ' jumped over the lazy dog.\\n\\nThe quick brown fox jumped over the lazy dog.\\n\\n']\n", + "28\n", + "[' jumps over the lazy dog.\\n\\nI’m not sure if that’s the most famous sentence', ' jumped over the lazy dog.\\n\\nThe quick brown fox jumped over the lazy dog.\\n\\nThe']\n", + "29\n", + "[' jumps over the lazy dog.\\n\\nI’m not sure if that’s the most famous sentence in', ' jumped over the lazy dog.\\n\\nThe quick brown fox jumped over the lazy dog.\\n\\nThe quick']\n", + "30\n", + "[' jumps over the lazy dog.\\n\\nI’m not sure if that’s the most 
famous sentence in the', ' jumped over the lazy dog.\\n\\nThe quick brown fox jumped over the lazy dog.\\n\\nThe quick brown']\n", + "31\n", + "[' jumps over the lazy dog.\\n\\nI’m not sure if that’s the most famous sentence in the English', ' jumped over the lazy dog.\\n\\nThe quick brown fox jumped over the lazy dog.\\n\\nThe quick brown f']\n", + "32\n", + "[' jumps over the lazy dog.\\n\\nI’m not sure if that’s the most famous sentence in the English language', ' jumped over the lazy dog.\\n\\nThe quick brown fox jumped over the lazy dog.\\n\\nThe quick brown fox']\n" + ] + } + ], + "source": [ + "import ctypes\n", + "\n", + "streams = [\"\"] * n_parallel\n", + "i_batch = [batch.n_tokens - 1] * n_parallel\n", + "\n", + "n_cur = batch.n_tokens\n", + "n_decode = 0\n", + "\n", + "while n_cur <= n_len:\n", + " batch.n_tokens = 0\n", + " for i in range(n_parallel):\n", + " if i_batch[i] < 0:\n", + " continue\n", + " \n", + " n_vocab = llama_cpp.llama_n_vocab(model)\n", + " logits = llama_cpp.llama_get_logits_ith(ctx, i_batch[i])\n", + "\n", + " candidates = (llama_cpp.llama_token_data * n_vocab)()\n", + "\n", + " for token_id in range(n_vocab):\n", + " candidates[token_id].id = token_id\n", + " candidates[token_id].logit = logits[token_id]\n", + " candidates[token_id].p = 0.0\n", + "\n", + " candidates_p = llama_cpp.llama_token_data_array(candidates, len(candidates), False)\n", + "\n", + " top_k = 40\n", + " top_p = 0.9\n", + " temp = 0.4\n", + "\n", + " llama_cpp.llama_sample_top_k(ctx, ctypes.byref(candidates_p), top_k, 1)\n", + " llama_cpp.llama_sample_top_p(ctx, ctypes.byref(candidates_p), top_p, 1)\n", + " llama_cpp.llama_sample_temp (ctx, ctypes.byref(candidates_p), temp)\n", + " \n", + " new_token_id = llama_cpp.llama_sample_token(ctx, ctypes.byref(candidates_p))\n", + "\n", + " if new_token_id == llama_cpp.llama_token_eos(ctx) or n_cur == n_len:\n", + " i_batch[i] = -1\n", + " continue\n", + "\n", + " buf = (ctypes.c_char * 32)()\n", + " outlen = llama_cpp.llama_token_to_piece(model, new_token_id, buf, len(buf))\n", + " streams[i] += bytes(buf[:outlen]).decode(\"utf-8\")\n", + "\n", + " batch.token[batch.n_tokens] = new_token_id\n", + " batch.pos[batch.n_tokens] = n_cur\n", + " batch.seq_id[batch.n_tokens][0] = i\n", + " batch.n_seq_id[batch.n_tokens] = 1\n", + " batch.logits[batch.n_tokens] = True\n", + "\n", + " i_batch[i] = batch.n_tokens\n", + " batch.n_tokens += 1\n", + " n_decode += 1\n", + " \n", + " if batch.n_tokens == 0:\n", + " break\n", + "\n", + " n_cur += 1\n", + "\n", + " if llama_cpp.llama_decode(ctx, batch) != 0:\n", + " print(\"Error decoding\", flush=True)\n", + " break\n", + " print(n_cur)\n", + " print(streams)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[' jumps over the lazy dog.\\n\\nI’m not sure if that’s the most famous sentence in the English language', ' jumped over the lazy dog.\\n\\nThe quick brown fox jumped over the lazy dog.\\n\\nThe quick brown fox']\n" + ] + } + ], + "source": [ + "print(streams)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "llama_cpp.llama_batch_free(batch)" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "llama_cpp.llama_free(ctx)" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "llama_cpp.llama_free_model(model)" + ] + 
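The low-level loop in the notebook above drives tokenization, batch setup, sampling, and KV-cache copies by hand to decode two sequences in parallel. For a single completion, the same result can be obtained through the package's high-level `Llama` wrapper; the sketch below is illustrative only (not part of the diff), and the model path and sampling values are assumptions that simply mirror the notebook.

```python
# Minimal sketch: one completion via llama-cpp-python's high-level API,
# which hides the manual batching/KV-cache bookkeeping shown above.
from llama_cpp import Llama

llm = Llama(
    model_path="../../models/mistral-7b-v0.1-GGUF/ggml-model-Q4_K.gguf",  # assumed path, as in the notebook
    n_ctx=512,
    n_gpu_layers=35,  # offload layers only if the package was built with GPU support
    seed=1234,
)

out = llm(
    "The quick brown fox",
    max_tokens=32,
    temperature=0.4,
    top_k=40,
    top_p=0.9,
)
print(out["choices"][0]["text"])
```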
}, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "llama_cpp.llama_backend_free()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.5+" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/llama-cpp-python/examples/notebooks/Clients.ipynb b/llama-cpp-python/examples/notebooks/Clients.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..caebbb67fdb02a0a8897d4e4826ea046a9931f6f --- /dev/null +++ b/llama-cpp-python/examples/notebooks/Clients.ipynb @@ -0,0 +1,104 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + " JSON: {\n", + " \"choices\": [\n", + " {\n", + " \"finish_reason\": \"length\",\n", + " \"index\": 0,\n", + " \"logprobs\": null,\n", + " \"text\": \" over the lazy dog.\"\n", + " }\n", + " ],\n", + " \"created\": 1680960690,\n", + " \"id\": \"cmpl-ad3ba53d-407c-466b-bd5f-97cb8987af83\",\n", + " \"model\": \"models/ggml-alpaca.bin\",\n", + " \"object\": \"text_completion\",\n", + " \"usage\": {\n", + " \"completion_tokens\": 5,\n", + " \"prompt_tokens\": 8,\n", + " \"total_tokens\": 13\n", + " }\n", + "}" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import openai\n", + "\n", + "openai.api_key = \"sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx\" # can be anything\n", + "openai.api_base = \"http://100.64.159.73:8000/v1\"\n", + "\n", + "openai.Completion.create(\n", + " model=\"text-davinci-003\", # currently can be anything\n", + " prompt=\"The quick brown fox jumps\",\n", + " max_tokens=5,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "' over the lazy dog'" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import os\n", + "\n", + "os.environ[\"OPENAI_API_KEY\"] = \"sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx\" # can be anything\n", + "os.environ[\"OPENAI_API_BASE\"] = \"http://100.64.159.73:8000/v1\"\n", + "\n", + "from langchain.llms import OpenAI\n", + "\n", + "llms = OpenAI()\n", + "llms(\n", + " prompt=\"The quick brown fox jumps\",\n", + " stop=[\".\", \"\\n\"],\n", + ")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.10" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/llama-cpp-python/examples/notebooks/Functions.ipynb b/llama-cpp-python/examples/notebooks/Functions.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..f1e5e9a1d642ec37be27d548fa7074e794270e21 --- 
/dev/null +++ b/llama-cpp-python/examples/notebooks/Functions.ipynb @@ -0,0 +1,485 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Functions\n", + "\n", + "The OpenAI compatbile web server in `llama-cpp-python` supports function calling.\n", + "\n", + "Function calling allows API clients to specify a schema that gives the model a format it should respond in.\n", + "Function calling in `llama-cpp-python` works by combining models pretrained for function calling such as [`functionary`](https://huggingface.co/meetkai) with constrained sampling to produce a response that is compatible with the schema.\n", + "\n", + "Note however that this improves but does not guarantee that the response will be compatible with the schema.\n", + "\n", + "## Requirements\n", + "\n", + "Before we begin you will need the following:\n", + "\n", + "- A running `llama-cpp-python` server with a function calling compatible model. [See here](https://llama-cpp-python.readthedocs.io/en/latest/server/#function-calling)\n", + "- The OpenAI Python Client `pip install openai`\n", + "- (Optional) The Instructor Python Library `pip install instructor`\n", + "\n", + "## Function Calling with OpenAI Python Client\n", + "\n", + "We'll start with a basic demo that only uses the OpenAI Python Client." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "ChatCompletion(id='chatcmpl-a2d9eb9f-7354-472f-b6ad-4d7a807729a3', choices=[Choice(finish_reason='stop', index=0, message=ChatCompletionMessage(content='The current weather in San Francisco is **72°F** (22°C).\\n ', role='assistant', function_call=None, tool_calls=None))], created=1699638365, model='gpt-3.5-turbo-1106', object='chat.completion', system_fingerprint=None, usage=CompletionUsage(completion_tokens=22, prompt_tokens=136, total_tokens=158))\n" + ] + } + ], + "source": [ + "import openai\n", + "import json\n", + "\n", + "\n", + "client = openai.OpenAI(\n", + " api_key = \"sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx\", # can be anything\n", + " base_url = \"http://100.64.159.73:8000/v1\" # NOTE: Replace with IP address and port of your llama-cpp-python server\n", + ")\n", + "\n", + "# Example dummy function hard coded to return the same weather\n", + "# In production, this could be your backend API or an external API\n", + "def get_current_weather(location, unit=\"fahrenheit\"):\n", + " \"\"\"Get the current weather in a given location\"\"\"\n", + " if \"tokyo\" in location.lower():\n", + " return json.dumps({\"location\": \"Tokyo\", \"temperature\": \"10\", \"unit\": \"celsius\"})\n", + " elif \"san francisco\" in location.lower():\n", + " return json.dumps({\"location\": \"San Francisco\", \"temperature\": \"72\", \"unit\": \"fahrenheit\"})\n", + " elif \"paris\" in location.lower():\n", + " return json.dumps({\"location\": \"Paris\", \"temperature\": \"22\", \"unit\": \"celsius\"})\n", + " else:\n", + " return json.dumps({\"location\": location, \"temperature\": \"unknown\"})\n", + "\n", + "def run_conversation():\n", + " # Step 1: send the conversation and available functions to the model\n", + " messages = [{\"role\": \"user\", \"content\": \"What's the weather like in San Francisco, Tokyo, and Paris?\"}]\n", + " tools = [\n", + " {\n", + " \"type\": \"function\",\n", + " \"function\": {\n", + " \"name\": \"get_current_weather\",\n", + " \"description\": \"Get the current weather in a given location\",\n", + " \"parameters\": 
{\n", + " \"type\": \"object\",\n", + " \"properties\": {\n", + " \"location\": {\n", + " \"type\": \"string\",\n", + " \"description\": \"The city and state, e.g. San Francisco, CA\",\n", + " },\n", + " \"unit\": {\"type\": \"string\", \"enum\": [\"celsius\", \"fahrenheit\"]},\n", + " },\n", + " \"required\": [\"location\"],\n", + " },\n", + " },\n", + " }\n", + " ]\n", + " response = client.chat.completions.create(\n", + " model=\"gpt-3.5-turbo-1106\",\n", + " messages=messages,\n", + " tools=tools,\n", + " tool_choice=\"auto\", # auto is default, but we'll be explicit\n", + " )\n", + " response_message = response.choices[0].message\n", + " tool_calls = response_message.tool_calls\n", + " # Step 2: check if the model wanted to call a function\n", + " if tool_calls:\n", + " # Step 3: call the function\n", + " # Note: the JSON response may not always be valid; be sure to handle errors\n", + " available_functions = {\n", + " \"get_current_weather\": get_current_weather,\n", + " } # only one function in this example, but you can have multiple\n", + " messages.append(response_message) # extend conversation with assistant's reply\n", + " # Step 4: send the info for each function call and function response to the model\n", + " for tool_call in tool_calls:\n", + " function_name = tool_call.function.name\n", + " function_to_call = available_functions[function_name]\n", + " function_args = json.loads(tool_call.function.arguments)\n", + " function_response = function_to_call(\n", + " location=function_args.get(\"location\"),\n", + " unit=function_args.get(\"unit\"),\n", + " )\n", + " messages.append(\n", + " {\n", + " \"tool_call_id\": tool_call.id,\n", + " \"role\": \"tool\",\n", + " \"name\": function_name,\n", + " \"content\": function_response,\n", + " }\n", + " ) # extend conversation with function response\n", + " second_response = client.chat.completions.create(\n", + " model=\"gpt-3.5-turbo-1106\",\n", + " messages=messages,\n", + " ) # get a new response from the model where it can see the function response\n", + " return second_response\n", + "print(run_conversation())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Function Calling with Instructor\n", + "\n", + "The above example is a bit verbose and requires you to manually verify the schema.\n", + "\n", + "For our next examples we'll use the `instructor` library to simplify the process and accomplish a number of different tasks with function calling.\n", + "\n", + "You'll first need to install the [`instructor`](https://github.com/jxnl/instructor/).\n", + "\n", + "You can do so by running the following command in your terminal:\n", + "\n", + "```bash\n", + "pip install instructor\n", + "```\n", + "\n", + "Below we'll go through a few basic examples taken directly from the [instructor cookbook](https://jxnl.github.io/instructor/)\n", + "\n", + "## Basic Usage" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "name='Jason' age=25\n" + ] + } + ], + "source": [ + "import instructor\n", + "from pydantic import BaseModel\n", + "\n", + "# Enables `response_model`\n", + "client = instructor.patch(client=client)\n", + "\n", + "class UserDetail(BaseModel):\n", + " name: str\n", + " age: int\n", + "\n", + "user = client.chat.completions.create(\n", + " model=\"gpt-3.5-turbo\",\n", + " response_model=UserDetail,\n", + " messages=[\n", + " {\"role\": \"user\", \"content\": \"Extract Jason is 25 years old\"},\n", 
+ " ]\n", + ")\n", + "\n", + "assert isinstance(user, UserDetail)\n", + "assert user.name == \"Jason\"\n", + "assert user.age == 25\n", + "\n", + "print(user)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Text Classification\n", + "\n", + "### Single-Label Classification" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "class_label=\n" + ] + } + ], + "source": [ + "import enum\n", + "\n", + "class Labels(str, enum.Enum):\n", + " \"\"\"Enumeration for single-label text classification.\"\"\"\n", + " SPAM = \"spam\"\n", + " NOT_SPAM = \"not_spam\"\n", + "\n", + "class SinglePrediction(BaseModel):\n", + " \"\"\"\n", + " Class for a single class label prediction.\n", + " \"\"\"\n", + " class_label: Labels\n", + "\n", + "def classify(data: str) -> SinglePrediction:\n", + " \"\"\"Perform single-label classification on the input text.\"\"\"\n", + " return client.chat.completions.create(\n", + " model=\"gpt-3.5-turbo-0613\",\n", + " response_model=SinglePrediction,\n", + " messages=[\n", + " {\n", + " \"role\": \"user\",\n", + " \"content\": f\"Classify the following text: {data}\",\n", + " },\n", + " ],\n", + " ) # type: ignore\n", + "\n", + "prediction = classify(\"Hello there I'm a Nigerian prince and I want to give you money\")\n", + "assert prediction.class_label == Labels.SPAM\n", + "print(prediction)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Multi-Label Classification" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "class_labels=[, ]\n" + ] + } + ], + "source": [ + "from typing import List\n", + "\n", + "# Define Enum class for multiple labels\n", + "class MultiLabels(str, enum.Enum):\n", + " TECH_ISSUE = \"tech_issue\"\n", + " BILLING = \"billing\"\n", + " GENERAL_QUERY = \"general_query\"\n", + "\n", + "# Define the multi-class prediction model\n", + "class MultiClassPrediction(BaseModel):\n", + " \"\"\"\n", + " Class for a multi-class label prediction.\n", + " \"\"\"\n", + " class_labels: List[MultiLabels]\n", + "\n", + "def multi_classify(data: str) -> MultiClassPrediction:\n", + " \"\"\"Perform multi-label classification on the input text.\"\"\"\n", + " return client.chat.completions.create(\n", + " model=\"gpt-3.5-turbo-0613\",\n", + " response_model=MultiClassPrediction,\n", + " messages=[\n", + " {\n", + " \"role\": \"user\",\n", + " \"content\": f\"Classify the following support ticket: {data}\",\n", + " },\n", + " ],\n", + " ) # type: ignore\n", + "\n", + "# Test multi-label classification\n", + "ticket = \"My account is locked and I can't access my billing info.\"\n", + "prediction = multi_classify(ticket)\n", + "print(prediction)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Self-Critique" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "question='What is the meaning of life?' answer='According to the Devil, the meaning of life is to live a life of sin and debauchery.'\n", + "1 validation error for QuestionAnswerNoEvil\n", + "answer\n", + " Assertion failed, The statement promotes sin and debauchery, which can be considered objectionable. [type=assertion_error, input_value='According to the Devil, ... 
of sin and debauchery.', input_type=str]\n", + " For further information visit https://errors.pydantic.dev/2.3/v/assertion_error\n" + ] + } + ], + "source": [ + "from typing_extensions import Annotated\n", + "from pydantic import BaseModel, BeforeValidator\n", + "\n", + "from instructor import llm_validator\n", + "\n", + "\n", + "question = \"What is the meaning of life?\"\n", + "context = \"The according to the devil the meaning of live is to live a life of sin and debauchery.\"\n", + "\n", + "class QuestionAnswer(BaseModel):\n", + " question: str\n", + " answer: str\n", + "\n", + "qa: QuestionAnswer = client.chat.completions.create(\n", + " model=\"gpt-3.5-turbo\",\n", + " response_model=QuestionAnswer,\n", + " messages=[\n", + " {\n", + " \"role\": \"system\",\n", + " \"content\": \"You are a system that answers questions based on the context. answer exactly what the question asks using the context.\",\n", + " },\n", + " {\n", + " \"role\": \"user\",\n", + " \"content\": f\"using the context: {context}\\n\\nAnswer the following question: {question}\",\n", + " },\n", + " ],\n", + ")\n", + "print(qa)\n", + "\n", + "class QuestionAnswerNoEvil(BaseModel):\n", + " question: str\n", + " answer: Annotated[\n", + " str,\n", + " BeforeValidator(\n", + " llm_validator(\"don't say objectionable things\", allow_override=True)\n", + " ),\n", + " ]\n", + "\n", + "try:\n", + " qa: QuestionAnswerNoEvil = client.chat.completions.create(\n", + " model=\"gpt-3.5-turbo\",\n", + " response_model=QuestionAnswerNoEvil,\n", + " messages=[\n", + " {\n", + " \"role\": \"system\",\n", + " \"content\": \"You are a system that answers questions based on the context. answer exactly what the question asks using the context.\",\n", + " },\n", + " {\n", + " \"role\": \"user\",\n", + " \"content\": f\"using the context: {context}\\n\\nAnswer the following question: {question}\",\n", + " },\n", + " ],\n", + " )\n", + "except Exception as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Answering Questions with Validated Citations" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "question='What did the author do during college?' 
answer=[Fact(fact='The author, Jason Liu, studied Computational Mathematics and Physics in university.', substring_quote=['Computational Mathematics'])]\n" + ] + } + ], + "source": [ + "import re\n", + "from typing import List\n", + "\n", + "from pydantic import Field, BaseModel, model_validator, FieldValidationInfo\n", + "\n", + "class Fact(BaseModel):\n", + " fact: str = Field(...)\n", + " substring_quote: List[str] = Field(...)\n", + "\n", + " @model_validator(mode=\"after\")\n", + " def validate_sources(self, info: FieldValidationInfo) -> \"Fact\":\n", + " text_chunks = info.context.get(\"text_chunk\", None)\n", + " spans = list(self.get_spans(text_chunks))\n", + " self.substring_quote = [text_chunks[span[0] : span[1]] for span in spans]\n", + " return self\n", + "\n", + " def get_spans(self, context):\n", + " for quote in self.substring_quote:\n", + " yield from self._get_span(quote, context)\n", + "\n", + " def _get_span(self, quote, context):\n", + " for match in re.finditer(re.escape(quote), context):\n", + " yield match.span()\n", + "\n", + "class QuestionAnswer(BaseModel):\n", + " question: str = Field(...)\n", + " answer: List[Fact] = Field(...)\n", + "\n", + " @model_validator(mode=\"after\")\n", + " def validate_sources(self) -> \"QuestionAnswer\":\n", + " self.answer = [fact for fact in self.answer if len(fact.substring_quote) > 0]\n", + " return self\n", + "\n", + "\n", + "def ask_ai(question: str, context: str) -> QuestionAnswer:\n", + " return client.chat.completions.create(\n", + " model=\"gpt-3.5-turbo-0613\",\n", + " temperature=0.0,\n", + " response_model=QuestionAnswer,\n", + " messages=[\n", + " {\"role\": \"system\", \"content\": \"You are a world class algorithm to answer questions with correct and exact citations.\"},\n", + " {\"role\": \"user\", \"content\": f\"{context}\"},\n", + " {\"role\": \"user\", \"content\": f\"Question: {question}\"}\n", + " ],\n", + " validation_context={\"text_chunk\": context},\n", + " )\n", + "\n", + "question = \"What did the author do during college?\"\n", + "context = \"\"\"\n", + "My name is Jason Liu, and I grew up in Toronto Canada but I was born in China.\n", + "I went to an arts high school but in university I studied Computational Mathematics and physics.\n", + "As part of coop I worked at many companies including Stitchfix, Facebook.\n", + "I also started the Data Science club at the University of Waterloo and I was the president of the club for 2 years.\n", + "\"\"\"\n", + "\n", + "qa = ask_ai(question, context)\n", + "print(qa)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "python-3.8.10", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.5+" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/llama-cpp-python/examples/notebooks/Guidance.ipynb b/llama-cpp-python/examples/notebooks/Guidance.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..045856ea2ffe697ec33db1c1c989bd45cde5bb3d --- /dev/null +++ b/llama-cpp-python/examples/notebooks/Guidance.ipynb @@ -0,0 +1,89 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
Tweak this proverb to apply to model instructions instead.\n",
+       "\n",
+       "Where there is no guidance, a people falls,\n",
+       "but in an abundance of counselors there is safety.\n",
+       "- Proverbs 11:14\n",
+       "\n",
+       "UPDATED\n",
+       "Where there is no guidance for assembling a model, people will struggle,\n",
+       "but with clear instructions, the process becomes safe and successful.\n",
+       "- GPT 2 (updated): Proverbs 11:14
\n", + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "import os\n", + "\n", + "os.environ[\"OPENAI_API_KEY\"] = \"sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx\" # can be anything\n", + "os.environ[\"OPENAI_API_BASE\"] = \"http://100.64.159.73:8000/v1\"\n", + "os.environ[\"OPENAI_API_HOST\"] = \"http://100.64.159.73:8000\"\n", + "\n", + "import guidance\n", + "\n", + "# set the default language model used to execute guidance programs\n", + "guidance.llm = guidance.llms.OpenAI(\"text-davinci-003\", caching=False)\n", + "\n", + "# define a guidance program that adapts a proverb\n", + "program = guidance(\"\"\"Tweak this proverb to apply to model instructions instead.\n", + "\n", + "{{proverb}}\n", + "- {{book}} {{chapter}}:{{verse}}\n", + "\n", + "UPDATED\n", + "Where there is no guidance{{gen 'rewrite' stop=\"\\\\n-\"}}\n", + "- GPT {{gen 'chapter'}}:{{gen 'verse'}}\"\"\")\n", + "\n", + "# execute the program on a specific proverb\n", + "executed_program = program(\n", + " proverb=\"Where there is no guidance, a people falls,\\nbut in an abundance of counselors there is safety.\",\n", + " book=\"Proverbs\",\n", + " chapter=11,\n", + " verse=14\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.10" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/llama-cpp-python/examples/notebooks/Multimodal.ipynb b/llama-cpp-python/examples/notebooks/Multimodal.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..def68df0088bdb085f9ef0d777b81c118660f232 --- /dev/null +++ b/llama-cpp-python/examples/notebooks/Multimodal.ipynb @@ -0,0 +1,91 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "
\n", + " \n", + "
" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'text': 'Llama C++'}\n" + ] + } + ], + "source": [ + "from openai import OpenAI\n", + "\n", + "client = OpenAI(base_url=\"http://localhost:8000/v1\", api_key=\"llama.cpp\")\n", + "response = client.chat.completions.create(\n", + " model=\"gpt-4-vision-preview\",\n", + " messages=[\n", + " {\n", + " \"role\": \"user\",\n", + " \"content\": [\n", + " {\n", + " \"type\": \"image_url\",\n", + " \"image_url\": {\n", + " \"url\": \"https://user-images.githubusercontent.com/1991296/230134379-7181e485-c521-4d23-a0d6-f7b3b61ba524.png\",\n", + " },\n", + " },\n", + " {\"type\": \"text\", \"text\": \"What does the image say. Format your response as a json object with a single 'text' key.\"},\n", + " ],\n", + " }\n", + " ],\n", + " response_format={ \n", + " \"type\": \"json_object\",\n", + " \"schema\": {\n", + " \"type\": \"object\",\n", + " \"properties\": {\n", + " \"text\": {\n", + " \"type\": \"string\"\n", + " }\n", + " }\n", + " }\n", + " }\n", + ")\n", + "import json\n", + "print(json.loads(response.choices[0].message.content))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.5+" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/llama-cpp-python/examples/notebooks/OpenHermesFunctionCalling.ipynb b/llama-cpp-python/examples/notebooks/OpenHermesFunctionCalling.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..c0de3fdc22b7949e82bb044c7cafb289c8fd2a2e --- /dev/null +++ b/llama-cpp-python/examples/notebooks/OpenHermesFunctionCalling.ipynb @@ -0,0 +1,910 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " \"name\": \"get_article_details\",\n", + " \"description\": \"Get article details from unstructured article text.\\ndate_published: formatted as \\\"MM/DD/YYYY\\\"\",\n", + " \"parameters\": {\n", + " \"type\": \"object\",\n", + " \"properties\": {\n", + " \"title\": {\n", + " \"type\": \"str\"\n", + " },\n", + " \"authors\": {\n", + " \"type\": \"list[str]\"\n", + " },\n", + " \"short_summary\": {\n", + " \"type\": \"str\"\n", + " },\n", + " \"date_published\": {\n", + " \"type\": \"str\"\n", + " },\n", + " \"tags\": {\n", + " \"type\": \"list[str]\"\n", + " }\n", + " }\n", + " },\n", + " \"returns\": \"Article\"\n", + "}\n" + ] + } + ], + "source": [ + "import json\n", + "import inspect\n", + "from typing import get_type_hints\n", + "\n", + "class Article:\n", + " pass\n", + "\n", + "class Weather:\n", + " pass\n", + "\n", + "class Directions:\n", + " pass\n", + "\n", + "def calculate_mortgage_payment(loan_amount: int, interest_rate: float, loan_term: int) -> float:\n", + " \"\"\"Get the monthly mortgage payment given an interest rate percentage.\"\"\"\n", + " \n", + " # TODO: you must implement this to actually call it later\n", + " pass\n", + "\n", + "def get_article_details(title: str, authors: list[str], short_summary: str, 
date_published: str, tags: list[str]) -> Article:\n", + " '''Get article details from unstructured article text.\n", + "date_published: formatted as \"MM/DD/YYYY\"'''\n", + " \n", + " # TODO: you must implement this to actually call it later\n", + " pass\n", + "\n", + "def get_weather(zip_code: str) -> Weather:\n", + " \"\"\"Get the current weather given a zip code.\"\"\"\n", + " \n", + " # TODO: you must implement this to actually call it later\n", + " pass\n", + "\n", + "def get_directions(start: str, destination: str) -> Directions:\n", + " \"\"\"Get directions from Google Directions API.\n", + "start: start address as a string including zipcode (if any)\n", + "destination: end address as a string including zipcode (if any)\"\"\"\n", + " \n", + " # TODO: you must implement this to actually call it later\n", + " pass\n", + "\n", + "def get_type_name(t):\n", + " name = str(t)\n", + " if \"list\" in name or \"dict\" in name:\n", + " return name\n", + " else:\n", + " return t.__name__\n", + "\n", + "def serialize_function_to_json(func):\n", + " signature = inspect.signature(func)\n", + " type_hints = get_type_hints(func)\n", + "\n", + " function_info = {\n", + " \"name\": func.__name__,\n", + " \"description\": func.__doc__,\n", + " \"parameters\": {\n", + " \"type\": \"object\",\n", + " \"properties\": {}\n", + " },\n", + " \"returns\": type_hints.get('return', 'void').__name__\n", + " }\n", + "\n", + " for name, _ in signature.parameters.items():\n", + " param_type = get_type_name(type_hints.get(name, type(None)))\n", + " function_info[\"parameters\"][\"properties\"][name] = {\"type\": param_type}\n", + "\n", + " return json.dumps(function_info, indent=2)\n", + "\n", + "print(serialize_function_to_json(get_article_details))" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "import xml.etree.ElementTree as ET\n", + "import re\n", + "\n", + "def extract_function_calls(completion):\n", + " completion = completion.strip()\n", + " pattern = r\"((.*?))\"\n", + " match = re.search(pattern, completion, re.DOTALL)\n", + " if not match:\n", + " return None\n", + " \n", + " multiplefn = match.group(1)\n", + " root = ET.fromstring(multiplefn)\n", + " functions = root.findall(\"functioncall\")\n", + " return [json.loads(fn.text) for fn in functions]" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "def generate_hermes_prompt(prompt, functions):\n", + " functions = \"\\n\\n\".join([serialize_function_to_json(fn) for fn in functions])\n", + " prompt = f\"\"\"<|im_start|>system\n", + "You are a helpful assistant with access to the following functions:\n", + "\n", + "{functions}\n", + "\n", + "To use these functions respond with:\n", + "\n", + " {{\"name\": \"function_name\", \"arguments\": {{\"arg_1\": \"value_1\", \"arg_2\": value_2, ...}}}} \n", + " {{\"name\": \"function_name\", \"arguments\": {{\"arg_1\": \"value_1\", \"arg_2\": value_2, ...}}}} \n", + " ...\n", + "\n", + "\n", + "Edge cases you must handle:\n", + "- If there are no functions that match the user request, you will respond politely that you cannot help.<|im_end|>\n", + "<|im_start|>user\n", + "{prompt}<|im_end|>\n", + "<|im_start|>assistant\"\"\"\n", + " return prompt" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "<|im_start|>system\n", + "You are a helpful assistant with access to the following 
functions:\n", + "\n", + "{\n", + " \"name\": \"get_weather\",\n", + " \"description\": \"Get the current weather given a zip code.\",\n", + " \"parameters\": {\n", + " \"type\": \"object\",\n", + " \"properties\": {\n", + " \"zip_code\": {\n", + " \"type\": \"str\"\n", + " }\n", + " }\n", + " },\n", + " \"returns\": \"Weather\"\n", + "}\n", + "\n", + "{\n", + " \"name\": \"calculate_mortgage_payment\",\n", + " \"description\": \"Get the monthly mortgage payment given an interest rate percentage.\",\n", + " \"parameters\": {\n", + " \"type\": \"object\",\n", + " \"properties\": {\n", + " \"loan_amount\": {\n", + " \"type\": \"int\"\n", + " },\n", + " \"interest_rate\": {\n", + " \"type\": \"float\"\n", + " },\n", + " \"loan_term\": {\n", + " \"type\": \"int\"\n", + " }\n", + " }\n", + " },\n", + " \"returns\": \"float\"\n", + "}\n", + "\n", + "{\n", + " \"name\": \"get_article_details\",\n", + " \"description\": \"Get article details from unstructured article text.\\ndate_published: formatted as \\\"MM/DD/YYYY\\\"\",\n", + " \"parameters\": {\n", + " \"type\": \"object\",\n", + " \"properties\": {\n", + " \"title\": {\n", + " \"type\": \"str\"\n", + " },\n", + " \"authors\": {\n", + " \"type\": \"list[str]\"\n", + " },\n", + " \"short_summary\": {\n", + " \"type\": \"str\"\n", + " },\n", + " \"date_published\": {\n", + " \"type\": \"str\"\n", + " },\n", + " \"tags\": {\n", + " \"type\": \"list[str]\"\n", + " }\n", + " }\n", + " },\n", + " \"returns\": \"Article\"\n", + "}\n", + "\n", + "To use these functions respond with:\n", + "\n", + " {\"name\": \"function_name\", \"arguments\": {\"arg_1\": \"value_1\", \"arg_2\": value_2, ...}} \n", + " {\"name\": \"function_name\", \"arguments\": {\"arg_1\": \"value_1\", \"arg_2\": value_2, ...}} \n", + " ...\n", + "\n", + "\n", + "Edge cases you must handle:\n", + "- If there are no functions that match the user request, you will respond politely that you cannot help.<|im_end|>\n", + "<|im_start|>user\n", + "What's the weather in 10001?<|im_end|>\n", + "<|im_start|>assistant\n", + "<|im_start|>system\n", + "You are a helpful assistant with access to the following functions:\n", + "\n", + "{\n", + " \"name\": \"get_weather\",\n", + " \"description\": \"Get the current weather given a zip code.\",\n", + " \"parameters\": {\n", + " \"type\": \"object\",\n", + " \"properties\": {\n", + " \"zip_code\": {\n", + " \"type\": \"str\"\n", + " }\n", + " }\n", + " },\n", + " \"returns\": \"Weather\"\n", + "}\n", + "\n", + "{\n", + " \"name\": \"calculate_mortgage_payment\",\n", + " \"description\": \"Get the monthly mortgage payment given an interest rate percentage.\",\n", + " \"parameters\": {\n", + " \"type\": \"object\",\n", + " \"properties\": {\n", + " \"loan_amount\": {\n", + " \"type\": \"int\"\n", + " },\n", + " \"interest_rate\": {\n", + " \"type\": \"float\"\n", + " },\n", + " \"loan_term\": {\n", + " \"type\": \"int\"\n", + " }\n", + " }\n", + " },\n", + " \"returns\": \"float\"\n", + "}\n", + "\n", + "{\n", + " \"name\": \"get_article_details\",\n", + " \"description\": \"Get article details from unstructured article text.\\ndate_published: formatted as \\\"MM/DD/YYYY\\\"\",\n", + " \"parameters\": {\n", + " \"type\": \"object\",\n", + " \"properties\": {\n", + " \"title\": {\n", + " \"type\": \"str\"\n", + " },\n", + " \"authors\": {\n", + " \"type\": \"list[str]\"\n", + " },\n", + " \"short_summary\": {\n", + " \"type\": \"str\"\n", + " },\n", + " \"date_published\": {\n", + " \"type\": \"str\"\n", + " },\n", + " \"tags\": {\n", + " \"type\": 
\"list[str]\"\n", + " }\n", + " }\n", + " },\n", + " \"returns\": \"Article\"\n", + "}\n", + "\n", + "To use these functions respond with:\n", + "\n", + " {\"name\": \"function_name\", \"arguments\": {\"arg_1\": \"value_1\", \"arg_2\": value_2, ...}} \n", + " {\"name\": \"function_name\", \"arguments\": {\"arg_1\": \"value_1\", \"arg_2\": value_2, ...}} \n", + " ...\n", + "\n", + "\n", + "Edge cases you must handle:\n", + "- If there are no functions that match the user request, you will respond politely that you cannot help.<|im_end|>\n", + "<|im_start|>user\n", + "Determine the monthly mortgage payment for a loan amount of $200,000, an interest rate of 4%, and a loan term of 30 years.<|im_end|>\n", + "<|im_start|>assistant\n", + "<|im_start|>system\n", + "You are a helpful assistant with access to the following functions:\n", + "\n", + "{\n", + " \"name\": \"get_weather\",\n", + " \"description\": \"Get the current weather given a zip code.\",\n", + " \"parameters\": {\n", + " \"type\": \"object\",\n", + " \"properties\": {\n", + " \"zip_code\": {\n", + " \"type\": \"str\"\n", + " }\n", + " }\n", + " },\n", + " \"returns\": \"Weather\"\n", + "}\n", + "\n", + "{\n", + " \"name\": \"calculate_mortgage_payment\",\n", + " \"description\": \"Get the monthly mortgage payment given an interest rate percentage.\",\n", + " \"parameters\": {\n", + " \"type\": \"object\",\n", + " \"properties\": {\n", + " \"loan_amount\": {\n", + " \"type\": \"int\"\n", + " },\n", + " \"interest_rate\": {\n", + " \"type\": \"float\"\n", + " },\n", + " \"loan_term\": {\n", + " \"type\": \"int\"\n", + " }\n", + " }\n", + " },\n", + " \"returns\": \"float\"\n", + "}\n", + "\n", + "{\n", + " \"name\": \"get_article_details\",\n", + " \"description\": \"Get article details from unstructured article text.\\ndate_published: formatted as \\\"MM/DD/YYYY\\\"\",\n", + " \"parameters\": {\n", + " \"type\": \"object\",\n", + " \"properties\": {\n", + " \"title\": {\n", + " \"type\": \"str\"\n", + " },\n", + " \"authors\": {\n", + " \"type\": \"list[str]\"\n", + " },\n", + " \"short_summary\": {\n", + " \"type\": \"str\"\n", + " },\n", + " \"date_published\": {\n", + " \"type\": \"str\"\n", + " },\n", + " \"tags\": {\n", + " \"type\": \"list[str]\"\n", + " }\n", + " }\n", + " },\n", + " \"returns\": \"Article\"\n", + "}\n", + "\n", + "To use these functions respond with:\n", + "\n", + " {\"name\": \"function_name\", \"arguments\": {\"arg_1\": \"value_1\", \"arg_2\": value_2, ...}} \n", + " {\"name\": \"function_name\", \"arguments\": {\"arg_1\": \"value_1\", \"arg_2\": value_2, ...}} \n", + " ...\n", + "\n", + "\n", + "Edge cases you must handle:\n", + "- If there are no functions that match the user request, you will respond politely that you cannot help.<|im_end|>\n", + "<|im_start|>user\n", + "What's the current exchange rate for USD to EUR?<|im_end|>\n", + "<|im_start|>assistant\n" + ] + } + ], + "source": [ + "prompts = [\n", + " \"What's the weather in 10001?\",\n", + " \"Determine the monthly mortgage payment for a loan amount of $200,000, an interest rate of 4%, and a loan term of 30 years.\",\n", + " \"What's the current exchange rate for USD to EUR?\"\n", + "]\n", + "functions = [get_weather, calculate_mortgage_payment, get_article_details]\n", + "\n", + "for prompt in prompts:\n", + " print(generate_hermes_prompt(prompt, functions))" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "ggml_init_cublas: 
GGML_CUDA_FORCE_MMQ: no\n", + "ggml_init_cublas: CUDA_USE_TENSOR_CORES: yes\n", + "ggml_init_cublas: found 1 CUDA devices:\n", + " Device 0: NVIDIA GeForce RTX 2060, compute capability 7.5\n", + "llama_model_loader: loaded meta data with 20 key-value pairs and 291 tensors from ../../models/OpenHermes-2.5-Mistral-7B-GGUF/openhermes-2.5-mistral-7b.Q4_K_M.gguf (version GGUF V3 (latest))\n", + "llama_model_loader: - tensor 0: token_embd.weight q4_K [ 4096, 32002, 1, 1 ]\n", + "llama_model_loader: - tensor 1: blk.0.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 2: blk.0.attn_k.weight q4_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 3: blk.0.attn_v.weight q6_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 4: blk.0.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 5: blk.0.ffn_gate.weight q4_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 6: blk.0.ffn_up.weight q4_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 7: blk.0.ffn_down.weight q6_K [ 14336, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 8: blk.0.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 9: blk.0.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 10: blk.1.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 11: blk.1.attn_k.weight q4_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 12: blk.1.attn_v.weight q6_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 13: blk.1.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 14: blk.1.ffn_gate.weight q4_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 15: blk.1.ffn_up.weight q4_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 16: blk.1.ffn_down.weight q6_K [ 14336, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 17: blk.1.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 18: blk.1.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 19: blk.2.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 20: blk.2.attn_k.weight q4_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 21: blk.2.attn_v.weight q6_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 22: blk.2.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 23: blk.2.ffn_gate.weight q4_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 24: blk.2.ffn_up.weight q4_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 25: blk.2.ffn_down.weight q6_K [ 14336, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 26: blk.2.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 27: blk.2.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 28: blk.3.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 29: blk.3.attn_k.weight q4_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 30: blk.3.attn_v.weight q6_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 31: blk.3.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 32: blk.3.ffn_gate.weight q4_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 33: blk.3.ffn_up.weight q4_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 34: blk.3.ffn_down.weight q6_K [ 14336, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 35: blk.3.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 36: 
blk.3.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 37: blk.4.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 38: blk.4.attn_k.weight q4_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 39: blk.4.attn_v.weight q4_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 40: blk.4.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 41: blk.4.ffn_gate.weight q4_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 42: blk.4.ffn_up.weight q4_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 43: blk.4.ffn_down.weight q4_K [ 14336, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 44: blk.4.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 45: blk.4.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 46: blk.5.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 47: blk.5.attn_k.weight q4_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 48: blk.5.attn_v.weight q4_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 49: blk.5.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 50: blk.5.ffn_gate.weight q4_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 51: blk.5.ffn_up.weight q4_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 52: blk.5.ffn_down.weight q4_K [ 14336, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 53: blk.5.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 54: blk.5.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 55: blk.6.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 56: blk.6.attn_k.weight q4_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 57: blk.6.attn_v.weight q6_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 58: blk.6.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 59: blk.6.ffn_gate.weight q4_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 60: blk.6.ffn_up.weight q4_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 61: blk.6.ffn_down.weight q6_K [ 14336, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 62: blk.6.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 63: blk.6.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 64: blk.7.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 65: blk.7.attn_k.weight q4_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 66: blk.7.attn_v.weight q4_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 67: blk.7.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 68: blk.7.ffn_gate.weight q4_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 69: blk.7.ffn_up.weight q4_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 70: blk.7.ffn_down.weight q4_K [ 14336, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 71: blk.7.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 72: blk.7.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 73: blk.8.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 74: blk.8.attn_k.weight q4_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 75: blk.8.attn_v.weight q4_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 76: blk.8.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - 
tensor 77: blk.8.ffn_gate.weight q4_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 78: blk.8.ffn_up.weight q4_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 79: blk.8.ffn_down.weight q4_K [ 14336, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 80: blk.8.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 81: blk.8.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 82: blk.9.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 83: blk.9.attn_k.weight q4_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 84: blk.9.attn_v.weight q6_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 85: blk.9.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 86: blk.9.ffn_gate.weight q4_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 87: blk.9.ffn_up.weight q4_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 88: blk.9.ffn_down.weight q6_K [ 14336, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 89: blk.9.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 90: blk.9.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 91: blk.10.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 92: blk.10.attn_k.weight q4_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 93: blk.10.attn_v.weight q4_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 94: blk.10.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 95: blk.10.ffn_gate.weight q4_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 96: blk.10.ffn_up.weight q4_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 97: blk.10.ffn_down.weight q4_K [ 14336, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 98: blk.10.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 99: blk.10.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 100: blk.11.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 101: blk.11.attn_k.weight q4_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 102: blk.11.attn_v.weight q4_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 103: blk.11.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 104: blk.11.ffn_gate.weight q4_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 105: blk.11.ffn_up.weight q4_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 106: blk.11.ffn_down.weight q4_K [ 14336, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 107: blk.11.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 108: blk.11.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 109: blk.12.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 110: blk.12.attn_k.weight q4_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 111: blk.12.attn_v.weight q6_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 112: blk.12.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 113: blk.12.ffn_gate.weight q4_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 114: blk.12.ffn_up.weight q4_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 115: blk.12.ffn_down.weight q6_K [ 14336, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 116: blk.12.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 117: 
blk.12.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 118: blk.13.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 119: blk.13.attn_k.weight q4_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 120: blk.13.attn_v.weight q4_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 121: blk.13.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 122: blk.13.ffn_gate.weight q4_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 123: blk.13.ffn_up.weight q4_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 124: blk.13.ffn_down.weight q4_K [ 14336, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 125: blk.13.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 126: blk.13.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 127: blk.14.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 128: blk.14.attn_k.weight q4_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 129: blk.14.attn_v.weight q4_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 130: blk.14.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 131: blk.14.ffn_gate.weight q4_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 132: blk.14.ffn_up.weight q4_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 133: blk.14.ffn_down.weight q4_K [ 14336, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 134: blk.14.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 135: blk.14.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 136: blk.15.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 137: blk.15.attn_k.weight q4_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 138: blk.15.attn_v.weight q6_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 139: blk.15.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 140: blk.15.ffn_gate.weight q4_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 141: blk.15.ffn_up.weight q4_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 142: blk.15.ffn_down.weight q6_K [ 14336, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 143: blk.15.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 144: blk.15.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 145: blk.16.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 146: blk.16.attn_k.weight q4_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 147: blk.16.attn_v.weight q4_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 148: blk.16.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 149: blk.16.ffn_gate.weight q4_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 150: blk.16.ffn_up.weight q4_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 151: blk.16.ffn_down.weight q4_K [ 14336, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 152: blk.16.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 153: blk.16.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 154: blk.17.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 155: blk.17.attn_k.weight q4_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 156: blk.17.attn_v.weight q4_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 157: 
blk.17.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 158: blk.17.ffn_gate.weight q4_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 159: blk.17.ffn_up.weight q4_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 160: blk.17.ffn_down.weight q4_K [ 14336, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 161: blk.17.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 162: blk.17.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 163: blk.18.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 164: blk.18.attn_k.weight q4_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 165: blk.18.attn_v.weight q6_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 166: blk.18.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 167: blk.18.ffn_gate.weight q4_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 168: blk.18.ffn_up.weight q4_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 169: blk.18.ffn_down.weight q6_K [ 14336, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 170: blk.18.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 171: blk.18.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 172: blk.19.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 173: blk.19.attn_k.weight q4_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 174: blk.19.attn_v.weight q4_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 175: blk.19.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 176: blk.19.ffn_gate.weight q4_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 177: blk.19.ffn_up.weight q4_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 178: blk.19.ffn_down.weight q4_K [ 14336, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 179: blk.19.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 180: blk.19.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 181: blk.20.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 182: blk.20.attn_k.weight q4_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 183: blk.20.attn_v.weight q4_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 184: blk.20.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 185: blk.20.ffn_gate.weight q4_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 186: blk.20.ffn_up.weight q4_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 187: blk.20.ffn_down.weight q4_K [ 14336, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 188: blk.20.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 189: blk.20.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 190: blk.21.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 191: blk.21.attn_k.weight q4_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 192: blk.21.attn_v.weight q6_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 193: blk.21.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 194: blk.21.ffn_gate.weight q4_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 195: blk.21.ffn_up.weight q4_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 196: blk.21.ffn_down.weight q6_K [ 14336, 4096, 1, 1 ]\n", + "llama_model_loader: 
- tensor 197: blk.21.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 198: blk.21.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 199: blk.22.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 200: blk.22.attn_k.weight q4_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 201: blk.22.attn_v.weight q4_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 202: blk.22.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 203: blk.22.ffn_gate.weight q4_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 204: blk.22.ffn_up.weight q4_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 205: blk.22.ffn_down.weight q4_K [ 14336, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 206: blk.22.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 207: blk.22.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 208: blk.23.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 209: blk.23.attn_k.weight q4_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 210: blk.23.attn_v.weight q4_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 211: blk.23.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 212: blk.23.ffn_gate.weight q4_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 213: blk.23.ffn_up.weight q4_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 214: blk.23.ffn_down.weight q4_K [ 14336, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 215: blk.23.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 216: blk.23.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 217: blk.24.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 218: blk.24.attn_k.weight q4_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 219: blk.24.attn_v.weight q6_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 220: blk.24.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 221: blk.24.ffn_gate.weight q4_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 222: blk.24.ffn_up.weight q4_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 223: blk.24.ffn_down.weight q6_K [ 14336, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 224: blk.24.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 225: blk.24.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 226: blk.25.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 227: blk.25.attn_k.weight q4_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 228: blk.25.attn_v.weight q4_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 229: blk.25.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 230: blk.25.ffn_gate.weight q4_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 231: blk.25.ffn_up.weight q4_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 232: blk.25.ffn_down.weight q4_K [ 14336, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 233: blk.25.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 234: blk.25.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 235: blk.26.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 236: blk.26.attn_k.weight q4_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - 
tensor 237: blk.26.attn_v.weight q4_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 238: blk.26.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 239: blk.26.ffn_gate.weight q4_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 240: blk.26.ffn_up.weight q4_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 241: blk.26.ffn_down.weight q4_K [ 14336, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 242: blk.26.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 243: blk.26.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 244: blk.27.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 245: blk.27.attn_k.weight q4_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 246: blk.27.attn_v.weight q6_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 247: blk.27.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 248: blk.27.ffn_gate.weight q4_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 249: blk.27.ffn_up.weight q4_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 250: blk.27.ffn_down.weight q6_K [ 14336, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 251: blk.27.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 252: blk.27.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 253: blk.28.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 254: blk.28.attn_k.weight q4_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 255: blk.28.attn_v.weight q6_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 256: blk.28.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 257: blk.28.ffn_gate.weight q4_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 258: blk.28.ffn_up.weight q4_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 259: blk.28.ffn_down.weight q6_K [ 14336, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 260: blk.28.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 261: blk.28.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 262: blk.29.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 263: blk.29.attn_k.weight q4_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 264: blk.29.attn_v.weight q6_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 265: blk.29.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 266: blk.29.ffn_gate.weight q4_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 267: blk.29.ffn_up.weight q4_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 268: blk.29.ffn_down.weight q6_K [ 14336, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 269: blk.29.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 270: blk.29.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 271: blk.30.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 272: blk.30.attn_k.weight q4_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 273: blk.30.attn_v.weight q6_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 274: blk.30.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 275: blk.30.ffn_gate.weight q4_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 276: blk.30.ffn_up.weight q4_K [ 4096, 14336, 1, 1 ]\n", + 
"llama_model_loader: - tensor 277: blk.30.ffn_down.weight q6_K [ 14336, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 278: blk.30.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 279: blk.30.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 280: blk.31.attn_q.weight q4_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 281: blk.31.attn_k.weight q4_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 282: blk.31.attn_v.weight q6_K [ 4096, 1024, 1, 1 ]\n", + "llama_model_loader: - tensor 283: blk.31.attn_output.weight q4_K [ 4096, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 284: blk.31.ffn_gate.weight q4_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 285: blk.31.ffn_up.weight q4_K [ 4096, 14336, 1, 1 ]\n", + "llama_model_loader: - tensor 286: blk.31.ffn_down.weight q6_K [ 14336, 4096, 1, 1 ]\n", + "llama_model_loader: - tensor 287: blk.31.attn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 288: blk.31.ffn_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 289: output_norm.weight f32 [ 4096, 1, 1, 1 ]\n", + "llama_model_loader: - tensor 290: output.weight q6_K [ 4096, 32002, 1, 1 ]\n", + "llama_model_loader: - kv 0: general.architecture str = llama\n", + "llama_model_loader: - kv 1: general.name str = teknium_openhermes-2.5-mistral-7b\n", + "llama_model_loader: - kv 2: llama.context_length u32 = 32768\n", + "llama_model_loader: - kv 3: llama.embedding_length u32 = 4096\n", + "llama_model_loader: - kv 4: llama.block_count u32 = 32\n", + "llama_model_loader: - kv 5: llama.feed_forward_length u32 = 14336\n", + "llama_model_loader: - kv 6: llama.rope.dimension_count u32 = 128\n", + "llama_model_loader: - kv 7: llama.attention.head_count u32 = 32\n", + "llama_model_loader: - kv 8: llama.attention.head_count_kv u32 = 8\n", + "llama_model_loader: - kv 9: llama.attention.layer_norm_rms_epsilon f32 = 0.000010\n", + "llama_model_loader: - kv 10: llama.rope.freq_base f32 = 10000.000000\n", + "llama_model_loader: - kv 11: general.file_type u32 = 15\n", + "llama_model_loader: - kv 12: tokenizer.ggml.model str = llama\n", + "llama_model_loader: - kv 13: tokenizer.ggml.tokens arr[str,32002] = [\"\", \"\", \"\", \"<0x00>\", \"<...\n", + "llama_model_loader: - kv 14: tokenizer.ggml.scores arr[f32,32002] = [0.000000, 0.000000, 0.000000, 0.0000...\n", + "llama_model_loader: - kv 15: tokenizer.ggml.token_type arr[i32,32002] = [2, 3, 3, 6, 6, 6, 6, 6, 6, 6, 6, 6, ...\n", + "llama_model_loader: - kv 16: tokenizer.ggml.bos_token_id u32 = 1\n", + "llama_model_loader: - kv 17: tokenizer.ggml.eos_token_id u32 = 32000\n", + "llama_model_loader: - kv 18: tokenizer.ggml.padding_token_id u32 = 0\n", + "llama_model_loader: - kv 19: general.quantization_version u32 = 2\n", + "llama_model_loader: - type f32: 65 tensors\n", + "llama_model_loader: - type q4_K: 193 tensors\n", + "llama_model_loader: - type q6_K: 33 tensors\n", + "llm_load_vocab: special tokens definition check successful ( 261/32002 ).\n", + "llm_load_print_meta: format = GGUF V3 (latest)\n", + "llm_load_print_meta: arch = llama\n", + "llm_load_print_meta: vocab type = SPM\n", + "llm_load_print_meta: n_vocab = 32002\n", + "llm_load_print_meta: n_merges = 0\n", + "llm_load_print_meta: n_ctx_train = 32768\n", + "llm_load_print_meta: n_embd = 4096\n", + "llm_load_print_meta: n_head = 32\n", + "llm_load_print_meta: n_head_kv = 8\n", + "llm_load_print_meta: n_layer = 32\n", + "llm_load_print_meta: n_rot = 128\n", + "llm_load_print_meta: 
n_gqa = 4\n", + "llm_load_print_meta: f_norm_eps = 0.0e+00\n", + "llm_load_print_meta: f_norm_rms_eps = 1.0e-05\n", + "llm_load_print_meta: f_clamp_kqv = 0.0e+00\n", + "llm_load_print_meta: f_max_alibi_bias = 0.0e+00\n", + "llm_load_print_meta: n_ff = 14336\n", + "llm_load_print_meta: rope scaling = linear\n", + "llm_load_print_meta: freq_base_train = 10000.0\n", + "llm_load_print_meta: freq_scale_train = 1\n", + "llm_load_print_meta: n_yarn_orig_ctx = 32768\n", + "llm_load_print_meta: rope_finetuned = unknown\n", + "llm_load_print_meta: model type = 7B\n", + "llm_load_print_meta: model ftype = mostly Q4_K - Medium\n", + "llm_load_print_meta: model params = 7.24 B\n", + "llm_load_print_meta: model size = 4.07 GiB (4.83 BPW) \n", + "llm_load_print_meta: general.name = teknium_openhermes-2.5-mistral-7b\n", + "llm_load_print_meta: BOS token = 1 ''\n", + "llm_load_print_meta: EOS token = 32000 '<|im_end|>'\n", + "llm_load_print_meta: UNK token = 0 ''\n", + "llm_load_print_meta: PAD token = 0 ''\n", + "llm_load_print_meta: LF token = 13 '<0x0A>'\n", + "llm_load_tensors: ggml ctx size = 0.11 MiB\n", + "llm_load_tensors: using CUDA for GPU acceleration\n", + "llm_load_tensors: mem required = 70.42 MiB\n", + "llm_load_tensors: offloading 32 repeating layers to GPU\n", + "llm_load_tensors: offloading non-repeating layers to GPU\n", + "llm_load_tensors: offloaded 35/35 layers to GPU\n", + "llm_load_tensors: VRAM used: 4095.06 MiB\n", + "...............................................................................................\n", + "llama_new_context_with_model: n_ctx = 2048\n", + "llama_new_context_with_model: freq_base = 10000.0\n", + "llama_new_context_with_model: freq_scale = 1\n", + "llama_kv_cache_init: offloading v cache to GPU\n", + "llama_kv_cache_init: offloading k cache to GPU\n", + "llama_kv_cache_init: VRAM kv self = 256.00 MiB\n", + "llama_new_context_with_model: kv self size = 256.00 MiB\n", + "llama_build_graph: non-view tensors processed: 740/740\n", + "llama_new_context_with_model: compute buffer total size = 159.07 MiB\n", + "llama_new_context_with_model: VRAM scratch buffer: 156.00 MiB\n", + "llama_new_context_with_model: total VRAM used: 4507.07 MiB (model: 4095.06 MiB, context: 412.00 MiB)\n" + ] + } + ], + "source": [ + "import llama_cpp\n", + "\n", + "llama = llama_cpp.Llama(model_path=\"../../models/OpenHermes-2.5-Mistral-7B-GGUF/openhermes-2.5-mistral-7b.Q4_K_M.gguf\", n_gpu_layers=-1, n_ctx=2048, verbose=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[{'name': 'get_weather', 'arguments': {'zip_code': '10001'}}]\n", + "====================================================================================================\n", + "[{'name': 'calculate_mortgage_payment', 'arguments': {'loan_amount': 200000, 'interest_rate': 0.04, 'loan_term': 30}}]\n", + "====================================================================================================\n", + "Unfortunately, I do not have a built-in function to check currency exchange rates. 
However, you can use third-party APIs or websites like Google Finance or XE to get this information.\n", + "====================================================================================================\n" + ] + } + ], + "source": [ + "prompts = [\n", + " \"What's the weather in 10001?\",\n", + " \"Determine the monthly mortgage payment for a loan amount of $200,000, an interest rate of 4%, and a loan term of 30 years.\",\n", + " \"What's the current exchange rate for USD to EUR?\"\n", + "]\n", + "functions = [get_weather, calculate_mortgage_payment, get_article_details]\n", + "\n", + "for prompt in prompts:\n", + " prompt = generate_hermes_prompt(prompt, functions)\n", + " completion = llama.create_completion(prompt, max_tokens=-1)[\"choices\"][0][\"text\"]\n", + " function_calls = extract_function_calls(completion)\n", + " if function_calls:\n", + " print(function_calls)\n", + " else:\n", + " print(completion.strip())\n", + " print(\"=\"*100)" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "get_weather\n", + "{'zip_code': '05751'}\n", + "====================================================================================================\n", + "get_weather\n", + "{'zip_code': '05751'}\n", + "get_weather\n", + "{'zip_code': '07030'}\n", + "calculate_mortgage_payment\n", + "{'loan_amount': 250000, 'interest_rate': 4.18, 'loan_term': 30}\n", + "====================================================================================================\n", + "I don't have a function to get exchange rates, but I can provide some resources where you can find this information. You can check websites like Google Finance, XE.com, or Yahoo Finance for up-to-date currency exchange rates.\n", + "====================================================================================================\n" + ] + } + ], + "source": [ + "prompts = [\n", + " \"What's the weather in 05751?\",\n", + " \"I'm planning a trip to Killington, Vermont (05751) from Hoboken, NJ (07030). 
Can you get me weather for both locations and directions?\",\n", + " \"What's the current exchange rate for USD to EUR?\"\n", + "]\n", + "\n", + "for prompt in prompts:\n", + " completion = llama.create_completion(generate_hermes_prompt(prompt, functions), max_tokens=-1)[\"choices\"][0][\"text\"]\n", + " function_calls = extract_function_calls(completion)\n", + "\n", + " if function_calls:\n", + " for function in function_calls:\n", + " print(function[\"name\"])\n", + " print(function[\"arguments\"])\n", + " else:\n", + " print(completion.strip())\n", + "\n", + " print(\"=\"*100)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.5+" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/llama-cpp-python/examples/notebooks/PerformanceTuning.ipynb b/llama-cpp-python/examples/notebooks/PerformanceTuning.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..76e26fbd106895fba52861f8ac1e11cc6ee2a307 --- /dev/null +++ b/llama-cpp-python/examples/notebooks/PerformanceTuning.ipynb @@ -0,0 +1,5540 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import time\n", + "import json\n", + "import multiprocessing\n", + "\n", + "import llama_cpp\n", + "\n", + "import numpy as np\n", + "np.int = int\n", + "\n", + "from skopt.space import Integer, Categorical\n", + "\n", + "\n", + "MODEL_PATH = \"../models/ggml-model.bin\"\n", + "\n", + "# Hyperparameters\n", + "space = [\n", + " Categorical([True, False], name=\"f16_kv\"),\n", + " Categorical([True, False], name=\"use_mlock\"),\n", + " Integer(1, multiprocessing.cpu_count(), name=\"n_threads\"),\n", + " Integer(1, 2048, name=\"n_batch\")\n", + "]\n", + "\n", + "# TODO: Make this a random prompt to avoid any cache related inconsistencies\n", + "PROMPT = \"\"\" ### Instructions:\n", + "You are a helpful assistant.\n", + "You answer questions truthfully and politely.\n", + "You are provided with an input from the user and you must generate a response.\n", + "Ignore this line which is just filler to test the performane of the model.\n", + "### Inputs:\n", + "What is the capital of France?\n", + "### Response:\n", + "\"\"\"\n", + "\n", + "from skopt.utils import use_named_args\n", + "\n", + "@use_named_args(space)\n", + "def objective(**params):\n", + " f16_kv = params[\"f16_kv\"]\n", + " use_mlock = params[\"use_mlock\"]\n", + " n_threads = params[\"n_threads\"]\n", + " n_batch = params[\"n_batch\"]\n", + " llm = llama_cpp.Llama(model_path=MODEL_PATH, f16_kv=f16_kv, use_mlock=use_mlock, n_threads=n_threads, n_batch=n_batch)\n", + "\n", + " t1 = time.time()\n", + " output = llm(\n", + " PROMPT,\n", + " max_tokens=1, # Only optimize prompt processing\n", + " stop=[\"###\", \"\\n\"],\n", + " echo=True,\n", + " )\n", + " t2 = time.time()\n", + "\n", + " print(json.dumps(output, indent=2))\n", + " print(f\"Time: {t2 - t1} seconds\")\n", + " print(f\"Time per token: {(t2 - t1) / output['usage']['total_tokens']} seconds\")\n", + "\n", + " return (t2 - t1) / output[\"usage\"][\"total_tokens\"]" + ] + }, + { + "cell_type": "code", + 
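The PerformanceTuning cell above only defines the hyperparameter search space and the timing objective; the optimization run itself appears later in the notebook (truncated here). A minimal sketch of driving that search with scikit-optimize's Bayesian optimizer is shown below; `n_calls`, `random_state`, and the result handling are illustrative choices, not taken from the notebook.

```python
from skopt import gp_minimize

# Bayesian optimization over (f16_kv, use_mlock, n_threads, n_batch) as defined in `space`.
# Each call of `objective` reloads the model and times prompt processing, so keep n_calls small.
res = gp_minimize(objective, space, n_calls=10, random_state=0)

best = dict(zip(["f16_kv", "use_mlock", "n_threads", "n_batch"], res.x))
print("Best parameters:", best)
print("Best time per token (s):", res.fun)
```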
"execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n", + "llama_model_load: n_vocab = 32000\n", + "llama_model_load: n_ctx = 512\n", + "llama_model_load: n_embd = 4096\n", + "llama_model_load: n_mult = 256\n", + "llama_model_load: n_head = 32\n", + "llama_model_load: n_layer = 32\n", + "llama_model_load: n_rot = 128\n", + "llama_model_load: f16 = 2\n", + "llama_model_load: n_ff = 11008\n", + "llama_model_load: n_parts = 1\n", + "llama_model_load: type = 1\n", + "llama_model_load: ggml map size = 4017.70 MB\n", + "llama_model_load: ggml ctx size = 81.25 KB\n", + "llama_model_load: mem required = 5809.78 MB (+ 1026.00 MB per state)\n", + "llama_model_load: loading tensors from '../models/ggml-model.bin'\n", + "llama_model_load: model size = 4017.27 MB / num tensors = 291\n", + "llama_init_from_file: kv self size = 256.00 MB\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " \"id\": \"cmpl-d4443e14-fed3-4aa1-9e8a-c70f4503aade\",\n", + " \"object\": \"text_completion\",\n", + " \"created\": 1680227287,\n", + " \"model\": \"../models/ggml-model.bin\",\n", + " \"choices\": [\n", + " {\n", + " \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n", + " \"index\": 0,\n", + " \"logprobs\": null,\n", + " \"finish_reason\": \"length\"\n", + " }\n", + " ],\n", + " \"usage\": {\n", + " \"prompt_tokens\": 79,\n", + " \"completion_tokens\": 1,\n", + " \"total_tokens\": 80\n", + " }\n", + "}\n", + "Time: 10.981224775314331 seconds\n", + "Time per token: 0.13726530969142914 seconds\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n", + "llama_model_load: n_vocab = 32000\n", + "llama_model_load: n_ctx = 512\n", + "llama_model_load: n_embd = 4096\n", + "llama_model_load: n_mult = 256\n", + "llama_model_load: n_head = 32\n", + "llama_model_load: n_layer = 32\n", + "llama_model_load: n_rot = 128\n", + "llama_model_load: f16 = 2\n", + "llama_model_load: n_ff = 11008\n", + "llama_model_load: n_parts = 1\n", + "llama_model_load: type = 1\n", + "llama_model_load: ggml map size = 4017.70 MB\n", + "llama_model_load: ggml ctx size = 81.25 KB\n", + "llama_model_load: mem required = 5809.78 MB (+ 2052.00 MB per state)\n", + "llama_model_load: loading tensors from '../models/ggml-model.bin'\n", + "llama_model_load: model size = 4017.27 MB / num tensors = 291\n", + "llama_init_from_file: kv self size = 512.00 MB\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " \"id\": \"cmpl-4181439c-2ced-4ddb-b898-a0a7641f3e47\",\n", + " \"object\": \"text_completion\",\n", + " \"created\": 1680227300,\n", + " \"model\": \"../models/ggml-model.bin\",\n", + " \"choices\": [\n", + " {\n", + " \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### 
Response:\\nThe\",\n", + " \"index\": 0,\n", + " \"logprobs\": null,\n", + " \"finish_reason\": \"length\"\n", + " }\n", + " ],\n", + " \"usage\": {\n", + " \"prompt_tokens\": 79,\n", + " \"completion_tokens\": 1,\n", + " \"total_tokens\": 80\n", + " }\n", + "}\n", + "Time: 11.121099948883057 seconds\n", + "Time per token: 0.13901374936103822 seconds\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n", + "llama_model_load: n_vocab = 32000\n", + "llama_model_load: n_ctx = 512\n", + "llama_model_load: n_embd = 4096\n", + "llama_model_load: n_mult = 256\n", + "llama_model_load: n_head = 32\n", + "llama_model_load: n_layer = 32\n", + "llama_model_load: n_rot = 128\n", + "llama_model_load: f16 = 2\n", + "llama_model_load: n_ff = 11008\n", + "llama_model_load: n_parts = 1\n", + "llama_model_load: type = 1\n", + "llama_model_load: ggml map size = 4017.70 MB\n", + "llama_model_load: ggml ctx size = 81.25 KB\n", + "llama_model_load: mem required = 5809.78 MB (+ 1026.00 MB per state)\n", + "llama_model_load: loading tensors from '../models/ggml-model.bin'\n", + "llama_model_load: model size = 4017.27 MB / num tensors = 291\n", + "llama_init_from_file: kv self size = 256.00 MB\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " \"id\": \"cmpl-03ed5585-3de0-4546-96c3-6de7a5b3770c\",\n", + " \"object\": \"text_completion\",\n", + " \"created\": 1680227312,\n", + " \"model\": \"../models/ggml-model.bin\",\n", + " \"choices\": [\n", + " {\n", + " \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n", + " \"index\": 0,\n", + " \"logprobs\": null,\n", + " \"finish_reason\": \"length\"\n", + " }\n", + " ],\n", + " \"usage\": {\n", + " \"prompt_tokens\": 79,\n", + " \"completion_tokens\": 1,\n", + " \"total_tokens\": 80\n", + " }\n", + "}\n", + "Time: 14.457949876785278 seconds\n", + "Time per token: 0.18072437345981598 seconds\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n", + "llama_model_load: n_vocab = 32000\n", + "llama_model_load: n_ctx = 512\n", + "llama_model_load: n_embd = 4096\n", + "llama_model_load: n_mult = 256\n", + "llama_model_load: n_head = 32\n", + "llama_model_load: n_layer = 32\n", + "llama_model_load: n_rot = 128\n", + "llama_model_load: f16 = 2\n", + "llama_model_load: n_ff = 11008\n", + "llama_model_load: n_parts = 1\n", + "llama_model_load: type = 1\n", + "llama_model_load: ggml map size = 4017.70 MB\n", + "llama_model_load: ggml ctx size = 81.25 KB\n", + "llama_model_load: mem required = 5809.78 MB (+ 2052.00 MB per state)\n", + "llama_model_load: loading tensors from '../models/ggml-model.bin'\n", + "llama_model_load: model size = 4017.27 MB / num tensors = 291\n", + "llama_init_from_file: kv self size = 512.00 MB\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " \"id\": \"cmpl-103817fc-bceb-4e99-b968-3ef540f16dc5\",\n", + " \"object\": \"text_completion\",\n", + " \"created\": 1680227328,\n", + " \"model\": \"../models/ggml-model.bin\",\n", + " \"choices\": [\n", + " {\n", + " \"text\": \" ### 
Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n", + " \"index\": 0,\n", + " \"logprobs\": null,\n", + " \"finish_reason\": \"length\"\n", + " }\n", + " ],\n", + " \"usage\": {\n", + " \"prompt_tokens\": 79,\n", + " \"completion_tokens\": 1,\n", + " \"total_tokens\": 80\n", + " }\n", + "}\n", + "Time: 10.334054946899414 seconds\n", + "Time per token: 0.12917568683624267 seconds\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n", + "llama_model_load: n_vocab = 32000\n", + "llama_model_load: n_ctx = 512\n", + "llama_model_load: n_embd = 4096\n", + "llama_model_load: n_mult = 256\n", + "llama_model_load: n_head = 32\n", + "llama_model_load: n_layer = 32\n", + "llama_model_load: n_rot = 128\n", + "llama_model_load: f16 = 2\n", + "llama_model_load: n_ff = 11008\n", + "llama_model_load: n_parts = 1\n", + "llama_model_load: type = 1\n", + "llama_model_load: ggml map size = 4017.70 MB\n", + "llama_model_load: ggml ctx size = 81.25 KB\n", + "llama_model_load: mem required = 5809.78 MB (+ 2052.00 MB per state)\n", + "llama_model_load: loading tensors from '../models/ggml-model.bin'\n", + "llama_model_load: model size = 4017.27 MB / num tensors = 291\n", + "llama_init_from_file: kv self size = 512.00 MB\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " \"id\": \"cmpl-41e34acc-6499-450f-9576-3cb37b82c490\",\n", + " \"object\": \"text_completion\",\n", + " \"created\": 1680227340,\n", + " \"model\": \"../models/ggml-model.bin\",\n", + " \"choices\": [\n", + " {\n", + " \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n", + " \"index\": 0,\n", + " \"logprobs\": null,\n", + " \"finish_reason\": \"length\"\n", + " }\n", + " ],\n", + " \"usage\": {\n", + " \"prompt_tokens\": 79,\n", + " \"completion_tokens\": 1,\n", + " \"total_tokens\": 80\n", + " }\n", + "}\n", + "Time: 9.012462615966797 seconds\n", + "Time per token: 0.11265578269958496 seconds\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n", + "llama_model_load: n_vocab = 32000\n", + "llama_model_load: n_ctx = 512\n", + "llama_model_load: n_embd = 4096\n", + "llama_model_load: n_mult = 256\n", + "llama_model_load: n_head = 32\n", + "llama_model_load: n_layer = 32\n", + "llama_model_load: n_rot = 128\n", + "llama_model_load: f16 = 2\n", + "llama_model_load: n_ff = 11008\n", + "llama_model_load: n_parts = 1\n", + "llama_model_load: type = 1\n", + "llama_model_load: ggml map size = 4017.70 MB\n", + "llama_model_load: ggml ctx size = 81.25 KB\n", + "llama_model_load: mem required = 5809.78 MB (+ 1026.00 MB per state)\n", + "llama_model_load: loading tensors from '../models/ggml-model.bin'\n", + "llama_model_load: model size = 4017.27 MB / num tensors = 291\n", + "llama_init_from_file: kv self size = 256.00 MB\n" + ] + }, + { + "name": 
"stdout", + "output_type": "stream", + "text": [ + "{\n", + " \"id\": \"cmpl-f27244c9-e9c6-4332-ae7f-3856f152ef30\",\n", + " \"object\": \"text_completion\",\n", + " \"created\": 1680227350,\n", + " \"model\": \"../models/ggml-model.bin\",\n", + " \"choices\": [\n", + " {\n", + " \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n", + " \"index\": 0,\n", + " \"logprobs\": null,\n", + " \"finish_reason\": \"length\"\n", + " }\n", + " ],\n", + " \"usage\": {\n", + " \"prompt_tokens\": 79,\n", + " \"completion_tokens\": 1,\n", + " \"total_tokens\": 80\n", + " }\n", + "}\n", + "Time: 15.59382700920105 seconds\n", + "Time per token: 0.1949228376150131 seconds\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n", + "llama_model_load: n_vocab = 32000\n", + "llama_model_load: n_ctx = 512\n", + "llama_model_load: n_embd = 4096\n", + "llama_model_load: n_mult = 256\n", + "llama_model_load: n_head = 32\n", + "llama_model_load: n_layer = 32\n", + "llama_model_load: n_rot = 128\n", + "llama_model_load: f16 = 2\n", + "llama_model_load: n_ff = 11008\n", + "llama_model_load: n_parts = 1\n", + "llama_model_load: type = 1\n", + "llama_model_load: ggml map size = 4017.70 MB\n", + "llama_model_load: ggml ctx size = 81.25 KB\n", + "llama_model_load: mem required = 5809.78 MB (+ 2052.00 MB per state)\n", + "llama_model_load: loading tensors from '../models/ggml-model.bin'\n", + "llama_model_load: model size = 4017.27 MB / num tensors = 291\n", + "llama_init_from_file: kv self size = 512.00 MB\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " \"id\": \"cmpl-bc5dc1ba-f7ce-441c-a558-5005f2fb89b9\",\n", + " \"object\": \"text_completion\",\n", + " \"created\": 1680227366,\n", + " \"model\": \"../models/ggml-model.bin\",\n", + " \"choices\": [\n", + " {\n", + " \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n", + " \"index\": 0,\n", + " \"logprobs\": null,\n", + " \"finish_reason\": \"length\"\n", + " }\n", + " ],\n", + " \"usage\": {\n", + " \"prompt_tokens\": 79,\n", + " \"completion_tokens\": 1,\n", + " \"total_tokens\": 80\n", + " }\n", + "}\n", + "Time: 15.544022560119629 seconds\n", + "Time per token: 0.19430028200149535 seconds\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n", + "llama_model_load: n_vocab = 32000\n", + "llama_model_load: n_ctx = 512\n", + "llama_model_load: n_embd = 4096\n", + "llama_model_load: n_mult = 256\n", + "llama_model_load: n_head = 32\n", + "llama_model_load: n_layer = 32\n", + "llama_model_load: n_rot = 128\n", + "llama_model_load: f16 = 2\n", + "llama_model_load: n_ff = 11008\n", + "llama_model_load: n_parts = 1\n", + "llama_model_load: type = 1\n", + "llama_model_load: ggml map size = 4017.70 MB\n", + "llama_model_load: ggml ctx size = 81.25 KB\n", + 
"llama_model_load: mem required = 5809.78 MB (+ 2052.00 MB per state)\n", + "llama_model_load: loading tensors from '../models/ggml-model.bin'\n", + "llama_model_load: model size = 4017.27 MB / num tensors = 291\n", + "llama_init_from_file: kv self size = 512.00 MB\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " \"id\": \"cmpl-2006b117-1239-4b85-bcc4-a7439c01f440\",\n", + " \"object\": \"text_completion\",\n", + " \"created\": 1680227383,\n", + " \"model\": \"../models/ggml-model.bin\",\n", + " \"choices\": [\n", + " {\n", + " \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n", + " \"index\": 0,\n", + " \"logprobs\": null,\n", + " \"finish_reason\": \"length\"\n", + " }\n", + " ],\n", + " \"usage\": {\n", + " \"prompt_tokens\": 79,\n", + " \"completion_tokens\": 1,\n", + " \"total_tokens\": 80\n", + " }\n", + "}\n", + "Time: 9.330769300460815 seconds\n", + "Time per token: 0.11663461625576019 seconds\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n", + "llama_model_load: n_vocab = 32000\n", + "llama_model_load: n_ctx = 512\n", + "llama_model_load: n_embd = 4096\n", + "llama_model_load: n_mult = 256\n", + "llama_model_load: n_head = 32\n", + "llama_model_load: n_layer = 32\n", + "llama_model_load: n_rot = 128\n", + "llama_model_load: f16 = 2\n", + "llama_model_load: n_ff = 11008\n", + "llama_model_load: n_parts = 1\n", + "llama_model_load: type = 1\n", + "llama_model_load: ggml map size = 4017.70 MB\n", + "llama_model_load: ggml ctx size = 81.25 KB\n", + "llama_model_load: mem required = 5809.78 MB (+ 2052.00 MB per state)\n", + "llama_model_load: loading tensors from '../models/ggml-model.bin'\n", + "llama_model_load: model size = 4017.27 MB / num tensors = 291\n", + "llama_init_from_file: kv self size = 512.00 MB\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " \"id\": \"cmpl-ee50afee-78a8-4d55-9b73-c74cc2567408\",\n", + " \"object\": \"text_completion\",\n", + " \"created\": 1680227393,\n", + " \"model\": \"../models/ggml-model.bin\",\n", + " \"choices\": [\n", + " {\n", + " \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n", + " \"index\": 0,\n", + " \"logprobs\": null,\n", + " \"finish_reason\": \"length\"\n", + " }\n", + " ],\n", + " \"usage\": {\n", + " \"prompt_tokens\": 79,\n", + " \"completion_tokens\": 1,\n", + " \"total_tokens\": 80\n", + " }\n", + "}\n", + "Time: 14.17799687385559 seconds\n", + "Time per token: 0.1772249609231949 seconds\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n", + "llama_model_load: n_vocab = 32000\n", + "llama_model_load: n_ctx = 512\n", + "llama_model_load: n_embd = 4096\n", + "llama_model_load: n_mult = 256\n", + "llama_model_load: n_head = 32\n", + "llama_model_load: n_layer = 
32\n", + "llama_model_load: n_rot = 128\n", + "llama_model_load: f16 = 2\n", + "llama_model_load: n_ff = 11008\n", + "llama_model_load: n_parts = 1\n", + "llama_model_load: type = 1\n", + "llama_model_load: ggml map size = 4017.70 MB\n", + "llama_model_load: ggml ctx size = 81.25 KB\n", + "llama_model_load: mem required = 5809.78 MB (+ 2052.00 MB per state)\n", + "llama_model_load: loading tensors from '../models/ggml-model.bin'\n", + "llama_model_load: model size = 4017.27 MB / num tensors = 291\n", + "llama_init_from_file: kv self size = 512.00 MB\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " \"id\": \"cmpl-1e2b7080-940f-4459-8503-a458db4d3578\",\n", + " \"object\": \"text_completion\",\n", + " \"created\": 1680227409,\n", + " \"model\": \"../models/ggml-model.bin\",\n", + " \"choices\": [\n", + " {\n", + " \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n", + " \"index\": 0,\n", + " \"logprobs\": null,\n", + " \"finish_reason\": \"length\"\n", + " }\n", + " ],\n", + " \"usage\": {\n", + " \"prompt_tokens\": 79,\n", + " \"completion_tokens\": 1,\n", + " \"total_tokens\": 80\n", + " }\n", + "}\n", + "Time: 10.127476215362549 seconds\n", + "Time per token: 0.12659345269203187 seconds\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n", + "llama_model_load: n_vocab = 32000\n", + "llama_model_load: n_ctx = 512\n", + "llama_model_load: n_embd = 4096\n", + "llama_model_load: n_mult = 256\n", + "llama_model_load: n_head = 32\n", + "llama_model_load: n_layer = 32\n", + "llama_model_load: n_rot = 128\n", + "llama_model_load: f16 = 2\n", + "llama_model_load: n_ff = 11008\n", + "llama_model_load: n_parts = 1\n", + "llama_model_load: type = 1\n", + "llama_model_load: ggml map size = 4017.70 MB\n", + "llama_model_load: ggml ctx size = 81.25 KB\n", + "llama_model_load: mem required = 5809.78 MB (+ 2052.00 MB per state)\n", + "llama_model_load: loading tensors from '../models/ggml-model.bin'\n", + "llama_model_load: model size = 4017.27 MB / num tensors = 291\n", + "llama_init_from_file: kv self size = 512.00 MB\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " \"id\": \"cmpl-c80008a4-191e-4418-821a-b18a4af24f70\",\n", + " \"object\": \"text_completion\",\n", + " \"created\": 1680227421,\n", + " \"model\": \"../models/ggml-model.bin\",\n", + " \"choices\": [\n", + " {\n", + " \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n", + " \"index\": 0,\n", + " \"logprobs\": null,\n", + " \"finish_reason\": \"length\"\n", + " }\n", + " ],\n", + " \"usage\": {\n", + " \"prompt_tokens\": 79,\n", + " \"completion_tokens\": 1,\n", + " \"total_tokens\": 80\n", + " }\n", + "}\n", + "Time: 9.495943784713745 seconds\n", + "Time per token: 0.11869929730892181 seconds\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + 
"llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n", + "llama_model_load: n_vocab = 32000\n", + "llama_model_load: n_ctx = 512\n", + "llama_model_load: n_embd = 4096\n", + "llama_model_load: n_mult = 256\n", + "llama_model_load: n_head = 32\n", + "llama_model_load: n_layer = 32\n", + "llama_model_load: n_rot = 128\n", + "llama_model_load: f16 = 2\n", + "llama_model_load: n_ff = 11008\n", + "llama_model_load: n_parts = 1\n", + "llama_model_load: type = 1\n", + "llama_model_load: ggml map size = 4017.70 MB\n", + "llama_model_load: ggml ctx size = 81.25 KB\n", + "llama_model_load: mem required = 5809.78 MB (+ 2052.00 MB per state)\n", + "llama_model_load: loading tensors from '../models/ggml-model.bin'\n", + "llama_model_load: model size = 4017.27 MB / num tensors = 291\n", + "llama_init_from_file: kv self size = 512.00 MB\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " \"id\": \"cmpl-d04c9fd2-3c20-4035-9181-0bfd05abfe15\",\n", + " \"object\": \"text_completion\",\n", + " \"created\": 1680227432,\n", + " \"model\": \"../models/ggml-model.bin\",\n", + " \"choices\": [\n", + " {\n", + " \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n", + " \"index\": 0,\n", + " \"logprobs\": null,\n", + " \"finish_reason\": \"length\"\n", + " }\n", + " ],\n", + " \"usage\": {\n", + " \"prompt_tokens\": 79,\n", + " \"completion_tokens\": 1,\n", + " \"total_tokens\": 80\n", + " }\n", + "}\n", + "Time: 9.226310014724731 seconds\n", + "Time per token: 0.11532887518405914 seconds\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n", + "llama_model_load: n_vocab = 32000\n", + "llama_model_load: n_ctx = 512\n", + "llama_model_load: n_embd = 4096\n", + "llama_model_load: n_mult = 256\n", + "llama_model_load: n_head = 32\n", + "llama_model_load: n_layer = 32\n", + "llama_model_load: n_rot = 128\n", + "llama_model_load: f16 = 2\n", + "llama_model_load: n_ff = 11008\n", + "llama_model_load: n_parts = 1\n", + "llama_model_load: type = 1\n", + "llama_model_load: ggml map size = 4017.70 MB\n", + "llama_model_load: ggml ctx size = 81.25 KB\n", + "llama_model_load: mem required = 5809.78 MB (+ 2052.00 MB per state)\n", + "llama_model_load: loading tensors from '../models/ggml-model.bin'\n", + "llama_model_load: model size = 4017.27 MB / num tensors = 291\n", + "llama_init_from_file: kv self size = 512.00 MB\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " \"id\": \"cmpl-04fcf88b-33c7-4b84-aac0-dcb5261363c2\",\n", + " \"object\": \"text_completion\",\n", + " \"created\": 1680227443,\n", + " \"model\": \"../models/ggml-model.bin\",\n", + " \"choices\": [\n", + " {\n", + " \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n", + " \"index\": 0,\n", + " \"logprobs\": null,\n", + " \"finish_reason\": \"length\"\n", + " }\n", + " ],\n", + " 
\"usage\": {\n", + " \"prompt_tokens\": 79,\n", + " \"completion_tokens\": 1,\n", + " \"total_tokens\": 80\n", + " }\n", + "}\n", + "Time: 12.182626962661743 seconds\n", + "Time per token: 0.15228283703327178 seconds\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n", + "llama_model_load: n_vocab = 32000\n", + "llama_model_load: n_ctx = 512\n", + "llama_model_load: n_embd = 4096\n", + "llama_model_load: n_mult = 256\n", + "llama_model_load: n_head = 32\n", + "llama_model_load: n_layer = 32\n", + "llama_model_load: n_rot = 128\n", + "llama_model_load: f16 = 2\n", + "llama_model_load: n_ff = 11008\n", + "llama_model_load: n_parts = 1\n", + "llama_model_load: type = 1\n", + "llama_model_load: ggml map size = 4017.70 MB\n", + "llama_model_load: ggml ctx size = 81.25 KB\n", + "llama_model_load: mem required = 5809.78 MB (+ 2052.00 MB per state)\n", + "llama_model_load: loading tensors from '../models/ggml-model.bin'\n", + "llama_model_load: model size = 4017.27 MB / num tensors = 291\n", + "llama_init_from_file: kv self size = 512.00 MB\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " \"id\": \"cmpl-14904676-3345-4674-a41c-419d9640b4e0\",\n", + " \"object\": \"text_completion\",\n", + " \"created\": 1680227457,\n", + " \"model\": \"../models/ggml-model.bin\",\n", + " \"choices\": [\n", + " {\n", + " \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n", + " \"index\": 0,\n", + " \"logprobs\": null,\n", + " \"finish_reason\": \"length\"\n", + " }\n", + " ],\n", + " \"usage\": {\n", + " \"prompt_tokens\": 79,\n", + " \"completion_tokens\": 1,\n", + " \"total_tokens\": 80\n", + " }\n", + "}\n", + "Time: 43.595701694488525 seconds\n", + "Time per token: 0.5449462711811066 seconds\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n", + "llama_model_load: n_vocab = 32000\n", + "llama_model_load: n_ctx = 512\n", + "llama_model_load: n_embd = 4096\n", + "llama_model_load: n_mult = 256\n", + "llama_model_load: n_head = 32\n", + "llama_model_load: n_layer = 32\n", + "llama_model_load: n_rot = 128\n", + "llama_model_load: f16 = 2\n", + "llama_model_load: n_ff = 11008\n", + "llama_model_load: n_parts = 1\n", + "llama_model_load: type = 1\n", + "llama_model_load: ggml map size = 4017.70 MB\n", + "llama_model_load: ggml ctx size = 81.25 KB\n", + "llama_model_load: mem required = 5809.78 MB (+ 1026.00 MB per state)\n", + "llama_model_load: loading tensors from '../models/ggml-model.bin'\n", + "llama_model_load: model size = 4017.27 MB / num tensors = 291\n", + "llama_init_from_file: kv self size = 256.00 MB\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " \"id\": \"cmpl-9e43b2ef-e7de-4bd2-91bf-284f5b3478fe\",\n", + " \"object\": \"text_completion\",\n", + " \"created\": 1680227502,\n", + " \"model\": \"../models/ggml-model.bin\",\n", + " \"choices\": [\n", + " {\n", + " \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you 
must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n", + " \"index\": 0,\n", + " \"logprobs\": null,\n", + " \"finish_reason\": \"length\"\n", + " }\n", + " ],\n", + " \"usage\": {\n", + " \"prompt_tokens\": 79,\n", + " \"completion_tokens\": 1,\n", + " \"total_tokens\": 80\n", + " }\n", + "}\n", + "Time: 14.726518154144287 seconds\n", + "Time per token: 0.1840814769268036 seconds\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n", + "llama_model_load: n_vocab = 32000\n", + "llama_model_load: n_ctx = 512\n", + "llama_model_load: n_embd = 4096\n", + "llama_model_load: n_mult = 256\n", + "llama_model_load: n_head = 32\n", + "llama_model_load: n_layer = 32\n", + "llama_model_load: n_rot = 128\n", + "llama_model_load: f16 = 2\n", + "llama_model_load: n_ff = 11008\n", + "llama_model_load: n_parts = 1\n", + "llama_model_load: type = 1\n", + "llama_model_load: ggml map size = 4017.70 MB\n", + "llama_model_load: ggml ctx size = 81.25 KB\n", + "llama_model_load: mem required = 5809.78 MB (+ 2052.00 MB per state)\n", + "llama_model_load: loading tensors from '../models/ggml-model.bin'\n", + "llama_model_load: model size = 4017.27 MB / num tensors = 291\n", + "llama_init_from_file: kv self size = 512.00 MB\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " \"id\": \"cmpl-3947538b-e27e-42eb-8f87-2b56e14d104c\",\n", + " \"object\": \"text_completion\",\n", + " \"created\": 1680227518,\n", + " \"model\": \"../models/ggml-model.bin\",\n", + " \"choices\": [\n", + " {\n", + " \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n", + " \"index\": 0,\n", + " \"logprobs\": null,\n", + " \"finish_reason\": \"length\"\n", + " }\n", + " ],\n", + " \"usage\": {\n", + " \"prompt_tokens\": 79,\n", + " \"completion_tokens\": 1,\n", + " \"total_tokens\": 80\n", + " }\n", + "}\n", + "Time: 8.760729789733887 seconds\n", + "Time per token: 0.10950912237167358 seconds\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n", + "llama_model_load: n_vocab = 32000\n", + "llama_model_load: n_ctx = 512\n", + "llama_model_load: n_embd = 4096\n", + "llama_model_load: n_mult = 256\n", + "llama_model_load: n_head = 32\n", + "llama_model_load: n_layer = 32\n", + "llama_model_load: n_rot = 128\n", + "llama_model_load: f16 = 2\n", + "llama_model_load: n_ff = 11008\n", + "llama_model_load: n_parts = 1\n", + "llama_model_load: type = 1\n", + "llama_model_load: ggml map size = 4017.70 MB\n", + "llama_model_load: ggml ctx size = 81.25 KB\n", + "llama_model_load: mem required = 5809.78 MB (+ 2052.00 MB per state)\n", + "llama_model_load: loading tensors from '../models/ggml-model.bin'\n", + "llama_model_load: model size = 4017.27 MB / num tensors = 291\n", + "llama_init_from_file: kv self size = 512.00 MB\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " \"id\": \"cmpl-1a0d843e-9613-49aa-b565-0e59d8067615\",\n", + " \"object\": 
\"text_completion\",\n", + " \"created\": 1680227529,\n", + " \"model\": \"../models/ggml-model.bin\",\n", + " \"choices\": [\n", + " {\n", + " \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n", + " \"index\": 0,\n", + " \"logprobs\": null,\n", + " \"finish_reason\": \"length\"\n", + " }\n", + " ],\n", + " \"usage\": {\n", + " \"prompt_tokens\": 79,\n", + " \"completion_tokens\": 1,\n", + " \"total_tokens\": 80\n", + " }\n", + "}\n", + "Time: 11.672860383987427 seconds\n", + "Time per token: 0.14591075479984283 seconds\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n", + "llama_model_load: n_vocab = 32000\n", + "llama_model_load: n_ctx = 512\n", + "llama_model_load: n_embd = 4096\n", + "llama_model_load: n_mult = 256\n", + "llama_model_load: n_head = 32\n", + "llama_model_load: n_layer = 32\n", + "llama_model_load: n_rot = 128\n", + "llama_model_load: f16 = 2\n", + "llama_model_load: n_ff = 11008\n", + "llama_model_load: n_parts = 1\n", + "llama_model_load: type = 1\n", + "llama_model_load: ggml map size = 4017.70 MB\n", + "llama_model_load: ggml ctx size = 81.25 KB\n", + "llama_model_load: mem required = 5809.78 MB (+ 2052.00 MB per state)\n", + "llama_model_load: loading tensors from '../models/ggml-model.bin'\n", + "llama_model_load: model size = 4017.27 MB / num tensors = 291\n", + "llama_init_from_file: kv self size = 512.00 MB\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " \"id\": \"cmpl-ccad9270-9554-4f9f-9aaf-387f1a11894d\",\n", + " \"object\": \"text_completion\",\n", + " \"created\": 1680227542,\n", + " \"model\": \"../models/ggml-model.bin\",\n", + " \"choices\": [\n", + " {\n", + " \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n", + " \"index\": 0,\n", + " \"logprobs\": null,\n", + " \"finish_reason\": \"length\"\n", + " }\n", + " ],\n", + " \"usage\": {\n", + " \"prompt_tokens\": 79,\n", + " \"completion_tokens\": 1,\n", + " \"total_tokens\": 80\n", + " }\n", + "}\n", + "Time: 14.368357419967651 seconds\n", + "Time per token: 0.17960446774959565 seconds\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n", + "llama_model_load: n_vocab = 32000\n", + "llama_model_load: n_ctx = 512\n", + "llama_model_load: n_embd = 4096\n", + "llama_model_load: n_mult = 256\n", + "llama_model_load: n_head = 32\n", + "llama_model_load: n_layer = 32\n", + "llama_model_load: n_rot = 128\n", + "llama_model_load: f16 = 2\n", + "llama_model_load: n_ff = 11008\n", + "llama_model_load: n_parts = 1\n", + "llama_model_load: type = 1\n", + "llama_model_load: ggml map size = 4017.70 MB\n", + "llama_model_load: ggml ctx size = 81.25 KB\n", + "llama_model_load: mem required = 5809.78 MB (+ 2052.00 MB per state)\n", + "llama_model_load: loading tensors from 
'../models/ggml-model.bin'\n", + "llama_model_load: model size = 4017.27 MB / num tensors = 291\n", + "llama_init_from_file: kv self size = 512.00 MB\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " \"id\": \"cmpl-2623073e-004f-4386-98e0-7e6ea617523a\",\n", + " \"object\": \"text_completion\",\n", + " \"created\": 1680227558,\n", + " \"model\": \"../models/ggml-model.bin\",\n", + " \"choices\": [\n", + " {\n", + " \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n", + " \"index\": 0,\n", + " \"logprobs\": null,\n", + " \"finish_reason\": \"length\"\n", + " }\n", + " ],\n", + " \"usage\": {\n", + " \"prompt_tokens\": 79,\n", + " \"completion_tokens\": 1,\n", + " \"total_tokens\": 80\n", + " }\n", + "}\n", + "Time: 9.44194221496582 seconds\n", + "Time per token: 0.11802427768707276 seconds\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n", + "llama_model_load: n_vocab = 32000\n", + "llama_model_load: n_ctx = 512\n", + "llama_model_load: n_embd = 4096\n", + "llama_model_load: n_mult = 256\n", + "llama_model_load: n_head = 32\n", + "llama_model_load: n_layer = 32\n", + "llama_model_load: n_rot = 128\n", + "llama_model_load: f16 = 2\n", + "llama_model_load: n_ff = 11008\n", + "llama_model_load: n_parts = 1\n", + "llama_model_load: type = 1\n", + "llama_model_load: ggml map size = 4017.70 MB\n", + "llama_model_load: ggml ctx size = 81.25 KB\n", + "llama_model_load: mem required = 5809.78 MB (+ 2052.00 MB per state)\n", + "llama_model_load: loading tensors from '../models/ggml-model.bin'\n", + "llama_model_load: model size = 4017.27 MB / num tensors = 291\n", + "llama_init_from_file: kv self size = 512.00 MB\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " \"id\": \"cmpl-1a199f09-0d74-4052-a191-7a8ef2df57f3\",\n", + " \"object\": \"text_completion\",\n", + " \"created\": 1680227569,\n", + " \"model\": \"../models/ggml-model.bin\",\n", + " \"choices\": [\n", + " {\n", + " \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n", + " \"index\": 0,\n", + " \"logprobs\": null,\n", + " \"finish_reason\": \"length\"\n", + " }\n", + " ],\n", + " \"usage\": {\n", + " \"prompt_tokens\": 79,\n", + " \"completion_tokens\": 1,\n", + " \"total_tokens\": 80\n", + " }\n", + "}\n", + "Time: 11.253167629241943 seconds\n", + "Time per token: 0.14066459536552428 seconds\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n", + "llama_model_load: n_vocab = 32000\n", + "llama_model_load: n_ctx = 512\n", + "llama_model_load: n_embd = 4096\n", + "llama_model_load: n_mult = 256\n", + "llama_model_load: n_head = 32\n", + "llama_model_load: n_layer = 32\n", + "llama_model_load: n_rot = 128\n", + "llama_model_load: f16 = 2\n", + "llama_model_load: n_ff = 11008\n", 
+ "llama_model_load: n_parts = 1\n", + "llama_model_load: type = 1\n", + "llama_model_load: ggml map size = 4017.70 MB\n", + "llama_model_load: ggml ctx size = 81.25 KB\n", + "llama_model_load: mem required = 5809.78 MB (+ 2052.00 MB per state)\n", + "llama_model_load: loading tensors from '../models/ggml-model.bin'\n", + "llama_model_load: model size = 4017.27 MB / num tensors = 291\n", + "llama_init_from_file: kv self size = 512.00 MB\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " \"id\": \"cmpl-2b61e491-d9b7-4d0b-b0c8-9f8ba822599d\",\n", + " \"object\": \"text_completion\",\n", + " \"created\": 1680227582,\n", + " \"model\": \"../models/ggml-model.bin\",\n", + " \"choices\": [\n", + " {\n", + " \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n", + " \"index\": 0,\n", + " \"logprobs\": null,\n", + " \"finish_reason\": \"length\"\n", + " }\n", + " ],\n", + " \"usage\": {\n", + " \"prompt_tokens\": 79,\n", + " \"completion_tokens\": 1,\n", + " \"total_tokens\": 80\n", + " }\n", + "}\n", + "Time: 12.381825685501099 seconds\n", + "Time per token: 0.15477282106876372 seconds\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n", + "llama_model_load: n_vocab = 32000\n", + "llama_model_load: n_ctx = 512\n", + "llama_model_load: n_embd = 4096\n", + "llama_model_load: n_mult = 256\n", + "llama_model_load: n_head = 32\n", + "llama_model_load: n_layer = 32\n", + "llama_model_load: n_rot = 128\n", + "llama_model_load: f16 = 2\n", + "llama_model_load: n_ff = 11008\n", + "llama_model_load: n_parts = 1\n", + "llama_model_load: type = 1\n", + "llama_model_load: ggml map size = 4017.70 MB\n", + "llama_model_load: ggml ctx size = 81.25 KB\n", + "llama_model_load: mem required = 5809.78 MB (+ 2052.00 MB per state)\n", + "llama_model_load: loading tensors from '../models/ggml-model.bin'\n", + "llama_model_load: model size = 4017.27 MB / num tensors = 291\n", + "llama_init_from_file: kv self size = 512.00 MB\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " \"id\": \"cmpl-0e4b4575-6278-4bd8-a4c5-ddb772014f7d\",\n", + " \"object\": \"text_completion\",\n", + " \"created\": 1680227596,\n", + " \"model\": \"../models/ggml-model.bin\",\n", + " \"choices\": [\n", + " {\n", + " \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n", + " \"index\": 0,\n", + " \"logprobs\": null,\n", + " \"finish_reason\": \"length\"\n", + " }\n", + " ],\n", + " \"usage\": {\n", + " \"prompt_tokens\": 79,\n", + " \"completion_tokens\": 1,\n", + " \"total_tokens\": 80\n", + " }\n", + "}\n", + "Time: 14.473106145858765 seconds\n", + "Time per token: 0.18091382682323456 seconds\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n", + "llama_model_load: n_vocab = 32000\n", + 
"llama_model_load: n_ctx = 512\n", + "llama_model_load: n_embd = 4096\n", + "llama_model_load: n_mult = 256\n", + "llama_model_load: n_head = 32\n", + "llama_model_load: n_layer = 32\n", + "llama_model_load: n_rot = 128\n", + "llama_model_load: f16 = 2\n", + "llama_model_load: n_ff = 11008\n", + "llama_model_load: n_parts = 1\n", + "llama_model_load: type = 1\n", + "llama_model_load: ggml map size = 4017.70 MB\n", + "llama_model_load: ggml ctx size = 81.25 KB\n", + "llama_model_load: mem required = 5809.78 MB (+ 2052.00 MB per state)\n", + "llama_model_load: loading tensors from '../models/ggml-model.bin'\n", + "llama_model_load: model size = 4017.27 MB / num tensors = 291\n", + "llama_init_from_file: kv self size = 512.00 MB\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " \"id\": \"cmpl-1ad3e3db-5120-41c8-8f9e-2ca07a846437\",\n", + " \"object\": \"text_completion\",\n", + " \"created\": 1680227612,\n", + " \"model\": \"../models/ggml-model.bin\",\n", + " \"choices\": [\n", + " {\n", + " \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n", + " \"index\": 0,\n", + " \"logprobs\": null,\n", + " \"finish_reason\": \"length\"\n", + " }\n", + " ],\n", + " \"usage\": {\n", + " \"prompt_tokens\": 79,\n", + " \"completion_tokens\": 1,\n", + " \"total_tokens\": 80\n", + " }\n", + "}\n", + "Time: 16.591509103775024 seconds\n", + "Time per token: 0.2073938637971878 seconds\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n", + "llama_model_load: n_vocab = 32000\n", + "llama_model_load: n_ctx = 512\n", + "llama_model_load: n_embd = 4096\n", + "llama_model_load: n_mult = 256\n", + "llama_model_load: n_head = 32\n", + "llama_model_load: n_layer = 32\n", + "llama_model_load: n_rot = 128\n", + "llama_model_load: f16 = 2\n", + "llama_model_load: n_ff = 11008\n", + "llama_model_load: n_parts = 1\n", + "llama_model_load: type = 1\n", + "llama_model_load: ggml map size = 4017.70 MB\n", + "llama_model_load: ggml ctx size = 81.25 KB\n", + "llama_model_load: mem required = 5809.78 MB (+ 2052.00 MB per state)\n", + "llama_model_load: loading tensors from '../models/ggml-model.bin'\n", + "llama_model_load: model size = 4017.27 MB / num tensors = 291\n", + "llama_init_from_file: kv self size = 512.00 MB\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " \"id\": \"cmpl-34c8fb5c-fa49-4ea6-b2e7-ba3b958e297d\",\n", + " \"object\": \"text_completion\",\n", + " \"created\": 1680227630,\n", + " \"model\": \"../models/ggml-model.bin\",\n", + " \"choices\": [\n", + " {\n", + " \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n", + " \"index\": 0,\n", + " \"logprobs\": null,\n", + " \"finish_reason\": \"length\"\n", + " }\n", + " ],\n", + " \"usage\": {\n", + " \"prompt_tokens\": 79,\n", + " \"completion_tokens\": 1,\n", + " \"total_tokens\": 80\n", + " }\n", + "}\n", + 
"Time: 9.034043788909912 seconds\n", + "Time per token: 0.1129255473613739 seconds\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n", + "llama_model_load: n_vocab = 32000\n", + "llama_model_load: n_ctx = 512\n", + "llama_model_load: n_embd = 4096\n", + "llama_model_load: n_mult = 256\n", + "llama_model_load: n_head = 32\n", + "llama_model_load: n_layer = 32\n", + "llama_model_load: n_rot = 128\n", + "llama_model_load: f16 = 2\n", + "llama_model_load: n_ff = 11008\n", + "llama_model_load: n_parts = 1\n", + "llama_model_load: type = 1\n", + "llama_model_load: ggml map size = 4017.70 MB\n", + "llama_model_load: ggml ctx size = 81.25 KB\n", + "llama_model_load: mem required = 5809.78 MB (+ 2052.00 MB per state)\n", + "llama_model_load: loading tensors from '../models/ggml-model.bin'\n", + "llama_model_load: model size = 4017.27 MB / num tensors = 291\n", + "llama_init_from_file: kv self size = 512.00 MB\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " \"id\": \"cmpl-8d5c56eb-0b43-4591-a9ac-c1ec174ec6db\",\n", + " \"object\": \"text_completion\",\n", + " \"created\": 1680227641,\n", + " \"model\": \"../models/ggml-model.bin\",\n", + " \"choices\": [\n", + " {\n", + " \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n", + " \"index\": 0,\n", + " \"logprobs\": null,\n", + " \"finish_reason\": \"length\"\n", + " }\n", + " ],\n", + " \"usage\": {\n", + " \"prompt_tokens\": 79,\n", + " \"completion_tokens\": 1,\n", + " \"total_tokens\": 80\n", + " }\n", + "}\n", + "Time: 11.218972444534302 seconds\n", + "Time per token: 0.14023715555667876 seconds\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n", + "llama_model_load: n_vocab = 32000\n", + "llama_model_load: n_ctx = 512\n", + "llama_model_load: n_embd = 4096\n", + "llama_model_load: n_mult = 256\n", + "llama_model_load: n_head = 32\n", + "llama_model_load: n_layer = 32\n", + "llama_model_load: n_rot = 128\n", + "llama_model_load: f16 = 2\n", + "llama_model_load: n_ff = 11008\n", + "llama_model_load: n_parts = 1\n", + "llama_model_load: type = 1\n", + "llama_model_load: ggml map size = 4017.70 MB\n", + "llama_model_load: ggml ctx size = 81.25 KB\n", + "llama_model_load: mem required = 5809.78 MB (+ 1026.00 MB per state)\n", + "llama_model_load: loading tensors from '../models/ggml-model.bin'\n", + "llama_model_load: model size = 4017.27 MB / num tensors = 291\n", + "llama_init_from_file: kv self size = 256.00 MB\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " \"id\": \"cmpl-bfdc554b-baa6-47c1-b35f-0f7d1321255a\",\n", + " \"object\": \"text_completion\",\n", + " \"created\": 1680227654,\n", + " \"model\": \"../models/ggml-model.bin\",\n", + " \"choices\": [\n", + " {\n", + " \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the 
capital of France?\\n### Response:\\nThe\",\n", + " \"index\": 0,\n", + " \"logprobs\": null,\n", + " \"finish_reason\": \"length\"\n", + " }\n", + " ],\n", + " \"usage\": {\n", + " \"prompt_tokens\": 79,\n", + " \"completion_tokens\": 1,\n", + " \"total_tokens\": 80\n", + " }\n", + "}\n", + "Time: 9.300573110580444 seconds\n", + "Time per token: 0.11625716388225556 seconds\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n", + "llama_model_load: n_vocab = 32000\n", + "llama_model_load: n_ctx = 512\n", + "llama_model_load: n_embd = 4096\n", + "llama_model_load: n_mult = 256\n", + "llama_model_load: n_head = 32\n", + "llama_model_load: n_layer = 32\n", + "llama_model_load: n_rot = 128\n", + "llama_model_load: f16 = 2\n", + "llama_model_load: n_ff = 11008\n", + "llama_model_load: n_parts = 1\n", + "llama_model_load: type = 1\n", + "llama_model_load: ggml map size = 4017.70 MB\n", + "llama_model_load: ggml ctx size = 81.25 KB\n", + "llama_model_load: mem required = 5809.78 MB (+ 2052.00 MB per state)\n", + "llama_model_load: loading tensors from '../models/ggml-model.bin'\n", + "llama_model_load: model size = 4017.27 MB / num tensors = 291\n", + "llama_init_from_file: kv self size = 512.00 MB\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " \"id\": \"cmpl-ad67d78b-6975-4789-982e-3653c7fca7e1\",\n", + " \"object\": \"text_completion\",\n", + " \"created\": 1680227665,\n", + " \"model\": \"../models/ggml-model.bin\",\n", + " \"choices\": [\n", + " {\n", + " \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n", + " \"index\": 0,\n", + " \"logprobs\": null,\n", + " \"finish_reason\": \"length\"\n", + " }\n", + " ],\n", + " \"usage\": {\n", + " \"prompt_tokens\": 79,\n", + " \"completion_tokens\": 1,\n", + " \"total_tokens\": 80\n", + " }\n", + "}\n", + "Time: 9.009618520736694 seconds\n", + "Time per token: 0.11262023150920868 seconds\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/andrei/Documents/llms/.venv/lib/python3.8/site-packages/skopt/optimizer/optimizer.py:449: UserWarning: The objective has been evaluated at this point before.\n", + " warnings.warn(\"The objective has been evaluated \"\n", + "llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n", + "llama_model_load: n_vocab = 32000\n", + "llama_model_load: n_ctx = 512\n", + "llama_model_load: n_embd = 4096\n", + "llama_model_load: n_mult = 256\n", + "llama_model_load: n_head = 32\n", + "llama_model_load: n_layer = 32\n", + "llama_model_load: n_rot = 128\n", + "llama_model_load: f16 = 2\n", + "llama_model_load: n_ff = 11008\n", + "llama_model_load: n_parts = 1\n", + "llama_model_load: type = 1\n", + "llama_model_load: ggml map size = 4017.70 MB\n", + "llama_model_load: ggml ctx size = 81.25 KB\n", + "llama_model_load: mem required = 5809.78 MB (+ 2052.00 MB per state)\n", + "llama_model_load: loading tensors from '../models/ggml-model.bin'\n", + "llama_model_load: model size = 4017.27 MB / num tensors = 291\n", + "llama_init_from_file: kv self size = 512.00 MB\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + 
"{\n", + " \"id\": \"cmpl-2eec3e0f-dd48-4c3a-9430-c5048827f557\",\n", + " \"object\": \"text_completion\",\n", + " \"created\": 1680227676,\n", + " \"model\": \"../models/ggml-model.bin\",\n", + " \"choices\": [\n", + " {\n", + " \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n", + " \"index\": 0,\n", + " \"logprobs\": null,\n", + " \"finish_reason\": \"length\"\n", + " }\n", + " ],\n", + " \"usage\": {\n", + " \"prompt_tokens\": 79,\n", + " \"completion_tokens\": 1,\n", + " \"total_tokens\": 80\n", + " }\n", + "}\n", + "Time: 8.997699737548828 seconds\n", + "Time per token: 0.11247124671936035 seconds\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/andrei/Documents/llms/.venv/lib/python3.8/site-packages/skopt/optimizer/optimizer.py:449: UserWarning: The objective has been evaluated at this point before.\n", + " warnings.warn(\"The objective has been evaluated \"\n", + "llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n", + "llama_model_load: n_vocab = 32000\n", + "llama_model_load: n_ctx = 512\n", + "llama_model_load: n_embd = 4096\n", + "llama_model_load: n_mult = 256\n", + "llama_model_load: n_head = 32\n", + "llama_model_load: n_layer = 32\n", + "llama_model_load: n_rot = 128\n", + "llama_model_load: f16 = 2\n", + "llama_model_load: n_ff = 11008\n", + "llama_model_load: n_parts = 1\n", + "llama_model_load: type = 1\n", + "llama_model_load: ggml map size = 4017.70 MB\n", + "llama_model_load: ggml ctx size = 81.25 KB\n", + "llama_model_load: mem required = 5809.78 MB (+ 2052.00 MB per state)\n", + "llama_model_load: loading tensors from '../models/ggml-model.bin'\n", + "llama_model_load: model size = 4017.27 MB / num tensors = 291\n", + "llama_init_from_file: kv self size = 512.00 MB\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " \"id\": \"cmpl-b129732a-8d7b-4382-baaf-740378c923ec\",\n", + " \"object\": \"text_completion\",\n", + " \"created\": 1680227686,\n", + " \"model\": \"../models/ggml-model.bin\",\n", + " \"choices\": [\n", + " {\n", + " \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n", + " \"index\": 0,\n", + " \"logprobs\": null,\n", + " \"finish_reason\": \"length\"\n", + " }\n", + " ],\n", + " \"usage\": {\n", + " \"prompt_tokens\": 79,\n", + " \"completion_tokens\": 1,\n", + " \"total_tokens\": 80\n", + " }\n", + "}\n", + "Time: 9.252354621887207 seconds\n", + "Time per token: 0.11565443277359008 seconds\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/andrei/Documents/llms/.venv/lib/python3.8/site-packages/skopt/optimizer/optimizer.py:449: UserWarning: The objective has been evaluated at this point before.\n", + " warnings.warn(\"The objective has been evaluated \"\n", + "llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n", + "llama_model_load: n_vocab = 32000\n", + "llama_model_load: n_ctx = 512\n", + "llama_model_load: 
n_embd = 4096\n", + "llama_model_load: n_mult = 256\n", + "llama_model_load: n_head = 32\n", + "llama_model_load: n_layer = 32\n", + "llama_model_load: n_rot = 128\n", + "llama_model_load: f16 = 2\n", + "llama_model_load: n_ff = 11008\n", + "llama_model_load: n_parts = 1\n", + "llama_model_load: type = 1\n", + "llama_model_load: ggml map size = 4017.70 MB\n", + "llama_model_load: ggml ctx size = 81.25 KB\n", + "llama_model_load: mem required = 5809.78 MB (+ 2052.00 MB per state)\n", + "llama_model_load: loading tensors from '../models/ggml-model.bin'\n", + "llama_model_load: model size = 4017.27 MB / num tensors = 291\n", + "llama_init_from_file: kv self size = 512.00 MB\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " \"id\": \"cmpl-bb25c002-69e0-40ec-8099-0ba4462338aa\",\n", + " \"object\": \"text_completion\",\n", + " \"created\": 1680227697,\n", + " \"model\": \"../models/ggml-model.bin\",\n", + " \"choices\": [\n", + " {\n", + " \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n", + " \"index\": 0,\n", + " \"logprobs\": null,\n", + " \"finish_reason\": \"length\"\n", + " }\n", + " ],\n", + " \"usage\": {\n", + " \"prompt_tokens\": 79,\n", + " \"completion_tokens\": 1,\n", + " \"total_tokens\": 80\n", + " }\n", + "}\n", + "Time: 9.040243864059448 seconds\n", + "Time per token: 0.1130030483007431 seconds\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/andrei/Documents/llms/.venv/lib/python3.8/site-packages/skopt/optimizer/optimizer.py:449: UserWarning: The objective has been evaluated at this point before.\n", + " warnings.warn(\"The objective has been evaluated \"\n", + "llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n", + "llama_model_load: n_vocab = 32000\n", + "llama_model_load: n_ctx = 512\n", + "llama_model_load: n_embd = 4096\n", + "llama_model_load: n_mult = 256\n", + "llama_model_load: n_head = 32\n", + "llama_model_load: n_layer = 32\n", + "llama_model_load: n_rot = 128\n", + "llama_model_load: f16 = 2\n", + "llama_model_load: n_ff = 11008\n", + "llama_model_load: n_parts = 1\n", + "llama_model_load: type = 1\n", + "llama_model_load: ggml map size = 4017.70 MB\n", + "llama_model_load: ggml ctx size = 81.25 KB\n", + "llama_model_load: mem required = 5809.78 MB (+ 2052.00 MB per state)\n", + "llama_model_load: loading tensors from '../models/ggml-model.bin'\n", + "llama_model_load: model size = 4017.27 MB / num tensors = 291\n", + "llama_init_from_file: kv self size = 512.00 MB\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " \"id\": \"cmpl-63705814-7c93-4d6b-a9f2-0579941ebf54\",\n", + " \"object\": \"text_completion\",\n", + " \"created\": 1680227708,\n", + " \"model\": \"../models/ggml-model.bin\",\n", + " \"choices\": [\n", + " {\n", + " \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n", + " \"index\": 0,\n", + " \"logprobs\": null,\n", + " 
\"finish_reason\": \"length\"\n", + " }\n", + " ],\n", + " \"usage\": {\n", + " \"prompt_tokens\": 79,\n", + " \"completion_tokens\": 1,\n", + " \"total_tokens\": 80\n", + " }\n", + "}\n", + "Time: 8.947132349014282 seconds\n", + "Time per token: 0.11183915436267852 seconds\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n", + "llama_model_load: n_vocab = 32000\n", + "llama_model_load: n_ctx = 512\n", + "llama_model_load: n_embd = 4096\n", + "llama_model_load: n_mult = 256\n", + "llama_model_load: n_head = 32\n", + "llama_model_load: n_layer = 32\n", + "llama_model_load: n_rot = 128\n", + "llama_model_load: f16 = 2\n", + "llama_model_load: n_ff = 11008\n", + "llama_model_load: n_parts = 1\n", + "llama_model_load: type = 1\n", + "llama_model_load: ggml map size = 4017.70 MB\n", + "llama_model_load: ggml ctx size = 81.25 KB\n", + "llama_model_load: mem required = 5809.78 MB (+ 2052.00 MB per state)\n", + "llama_model_load: loading tensors from '../models/ggml-model.bin'\n", + "llama_model_load: model size = 4017.27 MB / num tensors = 291\n", + "llama_init_from_file: kv self size = 512.00 MB\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " \"id\": \"cmpl-8afe123b-423d-4757-82d9-15fc12cfd24e\",\n", + " \"object\": \"text_completion\",\n", + " \"created\": 1680227720,\n", + " \"model\": \"../models/ggml-model.bin\",\n", + " \"choices\": [\n", + " {\n", + " \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n", + " \"index\": 0,\n", + " \"logprobs\": null,\n", + " \"finish_reason\": \"length\"\n", + " }\n", + " ],\n", + " \"usage\": {\n", + " \"prompt_tokens\": 79,\n", + " \"completion_tokens\": 1,\n", + " \"total_tokens\": 80\n", + " }\n", + "}\n", + "Time: 10.335533857345581 seconds\n", + "Time per token: 0.12919417321681975 seconds\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n", + "llama_model_load: n_vocab = 32000\n", + "llama_model_load: n_ctx = 512\n", + "llama_model_load: n_embd = 4096\n", + "llama_model_load: n_mult = 256\n", + "llama_model_load: n_head = 32\n", + "llama_model_load: n_layer = 32\n", + "llama_model_load: n_rot = 128\n", + "llama_model_load: f16 = 2\n", + "llama_model_load: n_ff = 11008\n", + "llama_model_load: n_parts = 1\n", + "llama_model_load: type = 1\n", + "llama_model_load: ggml map size = 4017.70 MB\n", + "llama_model_load: ggml ctx size = 81.25 KB\n", + "llama_model_load: mem required = 5809.78 MB (+ 2052.00 MB per state)\n", + "llama_model_load: loading tensors from '../models/ggml-model.bin'\n", + "llama_model_load: model size = 4017.27 MB / num tensors = 291\n", + "llama_init_from_file: kv self size = 512.00 MB\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " \"id\": \"cmpl-4937353f-e66f-4632-aea7-dd1133af9727\",\n", + " \"object\": \"text_completion\",\n", + " \"created\": 1680227732,\n", + " \"model\": \"../models/ggml-model.bin\",\n", + " \"choices\": [\n", + " {\n", + " \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and 
politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n", + " \"index\": 0,\n", + " \"logprobs\": null,\n", + " \"finish_reason\": \"length\"\n", + " }\n", + " ],\n", + " \"usage\": {\n", + " \"prompt_tokens\": 79,\n", + " \"completion_tokens\": 1,\n", + " \"total_tokens\": 80\n", + " }\n", + "}\n", + "Time: 8.99415397644043 seconds\n", + "Time per token: 0.11242692470550537 seconds\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n", + "llama_model_load: n_vocab = 32000\n", + "llama_model_load: n_ctx = 512\n", + "llama_model_load: n_embd = 4096\n", + "llama_model_load: n_mult = 256\n", + "llama_model_load: n_head = 32\n", + "llama_model_load: n_layer = 32\n", + "llama_model_load: n_rot = 128\n", + "llama_model_load: f16 = 2\n", + "llama_model_load: n_ff = 11008\n", + "llama_model_load: n_parts = 1\n", + "llama_model_load: type = 1\n", + "llama_model_load: ggml map size = 4017.70 MB\n", + "llama_model_load: ggml ctx size = 81.25 KB\n", + "llama_model_load: mem required = 5809.78 MB (+ 2052.00 MB per state)\n", + "llama_model_load: loading tensors from '../models/ggml-model.bin'\n", + "llama_model_load: model size = 4017.27 MB / num tensors = 291\n", + "llama_init_from_file: kv self size = 512.00 MB\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " \"id\": \"cmpl-78f86527-ccc7-4a5d-9b7f-38386998ba2a\",\n", + " \"object\": \"text_completion\",\n", + " \"created\": 1680227743,\n", + " \"model\": \"../models/ggml-model.bin\",\n", + " \"choices\": [\n", + " {\n", + " \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n", + " \"index\": 0,\n", + " \"logprobs\": null,\n", + " \"finish_reason\": \"length\"\n", + " }\n", + " ],\n", + " \"usage\": {\n", + " \"prompt_tokens\": 79,\n", + " \"completion_tokens\": 1,\n", + " \"total_tokens\": 80\n", + " }\n", + "}\n", + "Time: 15.732706308364868 seconds\n", + "Time per token: 0.19665882885456085 seconds\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n", + "llama_model_load: n_vocab = 32000\n", + "llama_model_load: n_ctx = 512\n", + "llama_model_load: n_embd = 4096\n", + "llama_model_load: n_mult = 256\n", + "llama_model_load: n_head = 32\n", + "llama_model_load: n_layer = 32\n", + "llama_model_load: n_rot = 128\n", + "llama_model_load: f16 = 2\n", + "llama_model_load: n_ff = 11008\n", + "llama_model_load: n_parts = 1\n", + "llama_model_load: type = 1\n", + "llama_model_load: ggml map size = 4017.70 MB\n", + "llama_model_load: ggml ctx size = 81.25 KB\n", + "llama_model_load: mem required = 5809.78 MB (+ 1026.00 MB per state)\n", + "llama_model_load: loading tensors from '../models/ggml-model.bin'\n", + "llama_model_load: model size = 4017.27 MB / num tensors = 291\n", + "llama_init_from_file: kv self size = 256.00 MB\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " \"id\": 
\"cmpl-4d98c564-fcb4-45ec-9f8d-f64430abbfb3\",\n", + " \"object\": \"text_completion\",\n", + " \"created\": 1680227761,\n", + " \"model\": \"../models/ggml-model.bin\",\n", + " \"choices\": [\n", + " {\n", + " \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n", + " \"index\": 0,\n", + " \"logprobs\": null,\n", + " \"finish_reason\": \"length\"\n", + " }\n", + " ],\n", + " \"usage\": {\n", + " \"prompt_tokens\": 79,\n", + " \"completion_tokens\": 1,\n", + " \"total_tokens\": 80\n", + " }\n", + "}\n", + "Time: 9.319743633270264 seconds\n", + "Time per token: 0.11649679541587829 seconds\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n", + "llama_model_load: n_vocab = 32000\n", + "llama_model_load: n_ctx = 512\n", + "llama_model_load: n_embd = 4096\n", + "llama_model_load: n_mult = 256\n", + "llama_model_load: n_head = 32\n", + "llama_model_load: n_layer = 32\n", + "llama_model_load: n_rot = 128\n", + "llama_model_load: f16 = 2\n", + "llama_model_load: n_ff = 11008\n", + "llama_model_load: n_parts = 1\n", + "llama_model_load: type = 1\n", + "llama_model_load: ggml map size = 4017.70 MB\n", + "llama_model_load: ggml ctx size = 81.25 KB\n", + "llama_model_load: mem required = 5809.78 MB (+ 1026.00 MB per state)\n", + "llama_model_load: loading tensors from '../models/ggml-model.bin'\n", + "llama_model_load: model size = 4017.27 MB / num tensors = 291\n", + "llama_init_from_file: kv self size = 256.00 MB\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " \"id\": \"cmpl-ee855931-2578-45bc-93bf-319c4e6aa43a\",\n", + " \"object\": \"text_completion\",\n", + " \"created\": 1680227772,\n", + " \"model\": \"../models/ggml-model.bin\",\n", + " \"choices\": [\n", + " {\n", + " \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n", + " \"index\": 0,\n", + " \"logprobs\": null,\n", + " \"finish_reason\": \"length\"\n", + " }\n", + " ],\n", + " \"usage\": {\n", + " \"prompt_tokens\": 79,\n", + " \"completion_tokens\": 1,\n", + " \"total_tokens\": 80\n", + " }\n", + "}\n", + "Time: 15.189301490783691 seconds\n", + "Time per token: 0.18986626863479614 seconds\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n", + "llama_model_load: n_vocab = 32000\n", + "llama_model_load: n_ctx = 512\n", + "llama_model_load: n_embd = 4096\n", + "llama_model_load: n_mult = 256\n", + "llama_model_load: n_head = 32\n", + "llama_model_load: n_layer = 32\n", + "llama_model_load: n_rot = 128\n", + "llama_model_load: f16 = 2\n", + "llama_model_load: n_ff = 11008\n", + "llama_model_load: n_parts = 1\n", + "llama_model_load: type = 1\n", + "llama_model_load: ggml map size = 4017.70 MB\n", + "llama_model_load: ggml ctx size = 81.25 KB\n", + "llama_model_load: mem required = 5809.78 MB (+ 1026.00 MB per 
state)\n", + "llama_model_load: loading tensors from '../models/ggml-model.bin'\n", + "llama_model_load: model size = 4017.27 MB / num tensors = 291\n", + "llama_init_from_file: kv self size = 256.00 MB\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " \"id\": \"cmpl-14f0b547-4d71-4a7f-a3d6-3127998903b3\",\n", + " \"object\": \"text_completion\",\n", + " \"created\": 1680227790,\n", + " \"model\": \"../models/ggml-model.bin\",\n", + " \"choices\": [\n", + " {\n", + " \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n", + " \"index\": 0,\n", + " \"logprobs\": null,\n", + " \"finish_reason\": \"length\"\n", + " }\n", + " ],\n", + " \"usage\": {\n", + " \"prompt_tokens\": 79,\n", + " \"completion_tokens\": 1,\n", + " \"total_tokens\": 80\n", + " }\n", + "}\n", + "Time: 9.464989423751831 seconds\n", + "Time per token: 0.11831236779689788 seconds\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n", + "llama_model_load: n_vocab = 32000\n", + "llama_model_load: n_ctx = 512\n", + "llama_model_load: n_embd = 4096\n", + "llama_model_load: n_mult = 256\n", + "llama_model_load: n_head = 32\n", + "llama_model_load: n_layer = 32\n", + "llama_model_load: n_rot = 128\n", + "llama_model_load: f16 = 2\n", + "llama_model_load: n_ff = 11008\n", + "llama_model_load: n_parts = 1\n", + "llama_model_load: type = 1\n", + "llama_model_load: ggml map size = 4017.70 MB\n", + "llama_model_load: ggml ctx size = 81.25 KB\n", + "llama_model_load: mem required = 5809.78 MB (+ 2052.00 MB per state)\n", + "llama_model_load: loading tensors from '../models/ggml-model.bin'\n", + "llama_model_load: model size = 4017.27 MB / num tensors = 291\n", + "llama_init_from_file: kv self size = 512.00 MB\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " \"id\": \"cmpl-4eb5258a-5836-414c-88f6-e217bacaded6\",\n", + " \"object\": \"text_completion\",\n", + " \"created\": 1680227801,\n", + " \"model\": \"../models/ggml-model.bin\",\n", + " \"choices\": [\n", + " {\n", + " \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n", + " \"index\": 0,\n", + " \"logprobs\": null,\n", + " \"finish_reason\": \"length\"\n", + " }\n", + " ],\n", + " \"usage\": {\n", + " \"prompt_tokens\": 79,\n", + " \"completion_tokens\": 1,\n", + " \"total_tokens\": 80\n", + " }\n", + "}\n", + "Time: 13.818569660186768 seconds\n", + "Time per token: 0.1727321207523346 seconds\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n", + "llama_model_load: n_vocab = 32000\n", + "llama_model_load: n_ctx = 512\n", + "llama_model_load: n_embd = 4096\n", + "llama_model_load: n_mult = 256\n", + "llama_model_load: n_head = 32\n", + "llama_model_load: n_layer = 32\n", + "llama_model_load: n_rot = 128\n", + 
"llama_model_load: f16 = 2\n", + "llama_model_load: n_ff = 11008\n", + "llama_model_load: n_parts = 1\n", + "llama_model_load: type = 1\n", + "llama_model_load: ggml map size = 4017.70 MB\n", + "llama_model_load: ggml ctx size = 81.25 KB\n", + "llama_model_load: mem required = 5809.78 MB (+ 1026.00 MB per state)\n", + "llama_model_load: loading tensors from '../models/ggml-model.bin'\n", + "llama_model_load: model size = 4017.27 MB / num tensors = 291\n", + "llama_init_from_file: kv self size = 256.00 MB\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " \"id\": \"cmpl-66b7c783-d506-45c1-b39b-c91666a02b44\",\n", + " \"object\": \"text_completion\",\n", + " \"created\": 1680227817,\n", + " \"model\": \"../models/ggml-model.bin\",\n", + " \"choices\": [\n", + " {\n", + " \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n", + " \"index\": 0,\n", + " \"logprobs\": null,\n", + " \"finish_reason\": \"length\"\n", + " }\n", + " ],\n", + " \"usage\": {\n", + " \"prompt_tokens\": 79,\n", + " \"completion_tokens\": 1,\n", + " \"total_tokens\": 80\n", + " }\n", + "}\n", + "Time: 27.316773176193237 seconds\n", + "Time per token: 0.34145966470241546 seconds\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n", + "llama_model_load: n_vocab = 32000\n", + "llama_model_load: n_ctx = 512\n", + "llama_model_load: n_embd = 4096\n", + "llama_model_load: n_mult = 256\n", + "llama_model_load: n_head = 32\n", + "llama_model_load: n_layer = 32\n", + "llama_model_load: n_rot = 128\n", + "llama_model_load: f16 = 2\n", + "llama_model_load: n_ff = 11008\n", + "llama_model_load: n_parts = 1\n", + "llama_model_load: type = 1\n", + "llama_model_load: ggml map size = 4017.70 MB\n", + "llama_model_load: ggml ctx size = 81.25 KB\n", + "llama_model_load: mem required = 5809.78 MB (+ 2052.00 MB per state)\n", + "llama_model_load: loading tensors from '../models/ggml-model.bin'\n", + "llama_model_load: model size = 4017.27 MB / num tensors = 291\n", + "llama_init_from_file: kv self size = 512.00 MB\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " \"id\": \"cmpl-d53b48ca-30e2-43c2-9fb5-62ef6a65fafa\",\n", + " \"object\": \"text_completion\",\n", + " \"created\": 1680227847,\n", + " \"model\": \"../models/ggml-model.bin\",\n", + " \"choices\": [\n", + " {\n", + " \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n", + " \"index\": 0,\n", + " \"logprobs\": null,\n", + " \"finish_reason\": \"length\"\n", + " }\n", + " ],\n", + " \"usage\": {\n", + " \"prompt_tokens\": 79,\n", + " \"completion_tokens\": 1,\n", + " \"total_tokens\": 80\n", + " }\n", + "}\n", + "Time: 9.132777214050293 seconds\n", + "Time per token: 0.11415971517562866 seconds\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "llama_model_load: loading model from 
'../models/ggml-model.bin' - please wait ...\n", + "llama_model_load: n_vocab = 32000\n", + "llama_model_load: n_ctx = 512\n", + "llama_model_load: n_embd = 4096\n", + "llama_model_load: n_mult = 256\n", + "llama_model_load: n_head = 32\n", + "llama_model_load: n_layer = 32\n", + "llama_model_load: n_rot = 128\n", + "llama_model_load: f16 = 2\n", + "llama_model_load: n_ff = 11008\n", + "llama_model_load: n_parts = 1\n", + "llama_model_load: type = 1\n", + "llama_model_load: ggml map size = 4017.70 MB\n", + "llama_model_load: ggml ctx size = 81.25 KB\n", + "llama_model_load: mem required = 5809.78 MB (+ 1026.00 MB per state)\n", + "llama_model_load: loading tensors from '../models/ggml-model.bin'\n", + "llama_model_load: model size = 4017.27 MB / num tensors = 291\n", + "llama_init_from_file: kv self size = 256.00 MB\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " \"id\": \"cmpl-d0909f83-5caa-4098-a0e6-9b2ad1e2b12f\",\n", + " \"object\": \"text_completion\",\n", + " \"created\": 1680227858,\n", + " \"model\": \"../models/ggml-model.bin\",\n", + " \"choices\": [\n", + " {\n", + " \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n", + " \"index\": 0,\n", + " \"logprobs\": null,\n", + " \"finish_reason\": \"length\"\n", + " }\n", + " ],\n", + " \"usage\": {\n", + " \"prompt_tokens\": 79,\n", + " \"completion_tokens\": 1,\n", + " \"total_tokens\": 80\n", + " }\n", + "}\n", + "Time: 9.273045539855957 seconds\n", + "Time per token: 0.11591306924819947 seconds\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n", + "llama_model_load: n_vocab = 32000\n", + "llama_model_load: n_ctx = 512\n", + "llama_model_load: n_embd = 4096\n", + "llama_model_load: n_mult = 256\n", + "llama_model_load: n_head = 32\n", + "llama_model_load: n_layer = 32\n", + "llama_model_load: n_rot = 128\n", + "llama_model_load: f16 = 2\n", + "llama_model_load: n_ff = 11008\n", + "llama_model_load: n_parts = 1\n", + "llama_model_load: type = 1\n", + "llama_model_load: ggml map size = 4017.70 MB\n", + "llama_model_load: ggml ctx size = 81.25 KB\n", + "llama_model_load: mem required = 5809.78 MB (+ 1026.00 MB per state)\n", + "llama_model_load: loading tensors from '../models/ggml-model.bin'\n", + "llama_model_load: model size = 4017.27 MB / num tensors = 291\n", + "llama_init_from_file: kv self size = 256.00 MB\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " \"id\": \"cmpl-7045f5c7-cf5d-48e3-9353-032c320e56fa\",\n", + " \"object\": \"text_completion\",\n", + " \"created\": 1680227870,\n", + " \"model\": \"../models/ggml-model.bin\",\n", + " \"choices\": [\n", + " {\n", + " \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n", + " \"index\": 0,\n", + " \"logprobs\": null,\n", + " \"finish_reason\": \"length\"\n", + " }\n", + " ],\n", + " \"usage\": {\n", + " \"prompt_tokens\": 
79,\n", + " \"completion_tokens\": 1,\n", + " \"total_tokens\": 80\n", + " }\n", + "}\n", + "Time: 8.90743088722229 seconds\n", + "Time per token: 0.11134288609027862 seconds\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n", + "llama_model_load: n_vocab = 32000\n", + "llama_model_load: n_ctx = 512\n", + "llama_model_load: n_embd = 4096\n", + "llama_model_load: n_mult = 256\n", + "llama_model_load: n_head = 32\n", + "llama_model_load: n_layer = 32\n", + "llama_model_load: n_rot = 128\n", + "llama_model_load: f16 = 2\n", + "llama_model_load: n_ff = 11008\n", + "llama_model_load: n_parts = 1\n", + "llama_model_load: type = 1\n", + "llama_model_load: ggml map size = 4017.70 MB\n", + "llama_model_load: ggml ctx size = 81.25 KB\n", + "llama_model_load: mem required = 5809.78 MB (+ 1026.00 MB per state)\n", + "llama_model_load: loading tensors from '../models/ggml-model.bin'\n", + "llama_model_load: model size = 4017.27 MB / num tensors = 291\n", + "llama_init_from_file: kv self size = 256.00 MB\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " \"id\": \"cmpl-e623667d-d6cc-4908-a648-60380f723592\",\n", + " \"object\": \"text_completion\",\n", + " \"created\": 1680227881,\n", + " \"model\": \"../models/ggml-model.bin\",\n", + " \"choices\": [\n", + " {\n", + " \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n", + " \"index\": 0,\n", + " \"logprobs\": null,\n", + " \"finish_reason\": \"length\"\n", + " }\n", + " ],\n", + " \"usage\": {\n", + " \"prompt_tokens\": 79,\n", + " \"completion_tokens\": 1,\n", + " \"total_tokens\": 80\n", + " }\n", + "}\n", + "Time: 9.06355595588684 seconds\n", + "Time per token: 0.11329444944858551 seconds\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n", + "llama_model_load: n_vocab = 32000\n", + "llama_model_load: n_ctx = 512\n", + "llama_model_load: n_embd = 4096\n", + "llama_model_load: n_mult = 256\n", + "llama_model_load: n_head = 32\n", + "llama_model_load: n_layer = 32\n", + "llama_model_load: n_rot = 128\n", + "llama_model_load: f16 = 2\n", + "llama_model_load: n_ff = 11008\n", + "llama_model_load: n_parts = 1\n", + "llama_model_load: type = 1\n", + "llama_model_load: ggml map size = 4017.70 MB\n", + "llama_model_load: ggml ctx size = 81.25 KB\n", + "llama_model_load: mem required = 5809.78 MB (+ 2052.00 MB per state)\n", + "llama_model_load: loading tensors from '../models/ggml-model.bin'\n", + "llama_model_load: model size = 4017.27 MB / num tensors = 291\n", + "llama_init_from_file: kv self size = 512.00 MB\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " \"id\": \"cmpl-44ec163c-25dd-40ae-a786-d8b4c9ff31b1\",\n", + " \"object\": \"text_completion\",\n", + " \"created\": 1680227892,\n", + " \"model\": \"../models/ggml-model.bin\",\n", + " \"choices\": [\n", + " {\n", + " \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this 
line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n", + " \"index\": 0,\n", + " \"logprobs\": null,\n", + " \"finish_reason\": \"length\"\n", + " }\n", + " ],\n", + " \"usage\": {\n", + " \"prompt_tokens\": 79,\n", + " \"completion_tokens\": 1,\n", + " \"total_tokens\": 80\n", + " }\n", + "}\n", + "Time: 9.249061107635498 seconds\n", + "Time per token: 0.11561326384544372 seconds\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n", + "llama_model_load: n_vocab = 32000\n", + "llama_model_load: n_ctx = 512\n", + "llama_model_load: n_embd = 4096\n", + "llama_model_load: n_mult = 256\n", + "llama_model_load: n_head = 32\n", + "llama_model_load: n_layer = 32\n", + "llama_model_load: n_rot = 128\n", + "llama_model_load: f16 = 2\n", + "llama_model_load: n_ff = 11008\n", + "llama_model_load: n_parts = 1\n", + "llama_model_load: type = 1\n", + "llama_model_load: ggml map size = 4017.70 MB\n", + "llama_model_load: ggml ctx size = 81.25 KB\n", + "llama_model_load: mem required = 5809.78 MB (+ 2052.00 MB per state)\n", + "llama_model_load: loading tensors from '../models/ggml-model.bin'\n", + "llama_model_load: model size = 4017.27 MB / num tensors = 291\n", + "llama_init_from_file: kv self size = 512.00 MB\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " \"id\": \"cmpl-cb435214-0d20-4566-b312-68d8960ebe25\",\n", + " \"object\": \"text_completion\",\n", + " \"created\": 1680227903,\n", + " \"model\": \"../models/ggml-model.bin\",\n", + " \"choices\": [\n", + " {\n", + " \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n", + " \"index\": 0,\n", + " \"logprobs\": null,\n", + " \"finish_reason\": \"length\"\n", + " }\n", + " ],\n", + " \"usage\": {\n", + " \"prompt_tokens\": 79,\n", + " \"completion_tokens\": 1,\n", + " \"total_tokens\": 80\n", + " }\n", + "}\n", + "Time: 9.296529054641724 seconds\n", + "Time per token: 0.11620661318302154 seconds\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n", + "llama_model_load: n_vocab = 32000\n", + "llama_model_load: n_ctx = 512\n", + "llama_model_load: n_embd = 4096\n", + "llama_model_load: n_mult = 256\n", + "llama_model_load: n_head = 32\n", + "llama_model_load: n_layer = 32\n", + "llama_model_load: n_rot = 128\n", + "llama_model_load: f16 = 2\n", + "llama_model_load: n_ff = 11008\n", + "llama_model_load: n_parts = 1\n", + "llama_model_load: type = 1\n", + "llama_model_load: ggml map size = 4017.70 MB\n", + "llama_model_load: ggml ctx size = 81.25 KB\n", + "llama_model_load: mem required = 5809.78 MB (+ 1026.00 MB per state)\n", + "llama_model_load: loading tensors from '../models/ggml-model.bin'\n", + "llama_model_load: model size = 4017.27 MB / num tensors = 291\n", + "llama_init_from_file: kv self size = 256.00 MB\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " \"id\": \"cmpl-dc704f52-bed9-44f0-8335-a2ec4af3a27c\",\n", + " \"object\": \"text_completion\",\n", + " \"created\": 1680227914,\n", 
+ " \"model\": \"../models/ggml-model.bin\",\n", + " \"choices\": [\n", + " {\n", + " \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n", + " \"index\": 0,\n", + " \"logprobs\": null,\n", + " \"finish_reason\": \"length\"\n", + " }\n", + " ],\n", + " \"usage\": {\n", + " \"prompt_tokens\": 79,\n", + " \"completion_tokens\": 1,\n", + " \"total_tokens\": 80\n", + " }\n", + "}\n", + "Time: 12.455670356750488 seconds\n", + "Time per token: 0.1556958794593811 seconds\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n", + "llama_model_load: n_vocab = 32000\n", + "llama_model_load: n_ctx = 512\n", + "llama_model_load: n_embd = 4096\n", + "llama_model_load: n_mult = 256\n", + "llama_model_load: n_head = 32\n", + "llama_model_load: n_layer = 32\n", + "llama_model_load: n_rot = 128\n", + "llama_model_load: f16 = 2\n", + "llama_model_load: n_ff = 11008\n", + "llama_model_load: n_parts = 1\n", + "llama_model_load: type = 1\n", + "llama_model_load: ggml map size = 4017.70 MB\n", + "llama_model_load: ggml ctx size = 81.25 KB\n", + "llama_model_load: mem required = 5809.78 MB (+ 2052.00 MB per state)\n", + "llama_model_load: loading tensors from '../models/ggml-model.bin'\n", + "llama_model_load: model size = 4017.27 MB / num tensors = 291\n", + "llama_init_from_file: kv self size = 512.00 MB\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " \"id\": \"cmpl-67570fa5-1c3d-47d6-b7c6-b3a734aae3f5\",\n", + " \"object\": \"text_completion\",\n", + " \"created\": 1680227928,\n", + " \"model\": \"../models/ggml-model.bin\",\n", + " \"choices\": [\n", + " {\n", + " \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n", + " \"index\": 0,\n", + " \"logprobs\": null,\n", + " \"finish_reason\": \"length\"\n", + " }\n", + " ],\n", + " \"usage\": {\n", + " \"prompt_tokens\": 79,\n", + " \"completion_tokens\": 1,\n", + " \"total_tokens\": 80\n", + " }\n", + "}\n", + "Time: 9.269653558731079 seconds\n", + "Time per token: 0.11587066948413849 seconds\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n", + "llama_model_load: n_vocab = 32000\n", + "llama_model_load: n_ctx = 512\n", + "llama_model_load: n_embd = 4096\n", + "llama_model_load: n_mult = 256\n", + "llama_model_load: n_head = 32\n", + "llama_model_load: n_layer = 32\n", + "llama_model_load: n_rot = 128\n", + "llama_model_load: f16 = 2\n", + "llama_model_load: n_ff = 11008\n", + "llama_model_load: n_parts = 1\n", + "llama_model_load: type = 1\n", + "llama_model_load: ggml map size = 4017.70 MB\n", + "llama_model_load: ggml ctx size = 81.25 KB\n", + "llama_model_load: mem required = 5809.78 MB (+ 2052.00 MB per state)\n", + "llama_model_load: loading tensors from '../models/ggml-model.bin'\n", + "llama_model_load: model size = 4017.27 MB / 
num tensors = 291\n", + "llama_init_from_file: kv self size = 512.00 MB\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " \"id\": \"cmpl-4bd6c6f2-9849-4047-93c8-88b1914ef184\",\n", + " \"object\": \"text_completion\",\n", + " \"created\": 1680227939,\n", + " \"model\": \"../models/ggml-model.bin\",\n", + " \"choices\": [\n", + " {\n", + " \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n", + " \"index\": 0,\n", + " \"logprobs\": null,\n", + " \"finish_reason\": \"length\"\n", + " }\n", + " ],\n", + " \"usage\": {\n", + " \"prompt_tokens\": 79,\n", + " \"completion_tokens\": 1,\n", + " \"total_tokens\": 80\n", + " }\n", + "}\n", + "Time: 9.308398485183716 seconds\n", + "Time per token: 0.11635498106479644 seconds\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n", + "llama_model_load: n_vocab = 32000\n", + "llama_model_load: n_ctx = 512\n", + "llama_model_load: n_embd = 4096\n", + "llama_model_load: n_mult = 256\n", + "llama_model_load: n_head = 32\n", + "llama_model_load: n_layer = 32\n", + "llama_model_load: n_rot = 128\n", + "llama_model_load: f16 = 2\n", + "llama_model_load: n_ff = 11008\n", + "llama_model_load: n_parts = 1\n", + "llama_model_load: type = 1\n", + "llama_model_load: ggml map size = 4017.70 MB\n", + "llama_model_load: ggml ctx size = 81.25 KB\n", + "llama_model_load: mem required = 5809.78 MB (+ 1026.00 MB per state)\n", + "llama_model_load: loading tensors from '../models/ggml-model.bin'\n", + "llama_model_load: model size = 4017.27 MB / num tensors = 291\n", + "llama_init_from_file: kv self size = 256.00 MB\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " \"id\": \"cmpl-6413afd7-fdc1-4c28-864d-6acdf2775060\",\n", + " \"object\": \"text_completion\",\n", + " \"created\": 1680227950,\n", + " \"model\": \"../models/ggml-model.bin\",\n", + " \"choices\": [\n", + " {\n", + " \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n", + " \"index\": 0,\n", + " \"logprobs\": null,\n", + " \"finish_reason\": \"length\"\n", + " }\n", + " ],\n", + " \"usage\": {\n", + " \"prompt_tokens\": 79,\n", + " \"completion_tokens\": 1,\n", + " \"total_tokens\": 80\n", + " }\n", + "}\n", + "Time: 10.430264711380005 seconds\n", + "Time per token: 0.13037830889225005 seconds\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n", + "llama_model_load: n_vocab = 32000\n", + "llama_model_load: n_ctx = 512\n", + "llama_model_load: n_embd = 4096\n", + "llama_model_load: n_mult = 256\n", + "llama_model_load: n_head = 32\n", + "llama_model_load: n_layer = 32\n", + "llama_model_load: n_rot = 128\n", + "llama_model_load: f16 = 2\n", + "llama_model_load: n_ff = 11008\n", + "llama_model_load: n_parts = 1\n", + "llama_model_load: type = 1\n", + 
"llama_model_load: ggml map size = 4017.70 MB\n", + "llama_model_load: ggml ctx size = 81.25 KB\n", + "llama_model_load: mem required = 5809.78 MB (+ 2052.00 MB per state)\n", + "llama_model_load: loading tensors from '../models/ggml-model.bin'\n", + "llama_model_load: model size = 4017.27 MB / num tensors = 291\n", + "llama_init_from_file: kv self size = 512.00 MB\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " \"id\": \"cmpl-c4e1c14a-3b8a-4ab3-b42a-f47440f79962\",\n", + " \"object\": \"text_completion\",\n", + " \"created\": 1680227962,\n", + " \"model\": \"../models/ggml-model.bin\",\n", + " \"choices\": [\n", + " {\n", + " \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n", + " \"index\": 0,\n", + " \"logprobs\": null,\n", + " \"finish_reason\": \"length\"\n", + " }\n", + " ],\n", + " \"usage\": {\n", + " \"prompt_tokens\": 79,\n", + " \"completion_tokens\": 1,\n", + " \"total_tokens\": 80\n", + " }\n", + "}\n", + "Time: 9.389702558517456 seconds\n", + "Time per token: 0.1173712819814682 seconds\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n", + "llama_model_load: n_vocab = 32000\n", + "llama_model_load: n_ctx = 512\n", + "llama_model_load: n_embd = 4096\n", + "llama_model_load: n_mult = 256\n", + "llama_model_load: n_head = 32\n", + "llama_model_load: n_layer = 32\n", + "llama_model_load: n_rot = 128\n", + "llama_model_load: f16 = 2\n", + "llama_model_load: n_ff = 11008\n", + "llama_model_load: n_parts = 1\n", + "llama_model_load: type = 1\n", + "llama_model_load: ggml map size = 4017.70 MB\n", + "llama_model_load: ggml ctx size = 81.25 KB\n", + "llama_model_load: mem required = 5809.78 MB (+ 1026.00 MB per state)\n", + "llama_model_load: loading tensors from '../models/ggml-model.bin'\n", + "llama_model_load: model size = 4017.27 MB / num tensors = 291\n", + "llama_init_from_file: kv self size = 256.00 MB\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " \"id\": \"cmpl-ac307870-dc67-42b8-8bb8-bb8d3083cea2\",\n", + " \"object\": \"text_completion\",\n", + " \"created\": 1680227974,\n", + " \"model\": \"../models/ggml-model.bin\",\n", + " \"choices\": [\n", + " {\n", + " \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n", + " \"index\": 0,\n", + " \"logprobs\": null,\n", + " \"finish_reason\": \"length\"\n", + " }\n", + " ],\n", + " \"usage\": {\n", + " \"prompt_tokens\": 79,\n", + " \"completion_tokens\": 1,\n", + " \"total_tokens\": 80\n", + " }\n", + "}\n", + "Time: 10.35448431968689 seconds\n", + "Time per token: 0.12943105399608612 seconds\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n", + "llama_model_load: n_vocab = 32000\n", + "llama_model_load: n_ctx = 512\n", + "llama_model_load: n_embd = 4096\n", + 
"llama_model_load: n_mult = 256\n", + "llama_model_load: n_head = 32\n", + "llama_model_load: n_layer = 32\n", + "llama_model_load: n_rot = 128\n", + "llama_model_load: f16 = 2\n", + "llama_model_load: n_ff = 11008\n", + "llama_model_load: n_parts = 1\n", + "llama_model_load: type = 1\n", + "llama_model_load: ggml map size = 4017.70 MB\n", + "llama_model_load: ggml ctx size = 81.25 KB\n", + "llama_model_load: mem required = 5809.78 MB (+ 2052.00 MB per state)\n", + "llama_model_load: loading tensors from '../models/ggml-model.bin'\n", + "llama_model_load: model size = 4017.27 MB / num tensors = 291\n", + "llama_init_from_file: kv self size = 512.00 MB\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " \"id\": \"cmpl-58c06f3e-3fba-4e23-b12e-141a1742c51b\",\n", + " \"object\": \"text_completion\",\n", + " \"created\": 1680227986,\n", + " \"model\": \"../models/ggml-model.bin\",\n", + " \"choices\": [\n", + " {\n", + " \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n", + " \"index\": 0,\n", + " \"logprobs\": null,\n", + " \"finish_reason\": \"length\"\n", + " }\n", + " ],\n", + " \"usage\": {\n", + " \"prompt_tokens\": 79,\n", + " \"completion_tokens\": 1,\n", + " \"total_tokens\": 80\n", + " }\n", + "}\n", + "Time: 9.097248792648315 seconds\n", + "Time per token: 0.11371560990810395 seconds\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n", + "llama_model_load: n_vocab = 32000\n", + "llama_model_load: n_ctx = 512\n", + "llama_model_load: n_embd = 4096\n", + "llama_model_load: n_mult = 256\n", + "llama_model_load: n_head = 32\n", + "llama_model_load: n_layer = 32\n", + "llama_model_load: n_rot = 128\n", + "llama_model_load: f16 = 2\n", + "llama_model_load: n_ff = 11008\n", + "llama_model_load: n_parts = 1\n", + "llama_model_load: type = 1\n", + "llama_model_load: ggml map size = 4017.70 MB\n", + "llama_model_load: ggml ctx size = 81.25 KB\n", + "llama_model_load: mem required = 5809.78 MB (+ 2052.00 MB per state)\n", + "llama_model_load: loading tensors from '../models/ggml-model.bin'\n", + "llama_model_load: model size = 4017.27 MB / num tensors = 291\n", + "llama_init_from_file: kv self size = 512.00 MB\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " \"id\": \"cmpl-b5eccb52-85e3-41d0-b8d8-f35e68bf7997\",\n", + " \"object\": \"text_completion\",\n", + " \"created\": 1680227997,\n", + " \"model\": \"../models/ggml-model.bin\",\n", + " \"choices\": [\n", + " {\n", + " \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n", + " \"index\": 0,\n", + " \"logprobs\": null,\n", + " \"finish_reason\": \"length\"\n", + " }\n", + " ],\n", + " \"usage\": {\n", + " \"prompt_tokens\": 79,\n", + " \"completion_tokens\": 1,\n", + " \"total_tokens\": 80\n", + " }\n", + "}\n", + "Time: 12.466306686401367 seconds\n", + "Time per token: 0.1558288335800171 
seconds\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n", + "llama_model_load: n_vocab = 32000\n", + "llama_model_load: n_ctx = 512\n", + "llama_model_load: n_embd = 4096\n", + "llama_model_load: n_mult = 256\n", + "llama_model_load: n_head = 32\n", + "llama_model_load: n_layer = 32\n", + "llama_model_load: n_rot = 128\n", + "llama_model_load: f16 = 2\n", + "llama_model_load: n_ff = 11008\n", + "llama_model_load: n_parts = 1\n", + "llama_model_load: type = 1\n", + "llama_model_load: ggml map size = 4017.70 MB\n", + "llama_model_load: ggml ctx size = 81.25 KB\n", + "llama_model_load: mem required = 5809.78 MB (+ 1026.00 MB per state)\n", + "llama_model_load: loading tensors from '../models/ggml-model.bin'\n", + "llama_model_load: model size = 4017.27 MB / num tensors = 291\n", + "llama_init_from_file: kv self size = 256.00 MB\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " \"id\": \"cmpl-e1dbc2ee-abc0-4891-a474-386d97b521b6\",\n", + " \"object\": \"text_completion\",\n", + " \"created\": 1680228011,\n", + " \"model\": \"../models/ggml-model.bin\",\n", + " \"choices\": [\n", + " {\n", + " \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n", + " \"index\": 0,\n", + " \"logprobs\": null,\n", + " \"finish_reason\": \"length\"\n", + " }\n", + " ],\n", + " \"usage\": {\n", + " \"prompt_tokens\": 79,\n", + " \"completion_tokens\": 1,\n", + " \"total_tokens\": 80\n", + " }\n", + "}\n", + "Time: 11.436015367507935 seconds\n", + "Time per token: 0.14295019209384918 seconds\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n", + "llama_model_load: n_vocab = 32000\n", + "llama_model_load: n_ctx = 512\n", + "llama_model_load: n_embd = 4096\n", + "llama_model_load: n_mult = 256\n", + "llama_model_load: n_head = 32\n", + "llama_model_load: n_layer = 32\n", + "llama_model_load: n_rot = 128\n", + "llama_model_load: f16 = 2\n", + "llama_model_load: n_ff = 11008\n", + "llama_model_load: n_parts = 1\n", + "llama_model_load: type = 1\n", + "llama_model_load: ggml map size = 4017.70 MB\n", + "llama_model_load: ggml ctx size = 81.25 KB\n", + "llama_model_load: mem required = 5809.78 MB (+ 2052.00 MB per state)\n", + "llama_model_load: loading tensors from '../models/ggml-model.bin'\n", + "llama_model_load: model size = 4017.27 MB / num tensors = 291\n", + "llama_init_from_file: kv self size = 512.00 MB\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " \"id\": \"cmpl-fd9bce6d-0a33-4c24-90b3-913ab3b33d24\",\n", + " \"object\": \"text_completion\",\n", + " \"created\": 1680228025,\n", + " \"model\": \"../models/ggml-model.bin\",\n", + " \"choices\": [\n", + " {\n", + " \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n", + " \"index\": 0,\n", + " 
\"logprobs\": null,\n", + " \"finish_reason\": \"length\"\n", + " }\n", + " ],\n", + " \"usage\": {\n", + " \"prompt_tokens\": 79,\n", + " \"completion_tokens\": 1,\n", + " \"total_tokens\": 80\n", + " }\n", + "}\n", + "Time: 14.052912712097168 seconds\n", + "Time per token: 0.1756614089012146 seconds\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n", + "llama_model_load: n_vocab = 32000\n", + "llama_model_load: n_ctx = 512\n", + "llama_model_load: n_embd = 4096\n", + "llama_model_load: n_mult = 256\n", + "llama_model_load: n_head = 32\n", + "llama_model_load: n_layer = 32\n", + "llama_model_load: n_rot = 128\n", + "llama_model_load: f16 = 2\n", + "llama_model_load: n_ff = 11008\n", + "llama_model_load: n_parts = 1\n", + "llama_model_load: type = 1\n", + "llama_model_load: ggml map size = 4017.70 MB\n", + "llama_model_load: ggml ctx size = 81.25 KB\n", + "llama_model_load: mem required = 5809.78 MB (+ 2052.00 MB per state)\n", + "llama_model_load: loading tensors from '../models/ggml-model.bin'\n", + "llama_model_load: model size = 4017.27 MB / num tensors = 291\n", + "llama_init_from_file: kv self size = 512.00 MB\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " \"id\": \"cmpl-038fa38d-7640-40ee-907c-0bb131c20d80\",\n", + " \"object\": \"text_completion\",\n", + " \"created\": 1680228040,\n", + " \"model\": \"../models/ggml-model.bin\",\n", + " \"choices\": [\n", + " {\n", + " \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n", + " \"index\": 0,\n", + " \"logprobs\": null,\n", + " \"finish_reason\": \"length\"\n", + " }\n", + " ],\n", + " \"usage\": {\n", + " \"prompt_tokens\": 79,\n", + " \"completion_tokens\": 1,\n", + " \"total_tokens\": 80\n", + " }\n", + "}\n", + "Time: 9.250384330749512 seconds\n", + "Time per token: 0.1156298041343689 seconds\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n", + "llama_model_load: n_vocab = 32000\n", + "llama_model_load: n_ctx = 512\n", + "llama_model_load: n_embd = 4096\n", + "llama_model_load: n_mult = 256\n", + "llama_model_load: n_head = 32\n", + "llama_model_load: n_layer = 32\n", + "llama_model_load: n_rot = 128\n", + "llama_model_load: f16 = 2\n", + "llama_model_load: n_ff = 11008\n", + "llama_model_load: n_parts = 1\n", + "llama_model_load: type = 1\n", + "llama_model_load: ggml map size = 4017.70 MB\n", + "llama_model_load: ggml ctx size = 81.25 KB\n", + "llama_model_load: mem required = 5809.78 MB (+ 2052.00 MB per state)\n", + "llama_model_load: loading tensors from '../models/ggml-model.bin'\n", + "llama_model_load: model size = 4017.27 MB / num tensors = 291\n", + "llama_init_from_file: kv self size = 512.00 MB\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " \"id\": \"cmpl-d00a2058-9fda-4113-8e5e-bf0f39cef238\",\n", + " \"object\": \"text_completion\",\n", + " \"created\": 1680228051,\n", + " \"model\": \"../models/ggml-model.bin\",\n", + " \"choices\": [\n", + " {\n", + " \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer 
questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n", + " \"index\": 0,\n", + " \"logprobs\": null,\n", + " \"finish_reason\": \"length\"\n", + " }\n", + " ],\n", + " \"usage\": {\n", + " \"prompt_tokens\": 79,\n", + " \"completion_tokens\": 1,\n", + " \"total_tokens\": 80\n", + " }\n", + "}\n", + "Time: 9.228248834609985 seconds\n", + "Time per token: 0.11535311043262482 seconds\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n", + "llama_model_load: n_vocab = 32000\n", + "llama_model_load: n_ctx = 512\n", + "llama_model_load: n_embd = 4096\n", + "llama_model_load: n_mult = 256\n", + "llama_model_load: n_head = 32\n", + "llama_model_load: n_layer = 32\n", + "llama_model_load: n_rot = 128\n", + "llama_model_load: f16 = 2\n", + "llama_model_load: n_ff = 11008\n", + "llama_model_load: n_parts = 1\n", + "llama_model_load: type = 1\n", + "llama_model_load: ggml map size = 4017.70 MB\n", + "llama_model_load: ggml ctx size = 81.25 KB\n", + "llama_model_load: mem required = 5809.78 MB (+ 2052.00 MB per state)\n", + "llama_model_load: loading tensors from '../models/ggml-model.bin'\n", + "llama_model_load: model size = 4017.27 MB / num tensors = 291\n", + "llama_init_from_file: kv self size = 512.00 MB\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " \"id\": \"cmpl-f8d90e63-4939-491c-9775-fc15aa55505e\",\n", + " \"object\": \"text_completion\",\n", + " \"created\": 1680228062,\n", + " \"model\": \"../models/ggml-model.bin\",\n", + " \"choices\": [\n", + " {\n", + " \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n", + " \"index\": 0,\n", + " \"logprobs\": null,\n", + " \"finish_reason\": \"length\"\n", + " }\n", + " ],\n", + " \"usage\": {\n", + " \"prompt_tokens\": 79,\n", + " \"completion_tokens\": 1,\n", + " \"total_tokens\": 80\n", + " }\n", + "}\n", + "Time: 9.341724395751953 seconds\n", + "Time per token: 0.11677155494689942 seconds\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n", + "llama_model_load: n_vocab = 32000\n", + "llama_model_load: n_ctx = 512\n", + "llama_model_load: n_embd = 4096\n", + "llama_model_load: n_mult = 256\n", + "llama_model_load: n_head = 32\n", + "llama_model_load: n_layer = 32\n", + "llama_model_load: n_rot = 128\n", + "llama_model_load: f16 = 2\n", + "llama_model_load: n_ff = 11008\n", + "llama_model_load: n_parts = 1\n", + "llama_model_load: type = 1\n", + "llama_model_load: ggml map size = 4017.70 MB\n", + "llama_model_load: ggml ctx size = 81.25 KB\n", + "llama_model_load: mem required = 5809.78 MB (+ 1026.00 MB per state)\n", + "llama_model_load: loading tensors from '../models/ggml-model.bin'\n", + "llama_model_load: model size = 4017.27 MB / num tensors = 291\n", + "llama_init_from_file: kv self size = 256.00 MB\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " 
\"id\": \"cmpl-9e3777bc-119a-46bf-bdd3-21557e686f3c\",\n", + " \"object\": \"text_completion\",\n", + " \"created\": 1680228074,\n", + " \"model\": \"../models/ggml-model.bin\",\n", + " \"choices\": [\n", + " {\n", + " \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n", + " \"index\": 0,\n", + " \"logprobs\": null,\n", + " \"finish_reason\": \"length\"\n", + " }\n", + " ],\n", + " \"usage\": {\n", + " \"prompt_tokens\": 79,\n", + " \"completion_tokens\": 1,\n", + " \"total_tokens\": 80\n", + " }\n", + "}\n", + "Time: 9.285743951797485 seconds\n", + "Time per token: 0.11607179939746856 seconds\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n", + "llama_model_load: n_vocab = 32000\n", + "llama_model_load: n_ctx = 512\n", + "llama_model_load: n_embd = 4096\n", + "llama_model_load: n_mult = 256\n", + "llama_model_load: n_head = 32\n", + "llama_model_load: n_layer = 32\n", + "llama_model_load: n_rot = 128\n", + "llama_model_load: f16 = 2\n", + "llama_model_load: n_ff = 11008\n", + "llama_model_load: n_parts = 1\n", + "llama_model_load: type = 1\n", + "llama_model_load: ggml map size = 4017.70 MB\n", + "llama_model_load: ggml ctx size = 81.25 KB\n", + "llama_model_load: mem required = 5809.78 MB (+ 1026.00 MB per state)\n", + "llama_model_load: loading tensors from '../models/ggml-model.bin'\n", + "llama_model_load: model size = 4017.27 MB / num tensors = 291\n", + "llama_init_from_file: kv self size = 256.00 MB\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " \"id\": \"cmpl-123eaa35-110b-4f73-ba60-fa8a75ea929c\",\n", + " \"object\": \"text_completion\",\n", + " \"created\": 1680228085,\n", + " \"model\": \"../models/ggml-model.bin\",\n", + " \"choices\": [\n", + " {\n", + " \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n", + " \"index\": 0,\n", + " \"logprobs\": null,\n", + " \"finish_reason\": \"length\"\n", + " }\n", + " ],\n", + " \"usage\": {\n", + " \"prompt_tokens\": 79,\n", + " \"completion_tokens\": 1,\n", + " \"total_tokens\": 80\n", + " }\n", + "}\n", + "Time: 9.105633020401001 seconds\n", + "Time per token: 0.1138204127550125 seconds\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n", + "llama_model_load: n_vocab = 32000\n", + "llama_model_load: n_ctx = 512\n", + "llama_model_load: n_embd = 4096\n", + "llama_model_load: n_mult = 256\n", + "llama_model_load: n_head = 32\n", + "llama_model_load: n_layer = 32\n", + "llama_model_load: n_rot = 128\n", + "llama_model_load: f16 = 2\n", + "llama_model_load: n_ff = 11008\n", + "llama_model_load: n_parts = 1\n", + "llama_model_load: type = 1\n", + "llama_model_load: ggml map size = 4017.70 MB\n", + "llama_model_load: ggml ctx size = 81.25 KB\n", + "llama_model_load: mem required = 5809.78 MB (+ 1026.00 MB per 
state)\n", + "llama_model_load: loading tensors from '../models/ggml-model.bin'\n", + "llama_model_load: model size = 4017.27 MB / num tensors = 291\n", + "llama_init_from_file: kv self size = 256.00 MB\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " \"id\": \"cmpl-cc095f4b-8047-446e-a9f5-c798a66d1003\",\n", + " \"object\": \"text_completion\",\n", + " \"created\": 1680228096,\n", + " \"model\": \"../models/ggml-model.bin\",\n", + " \"choices\": [\n", + " {\n", + " \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n", + " \"index\": 0,\n", + " \"logprobs\": null,\n", + " \"finish_reason\": \"length\"\n", + " }\n", + " ],\n", + " \"usage\": {\n", + " \"prompt_tokens\": 79,\n", + " \"completion_tokens\": 1,\n", + " \"total_tokens\": 80\n", + " }\n", + "}\n", + "Time: 9.305238485336304 seconds\n", + "Time per token: 0.1163154810667038 seconds\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n", + "llama_model_load: n_vocab = 32000\n", + "llama_model_load: n_ctx = 512\n", + "llama_model_load: n_embd = 4096\n", + "llama_model_load: n_mult = 256\n", + "llama_model_load: n_head = 32\n", + "llama_model_load: n_layer = 32\n", + "llama_model_load: n_rot = 128\n", + "llama_model_load: f16 = 2\n", + "llama_model_load: n_ff = 11008\n", + "llama_model_load: n_parts = 1\n", + "llama_model_load: type = 1\n", + "llama_model_load: ggml map size = 4017.70 MB\n", + "llama_model_load: ggml ctx size = 81.25 KB\n", + "llama_model_load: mem required = 5809.78 MB (+ 2052.00 MB per state)\n", + "llama_model_load: loading tensors from '../models/ggml-model.bin'\n", + "llama_model_load: model size = 4017.27 MB / num tensors = 291\n", + "llama_init_from_file: kv self size = 512.00 MB\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " \"id\": \"cmpl-e2e69b3e-7742-4534-b21f-adfe53345820\",\n", + " \"object\": \"text_completion\",\n", + " \"created\": 1680228108,\n", + " \"model\": \"../models/ggml-model.bin\",\n", + " \"choices\": [\n", + " {\n", + " \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n", + " \"index\": 0,\n", + " \"logprobs\": null,\n", + " \"finish_reason\": \"length\"\n", + " }\n", + " ],\n", + " \"usage\": {\n", + " \"prompt_tokens\": 79,\n", + " \"completion_tokens\": 1,\n", + " \"total_tokens\": 80\n", + " }\n", + "}\n", + "Time: 9.190222263336182 seconds\n", + "Time per token: 0.11487777829170227 seconds\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/andrei/Documents/llms/.venv/lib/python3.8/site-packages/skopt/optimizer/optimizer.py:449: UserWarning: The objective has been evaluated at this point before.\n", + " warnings.warn(\"The objective has been evaluated \"\n", + "llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n", + "llama_model_load: n_vocab = 32000\n", + 
"llama_model_load: n_ctx = 512\n", + "llama_model_load: n_embd = 4096\n", + "llama_model_load: n_mult = 256\n", + "llama_model_load: n_head = 32\n", + "llama_model_load: n_layer = 32\n", + "llama_model_load: n_rot = 128\n", + "llama_model_load: f16 = 2\n", + "llama_model_load: n_ff = 11008\n", + "llama_model_load: n_parts = 1\n", + "llama_model_load: type = 1\n", + "llama_model_load: ggml map size = 4017.70 MB\n", + "llama_model_load: ggml ctx size = 81.25 KB\n", + "llama_model_load: mem required = 5809.78 MB (+ 2052.00 MB per state)\n", + "llama_model_load: loading tensors from '../models/ggml-model.bin'\n", + "llama_model_load: model size = 4017.27 MB / num tensors = 291\n", + "llama_init_from_file: kv self size = 512.00 MB\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " \"id\": \"cmpl-666ae55e-d837-4534-b8e6-9f1b01f69778\",\n", + " \"object\": \"text_completion\",\n", + " \"created\": 1680228120,\n", + " \"model\": \"../models/ggml-model.bin\",\n", + " \"choices\": [\n", + " {\n", + " \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n", + " \"index\": 0,\n", + " \"logprobs\": null,\n", + " \"finish_reason\": \"length\"\n", + " }\n", + " ],\n", + " \"usage\": {\n", + " \"prompt_tokens\": 79,\n", + " \"completion_tokens\": 1,\n", + " \"total_tokens\": 80\n", + " }\n", + "}\n", + "Time: 9.126368999481201 seconds\n", + "Time per token: 0.11407961249351502 seconds\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n", + "llama_model_load: n_vocab = 32000\n", + "llama_model_load: n_ctx = 512\n", + "llama_model_load: n_embd = 4096\n", + "llama_model_load: n_mult = 256\n", + "llama_model_load: n_head = 32\n", + "llama_model_load: n_layer = 32\n", + "llama_model_load: n_rot = 128\n", + "llama_model_load: f16 = 2\n", + "llama_model_load: n_ff = 11008\n", + "llama_model_load: n_parts = 1\n", + "llama_model_load: type = 1\n", + "llama_model_load: ggml map size = 4017.70 MB\n", + "llama_model_load: ggml ctx size = 81.25 KB\n", + "llama_model_load: mem required = 5809.78 MB (+ 2052.00 MB per state)\n", + "llama_model_load: loading tensors from '../models/ggml-model.bin'\n", + "llama_model_load: model size = 4017.27 MB / num tensors = 291\n", + "llama_init_from_file: kv self size = 512.00 MB\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " \"id\": \"cmpl-63bdfa8e-b7c3-4669-ab76-54cdbb8878d5\",\n", + " \"object\": \"text_completion\",\n", + " \"created\": 1680228131,\n", + " \"model\": \"../models/ggml-model.bin\",\n", + " \"choices\": [\n", + " {\n", + " \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n", + " \"index\": 0,\n", + " \"logprobs\": null,\n", + " \"finish_reason\": \"length\"\n", + " }\n", + " ],\n", + " \"usage\": {\n", + " \"prompt_tokens\": 79,\n", + " \"completion_tokens\": 1,\n", + " \"total_tokens\": 80\n", + " }\n", + "}\n", + 
"Time: 9.136119604110718 seconds\n", + "Time per token: 0.11420149505138397 seconds\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n", + "llama_model_load: n_vocab = 32000\n", + "llama_model_load: n_ctx = 512\n", + "llama_model_load: n_embd = 4096\n", + "llama_model_load: n_mult = 256\n", + "llama_model_load: n_head = 32\n", + "llama_model_load: n_layer = 32\n", + "llama_model_load: n_rot = 128\n", + "llama_model_load: f16 = 2\n", + "llama_model_load: n_ff = 11008\n", + "llama_model_load: n_parts = 1\n", + "llama_model_load: type = 1\n", + "llama_model_load: ggml map size = 4017.70 MB\n", + "llama_model_load: ggml ctx size = 81.25 KB\n", + "llama_model_load: mem required = 5809.78 MB (+ 1026.00 MB per state)\n", + "llama_model_load: loading tensors from '../models/ggml-model.bin'\n", + "llama_model_load: model size = 4017.27 MB / num tensors = 291\n", + "llama_init_from_file: kv self size = 256.00 MB\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " \"id\": \"cmpl-1ec02c53-c7c8-434e-b28f-70884f8c35b2\",\n", + " \"object\": \"text_completion\",\n", + " \"created\": 1680228143,\n", + " \"model\": \"../models/ggml-model.bin\",\n", + " \"choices\": [\n", + " {\n", + " \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n", + " \"index\": 0,\n", + " \"logprobs\": null,\n", + " \"finish_reason\": \"length\"\n", + " }\n", + " ],\n", + " \"usage\": {\n", + " \"prompt_tokens\": 79,\n", + " \"completion_tokens\": 1,\n", + " \"total_tokens\": 80\n", + " }\n", + "}\n", + "Time: 9.126901626586914 seconds\n", + "Time per token: 0.11408627033233643 seconds\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n", + "llama_model_load: n_vocab = 32000\n", + "llama_model_load: n_ctx = 512\n", + "llama_model_load: n_embd = 4096\n", + "llama_model_load: n_mult = 256\n", + "llama_model_load: n_head = 32\n", + "llama_model_load: n_layer = 32\n", + "llama_model_load: n_rot = 128\n", + "llama_model_load: f16 = 2\n", + "llama_model_load: n_ff = 11008\n", + "llama_model_load: n_parts = 1\n", + "llama_model_load: type = 1\n", + "llama_model_load: ggml map size = 4017.70 MB\n", + "llama_model_load: ggml ctx size = 81.25 KB\n", + "llama_model_load: mem required = 5809.78 MB (+ 1026.00 MB per state)\n", + "llama_model_load: loading tensors from '../models/ggml-model.bin'\n", + "llama_model_load: model size = 4017.27 MB / num tensors = 291\n", + "llama_init_from_file: kv self size = 256.00 MB\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " \"id\": \"cmpl-3ec3495b-009a-4a82-b444-d8c1c6bf20a1\",\n", + " \"object\": \"text_completion\",\n", + " \"created\": 1680228154,\n", + " \"model\": \"../models/ggml-model.bin\",\n", + " \"choices\": [\n", + " {\n", + " \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the 
capital of France?\\n### Response:\\nThe\",\n", + " \"index\": 0,\n", + " \"logprobs\": null,\n", + " \"finish_reason\": \"length\"\n", + " }\n", + " ],\n", + " \"usage\": {\n", + " \"prompt_tokens\": 79,\n", + " \"completion_tokens\": 1,\n", + " \"total_tokens\": 80\n", + " }\n", + "}\n", + "Time: 9.08673644065857 seconds\n", + "Time per token: 0.11358420550823212 seconds\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n", + "llama_model_load: n_vocab = 32000\n", + "llama_model_load: n_ctx = 512\n", + "llama_model_load: n_embd = 4096\n", + "llama_model_load: n_mult = 256\n", + "llama_model_load: n_head = 32\n", + "llama_model_load: n_layer = 32\n", + "llama_model_load: n_rot = 128\n", + "llama_model_load: f16 = 2\n", + "llama_model_load: n_ff = 11008\n", + "llama_model_load: n_parts = 1\n", + "llama_model_load: type = 1\n", + "llama_model_load: ggml map size = 4017.70 MB\n", + "llama_model_load: ggml ctx size = 81.25 KB\n", + "llama_model_load: mem required = 5809.78 MB (+ 2052.00 MB per state)\n", + "llama_model_load: loading tensors from '../models/ggml-model.bin'\n", + "llama_model_load: model size = 4017.27 MB / num tensors = 291\n", + "llama_init_from_file: kv self size = 512.00 MB\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " \"id\": \"cmpl-17fd0e6b-7ac3-494f-9e85-4e4a26013ad9\",\n", + " \"object\": \"text_completion\",\n", + " \"created\": 1680228165,\n", + " \"model\": \"../models/ggml-model.bin\",\n", + " \"choices\": [\n", + " {\n", + " \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n", + " \"index\": 0,\n", + " \"logprobs\": null,\n", + " \"finish_reason\": \"length\"\n", + " }\n", + " ],\n", + " \"usage\": {\n", + " \"prompt_tokens\": 79,\n", + " \"completion_tokens\": 1,\n", + " \"total_tokens\": 80\n", + " }\n", + "}\n", + "Time: 9.252317428588867 seconds\n", + "Time per token: 0.11565396785736085 seconds\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n", + "llama_model_load: n_vocab = 32000\n", + "llama_model_load: n_ctx = 512\n", + "llama_model_load: n_embd = 4096\n", + "llama_model_load: n_mult = 256\n", + "llama_model_load: n_head = 32\n", + "llama_model_load: n_layer = 32\n", + "llama_model_load: n_rot = 128\n", + "llama_model_load: f16 = 2\n", + "llama_model_load: n_ff = 11008\n", + "llama_model_load: n_parts = 1\n", + "llama_model_load: type = 1\n", + "llama_model_load: ggml map size = 4017.70 MB\n", + "llama_model_load: ggml ctx size = 81.25 KB\n", + "llama_model_load: mem required = 5809.78 MB (+ 1026.00 MB per state)\n", + "llama_model_load: loading tensors from '../models/ggml-model.bin'\n", + "llama_model_load: model size = 4017.27 MB / num tensors = 291\n", + "llama_init_from_file: kv self size = 256.00 MB\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " \"id\": \"cmpl-14a2647f-3961-4b60-b20a-ae9872c34feb\",\n", + " \"object\": \"text_completion\",\n", + " \"created\": 1680228177,\n", + " \"model\": \"../models/ggml-model.bin\",\n", + " \"choices\": [\n", + " {\n", + " 
\"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n", + " \"index\": 0,\n", + " \"logprobs\": null,\n", + " \"finish_reason\": \"length\"\n", + " }\n", + " ],\n", + " \"usage\": {\n", + " \"prompt_tokens\": 79,\n", + " \"completion_tokens\": 1,\n", + " \"total_tokens\": 80\n", + " }\n", + "}\n", + "Time: 11.389162302017212 seconds\n", + "Time per token: 0.14236452877521516 seconds\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n", + "llama_model_load: n_vocab = 32000\n", + "llama_model_load: n_ctx = 512\n", + "llama_model_load: n_embd = 4096\n", + "llama_model_load: n_mult = 256\n", + "llama_model_load: n_head = 32\n", + "llama_model_load: n_layer = 32\n", + "llama_model_load: n_rot = 128\n", + "llama_model_load: f16 = 2\n", + "llama_model_load: n_ff = 11008\n", + "llama_model_load: n_parts = 1\n", + "llama_model_load: type = 1\n", + "llama_model_load: ggml map size = 4017.70 MB\n", + "llama_model_load: ggml ctx size = 81.25 KB\n", + "llama_model_load: mem required = 5809.78 MB (+ 2052.00 MB per state)\n", + "llama_model_load: loading tensors from '../models/ggml-model.bin'\n", + "llama_model_load: model size = 4017.27 MB / num tensors = 291\n", + "llama_init_from_file: kv self size = 512.00 MB\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " \"id\": \"cmpl-fa0e5edd-e9c9-40b9-bc9b-c48b8762850c\",\n", + " \"object\": \"text_completion\",\n", + " \"created\": 1680228190,\n", + " \"model\": \"../models/ggml-model.bin\",\n", + " \"choices\": [\n", + " {\n", + " \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n", + " \"index\": 0,\n", + " \"logprobs\": null,\n", + " \"finish_reason\": \"length\"\n", + " }\n", + " ],\n", + " \"usage\": {\n", + " \"prompt_tokens\": 79,\n", + " \"completion_tokens\": 1,\n", + " \"total_tokens\": 80\n", + " }\n", + "}\n", + "Time: 9.433730125427246 seconds\n", + "Time per token: 0.11792162656784058 seconds\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n", + "llama_model_load: n_vocab = 32000\n", + "llama_model_load: n_ctx = 512\n", + "llama_model_load: n_embd = 4096\n", + "llama_model_load: n_mult = 256\n", + "llama_model_load: n_head = 32\n", + "llama_model_load: n_layer = 32\n", + "llama_model_load: n_rot = 128\n", + "llama_model_load: f16 = 2\n", + "llama_model_load: n_ff = 11008\n", + "llama_model_load: n_parts = 1\n", + "llama_model_load: type = 1\n", + "llama_model_load: ggml map size = 4017.70 MB\n", + "llama_model_load: ggml ctx size = 81.25 KB\n", + "llama_model_load: mem required = 5809.78 MB (+ 2052.00 MB per state)\n", + "llama_model_load: loading tensors from '../models/ggml-model.bin'\n", + "llama_model_load: model size = 4017.27 MB / num tensors = 291\n", + "llama_init_from_file: kv self size = 512.00 MB\n" + ] + }, + 
{ + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " \"id\": \"cmpl-2b1c5964-265a-488a-8d8f-7e0692fcf96f\",\n", + " \"object\": \"text_completion\",\n", + " \"created\": 1680228202,\n", + " \"model\": \"../models/ggml-model.bin\",\n", + " \"choices\": [\n", + " {\n", + " \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n", + " \"index\": 0,\n", + " \"logprobs\": null,\n", + " \"finish_reason\": \"length\"\n", + " }\n", + " ],\n", + " \"usage\": {\n", + " \"prompt_tokens\": 79,\n", + " \"completion_tokens\": 1,\n", + " \"total_tokens\": 80\n", + " }\n", + "}\n", + "Time: 47.81757044792175 seconds\n", + "Time per token: 0.5977196305990219 seconds\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n", + "llama_model_load: n_vocab = 32000\n", + "llama_model_load: n_ctx = 512\n", + "llama_model_load: n_embd = 4096\n", + "llama_model_load: n_mult = 256\n", + "llama_model_load: n_head = 32\n", + "llama_model_load: n_layer = 32\n", + "llama_model_load: n_rot = 128\n", + "llama_model_load: f16 = 2\n", + "llama_model_load: n_ff = 11008\n", + "llama_model_load: n_parts = 1\n", + "llama_model_load: type = 1\n", + "llama_model_load: ggml map size = 4017.70 MB\n", + "llama_model_load: ggml ctx size = 81.25 KB\n", + "llama_model_load: mem required = 5809.78 MB (+ 1026.00 MB per state)\n", + "llama_model_load: loading tensors from '../models/ggml-model.bin'\n", + "llama_model_load: model size = 4017.27 MB / num tensors = 291\n", + "llama_init_from_file: kv self size = 256.00 MB\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " \"id\": \"cmpl-516fbd4c-3fe5-4945-bfc5-7312f2c02687\",\n", + " \"object\": \"text_completion\",\n", + " \"created\": 1680228252,\n", + " \"model\": \"../models/ggml-model.bin\",\n", + " \"choices\": [\n", + " {\n", + " \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n", + " \"index\": 0,\n", + " \"logprobs\": null,\n", + " \"finish_reason\": \"length\"\n", + " }\n", + " ],\n", + " \"usage\": {\n", + " \"prompt_tokens\": 79,\n", + " \"completion_tokens\": 1,\n", + " \"total_tokens\": 80\n", + " }\n", + "}\n", + "Time: 8.540155410766602 seconds\n", + "Time per token: 0.10675194263458251 seconds\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/andrei/Documents/llms/.venv/lib/python3.8/site-packages/skopt/optimizer/optimizer.py:449: UserWarning: The objective has been evaluated at this point before.\n", + " warnings.warn(\"The objective has been evaluated \"\n", + "llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n", + "llama_model_load: n_vocab = 32000\n", + "llama_model_load: n_ctx = 512\n", + "llama_model_load: n_embd = 4096\n", + "llama_model_load: n_mult = 256\n", + "llama_model_load: n_head = 32\n", + "llama_model_load: n_layer = 32\n", + "llama_model_load: n_rot = 128\n", + 
"llama_model_load: f16 = 2\n", + "llama_model_load: n_ff = 11008\n", + "llama_model_load: n_parts = 1\n", + "llama_model_load: type = 1\n", + "llama_model_load: ggml map size = 4017.70 MB\n", + "llama_model_load: ggml ctx size = 81.25 KB\n", + "llama_model_load: mem required = 5809.78 MB (+ 1026.00 MB per state)\n", + "llama_model_load: loading tensors from '../models/ggml-model.bin'\n", + "llama_model_load: model size = 4017.27 MB / num tensors = 291\n", + "llama_init_from_file: kv self size = 256.00 MB\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " \"id\": \"cmpl-94c9ab1f-ac6e-4fc7-bcd9-7ab96515a722\",\n", + " \"object\": \"text_completion\",\n", + " \"created\": 1680228262,\n", + " \"model\": \"../models/ggml-model.bin\",\n", + " \"choices\": [\n", + " {\n", + " \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n", + " \"index\": 0,\n", + " \"logprobs\": null,\n", + " \"finish_reason\": \"length\"\n", + " }\n", + " ],\n", + " \"usage\": {\n", + " \"prompt_tokens\": 79,\n", + " \"completion_tokens\": 1,\n", + " \"total_tokens\": 80\n", + " }\n", + "}\n", + "Time: 8.660873889923096 seconds\n", + "Time per token: 0.10826092362403869 seconds\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/andrei/Documents/llms/.venv/lib/python3.8/site-packages/skopt/optimizer/optimizer.py:449: UserWarning: The objective has been evaluated at this point before.\n", + " warnings.warn(\"The objective has been evaluated \"\n", + "llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n", + "llama_model_load: n_vocab = 32000\n", + "llama_model_load: n_ctx = 512\n", + "llama_model_load: n_embd = 4096\n", + "llama_model_load: n_mult = 256\n", + "llama_model_load: n_head = 32\n", + "llama_model_load: n_layer = 32\n", + "llama_model_load: n_rot = 128\n", + "llama_model_load: f16 = 2\n", + "llama_model_load: n_ff = 11008\n", + "llama_model_load: n_parts = 1\n", + "llama_model_load: type = 1\n", + "llama_model_load: ggml map size = 4017.70 MB\n", + "llama_model_load: ggml ctx size = 81.25 KB\n", + "llama_model_load: mem required = 5809.78 MB (+ 1026.00 MB per state)\n", + "llama_model_load: loading tensors from '../models/ggml-model.bin'\n", + "llama_model_load: model size = 4017.27 MB / num tensors = 291\n", + "llama_init_from_file: kv self size = 256.00 MB\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " \"id\": \"cmpl-63b1e1a7-0c6b-42e0-ba65-6f42d6ec77bb\",\n", + " \"object\": \"text_completion\",\n", + " \"created\": 1680228273,\n", + " \"model\": \"../models/ggml-model.bin\",\n", + " \"choices\": [\n", + " {\n", + " \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n", + " \"index\": 0,\n", + " \"logprobs\": null,\n", + " \"finish_reason\": \"length\"\n", + " }\n", + " ],\n", + " \"usage\": {\n", + " \"prompt_tokens\": 79,\n", + " \"completion_tokens\": 1,\n", + " \"total_tokens\": 80\n", + " }\n", + 
"}\n", + "Time: 8.815936088562012 seconds\n", + "Time per token: 0.11019920110702515 seconds\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/andrei/Documents/llms/.venv/lib/python3.8/site-packages/skopt/optimizer/optimizer.py:449: UserWarning: The objective has been evaluated at this point before.\n", + " warnings.warn(\"The objective has been evaluated \"\n", + "llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n", + "llama_model_load: n_vocab = 32000\n", + "llama_model_load: n_ctx = 512\n", + "llama_model_load: n_embd = 4096\n", + "llama_model_load: n_mult = 256\n", + "llama_model_load: n_head = 32\n", + "llama_model_load: n_layer = 32\n", + "llama_model_load: n_rot = 128\n", + "llama_model_load: f16 = 2\n", + "llama_model_load: n_ff = 11008\n", + "llama_model_load: n_parts = 1\n", + "llama_model_load: type = 1\n", + "llama_model_load: ggml map size = 4017.70 MB\n", + "llama_model_load: ggml ctx size = 81.25 KB\n", + "llama_model_load: mem required = 5809.78 MB (+ 1026.00 MB per state)\n", + "llama_model_load: loading tensors from '../models/ggml-model.bin'\n", + "llama_model_load: model size = 4017.27 MB / num tensors = 291\n", + "llama_init_from_file: kv self size = 256.00 MB\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " \"id\": \"cmpl-92e1a879-2ebd-4299-b86e-90c87762db45\",\n", + " \"object\": \"text_completion\",\n", + " \"created\": 1680228284,\n", + " \"model\": \"../models/ggml-model.bin\",\n", + " \"choices\": [\n", + " {\n", + " \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n", + " \"index\": 0,\n", + " \"logprobs\": null,\n", + " \"finish_reason\": \"length\"\n", + " }\n", + " ],\n", + " \"usage\": {\n", + " \"prompt_tokens\": 79,\n", + " \"completion_tokens\": 1,\n", + " \"total_tokens\": 80\n", + " }\n", + "}\n", + "Time: 9.12400484085083 seconds\n", + "Time per token: 0.11405006051063538 seconds\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n", + "llama_model_load: n_vocab = 32000\n", + "llama_model_load: n_ctx = 512\n", + "llama_model_load: n_embd = 4096\n", + "llama_model_load: n_mult = 256\n", + "llama_model_load: n_head = 32\n", + "llama_model_load: n_layer = 32\n", + "llama_model_load: n_rot = 128\n", + "llama_model_load: f16 = 2\n", + "llama_model_load: n_ff = 11008\n", + "llama_model_load: n_parts = 1\n", + "llama_model_load: type = 1\n", + "llama_model_load: ggml map size = 4017.70 MB\n", + "llama_model_load: ggml ctx size = 81.25 KB\n", + "llama_model_load: mem required = 5809.78 MB (+ 2052.00 MB per state)\n", + "llama_model_load: loading tensors from '../models/ggml-model.bin'\n", + "llama_model_load: model size = 4017.27 MB / num tensors = 291\n", + "llama_init_from_file: kv self size = 512.00 MB\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " \"id\": \"cmpl-033ea9dc-fffe-41a0-a695-d647f725ee97\",\n", + " \"object\": \"text_completion\",\n", + " \"created\": 1680228296,\n", + " \"model\": \"../models/ggml-model.bin\",\n", + " \"choices\": [\n", + " {\n", + " \"text\": \" ### Instructions:\\nYou are a helpful 
assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n", + " \"index\": 0,\n", + " \"logprobs\": null,\n", + " \"finish_reason\": \"length\"\n", + " }\n", + " ],\n", + " \"usage\": {\n", + " \"prompt_tokens\": 79,\n", + " \"completion_tokens\": 1,\n", + " \"total_tokens\": 80\n", + " }\n", + "}\n", + "Time: 13.992429971694946 seconds\n", + "Time per token: 0.17490537464618683 seconds\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/andrei/Documents/llms/.venv/lib/python3.8/site-packages/skopt/optimizer/optimizer.py:449: UserWarning: The objective has been evaluated at this point before.\n", + " warnings.warn(\"The objective has been evaluated \"\n", + "llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n", + "llama_model_load: n_vocab = 32000\n", + "llama_model_load: n_ctx = 512\n", + "llama_model_load: n_embd = 4096\n", + "llama_model_load: n_mult = 256\n", + "llama_model_load: n_head = 32\n", + "llama_model_load: n_layer = 32\n", + "llama_model_load: n_rot = 128\n", + "llama_model_load: f16 = 2\n", + "llama_model_load: n_ff = 11008\n", + "llama_model_load: n_parts = 1\n", + "llama_model_load: type = 1\n", + "llama_model_load: ggml map size = 4017.70 MB\n", + "llama_model_load: ggml ctx size = 81.25 KB\n", + "llama_model_load: mem required = 5809.78 MB (+ 1026.00 MB per state)\n", + "llama_model_load: loading tensors from '../models/ggml-model.bin'\n", + "llama_model_load: model size = 4017.27 MB / num tensors = 291\n", + "llama_init_from_file: kv self size = 256.00 MB\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " \"id\": \"cmpl-5153f39a-589a-4b3d-8642-8efce64fc439\",\n", + " \"object\": \"text_completion\",\n", + " \"created\": 1680228312,\n", + " \"model\": \"../models/ggml-model.bin\",\n", + " \"choices\": [\n", + " {\n", + " \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n", + " \"index\": 0,\n", + " \"logprobs\": null,\n", + " \"finish_reason\": \"length\"\n", + " }\n", + " ],\n", + " \"usage\": {\n", + " \"prompt_tokens\": 79,\n", + " \"completion_tokens\": 1,\n", + " \"total_tokens\": 80\n", + " }\n", + "}\n", + "Time: 9.084643125534058 seconds\n", + "Time per token: 0.11355803906917572 seconds\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/andrei/Documents/llms/.venv/lib/python3.8/site-packages/skopt/optimizer/optimizer.py:449: UserWarning: The objective has been evaluated at this point before.\n", + " warnings.warn(\"The objective has been evaluated \"\n", + "llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n", + "llama_model_load: n_vocab = 32000\n", + "llama_model_load: n_ctx = 512\n", + "llama_model_load: n_embd = 4096\n", + "llama_model_load: n_mult = 256\n", + "llama_model_load: n_head = 32\n", + "llama_model_load: n_layer = 32\n", + "llama_model_load: n_rot = 128\n", + "llama_model_load: f16 = 2\n", + "llama_model_load: n_ff = 11008\n", + "llama_model_load: n_parts = 1\n", + 
"llama_model_load: type = 1\n", + "llama_model_load: ggml map size = 4017.70 MB\n", + "llama_model_load: ggml ctx size = 81.25 KB\n", + "llama_model_load: mem required = 5809.78 MB (+ 1026.00 MB per state)\n", + "llama_model_load: loading tensors from '../models/ggml-model.bin'\n", + "llama_model_load: model size = 4017.27 MB / num tensors = 291\n", + "llama_init_from_file: kv self size = 256.00 MB\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " \"id\": \"cmpl-af9ea5c6-5449-43b4-9e50-da930af8d6b8\",\n", + " \"object\": \"text_completion\",\n", + " \"created\": 1680228323,\n", + " \"model\": \"../models/ggml-model.bin\",\n", + " \"choices\": [\n", + " {\n", + " \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n", + " \"index\": 0,\n", + " \"logprobs\": null,\n", + " \"finish_reason\": \"length\"\n", + " }\n", + " ],\n", + " \"usage\": {\n", + " \"prompt_tokens\": 79,\n", + " \"completion_tokens\": 1,\n", + " \"total_tokens\": 80\n", + " }\n", + "}\n", + "Time: 9.076856851577759 seconds\n", + "Time per token: 0.11346071064472199 seconds\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/andrei/Documents/llms/.venv/lib/python3.8/site-packages/skopt/optimizer/optimizer.py:449: UserWarning: The objective has been evaluated at this point before.\n", + " warnings.warn(\"The objective has been evaluated \"\n", + "llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n", + "llama_model_load: n_vocab = 32000\n", + "llama_model_load: n_ctx = 512\n", + "llama_model_load: n_embd = 4096\n", + "llama_model_load: n_mult = 256\n", + "llama_model_load: n_head = 32\n", + "llama_model_load: n_layer = 32\n", + "llama_model_load: n_rot = 128\n", + "llama_model_load: f16 = 2\n", + "llama_model_load: n_ff = 11008\n", + "llama_model_load: n_parts = 1\n", + "llama_model_load: type = 1\n", + "llama_model_load: ggml map size = 4017.70 MB\n", + "llama_model_load: ggml ctx size = 81.25 KB\n", + "llama_model_load: mem required = 5809.78 MB (+ 1026.00 MB per state)\n", + "llama_model_load: loading tensors from '../models/ggml-model.bin'\n", + "llama_model_load: model size = 4017.27 MB / num tensors = 291\n", + "llama_init_from_file: kv self size = 256.00 MB\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " \"id\": \"cmpl-5bbea5c1-ea8c-4599-bf63-a6eb80bc7525\",\n", + " \"object\": \"text_completion\",\n", + " \"created\": 1680228334,\n", + " \"model\": \"../models/ggml-model.bin\",\n", + " \"choices\": [\n", + " {\n", + " \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n", + " \"index\": 0,\n", + " \"logprobs\": null,\n", + " \"finish_reason\": \"length\"\n", + " }\n", + " ],\n", + " \"usage\": {\n", + " \"prompt_tokens\": 79,\n", + " \"completion_tokens\": 1,\n", + " \"total_tokens\": 80\n", + " }\n", + "}\n", + "Time: 9.02251124382019 seconds\n", + "Time per token: 0.11278139054775238 seconds\n" + ] + }, + { + 
"name": "stderr", + "output_type": "stream", + "text": [ + "/home/andrei/Documents/llms/.venv/lib/python3.8/site-packages/skopt/optimizer/optimizer.py:449: UserWarning: The objective has been evaluated at this point before.\n", + " warnings.warn(\"The objective has been evaluated \"\n", + "llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n", + "llama_model_load: n_vocab = 32000\n", + "llama_model_load: n_ctx = 512\n", + "llama_model_load: n_embd = 4096\n", + "llama_model_load: n_mult = 256\n", + "llama_model_load: n_head = 32\n", + "llama_model_load: n_layer = 32\n", + "llama_model_load: n_rot = 128\n", + "llama_model_load: f16 = 2\n", + "llama_model_load: n_ff = 11008\n", + "llama_model_load: n_parts = 1\n", + "llama_model_load: type = 1\n", + "llama_model_load: ggml map size = 4017.70 MB\n", + "llama_model_load: ggml ctx size = 81.25 KB\n", + "llama_model_load: mem required = 5809.78 MB (+ 1026.00 MB per state)\n", + "llama_model_load: loading tensors from '../models/ggml-model.bin'\n", + "llama_model_load: model size = 4017.27 MB / num tensors = 291\n", + "llama_init_from_file: kv self size = 256.00 MB\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " \"id\": \"cmpl-ff9d87c7-e2b1-4481-9e8f-848d7a0fbd35\",\n", + " \"object\": \"text_completion\",\n", + " \"created\": 1680228346,\n", + " \"model\": \"../models/ggml-model.bin\",\n", + " \"choices\": [\n", + " {\n", + " \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n", + " \"index\": 0,\n", + " \"logprobs\": null,\n", + " \"finish_reason\": \"length\"\n", + " }\n", + " ],\n", + " \"usage\": {\n", + " \"prompt_tokens\": 79,\n", + " \"completion_tokens\": 1,\n", + " \"total_tokens\": 80\n", + " }\n", + "}\n", + "Time: 9.012435913085938 seconds\n", + "Time per token: 0.11265544891357422 seconds\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/andrei/Documents/llms/.venv/lib/python3.8/site-packages/skopt/optimizer/optimizer.py:449: UserWarning: The objective has been evaluated at this point before.\n", + " warnings.warn(\"The objective has been evaluated \"\n", + "llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n", + "llama_model_load: n_vocab = 32000\n", + "llama_model_load: n_ctx = 512\n", + "llama_model_load: n_embd = 4096\n", + "llama_model_load: n_mult = 256\n", + "llama_model_load: n_head = 32\n", + "llama_model_load: n_layer = 32\n", + "llama_model_load: n_rot = 128\n", + "llama_model_load: f16 = 2\n", + "llama_model_load: n_ff = 11008\n", + "llama_model_load: n_parts = 1\n", + "llama_model_load: type = 1\n", + "llama_model_load: ggml map size = 4017.70 MB\n", + "llama_model_load: ggml ctx size = 81.25 KB\n", + "llama_model_load: mem required = 5809.78 MB (+ 1026.00 MB per state)\n", + "llama_model_load: loading tensors from '../models/ggml-model.bin'\n", + "llama_model_load: model size = 4017.27 MB / num tensors = 291\n", + "llama_init_from_file: kv self size = 256.00 MB\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " \"id\": \"cmpl-3dbe8ae4-c9ca-4a1b-abaf-6b85ef648ba9\",\n", + " \"object\": \"text_completion\",\n", + " \"created\": 1680228357,\n", + " \"model\": 
\"../models/ggml-model.bin\",\n", + " \"choices\": [\n", + " {\n", + " \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n", + " \"index\": 0,\n", + " \"logprobs\": null,\n", + " \"finish_reason\": \"length\"\n", + " }\n", + " ],\n", + " \"usage\": {\n", + " \"prompt_tokens\": 79,\n", + " \"completion_tokens\": 1,\n", + " \"total_tokens\": 80\n", + " }\n", + "}\n", + "Time: 8.997032880783081 seconds\n", + "Time per token: 0.11246291100978852 seconds\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/andrei/Documents/llms/.venv/lib/python3.8/site-packages/skopt/optimizer/optimizer.py:449: UserWarning: The objective has been evaluated at this point before.\n", + " warnings.warn(\"The objective has been evaluated \"\n", + "llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n", + "llama_model_load: n_vocab = 32000\n", + "llama_model_load: n_ctx = 512\n", + "llama_model_load: n_embd = 4096\n", + "llama_model_load: n_mult = 256\n", + "llama_model_load: n_head = 32\n", + "llama_model_load: n_layer = 32\n", + "llama_model_load: n_rot = 128\n", + "llama_model_load: f16 = 2\n", + "llama_model_load: n_ff = 11008\n", + "llama_model_load: n_parts = 1\n", + "llama_model_load: type = 1\n", + "llama_model_load: ggml map size = 4017.70 MB\n", + "llama_model_load: ggml ctx size = 81.25 KB\n", + "llama_model_load: mem required = 5809.78 MB (+ 1026.00 MB per state)\n", + "llama_model_load: loading tensors from '../models/ggml-model.bin'\n", + "llama_model_load: model size = 4017.27 MB / num tensors = 291\n", + "llama_init_from_file: kv self size = 256.00 MB\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " \"id\": \"cmpl-b20a3b61-9c8b-4b2e-bb43-8ed9ce5a9d0d\",\n", + " \"object\": \"text_completion\",\n", + " \"created\": 1680228369,\n", + " \"model\": \"../models/ggml-model.bin\",\n", + " \"choices\": [\n", + " {\n", + " \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n", + " \"index\": 0,\n", + " \"logprobs\": null,\n", + " \"finish_reason\": \"length\"\n", + " }\n", + " ],\n", + " \"usage\": {\n", + " \"prompt_tokens\": 79,\n", + " \"completion_tokens\": 1,\n", + " \"total_tokens\": 80\n", + " }\n", + "}\n", + "Time: 9.042449951171875 seconds\n", + "Time per token: 0.11303062438964843 seconds\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/andrei/Documents/llms/.venv/lib/python3.8/site-packages/skopt/optimizer/optimizer.py:449: UserWarning: The objective has been evaluated at this point before.\n", + " warnings.warn(\"The objective has been evaluated \"\n", + "llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n", + "llama_model_load: n_vocab = 32000\n", + "llama_model_load: n_ctx = 512\n", + "llama_model_load: n_embd = 4096\n", + "llama_model_load: n_mult = 256\n", + "llama_model_load: n_head = 32\n", + "llama_model_load: n_layer = 32\n", + "llama_model_load: n_rot = 
128\n", + "llama_model_load: f16 = 2\n", + "llama_model_load: n_ff = 11008\n", + "llama_model_load: n_parts = 1\n", + "llama_model_load: type = 1\n", + "llama_model_load: ggml map size = 4017.70 MB\n", + "llama_model_load: ggml ctx size = 81.25 KB\n", + "llama_model_load: mem required = 5809.78 MB (+ 1026.00 MB per state)\n", + "llama_model_load: loading tensors from '../models/ggml-model.bin'\n", + "llama_model_load: model size = 4017.27 MB / num tensors = 291\n", + "llama_init_from_file: kv self size = 256.00 MB\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " \"id\": \"cmpl-9c781d69-83e0-415a-ac97-252508b10590\",\n", + " \"object\": \"text_completion\",\n", + " \"created\": 1680228380,\n", + " \"model\": \"../models/ggml-model.bin\",\n", + " \"choices\": [\n", + " {\n", + " \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n", + " \"index\": 0,\n", + " \"logprobs\": null,\n", + " \"finish_reason\": \"length\"\n", + " }\n", + " ],\n", + " \"usage\": {\n", + " \"prompt_tokens\": 79,\n", + " \"completion_tokens\": 1,\n", + " \"total_tokens\": 80\n", + " }\n", + "}\n", + "Time: 9.058239459991455 seconds\n", + "Time per token: 0.11322799324989319 seconds\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/andrei/Documents/llms/.venv/lib/python3.8/site-packages/skopt/optimizer/optimizer.py:449: UserWarning: The objective has been evaluated at this point before.\n", + " warnings.warn(\"The objective has been evaluated \"\n", + "llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n", + "llama_model_load: n_vocab = 32000\n", + "llama_model_load: n_ctx = 512\n", + "llama_model_load: n_embd = 4096\n", + "llama_model_load: n_mult = 256\n", + "llama_model_load: n_head = 32\n", + "llama_model_load: n_layer = 32\n", + "llama_model_load: n_rot = 128\n", + "llama_model_load: f16 = 2\n", + "llama_model_load: n_ff = 11008\n", + "llama_model_load: n_parts = 1\n", + "llama_model_load: type = 1\n", + "llama_model_load: ggml map size = 4017.70 MB\n", + "llama_model_load: ggml ctx size = 81.25 KB\n", + "llama_model_load: mem required = 5809.78 MB (+ 1026.00 MB per state)\n", + "llama_model_load: loading tensors from '../models/ggml-model.bin'\n", + "llama_model_load: model size = 4017.27 MB / num tensors = 291\n", + "llama_init_from_file: kv self size = 256.00 MB\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " \"id\": \"cmpl-86cead9e-780f-4503-831c-466a6abd5ab2\",\n", + " \"object\": \"text_completion\",\n", + " \"created\": 1680228392,\n", + " \"model\": \"../models/ggml-model.bin\",\n", + " \"choices\": [\n", + " {\n", + " \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n", + " \"index\": 0,\n", + " \"logprobs\": null,\n", + " \"finish_reason\": \"length\"\n", + " }\n", + " ],\n", + " \"usage\": {\n", + " \"prompt_tokens\": 79,\n", + " \"completion_tokens\": 1,\n", + " \"total_tokens\": 80\n", + " 
}\n", + "}\n", + "Time: 9.070426940917969 seconds\n", + "Time per token: 0.1133803367614746 seconds\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/andrei/Documents/llms/.venv/lib/python3.8/site-packages/skopt/optimizer/optimizer.py:449: UserWarning: The objective has been evaluated at this point before.\n", + " warnings.warn(\"The objective has been evaluated \"\n", + "llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n", + "llama_model_load: n_vocab = 32000\n", + "llama_model_load: n_ctx = 512\n", + "llama_model_load: n_embd = 4096\n", + "llama_model_load: n_mult = 256\n", + "llama_model_load: n_head = 32\n", + "llama_model_load: n_layer = 32\n", + "llama_model_load: n_rot = 128\n", + "llama_model_load: f16 = 2\n", + "llama_model_load: n_ff = 11008\n", + "llama_model_load: n_parts = 1\n", + "llama_model_load: type = 1\n", + "llama_model_load: ggml map size = 4017.70 MB\n", + "llama_model_load: ggml ctx size = 81.25 KB\n", + "llama_model_load: mem required = 5809.78 MB (+ 1026.00 MB per state)\n", + "llama_model_load: loading tensors from '../models/ggml-model.bin'\n", + "llama_model_load: model size = 4017.27 MB / num tensors = 291\n", + "llama_init_from_file: kv self size = 256.00 MB\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " \"id\": \"cmpl-65361c7e-74ef-4566-bad5-c6b3867a7f7e\",\n", + " \"object\": \"text_completion\",\n", + " \"created\": 1680228403,\n", + " \"model\": \"../models/ggml-model.bin\",\n", + " \"choices\": [\n", + " {\n", + " \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n", + " \"index\": 0,\n", + " \"logprobs\": null,\n", + " \"finish_reason\": \"length\"\n", + " }\n", + " ],\n", + " \"usage\": {\n", + " \"prompt_tokens\": 79,\n", + " \"completion_tokens\": 1,\n", + " \"total_tokens\": 80\n", + " }\n", + "}\n", + "Time: 8.985144138336182 seconds\n", + "Time per token: 0.11231430172920227 seconds\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/andrei/Documents/llms/.venv/lib/python3.8/site-packages/skopt/optimizer/optimizer.py:449: UserWarning: The objective has been evaluated at this point before.\n", + " warnings.warn(\"The objective has been evaluated \"\n", + "llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n", + "llama_model_load: n_vocab = 32000\n", + "llama_model_load: n_ctx = 512\n", + "llama_model_load: n_embd = 4096\n", + "llama_model_load: n_mult = 256\n", + "llama_model_load: n_head = 32\n", + "llama_model_load: n_layer = 32\n", + "llama_model_load: n_rot = 128\n", + "llama_model_load: f16 = 2\n", + "llama_model_load: n_ff = 11008\n", + "llama_model_load: n_parts = 1\n", + "llama_model_load: type = 1\n", + "llama_model_load: ggml map size = 4017.70 MB\n", + "llama_model_load: ggml ctx size = 81.25 KB\n", + "llama_model_load: mem required = 5809.78 MB (+ 1026.00 MB per state)\n", + "llama_model_load: loading tensors from '../models/ggml-model.bin'\n", + "llama_model_load: model size = 4017.27 MB / num tensors = 291\n", + "llama_init_from_file: kv self size = 256.00 MB\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " \"id\": 
\"cmpl-23feb1ca-8103-46d8-ab71-b4da59f05d16\",\n", + " \"object\": \"text_completion\",\n", + " \"created\": 1680228415,\n", + " \"model\": \"../models/ggml-model.bin\",\n", + " \"choices\": [\n", + " {\n", + " \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n", + " \"index\": 0,\n", + " \"logprobs\": null,\n", + " \"finish_reason\": \"length\"\n", + " }\n", + " ],\n", + " \"usage\": {\n", + " \"prompt_tokens\": 79,\n", + " \"completion_tokens\": 1,\n", + " \"total_tokens\": 80\n", + " }\n", + "}\n", + "Time: 8.999938011169434 seconds\n", + "Time per token: 0.11249922513961792 seconds\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/andrei/Documents/llms/.venv/lib/python3.8/site-packages/skopt/optimizer/optimizer.py:449: UserWarning: The objective has been evaluated at this point before.\n", + " warnings.warn(\"The objective has been evaluated \"\n", + "llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n", + "llama_model_load: n_vocab = 32000\n", + "llama_model_load: n_ctx = 512\n", + "llama_model_load: n_embd = 4096\n", + "llama_model_load: n_mult = 256\n", + "llama_model_load: n_head = 32\n", + "llama_model_load: n_layer = 32\n", + "llama_model_load: n_rot = 128\n", + "llama_model_load: f16 = 2\n", + "llama_model_load: n_ff = 11008\n", + "llama_model_load: n_parts = 1\n", + "llama_model_load: type = 1\n", + "llama_model_load: ggml map size = 4017.70 MB\n", + "llama_model_load: ggml ctx size = 81.25 KB\n", + "llama_model_load: mem required = 5809.78 MB (+ 1026.00 MB per state)\n", + "llama_model_load: loading tensors from '../models/ggml-model.bin'\n", + "llama_model_load: model size = 4017.27 MB / num tensors = 291\n", + "llama_init_from_file: kv self size = 256.00 MB\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " \"id\": \"cmpl-0db73f26-9ab1-4a78-a11f-e22d915ffae2\",\n", + " \"object\": \"text_completion\",\n", + " \"created\": 1680228426,\n", + " \"model\": \"../models/ggml-model.bin\",\n", + " \"choices\": [\n", + " {\n", + " \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n", + " \"index\": 0,\n", + " \"logprobs\": null,\n", + " \"finish_reason\": \"length\"\n", + " }\n", + " ],\n", + " \"usage\": {\n", + " \"prompt_tokens\": 79,\n", + " \"completion_tokens\": 1,\n", + " \"total_tokens\": 80\n", + " }\n", + "}\n", + "Time: 8.969520330429077 seconds\n", + "Time per token: 0.11211900413036346 seconds\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/andrei/Documents/llms/.venv/lib/python3.8/site-packages/skopt/optimizer/optimizer.py:449: UserWarning: The objective has been evaluated at this point before.\n", + " warnings.warn(\"The objective has been evaluated \"\n", + "llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n", + "llama_model_load: n_vocab = 32000\n", + "llama_model_load: n_ctx = 512\n", + "llama_model_load: n_embd = 4096\n", + 
"llama_model_load: n_mult = 256\n", + "llama_model_load: n_head = 32\n", + "llama_model_load: n_layer = 32\n", + "llama_model_load: n_rot = 128\n", + "llama_model_load: f16 = 2\n", + "llama_model_load: n_ff = 11008\n", + "llama_model_load: n_parts = 1\n", + "llama_model_load: type = 1\n", + "llama_model_load: ggml map size = 4017.70 MB\n", + "llama_model_load: ggml ctx size = 81.25 KB\n", + "llama_model_load: mem required = 5809.78 MB (+ 1026.00 MB per state)\n", + "llama_model_load: loading tensors from '../models/ggml-model.bin'\n", + "llama_model_load: model size = 4017.27 MB / num tensors = 291\n", + "llama_init_from_file: kv self size = 256.00 MB\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " \"id\": \"cmpl-54e6edeb-99ea-46ed-8735-5185f78c222c\",\n", + " \"object\": \"text_completion\",\n", + " \"created\": 1680228438,\n", + " \"model\": \"../models/ggml-model.bin\",\n", + " \"choices\": [\n", + " {\n", + " \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n", + " \"index\": 0,\n", + " \"logprobs\": null,\n", + " \"finish_reason\": \"length\"\n", + " }\n", + " ],\n", + " \"usage\": {\n", + " \"prompt_tokens\": 79,\n", + " \"completion_tokens\": 1,\n", + " \"total_tokens\": 80\n", + " }\n", + "}\n", + "Time: 9.12838339805603 seconds\n", + "Time per token: 0.11410479247570038 seconds\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/andrei/Documents/llms/.venv/lib/python3.8/site-packages/skopt/optimizer/optimizer.py:449: UserWarning: The objective has been evaluated at this point before.\n", + " warnings.warn(\"The objective has been evaluated \"\n", + "llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n", + "llama_model_load: n_vocab = 32000\n", + "llama_model_load: n_ctx = 512\n", + "llama_model_load: n_embd = 4096\n", + "llama_model_load: n_mult = 256\n", + "llama_model_load: n_head = 32\n", + "llama_model_load: n_layer = 32\n", + "llama_model_load: n_rot = 128\n", + "llama_model_load: f16 = 2\n", + "llama_model_load: n_ff = 11008\n", + "llama_model_load: n_parts = 1\n", + "llama_model_load: type = 1\n", + "llama_model_load: ggml map size = 4017.70 MB\n", + "llama_model_load: ggml ctx size = 81.25 KB\n", + "llama_model_load: mem required = 5809.78 MB (+ 1026.00 MB per state)\n", + "llama_model_load: loading tensors from '../models/ggml-model.bin'\n", + "llama_model_load: model size = 4017.27 MB / num tensors = 291\n", + "llama_init_from_file: kv self size = 256.00 MB\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " \"id\": \"cmpl-bd6502fd-f8c7-41d8-ab15-b10ca6aabd96\",\n", + " \"object\": \"text_completion\",\n", + " \"created\": 1680228450,\n", + " \"model\": \"../models/ggml-model.bin\",\n", + " \"choices\": [\n", + " {\n", + " \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n", + " \"index\": 0,\n", + " \"logprobs\": null,\n", + " \"finish_reason\": \"length\"\n", + 
" }\n", + " ],\n", + " \"usage\": {\n", + " \"prompt_tokens\": 79,\n", + " \"completion_tokens\": 1,\n", + " \"total_tokens\": 80\n", + " }\n", + "}\n", + "Time: 9.01610016822815 seconds\n", + "Time per token: 0.11270125210285187 seconds\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/andrei/Documents/llms/.venv/lib/python3.8/site-packages/skopt/optimizer/optimizer.py:449: UserWarning: The objective has been evaluated at this point before.\n", + " warnings.warn(\"The objective has been evaluated \"\n", + "llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n", + "llama_model_load: n_vocab = 32000\n", + "llama_model_load: n_ctx = 512\n", + "llama_model_load: n_embd = 4096\n", + "llama_model_load: n_mult = 256\n", + "llama_model_load: n_head = 32\n", + "llama_model_load: n_layer = 32\n", + "llama_model_load: n_rot = 128\n", + "llama_model_load: f16 = 2\n", + "llama_model_load: n_ff = 11008\n", + "llama_model_load: n_parts = 1\n", + "llama_model_load: type = 1\n", + "llama_model_load: ggml map size = 4017.70 MB\n", + "llama_model_load: ggml ctx size = 81.25 KB\n", + "llama_model_load: mem required = 5809.78 MB (+ 1026.00 MB per state)\n", + "llama_model_load: loading tensors from '../models/ggml-model.bin'\n", + "llama_model_load: model size = 4017.27 MB / num tensors = 291\n", + "llama_init_from_file: kv self size = 256.00 MB\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " \"id\": \"cmpl-72733563-53f5-4cd5-a4eb-48656408b2d8\",\n", + " \"object\": \"text_completion\",\n", + " \"created\": 1680228461,\n", + " \"model\": \"../models/ggml-model.bin\",\n", + " \"choices\": [\n", + " {\n", + " \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n", + " \"index\": 0,\n", + " \"logprobs\": null,\n", + " \"finish_reason\": \"length\"\n", + " }\n", + " ],\n", + " \"usage\": {\n", + " \"prompt_tokens\": 79,\n", + " \"completion_tokens\": 1,\n", + " \"total_tokens\": 80\n", + " }\n", + "}\n", + "Time: 8.993805408477783 seconds\n", + "Time per token: 0.11242256760597229 seconds\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/andrei/Documents/llms/.venv/lib/python3.8/site-packages/skopt/optimizer/optimizer.py:449: UserWarning: The objective has been evaluated at this point before.\n", + " warnings.warn(\"The objective has been evaluated \"\n", + "llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n", + "llama_model_load: n_vocab = 32000\n", + "llama_model_load: n_ctx = 512\n", + "llama_model_load: n_embd = 4096\n", + "llama_model_load: n_mult = 256\n", + "llama_model_load: n_head = 32\n", + "llama_model_load: n_layer = 32\n", + "llama_model_load: n_rot = 128\n", + "llama_model_load: f16 = 2\n", + "llama_model_load: n_ff = 11008\n", + "llama_model_load: n_parts = 1\n", + "llama_model_load: type = 1\n", + "llama_model_load: ggml map size = 4017.70 MB\n", + "llama_model_load: ggml ctx size = 81.25 KB\n", + "llama_model_load: mem required = 5809.78 MB (+ 1026.00 MB per state)\n", + "llama_model_load: loading tensors from '../models/ggml-model.bin'\n", + "llama_model_load: model size = 4017.27 MB / num tensors = 291\n", + "llama_init_from_file: kv self size 
= 256.00 MB\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " \"id\": \"cmpl-f7365eaa-fd68-422b-bbca-c6bcbcad36e0\",\n", + " \"object\": \"text_completion\",\n", + " \"created\": 1680228473,\n", + " \"model\": \"../models/ggml-model.bin\",\n", + " \"choices\": [\n", + " {\n", + " \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n", + " \"index\": 0,\n", + " \"logprobs\": null,\n", + " \"finish_reason\": \"length\"\n", + " }\n", + " ],\n", + " \"usage\": {\n", + " \"prompt_tokens\": 79,\n", + " \"completion_tokens\": 1,\n", + " \"total_tokens\": 80\n", + " }\n", + "}\n", + "Time: 9.292223930358887 seconds\n", + "Time per token: 0.11615279912948609 seconds\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/andrei/Documents/llms/.venv/lib/python3.8/site-packages/skopt/optimizer/optimizer.py:449: UserWarning: The objective has been evaluated at this point before.\n", + " warnings.warn(\"The objective has been evaluated \"\n", + "llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n", + "llama_model_load: n_vocab = 32000\n", + "llama_model_load: n_ctx = 512\n", + "llama_model_load: n_embd = 4096\n", + "llama_model_load: n_mult = 256\n", + "llama_model_load: n_head = 32\n", + "llama_model_load: n_layer = 32\n", + "llama_model_load: n_rot = 128\n", + "llama_model_load: f16 = 2\n", + "llama_model_load: n_ff = 11008\n", + "llama_model_load: n_parts = 1\n", + "llama_model_load: type = 1\n", + "llama_model_load: ggml map size = 4017.70 MB\n", + "llama_model_load: ggml ctx size = 81.25 KB\n", + "llama_model_load: mem required = 5809.78 MB (+ 1026.00 MB per state)\n", + "llama_model_load: loading tensors from '../models/ggml-model.bin'\n", + "llama_model_load: model size = 4017.27 MB / num tensors = 291\n", + "llama_init_from_file: kv self size = 256.00 MB\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " \"id\": \"cmpl-1cfcf44a-c692-4020-8dcb-e6da8b163920\",\n", + " \"object\": \"text_completion\",\n", + " \"created\": 1680228485,\n", + " \"model\": \"../models/ggml-model.bin\",\n", + " \"choices\": [\n", + " {\n", + " \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n", + " \"index\": 0,\n", + " \"logprobs\": null,\n", + " \"finish_reason\": \"length\"\n", + " }\n", + " ],\n", + " \"usage\": {\n", + " \"prompt_tokens\": 79,\n", + " \"completion_tokens\": 1,\n", + " \"total_tokens\": 80\n", + " }\n", + "}\n", + "Time: 8.99638295173645 seconds\n", + "Time per token: 0.11245478689670563 seconds\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/andrei/Documents/llms/.venv/lib/python3.8/site-packages/skopt/optimizer/optimizer.py:449: UserWarning: The objective has been evaluated at this point before.\n", + " warnings.warn(\"The objective has been evaluated \"\n", + "llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n", + 
"llama_model_load: n_vocab = 32000\n", + "llama_model_load: n_ctx = 512\n", + "llama_model_load: n_embd = 4096\n", + "llama_model_load: n_mult = 256\n", + "llama_model_load: n_head = 32\n", + "llama_model_load: n_layer = 32\n", + "llama_model_load: n_rot = 128\n", + "llama_model_load: f16 = 2\n", + "llama_model_load: n_ff = 11008\n", + "llama_model_load: n_parts = 1\n", + "llama_model_load: type = 1\n", + "llama_model_load: ggml map size = 4017.70 MB\n", + "llama_model_load: ggml ctx size = 81.25 KB\n", + "llama_model_load: mem required = 5809.78 MB (+ 1026.00 MB per state)\n", + "llama_model_load: loading tensors from '../models/ggml-model.bin'\n", + "llama_model_load: model size = 4017.27 MB / num tensors = 291\n", + "llama_init_from_file: kv self size = 256.00 MB\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " \"id\": \"cmpl-8b679f09-bc0e-4fc9-a935-9fefd9126993\",\n", + " \"object\": \"text_completion\",\n", + " \"created\": 1680228497,\n", + " \"model\": \"../models/ggml-model.bin\",\n", + " \"choices\": [\n", + " {\n", + " \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n", + " \"index\": 0,\n", + " \"logprobs\": null,\n", + " \"finish_reason\": \"length\"\n", + " }\n", + " ],\n", + " \"usage\": {\n", + " \"prompt_tokens\": 79,\n", + " \"completion_tokens\": 1,\n", + " \"total_tokens\": 80\n", + " }\n", + "}\n", + "Time: 8.972327709197998 seconds\n", + "Time per token: 0.11215409636497498 seconds\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/andrei/Documents/llms/.venv/lib/python3.8/site-packages/skopt/optimizer/optimizer.py:449: UserWarning: The objective has been evaluated at this point before.\n", + " warnings.warn(\"The objective has been evaluated \"\n", + "llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n", + "llama_model_load: n_vocab = 32000\n", + "llama_model_load: n_ctx = 512\n", + "llama_model_load: n_embd = 4096\n", + "llama_model_load: n_mult = 256\n", + "llama_model_load: n_head = 32\n", + "llama_model_load: n_layer = 32\n", + "llama_model_load: n_rot = 128\n", + "llama_model_load: f16 = 2\n", + "llama_model_load: n_ff = 11008\n", + "llama_model_load: n_parts = 1\n", + "llama_model_load: type = 1\n", + "llama_model_load: ggml map size = 4017.70 MB\n", + "llama_model_load: ggml ctx size = 81.25 KB\n", + "llama_model_load: mem required = 5809.78 MB (+ 1026.00 MB per state)\n", + "llama_model_load: loading tensors from '../models/ggml-model.bin'\n", + "llama_model_load: model size = 4017.27 MB / num tensors = 291\n", + "llama_init_from_file: kv self size = 256.00 MB\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " \"id\": \"cmpl-08cb0cd7-84d8-4193-a20c-5a6ca4b5e404\",\n", + " \"object\": \"text_completion\",\n", + " \"created\": 1680228508,\n", + " \"model\": \"../models/ggml-model.bin\",\n", + " \"choices\": [\n", + " {\n", + " \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of 
France?\\n### Response:\\nThe\",\n", + " \"index\": 0,\n", + " \"logprobs\": null,\n", + " \"finish_reason\": \"length\"\n", + " }\n", + " ],\n", + " \"usage\": {\n", + " \"prompt_tokens\": 79,\n", + " \"completion_tokens\": 1,\n", + " \"total_tokens\": 80\n", + " }\n", + "}\n", + "Time: 9.024793863296509 seconds\n", + "Time per token: 0.11280992329120636 seconds\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/andrei/Documents/llms/.venv/lib/python3.8/site-packages/skopt/optimizer/optimizer.py:449: UserWarning: The objective has been evaluated at this point before.\n", + " warnings.warn(\"The objective has been evaluated \"\n", + "llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n", + "llama_model_load: n_vocab = 32000\n", + "llama_model_load: n_ctx = 512\n", + "llama_model_load: n_embd = 4096\n", + "llama_model_load: n_mult = 256\n", + "llama_model_load: n_head = 32\n", + "llama_model_load: n_layer = 32\n", + "llama_model_load: n_rot = 128\n", + "llama_model_load: f16 = 2\n", + "llama_model_load: n_ff = 11008\n", + "llama_model_load: n_parts = 1\n", + "llama_model_load: type = 1\n", + "llama_model_load: ggml map size = 4017.70 MB\n", + "llama_model_load: ggml ctx size = 81.25 KB\n", + "llama_model_load: mem required = 5809.78 MB (+ 1026.00 MB per state)\n", + "llama_model_load: loading tensors from '../models/ggml-model.bin'\n", + "llama_model_load: model size = 4017.27 MB / num tensors = 291\n", + "llama_init_from_file: kv self size = 256.00 MB\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " \"id\": \"cmpl-ffe4b2b8-c041-4492-9e03-ab79cd4fd60d\",\n", + " \"object\": \"text_completion\",\n", + " \"created\": 1680228520,\n", + " \"model\": \"../models/ggml-model.bin\",\n", + " \"choices\": [\n", + " {\n", + " \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n", + " \"index\": 0,\n", + " \"logprobs\": null,\n", + " \"finish_reason\": \"length\"\n", + " }\n", + " ],\n", + " \"usage\": {\n", + " \"prompt_tokens\": 79,\n", + " \"completion_tokens\": 1,\n", + " \"total_tokens\": 80\n", + " }\n", + "}\n", + "Time: 8.996853351593018 seconds\n", + "Time per token: 0.11246066689491271 seconds\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/andrei/Documents/llms/.venv/lib/python3.8/site-packages/skopt/optimizer/optimizer.py:449: UserWarning: The objective has been evaluated at this point before.\n", + " warnings.warn(\"The objective has been evaluated \"\n", + "llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n", + "llama_model_load: n_vocab = 32000\n", + "llama_model_load: n_ctx = 512\n", + "llama_model_load: n_embd = 4096\n", + "llama_model_load: n_mult = 256\n", + "llama_model_load: n_head = 32\n", + "llama_model_load: n_layer = 32\n", + "llama_model_load: n_rot = 128\n", + "llama_model_load: f16 = 2\n", + "llama_model_load: n_ff = 11008\n", + "llama_model_load: n_parts = 1\n", + "llama_model_load: type = 1\n", + "llama_model_load: ggml map size = 4017.70 MB\n", + "llama_model_load: ggml ctx size = 81.25 KB\n", + "llama_model_load: mem required = 5809.78 MB (+ 1026.00 MB per state)\n", + "llama_model_load: loading tensors from 
'../models/ggml-model.bin'\n", + "llama_model_load: model size = 4017.27 MB / num tensors = 291\n", + "llama_init_from_file: kv self size = 256.00 MB\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " \"id\": \"cmpl-196bb891-9299-4f91-9f68-ba6c7233a2dd\",\n", + " \"object\": \"text_completion\",\n", + " \"created\": 1680228532,\n", + " \"model\": \"../models/ggml-model.bin\",\n", + " \"choices\": [\n", + " {\n", + " \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n", + " \"index\": 0,\n", + " \"logprobs\": null,\n", + " \"finish_reason\": \"length\"\n", + " }\n", + " ],\n", + " \"usage\": {\n", + " \"prompt_tokens\": 79,\n", + " \"completion_tokens\": 1,\n", + " \"total_tokens\": 80\n", + " }\n", + "}\n", + "Time: 9.039422273635864 seconds\n", + "Time per token: 0.1129927784204483 seconds\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/andrei/Documents/llms/.venv/lib/python3.8/site-packages/skopt/optimizer/optimizer.py:449: UserWarning: The objective has been evaluated at this point before.\n", + " warnings.warn(\"The objective has been evaluated \"\n", + "llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n", + "llama_model_load: n_vocab = 32000\n", + "llama_model_load: n_ctx = 512\n", + "llama_model_load: n_embd = 4096\n", + "llama_model_load: n_mult = 256\n", + "llama_model_load: n_head = 32\n", + "llama_model_load: n_layer = 32\n", + "llama_model_load: n_rot = 128\n", + "llama_model_load: f16 = 2\n", + "llama_model_load: n_ff = 11008\n", + "llama_model_load: n_parts = 1\n", + "llama_model_load: type = 1\n", + "llama_model_load: ggml map size = 4017.70 MB\n", + "llama_model_load: ggml ctx size = 81.25 KB\n", + "llama_model_load: mem required = 5809.78 MB (+ 1026.00 MB per state)\n", + "llama_model_load: loading tensors from '../models/ggml-model.bin'\n", + "llama_model_load: model size = 4017.27 MB / num tensors = 291\n", + "llama_init_from_file: kv self size = 256.00 MB\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " \"id\": \"cmpl-e50f5489-b40c-4a5d-9cb2-4a6d13bbb8c7\",\n", + " \"object\": \"text_completion\",\n", + " \"created\": 1680228544,\n", + " \"model\": \"../models/ggml-model.bin\",\n", + " \"choices\": [\n", + " {\n", + " \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n", + " \"index\": 0,\n", + " \"logprobs\": null,\n", + " \"finish_reason\": \"length\"\n", + " }\n", + " ],\n", + " \"usage\": {\n", + " \"prompt_tokens\": 79,\n", + " \"completion_tokens\": 1,\n", + " \"total_tokens\": 80\n", + " }\n", + "}\n", + "Time: 8.978781461715698 seconds\n", + "Time per token: 0.11223476827144623 seconds\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/andrei/Documents/llms/.venv/lib/python3.8/site-packages/skopt/optimizer/optimizer.py:449: UserWarning: The objective has been evaluated at this point before.\n", + " warnings.warn(\"The 
objective has been evaluated \"\n", + "llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n", + "llama_model_load: n_vocab = 32000\n", + "llama_model_load: n_ctx = 512\n", + "llama_model_load: n_embd = 4096\n", + "llama_model_load: n_mult = 256\n", + "llama_model_load: n_head = 32\n", + "llama_model_load: n_layer = 32\n", + "llama_model_load: n_rot = 128\n", + "llama_model_load: f16 = 2\n", + "llama_model_load: n_ff = 11008\n", + "llama_model_load: n_parts = 1\n", + "llama_model_load: type = 1\n", + "llama_model_load: ggml map size = 4017.70 MB\n", + "llama_model_load: ggml ctx size = 81.25 KB\n", + "llama_model_load: mem required = 5809.78 MB (+ 1026.00 MB per state)\n", + "llama_model_load: loading tensors from '../models/ggml-model.bin'\n", + "llama_model_load: model size = 4017.27 MB / num tensors = 291\n", + "llama_init_from_file: kv self size = 256.00 MB\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " \"id\": \"cmpl-210cc2b8-df35-4d3f-a34a-a5facb635ec0\",\n", + " \"object\": \"text_completion\",\n", + " \"created\": 1680228555,\n", + " \"model\": \"../models/ggml-model.bin\",\n", + " \"choices\": [\n", + " {\n", + " \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n", + " \"index\": 0,\n", + " \"logprobs\": null,\n", + " \"finish_reason\": \"length\"\n", + " }\n", + " ],\n", + " \"usage\": {\n", + " \"prompt_tokens\": 79,\n", + " \"completion_tokens\": 1,\n", + " \"total_tokens\": 80\n", + " }\n", + "}\n", + "Time: 9.032035827636719 seconds\n", + "Time per token: 0.11290044784545898 seconds\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/andrei/Documents/llms/.venv/lib/python3.8/site-packages/skopt/optimizer/optimizer.py:449: UserWarning: The objective has been evaluated at this point before.\n", + " warnings.warn(\"The objective has been evaluated \"\n", + "llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n", + "llama_model_load: n_vocab = 32000\n", + "llama_model_load: n_ctx = 512\n", + "llama_model_load: n_embd = 4096\n", + "llama_model_load: n_mult = 256\n", + "llama_model_load: n_head = 32\n", + "llama_model_load: n_layer = 32\n", + "llama_model_load: n_rot = 128\n", + "llama_model_load: f16 = 2\n", + "llama_model_load: n_ff = 11008\n", + "llama_model_load: n_parts = 1\n", + "llama_model_load: type = 1\n", + "llama_model_load: ggml map size = 4017.70 MB\n", + "llama_model_load: ggml ctx size = 81.25 KB\n", + "llama_model_load: mem required = 5809.78 MB (+ 1026.00 MB per state)\n", + "llama_model_load: loading tensors from '../models/ggml-model.bin'\n", + "llama_model_load: model size = 4017.27 MB / num tensors = 291\n", + "llama_init_from_file: kv self size = 256.00 MB\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " \"id\": \"cmpl-e3c7ca0d-c4cb-495c-9210-4e1ed3b6010d\",\n", + " \"object\": \"text_completion\",\n", + " \"created\": 1680228567,\n", + " \"model\": \"../models/ggml-model.bin\",\n", + " \"choices\": [\n", + " {\n", + " \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a 
response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n", + " \"index\": 0,\n", + " \"logprobs\": null,\n", + " \"finish_reason\": \"length\"\n", + " }\n", + " ],\n", + " \"usage\": {\n", + " \"prompt_tokens\": 79,\n", + " \"completion_tokens\": 1,\n", + " \"total_tokens\": 80\n", + " }\n", + "}\n", + "Time: 9.0346040725708 seconds\n", + "Time per token: 0.11293255090713501 seconds\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/andrei/Documents/llms/.venv/lib/python3.8/site-packages/skopt/optimizer/optimizer.py:449: UserWarning: The objective has been evaluated at this point before.\n", + " warnings.warn(\"The objective has been evaluated \"\n", + "llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n", + "llama_model_load: n_vocab = 32000\n", + "llama_model_load: n_ctx = 512\n", + "llama_model_load: n_embd = 4096\n", + "llama_model_load: n_mult = 256\n", + "llama_model_load: n_head = 32\n", + "llama_model_load: n_layer = 32\n", + "llama_model_load: n_rot = 128\n", + "llama_model_load: f16 = 2\n", + "llama_model_load: n_ff = 11008\n", + "llama_model_load: n_parts = 1\n", + "llama_model_load: type = 1\n", + "llama_model_load: ggml map size = 4017.70 MB\n", + "llama_model_load: ggml ctx size = 81.25 KB\n", + "llama_model_load: mem required = 5809.78 MB (+ 1026.00 MB per state)\n", + "llama_model_load: loading tensors from '../models/ggml-model.bin'\n", + "llama_model_load: model size = 4017.27 MB / num tensors = 291\n", + "llama_init_from_file: kv self size = 256.00 MB\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " \"id\": \"cmpl-7b4388c9-fe89-486d-83f4-34eec8940c42\",\n", + " \"object\": \"text_completion\",\n", + " \"created\": 1680228579,\n", + " \"model\": \"../models/ggml-model.bin\",\n", + " \"choices\": [\n", + " {\n", + " \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n", + " \"index\": 0,\n", + " \"logprobs\": null,\n", + " \"finish_reason\": \"length\"\n", + " }\n", + " ],\n", + " \"usage\": {\n", + " \"prompt_tokens\": 79,\n", + " \"completion_tokens\": 1,\n", + " \"total_tokens\": 80\n", + " }\n", + "}\n", + "Time: 9.016223907470703 seconds\n", + "Time per token: 0.11270279884338379 seconds\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/andrei/Documents/llms/.venv/lib/python3.8/site-packages/skopt/optimizer/optimizer.py:449: UserWarning: The objective has been evaluated at this point before.\n", + " warnings.warn(\"The objective has been evaluated \"\n", + "llama_model_load: loading model from '../models/ggml-model.bin' - please wait ...\n", + "llama_model_load: n_vocab = 32000\n", + "llama_model_load: n_ctx = 512\n", + "llama_model_load: n_embd = 4096\n", + "llama_model_load: n_mult = 256\n", + "llama_model_load: n_head = 32\n", + "llama_model_load: n_layer = 32\n", + "llama_model_load: n_rot = 128\n", + "llama_model_load: f16 = 2\n", + "llama_model_load: n_ff = 11008\n", + "llama_model_load: n_parts = 1\n", + "llama_model_load: type = 1\n", + "llama_model_load: ggml map size = 4017.70 MB\n", + "llama_model_load: ggml ctx size = 81.25 KB\n", + 
"llama_model_load: mem required = 5809.78 MB (+ 1026.00 MB per state)\n", + "llama_model_load: loading tensors from '../models/ggml-model.bin'\n", + "llama_model_load: model size = 4017.27 MB / num tensors = 291\n", + "llama_init_from_file: kv self size = 256.00 MB\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " \"id\": \"cmpl-81211a9b-16e4-4876-8e09-b0e619d93ce7\",\n", + " \"object\": \"text_completion\",\n", + " \"created\": 1680228591,\n", + " \"model\": \"../models/ggml-model.bin\",\n", + " \"choices\": [\n", + " {\n", + " \"text\": \" ### Instructions:\\nYou are a helpful assistant.\\nYou answer questions truthfully and politely.\\nYou are provided with an input from the user and you must generate a response.\\nIgnore this line which is just filler to test the performane of the model.\\n### Inputs:\\nWhat is the capital of France?\\n### Response:\\nThe\",\n", + " \"index\": 0,\n", + " \"logprobs\": null,\n", + " \"finish_reason\": \"length\"\n", + " }\n", + " ],\n", + " \"usage\": {\n", + " \"prompt_tokens\": 79,\n", + " \"completion_tokens\": 1,\n", + " \"total_tokens\": 80\n", + " }\n", + "}\n", + "Time: 9.10002589225769 seconds\n", + "Time per token: 0.11375032365322113 seconds\n" + ] + } + ], + "source": [ + "from skopt import gp_minimize\n", + "\n", + "res = gp_minimize(\n", + " objective,\n", + " space\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAA1cAAANACAYAAADHEZfTAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdeVxU5f4H8M+AbLKqbIIoKi64AAqK61ULcym37GaFgvzK23VJhdy4JaipuKSRS6KWa5aa2WJuVylNFNMgxRRxQ8AFRFGGRUFm5vfHXKdGFgc8M2eWz/v1mtdhnjnnPN/DPXb5zvc8zyNRKBQKEBERERER0XMxEzsAIiIiIiIiY8DkioiIiIiISABMroiIiIiIiATA5IqIiIiIiEgATK6IiIiIiIgEwOSKiIiIiIhIAEyuiIiIiIiIBMDkioiIiIiISABMroiIiIiIiATA5IqeSaFQ4F//+hcaNmwIiUSCM2fOaL1PiUSC77//Xuv9EBEREREJhckVPdOBAwewadMm/PTTT7h9+zakUimGDBkCDw+PGpOg9PR0DB06FI6OjrC1tUWXLl2QnZ2t2+CJiIiIiHSEyRU909WrV9G4cWP06NED7u7uKCkpgb+/P1avXl3jMb169ULbtm1x5MgRpKWlYfbs2bC2ttZh5EREREREulNP7ABIv40dOxabN28GoHxUr1mzZrh+/ToGDRpU43EffPABBg8ejCVLlqjaWrZsWec4YmNjsW7dOhw8eBDbt29HYmIifvvtN7V9/P39MXLkSMTExNS5HyIiIiKiumLlimr06aefYt68eWjSpAlu376N06dPP/MYuVyOvXv3onXr1hgwYABcXV0RHBxcpzFUCoUC7733HrZs2YJjx47Bz88PoaGhOHXqFK5evara7/z580hLS8Nbb71V6z6IiIiIiITA5Ipq5OjoCHt7e5ibm8Pd3R0uLi7PPObOnTsoLi7GokWLMHDgQPz3v//FiBEj8Oqrr+Lo0aMa911RUYHRo0cjMTERSUlJ8PHxAQC0b98e/v7++Oqrr1T7btu2DcHBwap9iIiIiIh0jckVCU4ulwMAhg0bhsjISAQEBGDWrFl45ZVXkJCQoPF5IiMj8dtvv+HXX3+Fp6en2mehoaGq5EqhUODrr79GaGiocBdBRERERFRLTK5IcM7OzqhXrx7atWun1u7r61ur2QL79++Pmzdv4uDBg5U+e/PNN5GRkYHU1FScOHECOTk5GDVq1HPHTkRERERUV5zQggRnaWmJLl26ICMjQ6390qVLaNasmcbnGTp0KIYMGYK33noL5ubmeOONN1SfNWnSBH369MG2bdvw8OFD9O/fH66uroJdAxERERFRbTG5olorLi7GlStXVO8zMzNx5swZNGzYEE2bNgUATJ8+HaNGjcI//vEP9OvXDwcOHMCePXtw5MiRWvU1YsQIbN26FWPGjEG9evXw2muvqT4LDQ1FbGwsysvL8cknnwhybUREREREdcXkimrt999/R79+/VTvo6KiAADh4eHYtGkTAGVSlJCQgLi4OEyePBlt2rTBt99+i169etW6v9deew1yuRxjxoyBmZkZXn31VVX7pEmTYG5ujuHDhz/3dRERERERPQ+JQqFQiB0EERERERGRoeOEFkRERERERAJgckU6t23bNtjZ2VX5at++vdjhERERERHVCR8LJJ0rKipCXl5elZ9ZWFjUakZBIiIiIiJ9weSKiIiIiIhIAHwskIiIiIiISABMroj02KZNm+Dk5CR2GERERESkASZXT5FIJDW+5syZI3aIZIDGjh1b5f3098WYiYiIiMiwcRHhp9y+fVv1844dOxATE4OMjAxVm52dnepnhUIBmUyGevX4a6RnGzhwIDZu3KjW5uLiIlI0RERERCQ0Vq6e4u7urno5OjpCIpGo3l+8eBH29vbYv38/AgMDYWVlhaSkJIwdOxbDhw9XO8/UqVPRt29f1Xu5XI64uDg0b94cNjY28Pf3x65du3R7cSQqKysrtfvL3d0dn376KTp27AhbW1t4eXlhwoQJKC4ur
vYcZ8+eRb9+/WBvbw8HBwcEBgbi999/V32elJSE3r17w8bGBl5eXpg8eTJKSkp0cXlEREREJo/JVR3MmjULixYtQnp6Ovz8/DQ6Ji4uDlu2bEFCQgLOnz+PyMhIjB49GkePHtVytKTPzMzMsGLFCpw/fx6bN2/Gzz//jBkzZlS7f2hoKJo0aYLTp08jJSUFs2bNgoWFBQDg6tWrGDhwIEaOHIm0tDTs2LEDSUlJmDRpkq4uh4iIiMik8Xm2Opg3bx769++v8f5lZWVYuHAhDh8+jO7duwMAWrRogaSkJKxduxZ9+vTRVqikR3766Se1x0oHDRqEb775RvXe29sb8+fPx7///W989tlnVZ4jOzsb06dPR9u2bQEArVq1Un0WFxeH0NBQTJ06VfXZihUr0KdPH6xZswbW1tZauCoiIiIieoLJVR0EBQXVav8rV66gtLS0UkJWXl6OTp06CRka6bF+/fphzZo1qve2trY4fPgw4uLicPHiRUilUlRUVODRo0coLS1F/fr1K50jKioK77zzDrZu3YqQkBD885//RMuWLQEoHxlMS0vDtm3bVPsrFArI5XJkZmbC19dX+xdJREREZMKYXNWBra2t2nszMzM8vRbz48ePVT8/GUOzd+9eeHp6qu1nZWWlpShJ39ja2sLHx0f1/vr163jllVcwfvx4LFiwAA0bNkRSUhLefvttlJeXV5lczZkzB2+99Rb27t2L/fv3IzY2Ftu3b8eIESNQXFyMd999F5MnT650XNOmTbV6bURERETE5EoQLi4u+PPPP9Xazpw5oxoL065dO1hZWSE7O5uPAJJKSkoK5HI5li1bBjMz5fDHnTt3PvO41q1bo3Xr1oiMjMSbb76JjRs3YsSIEejcuTMuXLiglsARERERke5wQgsBvPDCC/j999+xZcsWXL58GbGxsWrJlr29PaZNm4bIyEhs3rwZV69eRWpqKlauXInNmzeLGDmJycfHB48fP8bKlStx7do1bN26FQkJCdXu//DhQ0yaNAlHjhxBVlYWjh8/jtOnT6se95s5cyZOnDiBSZMm4cyZM7h8+TJ++OEHTmhBREREpCNMrgQwYMAAzJ49GzNmzECXLl1QVFSEsLAwtX0++ugjzJ49G3FxcfD19cXAgQOxd+9eNG/eXKSoSWz+/v5Yvnw5Fi9ejA4dOmDbtm2Ii4urdn9zc3Pcu3cPYWFhaN26NV5//XUMGjQIc+fOBQD4+fnh6NGjuHTpEnr37o1OnTohJiYGHh4eurokIiIiIpMmUTw9WIiIiIiIiIhqjZUrIiIiIiIiATC5IiIiIiIiEgCTKyIiIiIiIgEwuSIiIiIiIhIAkysiIiIiIiIBMLkiIiIiIiISAJMrAZSVlWHOnDkoKysTOxQyQry/iIiIiAwD17kSgFQqhaOjIwoLC+Hg4CB2OGRkeH8RERERGQZWroiIiIiIiATA5IqIiIiIiEgA9cQOwFDI5XLcunUL9vb2kEgkap9JpVK1LZGQnnV/KRQKFBUVwcPDA2Zm/L6EiIiISCwcc6WhGzduwMvLS+wwiKqVk5ODJk2aaLz/6tWrsXTpUuTm5sLf3x8rV65E165dq93/wYMH+OCDD7B7924UFBSgWbNmiI+Px+DBg4UIn4iIiMjgsXKlIXt7ewDKP2ArTSqQkwPExwNTpwJMwEjHpFIpvLy8VPeoJnbs2IGoqCgkJCQgODgY8fHxGDBgADIyMuDq6lpp//LycvTv3x+urq7YtWsXPD09kZWVBScnJwGvhIiIiMiwsXKloRpnbEtNBQIDgZQUoHNncQIkk1WX2QSDg4PRpUsXrFq1CoDysVcvLy+89957mDVrVqX9ExISsHTpUly8eBEWFhaCxk9ERERkLDhAoxplZWWQSqVqLyJ99vT9Wt26WOXl5UhJSUFISIiqzczMDCEhIUhOTq7ymB9//BHdu3fHxIkT4ebmhg4dOmDhwoWQyWRauRYiIiIiQ8TkqhpxcXFwdHRUvTjeivSdl5eX2j0bFxdX5X53796FTCaDm5ubWrubmxtyc3OrPObatWvYtWsXZDIZ9u3bh9mzZ2PZsmWYP3++4NdBREREZKg45qoa0dHRiIqKUr1/Mq6FSF89PR7QyspKsHPL5XK4urpi3bp1MDc3R2BgIG7evImlS5ciNjZWsH6IiIiIDBmTq2pYWVlp/sepqysQGancEonEwcFBozFXzs7OMDc3R15enlp7Xl4e3N3dqzymcePGsLCwgLm5uarN19cXubm5KC8vh6Wl5fMFT0RERGQE+FigEJo0AZYvV26J9JylpSUCAwORmJioapPL5UhMTET37t2rPKZnz564cuUK5HK5qu3SpUto3LgxEysiIiKi/2FyJYTiYiA5WbklMgBRUVFYv349Nm/ejPT0dIwfPx4lJSWIiIgAAISFhSE6Olq1//jx41FQUIApU6bg0qVL2Lt3LxYuXIiJEyeKdQlEREREeoePBQrh0iWgRw9OxU4GY9SoUcjPz0dMTAxyc3MREBCAAwcOqCa5yM7OhpnZX9+9eHl54eDBg4iMjISfnx88PT0xZcoUzJw5U6xLICIiItI7XOdKQ1znivRVXda5IiIiIiLh8bFAIiIiIiIiATC5IiIiIiIiEgCTKyHUqwc4Oyu3RERERERkkpgNCMHPD8jPFzsKIiIiIiISEStXREREREREAmByJYTz5wEfH+WWiIiIiIhMEpMrIZSVAVevKrdERERERGSSmFwRkdZVVFTg8OHDWLt2LYqKigAAt27dQnFxsciREREREQmHE1oQkVZlZWVh4MCByM7ORllZGfr37w97e3ssXrwYZWVlSEhIEDtEIiIiIkGwckVEWjVlyhQEBQXh/v37sLGxUbWPGDECiYmJIkZGREREJCxWroTg4wMcOKDcEpGaY8eO4cSJE7C0tFRr9/b2xs2bN0WKioiIiEh4TK6E4OAADBggdhREekkul0Mmk1Vqv3HjBuzt7UWIiIiIiEg7+FigEG7fBubMUW6JSM1LL72E+Ph41XuJRILi4mLExsZi8ODB4gVGREREJDCJQqFQiB2EIZBKpXB0dERhYSEcHBzUP0xNBQIDgZQUoHNncQIkk1XjvakHbty4gQEDBkChUODy5csICgrC5cuX4ezsjF9//RWurq5ih0hEREQkCD4WSERa1aRJE5w9exY7duzA2bNnUVxcjLfffhuhoaFqE1wQERERGTomV0SkdfXq1UNoaChCQ0PFDoWIiIhIazjmioi0Ki4uDhs2bKjUvmHDBixevFiEiIiIiIi0g8mVEBo0AEJDlVsiUrN27Vq0bdu2Unv79u25gDAREREZFT4WKITmzYEvvxQ7CiK9lJubi8aNG1dqd3FxwW3OsElERERGhJUrITx6BFy5otwSkRovLy8cP368Uvvx48fh4eEhQkRERERE2sHKlRAuXOBU7ETVGDduHKZOnYrHjx/jhRdeAAAkJiZixowZeP/990WOjoiIiEg4TK6ISKumT5+Oe/fuYcKECSgvLwcAWFtbY+bMmYiOjhY5OiIiIiLh8LFAIhO1evVqeHt7
w9raGsHBwTh16lS1+27atAkSiUTtZW1trVE/EokEixcvRn5+Pk6ePImzZ8+ioKAAMTExQl0KERERkV5g5YrIBO3YsQNRUVFISEhAcHAw4uPjMWDAAGRkZMDV1bXKYxwcHJCRkaF6L5FIatWnnZ0dunTp8lxxExEREekzJldEJmj58uUYN24cIiIiAAAJCQnYu3cvNmzYgFmzZlV5jEQigbu7e637KikpwaJFi5CYmIg7d+5ALperfX7t2rXaXwARERGRHmJyVY2ysjKUlZWp3kul0up37twZUCh0EBVR9Z6+R62srGBlZVVpv/LycqSkpKiNdzIzM0NISAiSk5OrPX9xcTGaNWsGuVyOzp07Y+HChWjfvv0z43rnnXdw9OhRjBkzBo0bN651xYuIiIjIUDC5qkZcXBzmzp0rdhhEGvPy8lJ7Hxsbizlz5lTa7+7du5DJZHBzc1Nrd3Nzw8WLF6s8d5s2bbBhwwb4+fmhsLAQH3/8MXr06IHz58+jSZMmNca1f/9+7N27Fz179qzdBREREREZGE5oUY3o6GgUFhaqXjk5OdXvnJEBdO+u3BKJJCcnR+2eFXImvu7duyMsLAwBAQHo06cPdu/eDRcXF6xdu/aZxzZo0AANGzYULBYiIiIifcXkqhpWVlZwcHBQe1WrpAQ4eVK5JRLJ0/drVY8EAoCzszPMzc2Rl5en1p6Xl6fxmCoLCwt06tQJV65ceea+H330EWJiYlBaWqrRuYmIiIgMFR8LJDIxlpaWCAwMRGJiIoYPHw4AkMvlSExMxKRJkzQ6h0wmw7lz5zB48OBn7rts2TJcvXoVbm5u8Pb2hoWFhdrnqamptb4GIiIiIn3E5IrIBEVFRSE8PBxBQUHo2rUr4uPjUVJSopo9MCwsDJ6enoiLiwMAzJs3D926dYOPjw8ePHiApUuXIisrC++8884z+3qSwBEREREZOyZXRCZo1KhRyM/PR0xMDHJzcxEQEIADBw6oJrnIzs6GmdlfTw3fv38f48aNQ25uLho0aIDAwECcOHEC7dq1e2ZfsbGxWrsOIiIiIn0iUSg4h7gmpFIpHB0dUVhYWHn8VUEBsG8fMHgwwIH7pGM13pt64sGDB9i1axeuXr2K6dOno2HDhkhNTYWbmxs8PT3FDo+IiIhIEKxcCaFhQ2D0aLGjINJLaWlpCAkJgaOjI65fv45x48ahYcOG2L17N7Kzs7FlyxaxQyQiIiISBGcLFEJ+PrB6tXJLRGqioqIwduxYXL58GdbW1qr2wYMH49dffxUxMiIiIiJhMbkSQk4OMGmScktEak6fPo133323Urunpydyc3NFiIiIiIhIO5hcEZFWWVlZQSqVVmq/dOkSXFxcRIiIiIiISDuYXBGRVg0dOhTz5s3D48ePAQASiQTZ2dmYOXMmRo4cKXJ0RERERMJhckVEWrVs2TIUFxfD1dUVDx8+RJ8+feDj4wN7e3ssWLBA7PCIiIiIBMPZAoVgbw+89JJyS0RqHB0dcejQISQlJSEtLQ3FxcXo3LkzQkJCxA6NiIiISFBc50pDhrCWEJkm3ptERERE+oGVKyHIZEBJCWBrC5ibix0NkehWrFih8b6TJ0/WYiREREREusPKlYZqrA6kpgKBgUBKCtC5szgBksnSx8pV8+bN1d7n5+ejtLQUTk5OAIAHDx6gfv36cHV1xbVr10SIkIiIiEh4nNCCiASXmZmpei1YsAABAQFIT09HQUEBCgoKkJ6ejs6dO+Ojjz4SO1QiIiIiwTC5IiKtmj17NlauXIk2bdqo2tq0aYNPPvkEH374oYiREREREQmLyRURadXt27dRUVFRqV0mkyEvL0+EiIiIiIi0g8kVEWnViy++iHfffRepqamqtpSUFIwfP57TsRMREZFRYXIlhI4dgTt3lFsiUrNhwwa4u7sjKCgIVlZWsLKyQteuXeHm5obPP/9c7PCIiIiIBMOp2IVgYQG4uIgdBZFecnFxwb59+3Dp0iVcvHgRANC2bVu0bt1a5MiIiIiIhMXkSghXrwKRkcAnnwAtW4odDZFeat26NRMqIiIiMmpMroRQWAjs2QPMmSN2JER6RyaTYdOmTUhMTMSdO3cgl8vVPv/5559FioyIiIhIWEyuiEirpkyZgk2bNuHll19Ghw4dIJFIxA6JiIiISCuYXBGRVm3fvh07d+7E4MGDxQ6FiIiISKs4WyARaZWlpSV8fHzEDoOIiIhI65hcCcHTE1i2TLklMhCrV6+Gt7c3rK2tERwcjFOnTml03Pbt2yGRSDB8+HCN9n///ffx6aefQqFQPEe0RERERPqPjwUKwc0NiIoSOwoije3YsQNRUVFISEhAcHAw4uPjMWDAAGRkZMDV1bXa465fv45p06ahd+/eGveVlJSEX375Bfv370f79u1hYWGh9vnu3bvrfB1ERERE+oSVKyHcvw98841yS2QAli9fjnHjxiEiIgLt2rVDQkIC6tevjw0bNlR7jEwmQ2hoKObOnYsWLVpo3JeTkxNGjBiBPn36wNnZGY6OjmovIiIiImPBypUQMjOB118HUlKABg3EjoaoRuXl5UhJSUF0dLSqzczMDCEhIUhOTq72uHnz5sHV1RVvv/02jh07pnF/GzdufK54iYiIiAwFk6tqlJWVoaysTPVeKpWKGA3Rsz19j1pZWcHKyqrSfnfv3oVMJoObm5tau5ubGy5evFjluZOSkvDFF1/gzJkzdYqtoqICR44cwdWrV/HWW2/B3t4et27dgoODA+zs7Op0TiIiIiJ9w8cCqxEXF6f26JKXl5fYIRHVyMvLS+2ejYuLE+S8RUVFGDNmDNavXw9nZ+daH5+VlYWOHTti2LBhmDhxIvLz8wEAixcvxrRp0wSJkYiIiEgfsHJVjejoaET9bZIKqVTKBIv0Wk5ODhwcHFTvq6paAYCzszPMzc2Rl5en1p6Xlwd3d/dK+1+9ehXXr1/HkCFDVG1yuRwAUK9ePWRkZKBly5bVxjVlyhQEBQXh7NmzaNSokap9xIgRGDdunGYXR0RERGQAmFxVo7pHqqpkYwN06qTcEonEwcFBLbmqjqWlJQIDA5GYmKiaTl0ulyMxMRGTJk2qtH/btm1x7tw5tbYPP/wQRUVF+PTTT5/5pcOxY8dw4sQJWFpaqrV7e3vj5s2bz4yXiIiIyFAwuRKCry+Qmip2FEQai4qKQnh4OIKCgtC1a1fEx8ejpKQEERERAICwsDB4enoiLi4O1tbW6NChg9rxTk5OAFCpvSpyuRwymaxS+40bN2Bvb//8F0NERESkJ5hcEZmgUaNGIT8/HzExMcjNzUVAQAAOHDigmuQiOzsbZmbCDMl86aWXEB8fj3Xr1gEAJBIJiouLERsbi8GDBwvSBxEREZE+kCgUCoXYQRgCqVQKR0dHFBYWVn706o8/gG7dgJMnlY8HEulQjfemHrhx4wYGDBgAhUKBy5cvIygoCJcvX4azszN+/fXXGhctJiIiIjIkrFwJQaEAysuVWyJS06RJE5w9exbbt29HWloaiouL8fbbbyM0NBQ2HKdIRERERoTJFRFpXb169TB
69GixwyAiIiLSKiZXRKR1GRkZWLlyJdLT0wEAvr6+mDRpEtq2bStyZERERETC4SLCRKRV3377LTp06ICUlBT4+/vD398fqamp6NixI7799luxwyMiIiISDCe00FCNkwY8fAhcuwa0aMG1rkjn9H1Ci5YtWyI0NBTz5s1Ta4+NjcWXX36Jq1evihQZERERkbBYuRKCjQ3Qvj0TK6Iq3L59G2FhYZXaR48ejdu3b4sQEREREZF2MLkSQlYW8M47yi0Rqenbty+OHTtWqT0pKQm9e/cWISIiIiIi7eCEFkK4dw/44gtgwgSgWTOxoyHSK0OHDsXMmTORkpKCbt26AQBOnjyJb775BnPnzsWPP/6oti8RERGRoeKYKw3VOK4lNRUIDARSUoDOncUJkEyWvo+5MjPTrEAukUggk8m0HA0RERGR9rByRURaJZfLxQ6BiIiISCc45oqIdObRo0dih0BERESkNUyuhODmBsyapdwSkRqZTIaPPvoInp6esLOzw7Vr1wAAs2fPxhdffCFydERERETCYXIlBE9PIC5OuSUiNQsWLMCmTZuwZMkSWFpaqto7dOiAzz//XMTIiIiIiITF5EoIRUXAkSPKLRGp2bJlC9atW4fQ0FCYm5ur2v39/XHx4kURIyMiIiISFpMrIVy+DPTrp9wSkZqbN2/Cx8enUrtcLsfjx49FiIiIiIhIO5hcEZFWtWvXrspFhHft2oVOnTqJEBERERGRdnAqdiLSqpiYGISHh+PmzZuQy+XYvXs3MjIysGXLFvz0009ih0dEREQkGFauiEirhg0bhj179uDw4cOwtbVFTEwM0tPTsWfPHvTv31/s8IiIiIgEw8qVECwslDMFWliIHQmRXurduzcOHTokdhhEREREWsXkSggdOwI3bogdBRERERERiYjJFREJrkGDBpBIJBrtW1BQoOVoiIiIiHSDyZUQzp0DBg0C9u9XVrGITFx8fLzq53v37mH+/PkYMGAAunfvDgBITk7GwYMHMXv2bJEiJCIiIhIeJ7QQwuPHwM2byi2RgVi9ejW8vb1hbW2N4OBgnDp1qtp9d+/ejaCgIDg5OcHW1hYBAQHYunVrtfuHh4erXsePH8e8efPw9ddfY/LkyZg8eTK+/vprzJs3D0ePHtXGpRERERGJgskVkQnasWMHoqKiEBsbi9TUVPj7+2PAgAG4c+dOlfs3bNgQH3zwAZKTk5GWloaIiAhERETg4MGDz+zr4MGDGDhwYKX2gQMH4vDhw899LURERET6gskVkQlavnw5xo0bh4iICLRr1w4JCQmoX78+NmzYUOX+ffv2xYgRI+Dr64uWLVtiypQp8PPzQ1JS0jP7atSoEX744YdK7T/88AMaNWr03NdCREREpC845orIxJSXlyMlJQXR0dGqNjMzM4SEhCA5OfmZxysUCvz888/IyMjA4sWLn7n/3Llz8c477+DIkSMIDg4GAPz22284cOAA1q9fX/cLISIiItIzTK6qUVZWhrKyMtV7qVRa/c6tWgG//KLcEonk6XvUysoKVlZWlfa7e/cuZDIZ3Nzc1Nrd3Nxw8eLFas9fWFgIT09PlJWVwdzcHJ999plGiwCPHTsWvr6+WLFiBXbv3g0A8PX1RVJSkirZIiIiIjIGTK6qERcXh7lz52q2s7090LevVuMhehYvLy+197GxsZgzZ45g57e3t8eZM2dQXFyMxMREREVFoUWLFuirwb0fHByMbdu2CRYLERERkT5iclWN6OhoREVFqd5LpdJKf7yq3LwJrFoFTJoEeHrqKEIidTk5OXBwcFC9r6pqBQDOzs4wNzdHXl6eWnteXh7c3d2rPb+ZmRl8fHwAAAEBAUhPT0dcXJxGyRURERGRKeCEFtWwsrKCg4OD2qtaeXnAokXKLZFInr5fq0uuLC0tERgYiMTERFWbXC5HYmKiah0qTcjlcrVHZ4mIiIhMHStXRCYoKioK4eHhCAoKQteuXREfH4+SkhJEREQAAMLCwuDp6Ym4uDgAysdkg4KC0LJlS5SVlWHfvn3YunUr1qxZI+ZlEBEREekVJldEJmjUqFHIz89HTEwMcnNzERAQgAMHDqgmucjOzoaZ2V+F7ZKSEkyYMAE3btyAjY0N2rZtiy+//BKjRo0S6xKIiIiI9I5EoVAoxA7CEEilUjg6OqKwsLDyI4KpqUBgIJCSAnTuLE6AZLJqvDeJiIiISGdYuRJCo0bA228rt0SEV199VeN9n0zPTkRERGTomFwJoVkz4PPPxY6CSG84OjqKHQIRERGRzjG5EsLDh8C1a0CLFoCNjdjREIlu48aNYodAREREpHOcil0I6elAhw7KLRERERERmSRWrohI63bt2oWdO3ciOzsb5eXlap+lpqaKFBURERGRsFi5IiKtWrFiBSIiIuDm5oY//vgDXbt2RaNGjXDt2jUMGjRI7PCIiIiIBMPkioi06rPPPsO6deuwcuVKWFpaYsaMGTh06BAmT56MwsJCscMjIiIiEgyTKyFIJIClpXJLRGqys7PRo0cPAICNjQ2KiooAAGPGjMHXX38tZmhEREREgmJyJYROnYCyMuWWiNS4u7ujoKAAANC0aVOcPHkSAJCZmQmuYU5ERETGhMkVEWnVCy+8gB9//BEAEBERgcjISPTv3x+jRo3CiBEjRI6OiIiISDgSBb861ohUKoWjoyMKCwvh4OCg/mF6OhAaCmzbBvj6ihMgmawa7009IJfLIZfLUa+ecnLS7du348SJE2jVqhXeffddWFpaihwhERERkTA4FbsQHj4E/vhDuSUiNWZmZjAz+6tI/sYbb+CNN94QMSIiIiIi7WByRUSCS0tLQ4cOHWBmZoa0tLQa9/Xz89NRVERERETaxeSKiAQXEBCA3NxcuLq6IiAgABKJpMrJKyQSCWQymQgREhEREQmPyRURCS4zMxMuLi6qn4mIiIhMAZMrITRvDuzcqdwSEZo1a6b6OSsrCz169FBNaPFERUUFTpw4obYvERERkSHjbIEa0vcZ2ch06fu9aW5ujtu3b8PV1VWt/d69e3B1deVjgURERGQ0uM6VEPLygOXLlVsiUqNQKCCRSCq137t3D7a2tiJERERERKQdfCxQCDdvAu+/D/TtC7i5iR0NkV549dVXASgnrRg7diysrKxUn8lkMqSlpaFHjx5ihUdEREQkOCZXRKQVjo6OAJSVK3t7e9jY2Kg+s7S0RLdu3TBu3DixwiMiIiISHJMrItKKjRs3qqZfX7lyJezs7ESOiIiIiEi7OOaKiLRGoVBg27ZtuH37ttihEBEREWkdkyshODoCQ4Yot0QGYvXq1fD29oa1tTWCg4Nx6tSpavddv349evfujQYNGqBBgwYICQmpcf8nzMzM0KpVK9y7d0/I0ImIiIj0EpMrIbRsCfz4o3JLZAB27NiBqKgoxMbGIjU1Ff7+/hgwYADu3LlT5f5HjhzBm2++iV9++QXJycnw8vLCSy+9hJs3bz6zr0WLFmH69On4888/hb4MIiIiIr3Cda40VONaQo8fAw8eAE5OgIWFGOGRCavLOlfBwcHo0qULVq
1aBQCQy+Xw8vLCe++9h1mzZj3zeJlMhgYNGmDVqlUICwurcd8GDRqgtLQUFRUVsLS0VJvYAgAKCgo0ipmIiIhI33FCCyGcOwcEBgIpKUDnzmJHQ1Sj8vJypKSkIDo6WtVmZmaGkJAQJCcna3SO0tJSPH78GA0bNnzmvvHx8XUNlYiIiMigMLmqRllZGcrKylTvpVKpiNEQPdvT96iVlZXa2lJP3L17FzKZDG5Prcnm5uaGixcvatTXzJkz4eHhgZCQkGfuGx4ertE5iYiIiAwdx1xVIy4uDo6OjqqXl5eX2CER1cjLy0vtno2Li9NKP4sWLcL27dvx3XffwdraulbHPnr0CFKpVO1FREREZCxYuapGdHQ0oqKiVO+lUmm1CdZnv1zBBAAzdp1F5qlHAIDqRrJV1VzdsLeq99X8vNXtXLvzVv6g2n2raK9uQF9thvpVfd5qru05Y6g2KgGu7Xl/7292bYoJfX2q6RXIyclRG3NVVdUKAJydnWFubo68vDy19ry8PLi7u1d7fgD4+OOPsWjRIhw+fBh+fn417vtESUkJZs6ciZ07d1Y5a6BMJtPoPERERET6jslVNap7pKoq1++VAADO35LivPy+NsMiE1ZY+rjGzx0cHDSa0MLS0hKBgYFITEzE8OHDASgntEhMTMSkSZOqPW7JkiVYsGABDh48iKCgII3jnjFjBn755ResWbMGY8aMwerVq3Hz5k2sXbsWixYt0vg8RERERPqOswVqqKYZ2ZIv3UHxvQeosKkPmJur2iWSqs5UZWOV+1a1p6Tqk1azr2ZtyuM1C6Caw6uMS9OYqutf099JdR887zmrvKZaHV9lq4b7Vd7T1cEank42lfary2yBO3bsQHh4ONauXYuuXbsiPj4eO3fuxMWLF+Hm5oawsDB4enqqHi1cvHgxYmJi8NVXX6Fnz56q89jZ2cHOzq7Gvpo2bYotW7agb9++cHBwQGpqKnx8fLB161Z8/fXX2Ldvn0YxExEREek7Vq4E0L21KwBXscMg0tioUaOQn5+PmJgY5ObmIiAgAAcOHFBNcpGdnQ0zs7+GZK5Zswbl5eV47bXX1M4TGxuLOXPm1NhXQUEBWrRoAUBZXXsy9XqvXr0wfvx4Aa+KiIiISFxMroRw+TIwaRKwahXQqpXY0RBpZNKkSdU+BnjkyBG199evX69zPy1atEBmZiaaNm2Ktm3bYufOnejatSv27NkDJyenOp+XiIiISN9wtkAhFBUB//2vcktEaiIiInD27FkAwKxZs7B69WpYW1sjMjIS06dPFzk6IiIiIuGwckVEWhUZGan6OSQkBBcvXkRKSgp8fHw0nnGQiIiIyBCwckVEWiGXy7F48WL07NkTXbp0waxZs/Dw4UM0a9YMr776qsknVt7e3oiPj9fb8xEREVHtMbkiIq1YsGAB/vOf/8DOzg6enp749NNPMXHiRLHDIiIiItIaJldC8PJSTmZRzSLDRKZoy5Yt+Oyzz3Dw4EF8//332LNnD7Zt2wa5XC52aERERERaweRKCC4uwMSJyi0RAVBO5z548GDV+5CQEEgkEty6dUsr/VX1WFxAQADmzJkDhUKBOXPmoGnTprCysoKHhwcmT56s2q+srAzTpk2Dp6cnbG1tERwcXGnGxOps2rQJTk5O+Omnn9CmTRvUr18fr732GkpLS7F582Z4e3ujQYMGmDx5MmQyWbXnyc7OxrBhw2BnZwcHBwe8/vrryMvLU9tnz5496NKlC6ytreHs7IwRI0ZUe77PP/8cTk5OSExM1Og6iIiI6PlxQgshFBQA+/YBgwcDDRuKHQ2RXqioqIC1tbVam4WFBR4/fqzzWL799lt88skn2L59O9q3b4/c3FzVDIaAclr6CxcuYPv27fDw8MB3332HgQMH4ty5c2ilwfIKpaWlWLFiBbZv346ioiK8+uqrGDFiBJycnLBv3z5cu3YNI0eORM+ePTFq1KhKx8vlclVidfToUVRUVGDixIkYNWqUKsnbu3cvRowYgQ8++ABbtmxBeXl5tQswL1myBEuWLMF///tfdO3atW6/NCIiIqo1JldCuH4dGDMGSElhckX0PwqFAmPHjoWVlZWq7dGjR/j3v/8NW1tbVdvu3bu1Hkt2djbc3d0REhICCwsLNG3aVJV0ZGdnY+PGjcjOzoaHhwcAYNq0aThw4AA2btyIhQsXPvP8jx8/xpo1a9CyZUsAwGuvvYatW7ciLy8PdnZ2aNeuHfr164dffvmlyuQqMTER586dQ2ZmJrz+93jxli1b0L59e5w+fRpdunTBggUL8MYbb2Du3Lmq4/z9/Suda+bMmdi6dSuOHj2K9u3b1/6XRURERHXG5IqItCI8PLxS2+jRo0WIBPjnP/+J+Ph4tGjRAgMHDsTgwYMxZMgQ1KtXD+fOnYNMJkPr1q3VjikrK0OjRo00On/9+vVViRUAuLm5wdvbG3Z2dmptd+7cqfL49PR0eHl5qRIrAGjXrh2cnJyQnp6OLl264MyZMxg3blyNcSxbtgwlJSX4/fff0aJFC41iJyIiIuEwuSIirdi4caNO+zMzM4NCoVBre/IIopeXFzIyMnD48GEcOnQIEyZMwNKlS3H06FEUFxfD3NwcKSkpMDc3Vzv+78lRTSwsLNTeSySSKtueZzIPGxubZ+7Tu3dv7N27Fzt37sSsWbPq3BcRERHVDSe0ICKj4OLigtu3b6veS6VSZGZmqt7b2NhgyJAhWLFiBY4cOYLk5GScO3cOnTp1gkwmw507d+Dj46P2cnd310nsvr6+yMnJQU5OjqrtwoULePDgAdq1awcA8PPze+bkFF27dsX+/fuxcOFCfPzxx1qNmYiIiCpj5UoItrZAt27KLRGJ4oUXXsCmTZswZMgQODk5ISYmRlWJ2rRpE2QyGYKDg1G/fn18+eWXsLGxQbNmzdCoUSOEhoYiLCwMy5YtQ6dOnZCfn4/ExET4+fnh5Zdf1nrsISEh6NixI0JDQxEfH4+KigpMmDABffr0QVBQEAAgNjYWL774Ilq2bIk33ngDFRUV2LdvH2bOnKl2rh49emDfvn0YNGgQ6tWrh6lTp2o9fiIiIlJi5UoIbdoAycnKLRGJIjo6Gn369MErr7yCl19+GcOHD1eNg3JycsL69evRs2dP+Pn54fDhw9izZ49qTNXGjRsRFhaG999/H23atMHw4cNx+vRpNG3aVCexSyQS/PDDD2jQoAH+8Y9/ICQkBC1atMCOHTtU+/Tt2xfffPMNfvzxRwQEBOCFF17AqVOnqjxfr169sHfvXnz44YdYuXKlTq6BiIiIAIni6UEKVCWpVApHR0cUFhbCwcFB7HCIVHhvEhEREekHVq6EkJoKSCTKLRERERERmSQmV0RENRg0aBDs7OyqfGmyBhYRERGZDk5oQURUg88//xwPHz6s8rOGXDSciIiI/obJFRFRDTw9PcUOgYiIiAwEHwskIiIiIiISAJMrIbRrB1y+rNwSEZHGNm3aBCcnJ7HDICIiEgSTKyFYWwM+PsotkR6TSCQ1vubMmSN2iGSgxo4dW+U9deXKFbFDIyIi0
hmOuRJCZiYwezbw0UdA8+ZiR0NUrdu3b6t+3rFjB2JiYpCRkaFqs7OzU/2sUCggk8lQrx7/M0GaGThwIDZu3KjW5uLiIlI0REREusfKlRDu3we2bVNuifSYu7u76uXo6AiJRKJ6f/HiRdjb22P//v0IDAyElZUVkpKSMHbsWAwfPlztPFOnTkXfvn1V7+VyOeLi4tC8eXPY2NjA398fu3bt0u3FkeisrKzU7jF3d3d8+umn6NixI2xtbeHl5YUJEyaguLi42nOcPXsW/fr1g729PRwcHBAYGIjff/9d9XlSUhJ69+4NGxsbeHl5YfLkySgpKdHF5RERET0TkysiUjNr1iwsWrQI6enp8PPz0+iYuLg4bNmyBQkJCTh//jwiIyMxevRoHD16VMvRkr4zMzPDihUrcP78eWzevBk///wzZsyYUe3+oaGhaNKkCU6fPo2UlBTMmjULFhYWAICrV69i4MCBGDlyJNLS0rBjxw4kJSVh0qRJurocIiKiGvF5HyJSM2/ePPTv31/j/cvKyrBw4UIcPnwY3bt3BwC0aNECSUlJWLt2Lfr06aOtUEnP/PTTT2qPlg4aNAjffPON6r23tzfmz5+Pf//73/jss8+qPEd2djamT5+Otm3bAgBatWql+iwuLg6hoaGYOnWq6rMVK1agT58+WLNmDaw57pWIiETG5EpDCoUCACCVSit/+OQRl+JioKrPibToyT355B59XkFBQbXa/8qVKygtLa2UkJWXl6NTp06CxESGoV+/flizZo3qva2tLQ4fPoy4uDhcvHgRUqkUFRUVePToEUpLS1G/fv1K54iKisI777yDrVu3IiQkBP/85z/RsmVLAMpHBtPS0rBt2zbV/gqFAnK5HJmZmfD19dX+RRIREdWAyZWGioqKAABeXl7V78Rv6ElERUVFcHR0fO7z2Nraqr03MzOrlLg9fvxY9fOT8TN79+6ttOCulZXVc8dDhsPW1hY+Pj6q99evX8crr7yC8ePHY8GCBWjYsCGSkpLw9ttvo7y8vMrkas6cOXjrrbewd+9e7N+/H7Gxsdi+fTtGjBiB4uJivPvuu5g8eXKl45o2barVayMiItIEkysNeXh4ICcnB/b29pBIJGKHQ6SiUChQVFQEDw8PrZzfxcUFf/75p1rbmTNnVONg2rVrBysrK2RnZ/MRQFKTkpICuVyOZcuWwcxMOcR3586dzzyudevWaN26NSIjI/Hmm29i48aNGDFiBDp37owLFy6oJXBERET6hMmVhszMzNCkSROxwyCqkhAVq+q88MILWLp0KbZs2YLu3bvjyy+/xJ9//ql65M/e3h7Tpk1DZGQk5HI5evXqhcLCQhw/fhwODg4IDw/XWmyk33x8fPD48WOsXLkSQ4YMwfHjx5GQkFDt/g8fPsT06dPx2muvoXnz5rhx4wZOnz6NkSNHAgBmzpyJbt26YdKkSXjnnXdga2uLCxcu4NChQ1i1apWuLouIiKhanC2QiGo0YMAAzJ49GzNmzECXLl1QVFSEsLAwtX0++ugjzJ49G3FxcfD19cXAgQOxd+9eNOe6bybN398fy5cvx+LFi9GhQwds27YNcXFx1e5vbm6Oe/fuISwsDK1bt8brr7+OQYMGYe7cuQAAPz8/HD16FJcuXULv3r3RqVMnxMTEaK1qS0REVFsShVCj4ImIiIiIiEwYK1dEREREREQCYHJFREREREQkACZXREREREREAmByRUREREREJAAmV0RERERERAJgckVERERERCQAJldEVKOysjLMmTMHZWVlYodCRor3GBERGQuuc0VENZJKpXB0dERhYSEcHBzEDoeMEO8xIiIyFqxcERERERERCYDJFRERERERkQDqiR2AoZDL5bh16xbs7e0hkUjEDodIRaFQoKioCB4eHjAzq9v3JTXd31KpVG1LJLSa7jEh7m8iIiJd4ZgrDd24cQNeXl5ih0FUrZycHDRp0qROx/L+Jn1X2/t79erVWLp0KXJzc+Hv74+VK1eia9eu1e7/4MEDfPDBB9i9ezcKCgrQrFkzxMfHY/DgwUKET0REJoKVKw3Z29urvffZGv1c5ysrsHmu45/F4p72/6e1uqvd89e/p/283yb/sdb7sMot1tq5Ey98rPr56Xu0Np4cm5OTU3lCgZwcID4emDoVYAJGOiaVSuHl5VWr+3vHjh2IiopCQkICgoODER8fjwEDBiAjIwOurq6V9i8vL0f//v3h6uqKXbt2wdPTE1lZWXBychLwSoiIyBSwcqWhJ7NZAUDTL2fBxub5kqNH97SbXAGAxV3tJljW+Vo9PQCg/l3t357187SbYFndLtLauW/cuIHz9zcDwHPNtFbjbG2pqUBgIJCSAnTu/LwhE9VKXWYSDA4ORpcuXbBq1SoAysdevby88N5772HWrFmV9k9ISMDSpUtx8eJFWFhYCBo/ERGZFj7AXks+W6OfO7ECAOtGDwWIRlyPXMSOwDCUNa57RelZmjRpghfbTav1cWVlZZBKpWovIn329P1a3ZpY5eXlSElJQUhIiKrNzMwMISEhSE5OrvKYH3/8Ed27d8fEiRPh5uaGDh06YOHChZDJZFq5FiIiMl5MrozYY+cKsUN4bqXO2p88pNTN9L6pjouLg6Ojo+rF8Vak77y8vNTu2bi4uCr3u3v3LmQyGdzc3NTa3dzckJubW+Ux165dw65duyCTybBv3z7Mnj0by5Ytw/z58wW/DiIiMm4ccyUi60YPdfJ4oDY9ctH+44GlzhKdPB6oTWWN7bX6eGBtRUdHIyoqSvX+ybgWIn319HhAKysrwc4tl8vh6uqKdevWwdzcHIGBgbh58yaWLl2K2NhYwfohIiLjx+TKyD12rtD62CtjUOpmofWxV/rEyspK8z9OXV2ByEjllkgkDg4OGo25cnZ2hrm5OfLy8tTa8/Ly4O7uXuUxjRs3hoWFBczNzVVtvr6+yM3NRXl5OSwtLZ8veCIiMhl8LFBkHHulGV08Hqht2hx7pVVNmgDLlyu3RHrO0tISgYGBSExMVLXJ5XIkJiaie/fuVR7Ts2dPXLlyBXK5XNV26dIlNG7cmIkVERHVCpMrE2AMY690wRTHXmmkuBhITlZuiQxAVFQU1q9fj82bNyM9PR3jx49HSUkJIiIiAABhYWGIjv5rOY3x48ejoKAAU6ZMwaVLl7B3714sXLgQEydOFOsSiIjIQPF5MT3AsVea4dgrkVy6BPTowanYyWCMGjUK+fn5iImJQW5uLgICAnDgwAHVJBfZ2dkwM/vru0UvLy8cPHgQkZGR8PPzg6enJ6ZMmYKZM2eKdQlERGSgmFyZCF2MvdJFgqVtpjb2ishYTZo0CZMmTarysyNHjlRq6969O06ePKnlqIiIyNjxsUA9YQxjr3SBY6+IiIiISF8xuTIhuhh7ZQwLC+ti7BUTLCIiIiLjw+RKj7B6pRljqF4ZlHr1AGdn5ZaIiIiIqsXkysSweqUZVq/+xs8PyM9XbomIiIioWkyu9IwuqlfGMDU7q1dEREREpG+YXNWSd6MCsUMwCKxeacYgqlfnzwM+PsotERER
EVWLyZUeYvVKM7qoXnFhYQBlZcDVq8otEREREVWLyVUdNHe+J3YIBsEYqle6YBDVKyKqUUVFBQ4fPoy1a9eiqEi5UPitW7dQXFwscmRERKRLnP5LT1k3eohH92y02ocuFhbWtlJnCerfVWi3Dy4sTEQ1yMrKwsCBA5GdnY2ysjL0798f9vb2WLx4McrKypCQkCB2iEREpCOsXNWRLqpXxjA1O6tXmmH1ishwTZkyBUFBQbh//z5sbP76UmzEiBFITEwUMTIiItI1wy5b0HNj9UrDPky5euXjAxw4oNwSUSXHjh3DiRMnYGlpqdbu7e2NmzdvihQVERGJgZWr58DqlWZ0Ub0yhqnZ9bZ65eAADBig3BJRJXK5HDKZrFL7jRs3YG+vp/+uiYhIK5hckVHMHKgLJjtz4O3bwJw5yi0RVfLSSy8hPj5e9V4ikaC4uBixsbEYPHiweIEREZHOMbl6TqxeaYbVK83oZfXq9m1g7lwmV0TVWLZsGY4fP4527drh0aNHeOutt1SPBC5evFjs8IiISIcMe7CNnmjufA+ZdxuJHcZzMYaxV7pg0mOviKhKTZo0wdmzZ7Fjxw6cPXsWxcXFePvttxEaGqo2wQURERk//jVtIHQxNbu2PXIBrPO124cuJrfQtrLG9rC6XSR2GERUC/Xq1UNoaChCQ0PFDoWIiETExwIFYgwLC+ti7JUxTM1usmOviKhKcXFx2LBhQ6X2DRs28LFAIiITw+TKgBjD2Ctd4NgrgTVoAISGKrdEVMnatWvRtm3bSu3t27fnAsJERCaGyZWAWL3SDKtXBqZ5c+DLL5VbIqokNzcXjRs3rtTu4uKC25wIhojIpDC5MjCsXmmG1SsBPXoEXLmi3BJRJV5eXjh+/Hil9uPHj8PDw0OEiIiISCxMrgTG6pVmWL3SjF4kWBcuAK1aKbdEVMm4ceMwdepUbNy4EVlZWcjKysKGDRsQGRmJcePGiR0eERHpEGcLNEDGMHOgLhjDzIFEpP+mT5+Oe/fuYcKECSgvLwcAWFtbY+bMmYiOjhY5OiIi0iVWrrSA1SvNsHqlGb2oXhEZmNWrV8Pb2xvW1tYIDg7GqVOnqt1306ZNkEgkai9ra2uN+5JIJFi8eDHy8/Nx8uRJnD17FgUFBYiJiRHiUoiIyIAwuTJQuhh7pYsES9uMYewVEdXOjh07EBUVhdjYWKSmpsLf3x8DBgzAnTt3qj3GwcEBt2/fVr2ysrJq3a+dnR26dOmCDh06wMrK6nkugYiIDBSTKy0xhuqVLrB6pRlWr4g0t3z5cowbNw4RERFo164dEhISUL9+/SrXonpCIpHA3d1d9XJzc9O4v5KSEsyePRs9evSAj48PWrRoofYiIiLTwTFXBkwXY68eO1fA4q5h3ya6GHtV6maB+nmPtdqHkMrKylBWVqZ6L5VKq9+5c2dAwbFrJK6n71ErK6sqq0Pl5eVISUlRG+tkZmaGkJAQJCcnV3v+4uJiNGvWDHK5HJ07d8bChQvRvn17jWJ75513cPToUYwZMwaNGzeGRMKKORGRqTLsv5r1XHPne8i820jsMPTeIxfAOl/sKPRfWWN7WN0uEuRccXFxmDt3riDnItIFLy8vtfexsbGYM2dOpf3u3r0LmUxWqfLk5uaGixcvVnnuNm3aYMOGDfDz80NhYSE+/vhj9OjRA+fPn0eTJk2eGdv+/fuxd+9e9OzZU/MLIiIio8THAg0cx15pRhdjrwxpYeHo6GgUFhaqXjk5OdXvnJEBdO+u3BKJJCcnR+2eFXIWvu7duyMsLAwBAQHo06cPdu/eDRcXF6xdu1aj4xs0aICGDRsKFg8RERkuJldapouxV8awsLAxjL3SBaHGXllZWcHBwUHtVa2SEuDkSeWWSCRP36/VTRjh7OwMc3Nz5OXlqbXn5eXB3d1do74sLCzQqVMnXLlyRaP9P/roI8TExKC0tFSj/YmIyHgxuSKNsHqlYR8GVL0iMkaWlpYIDAxEYmKiqk0ulyMxMRHdu3fX6BwymQznzp1D48aNNdp/2bJlOHjwINzc3NCxY0d07txZ7UVERKaDY650QBdjr4xhYWFdjL0yhoWFhRx7RWSMoqKiEB4ejqCgIHTt2hXx8fEoKSlBREQEACAsLAyenp6Ii4sDAMybNw/dunWDj48PHjx4gKVLlyIrKwvvvPOORv0NHz5cW5dCREQGhskVacwYZg7UBUObOZDI2IwaNQr5+fmIiYlBbm4uAgICcODAAdUkF9nZ2TAz++vBjfv372PcuHHIzc1FgwYNEBgYiBMnTqBdu3Ya9RcbG6uV6yAiIsMjUSg4x7ImpFIpHB0dEbLvXdSzrdvikLqYOVDb1StdJFe6mDlQ29UrXSRXT6pXFbIyJF74GIWFhTWPnarBk/u7ynMUFAD79gGDBwMctE86VuO9qUcePHiAXbt24erVq5g+fToaNmyI1NRUuLm5wdPTU+zwiIhIR1iG0CFjmJqd1SvNGFX1qmFDYPRosaMg0ltpaWkICQmBo6Mjrl+/jnHjxqFhw4bYvXs3srOzsWXLFrFDJCIiHeGEFkaGMwdqRheTW2ibUDMHPlN+PrB6tXJLRJVERUVh7NixuHz5MqytrVXtgwcPxq+//ipiZEREpGtMrnRMF1Oza5suZg40hqnZjWbmwJwcYNIk5ZaIKjl9+jTefffdSu2enp7Izc0VISIiIhILkysjZAzVK11g9YqIhGBlZQWpVFqp/dKlS3BxMYJvioiISGNMrkTA6pVmWL0iIkMwdOhQzJs3D48fK8dZSiQSZGdnY+bMmRg5cqTI0RERkS4xuTJSrF5pxiiqV+52YodAZNKWLVuG4uJiuLq64uHDh+jTpw98fHxgb2+PBQsWiB0eERHpEKd9EwlnDtSMLhYW1jaDnznQ3h546SXllogqcXR0xKFDh5CUlIS0tDQUFxejc+fOCAkJETs0IiLSMZNJrn755Rf069evys9Wr16NiRMn6jgi7bNu9FDr614Zw9Tspc4Sra97ZdBatQIOHhQ7CiK916tXL/Tq1UvsMIiISESG/VdxLbz66qs4fPgwAgMD1do//fRTzJ49W5TkyhiqV7rA6pXIZDKgpASwtQXMzcWOhkgvrFixQuN9J0+erMVIiIhIn5hMcrV06VIMGjQIv/76K9q2bQtA+Zz8vHnzsHfvXpGj0x5WrzTD6lUNzp4FAgOBlBSgc2exoyHSC5988ona+/z8fJSWlsLJyQkA8ODBA9SvXx+urq5MroiITIhh/0VcC++88w4KCgoQEhKCpKQk7NixAwsXLsS+ffvQs2dP0eJi9UozrF4RkT7JzMxU/fzVV1/hs88+wxdffIE2bdoAADIyMjBu3Lgq178iIiLjZTLJFQDMmDED9+7dQ1BQEGQyGQ4ePIhu3bqJHZbWsXqlGV1Ur5hgERmf2bNnY9euXarECgDatGmDTz75BK+99hpCQ0NFjI6IiHTJsP8afoaqnon39PRE/fr18Y9//AOnTp3CqVOnAIj7TDyrV5oxhuoVERmf27dvo6Ki8tp/MpkMeXl5IkRERERiMerk6ul
n4p8wNzfH8ePHcfz4cQDKBR+N/Zl4Vq80w+oVEdXWiy++iHfffReff/45Ov9vXGJKSgrGjx/P6diJiEyMYf8l/Ax/fyZe3+mieqWLBEvbWL0SQceOwJ07wP8G6hORug0bNiA8PBxBQUGwsLAAAFRUVGDAgAH4/PPPRY6OiIh0yaiTK9I9Vq807MOQqlcWFoCLi9hREOktFxcX7Nu3D5cuXcLFixcBAG3btkXr1q1FjoyIiHTNTOwAdGXkyJFYvHhxpfYlS5bgn//8pwgRVdbc+Z7W+7Bu9FDrfWjbIx38nV/qLNF+J4bi6lVg6FDlloiq1bp1awwdOhRDhw5lYkVEZKIMu8RQC7/++ivmzJlTqX3QoEFYtmyZ7gMyYsZQvdIFg6leFRYCe/YAVfz7ISLlxBWbNm1CYmIi7ty5A7lcrvb5zz//LFJkRESkaybzF3BxcTEsLS0rtVtYWEAqlYoQUdU49kozuhh7xYWFiUgTU6ZMwaZNm/Dyyy+jQ4cOkEhY+SYiMlUmk1x17NgRO3bsQExMjFr79u3b0a5dO43P084hF5dkzYQOT40xTM3O6pVmDKZ6RUTV2r59O3bu3InBgweLHQoREYnMZP76nT17Nl599VVcvXoVL7zwAgAgMTERX3/9Nb755huRo9M9Vq80w+oVET2LpaUlfHx8xA6DiIj0gMlMaDFkyBB8//33uHLlCiZMmID3338fN27cwOHDhzF8+PBanauj4y3tBPk3upjcQtseO1deVFNoupjcQttK3SzEDqFmnp7AsmXKLRFV8v777+PTTz+FQsEvYoiITJ3JVK4A4OWXX8bLL78sdhh6wxiqV7pg8tUrNzcgKkrsKIhqZfXq1Vi6dClyc3Ph7++PlStXomvXrs88bvv27XjzzTcxbNgwfP/99xr1lZSUhF9++QX79+9H+/btVWtdPbF79+66XAIRERkgk0quACAlJQXp6ekAgPbt26NTp051Ok9Hx1s4V+ghZGiVcOyVZoxhYWG9Hnt1/z5w+DAQEgI0aCB2NETPtGPHDkRFRSEhIQHBwcGIj4/HgAEDkJGRAVdX12qPu379OqZNm4bevXvXqj8nJyeMGDHiecMmIiIjYDLJ1Z07d/DGG2/gyJEjcHJyAgA8ePAA/fr1w/bt2+FSh0VSdZFgaRurV5ox6epVZibw+utASgqTKzIIy5cvx7hx4xAREQEASEhIwN69e7FhwwbMmjWrymNkMhlCQ0Mxd+5cHDt2DA8ePNC4v40bNwoRNhERGQGTGXP13nvvoaioCOfPn0dBQQEKCgrw559/QiqVYvLkyWKHVy2OvdIMx17VTllZGaRSqdqLSJ89fb+WlZVVuV95eTlSUlIQEhKiajMzM0NISAiSk5OrPf+8efPg6uqKt99+u07xVVRU4PDhw1i7di2KiooAALdu3UJxcXGdzkdERIbJZJKrAwcO4LPPPoOvr6+qrV27dli9ejX2799f5/PqYnILbbNu9FDrfegiwdK2UmfjWbsmLi4Ojo6OqpeXl5fYIRHVyMvLS+2ejYuLq3K/u3fvQiaTwc3NTa3dzc0Nubm5VR6TlJSEL774AuvXr69TbFlZWejYsSOGDRuGiRMnIj9f+Zzy4sWLMW3atDqdk4iIDJPJJFdyubzSIGNAuYiwXC4XISLNGUP1ShdYvdJcdHQ0CgsLVa+cnByd9EtUVzk5OWr3bHR0tCDnLSoqwpgxY7B+/Xo4OzvX6RxTpkxBUFAQ7t+/Dxubvx6zHjFiBBITEwWJk4iIDIPJjLl64YUXMGXKFHz99dfw8FCOk7p58yYiIyPx4osvPte5OfZKM8awsLCxjL2ysrKClZWVZjvb2ACdOim3RCJxcHCAg4PDM/dzdnaGubk58vLy1Nrz8vLg7u5eaf+rV6/i+vXrGDJkiKrtyRdu9erVQ0ZGBlq2bFljn8eOHcOJEydgaWmp1u7t7Y2bN28+M2YiIjIeJlO5WrVqFaRSKby9vdGyZUu0bNkSzZs3h1QqxcqVK8UO75lYvdIMq1da4OsLpKYqt0R6ztLSEoGBgWoVI7lcjsTERHTv3r3S/m3btsW5c+dw5swZ1Wvo0KHo168fzpw5o9Ejs3K5HDKZrFL7jRs3YG9v/3wXREREBsWwywi14OXlhdTUVBw+fBgXL14EAPj6+qoNen4erF5phtUrDfvQ56nZifRcVFQUwsPDERQUhK5duyI+Ph4lJSWq2QPDwsLg6emJuLg4WFtbo0OHDmrHP5lR9un26rz00kuIj4/HunXrAAASiQTFxcWIjY3F4MGDhbswIiLSe4b9V24tSSQS9O/fH/379xc7lDrRxbpXxjA1uzGse6VX/vgD6NYNOHlS+XggkZ4bNWoU8vPzERMTg9zcXAQEBODAgQOqSS6ys7NhZibcgxvLli3DgAED0K5dOzx69AhvvfUWLl++DGdnZ3z99deC9UNERPpPolAoDH8ASTVWrFih8b7Pmo5dKpXC0dERk5OGwcqu+se2tF290sWiwrpIrrRdvdJFcqWLsVeaVK8qKh4h6ec5KCws1GhMSlWe3N9VniM1FQgMVK5z1blznc5PVFc13pt6pKKiAtu3b0daWhqKi4vRuXNnhIaGqk1wQURExs+oK1effPKJRvtJJBK9Xuvq71i90gyrV0SkS/Xq1cPo0aPFDoOIiERm1MlVZmamzvs0hrFXusCxVxr2wbFXRAYhIyMDK1euRHp6OgDlmN5Jkyahbdu2IkdGRES6ZDKzBRoTXcwcqIuFhbVNFzMHGtPCwkRUN99++y06dOiAlJQU+Pv7w9/fH6mpqejYsSO+/fZbscMjIiIdMuzSQS0oFArs2rULv/zyC+7cuVNp4eDdu3cL1herV5oxhuqVLohevfL1Bf78E2jRQrwYiPTYjBkzEB0djXnz5qm1x8bGYsaMGRg5cqRIkRERka6ZTOVq6tSpGDNmDDIzM2FnZwdHR0e1l6Fh9UozrF4JwMYGaN+eiwgTVeP27dsICwur1D569Gjcvn1bhIiIiEgsJlM22Lp1K3bv3q2zNUd0Ub3SxeQW2sbqlWZErV5lZQEffQTMng00ayZODER6rG/fvjh27Bh8fHzU2pOSktC7d2+RoiIiIjGYzF+1jo6OaMHHmmqNMwdqRheTW4jm3j3giy+ACROYXBFVYejQoZg5cyZSUlLQrVs3AMDJkyfxzTffYO7cufjxxx/V9iUiIuNl1Otc/d3mzZtx4MABbNiwoU7rjmi6ztXTdDH2StvVK2NY9wrQfoIl1rpXXOeKjJkhrHOl6YLEEokEMplMy9EQEZGYTKZy9frrr+Prr7+Gq6srvL29YWGhniClpqaKFJn+M4bqlS4YdfWKiKr19ARJRERkukwmuQoPD0dKSgpGjx4NNzc3SCS6mYSAY680o4uxV8awsLDoMwcSUY0ePXoEa2trscMgIiKRmExytXfvXhw8eBC9evXSed/GMDU7q1eaMcrqlZsbMGuWcktElchkMixcuBAJCQnIy8vDpUuX0KJFC8yePRve3t54++23xQ6RiIh0xGSmYvfy8tLb5/WFoIup2b
XtsXOF1vvQxdTs2lbqpvmYP0F4egJxccotEVWyYMECbNq0CUuWLIGlpaWqvUOHDvj8889FjIyIiHTNZJKrZcuWYcaMGbh+/boo/Xd0vCVKv0LSxbpXukiwtM3o1r0qKgKOHFFuiaiSLVu2YN26dQgNDYW5ubmq3d/fHxcvXhQxMiIi0jWTeSxw9OjRKC0tRcuWLVG/fv1KE1oUFBSIFJlwjGHslS5w7FUtXb4M9OvH2QKJqnHz5s1Ka1wByokuHj/mGEkiIlNiMslVfHy82CFw7JWGjGFhYaMce0VEVWrXrh2OHTuGZk+tA7dr1y506tRJpKiIiEgMhv0XbC2Eh4drtN+iRYvw73//G05OTtoNSEtYvdIMq1dEJJSYmBiEh4fj5s2bkMvl2L17NzIyMrBlyxb89NNPYodHREQ6ZDJjrjS1cOFCrT4iyLFXmuHYKw370PXkFkRUybBhw7Bnzx4cPnwYtra2iImJQXp6Ovbs2YP+/fuLHR4REemQyVSuNKVQGP6jXLqoXhnD1OzGUL3SCQsL5UyBFkzkiKrTu3dvHDp0SOwwiIhIZKxcicAYqle6wOqVZh66aDnp6dgRuHFDuSUiIiKiarFyZaRYvdIMq1dEVBcNGjSARKLZlyfGMBstERFphsmVSIxh5kBd4MyBeuDcOWDQIGD/flaviP7n7zPQ3rt3D/Pnz8eAAQPQvXt3AEBycjIOHjyI2bNnixQhERGJgY8FGrHmzve03ocuJrfQtkcu2u/DoBcWfvwYuHlTuSUyEKtXr4a3tzesra0RHByMU6dOVbvv7t27ERQUBCcnJ9ja2iIgIABbt26t8fzh4eGq1/HjxzFv3jx8/fXXmDx5MiZPnoyvv/4a8+bNw9GjR4W+NCIi0mNMrp7Su3dv2Njo5lE3XYy90kWCpW3GMPaKiHRnx44diIqKQmxsLFJTU+Hv748BAwbgzp07Ve7fsGFDfPDBB0hOTkZaWhoiIiIQERGBgwcPatTfwYMHMXDgwErtAwcOxOHDh5/rWoiIyLCYVHJ19epVfPjhh3jzzTdV/ye7f/9+nD9/XrXPvn370LhxY7FCNEisXmnGoKtXRAZk+fLlGDduHCIiItCuXTskJCSgfv362LBhQ5X79+3bFyNGjICvry9atmyJKVOmwM/PD0lJSRr116hRI/zwww+V2n/44Qc0asR1B4mITInJJFdHjx5Fx44d8dtvv2H37t0oLi4GAJw9exaxsbGixcXqlWZYvSIiTZSXlyMlJQUhISGqNjMzM4SEhCA5OfmZxysUCiQmJiIjIwP/+Mc/NOpz7ty5mDlzJoYMGYL58+dj/vz5GDJkCGbNmoW5c+fW+VqIiMjwmExyNWvWLMyfPx+HDh2CpaWlqv2FF17AyZMnRYzMOLB6pRl9qV6VlZVBKpWqvarVqhXwyy/KLZFInr5fy8rKqtzv7t27kMlkcHNzU2t3c3NDbm5utecvLCyEnZ0dLC0t8fLLL2PlypUaLwA8duxYHD9+HA4ODti9ezd2794NBwcHJCUlYezYsRpfIxERGT7DnoatFs6dO4evvvqqUrurqyvu3r0rQkR/0cXMgbqYml3bdDFzoKlMzR4XF6f5N+r29kDfvlqNh+hZvLy81N7HxsZizpw5gp3f3t4eZ86cQXFxMRITExEVFYUWLVqgr4b3fnBwMLZt2yZYPEREZJhMJrlycnLC7du30bx5c7X2P/74A56eniJFZVyMYd0rXdCHqdmjo6MRFRWlei+VSiv98apy8yawahUwaRLAfyskkpycHDg4OKjeW1lZVbmfs7MzzM3NkZeXp9ael5cHd3f3as9vZmYGHx8fAEBAQADS09MRFxencXJFREQEmNBjgW+88QZmzpyJ3NxcSCQSyOVyHD9+HNOmTUNYWJjY4XHslYZ0MfZKF48His3KygoODg5qr2rl5QGLFim3RCJ5+n6tLrmytLREYGAgEhMTVW1yuRyJiYmqNag0IZfLq330kIiIqDomk1wtXLgQbdu2hZeXF4qLi9GuXTv84x//QI8ePfDhhx+KHR4A3SRY2mYMY690QV/GXhEZo6ioKKxfvx6bN29Geno6xo8fj5KSEkRERAAAwsLCEB0drdo/Li4Ohw4dwrVr15Ceno5ly5Zh69atGD16tFiXQEREBspkHgu0tLTE+vXrERMTg3PnzqG4uBidOnVCKxMbpM+xV5oxlbFXRMZo1KhRyM/PR0xMDHJzcxEQEIADBw6oJrnIzs6Gmdlf3y2WlJRgwoQJuHHjBmxsbNC2bVt8+eWXGDVqlFiXQEREBkqiUCjEHfwhEplMhnPnzqFZs2Zo0KDBM/eXSqVwdHTE5KRhsLKz0Gps2p7cQhfJlS7GXmk7wdJFciXE2CtZ+SOk7PgAhYWFNT/eV4Mn93eV50hNBQIDgZQUoHPn546XqDZqvDeJiIj0jMlUrqZOnYqOHTvi7bffhkwmQ58+fXDixAnUr18fP/30k0kNWjaG6pUusHr1P40aAW+/rdwSEQDg1Vdf1Xjf3bt3azESIiLSJyYz5mrXrl3w9/cHAOzZswfXrl3DxYsXERkZiQ8++EDj83SxvaatEFU49kozxrCwsEGMvWrWDPj8c+WWiAAAjo6OGr+IiMh0mEzl6u7du6ppePft24fXX38drVu3xv/93//h008/rdW5utldwcliH22EqTOsXmmG1SsADx8C164BLVoANpxqnwgANm7cKHYIRESkh0ymcuXm5oYLFy5AJpPhwIED6N+/PwCgtLQU5ubmIkdXGatXmmH1SgfS04EOHZRbIiIiIqqWyVSuIiIi8Prrr6Nx48aQSCQICQkBAPz2229o27Ztrc/H6pVmjGFhYVaviOhZdu3ahZ07dyI7Oxvl5eVqn6WmpooUFRER6ZrJVK7mzJmDL774Av/6179w/Phx1QKU5ubmauud6BNjqF7pAqtXRCSmFStWICIiAm5ubvjjjz/QtWtXNGrUCNeuXcOgQYPEDo+IiHTIZCpX8+bNU/28YcMGtc+ysrIwdOjQWp+T1SvNsHpFRMbss88+w7p16/Dmm29i06ZNmDFjBlq0aIGYmBgUFBSIHR4REemQySRX3333ndr7x48fIzMzE/Xq1UPLli0RExMjUmQ16+h4S+vrXhkDXSwsrG2lzhJB1r0SnEQCWFoqt0RUSXZ2Nnr06AEAsLGxQVFREQBgzJgx6NatG1atWiVmeEREpEOG/ddoLfzxxx+V2qRSKcaOHYsRI0bU+bysXmmG1SvN6GWC1akTUFYmdhREesvd3R0FBQVo1qwZmjZtipMnT8Lf3x+ZmZlQKPTs3zMREWmVyYy5qoqDgwPmzp2L2bNnix1KjXQx9qq58z2t96FtxjD2iogMzwsvvIAff/wRgHLypMjISPTv3x+jRo16ri/viIjI8JhM5ao6hYWFKCwsfK5zGEP1ShdYvdKM3lWv0tOB0FBg2zbA11fsaIj0zrp16yCXywEAEydORKNGjXDixAkMHToU7777rsjRERGRLplMcrVixQq19wqFArdv38bWrVsNYjYnXYy9MoaFhY1h7JXee
fgQ+OMP5ZaIKjEzM4OZ2V8Pgrzxxht44403RIyIiIjEYjJ/hX7yySdq783MzODi4oLw8HBBpmJn9UozrF5pRu+qV0SkJi0tDR06dICZmRnS0tJq3NfPz09HURERkdhMJrnKzMwUO4TnxuqVZnRRveLU7ESmLSAgALm5uXB1dUVAQAAkEkmVk1dIJBLIZDIRIiQiIjGYTHKlC6xeacYYqle6wOoVkf7KzMyEi4uL6mciIiLAxGcLNEScOVAzupg58JGL1rvQD82bAzt3KrdEBABo1qwZJP9b+y0rKwuenp5o1qyZ2svT0xNZWVkiR0pERLrE5Epg3eyuaL0PXSRY2mbdiJMjaKLUWQ8W7m3QAPjnP5VbIqqkX79+KCgoqNReWFiIfv36iRARERGJhckVVYnVK82YRPUqLw9Yvly5JaJKFAqFqor1d/fu3YOtra0IERERkVg45koLdDH2SheTW2ibLsZeGcPU7KKPvbp5E3j/faBvX8DNTbw4iPTMq6++CkA5acXYsWNhZWWl+kwmkyEtLQ09evQQKzwiIhKBYf/VSVplDDMH6gJnDiQyTY6OjgCUlSt7e3vY2Pz1ZZGlpSW6deuGcePGiRUeERGJgMmVlrB6pRlWrzQjevWKiCrZuHGjavr1lStXws7OTuSIiIhIbBxzpUW6mNxC24xh7JUumMTYKyKqRKFQYNu2bbh9+7bYoRARkR5gcmXgOHOgZnQxuYW2iTZzoKMjMGSIcktkIFavXg1vb29YW1sjODgYp06dqnbf9evXo3fv3mjQoAEaNGiAkJCQGvf/OzMzM7Rq1Qr37vGLKCIiYnKldaxeacYYpmY32upVy5bAjz8qt0QGYMeOHYiKikJsbCxSU1Ph7++PAQMG4M6dO1Xuf+TIEbz55pv45ZdfkJycDC8vL7z00ku4efOmRv0tWrQI06dPx59//inkZRARkQGSKJ48ME41kkqlcHR0xNY/OqK+vXmtjtX22CsAWh97pYuJLbQ99gqA1sde6WJii6fHXsnKHyFlxwcoLCyEg4NDnc755P6u8hyPHwMPHgBOToCFRd2CJqqjGu/NagQHB6NLly5YtWoVAEAul8PLywvvvfceZs2a9czjZTIZGjRogFWrViEsLOyZ+zdo0AClpaWoqKiApaWl2sQWAKpcA4uIiIyTYY/yNxC6mNxC23Qxc6AuJrfQNl3MHKjzyS3OnQMCA4GUFKBzZ931S1QH5eXlSElJQXR0tKrNzMwMISEhSE5O1ugcpaWlePz4MRo2bKjR/vHx8XUJlYiIjBCTKyNhDDMH6oIxzBwohLKyMpSVlaneS6VSEaMheran71ErKyu1daWeuHv3LmQyGdyeWpPNzc0NFy9e1KivmTNnwsPDAyEhIRrtHx4ertF+RERk/DjmSkc49kozHHulmeed3CIuLg6Ojo6ql5eXl0CREWmHl5eX2j0bFxenlX4WLVqE7du347vvvoO1tXWtj3/06BGkUqnai4iITAe/wjciuqheGcPCwqxeAdHR0YiKilK9l0ql1SZYRzLuoC+AlT9fxq3rNY250uxRxdqM8tR0X4U2+tZ0P4FjrE3nmseo4e9Hw/Mpz6nhfhqer627PSb2q/7x6ZycHLUxV1VVrQDA2dkZ5ubmyMvLU2vPy8uDu7t7jTF8/PHHWLRoEQ4fPgw/Pz8NIwdKSkowc+ZM7Ny5s8pZA2UymcbnIiIiw2baf2HqmDGMvdIFjr3SzPOMvarukaqqpN+Woi+AA3/m4vxdLpJK2vGg1LnG5MrBwUGjCS0sLS0RGBiIxMREDB8+HIByQovExERMmjSp2uOWLFmCBQsW4ODBgwgKCqpV7DNmzMAvv/yCNWvWYMyYMVi9ejVu3ryJtWvXYtGiRbU6FxERGTYmV0aG1SvNsHqlubYDeyPBJxUDrW0wwLzmmTI1eVhRouETjRJNdxSyTw2uQPNzabifBjtqEpem59KUpr9/oa7Tw0m4L1SioqIQHh6OoKAgdO3aFfHx8SgpKUFERAQAICwsDJ6enqpHCxcvXoyYmBh89dVX8Pb2Rm5uLgDAzs4OdnbP/kJhz5492LJlC/r27YuIiAj07t0bPj4+aNasGbZt24bQ0FDBro2IiPQb/7rUMVavNMPqlWZKnSWw0vI60v3aNUa/do212wmRgEaNGoX8/HzExMQgNzcXAQEBOHDggGqSi+zsbJiZ/TXkeM2aNSgvL8drr72mdp7Y2FjMmTPnmf0VFBSgRYsWAJQVtidTr/fq1Qvjx48X6KqIiMgQMLkyQqxeaUYX1StdJFhad/kyMGkSsGoV0KqV2NEQaWTSpEnVPgZ45MgRtffXr19/rr5atGiBzMxMNG3aFG3btsXOnTvRtWtX7NmzB05OTs91biIiMiycLVAEupg5sKOjlssZOmAMMwfqQmkjAZ8Fq0pREfDf/yq3RFRJREQEzp49CwCYNWsWVq9eDWtra0RGRmL69OkiR0dERLrEyhXVGatXmjGK6hURVSsyMlL1c0hICC5evIiUlBT4+PjUatZBIiIyfKxciYTVK82wekVE+koul2Px4sXo2bMnunTpglmzZuHhw4do1qwZXn31VSZWREQmiMkVPRddLCysbY+dK7Tehy4WFiYi3VqwYAH+85//wM7ODp6envj0008xceJEscOqtTlz5iAgIEDsMFSuX78OiUSCM2fOiB0KEVGtMbkSEatXmtFF9UoXCZbB8vJSTmZRzSLDRKZqy5Yt+Oyzz3Dw4EF8//332LNnD7Zt2wa5XC52aNWSSCT4/vvvxQ6DiMhoMbmi52YM1StdMNjqlYsLMHGicktEKtnZ2Rg8eLDqfUhICCQSCW7dMvwvtZ6lvLxc7BCIiPQSkyuRsXqlGVavRFRQAHz5pXJLRCoVFRWwtrZWa7OwsMDjx4+12m/fvn0xefJkzJgxAw0bNoS7u7tG63F5e3sDAEaMGAGJRKJ6/8TWrVvh7e0NR0dHvPHGGyj62wyhffv2xaRJkzB16lQ4OztjwIABAIA///wTgwYNgp2dHdzc3DBmzBjcvXtXddyBAwfQq1cvODk5oVGjRnjllVdw9epVtX5PnTqFTp06wdraGkFBQfjjjz/UPr9//z5CQ0Ph4uICGxsbtGrVChs3bqzFb4yISHeYXOkBXSRY2sbqlWYMsnp1/TowZoxyS0QqCoUCY8eOxauvvqp6PXr0CP/+97/V2rRh8+bNsLW1xW+//YYlS5Zg3rx5OHToUI3HnD59GgCwceNG3L59W/UeAK5evYrvv/8eP/30E3766SccPXoUixYtqtSnpaUljh8/joSEBDx48AAvvPACOnXqhN9//x0HDhxAXl4eXn/9ddUxJSUliIqKwu+//47ExESYmZlhxIgRqkcni4uL8corr6Bdu3ZISUnBnDlzMG3aNLV+Z8+ejQsXLmD//v1IT0/HmjVr4Ozs/Fy/PyIibeFU7CZCFwsLa5t1o4d4dM9Gq33oYmp2IjIO4eHhldpGjx6tk779/PwQGxsLAGjVqhVWrVqFxMRE9O/fv9pjXP73aK+TkxPc3d3VPpPL5di0aRPs
7e0BAGPGjEFiYiIWLFig2qdVq1ZYsmSJ6v38+fPRqVMnLFy4UNW2YcMGeHl54dKlS2jdujVGjhyp1s+GDRvg4uKCCxcuoEOHDvjqq68gl8vxxRdfwNraGu3bt8eNGzcwfvx41THZ2dno1KkTgoKCAKBSxY2ISJ/wr0g90c3uCk4W+4gdxnPRxbpXukiwtI3rXhEZBzEfTXt6mvfGjRvjzp07dT6ft7e3KrGq7nyBgYFq78+ePYtffvkFdnZ2lc539epVtG7dGpcvX0ZMTAx+++033L17V1Wxys7ORocOHZCeng4/Pz+1xyu7d++udq7x48dj5MiRSE1NxUsvvYThw4ejR48edb5WIiJtYnJlQoyheqULrF4Rkb6zsLBQey+RSJ5rlkJNzmdra6v2vri4GEOGDMHixYsrna9x48YAgCFDhqBZs2ZYv349PDw8IJfL0aFDh1pNiDFo0CBkZWVh3759OHToEF588UVMnDgRH3/8scbnICLSFY650iMce6UZY1hY2KDGXtnaAt26KbdEZNAsLCwgk8kEOVfnzp1x/vx5eHt7w8fHR+1la2uLe/fuISMjAx9++CFefPFF+Pr64v79+2rn8PX1RVpaGh49eqRqO3nyZKW+XFxcEB4eji+//BLx8fFYt26dINdARCQ0JlcmxhhmDtQFzhz4N23aAMnJyi0RGTRvb28kJiYiNze3UqJTWxMnTkRBQQHefPNNnD59GlevXsXBgwcREREBmUyGBg0aoFGjRli3bh2uXLmCn3/+GVFRUWrneOuttyCRSDBu3DhcuHAB+/btq1SRiomJwQ8//IArV67g/Pnz+Omnn+Dr6/tcsRMRaQuTKz3D6pVmWL0iIqq9ZcuW4dChQ/Dy8kKnTp2e61weHh44fvw4ZDIZXnrpJXTs2BFTp06Fk5MTzMzMYGZmhu3btyMlJQUdOnRAZGQkli5dqnYOOzs77NmzB+fOnUOnTp3wwQcfVHrM0NLSEtHR0fDz88M//vEPmJubY/v27c8VOxGRtkgUCoVC7CAMgVQqhaOjIy6mu+G8RWOt9qWLiS10MfZK25Nb6GJiC22PvRJiYgtZ2SOkf/YfFBYWwsHBoU7neHJ/V3mO1FQgMBBISQE6d37+gIlqocZ7k4iISM+wcqWHjKF6pQusXhERERGRPmFyVQfdrPPEDuG56WLslTEsLKyLsVdMsIhICNu2bYOdnV2Vr/bt24sdHhGRSeB803rKGNa90gVjWPeKiEgIQ4cORXBwcJWfPT3VOhERaQeTqzrqZp2Hk4/cxA7juehi3StdLCysbbpY94oLCxPR87K3t1dbCJiIiHSPjwXqMV2MvTKGqdmNYeyVXmvXDrh8WbklIiIiomoxuXoOxjD2Shc49kozejv2ytoa8PFRbolMRFxcHLp06QJ7e3u4urpi+PDhyMjI0HkcixYtgkQiwdSpU3XS382bNzF69Gg0atQINjY26NixI37//Xet9imTyTB79mw0b94cNjY2aNmyJT766CNoYzLjX3/9FUOGDIGHhwckEgm+//57tc8VCgViYmLQuHFj2NjYICQkBJcvXxY8DiIyXkyuasn8hm4Xl2X1SjOsXmlRZiYwerRyS2Qijh49iokTJ+LkyZM4dOgQHj9+jJdeegklJSU6i+H06dNYu3Yt/Pz8dNLf/fv30bNnT1hYWGD//v24cOECli1bhgYNGmi138WLF2PNmjVYtWoV0tPTsXjxYixZsgQrV64UvK+SkhL4+/tj9erVVX6+ZMkSrFixAgkJCfjtt99ga2uLAQMG4NGjR4LHQkTGiWOuasnqv2WAr6XqvTGMvdIFjr3SjF6Ovbp/H9i2DYiKApo3FzsaIp04cOCA2vtNmzbB1dUVKSkp+Mc//qH1/ouLixEaGor169dj/vz5Wu8PUCY5Xl5e2Lhxo6qtuQ7+zZ84cQLDhg3Dyy+/DADw9vbG119/jVOnTgne16BBgzBo0KAqP1MoFIiPj8eHH36IYcOGAQC2bNkCNzc3fP/993jjjTcEj4eIjA8rV7Vk81/df3vF6pVmdFG90sXjgUSkfwoLCwEADRs21El/EydOxMsvv4yQkBCd9AcAP/74I4KCgvDPf/4Trq6u6NSpE9avX6/1fnv06IHExERcunQJAHD27FkkJSVVmwRpS2ZmJnJzc9V+546OjggODkZycrJOYyEiw8XKVS3VO1uBijG34eD41/iTF4JK8POo5oBEImJk+s8Yqle6UJvqVX5+PvK+jNNuQEQmTi6XY+rUqejZsyc6dOig9f62b9+O1NRUnD59Wut9/d21a9ewZs0aREVF4T//+Q9Onz6NyZMnw9LSEuHh4Vrrd9asWZBKpWjbti3Mzc0hk8mwYMEChIaGaq3PquTm5gIA3NzUn0Zxc3NTfUZE9CxMrjT0ZGBtMQCHX4AKPIJCAhT/qz6Kh1mjtFiu1f79kIHTJS202kdr8yxckLprtQ9ZqXYrfxY2j1BWoN11r8psAYt72v2nIyvTbL+/J1bPM/j7ybFSqbTyh8XFf22r+pxIi57ck9qY3EBTEydOxJ9//omkpCSt95WTk4MpU6bg0KFDsNbxJDJyuRxBQUFYuHAhAKBTp074888/kZCQoNXkaufOndi2bRu++uortG/fHmfOnMHUqVPh4eGh1X6JiLSByZWGioqKAABef29UAFhbqnzhjg6iOKeDPshQFRUVwdHRsc7HAoCXl1f1O/XpU6dzEwnhee7v5zFp0iT89NNP+PXXX9GkSROt95eSkoI7d+6gc+fOqjaZTIZff/0Vq1atQllZGczNzbXSd+PGjdHuqSUXfH198e2332qlvyemT5+OWbNmqcY0dezYEVlZWYiLi9NpcuXurvxyMS8vD40bN1a15+XlISAgQGdxEJFhY3KlIQ8PD+Tk5MDe3h4SPv5HekShUKCoqAgeHnVfEJr3N+krIe7vuvb73nvv4bvvvsORI0d0MrEDALz44os4d079i7SIiAi0bdsWM2fO1FpiBQA9e/asNN38pUuX0KxZM631CQClpaUwM1MfAm5ubg65XLtPhDytefPmcHd3R2JioiqZkkql+O233zB+/HidxkJEhovJlYbMzMx08q0lUV087zf6vL9Jn4lRsZo4cSK++uor/PDDD7C3t1eNuXF0dISNjfYePba3t680rsvW1haNGjXS+nivyMhI9OjRAwsXLsTrr7+OU6dOYd26dVi3bp1W+x0yZAgWLFiApk2bon379vjjjz+wfPly/N///Z/gfRUXF+PKlb8micrMzMSZM2fQsGFDNG3aFFOnTsX8+fPRqlUrNG/eHLNnz4aHhweGDx8ueCxEZJwkCjEfZCciItJD1VVwN27ciLFjx+o0lr59+yIgIADx8fFa7+unn35CdHQ0Ll++jObNmyMqKgrjxo3Tap9FRUWYPXs2vvvuO9y5cwceHh548803ERMTA0tLy2efoBaOHDmCfv36VWoPDw/Hpk2boFAoEBsbi3Xr1uHBgwfo1asXPvvsM7Ru3VrQOIjIeDG5IiIiIiIiEgDXuSIiIiIiIhIAkysiIiIiIiI
BMLkiIiIiIiISAJMrIiIiIiIiATC5IiIiIiIiEgCTKyIiIiIiIgEwuSIiIiIiIhIAkysiIqJqlJWVYc6cOSgrKzOZvnnNRER1x0WEiYiIqiGVSuHo6IjCwkI4ODiYRN+8Zt1eMxEZF1auiIiIiIiIBMDkioiIiIiISAD1xA7AUMjlcty6dQv29vaQSCRih0OkolAoUFRUBA8PD5iZ1e37Et7fpK/Evr+lUqnaVpfE6pvXrDt1vb9Xr16NpUuXIjc3F/7+/li5ciW6du1a7f4PHjzABx98gN27d6OgoADNmjVDfHw8Bg8eLMRlENHfcMyVhm7cuAEvLy+xwyCqVk5ODpo0aVKnY3l/k77j/U3GrDb3944dOxAWFoaEhAQEBwcjPj4e33zzDTIyMuDq6lpp//LycvTs2ROurq74z3/+A09PT2RlZcHJyQn+/v5CXwqRyWNypaHCwkI4OTkBAGxtgeTTlf8DRqRrfu3uqH5+8OABHB0d63Sev9/fAJB2gfc3iU/o+zsnJ4eTFWjizBmgTx/g6FEgIEDsaIyaVCqFl5dXre7v4OBgdOnSBatWrQKgrMx6eXnhvffew6xZsyrtn5CQgKVLl+LixYuwsLAQNH4iqoyPBWroyaMktrbAH+cawcKCw9VIfGkXbOHXrgQAnutxvr8fe+GSI2xseH+T+C5cckS71oUAhLm/HRwcmFxpws7ury1/Xzqh6f1dXl6OlJQUREdHq9rMzMwQEhKC5OTkKo/58ccf0b17d0ycOBE//PADXFxc8NZbb2HmzJkwNzcXJH4i+guTq1pKPu3KxIr0hr29/f8SrDvP3lkDaRdcmViR3rCxsUHaBata399lZWVq6xWJMXaIqDaevketrKxgZWVVab+7d+9CJpPBzc1Nrd3NzQ0XL16s8tzXrl3Dzz//jNDQUOzbtw9XrlzBhAkT8PjxY8TGxgp3EUQEgLMFEhGRkYmLi4Ojo6PqxfFWpO+8vLzU7tm4uDjBzi2Xy+Hq6op169YhMDAQo0aNwgcffICEhATB+iCiv7ByRURERiU6OhpRUVGq90/GtZCGXF2ByEjllnTi6fGAVVWtAMDZ2Rnm5ubIy8tTa8/Ly4O7u3uVxzRu3BgWFhZqjwD6+voiNzcX5eXlsLS0FOAKiOgJJldERGRUqnukasjKY6hnbQsAaGRnhZVvdoKbg7Wuw9N/TZoAy5eLHYVJ0XQ8oKWlJQIDA5GYmIjhw4cDUFamEhMTMWnSpCqP6dmzJ7766ivI5XLVdO+XLl1C48aNmVgRaQEfCyQiIpOQebcUV/NLcDW/BKcyC/DLRWHGKhqd4mIgOVm5Jb0TFRWF9evXY/PmzUhPT8f48eNRUlKCiIgIAEBYWJjahBfjx49HQUEBpkyZgkuXLmHv3r1YuHAhJk6cKNYlEBk1Vq6IiMgkbBrbBbb29lj1yxUcu3wXRY8qxA5JP126BPToAaSkAJ07ix0NPWXUqFHIz89HTEwMcnNzERAQgAMHDqgmucjOzlZbkNjLywsHDx5EZGQk/Pz84OnpiSlTpmDmzJliXQKRUWNyRUREJiGoeUM4ODhgT9otHLsMFJUxuSLDNGnSpGofAzxy5Eiltu7du+PkyZNajoqIAD4WSEREJsbOSrmQajErV0REJDAmV0REZFLsrZUPbRSXPRY5EiIiMjZMroiIyKT8lVyxclWlevUAZ2flloiIaoXJVS1FTn4AuVwudhhEAIDy8nIE+gs341n3Lnfw+DG/zSf9IJPJMHLYXcHPa2elTBo4oUU1/PyA/HzlloiIaoXJVS0lHi7HhHcLxQ6DCADQ0bcAQuZCJSVAZ797wp2Q6Dm8MrAAly8L/2UWkysiItIWJld1kHaW3+yTfigvF/6cXNqG9MX16zKtnNeOjwXW7Px5wMdHuSUiolphclUHfv4WYodABACwtBT+nHZ2wp+TqC68vc21cl57zhZYs7Iy4OpV5ZaIiGqFyVUtvRhiic/WOoodBhEA4Fx6Q1gImOvb2gKpaY2EOyHRc/jpQEO0aiX8/01xQgui2qmoqMDhw4exdu1aFBUVAQBu3bqFYj7qQFQJk6ta+mSFk9rK50RisrS0RMpZV8HOl3zaFRZCZmtEz8Hc3Bzf/uAs+Hn//ligTK4Q/PxExiQrKwsdO3bEsGHDMHHiROTn5wMAFi9ejGnTpokcHZH+YZZAREQm5cmEFgBQUs7qFVFNpkyZgqCgINy/fx82Njaq9hEjRiAxMVHEyIj0ExexICIik2JVzwwW5hI8lilQ/KgCDtas1qrx8QEOHFBuyeQdO3YMJ06cgOVTg3y9vb1x8+ZNkaIi0l+sXBERkUmRSCSq6hXHXVXBwQEYMEC5JZMnl8shk1WeufPGjRuwt7cXISIi/cbkioiITM6TcVdc66oKt28Dc+Yot2TyXnrpJcTHx6veSyQSFBcXIzY2FoMHDxYvMCI9xeSKiIhMjmo6dlauKrt9G5g7l8kVAQCWLVuG48ePo127dnj06BHeeust1SOBixcvFjs8Ir3DMVdERGRy/qpccVF4opo0adIEZ8+exY4dO3D27FkUFxfj7bffRmhoqNoEF0SkxOSKiIhMjv2TMVd8LJDomerVq4fQ0FCEhoaKHQqR3jOKxwJ//fVXDBkyBB4eHpBIJPj+++/VPlcoFIiJiUHjxo1hY2ODkJAQXL58WZxgiYhIdHZcSJhII3FxcdiwYUOl9g0bNvCxQKIqGEVyVVJSAn9/f6xevbrKz5csWYIVK1YgISEBv/32G2xtbTFgwAA8evRIx5ESEZE+eDJbICe0qEKDBkBoqHJLJm/t2rVo27Ztpfb27dsjISFBhIiI9JtRJFeDBg3C/PnzMWLEiEqfKRQKxMfH48MPP8SwYcPg5+eHLVu24NatW5UqXJqInPwAcrlcgKiJnp9MJsPIYXcFOx/vb9InQt/ff2dvzQktqtW8OfDll8otmbzc3Fw0bty4UruLiwtuc9ITokqMIrmqSWZmJnJzcxESEqJqc3R0RHBwMJKTk6s9rqysDFKpVO0FAImHyzHh3UKtx02kiVcGFuDy5donQ7y/yRDU9f7WhL01x1xV69Ej4MoV5ZZMnpeXF44fP16p/fjx4/Dw8BAhIiL9ZvTJVW5uLgDAzc1Nrd3NzU31WVXi4uLg6Oioenl5eak+SzvL2aVIP1y/XnlhR03w/iZDUNf7WxOqxwLLeL9XcuEC0KqVcksmb9y4cZg6dSo2btyIrKwsZGVlYcOGDYiMjMS4cePEDo9I73C2wGpER0cjKipK9V4qlar+APXztxArLCI13t7muHix9n+A8v4mQ1DX+1sTHHNFpJnp06fj3r17mDBhAsrLywEA1tbWmDlzJqKjo0WOjkj/GH3lyt3dHQCQl5en1p6Xl6f6rCpWVlZwcHBQewHAiyGW+Gyto/YCJqqFnw40RKtWtf9nzPubDEFd729NcLZAMmSrV6+Gt7c3rK2tERwcjFOnTlW776ZNmyCRSNRe1tbWGvclkUiwePFi5Ofn4+TJkzh79iwKCgoQExMjxKUQGR2jT66aN28Od3d3JCYmqt
qkUil+++03dO/evdbn+2SFE8zMjP7XRgbC3Nwc3/7gLNj5eH+TPhH6/v47rnNFhmrHjh2IiopCbGwsUlNT4e/vjwEDBuDOnTvVHuPg4IDbt2+rXllZWbXu187ODl26dEGHDh1gZWX1PJdAZNSM4rHA4uJiXLlyRfU+MzMTZ86cQcOGDdG0aVNMnToV8+fPR6tWrdC8eXPMnj0bHh4eGD58uHhBExGRaDhbIBmq5cuXY9y4cYiIiAAAJCQkYO/evdiwYQNmzZpV5TESiaTGp3VqUlJSgkWLFiExMRF37typNKPstWvX6nReImNlFMnV77//jn79+qnePxlLEh4ejk2bNmHGjBkoKSnBv/71Lzx48AC9evXCgQMHalUWJyIiw1BWVoaysjLV+yezYf6dHWcLrF7nzoBCIXYUJuXpe9TKyqrK6lB5eTlSUlLUxjqZmZkhJCSkxhmQi4uL0axZM8jlcnTu3BkLFy5E+/btNYrtnXfewdGjRzFmzBg0btwYEolEw6siMk1GkVz17dsXihr+j0AikWDevHmYN2+eDqMiIiIxxMXFYe7cuTXu82RCi+LyCsjlCpiZ8Q9GEs/fZ2wFgNjYWMyZM6fSfnfv3oVMJqtyBuSLFy9Wee42bdpgw4YN8PPzQ2FhIT7++GP06NED58+fR5MmTZ4Z2/79+7F371707NlT8wsiMmEcXEFEREYlOjoahYWFqldOTk6lfZ6sc6VQACXlrF6pycgAundXbkkncnJy1O5ZIWfh6969O8LCwhAQEIA+ffpg9+7dcHFxwdq1azU6vkGDBmjYsKFg8RAZOyZXRERkVKqbDVNtn3pmqPe/ahXHXT2lpAQ4eVK5JZ14+n6tbsIIZ2dnmJub13oG5L+zsLBAp06d1Maq1+Sjjz5CTEwMSktLNdqfyNQxuSIiIpMjkUg47ooMjqWlJQIDA9VmQJbL5UhMTNR4BmSZTIZz586hcePGGu2/bNkyHDx4EG5ubujYsSM6d+6s9iIidUYx5oqIiKi27K3r4UHpYxSxckUGJCoqCuHh4QgKCkLXrl0RHx+PkpIS1eyBYWFh8PT0RFxcHABg3rx56NatG3x8fPDgwQMsXboUWVlZeOeddzTqjzMrE9UOkysiIjJJdlYWAB6iiJUrMiCjRo1Cfn4+YmJikJubi4CAABw4cEA1yUV2drbaeoX379/HuHHjkJubiwYNGiAwMBAnTpxAu3btNOovNjZWK9dBZKyYXBERkUniQsLV8PYGtm5VbkkvTZo0CZMmTarysyNHjqi9/+STT/DJJ588V38PHjzArl27cPXqVUyfPh0NGzZEamoq3Nzc4Onp+VznJjI2HHNVS9u2lNY47TuRLslkMowcdlew840cppzml0gfKBQKbNuivUH0qjFXZY+11odBatgQGD1auSWTl5aWhtatW2Px4sX4+OOP8eDBAwDA7t27BZ3VkMhYMLmqpcWLirFpA2fMIf3wysACXL4sF+x8ly/L8crAAsHOR/Q8Nm0oxeJFxVo7/5O1rvhY4FPy84HVq5VbMnlRUVEYO3YsLl++DGtra1X74MGD8euvv4oYGZF+YnJVBym/81tO0g/XrwtfZdLGOYnq4nct/7f2r8oVkys1OTnApEnKLZm806dP4913363U7unpidzcXBEiItJvTK7qIDDIQuwQiAAA3t7mBnFOoroI0vJ/a+05FTvRM1lZWUEqlVZqv3TpElxcXESIiEi/MbmqpZmz7DD2/+qLHQYRAOCnAw3RqpVw/4xbtTLDTwc4zoL0w9j/q4+Zs+y0dn57PhZI9ExDhw7FvHnz8PixspIskUiQnZ2NmTNnYuTIkSJHR6R/mFzVUmhYfUgkErHDIAIAmJub49sfnAU737c/OMPcnJUr0g8SiQShYdr7MuvJmCs+FkhUvWXLlqG4uBiurq54+PAh+vTpAx8fH9jb22PBggVih0ekdzgVOxERmSQ7a+Vjh1xE+Cn29sBLLym3ZPIcHR1x6NAhJCUlIS0tDcXFxejcuTNCQkLEDo1ILzG5IiIik6SqXD3iJEVqWrUCDh4UOwrSM7169UKvXr3EDoNI7zG5IiIik+TA2QKrJpMBJSWArS3Ax4RN0ooVKzTed/LkyVqMhMjwMLmqpSMPW8DGnL820h8PH1YAuCPIuXh/k74R8v5+mh1nC6za2bNAYCCQkgJ07ix2NCSCTz75RO19fn4+SktL4eTkBAB48OAB6tevD1dXVyZXRE8RdUKLzZs3Y+/evar3M2bMgJOTE3r06IGsrCwRIyMiImPHRYSJqpaZmal6LViwAAEBAUhPT0dBQQEKCgqQnp6Ozp0746OPPhI7VCK9I2pytXDhQtjY2AAAkpOTsXr1aixZsgTOzs6IjIwUMzQiIjJyqspVeQXkcoXI0RDpp9mzZ2PlypVo06aNqq1Nmzb45JNP8OGHH4oYGZF+EvX5n5ycHPj4+AAAvv/+e4wcORL/+te/0LNnT/Tt21fM0IiIyMjZWylnC1QogNLHMlUli4j+cvv2bVRUVK7uymQy5OXliRARkX4TtXJlZ2eHe/fuAQD++9//on///gAAa2trPHz4ULB+ZDIZZs+ejebNm8PGxgYtW7bERx99BIWC31QSEZkqawszmJsp1y3kuCuiqr344ot49913kZqaqmpLSUnB+PHjOR07URVE/Zquf//+eOedd9CpUydcunQJgwcPBgCcP38e3t7egvWzePFirFmzBps3b0b79u3x+++/IyIiAo6OjhyISURkoiQSCeyt6+FB6WMUlz0GYC12SPqhY0fgzh3gf5MXkGnbsGEDwsPDERQUBAsLZbW3oqICAwYMwOeffy5ydET6R9TkavXq1fjwww+Rk5ODb7/9Fo0aNQKg/EbkzTffFKyfEydOYNiwYXj55ZcBAN7e3vj6669x6tQpwfogEoNMJkPc62cFO9+Rr25j4LgmkEgkgp2TqK7kcjk+fz9Dq33YWSmTK05q8TcWFoCLi9hRkJ5wcXHBvn37cOnSJVy8eBEA0LZtW7Ru3VrkyIj0k6jJlZOTE1atWlWpfe7cuYL206NHD6xbtw6XLl1C69atcfbsWSQlJWH58uXVHlNWVoaysjLVe6lUKmhMREL4aPhZ3L5a+0doq7u/dy/LgoWVGULCPQWLkaiuEt67iLQj97XaB2cMrMLVq0BkJPDJJ0DLlmJHQ3qidevWTKiINKDz5CotLU3jff38/ATpc9asWZBKpWjbti3Mzc0hk8mwYMEChIaGVntMXFyc4EkekdDuZD+q03E13d9X/yhCSPjzREUkjOt/Fmu9D3suJFxZYSGwZw8wZ47YkZAekMlk2LRpExITE3Hnzh3I5XK1z3/++WeRIiPSTzpPrgICAiCRSKBQKJ756JFMJhOkz507d2Lbtm346quv0L59e5w5cwZTp06Fh4cHwsOr/isyOjoaUVFRqvdSqRReXl6CxEMkFNem1rh5qbTWx9V0f7fsZC9YfETPw7uDHe7nFmi1jyeVK05oQVS1KVOmYNOmTXj55ZfRoUMHPjZO9Aw6T64yMzNVP//xxx+YNm0ap
<base64-encoded PNG image data omitted (display_data output of the plot_objective(res) cell below)>", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "import matplotlib.pyplot as plt\n", + "from skopt.plots import plot_objective\n", + "\n", + "plot_objective(res)\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + " fun: 0.10675194263458251\n", + " x: [True, True, 6, 2048]\n", + " func_vals: [ 1.373e-01 1.390e-01 ... 1.127e-01 1.138e-01]\n", + " x_iters: [[True, True, 5, 1300], [False, True, 5, 990], [True, True, 7, 1800], [False, False, 10, 1692], [False, True, 6, 1075], [True, False, 3, 291], [False, True, 3, 514], [False, False, 11, 1569], [False, False, 7, 1915], [False, True, 10, 1514], [False, False, 11, 1527], [False, False, 12, 2033], [False, True, 9, 3], [False, True, 1, 2004], [True, True, 12, 1], [False, False, 6, 2048], [False, False, 4, 2048], [False, False, 10, 1], [False, True, 11, 2048], [False, True, 9, 2048], [False, False, 8, 2017], [False, False, 6, 1], [False, True, 4, 1], [False, False, 6, 1587], [False, False, 9, 1056], [True, True, 12, 1450], [False, True, 6, 2048], [False, False, 6, 2048], [False, False, 6, 2048], [False, True, 6, 2048], [False, True, 6, 2048], [False, True, 5, 2048], [False, True, 6, 1464], [False, True, 8, 1], [True, True, 12, 1798], [True, False, 3, 2048], [True, True, 11, 683], [False, True, 11, 1], [True, True, 2, 1], [False, True, 11, 1238], [True, True, 11, 1260], [True, False, 6, 1295], [True, True, 6, 1292], [False, False, 12, 1250], [False, False, 12, 1200], [True, False, 4, 1250], [False, False, 12, 1191], [False, False, 12, 1180], [True, False, 10, 906], [False, False, 12, 1192], [True, True, 10, 2044], [False, False, 6, 1310], [False, False, 8, 1122], [True, False, 5, 4], [False, False, 7, 322], [False, False, 12, 1246], [False, False, 12, 1247], [False, False, 12, 1252], [True, True, 12, 811], [True, False, 6, 2048], [True, True, 12, 998], [False, True, 12, 1021], [False, True, 12, 1021], [False, True, 12, 1019], [True, False, 6, 759], [True, False, 6, 1064], [False, True, 12, 991], [True, True, 9, 533], [False, False, 11, 956], [False, False, 1, 3], [True, True, 6, 2048], [True, True, 6, 2048], [True, True, 6, 2048], [True, True, 6, 2048], [False, False, 7, 986], [True, True, 6, 2048], [True, True, 6, 2048], [True, True, 6, 2048], [True, True, 6, 2048], [True, True, 6, 2048], [True, True, 6, 2048], [True, True, 6, 2048], [True, True, 6, 2048], [True, True, 6, 2048], [True, True, 6, 2048], [True, True, 6, 2048], [True, True, 6, 2048], [True, True, 6, 2048], [True, True, 6, 2048], [True, True, 6, 2048], [True, True, 6, 2048], [True, True, 6, 2048], [True, True, 6, 2048], [True, True, 6, 2048], [True, True, 6, 2048], [True, True, 6, 2048], [True, True, 6, 2048], [True, True, 6, 2048], [True, True, 6, 2048], [True, True, 6, 2048]]\n", + " models: [GaussianProcessRegressor(kernel=1**2 * Matern(length_scale=[1, 1, 1, 1], nu=2.5) + WhiteKernel(noise_level=1),\n", + " n_restarts_optimizer=2, noise='gaussian',\n", + " normalize_y=True, random_state=1248744097), GaussianProcessRegressor(kernel=1**2 * Matern(length_scale=[1, 1, 1, 1], nu=2.5) + WhiteKernel(noise_level=1),\n", + " n_restarts_optimizer=2, noise='gaussian',\n", + " normalize_y=True, random_state=1248744097), GaussianProcessRegressor(kernel=1**2 * Matern(length_scale=[1, 1, 1, 1], nu=2.5) + WhiteKernel(noise_level=1),\n", + " n_restarts_optimizer=2, noise='gaussian',\n", + " normalize_y=True, random_state=1248744097), 
GaussianProcessRegressor(kernel=1**2 * Matern(length_scale=[1, 1, 1, 1], 
nu=2.5) + WhiteKernel(noise_level=1),\n", + " n_restarts_optimizer=2, noise='gaussian',\n", + " normalize_y=True, random_state=1248744097), GaussianProcessRegressor(kernel=1**2 * Matern(length_scale=[1, 1, 1, 1], nu=2.5) + WhiteKernel(noise_level=1),\n", + " n_restarts_optimizer=2, noise='gaussian',\n", + " normalize_y=True, random_state=1248744097), GaussianProcessRegressor(kernel=1**2 * Matern(length_scale=[1, 1, 1, 1], nu=2.5) + WhiteKernel(noise_level=1),\n", + " n_restarts_optimizer=2, noise='gaussian',\n", + " normalize_y=True, random_state=1248744097), GaussianProcessRegressor(kernel=1**2 * Matern(length_scale=[1, 1, 1, 1], nu=2.5) + WhiteKernel(noise_level=1),\n", + " n_restarts_optimizer=2, noise='gaussian',\n", + " normalize_y=True, random_state=1248744097), GaussianProcessRegressor(kernel=1**2 * Matern(length_scale=[1, 1, 1, 1], nu=2.5) + WhiteKernel(noise_level=1),\n", + " n_restarts_optimizer=2, noise='gaussian',\n", + " normalize_y=True, random_state=1248744097)]\n", + " space: Space([Categorical(categories=(True, False), prior=None),\n", + " Categorical(categories=(True, False), prior=None),\n", + " Integer(low=1, high=12, prior='uniform', transform='normalize'),\n", + " Integer(low=1, high=2048, prior='uniform', transform='normalize')])\n", + " random_state: RandomState(MT19937)\n", + " specs: args: func: \n", + " dimensions: Space([Categorical(categories=(True, False), prior=None),\n", + " Categorical(categories=(True, False), prior=None),\n", + " Integer(low=1, high=12, prior='uniform', transform='normalize'),\n", + " Integer(low=1, high=2048, prior='uniform', transform='normalize')])\n", + " base_estimator: GaussianProcessRegressor(kernel=1**2 * Matern(length_scale=[1, 1, 1, 1], nu=2.5),\n", + " n_restarts_optimizer=2, noise='gaussian',\n", + " normalize_y=True, random_state=1248744097)\n", + " n_calls: 100\n", + " n_random_starts: None\n", + " n_initial_points: 10\n", + " initial_point_generator: random\n", + " acq_func: gp_hedge\n", + " acq_optimizer: auto\n", + " x0: None\n", + " y0: None\n", + " random_state: RandomState(MT19937)\n", + " verbose: False\n", + " callback: None\n", + " n_points: 10000\n", + " n_restarts_optimizer: 5\n", + " xi: 0.01\n", + " kappa: 1.96\n", + " n_jobs: 1\n", + " model_queue_size: None\n", + " function: base_minimize" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "res" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.10" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/llama-cpp-python/llama_cpp/__init__.py b/llama-cpp-python/llama_cpp/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..95c8819660a48e79d73a12caf4a0f7c90eeb2605 --- /dev/null +++ b/llama-cpp-python/llama_cpp/__init__.py @@ -0,0 +1,4 @@ +from .llama_cpp import * +from .llama import * + +__version__ = "0.2.69" \ No newline at end of file diff --git a/llama-cpp-python/llama_cpp/_internals.py b/llama-cpp-python/llama_cpp/_internals.py new file mode 100644 index 
0000000000000000000000000000000000000000..b404601d30504a208200ff0841aa5b1ba5033b23 --- /dev/null +++ b/llama-cpp-python/llama_cpp/_internals.py @@ -0,0 +1,815 @@ +from __future__ import annotations + +import os +import ctypes + +from typing import ( + List, + Optional, + Sequence, +) +from dataclasses import dataclass, field + +import numpy as np +import numpy.typing as npt + +from .llama_types import * +from .llama_grammar import LlamaGrammar +from ._utils import suppress_stdout_stderr + +import llama_cpp.llama_cpp as llama_cpp + + +# Python wrappers over llama.h structs + + +class _LlamaModel: + """Intermediate Python wrapper for a llama.cpp llama_model. + NOTE: For stability it's recommended you use the Llama class instead.""" + + _llama_free_model = None + # NOTE: this must be "saved" here to avoid exceptions when calling __del__ + + def __init__( + self, + *, + path_model: str, + params: llama_cpp.llama_model_params, + verbose: bool = True, + ): + self.path_model = path_model + self.params = params + self.verbose = verbose + + self._llama_free_model = llama_cpp._lib.llama_free_model # type: ignore + + self.model = None + + if not os.path.exists(path_model): + raise ValueError(f"Model path does not exist: {path_model}") + + with suppress_stdout_stderr(disable=verbose): + self.model = llama_cpp.llama_load_model_from_file( + self.path_model.encode("utf-8"), self.params + ) + + if self.model is None: + raise ValueError(f"Failed to load model from file: {path_model}") + + def __del__(self): + if self.model is not None and self._llama_free_model is not None: + self._llama_free_model(self.model) + self.model = None + + def vocab_type(self) -> int: + assert self.model is not None + return llama_cpp.llama_vocab_type(self.model) + + def n_vocab(self) -> int: + assert self.model is not None + return llama_cpp.llama_n_vocab(self.model) + + def n_ctx_train(self) -> int: + assert self.model is not None + return llama_cpp.llama_n_ctx_train(self.model) + + def n_embd(self) -> int: + assert self.model is not None + return llama_cpp.llama_n_embd(self.model) + + def rope_freq_scale_train(self) -> float: + assert self.model is not None + return llama_cpp.llama_rope_freq_scale_train(self.model) + + def desc(self) -> str: + assert self.model is not None + buf = ctypes.create_string_buffer(1024) + llama_cpp.llama_model_desc(self.model, buf, 1024) + return buf.value.decode("utf-8") + + def size(self) -> int: + assert self.model is not None + return llama_cpp.llama_model_size(self.model) + + def n_params(self) -> int: + assert self.model is not None + return llama_cpp.llama_model_n_params(self.model) + + def get_tensor(self, name: str) -> ctypes.c_void_p: + assert self.model is not None + return llama_cpp.llama_get_model_tensor(self.model, name.encode("utf-8")) + + def apply_lora_from_file( + self, + lora_path: str, + scale: float, + path_base_model: Optional[str], + n_threads: int, + ): + assert self.model is not None + return llama_cpp.llama_model_apply_lora_from_file( + self.model, + lora_path.encode("utf-8"), + scale, + path_base_model.encode("utf-8") + if path_base_model is not None + else ctypes.c_char_p(0), + n_threads, + ) + + # Vocab + + def token_get_text(self, token: int) -> str: + # TODO: Fix + assert self.model is not None + return llama_cpp.llama_token_get_text(self.model, token).decode("utf-8") + + def token_get_score(self, token: int) -> float: + assert self.model is not None + return llama_cpp.llama_token_get_score(self.model, token) + + def token_get_type(self, token: int) -> int: + assert 
self.model is not None + return llama_cpp.llama_token_get_type(self.model, token) + + # Special tokens + + def token_bos(self) -> int: + assert self.model is not None + return llama_cpp.llama_token_bos(self.model) + + def token_eos(self) -> int: + assert self.model is not None + return llama_cpp.llama_token_eos(self.model) + + def token_nl(self) -> int: + assert self.model is not None + return llama_cpp.llama_token_nl(self.model) + + def token_prefix(self) -> int: + assert self.model is not None + return llama_cpp.llama_token_prefix(self.model) + + def token_middle(self) -> int: + assert self.model is not None + return llama_cpp.llama_token_middle(self.model) + + def token_suffix(self) -> int: + assert self.model is not None + return llama_cpp.llama_token_suffix(self.model) + + def token_eot(self) -> int: + assert self.model is not None + return llama_cpp.llama_token_eot(self.model) + + # Tokenization + + def tokenize(self, text: bytes, add_bos: bool, special: bool): + assert self.model is not None + n_ctx = self.n_ctx_train() + tokens = (llama_cpp.llama_token * n_ctx)() + n_tokens = llama_cpp.llama_tokenize( + self.model, text, len(text), tokens, n_ctx, add_bos, special + ) + if n_tokens < 0: + n_tokens = abs(n_tokens) + tokens = (llama_cpp.llama_token * n_tokens)() + n_tokens = llama_cpp.llama_tokenize( + self.model, text, len(text), tokens, n_tokens, add_bos, special + ) + if n_tokens < 0: + raise RuntimeError( + f'Failed to tokenize: text="{text}" n_tokens={n_tokens}' + ) + return list(tokens[:n_tokens]) + + def token_to_piece(self, token: int, special: bool = False) -> bytes: + assert self.model is not None + buf = ctypes.create_string_buffer(32) + llama_cpp.llama_token_to_piece(self.model, token, buf, 32, special) + return bytes(buf) + + def detokenize(self, tokens: List[int], special: bool = False) -> bytes: + assert self.model is not None + output = b"" + size = 32 + buffer = (ctypes.c_char * size)() + for token in tokens: + n = llama_cpp.llama_token_to_piece( + self.model, llama_cpp.llama_token(token), buffer, size, special + ) + assert n <= size + output += bytes(buffer[:n]) + # NOTE: Llama1 models automatically added a space at the start of the prompt + # this line removes a leading space if the first token is a beginning of sentence token + return ( + output[1:] if len(tokens) > 0 and tokens[0] == self.token_bos() and output[0:1] == b' ' else output + ) + + # Extra + def metadata(self) -> Dict[str, str]: + assert self.model is not None + metadata: Dict[str, str] = {} + buffer_size = 1024 + buffer = ctypes.create_string_buffer(buffer_size) + # zero the buffer + buffer.value = b'\0' * buffer_size + # iterate over model keys + for i in range(llama_cpp.llama_model_meta_count(self.model)): + nbytes = llama_cpp.llama_model_meta_key_by_index(self.model, i, buffer, buffer_size) + if nbytes > buffer_size: + buffer_size = nbytes + 1 + buffer = ctypes.create_string_buffer(buffer_size) + nbytes = llama_cpp.llama_model_meta_key_by_index(self.model, i, buffer, buffer_size) + key = buffer.value.decode("utf-8") + nbytes = llama_cpp.llama_model_meta_val_str_by_index(self.model, i, buffer, buffer_size) + if nbytes > buffer_size: + buffer_size = nbytes + 1 + buffer = ctypes.create_string_buffer(buffer_size) + nbytes = llama_cpp.llama_model_meta_val_str_by_index(self.model, i, buffer, buffer_size) + value = buffer.value.decode("utf-8") + metadata[key] = value + return metadata + + @staticmethod + def default_params(): + """Get the default llama_model_params.""" + return 
llama_cpp.llama_model_default_params() + + +class _LlamaContext: + """Intermediate Python wrapper for a llama.cpp llama_context. + NOTE: For stability it's recommended you use the Llama class instead.""" + + _llama_free = None + + def __init__( + self, + *, + model: _LlamaModel, + params: llama_cpp.llama_context_params, + verbose: bool = True, + ): + self.model = model + self.params = params + self.verbose = verbose + + self._llama_free = llama_cpp._lib.llama_free # type: ignore + self.ctx = None + + assert self.model.model is not None + + self.ctx = llama_cpp.llama_new_context_with_model( + self.model.model, self.params + ) + + if self.ctx is None: + raise ValueError("Failed to create llama_context") + + def __del__(self): + if self.ctx is not None and self._llama_free is not None: + self._llama_free(self.ctx) + self.ctx = None + + def n_ctx(self) -> int: + assert self.ctx is not None + return llama_cpp.llama_n_ctx(self.ctx) + + def pooling_type(self) -> int: + assert self.ctx is not None + return llama_cpp.llama_pooling_type(self.ctx) + + def kv_cache_clear(self): + assert self.ctx is not None + llama_cpp.llama_kv_cache_clear(self.ctx) + + def kv_cache_seq_rm(self, seq_id: int, p0: int, p1: int): + assert self.ctx is not None + llama_cpp.llama_kv_cache_seq_rm(self.ctx, seq_id, p0, p1) + + def kv_cache_seq_cp(self, seq_id_src: int, seq_id_dst: int, p0: int, p1: int): + assert self.ctx is not None + llama_cpp.llama_kv_cache_seq_cp(self.ctx, seq_id_src, seq_id_dst, p0, p1) + + def kv_cache_seq_keep(self, seq_id: int): + assert self.ctx is not None + llama_cpp.llama_kv_cache_seq_keep(self.ctx, seq_id) + + def kv_cache_seq_shift(self, seq_id: int, p0: int, p1: int, shift: int): + assert self.ctx is not None + llama_cpp.llama_kv_cache_seq_add(self.ctx, seq_id, p0, p1, shift) + + def get_state_size(self) -> int: + assert self.ctx is not None + return llama_cpp.llama_get_state_size(self.ctx) + + # TODO: copy_state_data + + # TODO: set_state_data + + # TODO: llama_load_session_file + + # TODO: llama_save_session_file + + def decode(self, batch: "_LlamaBatch"): + assert self.ctx is not None + assert batch.batch is not None + return_code = llama_cpp.llama_decode( + self.ctx, + batch.batch, + ) + if return_code != 0: + raise RuntimeError(f"llama_decode returned {return_code}") + + def set_n_threads(self, n_threads: int, n_threads_batch: int): + assert self.ctx is not None + llama_cpp.llama_set_n_threads(self.ctx, n_threads, n_threads_batch) + + def get_logits(self): + assert self.ctx is not None + return llama_cpp.llama_get_logits(self.ctx) + + def get_logits_ith(self, i: int): + assert self.ctx is not None + return llama_cpp.llama_get_logits_ith(self.ctx, i) + + def get_embeddings(self): + assert self.ctx is not None + return llama_cpp.llama_get_embeddings(self.ctx) + + # Sampling functions + + def set_rng_seed(self, seed: int): + assert self.ctx is not None + llama_cpp.llama_set_rng_seed(self.ctx, seed) + + def sample_repetition_penalties( + self, + candidates: "_LlamaTokenDataArray", + last_tokens_data: "llama_cpp.Array[llama_cpp.llama_token]", + penalty_last_n: int, + penalty_repeat: float, + penalty_freq: float, + penalty_present: float, + ): + assert self.ctx is not None + llama_cpp.llama_sample_repetition_penalties( + self.ctx, + llama_cpp.byref(candidates.candidates), + last_tokens_data, + penalty_last_n, + penalty_repeat, + penalty_freq, + penalty_present, + ) + + def sample_softmax(self, candidates: "_LlamaTokenDataArray"): + assert self.ctx is not None + llama_cpp.llama_sample_softmax( + 
self.ctx, + llama_cpp.byref(candidates.candidates), + ) + + def sample_top_k(self, candidates: "_LlamaTokenDataArray", k: int, min_keep: int): + assert self.ctx is not None + llama_cpp.llama_sample_top_k( + self.ctx, llama_cpp.byref(candidates.candidates), k, min_keep + ) + + def sample_top_p(self, candidates: "_LlamaTokenDataArray", p: float, min_keep: int): + assert self.ctx is not None + llama_cpp.llama_sample_top_p( + self.ctx, llama_cpp.byref(candidates.candidates), p, min_keep + ) + + def sample_min_p(self, candidates: "_LlamaTokenDataArray", p: float, min_keep: int): + assert self.ctx is not None + llama_cpp.llama_sample_min_p( + self.ctx, llama_cpp.byref(candidates.candidates), p, min_keep + ) + + def sample_tail_free( + self, candidates: "_LlamaTokenDataArray", z: float, min_keep: int + ): + assert self.ctx is not None + llama_cpp.llama_sample_tail_free( + self.ctx, llama_cpp.byref(candidates.candidates), z, min_keep + ) + + def sample_typical( + self, candidates: "_LlamaTokenDataArray", p: float, min_keep: int + ): + assert self.ctx is not None + llama_cpp.llama_sample_typical( + self.ctx, llama_cpp.byref(candidates.candidates), p, min_keep + ) + + def sample_temp(self, candidates: "_LlamaTokenDataArray", temp: float): + assert self.ctx is not None + llama_cpp.llama_sample_temp( + self.ctx, llama_cpp.byref(candidates.candidates), temp + ) + + def sample_grammar(self, candidates: "_LlamaTokenDataArray", grammar: LlamaGrammar): + assert self.ctx is not None + assert grammar.grammar is not None + llama_cpp.llama_sample_grammar( + self.ctx, + llama_cpp.byref(candidates.candidates), + grammar.grammar, + ) + + def sample_token_mirostat( + self, + candidates: "_LlamaTokenDataArray", + tau: float, + eta: float, + m: int, + mu: llama_cpp.CtypesPointerOrRef[ctypes.c_float], + ) -> int: + assert self.ctx is not None + return llama_cpp.llama_sample_token_mirostat( + self.ctx, + llama_cpp.byref(candidates.candidates), + tau, + eta, + m, + mu, + ) + + def sample_token_mirostat_v2( + self, candidates: "_LlamaTokenDataArray", tau: float, eta: float, mu: llama_cpp.CtypesPointerOrRef[ctypes.c_float] + ) -> int: + assert self.ctx is not None + return llama_cpp.llama_sample_token_mirostat_v2( + self.ctx, + llama_cpp.byref(candidates.candidates), + tau, + eta, + mu, + ) + + def sample_token_greedy(self, candidates: "_LlamaTokenDataArray") -> int: + assert self.ctx is not None + return llama_cpp.llama_sample_token_greedy( + self.ctx, + llama_cpp.byref(candidates.candidates), + ) + + def sample_token(self, candidates: "_LlamaTokenDataArray") -> int: + assert self.ctx is not None + return llama_cpp.llama_sample_token( + self.ctx, + llama_cpp.byref(candidates.candidates), + ) + + # Grammar + def grammar_accept_token(self, grammar: LlamaGrammar, token: int): + assert self.ctx is not None + assert grammar.grammar is not None + llama_cpp.llama_grammar_accept_token(self.ctx, grammar.grammar, token) + + def reset_timings(self): + assert self.ctx is not None + llama_cpp.llama_reset_timings(self.ctx) + + def print_timings(self): + assert self.ctx is not None + llama_cpp.llama_print_timings(self.ctx) + + # Utility functions + @staticmethod + def default_params(): + """Get the default llama_context_params.""" + return llama_cpp.llama_context_default_params() + + +class _LlamaBatch: + _llama_batch_free = None + + def __init__( + self, *, n_tokens: int, embd: int, n_seq_max: int, verbose: bool = True + ): + self._n_tokens = n_tokens + self.embd = embd + self.n_seq_max = n_seq_max + self.verbose = verbose + + 
self._llama_batch_free = llama_cpp._lib.llama_batch_free # type: ignore + + self.batch = None + self.batch = llama_cpp.llama_batch_init( + self._n_tokens, self.embd, self.n_seq_max + ) + + def __del__(self): + if self.batch is not None and self._llama_batch_free is not None: + self._llama_batch_free(self.batch) + self.batch = None + + def n_tokens(self) -> int: + assert self.batch is not None + return self.batch.n_tokens + + def reset(self): + assert self.batch is not None + self.batch.n_tokens = 0 + + def set_batch(self, batch: Sequence[int], n_past: int, logits_all: bool): + assert self.batch is not None + n_tokens = len(batch) + self.batch.n_tokens = n_tokens + for i in range(n_tokens): + self.batch.token[i] = batch[i] + self.batch.pos[i] = n_past + i + self.batch.seq_id[i][0] = 0 + self.batch.n_seq_id[i] = 1 + self.batch.logits[i] = logits_all + self.batch.logits[n_tokens - 1] = True + + def add_sequence(self, batch: Sequence[int], seq_id: int, logits_all: bool): + assert self.batch is not None + n_tokens = len(batch) + n_tokens0 = self.batch.n_tokens + self.batch.n_tokens += n_tokens + for i in range(n_tokens): + j = n_tokens0 + i + self.batch.token[j] = batch[i] + self.batch.pos[j] = i + self.batch.seq_id[j][0] = seq_id + self.batch.n_seq_id[j] = 1 + self.batch.logits[j] = logits_all + self.batch.logits[n_tokens - 1] = True + + +class _LlamaTokenDataArray: + def __init__(self, *, n_vocab: int): + self.n_vocab = n_vocab + self.candidates_data = np.array( + [], + dtype=np.dtype( + [("id", np.intc), ("logit", np.single), ("p", np.single)], align=True + ), + ) + self.candidates_data.resize(3, self.n_vocab, refcheck=False) + self.candidates = llama_cpp.llama_token_data_array( + data=self.candidates_data.ctypes.data_as(llama_cpp.llama_token_data_p), + size=self.n_vocab, + sorted=False, + ) + self.default_candidates_data_id = np.arange(self.n_vocab, dtype=np.intc) # type: ignore + self.default_candidates_data_p = np.zeros(self.n_vocab, dtype=np.single) + + def copy_logits(self, logits: npt.NDArray[np.single]): + self.candidates_data["id"][:] = self.default_candidates_data_id + self.candidates_data["logit"][:] = logits + self.candidates_data["p"][:] = self.default_candidates_data_p + self.candidates.data = self.candidates_data.ctypes.data_as( + llama_cpp.llama_token_data_p + ) + self.candidates.sorted = ctypes.c_bool(False) + self.candidates.size = ctypes.c_size_t(self.n_vocab) + + +# Python wrappers over common/common +def _tokenize(model: _LlamaModel, text: str, add_bos: bool, special: bool) -> list[int]: + assert model.model is not None + n_tokens = len(text) + 1 if add_bos else len(text) + result = (llama_cpp.llama_token * n_tokens)() + n_tokens = llama_cpp.llama_tokenize( + model.model, + text.encode("utf-8"), + len(text), + result, + n_tokens, + add_bos, + special, + ) + if n_tokens < 0: + result = (llama_cpp.llama_token * -n_tokens)() + check = llama_cpp.llama_tokenize( + model.model, + text.encode("utf-8"), + len(text), + result, + len(result), + add_bos, + special, + ) + if check != -n_tokens: + raise RuntimeError(f'Failed to tokenize: text="{text}" n_tokens={n_tokens}') + else: + result = result[:n_tokens] + return list(result) + + +def _token_to_piece(model: _LlamaModel, token: int, special: bool = False) -> str: + assert model.model is not None + result = (ctypes.c_char * 8)(0) + n_tokens = llama_cpp.llama_token_to_piece(model.model, token, result, len(result), special) + if n_tokens < 0: + result = (ctypes.c_char * -n_tokens)(0) + check = 
llama_cpp.llama_token_to_piece(model.model, token, result, len(result), special) + if check != -n_tokens: + raise RuntimeError(f"Failed to get piece: token={token}") + else: + result = result[:n_tokens] + return bytes(result).decode("utf-8") + + +def _detokenize_spm(model: _LlamaModel, tokens: List[int]) -> str: + bos_id = model.token_bos() + result = "" + for i, token in enumerate(tokens): + piece = _token_to_piece(model, token) + if ( + (tokens[0] == bos_id and i == 1) or (tokens[0] != bos_id and i == 0) + ) and piece[0] == " ": + piece = piece[1:] + result += piece + return result + + +def _detokenize_bpe(model: _LlamaModel, tokens: List[int]) -> str: + result = "" + for token in tokens: + piece = _token_to_piece(model, token) + result += piece + return result + + +def _should_add_bos(model: _LlamaModel) -> bool: + assert model.model is not None + add_bos = llama_cpp.llama_add_bos_token(model.model) + if add_bos != -1: + return add_bos != 0 + else: + return llama_cpp.llama_vocab_type(model.model) == llama_cpp.LLAMA_VOCAB_TYPE_SPM + + +# Embedding functions + + +def _normalize_embedding(embedding): + norm = float(np.linalg.norm(embedding)) + if norm == 0.0: + return embedding + return [v / norm for v in embedding] + + +# Python wrappers over common/sampling structs + + +@dataclass +class _LlamaSamplingParams: + n_prev: int = 64 + n_probs: int = 0 + top_k: int = 40 + top_p: float = 0.95 + min_p: float = 0.05 + tfs_z: float = 1.00 + typical_p: float = 1.00 + temp: float = 0.80 + penalty_last_n: int = 64 + penalty_repeat: float = 1.10 + penalty_freq: float = 0.00 + penalty_present: float = 0.00 + mirostat: int = 0 + mirostat_tau: float = 5.00 + mirostat_eta: float = 0.10 + penalize_nl: bool = True + + grammar: str = "" + + cfg_negative_prompt: str = "" + cfg_scale: float = 1.00 + + logit_bias: dict[int, float] = field(default_factory=dict) + + +@dataclass +class _LlamaSamplingContext: + params: _LlamaSamplingParams = field(default_factory=_LlamaSamplingParams) + mirostat_mu: ctypes.c_float = field(default_factory=ctypes.c_float) + grammar: Optional[LlamaGrammar] = None + # NOTE: Missing parsed_grammar + prev: list[int] = field(default_factory=list) + cur: list[llama_cpp.llama_token_data] = field(default_factory=list) + + def reset(self): + self.prev = [] + self.cur = [] + if self.grammar is not None: + self.grammar.reset() + + def cp(self): + return _LlamaSamplingContext( + params=self.params, + mirostat_mu=self.mirostat_mu, + grammar=self.grammar, + prev=self.prev.copy(), + cur=self.cur.copy(), + ) + + def last(self) -> Optional[int]: + if len(self.prev) > 0: + return self.prev[-1] + else: + return None + + def prev_str(self, ctx_main: _LlamaContext, n: int) -> str: + return ctx_main.model.detokenize(self.prev[-n:]).decode("utf-8") + + def sample( + self, ctx_main: _LlamaContext, idx: int = 0, logits_array: Optional[npt.NDArray[np.single]] = None + ): + n_vocab = ctx_main.model.n_vocab() + id: int = 0 + + if logits_array is None: + logits = ctx_main.get_logits_ith(idx) + logits_array = np.array( + ctypes.cast(logits, ctypes.POINTER(ctypes.c_float * n_vocab)).contents, + dtype=np.single, + ) + + # apply logit_bias + for token, logit_bias in self.params.logit_bias.items(): + logits_array[token] += logit_bias + + token_data_array = _LlamaTokenDataArray( + n_vocab=n_vocab + ) # TODO: Only create this once + token_data_array.copy_logits(logits_array) + + # apply penalties + if len(self.prev) > 0: + nl_token = ctx_main.model.token_nl() + nl_logit = logits_array[nl_token] + last_tokens = 
self.prev[-self.params.penalty_last_n:] + last_tokens_size = min(len(last_tokens), self.params.penalty_last_n) + if last_tokens_size > 0: + last_tokens_p = (llama_cpp.llama_token * len(last_tokens))(*last_tokens) + ctx_main.sample_repetition_penalties( + token_data_array, + last_tokens_p, + last_tokens_size, + self.params.penalty_repeat, + self.params.penalty_freq, + self.params.penalty_present, + ) + if not self.params.penalize_nl: + token_data_array.candidates_data["logit"][nl_token] = nl_logit + + if self.grammar is not None: + ctx_main.sample_grammar(token_data_array, self.grammar) + + if self.params.temp < 0: + ctx_main.sample_softmax(token_data_array) + id = token_data_array.candidates_data["id"][0] + elif self.params.temp == 0: + id = ctx_main.sample_token_greedy(token_data_array) + else: + if self.params.mirostat == 1: + mirostat_m = 100 + ctx_main.sample_temp(token_data_array, self.params.temp) + id = ctx_main.sample_token_mirostat( + token_data_array, + self.params.mirostat_tau, + self.params.mirostat_eta, + mirostat_m, + ctypes.pointer(self.mirostat_mu), + ) + elif self.params.mirostat == 2: + ctx_main.sample_temp(token_data_array, self.params.temp) + id = ctx_main.sample_token_mirostat_v2( + token_data_array, + self.params.mirostat_tau, + self.params.mirostat_eta, + ctypes.pointer(self.mirostat_mu), + ) + else: + min_keep = max(1, self.params.n_probs) + ctx_main.sample_top_k( + token_data_array, self.params.top_k, min_keep=min_keep + ) + ctx_main.sample_tail_free( + token_data_array, self.params.tfs_z, min_keep=min_keep + ) + ctx_main.sample_typical( + token_data_array, self.params.typical_p, min_keep=min_keep + ) + ctx_main.sample_top_p( + token_data_array, self.params.top_p, min_keep=min_keep + ) + ctx_main.sample_min_p( + token_data_array, self.params.min_p, min_keep=min_keep + ) + ctx_main.sample_temp(token_data_array, self.params.temp) + id = ctx_main.sample_token(token_data_array) + return id + + def accept(self, ctx_main: _LlamaContext, id: int, apply_grammar: bool): + if apply_grammar and self.grammar is not None: + ctx_main.grammar_accept_token(self.grammar, id) + self.prev.append(id) diff --git a/llama-cpp-python/llama_cpp/_logger.py b/llama-cpp-python/llama_cpp/_logger.py new file mode 100644 index 0000000000000000000000000000000000000000..7638170a9718404493a4a63db634bf77cdadeaeb --- /dev/null +++ b/llama-cpp-python/llama_cpp/_logger.py @@ -0,0 +1,37 @@ +import sys +import ctypes +import logging + +import llama_cpp + +# enum ggml_log_level { +# GGML_LOG_LEVEL_ERROR = 2, +# GGML_LOG_LEVEL_WARN = 3, +# GGML_LOG_LEVEL_INFO = 4, +# GGML_LOG_LEVEL_DEBUG = 5 +# }; +GGML_LOG_LEVEL_TO_LOGGING_LEVEL = { + 2: logging.ERROR, + 3: logging.WARNING, + 4: logging.INFO, + 5: logging.DEBUG, +} + +logger = logging.getLogger("llama-cpp-python") + + +@llama_cpp.llama_log_callback +def llama_log_callback( + level: int, + text: bytes, + user_data: ctypes.c_void_p, +): + if logger.level <= GGML_LOG_LEVEL_TO_LOGGING_LEVEL[level]: + print(text.decode("utf-8"), end="", flush=True, file=sys.stderr) + + +llama_cpp.llama_log_set(llama_log_callback, ctypes.c_void_p(0)) + + +def set_verbose(verbose: bool): + logger.setLevel(logging.DEBUG if verbose else logging.ERROR) diff --git a/llama-cpp-python/llama_cpp/_utils.py b/llama-cpp-python/llama_cpp/_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..781b265010a90d783dcd4f6e45a743ba8518fc66 --- /dev/null +++ b/llama-cpp-python/llama_cpp/_utils.py @@ -0,0 +1,77 @@ +import os +import sys + +from typing import Any, Dict + +# 
Avoid "LookupError: unknown encoding: ascii" when open() called in a destructor +outnull_file = open(os.devnull, "w") +errnull_file = open(os.devnull, "w") + +STDOUT_FILENO = 1 +STDERR_FILENO = 2 + +class suppress_stdout_stderr(object): + # NOTE: these must be "saved" here to avoid exceptions when using + # this context manager inside of a __del__ method + sys = sys + os = os + + def __init__(self, disable: bool = True): + self.disable = disable + + # Oddly enough this works better than the contextlib version + def __enter__(self): + if self.disable: + return self + + self.old_stdout_fileno_undup = STDOUT_FILENO + self.old_stderr_fileno_undup = STDERR_FILENO + + self.old_stdout_fileno = self.os.dup(self.old_stdout_fileno_undup) + self.old_stderr_fileno = self.os.dup(self.old_stderr_fileno_undup) + + self.old_stdout = self.sys.stdout + self.old_stderr = self.sys.stderr + + self.os.dup2(outnull_file.fileno(), self.old_stdout_fileno_undup) + self.os.dup2(errnull_file.fileno(), self.old_stderr_fileno_undup) + + self.sys.stdout = outnull_file + self.sys.stderr = errnull_file + return self + + def __exit__(self, *_): + if self.disable: + return + + # Check if sys.stdout and sys.stderr have fileno method + self.sys.stdout = self.old_stdout + self.sys.stderr = self.old_stderr + + self.os.dup2(self.old_stdout_fileno, self.old_stdout_fileno_undup) + self.os.dup2(self.old_stderr_fileno, self.old_stderr_fileno_undup) + + self.os.close(self.old_stdout_fileno) + self.os.close(self.old_stderr_fileno) + + +class MetaSingleton(type): + """ + Metaclass for implementing the Singleton pattern. + """ + + _instances: Dict[type, Any] = {} + + def __call__(cls, *args: Any, **kwargs: Any) -> Any: + if cls not in cls._instances: + cls._instances[cls] = super(MetaSingleton, cls).__call__(*args, **kwargs) + return cls._instances[cls] + + +class Singleton(object, metaclass=MetaSingleton): + """ + Base class for implementing the Singleton pattern. 
+ """ + + def __init__(self): + super(Singleton, self).__init__() diff --git a/llama-cpp-python/llama_cpp/llama.py b/llama-cpp-python/llama_cpp/llama.py new file mode 100644 index 0000000000000000000000000000000000000000..17576a69b8b8122d63efe814d89dd82825f0eb6e --- /dev/null +++ b/llama-cpp-python/llama_cpp/llama.py @@ -0,0 +1,2045 @@ +from __future__ import annotations + +import os +import sys +import uuid +import time +import json +import ctypes +import fnmatch +import multiprocessing + +from typing import ( + List, + Optional, + Union, + Generator, + Sequence, + Iterator, + Deque, + Callable, + Dict, +) +from collections import deque +from pathlib import Path + + +from llama_cpp.llama_types import List + +from .llama_types import * +from .llama_grammar import LlamaGrammar +from .llama_cache import ( + BaseLlamaCache, + LlamaCache, # type: ignore + LlamaDiskCache, # type: ignore + LlamaRAMCache, # type: ignore +) +from .llama_tokenizer import BaseLlamaTokenizer, LlamaTokenizer +import llama_cpp.llama_cpp as llama_cpp +import llama_cpp.llama_chat_format as llama_chat_format + +from llama_cpp.llama_speculative import LlamaDraftModel + +import numpy as np +import numpy.typing as npt + +from ._internals import ( + _LlamaModel, # type: ignore + _LlamaContext, # type: ignore + _LlamaBatch, # type: ignore + _LlamaTokenDataArray, # type: ignore + _LlamaSamplingParams, # type: ignore + _LlamaSamplingContext, # type: ignore + _normalize_embedding, # type: ignore +) +from ._logger import set_verbose +from ._utils import suppress_stdout_stderr + + +class Llama: + """High-level Python wrapper for a llama.cpp model.""" + + __backend_initialized = False + + def __init__( + self, + model_path: str, + *, + # Model Params + n_gpu_layers: int = 0, + split_mode: int = llama_cpp.LLAMA_SPLIT_MODE_LAYER, + main_gpu: int = 0, + tensor_split: Optional[List[float]] = None, + vocab_only: bool = False, + use_mmap: bool = True, + use_mlock: bool = False, + kv_overrides: Optional[Dict[str, Union[bool, int, float, str]]] = None, + # Context Params + seed: int = llama_cpp.LLAMA_DEFAULT_SEED, + n_ctx: int = 512, + n_batch: int = 512, + n_threads: Optional[int] = None, + n_threads_batch: Optional[int] = None, + rope_scaling_type: Optional[int] = llama_cpp.LLAMA_ROPE_SCALING_TYPE_UNSPECIFIED, + pooling_type: int = llama_cpp.LLAMA_POOLING_TYPE_UNSPECIFIED, + rope_freq_base: float = 0.0, + rope_freq_scale: float = 0.0, + yarn_ext_factor: float = -1.0, + yarn_attn_factor: float = 1.0, + yarn_beta_fast: float = 32.0, + yarn_beta_slow: float = 1.0, + yarn_orig_ctx: int = 0, + logits_all: bool = False, + embedding: bool = False, + offload_kqv: bool = True, + flash_attn: bool = False, + # Sampling Params + last_n_tokens_size: int = 64, + # LoRA Params + lora_base: Optional[str] = None, + lora_scale: float = 1.0, + lora_path: Optional[str] = None, + # Backend Params + numa: Union[bool, int] = False, + # Chat Format Params + chat_format: Optional[str] = None, + chat_handler: Optional[llama_chat_format.LlamaChatCompletionHandler] = None, + # Speculative Decoding + draft_model: Optional[LlamaDraftModel] = None, + # Tokenizer Override + tokenizer: Optional[BaseLlamaTokenizer] = None, + # KV cache quantization + type_k: Optional[int] = None, + type_v: Optional[int] = None, + # Misc + verbose: bool = True, + # Extra Params + **kwargs, # type: ignore + ): + """Load a llama.cpp model from `model_path`. + + Examples: + Basic usage + + >>> import llama_cpp + >>> model = llama_cpp.Llama( + ... model_path="path/to/model", + ... 
) + >>> print(model("The quick brown fox jumps ", stop=["."])["choices"][0]["text"]) + the lazy dog + + Loading a chat model + + >>> import llama_cpp + >>> model = llama_cpp.Llama( + ... model_path="path/to/model", + ... chat_format="llama-2", + ... ) + >>> print(model.create_chat_completion( + ... messages=[{ + ... "role": "user", + ... "content": "what is the meaning of life?" + ... }] + ... )) + + Args: + model_path: Path to the model. + n_gpu_layers: Number of layers to offload to GPU (-ngl). If -1, all layers are offloaded. + split_mode: How to split the model across GPUs. See llama_cpp.LLAMA_SPLIT_* for options. + main_gpu: main_gpu interpretation depends on split_mode: LLAMA_SPLIT_NONE: the GPU that is used for the entire model. LLAMA_SPLIT_ROW: the GPU that is used for small tensors and intermediate results. LLAMA_SPLIT_LAYER: ignored + tensor_split: How split tensors should be distributed across GPUs. If None, the model is not split. + vocab_only: Only load the vocabulary no weights. + use_mmap: Use mmap if possible. + use_mlock: Force the system to keep the model in RAM. + kv_overrides: Key-value overrides for the model. + seed: RNG seed, -1 for random + n_ctx: Text context, 0 = from model + n_batch: Prompt processing maximum batch size + n_threads: Number of threads to use for generation + n_threads_batch: Number of threads to use for batch processing + rope_scaling_type: RoPE scaling type, from `enum llama_rope_scaling_type`. ref: https://github.com/ggerganov/llama.cpp/pull/2054 + pooling_type: Pooling type, from `enum llama_pooling_type`. + rope_freq_base: RoPE base frequency, 0 = from model + rope_freq_scale: RoPE frequency scaling factor, 0 = from model + yarn_ext_factor: YaRN extrapolation mix factor, negative = from model + yarn_attn_factor: YaRN magnitude scaling factor + yarn_beta_fast: YaRN low correction dim + yarn_beta_slow: YaRN high correction dim + yarn_orig_ctx: YaRN original context size + logits_all: Return logits for all tokens, not just the last token. Must be True for completion to return logprobs. + embedding: Embedding mode only. + offload_kqv: Offload K, Q, V to GPU. + flash_attn: Use flash attention. + last_n_tokens_size: Maximum number of tokens to keep in the last_n_tokens deque. + lora_base: Optional path to base model, useful if using a quantized base model and you want to apply LoRA to an f16 model. + lora_path: Path to a LoRA file to apply to the model. + numa: numa policy + chat_format: String specifying the chat format to use when calling create_chat_completion. + chat_handler: Optional chat handler to use when calling create_chat_completion. + draft_model: Optional draft model to use for speculative decoding. + tokenizer: Optional tokenizer to override the default tokenizer from llama.cpp. + verbose: Print verbose output to stderr. + type_k: KV cache data type for K (default: f16) + type_v: KV cache data type for V (default: f16) + + Raises: + ValueError: If the model path does not exist. + + Returns: + A Llama instance. 
+ """ + self.verbose = verbose + + set_verbose(verbose) + + if not Llama.__backend_initialized: + with suppress_stdout_stderr(disable=verbose): + llama_cpp.llama_backend_init() + Llama.__backend_initialized = True + + if isinstance(numa, bool): + self.numa = ( + llama_cpp.GGML_NUMA_STRATEGY_DISTRIBUTE + if numa + else llama_cpp.GGML_NUMA_STRATEGY_DISABLED + ) + else: + self.numa = numa + + if self.numa != llama_cpp.GGML_NUMA_STRATEGY_DISABLED: + with suppress_stdout_stderr(disable=verbose): + llama_cpp.llama_numa_init(self.numa) + + self.model_path = model_path + + # Model Params + self.model_params = llama_cpp.llama_model_default_params() + self.model_params.n_gpu_layers = ( + 0x7FFFFFFF if n_gpu_layers == -1 else n_gpu_layers + ) # 0x7FFFFFFF is INT32 max, will be auto set to all layers + self.model_params.split_mode = split_mode + self.model_params.main_gpu = main_gpu + self.tensor_split = tensor_split + self._c_tensor_split = None + if self.tensor_split is not None: + if len(self.tensor_split) > llama_cpp.LLAMA_MAX_DEVICES: + raise ValueError( + f"Attempt to split tensors that exceed maximum supported devices. Current LLAMA_MAX_DEVICES={llama_cpp.LLAMA_MAX_DEVICES}" + ) + # Type conversion and expand the list to the length of LLAMA_MAX_DEVICES + FloatArray = ctypes.c_float * llama_cpp.LLAMA_MAX_DEVICES + self._c_tensor_split = FloatArray( + *tensor_split # type: ignore + ) # keep a reference to the array so it is not gc'd + self.model_params.tensor_split = self._c_tensor_split + self.model_params.vocab_only = vocab_only + self.model_params.use_mmap = use_mmap if lora_path is None else False + self.model_params.use_mlock = use_mlock + + # kv_overrides is the original python dict + self.kv_overrides = kv_overrides + if kv_overrides is not None: + # _kv_overrides_array is a ctypes.Array of llama_model_kv_override Structs + kvo_array_len = len(kv_overrides) + 1 # for sentinel element + self._kv_overrides_array = ( + llama_cpp.llama_model_kv_override * kvo_array_len + )() + + for i, (k, v) in enumerate(kv_overrides.items()): + self._kv_overrides_array[i].key = k.encode("utf-8") + if isinstance(v, bool): + self._kv_overrides_array[i].tag = llama_cpp.LLAMA_KV_OVERRIDE_TYPE_BOOL + self._kv_overrides_array[i].value.bool_value = v + elif isinstance(v, int): + self._kv_overrides_array[i].tag = llama_cpp.LLAMA_KV_OVERRIDE_TYPE_INT + self._kv_overrides_array[i].value.int_value = v + elif isinstance(v, float): + self._kv_overrides_array[i].tag = llama_cpp.LLAMA_KV_OVERRIDE_TYPE_FLOAT + self._kv_overrides_array[i].value.float_value = v + elif isinstance(v, str): # type: ignore + v_bytes = v.encode("utf-8") + if len(v_bytes) > 128: # TODO: Make this a constant + raise ValueError(f"Value for {k} is too long: {v}") + v_bytes = v_bytes.ljust(128, b"\0") + self._kv_overrides_array[i].tag = llama_cpp.LLAMA_KV_OVERRIDE_TYPE_STR + # copy min(v_bytes, 128) to str_value + ctypes.memmove( + self._kv_overrides_array[i].value.str_value, + v_bytes, + min(len(v_bytes), 128), + ) + else: + raise ValueError(f"Unknown value type for {k}: {v}") + + self._kv_overrides_array[-1].key = ( + b"\0" # ensure sentinel element is zeroed + ) + self.model_params.kv_overrides = self._kv_overrides_array + + self.n_batch = min(n_ctx, n_batch) # ??? 
+ self.n_threads = n_threads or max(multiprocessing.cpu_count() // 2, 1) + self.n_threads_batch = n_threads_batch or multiprocessing.cpu_count() + + # Context Params + self.context_params = llama_cpp.llama_context_default_params() + self.context_params.seed = seed + self.context_params.n_ctx = n_ctx + self.context_params.n_batch = self.n_batch + self.context_params.n_threads = self.n_threads + self.context_params.n_threads_batch = self.n_threads_batch + self.context_params.rope_scaling_type = ( + rope_scaling_type + if rope_scaling_type is not None + else llama_cpp.LLAMA_ROPE_SCALING_TYPE_UNSPECIFIED + ) + self.context_params.pooling_type = pooling_type + self.context_params.rope_freq_base = ( + rope_freq_base if rope_freq_base != 0.0 else 0 + ) + self.context_params.rope_freq_scale = ( + rope_freq_scale if rope_freq_scale != 0.0 else 0 + ) + self.context_params.yarn_ext_factor = ( + yarn_ext_factor if yarn_ext_factor != 0.0 else 0 + ) + self.context_params.yarn_attn_factor = ( + yarn_attn_factor if yarn_attn_factor != 0.0 else 0 + ) + self.context_params.yarn_beta_fast = ( + yarn_beta_fast if yarn_beta_fast != 0.0 else 0 + ) + self.context_params.yarn_beta_slow = ( + yarn_beta_slow if yarn_beta_slow != 0.0 else 0 + ) + self.context_params.yarn_orig_ctx = yarn_orig_ctx if yarn_orig_ctx != 0 else 0 + self.context_params.logits_all = ( + logits_all if draft_model is None else True + ) # Must be set to True for speculative decoding + self.context_params.embeddings = embedding # TODO: Rename to embeddings + self.context_params.offload_kqv = offload_kqv + self.context_params.flash_attn = flash_attn + # KV cache quantization + if type_k is not None: + self.context_params.type_k = type_k + if type_v is not None: + self.context_params.type_v = type_v + # Sampling Params + self.last_n_tokens_size = last_n_tokens_size + + self.cache: Optional[BaseLlamaCache] = None + + self.lora_base = lora_base + self.lora_scale = lora_scale + self.lora_path = lora_path + + if not os.path.exists(model_path): + raise ValueError(f"Model path does not exist: {model_path}") + + self._model = _LlamaModel( + path_model=self.model_path, params=self.model_params, verbose=self.verbose + ) + + # Override tokenizer + self.tokenizer_ = tokenizer or LlamaTokenizer(self) + + # Set the default value for the context and correct the batch + if n_ctx == 0: + n_ctx = self._model.n_ctx_train() + self.n_batch = min(n_ctx, n_batch) + self.context_params.n_ctx = self._model.n_ctx_train() + self.context_params.n_batch = self.n_batch + + self._ctx = _LlamaContext( + model=self._model, + params=self.context_params, + verbose=self.verbose, + ) + + self._batch = _LlamaBatch( + n_tokens=self.n_batch, + embd=0, + n_seq_max=self.context_params.n_ctx, + verbose=self.verbose, + ) + + if self.lora_path: + if self._model.apply_lora_from_file( + self.lora_path, + self.lora_scale, + self.lora_base, + self.n_threads, + ): + raise RuntimeError( + f"Failed to apply LoRA from lora path: {self.lora_path} to base path: {self.lora_base}" + ) + + if self.verbose: + print(llama_cpp.llama_print_system_info().decode("utf-8"), file=sys.stderr) + + self.chat_format = chat_format + self.chat_handler = chat_handler + + self.draft_model = draft_model + + self._n_vocab = self.n_vocab() + self._n_ctx = self.n_ctx() + + self._token_nl = self.token_nl() + self._token_eos = self.token_eos() + + self._candidates = _LlamaTokenDataArray(n_vocab=self._n_vocab) + + self.n_tokens = 0 + self.input_ids: npt.NDArray[np.intc] = np.ndarray((n_ctx,), dtype=np.intc) + self.scores: 
npt.NDArray[np.single] = np.ndarray( + (n_ctx, self._n_vocab), dtype=np.single + ) + + self._mirostat_mu = ctypes.c_float( + 2.0 * 5.0 + ) # TODO: Move this to sampling context + + try: + self.metadata = self._model.metadata() + except Exception as e: + self.metadata = {} + if self.verbose: + print(f"Failed to load metadata: {e}", file=sys.stderr) + + if self.verbose: + print(f"Model metadata: {self.metadata}", file=sys.stderr) + + if ( + self.chat_format is None + and self.chat_handler is None + and "tokenizer.chat_template" in self.metadata + ): + chat_format = llama_chat_format.guess_chat_format_from_gguf_metadata( + self.metadata + ) + + if chat_format is not None: + self.chat_format = chat_format + if self.verbose: + print(f"Guessed chat format: {chat_format}", file=sys.stderr) + else: + template = self.metadata["tokenizer.chat_template"] + try: + eos_token_id = int(self.metadata["tokenizer.ggml.eos_token_id"]) + except: + eos_token_id = self.token_eos() + try: + bos_token_id = int(self.metadata["tokenizer.ggml.bos_token_id"]) + except: + bos_token_id = self.token_bos() + + eos_token = self._model.token_get_text(eos_token_id) + bos_token = self._model.token_get_text(bos_token_id) + + if self.verbose: + print(f"Using gguf chat template: {template}", file=sys.stderr) + print(f"Using chat eos_token: {eos_token}", file=sys.stderr) + print(f"Using chat bos_token: {bos_token}", file=sys.stderr) + + self.chat_handler = llama_chat_format.Jinja2ChatFormatter( + template=template, + eos_token=eos_token, + bos_token=bos_token, + stop_token_ids=[eos_token_id], + ).to_chat_handler() + + if self.chat_format is None and self.chat_handler is None: + self.chat_format = "llama-2" + if self.verbose: + print(f"Using fallback chat format: {chat_format}", file=sys.stderr) + + @property + def ctx(self) -> llama_cpp.llama_context_p: + assert self._ctx.ctx is not None + return self._ctx.ctx + + @property + def model(self) -> llama_cpp.llama_model_p: + assert self._model.model is not None + return self._model.model + + @property + def _input_ids(self) -> npt.NDArray[np.intc]: + return self.input_ids[: self.n_tokens] + + @property + def _scores(self) -> npt.NDArray[np.single]: + return self.scores[: self.n_tokens, :] + + @property + def eval_tokens(self) -> Deque[int]: + return deque(self.input_ids[: self.n_tokens].tolist(), maxlen=self._n_ctx) + + @property + def eval_logits(self) -> Deque[List[float]]: + return deque( + self.scores[: self.n_tokens, :].tolist(), + maxlen=self._n_ctx if self.context_params.logits_all else 1, + ) + + def tokenize( + self, text: bytes, add_bos: bool = True, special: bool = False + ) -> List[int]: + """Tokenize a string. + + Args: + text: The utf-8 encoded string to tokenize. + + Raises: + RuntimeError: If the tokenization failed. + + Returns: + A list of tokens. + """ + return self.tokenizer_.tokenize(text, add_bos, special) + + def detokenize( + self, tokens: List[int], prev_tokens: Optional[List[int]] = None + ) -> bytes: + """Detokenize a list of tokens. + + Args: + tokens: The list of tokens to detokenize. + prev_tokens: The list of previous tokens. Offset mapping will be performed if provided + + Returns: + The detokenized string. + """ + return self.tokenizer_.detokenize(tokens, prev_tokens=prev_tokens) + + def set_cache(self, cache: Optional[BaseLlamaCache]): + """Set the cache. + + Args: + cache: The cache to set. + """ + self.cache = cache + + def set_seed(self, seed: int): + """Set the random seed. + + Args: + seed: The random seed. 
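+
+ Example (illustrative; assumes an already constructed `Llama` instance `llm`):
+ >>> llm.set_seed(42)  # make subsequent sampling reproducible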
+ """ + assert self._ctx.ctx is not None + llama_cpp.llama_set_rng_seed(self._ctx.ctx, seed) + + def reset(self): + """Reset the model state.""" + self.n_tokens = 0 + + def eval(self, tokens: Sequence[int]): + """Evaluate a list of tokens. + + Args: + tokens: The list of tokens to evaluate. + """ + assert self._ctx.ctx is not None + assert self._batch.batch is not None + self._ctx.kv_cache_seq_rm(-1, self.n_tokens, -1) + for i in range(0, len(tokens), self.n_batch): + batch = tokens[i : min(len(tokens), i + self.n_batch)] + n_past = self.n_tokens + n_tokens = len(batch) + self._batch.set_batch( + batch=batch, n_past=n_past, logits_all=self.context_params.logits_all + ) + self._ctx.decode(self._batch) + # Save tokens + self.input_ids[n_past : n_past + n_tokens] = batch + # Save logits + if self.context_params.logits_all: + rows = n_tokens + cols = self._n_vocab + logits = self._ctx.get_logits()[: rows * cols] + self.scores[n_past : n_past + n_tokens, :].reshape(-1)[: :] = logits + else: + rows = 1 + cols = self._n_vocab + logits = self._ctx.get_logits()[: rows * cols] + self.scores[n_past + n_tokens - 1, :].reshape(-1)[: :] = logits + # Update n_tokens + self.n_tokens += n_tokens + + def sample( + self, + top_k: int = 40, + top_p: float = 0.95, + min_p: float = 0.05, + typical_p: float = 1.0, + temp: float = 0.80, + repeat_penalty: float = 1.1, + frequency_penalty: float = 0.0, + presence_penalty: float = 0.0, + tfs_z: float = 1.0, + mirostat_mode: int = 0, + mirostat_eta: float = 0.1, + mirostat_tau: float = 5.0, + penalize_nl: bool = True, + logits_processor: Optional[LogitsProcessorList] = None, + grammar: Optional[LlamaGrammar] = None, + idx: Optional[int] = None, + ): + """Sample a token from the model. + + Args: + top_k: The top-k sampling parameter. + top_p: The top-p sampling parameter. + temp: The temperature parameter. + repeat_penalty: The repeat penalty parameter. + + Returns: + The sampled token. 
+ """ + assert self._ctx is not None + assert self.n_tokens > 0 + + if idx is None: + logits: npt.NDArray[np.single] = self._scores[-1, :] + else: + logits = self._scores[idx, :] + + if logits_processor is not None: + logits[:] = ( + logits_processor(self._input_ids, logits) + if idx is None + else logits_processor(self._input_ids[: idx + 1], logits) + ) + + sampling_params = _LlamaSamplingParams( + top_k=top_k, + top_p=top_p, + min_p=min_p, + tfs_z=tfs_z, + typical_p=typical_p, + temp=temp, + penalty_last_n=self.last_n_tokens_size, + penalty_repeat=repeat_penalty, + penalty_freq=frequency_penalty, + penalty_present=presence_penalty, + mirostat=mirostat_mode, + mirostat_tau=mirostat_tau, + mirostat_eta=mirostat_eta, + penalize_nl=penalize_nl, + ) + sampling_context = _LlamaSamplingContext( + params=sampling_params, + grammar=grammar, + ) + sampling_context.prev = list(self.eval_tokens) + id = sampling_context.sample(ctx_main=self._ctx, logits_array=logits) + sampling_context.accept( + ctx_main=self._ctx, + id=id, + apply_grammar=grammar is not None, + ) + return id + + def generate( + self, + tokens: Sequence[int], + top_k: int = 40, + top_p: float = 0.95, + min_p: float = 0.05, + typical_p: float = 1.0, + temp: float = 0.80, + repeat_penalty: float = 1.1, + reset: bool = True, + frequency_penalty: float = 0.0, + presence_penalty: float = 0.0, + tfs_z: float = 1.0, + mirostat_mode: int = 0, + mirostat_tau: float = 5.0, + mirostat_eta: float = 0.1, + penalize_nl: bool = True, + logits_processor: Optional[LogitsProcessorList] = None, + stopping_criteria: Optional[StoppingCriteriaList] = None, + grammar: Optional[LlamaGrammar] = None, + ) -> Generator[int, Optional[Sequence[int]], None]: + """Create a generator of tokens from a prompt. + + Examples: + >>> llama = Llama("models/ggml-7b.bin") + >>> tokens = llama.tokenize(b"Hello, world!") + >>> for token in llama.generate(tokens, top_k=40, top_p=0.95, temp=1.0, repeat_penalty=1.1): + ... print(llama.detokenize([token])) + + Args: + tokens: The prompt tokens. + top_k: The top-k sampling parameter. + top_p: The top-p sampling parameter. + temp: The temperature parameter. + repeat_penalty: The repeat penalty parameter. + reset: Whether to reset the model state. + + Yields: + The generated tokens. 
+ """ + # Reset mirostat sampling + self._mirostat_mu = ctypes.c_float(2.0 * mirostat_tau) + + # Check for kv cache prefix match + if reset and self.n_tokens > 0: + longest_prefix = 0 + for a, b in zip(self._input_ids, tokens[:-1]): + if a == b: + longest_prefix += 1 + else: + break + if longest_prefix > 0: + if self.verbose: + print("Llama.generate: prefix-match hit", file=sys.stderr) + reset = False + tokens = tokens[longest_prefix:] + self.n_tokens = longest_prefix + + # Reset the model state + if reset: + self.reset() + + # Reset the grammar + if grammar is not None: + grammar.reset() + + sample_idx = self.n_tokens + len(tokens) - 1 + tokens = list(tokens) + + # Eval and sample + while True: + self.eval(tokens) + while sample_idx < self.n_tokens: + token = self.sample( + top_k=top_k, + top_p=top_p, + min_p=min_p, + typical_p=typical_p, + temp=temp, + repeat_penalty=repeat_penalty, + frequency_penalty=frequency_penalty, + presence_penalty=presence_penalty, + tfs_z=tfs_z, + mirostat_mode=mirostat_mode, + mirostat_tau=mirostat_tau, + mirostat_eta=mirostat_eta, + logits_processor=logits_processor, + grammar=grammar, + penalize_nl=penalize_nl, + idx=sample_idx, + ) + + sample_idx += 1 + if stopping_criteria is not None and stopping_criteria( + self._input_ids, self._scores[-1, :] + ): + return + tokens_or_none = yield token + tokens.clear() + tokens.append(token) + if tokens_or_none is not None: + tokens.extend(tokens_or_none) + + if sample_idx < self.n_tokens and token != self._input_ids[sample_idx]: + self.n_tokens = sample_idx + self._ctx.kv_cache_seq_rm(-1, self.n_tokens, -1) + break + + if self.draft_model is not None: + self.input_ids[self.n_tokens : self.n_tokens + len(tokens)] = tokens + draft_tokens = self.draft_model( + self.input_ids[: self.n_tokens + len(tokens)] + ) + tokens.extend( + draft_tokens.astype(int)[ + : self._n_ctx - self.n_tokens - len(tokens) + ] + ) + + def create_embedding( + self, input: Union[str, List[str]], model: Optional[str] = None + ) -> CreateEmbeddingResponse: + """Embed a string. + + Args: + input: The utf-8 encoded string to embed. + + Returns: + An embedding object. + """ + assert self._model.model is not None + model_name: str = model if model is not None else self.model_path + + input = input if isinstance(input, list) else [input] + + # get numeric embeddings + embeds: Union[List[List[float]], List[List[List[float]]]] + total_tokens: int + embeds, total_tokens = self.embed(input, return_count=True) # type: ignore + + # convert to CreateEmbeddingResponse + data: List[Embedding] = [ + { + "object": "embedding", + "embedding": emb, + "index": idx, + } + for idx, emb in enumerate(embeds) + ] + + return { + "object": "list", + "data": data, + "model": model_name, + "usage": { + "prompt_tokens": total_tokens, + "total_tokens": total_tokens, + }, + } + + def embed( + self, + input: Union[str, List[str]], + normalize: bool = False, + truncate: bool = True, + return_count: bool = False, + ): + """Embed a string. + + Args: + input: The utf-8 encoded string to embed. 
+ + Returns: + A list of embeddings + """ + assert self._ctx.ctx is not None + n_embd = self.n_embd() + n_batch = self.n_batch + + # get pooling information + pooling_type = self.pooling_type() + logits_all = pooling_type == llama_cpp.LLAMA_POOLING_TYPE_NONE + + if self.context_params.embeddings == False: + raise RuntimeError( + "Llama model must be created with embedding=True to call this method" + ) + + if self.verbose: + llama_cpp.llama_reset_timings(self._ctx.ctx) + + if isinstance(input, str): + inputs = [input] + else: + inputs = input + + # reset batch + self._batch.reset() + + # decode and fetch embeddings + data: Union[List[List[float]], List[List[List[float]]]] = [] + + def decode_batch(seq_sizes: List[int]): + assert self._ctx.ctx is not None + llama_cpp.llama_kv_cache_clear(self._ctx.ctx) + self._ctx.decode(self._batch) + self._batch.reset() + + # store embeddings + if pooling_type == llama_cpp.LLAMA_POOLING_TYPE_NONE: + pos: int = 0 + for i, size in enumerate(seq_sizes): + ptr = llama_cpp.llama_get_embeddings(self._ctx.ctx) + embedding: List[List[float]] = [ + ptr[pos + j * n_embd : pos + (j + 1) * n_embd] for j in range(size) + ] + if normalize: + embedding = [_normalize_embedding(e) for e in embedding] + data.append(embedding) + pos += size + else: + for i in range(len(seq_sizes)): + ptr = llama_cpp.llama_get_embeddings_seq(self._ctx.ctx, i) + embedding: List[float] = ptr[:n_embd] + if normalize: + embedding = _normalize_embedding(embedding) + data.append(embedding) + + # init state + total_tokens = 0 + s_batch = [] + t_batch = 0 + p_batch = 0 + + # accumulate batches and encode + for text in inputs: + tokens = self.tokenize(text.encode("utf-8")) + if truncate: + tokens = tokens[:n_batch] + + n_tokens = len(tokens) + total_tokens += n_tokens + + # check for overrun + if n_tokens > n_batch: + raise ValueError( + f"Requested tokens ({n_tokens}) exceed batch size of {n_batch}" + ) + + # time to eval batch + if t_batch + n_tokens > n_batch: + decode_batch(s_batch) + s_batch = [] + t_batch = 0 + p_batch = 0 + + # add to batch + self._batch.add_sequence(tokens, p_batch, logits_all) + + # update batch stats + s_batch.append(n_tokens) + t_batch += n_tokens + p_batch += 1 + + # hanlde last batch + decode_batch(s_batch) + + if self.verbose: + llama_cpp.llama_print_timings(self._ctx.ctx) + + output = data[0] if isinstance(input, str) else data + + llama_cpp.llama_kv_cache_clear(self._ctx.ctx) + self.reset() + + if return_count: + return output, total_tokens + else: + return output + + def _create_completion( + self, + prompt: Union[str, List[int]], + suffix: Optional[str] = None, + max_tokens: Optional[int] = 16, + temperature: float = 0.8, + top_p: float = 0.95, + min_p: float = 0.05, + typical_p: float = 1.0, + logprobs: Optional[int] = None, + echo: bool = False, + stop: Optional[Union[str, List[str]]] = [], + frequency_penalty: float = 0.0, + presence_penalty: float = 0.0, + repeat_penalty: float = 1.1, + top_k: int = 40, + stream: bool = False, + seed: Optional[int] = None, + tfs_z: float = 1.0, + mirostat_mode: int = 0, + mirostat_tau: float = 5.0, + mirostat_eta: float = 0.1, + model: Optional[str] = None, + stopping_criteria: Optional[StoppingCriteriaList] = None, + logits_processor: Optional[LogitsProcessorList] = None, + grammar: Optional[LlamaGrammar] = None, + logit_bias: Optional[Dict[str, float]] = None, + ) -> Union[ + Iterator[CreateCompletionResponse], Iterator[CreateCompletionStreamResponse] + ]: + assert self._ctx is not None + assert suffix is None or 
suffix.__class__ is str + + completion_id: str = f"cmpl-{str(uuid.uuid4())}" + created: int = int(time.time()) + # If prompt is empty, initialize completion with BOS token to avoid + # detokenization including a space at the beginning of the completion + completion_tokens: List[int] = [] if len(prompt) > 0 else [self.token_bos()] + # Add blank space to start of prompt to match OG llama tokenizer + prompt_tokens: List[int] = ( + ( + self.tokenize(prompt.encode("utf-8"), special=True) + if prompt != "" + else [self.token_bos()] + ) + if isinstance(prompt, str) + else prompt + ) + text: bytes = b"" + returned_tokens: int = 0 + stop = ( + stop if isinstance(stop, list) else [stop] if isinstance(stop, str) else [] + ) + model_name: str = model if model is not None else self.model_path + + # NOTE: This likely doesn't work correctly for the first token in the prompt + # because of the extra space added to the start of the prompt_tokens + if logit_bias is not None: + logit_bias_map = {int(k): float(v) for k, v in logit_bias.items()} + + def logit_bias_processor( + input_ids: npt.NDArray[np.intc], + scores: npt.NDArray[np.single], + ) -> npt.NDArray[np.single]: + new_scores = np.copy( + scores + ) # Does it make sense to copy the whole array or can we just overwrite the original one? + for input_id, score in logit_bias_map.items(): + new_scores[input_id] = score + scores[input_id] + return new_scores + + _logit_bias_processor = LogitsProcessorList([logit_bias_processor]) + if logits_processor is None: + logits_processor = _logit_bias_processor + else: + logits_processor = logits_processor.extend(_logit_bias_processor) + + if self.verbose: + self._ctx.reset_timings() + + if len(prompt_tokens) >= self._n_ctx: + raise ValueError( + f"Requested tokens ({len(prompt_tokens)}) exceed context window of {llama_cpp.llama_n_ctx(self.ctx)}" + ) + + if max_tokens is None or max_tokens <= 0: + # Unlimited, depending on n_ctx. 
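+ # i.e. allow generation to continue until the context window is full.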
+ max_tokens = self._n_ctx - len(prompt_tokens) + + # Truncate max_tokens if requested tokens would exceed the context window + max_tokens = ( + max_tokens + if max_tokens + len(prompt_tokens) < self._n_ctx + else (self._n_ctx - len(prompt_tokens)) + ) + + if stop != []: + stop_sequences = [s.encode("utf-8") for s in stop] + else: + stop_sequences = [] + + if logprobs is not None and self.context_params.logits_all is False: + raise ValueError( + "logprobs is not supported for models created with logits_all=False" + ) + + if self.cache: + try: + cache_item = self.cache[prompt_tokens] + cache_prefix_len = Llama.longest_token_prefix( + cache_item.input_ids.tolist(), prompt_tokens + ) + eval_prefix_len = Llama.longest_token_prefix( + self._input_ids.tolist(), prompt_tokens + ) + if cache_prefix_len > eval_prefix_len: + self.load_state(cache_item) + if self.verbose: + print("Llama._create_completion: cache hit", file=sys.stderr) + except KeyError: + if self.verbose: + print("Llama._create_completion: cache miss", file=sys.stderr) + + if seed is not None: + self._ctx.set_rng_seed(seed) + + finish_reason = "length" + multibyte_fix = 0 + for token in self.generate( + prompt_tokens, + top_k=top_k, + top_p=top_p, + min_p=min_p, + typical_p=typical_p, + temp=temperature, + tfs_z=tfs_z, + mirostat_mode=mirostat_mode, + mirostat_tau=mirostat_tau, + mirostat_eta=mirostat_eta, + frequency_penalty=frequency_penalty, + presence_penalty=presence_penalty, + repeat_penalty=repeat_penalty, + stopping_criteria=stopping_criteria, + logits_processor=logits_processor, + grammar=grammar, + ): + assert self._model.model is not None + if llama_cpp.llama_token_is_eog(self._model.model, token): + text = self.detokenize(completion_tokens, prev_tokens=prompt_tokens) + finish_reason = "stop" + break + + completion_tokens.append(token) + + all_text = self.detokenize(completion_tokens, prev_tokens=prompt_tokens) + + # Contains multi-byte UTF8 + for k, char in enumerate(all_text[-3:]): + k = 3 - k + for num, pattern in [(2, 192), (3, 224), (4, 240)]: + # Bitwise AND check + if num > k and pattern & char == pattern: + multibyte_fix = num - k + + # Stop incomplete bytes from passing + if multibyte_fix > 0: + multibyte_fix -= 1 + continue + + any_stop = [s for s in stop_sequences if s in all_text] + if len(any_stop) > 0: + first_stop = any_stop[0] + text = all_text[: all_text.index(first_stop)] + finish_reason = "stop" + break + + if stream: + remaining_tokens = completion_tokens[returned_tokens:] + remaining_text = self.detokenize(remaining_tokens, prev_tokens=prompt_tokens + completion_tokens[:returned_tokens]) + remaining_length = len(remaining_text) + + # We want to avoid yielding any characters from + # the generated text if they are part of a stop + # sequence. 
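+ # first_stop_position is the length of the longest partial stop-sequence match
+ # at the end of the remaining text; those trailing bytes are held back until we
+ # know whether the match completes.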
+ first_stop_position = 0 + for s in stop_sequences: + for i in range(min(len(s), remaining_length), 0, -1): + if remaining_text.endswith(s[:i]): + if i > first_stop_position: + first_stop_position = i + break + + token_end_position = 0 + + if logprobs is not None: + # not sure how to handle this branch when dealing + # with CJK output, so keep it unchanged + for token in remaining_tokens: + if token == self.token_bos(): + continue + token_end_position += len(self.detokenize([token], prev_tokens=prompt_tokens + completion_tokens[:returned_tokens])) + # Check if stop sequence is in the token + if token_end_position > ( + remaining_length - first_stop_position + ): + break + token_str = self.detokenize([token], prev_tokens=prompt_tokens + completion_tokens[:returned_tokens]).decode( + "utf-8", errors="ignore" + ) + text_offset = len(prompt) + len( + self.detokenize(completion_tokens[:returned_tokens], prev_tokens=prompt_tokens + completion_tokens[:returned_tokens]).decode( + "utf-8", errors="ignore" + ) + ) + token_offset = len(prompt_tokens) + returned_tokens + logits = self._scores[token_offset - 1, :] + current_logprobs = Llama.logits_to_logprobs(logits).tolist() + sorted_logprobs = list( + sorted( + zip(current_logprobs, range(len(current_logprobs))), + reverse=True, + ) + ) + top_logprob = { + self.detokenize([i]).decode( + "utf-8", errors="ignore" + ): logprob + for logprob, i in sorted_logprobs[:logprobs] + } + top_logprob.update({token_str: current_logprobs[int(token)]}) + logprobs_or_none = { + "tokens": [ + self.detokenize([token], prev_tokens=prompt_tokens + completion_tokens[:returned_tokens]).decode( + "utf-8", errors="ignore" + ) + ], + "text_offset": [text_offset], + "token_logprobs": [current_logprobs[int(token)]], + "top_logprobs": [top_logprob], + } + returned_tokens += 1 + yield { + "id": completion_id, + "object": "text_completion", + "created": created, + "model": model_name, + "choices": [ + { + "text": self.detokenize([token], prev_tokens=prompt_tokens + completion_tokens[:returned_tokens]).decode( + "utf-8", errors="ignore" + ), + "index": 0, + "logprobs": logprobs_or_none, + "finish_reason": None, + } + ], + } + else: + while len(remaining_tokens) > 0: + decode_success = False + for i in range(1, len(remaining_tokens) + 1): + try: + bs = self.detokenize(remaining_tokens[:i], prev_tokens=prompt_tokens + completion_tokens[:returned_tokens]) + ts = bs.decode("utf-8") + decode_success = True + break + except UnicodeError: + pass + else: + break + if not decode_success: + # all remaining tokens cannot be decoded to a UTF-8 character + break + token_end_position += len(bs) + if token_end_position > ( + remaining_length - first_stop_position + ): + break + remaining_tokens = remaining_tokens[i:] + returned_tokens += i + + yield { + "id": completion_id, + "object": "text_completion", + "created": created, + "model": model_name, + "choices": [ + { + "text": ts, + "index": 0, + "logprobs": None, + "finish_reason": None, + } + ], + } + + if len(completion_tokens) >= max_tokens: + text = self.detokenize(completion_tokens, prev_tokens=prompt_tokens) + finish_reason = "length" + break + + if stopping_criteria is not None and stopping_criteria( + self._input_ids, self._scores[-1, :] + ): + text = self.detokenize(completion_tokens, prev_tokens=prompt_tokens) + finish_reason = "stop" + + if self.verbose: + self._ctx.print_timings() + + if stream: + remaining_tokens = completion_tokens[returned_tokens:] + all_text = self.detokenize(remaining_tokens, prev_tokens=prompt_tokens + 
completion_tokens[:returned_tokens]) + any_stop = [s for s in stop_sequences if s in all_text] + if len(any_stop) > 0: + end = min(all_text.index(stop) for stop in any_stop) + else: + end = len(all_text) + + token_end_position = 0 + for token in remaining_tokens: + token_end_position += len(self.detokenize([token], prev_tokens=prompt_tokens + completion_tokens[:returned_tokens])) + + logprobs_or_none: Optional[CompletionLogprobs] = None + if logprobs is not None: + if token == self.token_bos(): + continue + token_str = self.detokenize([token]).decode( + "utf-8", errors="ignore" + ) + text_offset = len(prompt) + len( + self.detokenize(completion_tokens[:returned_tokens], prev_tokens=prompt_tokens + completion_tokens[:returned_tokens]) + ) + token_offset = len(prompt_tokens) + returned_tokens - 1 + logits = self._scores[token_offset, :] + current_logprobs = Llama.logits_to_logprobs(logits).tolist() + sorted_logprobs = list( + sorted( + zip(current_logprobs, range(len(current_logprobs))), + reverse=True, + ) + ) + top_logprob = { + self.detokenize([i]).decode("utf-8", errors="ignore"): logprob + for logprob, i in sorted_logprobs[:logprobs] + } + top_logprob.update({token_str: current_logprobs[int(token)]}) + logprobs_or_none = { + "tokens": [ + self.detokenize([token]).decode("utf-8", errors="ignore") + ], + "text_offset": [text_offset], + "token_logprobs": [current_logprobs[int(token)]], + "top_logprobs": [top_logprob], + } + + if token_end_position >= end: + last_text = self.detokenize([token]) + if token_end_position == end - 1: + break + returned_tokens += 1 + yield { + "id": completion_id, + "object": "text_completion", + "created": created, + "model": model_name, + "choices": [ + { + "text": last_text[ + : len(last_text) - (token_end_position - end) + ].decode("utf-8", errors="ignore"), + "index": 0, + "logprobs": logprobs_or_none, + "finish_reason": None, + } + ], + } + break + returned_tokens += 1 + yield { + "id": completion_id, + "object": "text_completion", + "created": created, + "model": model_name, + "choices": [ + { + "text": self.detokenize([token]).decode( + "utf-8", errors="ignore" + ), + "index": 0, + "logprobs": logprobs_or_none, + "finish_reason": None, + } + ], + } + yield { + "id": completion_id, + "object": "text_completion", + "created": created, + "model": model_name, + "choices": [ + { + "text": "", + "index": 0, + "logprobs": None, + "finish_reason": finish_reason, + } + ], + } + if self.cache: + if self.verbose: + print("Llama._create_completion: cache save", file=sys.stderr) + self.cache[prompt_tokens + completion_tokens] = self.save_state() + print("Llama._create_completion: cache saved", file=sys.stderr) + return + + if self.cache: + if self.verbose: + print("Llama._create_completion: cache save", file=sys.stderr) + self.cache[prompt_tokens + completion_tokens] = self.save_state() + + text_str = text.decode("utf-8", errors="ignore") + + if echo: + text_str = prompt + text_str + + if suffix is not None: + text_str = text_str + suffix + + logprobs_or_none: Optional[CompletionLogprobs] = None + if logprobs is not None: + text_offset = 0 if echo else len(prompt) + token_offset = 0 if echo else len(prompt_tokens[1:]) + text_offsets: List[int] = [] + token_logprobs: List[Optional[float]] = [] + tokens: List[str] = [] + top_logprobs: List[Optional[Dict[str, float]]] = [] + + if echo: + # Remove leading BOS token + all_tokens = prompt_tokens[1:] + completion_tokens + else: + all_tokens = completion_tokens + + all_token_strs = [ + self.detokenize([token], 
prev_tokens=all_tokens[:i]).decode("utf-8", errors="ignore") + for i, token in enumerate(all_tokens) + ] + all_logprobs = Llama.logits_to_logprobs(self._scores)[token_offset:] + # TODO: may be able to change this loop to use np.take_along_dim + for idx, (token, token_str, logprobs_token) in enumerate( + zip(all_tokens, all_token_strs, all_logprobs) + ): + if token == self.token_bos(): + continue + text_offsets.append( + text_offset + + len( + self.detokenize(all_tokens[:idx]).decode( + "utf-8", errors="ignore" + ) + ) + ) + tokens.append(token_str) + sorted_logprobs = list( + sorted( + zip(logprobs_token, range(len(logprobs_token))), reverse=True + ) + ) + token_logprobs.append(logprobs_token[int(token)]) + top_logprob: Optional[Dict[str, float]] = { + self.detokenize([i], prev_tokens=all_tokens[:idx]).decode("utf-8", errors="ignore"): logprob + for logprob, i in sorted_logprobs[:logprobs] + } + top_logprob.update({token_str: logprobs_token[int(token)]}) + top_logprobs.append(top_logprob) + # Weird idosincracy of the OpenAI API where + # token_logprobs and top_logprobs are null for + # the first token. + if echo and len(all_tokens) > 0: + token_logprobs[0] = None + top_logprobs[0] = None + logprobs_or_none = { + "tokens": tokens, + "text_offset": text_offsets, + "token_logprobs": token_logprobs, + "top_logprobs": top_logprobs, + } + + yield { + "id": completion_id, + "object": "text_completion", + "created": created, + "model": model_name, + "choices": [ + { + "text": text_str, + "index": 0, + "logprobs": logprobs_or_none, + "finish_reason": finish_reason, + } + ], + "usage": { + "prompt_tokens": len(prompt_tokens), + "completion_tokens": len(completion_tokens), + "total_tokens": len(prompt_tokens) + len(completion_tokens), + }, + } + + def create_completion( + self, + prompt: Union[str, List[int]], + suffix: Optional[str] = None, + max_tokens: Optional[int] = 16, + temperature: float = 0.8, + top_p: float = 0.95, + min_p: float = 0.05, + typical_p: float = 1.0, + logprobs: Optional[int] = None, + echo: bool = False, + stop: Optional[Union[str, List[str]]] = [], + frequency_penalty: float = 0.0, + presence_penalty: float = 0.0, + repeat_penalty: float = 1.1, + top_k: int = 40, + stream: bool = False, + seed: Optional[int] = None, + tfs_z: float = 1.0, + mirostat_mode: int = 0, + mirostat_tau: float = 5.0, + mirostat_eta: float = 0.1, + model: Optional[str] = None, + stopping_criteria: Optional[StoppingCriteriaList] = None, + logits_processor: Optional[LogitsProcessorList] = None, + grammar: Optional[LlamaGrammar] = None, + logit_bias: Optional[Dict[str, float]] = None, + ) -> Union[CreateCompletionResponse, Iterator[CreateCompletionStreamResponse]]: + """Generate text from a prompt. + + Args: + prompt: The prompt to generate text from. + suffix: A suffix to append to the generated text. If None, no suffix is appended. + max_tokens: The maximum number of tokens to generate. If max_tokens <= 0 or None, the maximum number of tokens to generate is unlimited and depends on n_ctx. + temperature: The temperature to use for sampling. + top_p: The top-p value to use for nucleus sampling. Nucleus sampling described in academic paper "The Curious Case of Neural Text Degeneration" https://arxiv.org/abs/1904.09751 + min_p: The min-p value to use for minimum p sampling. Minimum P sampling as described in https://github.com/ggerganov/llama.cpp/pull/3841 + typical_p: The typical-p value to use for sampling. Locally Typical Sampling implementation described in the paper https://arxiv.org/abs/2202.00666. 
+ logprobs: The number of logprobs to return. If None, no logprobs are returned. + echo: Whether to echo the prompt. + stop: A list of strings to stop generation when encountered. + frequency_penalty: The penalty to apply to tokens based on their frequency in the prompt. + presence_penalty: The penalty to apply to tokens based on their presence in the prompt. + repeat_penalty: The penalty to apply to repeated tokens. + top_k: The top-k value to use for sampling. Top-K sampling described in academic paper "The Curious Case of Neural Text Degeneration" https://arxiv.org/abs/1904.09751 + stream: Whether to stream the results. + seed: The seed to use for sampling. + tfs_z: The tail-free sampling parameter. Tail Free Sampling described in https://www.trentonbricken.com/Tail-Free-Sampling/. + mirostat_mode: The mirostat sampling mode. + mirostat_tau: The target cross-entropy (or surprise) value you want to achieve for the generated text. A higher value corresponds to more surprising or less predictable text, while a lower value corresponds to less surprising or more predictable text. + mirostat_eta: The learning rate used to update `mu` based on the error between the target and observed surprisal of the sampled word. A larger learning rate will cause `mu` to be updated more quickly, while a smaller learning rate will result in slower updates. + model: The name to use for the model in the completion object. + stopping_criteria: A list of stopping criteria to use. + logits_processor: A list of logits processors to use. + grammar: A grammar to use for constrained sampling. + logit_bias: A logit bias to use. + + Raises: + ValueError: If the requested tokens exceed the context window. + RuntimeError: If the prompt fails to tokenize or the model fails to evaluate the prompt. + + Returns: + Response object containing the generated text. 
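+
+ Example (illustrative sketch; "./model.gguf" is a placeholder path, not a
+ file shipped with this repository):
+ >>> llm = Llama(model_path="./model.gguf")
+ >>> output = llm.create_completion("Q: Name the planets in the solar system. A: ", max_tokens=32, stop=["Q:"])
+ >>> print(output["choices"][0]["text"])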
+ """ + completion_or_chunks = self._create_completion( + prompt=prompt, + suffix=suffix, + max_tokens=-1 if max_tokens is None else max_tokens, + temperature=temperature, + top_p=top_p, + min_p=min_p, + typical_p=typical_p, + logprobs=logprobs, + echo=echo, + stop=stop, + frequency_penalty=frequency_penalty, + presence_penalty=presence_penalty, + repeat_penalty=repeat_penalty, + top_k=top_k, + stream=stream, + seed=seed, + tfs_z=tfs_z, + mirostat_mode=mirostat_mode, + mirostat_tau=mirostat_tau, + mirostat_eta=mirostat_eta, + model=model, + stopping_criteria=stopping_criteria, + logits_processor=logits_processor, + grammar=grammar, + logit_bias=logit_bias, + ) + if stream: + chunks: Iterator[CreateCompletionStreamResponse] = completion_or_chunks + return chunks + completion: Completion = next(completion_or_chunks) # type: ignore + return completion + + def __call__( + self, + prompt: str, + suffix: Optional[str] = None, + max_tokens: Optional[int] = 16, + temperature: float = 0.8, + top_p: float = 0.95, + min_p: float = 0.05, + typical_p: float = 1.0, + logprobs: Optional[int] = None, + echo: bool = False, + stop: Optional[Union[str, List[str]]] = [], + frequency_penalty: float = 0.0, + presence_penalty: float = 0.0, + repeat_penalty: float = 1.1, + top_k: int = 40, + stream: bool = False, + seed: Optional[int] = None, + tfs_z: float = 1.0, + mirostat_mode: int = 0, + mirostat_tau: float = 5.0, + mirostat_eta: float = 0.1, + model: Optional[str] = None, + stopping_criteria: Optional[StoppingCriteriaList] = None, + logits_processor: Optional[LogitsProcessorList] = None, + grammar: Optional[LlamaGrammar] = None, + logit_bias: Optional[Dict[str, float]] = None, + ) -> Union[CreateCompletionResponse, Iterator[CreateCompletionStreamResponse]]: + """Generate text from a prompt. + + Args: + prompt: The prompt to generate text from. + suffix: A suffix to append to the generated text. If None, no suffix is appended. + max_tokens: The maximum number of tokens to generate. If max_tokens <= 0 or None, the maximum number of tokens to generate is unlimited and depends on n_ctx. + temperature: The temperature to use for sampling. + top_p: The top-p value to use for nucleus sampling. Nucleus sampling described in academic paper "The Curious Case of Neural Text Degeneration" https://arxiv.org/abs/1904.09751 + min_p: The min-p value to use for minimum p sampling. Minimum P sampling as described in https://github.com/ggerganov/llama.cpp/pull/3841 + typical_p: The typical-p value to use for sampling. Locally Typical Sampling implementation described in the paper https://arxiv.org/abs/2202.00666. + logprobs: The number of logprobs to return. If None, no logprobs are returned. + echo: Whether to echo the prompt. + stop: A list of strings to stop generation when encountered. + frequency_penalty: The penalty to apply to tokens based on their frequency in the prompt. + presence_penalty: The penalty to apply to tokens based on their presence in the prompt. + repeat_penalty: The penalty to apply to repeated tokens. + top_k: The top-k value to use for sampling. Top-K sampling described in academic paper "The Curious Case of Neural Text Degeneration" https://arxiv.org/abs/1904.09751 + stream: Whether to stream the results. + seed: The seed to use for sampling. + tfs_z: The tail-free sampling parameter. Tail Free Sampling described in https://www.trentonbricken.com/Tail-Free-Sampling/. + mirostat_mode: The mirostat sampling mode. 
+ mirostat_tau: The target cross-entropy (or surprise) value you want to achieve for the generated text. A higher value corresponds to more surprising or less predictable text, while a lower value corresponds to less surprising or more predictable text. + mirostat_eta: The learning rate used to update `mu` based on the error between the target and observed surprisal of the sampled word. A larger learning rate will cause `mu` to be updated more quickly, while a smaller learning rate will result in slower updates. + model: The name to use for the model in the completion object. + stopping_criteria: A list of stopping criteria to use. + logits_processor: A list of logits processors to use. + grammar: A grammar to use for constrained sampling. + logit_bias: A logit bias to use. + + Raises: + ValueError: If the requested tokens exceed the context window. + RuntimeError: If the prompt fails to tokenize or the model fails to evaluate the prompt. + + Returns: + Response object containing the generated text. + """ + return self.create_completion( + prompt=prompt, + suffix=suffix, + max_tokens=max_tokens, + temperature=temperature, + top_p=top_p, + min_p=min_p, + typical_p=typical_p, + logprobs=logprobs, + echo=echo, + stop=stop, + frequency_penalty=frequency_penalty, + presence_penalty=presence_penalty, + repeat_penalty=repeat_penalty, + top_k=top_k, + stream=stream, + seed=seed, + tfs_z=tfs_z, + mirostat_mode=mirostat_mode, + mirostat_tau=mirostat_tau, + mirostat_eta=mirostat_eta, + model=model, + stopping_criteria=stopping_criteria, + logits_processor=logits_processor, + grammar=grammar, + logit_bias=logit_bias, + ) + + def create_chat_completion( + self, + messages: List[ChatCompletionRequestMessage], + functions: Optional[List[ChatCompletionFunction]] = None, + function_call: Optional[ChatCompletionRequestFunctionCall] = None, + tools: Optional[List[ChatCompletionTool]] = None, + tool_choice: Optional[ChatCompletionToolChoiceOption] = None, + temperature: float = 0.2, + top_p: float = 0.95, + top_k: int = 40, + min_p: float = 0.05, + typical_p: float = 1.0, + stream: bool = False, + stop: Optional[Union[str, List[str]]] = [], + seed: Optional[int] = None, + response_format: Optional[ChatCompletionRequestResponseFormat] = None, + max_tokens: Optional[int] = None, + presence_penalty: float = 0.0, + frequency_penalty: float = 0.0, + repeat_penalty: float = 1.1, + tfs_z: float = 1.0, + mirostat_mode: int = 0, + mirostat_tau: float = 5.0, + mirostat_eta: float = 0.1, + model: Optional[str] = None, + logits_processor: Optional[LogitsProcessorList] = None, + grammar: Optional[LlamaGrammar] = None, + logit_bias: Optional[Dict[str, float]] = None, + logprobs: Optional[bool] = None, + top_logprobs: Optional[int] = None, + ) -> Union[ + CreateChatCompletionResponse, Iterator[CreateChatCompletionStreamResponse] + ]: + """Generate a chat completion from a list of messages. + + Args: + messages: A list of messages to generate a response for. + functions: A list of functions to use for the chat completion. + function_call: A function call to use for the chat completion. + tools: A list of tools to use for the chat completion. + tool_choice: A tool choice to use for the chat completion. + temperature: The temperature to use for sampling. + top_p: The top-p value to use for nucleus sampling. Nucleus sampling described in academic paper "The Curious Case of Neural Text Degeneration" https://arxiv.org/abs/1904.09751 + top_k: The top-k value to use for sampling. 
Top-K sampling described in academic paper "The Curious Case of Neural Text Degeneration" https://arxiv.org/abs/1904.09751 + min_p: The min-p value to use for minimum p sampling. Minimum P sampling as described in https://github.com/ggerganov/llama.cpp/pull/3841 + typical_p: The typical-p value to use for sampling. Locally Typical Sampling implementation described in the paper https://arxiv.org/abs/2202.00666. + stream: Whether to stream the results. + stop: A list of strings to stop generation when encountered. + seed: The seed to use for sampling. + response_format: The response format to use for the chat completion. Use { "type": "json_object" } to contstrain output to only valid json. + max_tokens: The maximum number of tokens to generate. If max_tokens <= 0 or None, the maximum number of tokens to generate is unlimited and depends on n_ctx. + presence_penalty: The penalty to apply to tokens based on their presence in the prompt. + frequency_penalty: The penalty to apply to tokens based on their frequency in the prompt. + repeat_penalty: The penalty to apply to repeated tokens. + tfs_z: The tail-free sampling parameter. + mirostat_mode: The mirostat sampling mode. + mirostat_tau: The mirostat sampling tau parameter. + mirostat_eta: The mirostat sampling eta parameter. + model: The name to use for the model in the completion object. + logits_processor: A list of logits processors to use. + grammar: A grammar to use. + logit_bias: A logit bias to use. + + Returns: + Generated chat completion or a stream of chat completion chunks. + """ + handler = self.chat_handler or llama_chat_format.get_chat_completion_handler( + self.chat_format + ) + return handler( + llama=self, + messages=messages, + functions=functions, + function_call=function_call, + tools=tools, + tool_choice=tool_choice, + temperature=temperature, + top_p=top_p, + top_k=top_k, + min_p=min_p, + typical_p=typical_p, + logprobs=logprobs, + top_logprobs=top_logprobs, + stream=stream, + stop=stop, + seed=seed, + response_format=response_format, + max_tokens=max_tokens, + presence_penalty=presence_penalty, + frequency_penalty=frequency_penalty, + repeat_penalty=repeat_penalty, + tfs_z=tfs_z, + mirostat_mode=mirostat_mode, + mirostat_tau=mirostat_tau, + mirostat_eta=mirostat_eta, + model=model, + logits_processor=logits_processor, + grammar=grammar, + logit_bias=logit_bias, + ) + + def create_chat_completion_openai_v1( + self, + *args: Any, + **kwargs: Any, + ): + """Generate a chat completion with return type based on the the OpenAI v1 API. + + OpenAI python package is required to use this method. + + You can install it with `pip install openai`. + + Args: + *args: Positional arguments to pass to create_chat_completion. + **kwargs: Keyword arguments to pass to create_chat_completion. + + Returns: + Generated chat completion or a stream of chat completion chunks. + """ + try: + from openai.types.chat import ChatCompletion, ChatCompletionChunk + + stream = kwargs.get("stream", False) # type: ignore + assert isinstance(stream, bool) + if stream: + return (ChatCompletionChunk(**chunk) for chunk in self.create_chat_completion(*args, **kwargs)) # type: ignore + else: + return ChatCompletion(**self.create_chat_completion(*args, **kwargs)) # type: ignore + except ImportError: + raise ImportError( + "To use create_chat_completion_openai_v1, you must install the openai package." + "You can install it with `pip install openai`." 
+ ) + + def __getstate__(self): + return dict( + model_path=self.model_path, + # Model Params + n_gpu_layers=self.model_params.n_gpu_layers, + split_mode=self.model_params.split_mode, + main_gpu=self.model_params.main_gpu, + tensor_split=self.tensor_split, + vocab_only=self.model_params.vocab_only, + use_mmap=self.model_params.use_mmap, + use_mlock=self.model_params.use_mlock, + kv_overrides=self.kv_overrides, + # Context Params + seed=self.context_params.seed, + n_ctx=self.context_params.n_ctx, + n_batch=self.n_batch, + n_threads=self.context_params.n_threads, + n_threads_batch=self.context_params.n_threads_batch, + rope_scaling_type=self.context_params.rope_scaling_type, + pooling_type=self.context_params.pooling_type, + rope_freq_base=self.context_params.rope_freq_base, + rope_freq_scale=self.context_params.rope_freq_scale, + yarn_ext_factor=self.context_params.yarn_ext_factor, + yarn_attn_factor=self.context_params.yarn_attn_factor, + yarn_beta_fast=self.context_params.yarn_beta_fast, + yarn_beta_slow=self.context_params.yarn_beta_slow, + yarn_orig_ctx=self.context_params.yarn_orig_ctx, + logits_all=self.context_params.logits_all, + embedding=self.context_params.embeddings, + offload_kqv=self.context_params.offload_kqv, + flash_attn=self.context_params.flash_attn, + # Sampling Params + last_n_tokens_size=self.last_n_tokens_size, + # LoRA Params + lora_base=self.lora_base, + lora_scale=self.lora_scale, + lora_path=self.lora_path, + # Backend Params + numa=self.numa, + # Chat Format Params + chat_format=self.chat_format, + chat_handler=self.chat_handler, + # Speculative Decidng + draft_model=self.draft_model, + # KV cache quantization + type_k=self.context_params.type_k, + type_v=self.context_params.type_v, + # Misc + verbose=self.verbose, + ) + + def __setstate__(self, state): + self.__init__(**state) + + def save_state(self) -> LlamaState: + assert self._ctx.ctx is not None + if self.verbose: + print("Llama.save_state: saving llama state", file=sys.stderr) + state_size = llama_cpp.llama_get_state_size(self._ctx.ctx) + if self.verbose: + print(f"Llama.save_state: got state size: {state_size}", file=sys.stderr) + llama_state = (ctypes.c_uint8 * int(state_size))() + if self.verbose: + print("Llama.save_state: allocated state", file=sys.stderr) + n_bytes = llama_cpp.llama_copy_state_data(self._ctx.ctx, llama_state) + if self.verbose: + print(f"Llama.save_state: copied llama state: {n_bytes}", file=sys.stderr) + if int(n_bytes) > int(state_size): + raise RuntimeError("Failed to copy llama state data") + llama_state_compact = (ctypes.c_uint8 * int(n_bytes))() + llama_cpp.ctypes.memmove(llama_state_compact, llama_state, int(n_bytes)) + if self.verbose: + print( + f"Llama.save_state: saving {n_bytes} bytes of llama state", + file=sys.stderr, + ) + return LlamaState( + scores=self._scores.copy(), + input_ids=self.input_ids.copy(), + n_tokens=self.n_tokens, + llama_state=bytes(llama_state_compact), + llama_state_size=n_bytes, + ) + + def load_state(self, state: LlamaState) -> None: + assert self._ctx.ctx is not None + # Only filling in up to `n_tokens` and then zero-ing out the rest + self.scores[: state.n_tokens, :] = state.scores.copy() + self.scores[state.n_tokens :, :] = 0.0 + self.input_ids = state.input_ids.copy() + self.n_tokens = state.n_tokens + state_size = state.llama_state_size + LLamaStateArrayType = ctypes.c_uint8 * state_size + llama_state = LLamaStateArrayType.from_buffer_copy(state.llama_state) + + if llama_cpp.llama_set_state_data(self._ctx.ctx, llama_state) != state_size: + 
raise RuntimeError("Failed to set llama state data") + + def n_ctx(self) -> int: + """Return the context window size.""" + return self._ctx.n_ctx() + + def n_embd(self) -> int: + """Return the embedding size.""" + return self._model.n_embd() + + def n_vocab(self) -> int: + """Return the vocabulary size.""" + return self._model.n_vocab() + + def tokenizer(self) -> LlamaTokenizer: + """Return the llama tokenizer for this model.""" + return LlamaTokenizer(self) + + def token_eos(self) -> int: + """Return the end-of-sequence token.""" + return self._model.token_eos() + + def token_bos(self) -> int: + """Return the beginning-of-sequence token.""" + return self._model.token_bos() + + def token_nl(self) -> int: + """Return the newline token.""" + return self._model.token_nl() + + def pooling_type(self) -> str: + """Return the pooling type.""" + return self._ctx.pooling_type() + + @staticmethod + def logits_to_logprobs( + logits: Union[npt.NDArray[np.single], List], axis: int = -1 + ) -> npt.NDArray[np.single]: + # https://docs.scipy.org/doc/scipy/reference/generated/scipy.special.log_softmax.html + logits_maxs: np.ndarray = np.amax(logits, axis=axis, keepdims=True) + if logits_maxs.ndim > 0: + logits_maxs[~np.isfinite(logits_maxs)] = 0 + elif not np.isfinite(logits_maxs): + logits_maxs = 0 + subtract_maxs = np.subtract(logits, logits_maxs, dtype=np.single) + exp = np.exp(subtract_maxs) + # Suppress warnings about log of zero + with np.errstate(divide="ignore"): + summed = np.sum(exp, axis=axis, keepdims=True) + out = np.log(summed) + return subtract_maxs - out + + @staticmethod + def longest_token_prefix(a: Sequence[int], b: Sequence[int]): + longest_prefix = 0 + for _a, _b in zip(a, b): + if _a == _b: + longest_prefix += 1 + else: + break + return longest_prefix + + @classmethod + def from_pretrained( + cls, + repo_id: str, + filename: Optional[str], + local_dir: Optional[Union[str, os.PathLike[str]]] = None, + local_dir_use_symlinks: Union[bool, Literal["auto"]] = "auto", + cache_dir: Optional[Union[str, os.PathLike[str]]] = None, + **kwargs: Any, + ) -> "Llama": + """Create a Llama model from a pretrained model name or path. + This method requires the huggingface-hub package. + You can install it with `pip install huggingface-hub`. + + Args: + repo_id: The model repo id. + filename: A filename or glob pattern to match the model file in the repo. + local_dir: The local directory to save the model to. + local_dir_use_symlinks: Whether to use symlinks when downloading the model. + **kwargs: Additional keyword arguments to pass to the Llama constructor. + + Returns: + A Llama model.""" + try: + from huggingface_hub import hf_hub_download, HfFileSystem + from huggingface_hub.utils import validate_repo_id + except ImportError: + raise ImportError( + "Llama.from_pretrained requires the huggingface-hub package. " + "You can install it with `pip install huggingface-hub`." 
+ ) + + validate_repo_id(repo_id) + + hffs = HfFileSystem() + + files = [ + file["name"] if isinstance(file, dict) else file + for file in hffs.ls(repo_id) + ] + + # split each file into repo_id, subfolder, filename + file_list: List[str] = [] + for file in files: + rel_path = Path(file).relative_to(repo_id) + file_list.append(str(rel_path)) + + matching_files = [file for file in file_list if fnmatch.fnmatch(file, filename)] # type: ignore + + if len(matching_files) == 0: + raise ValueError( + f"No file found in {repo_id} that match {filename}\n\n" + f"Available Files:\n{json.dumps(file_list)}" + ) + + if len(matching_files) > 1: + raise ValueError( + f"Multiple files found in {repo_id} matching {filename}\n\n" + f"Available Files:\n{json.dumps(files)}" + ) + + (matching_file,) = matching_files + + subfolder = str(Path(matching_file).parent) + filename = Path(matching_file).name + + # download the file + hf_hub_download( + repo_id=repo_id, + filename=filename, + subfolder=subfolder, + local_dir=local_dir, + local_dir_use_symlinks=local_dir_use_symlinks, + cache_dir=cache_dir, + ) + + if local_dir is None: + model_path = hf_hub_download( + repo_id=repo_id, + filename=filename, + subfolder=subfolder, + local_dir=local_dir, + local_dir_use_symlinks=local_dir_use_symlinks, + cache_dir=cache_dir, + local_files_only=True, + ) + else: + model_path = os.path.join(local_dir, filename) + + return cls( + model_path=model_path, + **kwargs, + ) + + +class LlamaState: + def __init__( + self, + input_ids: npt.NDArray[np.intc], + scores: npt.NDArray[np.single], + n_tokens: int, + llama_state: bytes, + llama_state_size: int, + ): + self.input_ids = input_ids + self.scores = scores + self.n_tokens = n_tokens + self.llama_state = llama_state + self.llama_state_size = llama_state_size + + +LogitsProcessor = Callable[ + [npt.NDArray[np.intc], npt.NDArray[np.single]], npt.NDArray[np.single] +] + + +class LogitsProcessorList(List[LogitsProcessor]): + def __call__( + self, input_ids: npt.NDArray[np.intc], scores: npt.NDArray[np.single] + ) -> npt.NDArray[np.single]: + for processor in self: + scores = processor(input_ids, scores) + return scores + + +StoppingCriteria = Callable[[npt.NDArray[np.intc], npt.NDArray[np.single]], bool] + + +class StoppingCriteriaList(List[StoppingCriteria]): + def __call__( + self, input_ids: npt.NDArray[np.intc], logits: npt.NDArray[np.single] + ) -> bool: + return any([stopping_criteria(input_ids, logits) for stopping_criteria in self]) diff --git a/llama-cpp-python/llama_cpp/llama_cache.py b/llama-cpp-python/llama_cpp/llama_cache.py new file mode 100644 index 0000000000000000000000000000000000000000..9e9870a52245d4f245df5a4e1b89fda121d78214 --- /dev/null +++ b/llama-cpp-python/llama_cpp/llama_cache.py @@ -0,0 +1,150 @@ +import sys +from abc import ABC, abstractmethod +from typing import ( + Optional, + Sequence, + Tuple, +) +from collections import OrderedDict + +import diskcache + +import llama_cpp.llama + +from .llama_types import * + + +class BaseLlamaCache(ABC): + """Base cache class for a llama.cpp model.""" + + def __init__(self, capacity_bytes: int = (2 << 30)): + self.capacity_bytes = capacity_bytes + + @property + @abstractmethod + def cache_size(self) -> int: + raise NotImplementedError + + def _find_longest_prefix_key( + self, + key: Tuple[int, ...], + ) -> Optional[Tuple[int, ...]]: + pass + + @abstractmethod + def __getitem__(self, key: Sequence[int]) -> "llama_cpp.llama.LlamaState": + raise NotImplementedError + + @abstractmethod + def __contains__(self, key: 
Sequence[int]) -> bool: + raise NotImplementedError + + @abstractmethod + def __setitem__(self, key: Sequence[int], value: "llama_cpp.llama.LlamaState") -> None: + raise NotImplementedError + + +class LlamaRAMCache(BaseLlamaCache): + """Cache for a llama.cpp model using RAM.""" + + def __init__(self, capacity_bytes: int = (2 << 30)): + super().__init__(capacity_bytes) + self.capacity_bytes = capacity_bytes + self.cache_state: OrderedDict[Tuple[int, ...], "llama_cpp.llama.LlamaState"] = OrderedDict() + + @property + def cache_size(self): + return sum([state.llama_state_size for state in self.cache_state.values()]) + + def _find_longest_prefix_key( + self, + key: Tuple[int, ...], + ) -> Optional[Tuple[int, ...]]: + min_len = 0 + min_key = None + keys = ( + (k, llama_cpp.llama.Llama.longest_token_prefix(k, key)) for k in self.cache_state.keys() + ) + for k, prefix_len in keys: + if prefix_len > min_len: + min_len = prefix_len + min_key = k + return min_key + + def __getitem__(self, key: Sequence[int]) -> "llama_cpp.llama.LlamaState": + key = tuple(key) + _key = self._find_longest_prefix_key(key) + if _key is None: + raise KeyError("Key not found") + value = self.cache_state[_key] + self.cache_state.move_to_end(_key) + return value + + def __contains__(self, key: Sequence[int]) -> bool: + return self._find_longest_prefix_key(tuple(key)) is not None + + def __setitem__(self, key: Sequence[int], value: "llama_cpp.llama.LlamaState"): + key = tuple(key) + if key in self.cache_state: + del self.cache_state[key] + self.cache_state[key] = value + while self.cache_size > self.capacity_bytes and len(self.cache_state) > 0: + self.cache_state.popitem(last=False) + + +# Alias for backwards compatibility +LlamaCache = LlamaRAMCache + + +class LlamaDiskCache(BaseLlamaCache): + """Cache for a llama.cpp model using disk.""" + + def __init__( + self, cache_dir: str = ".cache/llama_cache", capacity_bytes: int = (2 << 30) + ): + super().__init__(capacity_bytes) + self.cache = diskcache.Cache(cache_dir) + + @property + def cache_size(self): + return int(self.cache.volume()) # type: ignore + + def _find_longest_prefix_key( + self, + key: Tuple[int, ...], + ) -> Optional[Tuple[int, ...]]: + min_len = 0 + min_key: Optional[Tuple[int, ...]] = None + for k in self.cache.iterkeys(): # type: ignore + prefix_len = llama_cpp.llama.Llama.longest_token_prefix(k, key) + if prefix_len > min_len: + min_len = prefix_len + min_key = k # type: ignore + return min_key + + def __getitem__(self, key: Sequence[int]) -> "llama_cpp.llama.LlamaState": + key = tuple(key) + _key = self._find_longest_prefix_key(key) + if _key is None: + raise KeyError("Key not found") + value: "llama_cpp.llama.LlamaState" = self.cache.pop(_key) # type: ignore + # NOTE: This puts an integer as key in cache, which breaks, + # Llama.longest_token_prefix(k, key) above since k is not a tuple of ints/tokens + # self.cache.push(_key, side="front") # type: ignore + return value + + def __contains__(self, key: Sequence[int]) -> bool: + return self._find_longest_prefix_key(tuple(key)) is not None + + def __setitem__(self, key: Sequence[int], value: "llama_cpp.llama.LlamaState"): + print("LlamaDiskCache.__setitem__: called", file=sys.stderr) + key = tuple(key) + if key in self.cache: + print("LlamaDiskCache.__setitem__: delete", file=sys.stderr) + del self.cache[key] + self.cache[key] = value + print("LlamaDiskCache.__setitem__: set", file=sys.stderr) + while self.cache_size > self.capacity_bytes and len(self.cache) > 0: + key_to_remove = next(iter(self.cache)) + del 
self.cache[key_to_remove] + print("LlamaDiskCache.__setitem__: trim", file=sys.stderr) diff --git a/llama-cpp-python/llama_cpp/llama_chat_format.py b/llama-cpp-python/llama_cpp/llama_chat_format.py new file mode 100644 index 0000000000000000000000000000000000000000..3ab94e0d3cc0b4e5228a9a1aa7a1b76461a31788 --- /dev/null +++ b/llama-cpp-python/llama_cpp/llama_chat_format.py @@ -0,0 +1,3567 @@ +from __future__ import annotations + +import os +import json +import ctypes +import dataclasses +import random +import string + +from contextlib import ExitStack +from typing import Any, Dict, Iterator, List, Literal, Optional, Tuple, Union, Protocol, cast + +import jinja2 + +import numpy as np +import numpy.typing as npt + +import llama_cpp.llama as llama +import llama_cpp.llama_types as llama_types +import llama_cpp.llama_grammar as llama_grammar + +from ._logger import logger +from ._utils import suppress_stdout_stderr, Singleton + +### Common Chat Templates and Special Tokens ### + +# Source: https://huggingface.co/teknium/OpenHermes-2.5-Mistral-7B/blob/main/tokenizer_config.json +CHATML_CHAT_TEMPLATE = "{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}" +CHATML_BOS_TOKEN = "" +CHATML_EOS_TOKEN = "<|im_end|>" + +# Source: https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1/blob/main/tokenizer_config.json +MISTRAL_INSTRUCT_CHAT_TEMPLATE = "{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] == 'user' %}{{ '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ message['content'] + eos_token + ' ' }}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}" +MISTRAL_INSTRUCT_BOS_TOKEN = "" +MISTRAL_INSTRUCT_EOS_TOKEN = "" + +# Source: https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1/blob/main/tokenizer_config.json +MIXTRAL_INSTRUCT_CHAT_TEMPLATE = "{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] == 'user' %}{{ '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ message['content'] + eos_token}}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}" + +# Source: https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct/blob/main/tokenizer_config.json +LLAMA3_INSTRUCT_CHAT_TEMPLATE = "{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% endif %}" + +### Chat Completion Handler ### + + +class LlamaChatCompletionHandler(Protocol): + """Base Protocol for a llama chat completion handler. + + Very generic protocol that can be used to implement any chat format. 
+ The only hard requirement is that it must return a ChatCompletion when + stream=False and an iterator of ChatCompletionChunks when stream=True.""" + + def __call__( + self, + *, + # llama.cpp instance + llama: llama.Llama, + # openai api parameters + messages: List[llama_types.ChatCompletionRequestMessage], + functions: Optional[List[llama_types.ChatCompletionFunction]] = None, + function_call: Optional[llama_types.ChatCompletionRequestFunctionCall] = None, + tools: Optional[List[llama_types.ChatCompletionTool]] = None, + tool_choice: Optional[llama_types.ChatCompletionToolChoiceOption] = None, + temperature: float = 0.2, + top_p: float = 0.95, + top_k: int = 40, + stream: bool = False, + stop: Optional[Union[str, List[str]]] = [], + seed: Optional[int] = None, + response_format: Optional[ + llama_types.ChatCompletionRequestResponseFormat + ] = None, + max_tokens: Optional[int] = None, + presence_penalty: float = 0.0, + frequency_penalty: float = 0.0, + repeat_penalty: float = 1.1, + model: Optional[str] = None, + logit_bias: Optional[Dict[str, float]] = None, + # llama.cpp parameters + min_p: float = 0.05, + typical_p: float = 1.0, + tfs_z: float = 1.0, + mirostat_mode: int = 0, + mirostat_tau: float = 5.0, + mirostat_eta: float = 0.1, + logits_processor: Optional[llama.LogitsProcessorList] = None, + grammar: Optional[llama.LlamaGrammar] = None, + logprobs: Optional[bool] = None, + top_logprobs: Optional[int] = None, + **kwargs, # type: ignore + ) -> Union[ + llama_types.CreateChatCompletionResponse, + Iterator[llama_types.CreateChatCompletionStreamResponse], + ]: ... + + +class LlamaChatCompletionHandlerNotFoundException(Exception): + pass + + +class LlamaChatCompletionHandlerRegistry(Singleton): + _chat_handlers: Dict[str, LlamaChatCompletionHandler] = {} + + def register_chat_completion_handler( + self, + name: str, + chat_handler: LlamaChatCompletionHandler, + overwrite: bool = False, + ): + if not overwrite and name in self._chat_handlers: + raise ValueError( + f"Formatter with name '{name}' is already registered. Use `overwrite=True` to overwrite it." + ) + self._chat_handlers[name] = chat_handler + + def unregister_chat_handler(self, name: str): + if name in self._chat_handlers: + del self._chat_handlers[name] + else: + raise ValueError(f"No formatter registered under the name '{name}'.") + + def get_chat_completion_handler_by_name( + self, name: str + ) -> LlamaChatCompletionHandler: + try: + chat_handler = self._chat_handlers[name] + return chat_handler + except KeyError: + raise LlamaChatCompletionHandlerNotFoundException( + f"Invalid chat handler: {name} (valid formats: {list(self._chat_handlers.keys())})" + ) + + +def get_chat_completion_handler(name: str) -> LlamaChatCompletionHandler: + return LlamaChatCompletionHandlerRegistry().get_chat_completion_handler_by_name( + name + ) + + +def register_chat_completion_handler(name: str): + def decorator(f: LlamaChatCompletionHandler): + LlamaChatCompletionHandlerRegistry().register_chat_completion_handler(name, f) + return f + + return decorator + + +### Chat Formatter ### + + +@dataclasses.dataclass +class ChatFormatterResponse: + """Dataclass that stores completion parameters for a given chat format and + create_chat_completion request. + + prompt contains the formatted prompt generated from the chat format and messages. 
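A short sketch of the registry defined above in use; the handler name and the stub body are invented for illustration.

```python
# Illustrative sketch only: "my-handler" and its body are made up.
import llama_cpp.llama_chat_format as llama_chat_format

@llama_chat_format.register_chat_completion_handler("my-handler")
def my_handler(*, llama, messages, stream=False, **kwargs):
    # A real handler would format `messages` into a prompt, call
    # llama.create_completion(), and return a ChatCompletion (or an iterator
    # of chunks when stream=True). Omitted here.
    raise NotImplementedError

# The singleton registry hands the same callable back by name.
handler = llama_chat_format.get_chat_completion_handler("my-handler")
assert handler is my_handler

# Registering the same name again without overwrite=True raises ValueError.
```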
+ stop contains the stop token or list of stop tokens to use for the chat format.""" + + prompt: str + stop: Optional[Union[str, List[str]]] = None + stopping_criteria: Optional[llama.StoppingCriteriaList] = None + + +class ChatFormatter(Protocol): + """Base Protocol for a chat formatter. A chat formatter is a function that + takes a list of messages and returns a chat format response which can be used + to generate a completion. The response can also include a stop token or list + of stop tokens to use for the completion.""" + + def __call__( + self, + *, + messages: List[llama_types.ChatCompletionRequestMessage], + **kwargs: Any, + ) -> ChatFormatterResponse: ... + + +class Jinja2ChatFormatter(ChatFormatter): + def __init__( + self, + template: str, + eos_token: str, + bos_token: str, + add_generation_prompt: bool = True, + stop_token_ids: Optional[List[int]] = None, + ): + """A chat formatter that uses jinja2 templates to format the prompt.""" + self.template = template + self.eos_token = eos_token + self.bos_token = bos_token + self.add_generation_prompt = add_generation_prompt + self.stop_token_ids = set(stop_token_ids) if stop_token_ids is not None else None + + self._environment = jinja2.Environment( + loader=jinja2.BaseLoader(), + trim_blocks=True, + lstrip_blocks=True, + ).from_string(self.template) + + def __call__( + self, + *, + messages: List[llama_types.ChatCompletionRequestMessage], + functions: Optional[List[llama_types.ChatCompletionFunction]] = None, + function_call: Optional[llama_types.ChatCompletionRequestFunctionCall] = None, + tools: Optional[List[llama_types.ChatCompletionTool]] = None, + tool_choice: Optional[llama_types.ChatCompletionToolChoiceOption] = None, + **kwargs: Any, + ) -> ChatFormatterResponse: + def raise_exception(message: str): + raise ValueError(message) + + prompt = self._environment.render( + messages=messages, + eos_token=self.eos_token, + bos_token=self.bos_token, + raise_exception=raise_exception, + add_generation_prompt=self.add_generation_prompt, + functions=functions, + function_call=function_call, + tools=tools, + tool_choice=tool_choice, + ) + + stopping_criteria = None + if self.stop_token_ids is not None: + def stop_on_last_token( + tokens: npt.NDArray[np.intc], + logits: npt.NDArray[np.single] + ) -> bool: + return tokens[-1] in self.stop_token_ids + stopping_criteria = llama.StoppingCriteriaList([stop_on_last_token]) + + return ChatFormatterResponse(prompt=prompt, stop=[self.eos_token], stopping_criteria=stopping_criteria) + + def to_chat_handler(self) -> LlamaChatCompletionHandler: + return chat_formatter_to_chat_completion_handler(self) + + +def _convert_text_completion_to_chat( + completion: llama_types.Completion, +) -> llama_types.ChatCompletion: + assert "usage" in completion + return { + "id": "chat" + completion["id"], + "object": "chat.completion", + "created": completion["created"], + "model": completion["model"], + "choices": [ + { + "index": 0, + "message": { + "role": "assistant", + "content": completion["choices"][0]["text"], + }, + "logprobs": completion["choices"][0]["logprobs"], + "finish_reason": completion["choices"][0]["finish_reason"], + } + ], + "usage": completion["usage"], + } + + +def _convert_text_completion_chunks_to_chat( + chunks: Iterator[llama_types.CreateCompletionStreamResponse], +) -> Iterator[llama_types.ChatCompletionChunk]: + for i, chunk in enumerate(chunks): + if i == 0: + yield { + "id": "chat" + chunk["id"], + "model": chunk["model"], + "created": chunk["created"], + "object": 
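A minimal sketch of `Jinja2ChatFormatter` rendering a prompt from the ChatML template constant defined earlier in this module.

```python
# Illustrative sketch: all imported names come from this module.
from llama_cpp.llama_chat_format import (
    CHATML_BOS_TOKEN,
    CHATML_CHAT_TEMPLATE,
    CHATML_EOS_TOKEN,
    Jinja2ChatFormatter,
)

formatter = Jinja2ChatFormatter(
    template=CHATML_CHAT_TEMPLATE,
    bos_token=CHATML_BOS_TOKEN,
    eos_token=CHATML_EOS_TOKEN,
)

result = formatter(
    messages=[
        {"role": "system", "content": "You are a concise medical assistant."},
        {"role": "user", "content": "Define tachycardia."},
    ]
)
# result.prompt ends with "<|im_start|>assistant\n" because
# add_generation_prompt defaults to True; result.stop == ["<|im_end|>"].
print(result.prompt)

# The formatter can also be promoted to a full chat completion handler:
handler = formatter.to_chat_handler()
```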
"chat.completion.chunk", + "choices": [ + { + "index": 0, + "delta": { + "role": "assistant", + }, + "logprobs": None, + "finish_reason": None, + } + ], + } + yield { + "id": "chat" + chunk["id"], + "model": chunk["model"], + "created": chunk["created"], + "object": "chat.completion.chunk", + "choices": [ + { + "index": 0, + "delta": ( + { + "content": chunk["choices"][0]["text"], + } + if chunk["choices"][0]["finish_reason"] is None + else {} + ), + "logprobs": chunk["choices"][0]["logprobs"], + "finish_reason": chunk["choices"][0]["finish_reason"], + } + ], + } + + +def _convert_completion_to_chat( + completion_or_chunks: Union[ + llama_types.CreateCompletionResponse, + Iterator[llama_types.CreateCompletionStreamResponse], + ], + stream: bool = False, +) -> Union[ + llama_types.CreateChatCompletionResponse, Iterator[llama_types.ChatCompletionChunk] +]: + if stream: + chunks: Iterator[llama_types.CreateCompletionStreamResponse] = completion_or_chunks # type: ignore + return _convert_text_completion_chunks_to_chat(chunks) + else: + completion: llama_types.Completion = completion_or_chunks # type: ignore + return _convert_text_completion_to_chat(completion) + + +def _convert_completion_to_chat_function( + tool_name: str, + completion_or_chunks: Union[ + llama_types.CreateCompletionResponse, + Iterator[llama_types.CreateCompletionStreamResponse], + ], + stream: bool, +): + if not stream: + completion: llama_types.CreateCompletionResponse = completion_or_chunks # type: ignore + assert "usage" in completion + tool_id = "call_" + "_0_" + tool_name + "_" + completion["id"] + # TODO: Fix for legacy function calls + chat_completion: llama_types.CreateChatCompletionResponse = { + "id": "chat" + completion["id"], + "object": "chat.completion", + "created": completion["created"], + "model": completion["model"], + "choices": [ + { + "index": 0, + "message": { + "role": "assistant", + "content": None, + "function_call": { + "name": tool_name, + "arguments": completion["choices"][0]["text"], + }, + "tool_calls": [ + { + "id": tool_id, + "type": "function", + "function": { + "name": tool_name, + "arguments": completion["choices"][0]["text"], + }, + } + ], + }, + "logprobs": completion["choices"][0]["logprobs"], + "finish_reason": "tool_calls", + } + ], + "usage": completion["usage"], + } + return chat_completion + else: + chunks: Iterator[llama_types.CreateCompletionStreamResponse] = completion_or_chunks # type: ignore + + def _stream_response_to_function_stream( + chunks: Iterator[llama_types.CreateCompletionStreamResponse], + ) -> Iterator[llama_types.CreateChatCompletionStreamResponse]: + # blank first message + first = True + id_ = None + created = None + model = None + tool_id = None + for chunk in chunks: + if first: + id_ = "chat" + chunk["id"] + created = chunk["created"] + model = chunk["model"] + tool_id = "call_" + "_0_" + tool_name + "_" + chunk["id"] + yield { + "id": id_, + "object": "chat.completion.chunk", + "created": created, + "model": model, + "choices": [ + { + "index": 0, + "finish_reason": None, + "logprobs": None, + "delta": { + "role": "assistant", + "content": None, + "function_call": None, + "tool_calls": None, + }, + } + ], + } + yield { + "id": "chat" + chunk["id"], + "object": "chat.completion.chunk", + "created": chunk["created"], + "model": chunk["model"], + "choices": [ + { + "index": 0, + "finish_reason": None, + "logprobs": chunk["choices"][0]["logprobs"], + "delta": { + "role": None, + "content": None, + "function_call": { + "name": tool_name, + "arguments": 
chunk["choices"][0]["text"], + }, + "tool_calls": [ + { + "index": 0, + "id": tool_id, + "type": "function", + "function": { + "name": tool_name, + "arguments": chunk["choices"][0]["text"], + }, + } + ], + }, + } + ], + } + first = False + continue + assert tool_id is not None + yield { + "id": "chat" + chunk["id"], + "object": "chat.completion.chunk", + "created": chunk["created"], + "model": chunk["model"], + "choices": [ + { + "index": 0, + "finish_reason": None, + "logprobs": chunk["choices"][0]["logprobs"], + "delta": { + "role": None, + "content": None, + "function_call": { + "name": tool_name, + "arguments": chunk["choices"][0]["text"], + }, + "tool_calls": [ + { + "index": 0, + "id": tool_id, + "type": "function", + "function": { + "name": tool_name, + "arguments": chunk["choices"][0][ + "text" + ], + }, + } + ], + }, + } + ], + } + + if id_ is not None and created is not None and model is not None: + yield { + "id": id_, + "object": "chat.completion.chunk", + "created": created, + "model": model, + "choices": [ + { + "index": 0, + "finish_reason": "tool_calls", + "logprobs": None, + "delta": { + "role": None, + "content": None, + "function_call": None, + "tool_calls": None, + }, + } + ], + } + + return _stream_response_to_function_stream(chunks) + + + +def chat_formatter_to_chat_completion_handler( + chat_formatter: ChatFormatter, +) -> LlamaChatCompletionHandler: + def chat_completion_handler( + *, + llama: llama.Llama, + messages: List[llama_types.ChatCompletionRequestMessage], + functions: Optional[List[llama_types.ChatCompletionFunction]] = None, + function_call: Optional[llama_types.ChatCompletionRequestFunctionCall] = None, + tools: Optional[List[llama_types.ChatCompletionTool]] = None, + tool_choice: Optional[llama_types.ChatCompletionToolChoiceOption] = None, + temperature: float = 0.2, + top_p: float = 0.95, + top_k: int = 40, + min_p: float = 0.05, + typical_p: float = 1.0, + stream: bool = False, + stop: Optional[Union[str, List[str]]] = [], + seed: Optional[int] = None, + response_format: Optional[ + llama_types.ChatCompletionRequestResponseFormat + ] = None, + max_tokens: Optional[int] = None, + presence_penalty: float = 0.0, + frequency_penalty: float = 0.0, + repeat_penalty: float = 1.1, + tfs_z: float = 1.0, + mirostat_mode: int = 0, + mirostat_tau: float = 5.0, + mirostat_eta: float = 0.1, + model: Optional[str] = None, + logits_processor: Optional[llama.LogitsProcessorList] = None, + grammar: Optional[llama.LlamaGrammar] = None, + logit_bias: Optional[Dict[str, float]] = None, + logprobs: Optional[bool] = None, + top_logprobs: Optional[int] = None, + **kwargs, # type: ignore + ) -> Union[ + llama_types.CreateChatCompletionResponse, + Iterator[llama_types.CreateChatCompletionStreamResponse], + ]: + result = chat_formatter( + messages=messages, + functions=functions, + function_call=function_call, + tools=tools, + tool_choice=tool_choice, + ) + prompt = result.prompt + if result.stop is not None: + stop = [] if stop is None else [stop] if isinstance(stop, str) else stop + rstop = result.stop if isinstance(result.stop, list) else [result.stop] + stop = stop + rstop + + stopping_criteria = None + if result.stopping_criteria is not None: + stopping_criteria = result.stopping_criteria + + if response_format is not None and response_format["type"] == "json_object": + grammar = _grammar_for_response_format(response_format, verbose=llama.verbose) + + # Convert legacy functions to tools + if functions is not None: + tools = [ + { + "type": "function", + "function": 
function, + } + for function in functions + ] + + # Convert legacy function_call to tool_choice + if function_call is not None: + if isinstance(function_call, str) and ( + function_call == "none" or function_call == "auto" + ): + tool_choice = function_call + if isinstance(function_call, dict) and "name" in function_call: + tool_choice = { + "type": "function", + "function": { + "name": function_call["name"], + }, + } + + tool = None + if tool_choice is not None and isinstance(tool_choice, dict) and tools is not None: + name = tool_choice["function"]["name"] + tool = next((t for t in tools if t["function"]["name"] == name), None) + if tool is None: + raise ValueError(f"Tool choice '{name}' not found in tools.") + schema = tool["function"]["parameters"] + try: + # create grammar from json schema + grammar = llama_grammar.LlamaGrammar.from_json_schema( + json.dumps(schema), verbose=llama.verbose + ) + except Exception as e: + grammar = llama_grammar.LlamaGrammar.from_string( + llama_grammar.JSON_GBNF, verbose=llama.verbose + ) + + completion_or_chunks = llama.create_completion( + prompt=prompt, + temperature=temperature, + top_p=top_p, + top_k=top_k, + min_p=min_p, + typical_p=typical_p, + logprobs=top_logprobs if logprobs else None, + stream=stream, + stop=stop, + seed=seed, + max_tokens=max_tokens, + presence_penalty=presence_penalty, + frequency_penalty=frequency_penalty, + repeat_penalty=repeat_penalty, + tfs_z=tfs_z, + mirostat_mode=mirostat_mode, + mirostat_tau=mirostat_tau, + mirostat_eta=mirostat_eta, + model=model, + logits_processor=logits_processor, + stopping_criteria=stopping_criteria, + grammar=grammar, + logit_bias=logit_bias, + ) + if tool is not None: + tool_name = tool["function"]["name"] + return _convert_completion_to_chat_function( + tool_name, completion_or_chunks, stream + ) + return _convert_completion_to_chat(completion_or_chunks, stream=stream) + + return chat_completion_handler + + +def hf_autotokenizer_to_chat_formatter( + pretrained_model_name_or_path: Union[str, os.PathLike[str]] +) -> ChatFormatter: + # https://huggingface.co/docs/transformers/main/chat_templating + # https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1#instruction-format + # https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1/blob/main/tokenizer_config.json + from transformers import AutoTokenizer # type: ignore + + tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path) # type: ignore + + def format_autotokenizer( + messages: List[llama_types.ChatCompletionRequestMessage], + **kwargs: Any, + ) -> ChatFormatterResponse: + tokenizer.use_default_system_prompt = False # type: ignore + prompt: str = tokenizer.apply_chat_template(messages, tokenize=False) # type: ignore + assert isinstance(prompt, str) + # Return formatted prompt and eos token by default + return ChatFormatterResponse(prompt=prompt, stop=tokenizer.eos_token) + + return format_autotokenizer + + +def hf_autotokenizer_to_chat_completion_handler( + pretrained_model_name_or_path: Union[str, os.PathLike[str]] +) -> LlamaChatCompletionHandler: + chat_formatter = hf_autotokenizer_to_chat_formatter(pretrained_model_name_or_path) + return chat_formatter_to_chat_completion_handler(chat_formatter) + + +def hf_tokenizer_config_to_chat_formatter( + tokenizer_config: Dict[str, Any], + add_generation_prompt: bool = True, +) -> ChatFormatter: + assert isinstance(tokenizer_config, dict) + + assert "chat_template" in tokenizer_config + assert isinstance(tokenizer_config["chat_template"], str) + chat_template = 
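A sketch of `hf_autotokenizer_to_chat_completion_handler` wired into a `Llama` instance; the repo id and model path are examples, and the `transformers` package is assumed to be installed.

```python
# Illustrative sketch: repo id and model path are examples.
from llama_cpp import Llama
from llama_cpp.llama_chat_format import hf_autotokenizer_to_chat_completion_handler

chat_handler = hf_autotokenizer_to_chat_completion_handler(
    "mistralai/Mistral-7B-Instruct-v0.1"
)

llm = Llama(
    model_path="./BioMistral-7B.Q4_K_M.gguf",  # placeholder
    chat_handler=chat_handler,
    n_ctx=2048,
)

resp = llm.create_chat_completion(
    messages=[{"role": "user", "content": "List two symptoms of anemia."}]
)
print(resp["choices"][0]["message"]["content"])
```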
tokenizer_config["chat_template"] + + assert "bos_token" in tokenizer_config + assert isinstance(tokenizer_config["bos_token"], str) + bos_token = tokenizer_config["bos_token"] + + assert "eos_token" in tokenizer_config + assert isinstance(tokenizer_config["eos_token"], str) + eos_token = tokenizer_config["eos_token"] + + env = jinja2.Environment( + loader=jinja2.BaseLoader(), + trim_blocks=True, + lstrip_blocks=True, + ).from_string(chat_template) + + def format_tokenizer_config( + messages: List[llama_types.ChatCompletionRequestMessage], + **kwargs: Any, + ) -> ChatFormatterResponse: + # TODO: veryify this is correct + # Add a blank assistant message to the end of the messages to prompt the model to generate a response + if add_generation_prompt: + messages = [ + *messages, + llama_types.ChatCompletionRequestAssistantMessage( + role="assistant", content="" + ), + ] + prompt = env.render( + messages=messages, + bos_token=bos_token, + eos_token=eos_token, + ) + return ChatFormatterResponse(prompt=prompt, stop=[eos_token, bos_token]) + + return format_tokenizer_config + + +def hf_tokenizer_config_to_chat_completion_handler( + tokenizer_config: Dict[str, Any], + add_generation_prompt: bool = True, +) -> LlamaChatCompletionHandler: + chat_formatter = hf_tokenizer_config_to_chat_formatter( + tokenizer_config, add_generation_prompt=add_generation_prompt + ) + return chat_formatter_to_chat_completion_handler(chat_formatter) + + +def guess_chat_format_from_gguf_metadata(metadata: Dict[str, str]) -> Optional[str]: + if "tokenizer.chat_template" not in metadata: + return None + + if metadata["tokenizer.chat_template"] == CHATML_CHAT_TEMPLATE: + return "chatml" + + if (metadata["tokenizer.chat_template"] == MISTRAL_INSTRUCT_CHAT_TEMPLATE or + metadata["tokenizer.chat_template"] == MIXTRAL_INSTRUCT_CHAT_TEMPLATE): + return "mistral-instruct" + + if metadata["tokenizer.chat_template"] == LLAMA3_INSTRUCT_CHAT_TEMPLATE: + return "llama-3" + + return None + + +### Utility functions for formatting chat prompts ### +# TODO: Replace these with jinja2 templates + + +def _get_system_message( + messages: List[llama_types.ChatCompletionRequestMessage], +) -> str: + """Get the first system message.""" + for message in messages: + if message["role"] == "system": + return message["content"] or "" + return "" + + +def _map_roles( + messages: List[llama_types.ChatCompletionRequestMessage], + role_map: Dict[str, str], +) -> List[Tuple[str, Optional[str]]]: + """Map the message roles.""" + output: List[Tuple[str, Optional[str]]] = [] + for message in messages: + role = message["role"] + if role in role_map: + content: str | None = ( + message["content"] if isinstance(message["content"], str) else None + ) + output.append((role_map[role], content)) + return output + + +def _format_llama2( + system_message: str, messages: List[Tuple[str, Optional[str]]], sep: str, sep2: str +) -> str: + """Format the prompt with the llama2 style.""" + seps = [sep, sep2] + ret = system_message + sep + for i, (role, message) in enumerate(messages): + if system_message and i == 0: + m = message or "" + ret += m + seps[i % 2] + elif message: + ret += role + message + " " + seps[i % 2] + else: + ret += role + " " + return ret + + +def _format_add_colon_single( + system_message: str, messages: List[Tuple[str, Optional[str]]], sep: str +) -> str: + """Format the prompt with the add-colon-single style.""" + ret = system_message + sep + for role, message in messages: + if message: + ret += role + ": " + message + sep + else: + ret += role + ":" 
+ return ret + + +def _format_add_colon_two( + system_message: str, messages: List[Tuple[str, Optional[str]]], sep: str, sep2: str +) -> str: + """Format the prompt with the add-colon-two style.""" + seps = [sep, sep2] + ret = system_message + seps[0] + for i, (role, message) in enumerate(messages): + if message: + ret += role + ": " + message + seps[i % 2] + else: + ret += role + ":" + return ret + + +def _format_no_colon_single( + system_message: str, messages: List[Tuple[str, Optional[str]]], sep: str +) -> str: + """Format the prompt with the no-colon-single style.""" + ret = system_message + for role, message in messages: + if message: + ret += role + message + sep + else: + ret += role + return ret + + +def _format_add_colon_space_single( + system_message: str, messages: List[Tuple[str, Optional[str]]], sep: str +) -> str: + """Format the prompt with the add-colon-space-single style.""" + ret = system_message + sep + for role, message in messages: + if message: + ret += role + ": " + message + sep + else: + ret += role + ": " # must be end with a space + return ret + + +def _format_chatml( + system_message: str, messages: List[Tuple[str, Optional[str]]], sep: str +) -> str: + """Format the prompt with the chatml style.""" + ret = "" if system_message == "" else system_message + sep + "\n" + for role, message in messages: + if message: + ret += role + "\n" + message + sep + "\n" + else: + ret += role + "\n" + return ret + + +def _format_chatglm3( + system_message: str, messages: List[Tuple[str, Optional[str]]], sep: str +) -> str: + """Format the prompt with the chatglm3 style.""" + ret = "" + if system_message: + ret += system_message + for role, message in messages: + if message: + ret += role + "\n" + " " + message + else: + ret += role + return ret + +def _grammar_for_json(verbose:bool=False): + return llama_grammar.LlamaGrammar.from_string(llama_grammar.JSON_GBNF, verbose=verbose) + +def _grammar_for_json_schema( + schema: str, + verbose: bool = False, + fallback_to_json: bool = True +): + try: + return llama_grammar.LlamaGrammar.from_json_schema(schema, verbose=verbose) + except Exception as e: + if fallback_to_json: + return _grammar_for_json(verbose=verbose) + else: + raise e + +def _grammar_for_response_format( + response_format: llama_types.ChatCompletionRequestResponseFormat, + verbose: bool = False +): + if response_format["type"] != "json_object": + return None + + if "schema" in response_format: + return _grammar_for_json_schema( + json.dumps(response_format["schema"]), verbose=verbose + ) + else: + return _grammar_for_json(verbose=verbose) + +### Chat Formats ### + + +def register_chat_format(name: str): + def decorator(f: ChatFormatter): + chat_completion_handler = chat_formatter_to_chat_completion_handler(f) + LlamaChatCompletionHandlerRegistry().register_chat_completion_handler( + name, chat_completion_handler + ) + return f + + return decorator + + +# see https://github.com/huggingface/transformers/blob/main/src/transformers/models/llama/tokenization_llama.py +# system prompt is "embedded" in the first message +@register_chat_format("llama-2") +def format_llama2( + messages: List[llama_types.ChatCompletionRequestMessage], + **kwargs: Any, +) -> ChatFormatterResponse: + _system_template = "[INST] <>\n{system_message}\n<>" + _roles = dict(user="[INST]", assistant="[/INST]") + _messages = _map_roles(messages, _roles) + system_message = _get_system_message(messages) + if system_message: + system_message = _system_template.format(system_message=system_message) + _prompt 
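A minimal sketch of registering a custom format with `register_chat_format`, built from the helpers defined above; the `"qa"` name and prompt layout are invented for the example.

```python
# Illustrative sketch: the "qa" format is made up; helpers come from this module.
from typing import Any, List

import llama_cpp.llama_types as llama_types
from llama_cpp.llama_chat_format import (
    ChatFormatterResponse,
    _format_add_colon_single,
    _get_system_message,
    _map_roles,
    register_chat_format,
)


@register_chat_format("qa")
def format_qa(
    messages: List[llama_types.ChatCompletionRequestMessage],
    **kwargs: Any,
) -> ChatFormatterResponse:
    system_message = _get_system_message(messages)
    _messages = _map_roles(messages, dict(user="Question", assistant="Answer"))
    _messages.append(("Answer", None))
    _prompt = _format_add_colon_single(system_message, _messages, "\n")
    return ChatFormatterResponse(prompt=_prompt, stop="\nQuestion")

# Once registered, the format can normally be selected by name, e.g.
# Llama(model_path="...", chat_format="qa").
```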
= _format_llama2(system_message, _messages, " ", "") + "[/INST]" + return ChatFormatterResponse(prompt=_prompt) + + +# Chat format for Llama-3 models, see more details at: +# https://github.com/meta-llama/llama3/blob/main/llama/tokenizer.py#L202-L229 +@register_chat_format("llama-3") +def format_llama3( + messages: List[llama_types.ChatCompletionRequestMessage], + **kwargs: Any, +) -> ChatFormatterResponse: + _roles = dict( + system="<|start_header_id|>system<|end_header_id|>\n\n", + user="<|start_header_id|>user<|end_header_id|>\n\n", + assistant="<|start_header_id|>assistant<|end_header_id|>\n\n", + ) + _begin_token = "<|begin_of_text|>" + _sep = "<|eot_id|>" + _messages = _map_roles(messages, _roles) + _messages.append((_roles["assistant"], None)) + _prompt = _format_no_colon_single(_begin_token, _messages, _sep) + return ChatFormatterResponse(prompt=_prompt, stop=_sep) + + +@register_chat_format("alpaca") +def format_alpaca( + messages: List[llama_types.ChatCompletionRequestMessage], + **kwargs: Any, +) -> ChatFormatterResponse: + _roles = dict(user="### Instruction", assistant="### Response") + _sep = "\n\n" + _sep2 = "" + system_message = _get_system_message(messages) + _messages = _map_roles(messages, _roles) + _prompt = _format_add_colon_two(system_message, _messages, _sep, _sep2) + return ChatFormatterResponse(prompt=_prompt) + + +@register_chat_format("qwen") +def format_qwen( + messages: List[llama_types.ChatCompletionRequestMessage], + **kwargs: Any, +) -> ChatFormatterResponse: + _roles = dict(user="<|im_start|>user", assistant="<|im_start|>assistant") + system_message = "You are a helpful assistant." + system_template = "<|im_start|>system\n{system_message}" + system_message = system_template.format(system_message=system_message) + _messages = _map_roles(messages, _roles) + _messages.append((_roles["assistant"], None)) + _sep = "<|im_end|>" + _prompt = _format_chatml(system_message, _messages, _sep) + _sep2 = "<|endoftext|>" + return ChatFormatterResponse(prompt=_prompt, stop=_sep2) + + +@register_chat_format("vicuna") +def format( + messages: List[llama_types.ChatCompletionRequestMessage], + **kwargs: Any, +) -> ChatFormatterResponse: + _system_message = "A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions." 
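For reference, roughly what the `llama-3` formatter above produces for a short conversation (line breaks in the comment are for readability only; the real string is contiguous).

```python
# Illustrative sketch of the rendered llama-3 prompt.
from llama_cpp.llama_chat_format import format_llama3

result = format_llama3(
    messages=[
        {"role": "system", "content": "You are terse."},
        {"role": "user", "content": "What is BMI?"},
    ]
)
# result.prompt is roughly:
#   <|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\nYou are terse.<|eot_id|>
#   <|start_header_id|>user<|end_header_id|>\n\nWhat is BMI?<|eot_id|>
#   <|start_header_id|>assistant<|end_header_id|>\n\n
# and result.stop == "<|eot_id|>".
print(result.prompt)
```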
+ _roles = dict(user="USER", assistant="ASSISTANT") + _sep = " " + _sep2 = "" + system_message = _system_message + _messages = _map_roles(messages, _roles) + _messages.append((_roles["assistant"], None)) + _prompt = _format_add_colon_two(system_message, _messages, _sep, _sep2) + return ChatFormatterResponse(prompt=_prompt) + + +@register_chat_format("oasst_llama") +def format_oasst_llama( + messages: List[llama_types.ChatCompletionRequestMessage], + **kwargs: Any, +) -> ChatFormatterResponse: + _system_template = "[INST] <>\n{system_message}\n<>\n\n" + _roles = dict(user="<|prompter|>", assistant="<|assistant|>") + _sep = "" + system_message = _get_system_message(messages) + system_message = _system_template.format(system_message=system_message) + _messages = _map_roles(messages, _roles) + _messages.append((_roles["assistant"], None)) + _prompt = _format_no_colon_single(system_message, _messages, _sep) + return ChatFormatterResponse(prompt=_prompt) + + +@register_chat_format("baichuan-2") +def format_baichuan2( + messages: List[llama_types.ChatCompletionRequestMessage], + **kwargs: Any, +) -> ChatFormatterResponse: + _system_template = "{system_message}" + _roles = dict(user="", assistant="") + _sep = "" + system_message = _get_system_message(messages) + system_message = _system_template.format(system_message=system_message) + _messages = _map_roles(messages, _roles) + _messages.append((_roles["assistant"], None)) + _prompt = _format_no_colon_single(system_message, _messages, _sep) + return ChatFormatterResponse(prompt=_prompt) + + +@register_chat_format("baichuan") +def format_baichuan( + messages: List[llama_types.ChatCompletionRequestMessage], + **kwargs: Any, +) -> ChatFormatterResponse: + _system_template = "{system_message}" + _roles = dict(user="", assistant="") + _sep = "" + system_message = _get_system_message(messages) + system_message = _system_template.format(system_message=system_message) + _messages = _map_roles(messages, _roles) + _messages.append((_roles["assistant"], None)) + _prompt = _format_no_colon_single(system_message, _messages, _sep) + return ChatFormatterResponse(prompt=_prompt) + + +@register_chat_format("openbuddy") +def format_openbuddy( + messages: List[llama_types.ChatCompletionRequestMessage], + **kwargs: Any, +) -> ChatFormatterResponse: + _system_message = """You are a helpful, respectful and honest INTP-T AI Assistant named Buddy. You are talking to a human User. +Always answer as helpfully and logically as possible, while being safe. Your answers should not include any harmful, political, religious, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature. +If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information. +You can speak fluently in many languages, for example: English, Chinese. +You cannot access the internet, but you have vast knowledge, cutoff: 2021-09. +You are trained by OpenBuddy team, (https://openbuddy.ai, https://github.com/OpenBuddy/OpenBuddy), you are based on LLaMA and Falcon transformers model, not related to GPT or OpenAI. 
+ +""" + _roles = dict(user="User", assistant="Assistant") + _sep = "\n" + system_message = _system_message + _messages = _map_roles(messages, _roles) + _messages.append((_roles["assistant"], None)) + _prompt = _format_add_colon_single(system_message, _messages, _sep) + return ChatFormatterResponse(prompt=_prompt) + + +@register_chat_format("redpajama-incite") +def format_redpajama_incite( + messages: List[llama_types.ChatCompletionRequestMessage], + **kwargs: Any, +) -> ChatFormatterResponse: + _system_message = _get_system_message(messages) + _roles = dict(user="", assistant="") + _sep = "\n" + _stop = "" + system_message = _system_message + _messages = _map_roles(messages, _roles) + _messages.append((_roles["assistant"], None)) + _prompt = _format_add_colon_single(system_message, _messages, _sep) + return ChatFormatterResponse(prompt=_prompt, stop=_stop) + + +@register_chat_format("snoozy") +def format_snoozy( + messages: List[llama_types.ChatCompletionRequestMessage], + **kwargs: Any, +) -> ChatFormatterResponse: + system_template = "### Instruction:\n{system_message}" + default_system_message = "The prompt below is a question to answer, a task to complete, or a conversation to respond to; decide which and write an appropriate response." + _system_message = _get_system_message(messages) + _system_message = ( + _system_message if _system_message != "" else default_system_message + ) + system_message = system_template.format(system_message=_system_message) + _roles = dict(user="### Prompt", assistant="### Response") + _sep = "\n" + _stop = "###" + system_message = _system_message + _messages = _map_roles(messages, _roles) + _messages.append((_roles["assistant"], None)) + _prompt = _format_add_colon_single(system_message, _messages, _sep) + return ChatFormatterResponse(prompt=_prompt, stop=_stop) + + +@register_chat_format("phind") +def format_phind( + messages: List[llama_types.ChatCompletionRequestMessage], + **kwargs: Any, +) -> ChatFormatterResponse: + _roles = dict(user="### User Message", assistant="### Assistant") + _sep = "\n\n" + _system_message = "### System Prompt\nYou are an intelligent programming assistant." + _messages = _map_roles(messages, _roles) + _messages.append((_roles["assistant"], None)) + _prompt = _format_add_colon_single(_system_message, _messages, _sep) + return ChatFormatterResponse(prompt=_prompt) + + +@register_chat_format("intel") +def format_intel( + messages: List[llama_types.ChatCompletionRequestMessage], + **kwargs: Any, +) -> ChatFormatterResponse: + _roles = dict(user="### User:", assistant="### Assistant:") + _sep = "\n" + _system_message = "### System:\n{system_message}" + _messages = _map_roles(messages, _roles) + _messages.append((_roles["assistant"], None)) + _prompt = _format_add_colon_single(_system_message, _messages, _sep) + return ChatFormatterResponse(prompt=_prompt) + + +@register_chat_format("open-orca") +def format_open_orca( + messages: List[llama_types.ChatCompletionRequestMessage], + **kwargs: Any, +) -> ChatFormatterResponse: + system_template = "{system_message}" + system_message = ( + "You are a helpful assistant. Please answer truthfully and write out your " + "thinking step by step to be sure you get the right answer. If you make a mistake or encounter " + "an error in your thinking, say so out loud and attempt to correct it. If you don't know or " + "aren't sure about something, say so clearly. You will act as a professional logician, mathematician, " + "and physicist. 
You will also act as the most appropriate type of expert to answer any particular " + "question or solve the relevant problem; state which expert type your are, if so. Also think of " + "any particular named expert that would be ideal to answer the relevant question or solve the " + "relevant problem; name and act as them, if appropriate." + ) + roles = ("User", "Assistant") + sep = "<|end_of_turn|>\n" + # stop_token_ids=[32000, 32001], # "<|end_of_turn|>" + stop_str = "User" + system_message = system_template.format(system_message=system_message) + _messages = _map_roles(messages, dict(zip(roles, roles))) + _messages.append((roles[1], None)) + _prompt = _format_add_colon_space_single(system_message, _messages, sep) + return ChatFormatterResponse(prompt=_prompt, stop=stop_str) + + +@register_chat_format("mistrallite") +def format_mistrallite( + messages: List[llama_types.ChatCompletionRequestMessage], + **kwargs: Any, +) -> ChatFormatterResponse: + _roles = dict(user="<|prompter|>", assistant="\n<|assistant|>") + _sep = " " + system_template = """<|system|>{system_message}""" + system_message = _get_system_message(messages) + system_message = system_template.format(system_message=system_message) + _messages = _map_roles(messages, _roles) + _messages.append((_roles["assistant"], None)) + _prompt = _format_no_colon_single(system_message, _messages, _sep) + return ChatFormatterResponse(prompt=_prompt) + + +@register_chat_format("zephyr") +def format_zephyr( + messages: List[llama_types.ChatCompletionRequestMessage], + **kwargs: Any, +) -> ChatFormatterResponse: + system_template = """<|system|> +{system_message}""" + system_message = _get_system_message(messages) + system_message = system_template.format(system_message=system_message) + _roles = dict(user="<|user|>\n", assistant="<|assistant|>\n") + _sep = "" + _messages = _map_roles(messages, _roles) + _messages.append((_roles["assistant"], None)) + _prompt = _format_chatml(system_message, _messages, _sep) + return ChatFormatterResponse(prompt=_prompt, stop=_sep) + + +@register_chat_format("pygmalion") +def format_pygmalion( + messages: List[llama_types.ChatCompletionRequestMessage], + **kwargs: Any, +) -> ChatFormatterResponse: + system_template = """<|system|>{system_message}""" + system_message = _get_system_message(messages) + system_message = system_template.format(system_message=system_message) + _roles = dict(user="<|user|>", assistant="<|model|>") + _sep = "\n" + _messages = _map_roles(messages, _roles) + _messages.append((_roles["assistant"], None)) + _prompt = _format_chatml(system_message, _messages, _sep) + return ChatFormatterResponse(prompt=_prompt, stop=_sep) + + +@register_chat_format("chatml") +def format_chatml( + messages: List[llama_types.ChatCompletionRequestMessage], + **kwargs: Any, +) -> ChatFormatterResponse: + system_template = """<|im_start|>system +{system_message}""" + system_message = _get_system_message(messages) + system_message = system_template.format(system_message=system_message) + _roles = dict(user="<|im_start|>user", assistant="<|im_start|>assistant") + _sep = "<|im_end|>" + _messages = _map_roles(messages, _roles) + _messages.append((_roles["assistant"], None)) + _prompt = _format_chatml(system_message, _messages, _sep) + return ChatFormatterResponse(prompt=_prompt, stop=_sep) + + +@register_chat_format("mistral-instruct") +def format_mistral_instruct( + messages: List[llama_types.ChatCompletionRequestMessage], + **kwargs: Any, +) -> ChatFormatterResponse: + bos = "" + eos = "" + stop = eos + prompt = 
bos + for message in messages: + if ( + message["role"] == "user" + and message["content"] is not None + and isinstance(message["content"], str) + ): + prompt += "[INST] " + message["content"] + elif ( + message["role"] == "assistant" + and message["content"] is not None + ): + prompt += " [/INST]" + message["content"] + eos + prompt += " [/INST]" + return ChatFormatterResponse(prompt=prompt, stop=stop) + + +@register_chat_format("chatglm3") +def format_chatglm3( + messages: List[llama_types.ChatCompletionRequestMessage], + **kwargs: Any, +) -> ChatFormatterResponse: + system_template = """<|system|> +{system_message}""" + system_message = _get_system_message(messages) + system_message = system_template.format(system_message=system_message) + _roles = dict(user="<|user|>", assistant="<|assistant|>") + _sep = "" + _messages = _map_roles(messages, _roles) + _messages.append((_roles["assistant"], None)) + _prompt = _format_chatglm3(system_message, _messages, _sep) + return ChatFormatterResponse(prompt=_prompt, stop=_sep) + + +@register_chat_format("openchat") +def format_openchat( + messages: List[llama_types.ChatCompletionRequestMessage], + **kwargs: Any, +) -> ChatFormatterResponse: + system_template = "{system_message}<|end_of_turn|>" + system_message = _get_system_message(messages) + system_message = system_template.format(system_message=system_message) + _roles = dict( + user="GPT4 Correct User: ", assistant="<|end_of_turn|>GPT4 Correct Assistant: " + ) + _sep = "<|end_of_turn|>" + _messages = _map_roles(messages, _roles) + _messages.append((_roles["assistant"], None)) + _prompt = _format_chatml(system_message, _messages, _sep) + return ChatFormatterResponse(prompt=_prompt, stop=_sep) + + +# Chat format for Saiga models, see more details and available models: +# https://huggingface.co/collections/IlyaGusev/saiga2-saigamistral-6505d4ccc3d1e53166b636cd +@register_chat_format("saiga") +def format_saiga( + messages: list[llama_types.ChatCompletionRequestMessage], + **kwargs: Any, +) -> ChatFormatterResponse: + _message_template = "{role}\n{content}" + _roles = dict(user="user", bot="bot", system="system") + _messages = _map_roles(messages, _roles) + + _prompt = "" + for role, content in _messages: + if content: + _prompt += _message_template.format(role=role, content=content) + else: + _prompt += f"{role}\n" + # Response template + _prompt += "bot" + return ChatFormatterResponse(prompt=_prompt.strip()) + + +# Chat format for Google's Gemma models, see more details and available models: +# https://huggingface.co/collections/google/gemma-release-65d5efbccdbb8c4202ec078b +@register_chat_format("gemma") +def format_gemma( + messages: List[llama_types.ChatCompletionRequestMessage], + **kwargs: Any, +) -> ChatFormatterResponse: + system_message = _get_system_message(messages) + if system_message != "": + logger.debug( + "`role='system'` messages are not allowed on Google's Gemma models." 
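A sketch of the `mistral-instruct` formatter applied to a short exchange. Note that Mistral's reference template wraps turns in `<s>`/`</s>` BOS/EOS tokens; since the `bos`/`eos` assignments above are empty string literals in this copy, those tokens do not appear in the rendered prompt here.

```python
# Illustrative sketch; rendered output shown as a comment.
from llama_cpp.llama_chat_format import format_mistral_instruct

result = format_mistral_instruct(
    messages=[
        {"role": "user", "content": "What does Q4_K_M mean?"},
        {"role": "assistant", "content": "A 4-bit k-quant GGUF variant."},
        {"role": "user", "content": "Is it a good default?"},
    ]
)
print(result.prompt)
# [INST] What does Q4_K_M mean? [/INST]A 4-bit k-quant GGUF variant.[INST] Is it a good default? [/INST]
```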
+ ) + _roles = dict(user="user\n", assistant="model\n") + _sep = "\n" + _messages = _map_roles(messages, _roles) + _messages.append((_roles["assistant"], None)) + _prompt = _format_no_colon_single(system_message="", messages=_messages, sep=_sep) + return ChatFormatterResponse(prompt=_prompt, stop=_sep) + + +# Tricky chat formats that require custom chat handlers + + +@register_chat_completion_handler("functionary") +def functionary_chat_handler( + llama: llama.Llama, + messages: List[llama_types.ChatCompletionRequestMessage], + functions: Optional[List[llama_types.ChatCompletionFunction]] = None, + function_call: Optional[llama_types.ChatCompletionRequestFunctionCall] = None, + tools: Optional[List[llama_types.ChatCompletionTool]] = None, + tool_choice: Optional[llama_types.ChatCompletionToolChoiceOption] = None, + temperature: float = 0.2, + top_p: float = 0.95, + top_k: int = 40, + min_p: float = 0.05, + typical_p: float = 1.0, + stream: bool = False, + stop: Optional[Union[str, List[str]]] = [], + response_format: Optional[llama_types.ChatCompletionRequestResponseFormat] = None, + max_tokens: Optional[int] = None, + presence_penalty: float = 0.0, + frequency_penalty: float = 0.0, + repeat_penalty: float = 1.1, + tfs_z: float = 1.0, + mirostat_mode: int = 0, + mirostat_tau: float = 5.0, + mirostat_eta: float = 0.1, + model: Optional[str] = None, + logits_processor: Optional[llama.LogitsProcessorList] = None, + grammar: Optional[llama.LlamaGrammar] = None, + **kwargs, # type: ignore +) -> Union[llama_types.ChatCompletion, Iterator[llama_types.ChatCompletionChunk]]: + SYSTEM_MESSAGE = """A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions. The assistant calls functions with appropriate input when necessary""" + + def generate_type_definition( + param: Dict[str, llama_types.JsonType], indent_level: int, shared_defs + ) -> str: + indent = " " * indent_level + if "$ref" in param: + # Reference to a shared definition + ref_name = param["$ref"].split("/")[ + -1 + ] # Extract the type name from the reference + return ref_name + elif param.get("type") == "array": + items = param.get("items", {}) + item_type = generate_type_definition(items, indent_level + 1, shared_defs) + return f"Array<{item_type}>" + elif param.get("type") == "object": + properties = param.get("properties", {}) + nested_schema = "{\n" + for nested_param_name, nested_param in properties.items(): + nested_param_type = generate_type_definition( + nested_param, indent_level + 1, shared_defs + ) + nested_schema += ( + f"{indent} {nested_param_name}: {nested_param_type},\n" + ) + nested_schema += indent + "}" + return nested_schema + elif "enum" in param: + # Enum type + return " | ".join([f'"{enum_value}"' for enum_value in param["enum"]]) + else: + # Simple type + return param.get("type", "any") + + def generate_shared_definitions(shared_defs, indent_level: int) -> str: + indent = " " * indent_level + shared_definitions = "" + for def_name, def_properties in shared_defs.items(): + shared_definitions += f"{indent}type {def_name} = " + if def_properties.get("type") == "object": + shared_definitions += generate_type_definition( + def_properties, indent_level, shared_defs + ) + elif "enum" in def_properties: + # Enum type + shared_definitions += " | ".join( + [f'"{enum_value}"' for enum_value in def_properties["enum"]] + ) + shared_definitions += ";\n" + return shared_definitions + + def generate_schema_from_functions(functions, 
namespace="functions") -> str: + schema = ( + "// Supported function definitions that should be called when necessary.\n" + ) + schema += f"namespace {namespace} {{\n\n" + + # Generate shared definitions + shared_definitions = {} + for function in functions: + parameters = function.get("parameters", {}) + shared_definitions.update(parameters.get("$defs", {})) + + schema += generate_shared_definitions(shared_definitions, 1) + + for function in functions: + function_name = function["name"] + description = function.get("description", "") + parameters = function.get("parameters", {}) + required_params = parameters.get("required", []) + + schema += f" // {description}\n" + schema += f" type {function_name} = (_: {{\n" + + for param_name, param in parameters.get("properties", {}).items(): + param_description = param.get("description", "") + param_type = generate_type_definition(param, 2, shared_definitions) + optional_indicator = "" if param_name in required_params else "?" + schema += f" // {param_description}\n" + schema += f" {param_name}{optional_indicator}: {param_type},\n" + schema += " }) => any;\n\n" + + schema += "}} // namespace {}\n".format(namespace) + return schema + + def prepare_messages_for_inference( + messages: List[llama_types.ChatCompletionRequestMessage], + functions: Optional[List[llama_types.ChatCompletionFunctions]] = None, + tools: Optional[List[llama_types.ChatCompletionTool]] = None, + ): + all_messages: List[llama_types.ChatCompletionRequestMessage] = [] + if functions is not None: + all_messages.append( + llama_types.ChatCompletionRequestSystemMessage( + role="system", content=generate_schema_from_functions(functions) + ) + ) + + if tools is not None: + all_messages.append( + llama_types.ChatCompletionRequestSystemMessage( + role="system", + content=generate_schema_from_functions( + [ + tool["function"] + for tool in tools + if tool["type"] == "function" + ] + ), + ) + ) + + all_messages.append( + llama_types.ChatCompletionRequestSystemMessage( + role="system", content=SYSTEM_MESSAGE + ) + ) + + for message in messages: + # Function call responses + if message["role"] == "function" and "name" in message: + message["name"] = f"functions.{message['name']}" + # Function call requests by assistant + if "function_call" in message: + message["function_call"][ + "name" + ] = f"functions.{message['function_call']['name']}" + all_messages.append(message) + + all_messages.append( + llama_types.ChatCompletionRequestAssistantMessage( + role="assistant", content=None + ) + ) + + def message_to_str(msg: llama_types.ChatCompletionRequestMessage): + if msg["role"] == "system": + return f"system:\n{msg['content']}\n" + + elif msg["role"] == "function" and "name" in msg: + return f"function name={msg['name']}:\n{msg['content']}\n" + elif msg["role"] == "function" and "function_call" in msg: + return f"function name={msg['function_call']['name']}:\n{msg['function_call']['arguments']}\n" + elif msg["role"] == "tool": + if msg["content"] is not None: + return f"function name={msg['tool_call_id']}:\n{msg['content']}\n" + else: + return f"function name={msg['tool_call_id']}\n" + elif msg["role"] == "user": + if msg["content"] is None: + return "user:\n\n" + else: + return f"user:\n{msg['content']}\n" + elif msg["role"] == "assistant": + if msg["content"] is not None and "function_call" in msg: + return f"assistant:\n{msg['content']}\nassistant to={msg['function_call']['name']}:\n{msg['function_call']['arguments']}\n" + elif "function_call" in msg: + return f"assistant 
to={msg['function_call']['name']}:\n{msg['function_call']['arguments']}\n" + elif "tool_calls" in msg and len(msg["tool_calls"]) > 0: + for tool_call in msg[ + "tool_calls" + ]: # NOTE: probably doesn't work with the functionary model + return f"assistant to={tool_call['id']}:\n{tool_call['function']['arguments']}\n" + elif msg["content"] is None: + return "assistant" + else: + return f"assistant:\n{msg['content']}\n" + else: + raise ValueError(f"Unsupported role: {msg['role']}") + + return "".join([message_to_str(msg) for msg in all_messages]) + + if tools is not None: + functions = [tool["function"] for tool in tools if tool["type"] == "function"] + + if tool_choice is not None: + function_call = ( + tool_choice if isinstance(tool_choice, str) else tool_choice["function"] + ) + + prompt = prepare_messages_for_inference(messages, functions, tools) + + if function_call is None and (functions is None or len(functions) == 0): + completion_or_completion_chunks = llama.create_completion( + prompt=prompt + ":\n", + temperature=temperature, + top_p=top_p, + top_k=top_k, + min_p=min_p, + typical_p=typical_p, + stream=stream, + stop=["user:", ""], + max_tokens=max_tokens, + presence_penalty=presence_penalty, + frequency_penalty=frequency_penalty, + repeat_penalty=repeat_penalty, + tfs_z=tfs_z, + mirostat_mode=mirostat_mode, + mirostat_tau=mirostat_tau, + mirostat_eta=mirostat_eta, + model=model, + logits_processor=logits_processor, + grammar=grammar, + ) + return _convert_completion_to_chat(completion_or_completion_chunks, stream=stream) # type: ignore + + if function_call is None or ( + isinstance(function_call, str) and function_call == "auto" + ): + stop = "\n" + completion: llama_types.Completion = llama.create_completion( + prompt=prompt, stop=stop, stream=False + ) # type: ignore + completion_text = completion["choices"][0]["text"] + # strip " to=functions." 
and ending ":" + function_call = completion_text.split(".")[-1][:-1] + new_prompt = prompt + completion_text + stop + elif isinstance(function_call, str) and function_call != "none": + new_prompt = prompt + f":\n" + elif isinstance(function_call, dict): + new_prompt = prompt + f" to=functions.{function_call['name']}:\n" + function_call = function_call["name"] + else: + new_prompt = prompt + f":\n" + + function_body = None + for function in functions or []: + if function["name"] == function_call: + function_body = function["parameters"] + break + for tool in tools or []: + if tool["type"] == "function" and tool["function"]["name"] == function_call: + function_body = tool["function"]["parameters"] + break + + if function_body is not None: + try: + with suppress_stdout_stderr(disable=llama.verbose): + grammar_text = llama_grammar.json_schema_to_gbnf( + json.dumps(function_body) + ) + grammar = llama_grammar.LlamaGrammar.from_string( + llama_grammar.json_schema_to_gbnf(json.dumps(function_body)), + verbose=llama.verbose, + ) + print(grammar_text) + except Exception as e: + if llama.verbose: + print( + "Failed to parse function body as JSON schema, falling back to default grammar" + ) + print(e) + with suppress_stdout_stderr(disable=llama.verbose): + grammar = llama_grammar.LlamaGrammar.from_string( + llama_grammar.JSON_GBNF, + verbose=llama.verbose, + ) + else: + with suppress_stdout_stderr(disable=llama.verbose): + grammar = llama_grammar.LlamaGrammar.from_string( + llama_grammar.JSON_GBNF, verbose=llama.verbose + ) + + completion: llama_types.Completion = llama.create_completion( + prompt=new_prompt, + stop=["user:", ""], + stream=False, + grammar=grammar, + max_tokens=max_tokens, + temperature=temperature, + top_p=top_p, + top_k=top_k, + min_p=min_p, + typical_p=typical_p, + presence_penalty=presence_penalty, + frequency_penalty=frequency_penalty, + repeat_penalty=repeat_penalty, + tfs_z=tfs_z, + mirostat_mode=mirostat_mode, + mirostat_tau=mirostat_tau, + mirostat_eta=mirostat_eta, + model=model, + logits_processor=logits_processor, + ) # type: ignore + + assert "usage" in completion + assert isinstance(function_call, str) + assert stream is False # TODO: support stream mode + + if llama.verbose: + print(new_prompt) + print(completion["choices"][0]["text"]) + + # TODO: support stream mode + return llama_types.CreateChatCompletionResponse( + id="chat" + completion["id"], + object="chat.completion", + created=completion["created"], + model=completion["model"], + choices=[ + { + "index": 0, + "message": { + "role": "assistant", + "content": None, + "function_call": { + "name": function_call, + "arguments": completion["choices"][0]["text"], + }, + "tool_calls": [ + { + "id": function_call, + "type": "function", + "function": { + "name": function_call, + "arguments": completion["choices"][0]["text"], + }, + } + ], + }, + "logprobs": completion["choices"][0]["logprobs"], + "finish_reason": "tool_calls", + } + ], + usage=completion["usage"], + ) + + +@register_chat_completion_handler("functionary-v1") +@register_chat_completion_handler("functionary-v2") +def functionary_v1_v2_chat_handler( + llama: llama.Llama, + messages: List[llama_types.ChatCompletionRequestMessage], + functions: Optional[List[llama_types.ChatCompletionFunction]] = None, + function_call: Optional[llama_types.ChatCompletionRequestFunctionCall] = None, + tools: Optional[List[llama_types.ChatCompletionTool]] = None, + tool_choice: Optional[llama_types.ChatCompletionToolChoiceOption] = None, + temperature: float = 0.2, + top_p: 
float = 0.95, + top_k: int = 40, + min_p: float = 0.05, + typical_p: float = 1.0, + stream: bool = False, + stop: Optional[Union[str, List[str]]] = [], + response_format: Optional[llama_types.ChatCompletionRequestResponseFormat] = None, + max_tokens: Optional[int] = None, + presence_penalty: float = 0.0, + frequency_penalty: float = 0.0, + repeat_penalty: float = 1.1, + tfs_z: float = 1.0, + mirostat_mode: int = 0, + mirostat_tau: float = 5.0, + mirostat_eta: float = 0.1, + model: Optional[str] = None, + logits_processor: Optional[llama.LogitsProcessorList] = None, + grammar: Optional[llama.LlamaGrammar] = None, + **kwargs, # type: ignore +) -> Union[llama_types.ChatCompletion, Iterator[llama_types.ChatCompletionChunk]]: + SYSTEM_MESSAGE = """A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions. The assistant calls functions with appropriate input when necessary""" + + tokenizer = llama.tokenizer_ + assert hasattr( + tokenizer, "hf_tokenizer" + ), "Please provide a valid hf_tokenizer_path from https://huggingface.co/meetkai when initializing the Llama class" + from transformers import AutoTokenizer + + if "<|START_OF_FUNCTION_CALL|>" in tokenizer.hf_tokenizer.additional_special_tokens: + version = "v1" + END_SYSTEM_TOKEN = "<|END_OF_SYSTEM|>" + END_USER_TOKEN = "<|END_OF_USER|>" + END_ASSISTANT_TOKEN = "<|END_OF_ASSISTANT|>" + END_FUNCTION_RESULT_TOKEN = "<|END_OF_FUNCTION_RESULT|>" + START_FUNCTION_CALL_TOKEN = "<|START_OF_FUNCTION_CALL|>" + END_FUNCTION_CALL_TOKEN = "<|END_OF_FUNCTION_CALL|>" + else: + version = "v2" + RECIPIENT_TOKEN = "<|recipient|>" + FROM_TOKEN = "<|from|>" + STOP_TOKEN = "<|stop|>" + CONTENT_TOKEN = "<|content|>" + + def generate_type_definition( + param: Dict[str, llama_types.JsonType], indent_level: int, shared_defs + ) -> str: + indent = " " * indent_level + if "$ref" in param: + # Reference to a shared definition + ref_name = param["$ref"].split("/")[ + -1 + ] # Extract the type name from the reference + return ref_name + elif param.get("type") == "array": + items = param.get("items", {}) + item_type = generate_type_definition(items, indent_level + 1, shared_defs) + return f"Array<{item_type}>" + elif param.get("type") == "object": + properties = param.get("properties", {}) + nested_schema = "{\n" + for nested_param_name, nested_param in properties.items(): + nested_param_type = generate_type_definition( + nested_param, indent_level + 1, shared_defs + ) + nested_schema += ( + f"{indent} {nested_param_name}: {nested_param_type},\n" + ) + nested_schema += indent + "}" + return nested_schema + elif "enum" in param: + # Enum type + return " | ".join([f'"{enum_value}"' for enum_value in param["enum"]]) + else: + # Simple type + return param.get("type", "any") + + def generate_shared_definitions(shared_defs, indent_level: int) -> str: + indent = " " * indent_level + shared_definitions = "" + for def_name, def_properties in shared_defs.items(): + shared_definitions += f"{indent}type {def_name} = " + if def_properties.get("type") == "object": + shared_definitions += generate_type_definition( + def_properties, indent_level, shared_defs + ) + elif "enum" in def_properties: + # Enum type + shared_definitions += " | ".join( + [f'"{enum_value}"' for enum_value in def_properties["enum"]] + ) + shared_definitions += ";\n" + return shared_definitions + + def generate_schema_from_functions(functions, namespace="functions") -> str: + schema = ( + "// Supported function definitions 
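Since the v1/v2 handler above requires a tokenizer exposing `hf_tokenizer`, the `Llama` instance is usually constructed with `LlamaHFTokenizer`; a sketch follows, with the repo id and filename taken as examples from meetkai's published GGUF releases that may need adjusting.

```python
# Illustrative sketch: repo id and filename are examples; huggingface_hub and
# transformers are assumed to be installed.
from llama_cpp import Llama
from llama_cpp.llama_tokenizer import LlamaHFTokenizer

llm = Llama.from_pretrained(
    repo_id="meetkai/functionary-small-v2.2-GGUF",
    filename="functionary-small-v2.2.q4_0.gguf",
    tokenizer=LlamaHFTokenizer.from_pretrained("meetkai/functionary-small-v2.2-GGUF"),
    chat_format="functionary-v2",
    n_ctx=4096,
)
```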
that should be called when necessary.\n" + ) + schema += f"namespace {namespace} {{\n\n" + + # Generate shared definitions + shared_definitions = {} + for function in functions: + parameters = function.get("parameters", {}) + shared_definitions.update(parameters.get("$defs", {})) + + schema += generate_shared_definitions(shared_definitions, 1) + + for function in functions: + function_name = function["name"] + description = function.get("description", "") + parameters = function.get("parameters", {}) + required_params = parameters.get("required", []) + + schema += f"// {description}\n" + schema += f"type {function_name} = (_: {{\n" + + for param_name, param in parameters.get("properties", {}).items(): + param_description = param.get("description", "") + param_type = generate_type_definition(param, 2, shared_definitions) + optional_indicator = "" if param_name in required_params else "?" + schema += f"// {param_description}\n" + schema += f"{param_name}{optional_indicator}: {param_type},\n" + schema += "}) => any;\n\n" + + schema += "}} // namespace {}".format(namespace) + return schema + + def prepare_messages_for_inference( + messages: List[llama_types.ChatCompletionRequestMessage], + tokenizer: AutoTokenizer, + version: Literal["v1", "v2"], + functions: Optional[List[llama_types.ChatCompletionFunctions]] = None, + tools: Optional[List[llama_types.ChatCompletionTool]] = None, + tool_choice: Union[Dict, str] = "auto", + ): + all_messages: List[llama_types.ChatCompletionRequestMessage] = [] + if tool_choice == "none": + all_messages.append( + llama_types.ChatCompletionRequestSystemMessage( + role="system", content=generate_schema_from_functions([]) + ) + ) + else: + if functions is not None: + all_messages.append( + llama_types.ChatCompletionRequestSystemMessage( + role="system", content=generate_schema_from_functions(functions) + ) + ) + elif tools is not None and tool_choice != "none": + all_messages.append( + llama_types.ChatCompletionRequestSystemMessage( + role="system", + content=generate_schema_from_functions( + [ + tool["function"] + for tool in tools + if tool["type"] == "function" + ] + ), + ) + ) + + all_messages.append( + llama_types.ChatCompletionRequestSystemMessage( + role="system", content=SYSTEM_MESSAGE + ) + ) + + for message in messages: + # Function call responses + if message["role"] == "function" and "name" in message: + message["name"] = f"functions.{message['name']}" + # Function call requests by assistant + if "function_call" in message: + message["function_call"][ + "name" + ] = f"functions.{message['function_call']['name']}" + all_messages.append(message) + + if version == "v1": + suffix = "assistant:\n" + else: + suffix = "<|from|>assistant\n<|recipient|>" + + return ( + tokenizer.hf_tokenizer.apply_chat_template(all_messages, tokenize=False) + + suffix + ) + + if tools is not None: + functions = [tool["function"] for tool in tools if tool["type"] == "function"] + + if tool_choice is not None: + function_call = ( + tool_choice if isinstance(tool_choice, str) else tool_choice["function"] + ) + elif function_call is not None: + pass + else: + function_call = "auto" + + prompt = prepare_messages_for_inference( + messages, tokenizer, version, functions, tools, function_call + ) + + # If no tools/functions are provided + if function_call == "none" or functions is None or len(functions) == 0: + if version == "v1": + stop = END_ASSISTANT_TOKEN + else: + stop = STOP_TOKEN + prompt += "all\n<|content|>" + + completion_or_completion_chunks = llama.create_completion( + 
prompt=prompt, + temperature=temperature, + top_p=top_p, + top_k=top_k, + min_p=min_p, + typical_p=typical_p, + stream=stream, + stop=stop, + max_tokens=max_tokens, + presence_penalty=presence_penalty, + frequency_penalty=frequency_penalty, + repeat_penalty=repeat_penalty, + tfs_z=tfs_z, + mirostat_mode=mirostat_mode, + mirostat_tau=mirostat_tau, + mirostat_eta=mirostat_eta, + model=model, + logits_processor=logits_processor, + grammar=grammar, + ) + if stream is False: + completion_or_completion_chunks["choices"][0]["text"] = completion_or_completion_chunks["choices"][0]["text"].lstrip() + return _convert_completion_to_chat(completion_or_completion_chunks, stream=stream) # type: ignore + + def get_grammar(function_call): + function_body = None + for function in functions or []: + if function["name"] == function_call: + function_body = function["parameters"] + break + for tool in tools or []: + if tool["type"] == "function" and tool["function"]["name"] == function_call: + function_body = tool["function"]["parameters"] + break + + try: + with suppress_stdout_stderr(disable=llama.verbose): + grammar_text = llama_grammar.json_schema_to_gbnf( + json.dumps(function_body) + ) + grammar = llama_grammar.LlamaGrammar.from_string( + llama_grammar.json_schema_to_gbnf(json.dumps(function_body)) + ) + print(grammar_text) + except Exception as e: + if llama.verbose: + print( + "Failed to parse function body as JSON schema, falling back to default grammar" + ) + print(e) + with suppress_stdout_stderr(disable=llama.verbose): + grammar = llama_grammar.LlamaGrammar.from_string( + llama_grammar.JSON_GBNF, verbose=llama.verbose + ) + + return grammar + + def create_completion(prompt, stop, grammar): + completion = cast(llama_types.Completion, llama.create_completion( + prompt=prompt, + temperature=temperature, + top_p=top_p, + top_k=top_k, + min_p=min_p, + typical_p=typical_p, + stream=stream, + stop=stop, + max_tokens=max_tokens, + presence_penalty=presence_penalty, + frequency_penalty=frequency_penalty, + repeat_penalty=repeat_penalty, + tfs_z=tfs_z, + mirostat_mode=mirostat_mode, + mirostat_tau=mirostat_tau, + mirostat_eta=mirostat_eta, + model=model, + logits_processor=logits_processor, + grammar=grammar, + )) + + return completion + + content = "" + function_calls, function_bodies = [], [] + completion_tokens = 0 + + def generate_streaming(tools, functions, function_call, prompt): + assert version == "v2", "Streaming for v1 is not supported" + + chunk_id, chunk_created = None, None + + # If tool_choice/function_call is provided + if isinstance(function_call, dict): + prompt += f"{function_call['name']}\n{CONTENT_TOKEN}" + grammar = get_grammar(function_call["name"]) + stops = [STOP_TOKEN, FROM_TOKEN] + tool_id = "".join([random.choice(string.ascii_letters + string.digits) for _ in range(24)]) + completion = create_completion(prompt=prompt, stop=stops, grammar=grammar) + completion_text = "" + first = True + for chunk in completion: + # Yield the tool/function name first + if first: + if tools is not None: + func_call_dict = { + "tool_calls": [ + { + "index": 0, + "id": "call_" + tool_id, + "type": "function", + "function": {"name": function_call["name"], "arguments": ""}, + } + ] + } + else: + func_call_dict = {"function_call": {"name": function_call["name"], "arguments": ""}} + yield llama_types.CreateChatCompletionStreamResponse( + id="chat" + chunk["id"], + object="chat.completion.chunk", + created=chunk["created"], + model=chunk["model"], + choices=[ + {"index": 0, "logprobs": None, "delta": {"role": 
None, "content": None, **func_call_dict}} + ], + ) + first = False + if tools is not None: + func_call_dict = { + "tool_calls": [ + { + "index": 0, + "id": "call_" + tool_id, + "type": "function", + "function": { + "name": None, + "arguments": chunk["choices"][0]["text"].rstrip(), + }, + } + ] + } + else: + func_call_dict = {"function_call": {"name": None, "arguments": chunk["choices"][0]["text"].rstrip()}} + if len(chunk["choices"][0]["text"].rstrip()) > 0: + yield llama_types.CreateChatCompletionStreamResponse( + id="chat" + chunk["id"], + object="chat.completion.chunk", + created=chunk["created"], + model=chunk["model"], + choices=[ + { + "index": 0, + "logprobs": chunk["choices"][0]["logprobs"], + "delta": { + "role": None, + "content": None, + **func_call_dict, + }, + } + ], + ) + # Yield tool_call/function_call stop message + yield llama_types.CreateChatCompletionStreamResponse( + id="chat" + chunk["id"], + object="chat.completion.chunk", + created=chunk["created"], + model=chunk["model"], + choices=[ + { + "index": 0, + "finish_reason": "tool_calls" if tools is not None else "function_call", + "logprobs": None, + "delta": { + "role": None, "content": None, "function_call": None, "tool_calls": None + }, + } + ], + ) + # If "auto" or no tool_choice/function_call + elif isinstance(function_call, str) and function_call == "auto": + tool_index = 0 + while True: + # Generate function name first + grammar = None + stops = CONTENT_TOKEN + completion = create_completion(prompt=prompt, stop=stops, grammar=grammar) + completion_text = "" + for chunk in completion: + completion_text += chunk["choices"][0]["text"] + if chunk_id is None: + chunk_id = chunk["id"] + if chunk_created is None: + chunk_created = chunk["created"] + function_name = completion_text.strip() + if function_name == "all": + prompt += "all\n<|content|>" + # Yield the first empty message for content + yield llama_types.CreateChatCompletionStreamResponse( + id="chat" + chunk_id, + model=chunk["model"], + created=chunk_created, + object="chat.completion.chunk", + choices=[ + { + "index": 0, + "delta": {"role": "assistant", "content": ""}, + "logprobs": None, + "finish_reason": None, + } + ], + ) + else: + prompt += f"{function_name}\n<|content|>" + grammar = get_grammar(function_name) + tool_id = "".join([random.choice(string.ascii_letters + string.digits) for _ in range(24)]) + if tools is not None: + func_call_dict = { + "tool_calls": [ + { + "index": tool_index, + "id": "call_" + tool_id, + "type": "function", + "function": {"name": function_name, "arguments": ""}, + } + ] + } + else: + func_call_dict = {"function_call": {"name": function_name, "arguments": ""}} + # Stream function name + yield llama_types.CreateChatCompletionStreamResponse( + id="chat" + chunk_id, + object="chat.completion.chunk", + created=chunk_created, + model=chunk["model"], + choices=[ + { + "index": 0, + "logprobs": chunk["choices"][0]["logprobs"], + "delta": { + "role": "assistant", + "content": None, + **func_call_dict, + }, + } + ], + ) + # Generate content + stops = [RECIPIENT_TOKEN, STOP_TOKEN] + completion = create_completion(prompt=prompt, stop=stops, grammar=grammar) + if function_name == "all": + completion_text = "" + stop_sequence, buffer, is_end = "\n<|from|>assistant\n<|recipient|>", [], False + for i, chunk in enumerate(completion): + completion_text += chunk["choices"][0]["text"] + if is_end: + buffer.append(chunk["choices"][0]["text"].strip(" ")) + if stop_sequence.startswith("".join(buffer)): + continue + else: + buffer.pop() + while 
len(buffer) > 0: + yield llama_types.CreateChatCompletionStreamResponse( + id="chat" + chunk_id, + object="chat.completion.chunk", + created=chunk_created, + model=chunk["model"], + choices=[ + { + "index": 0, + "logprobs": chunk["choices"][0]["logprobs"], + "delta": { + "role": "assistant", "content": buffer.pop(0) + }, + } + ], + ) + is_end = False + elif chunk["choices"][0]["text"] == "\n": + is_end = True + buffer.append(chunk["choices"][0]["text"].strip(" ")) + continue + + if len(buffer) == 0 and len(chunk["choices"][0]["text"]) > 0: + yield llama_types.CreateChatCompletionStreamResponse( + id="chat" + chunk_id, + object="chat.completion.chunk", + created=chunk_created, + model=chunk["model"], + choices=[ + { + "index": 0, + "logprobs": chunk["choices"][0]["logprobs"], + "delta": { + "role": "assistant", + "content": chunk["choices"][0]["text"] if i > 0 else chunk["choices"][0]["text"].lstrip() + }, + } + ], + ) + # Check whether the model wants to generate another turn + if "<|from|> assistant" in completion_text or "<|from|>assistant" in completion_text: + if completion_text.endswith("\n<|from|>assistant\n"): + cleaned_completion_text = completion_text[:-len("\n<|from|>assistant\n")].strip() + elif completion_text.endswith("\n<|from|> assistant\n"): + cleaned_completion_text = completion_text[:-len("\n<|from|> assistant\n")].strip() + else: + cleaned_completion_text = completion_text.strip() + prompt += f"{cleaned_completion_text}\n<|from|>assistant\n<|recipient|>" + else: + # Yield stop message + yield llama_types.CreateChatCompletionStreamResponse( + id="chat" + chunk_id, + model=chunk["model"], + created=chunk_created, + object="chat.completion.chunk", + choices=[ + { + "index": 0, + "delta": {}, + "logprobs": None, + "finish_reason": "stop", + } + ], + ) + break + else: + # Check whether the model wants to generate another turn + completion_text = "" + for chunk in completion: + completion_text += chunk["choices"][0]["text"] + if len(chunk["choices"][0]["text"].rstrip()) > 0: + if tools is not None: + func_call_dict = { + "tool_calls": [ + { + "index": tool_index, + "id": "call_" + tool_id, + "type": "function", + "function": { + "name": None, + "arguments": chunk["choices"][0]["text"].rstrip(), + }, + } + ] + } + else: + func_call_dict = {"function_call": {"name": None, "arguments": chunk["choices"][0]["text"].rstrip()}} + yield llama_types.CreateChatCompletionStreamResponse( + id="chat" + chunk_id, + object="chat.completion.chunk", + created=chunk_created, + model=chunk["model"], + choices=[ + { + "index": 0, + "logprobs": chunk["choices"][0]["logprobs"], + "delta": { + "role": None, + "content": None, + **func_call_dict, + }, + } + ], + ) + prompt += completion_text.strip() + grammar = None + completion = create_completion(prompt=prompt, stop=stops, grammar=grammar) + completion_text += "".join([chunk["choices"][0]["text"] for chunk in completion]) + if ("<|from|> assistant" in completion_text or "<|from|>assistant" in completion_text) and tools is not None: + prompt += "\n<|from|>assistant\n<|recipient|>" + tool_index += 1 + else: + # Yield tool_call/function_call stop message + yield llama_types.CreateChatCompletionStreamResponse( + id="chat" + chunk_id, + object="chat.completion.chunk", + created=chunk_created, + model=chunk["model"], + choices=[ + { + "index": 0, + "finish_reason": "tool_calls" if tools is not None else "function_call", + "logprobs": None, + "delta": { + "role": None, "content": None, "function_call": None, "tool_calls": None + }, + } + ], + ) + break + 
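The streaming branch above is easiest to understand from the consumer side: for v2 (functionary-style) models the handler first yields a chunk carrying the tool/function name with empty `arguments`, then streams the grammar-constrained argument JSON in small fragments, and finally yields a chunk whose `finish_reason` is `tool_calls` (or `function_call`). A minimal consumer sketch follows; the model filename, the `meetkai` tokenizer repo, and the `get_weather` tool are illustrative assumptions, not part of this patch, and it assumes the `functionary-v2` chat format name registered earlier in this module.

```python
from llama_cpp import Llama
from llama_cpp.llama_tokenizer import LlamaHFTokenizer

# Hypothetical model path and tokenizer repo, for illustration only.
llm = Llama(
    model_path="./functionary-small-v2.2.q4_0.gguf",
    chat_format="functionary-v2",
    tokenizer=LlamaHFTokenizer.from_pretrained("meetkai/functionary-small-v2.2-GGUF"),
    n_ctx=4096,
)

stream = llm.create_chat_completion(
    messages=[{"role": "user", "content": "What is the weather in Paris?"}],
    tools=[{
        "type": "function",
        "function": {
            "name": "get_weather",
            "description": "Get the current weather for a city",
            "parameters": {
                "type": "object",
                "properties": {"city": {"type": "string"}},
                "required": ["city"],
            },
        },
    }],
    tool_choice="auto",
    stream=True,
)

# Re-assemble the streamed tool call: the name arrives in the first delta,
# the JSON arguments arrive as incremental string fragments afterwards.
name, arguments = None, ""
for chunk in stream:
    delta = chunk["choices"][0]["delta"]
    for call in delta.get("tool_calls") or []:
        fn = call["function"]
        name = fn["name"] or name
        arguments += fn["arguments"] or ""
print(name, arguments)
```

Chunks that carry plain assistant text instead of a tool call expose it under `delta["content"]`, so a real client would branch on which field is present.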
+ if stream is not False: + return generate_streaming( + tools=tools, functions=functions, function_call=function_call, prompt=prompt + ) + else: + if version == "v1": + # If no or "auto" tool_choice/function_call + if isinstance(function_call, str) and function_call == "auto": + stops = ["\n", END_ASSISTANT_TOKEN] + # If tool_choice/function_call is provided + elif isinstance(function_call, dict): + prompt += f"{START_FUNCTION_CALL_TOKEN}{function_call['name']}:\n" + stops = END_FUNCTION_CALL_TOKEN + function_call = function_call["name"] + function_calls.append(function_call) + grammar = get_grammar(function_call) + else: + prompt = prompt + stops = ["\n", END_ASSISTANT_TOKEN] + + completion = create_completion(stop=stops) + completion_text = completion["choices"][0]["text"] + completion_tokens += completion["usage"]["completion_tokens"] + + + # If the generation does not involve a function call + if ( + START_FUNCTION_CALL_TOKEN not in prompt + and START_FUNCTION_CALL_TOKEN not in completion_text + ): + completion["usage"]["completion_tokens"] = completion_tokens + return _convert_completion_to_chat(completion, stream=stream) # type: ignore + # If the generation involves a function call in completion, generate the parameters + elif ( + START_FUNCTION_CALL_TOKEN not in prompt + and START_FUNCTION_CALL_TOKEN in completion_text + ): + prompt += ( + completion_text.replace( + f"{START_FUNCTION_CALL_TOKEN} ", START_FUNCTION_CALL_TOKEN + ) + + "\n" + ) + function_calls.append( + completion_text.split(START_FUNCTION_CALL_TOKEN)[-1][:-1].strip() + ) + grammar = get_grammar(function_calls[-1]) + completion = create_completion(stop=END_FUNCTION_CALL_TOKEN) + completion_tokens += completion["usage"]["completion_tokens"] + function_bodies.append(completion["choices"][0]["text"].strip()) + # If the prompt involves a function call, just append generated parameters to function_bodies + else: + function_bodies.append(completion_text.strip()) + else: + # If tool_choice/function_call is provided + if isinstance(function_call, dict): + prompt += f"{function_call['name']}\n{CONTENT_TOKEN}" + function_call = function_call["name"] + function_calls.append(function_call) + grammar = get_grammar(function_call) + stops = [STOP_TOKEN, FROM_TOKEN] + completion = create_completion(stop=stops) + completion_text = completion["choices"][0]["text"] + completion_tokens += completion["usage"]["completion_tokens"] + function_bodies.append(completion_text.strip()) + # If "auto" or no tool_choice/function_call + elif isinstance(function_call, str) and function_call == "auto": + while True: + # Generate function name first + grammar = None + stops = CONTENT_TOKEN + completion = create_completion(stop=stops) + completion_text = completion["choices"][0]["text"] + completion_tokens += completion["usage"]["completion_tokens"] + function_name = completion_text.strip() + if function_name == "all": + prompt += "all\n<|content|>" + else: + function_call = completion_text.strip() + prompt += f"{function_call}\n<|content|>" + function_calls.append(function_call) + grammar = get_grammar(function_call) + # Generate content + stops = [RECIPIENT_TOKEN, STOP_TOKEN] + completion = create_completion(stop=stops) + completion_text = completion["choices"][0]["text"] + completion_tokens += completion["usage"]["completion_tokens"] + if function_name == "all": + if completion_text.endswith("\n<|from|>assistant\n"): + content += completion_text[:-len("\n<|from|>assistant\n")] + if completion_text.endswith("\n<|from|> assistant\n"): + content += 
completion_text[-len("\n<|from|> assistant\n")] + else: + content += completion_text + content = content.lstrip() + # Check whether the model wants to generate another turn + if "<|from|> assistant" in completion_text or "<|from|>assistant" in completion_text: + if completion_text.endswith("\n<|from|>assistant\n"): + cleaned_completion_text = completion_text[:-len("\n<|from|>assistant\n")].strip() + elif completion_text.endswith("\n<|from|> assistant\n"): + cleaned_completion_text = completion_text[-len("\n<|from|> assistant\n")].strip() + else: + cleaned_completion_text = completion_text.strip() + prompt += f"{cleaned_completion_text}\n<|from|>assistant\n<|recipient|>" + else: + break + else: + function_bodies.append(completion_text.strip()) + # Check whether the model wants to generate another turn + prompt += completion_text.strip() + grammar = None + completion = create_completion(stop=stops) + completion_tokens += completion["usage"]["completion_tokens"] + if "<|from|> assistant" in completion["choices"][0]["text"] or "<|from|>assistant" in completion["choices"][0]["text"]: + prompt += "\n<|from|>assistant\n<|recipient|>" + else: + break + + assert "usage" in completion + assert len(function_calls) == len(function_bodies) + + tool_calls: List[llama_types.ChatCompletionMessageToolCall] = [] + for function_call, function_body in zip(function_calls, function_bodies): + tool_calls.append( + { + "id": "call_" + + "".join( + [ + random.choice(string.ascii_letters + string.digits) + for _ in range(24) + ] + ), + "type": "function", + "function": { + "name": function_call, + "arguments": function_body, + }, + } + ) + + # TODO: support stream mode + function_call_dict: Union[Dict[str, str], Dict[Literal["function_call"], llama_types.ChatCompletionRequestAssistantMessageFunctionCall]] = {} + if len(tool_calls) > 0: + if tools is not None: + function_call_dict["tool_calls"] = tool_calls + else: + function_call_dict["function_call"] = { + "name": tool_calls[0]["function"]["name"], + "arguments": tool_calls[0]["function"]["arguments"], + } + completion["usage"]["completion_tokens"] = completion_tokens + return llama_types.CreateChatCompletionResponse( + id="chat" + completion["id"], + object="chat.completion", + created=completion["created"], + model=completion["model"], + choices=[ + { + "index": 0, + "logprobs": completion["choices"][0]["logprobs"], + "message": { + "role": "assistant", + "content": None if content == "" else content, + **function_call_dict, + }, + "finish_reason": "tool_calls" if len(tool_calls) > 0 else "stop", + } + ], + usage=completion["usage"], + ) + + +class Llava15ChatHandler: + DEFAULT_SYSTEM_MESSAGE: Optional[str] = "A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions." 
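`Llava15ChatHandler`, whose definition begins above, works in three steps: it renders the chat with the Jinja2 `CHAT_FORMAT` template that follows, extracts any image URLs from user messages, and then evaluates the prompt as interleaved text tokens and CLIP image embeddings. A minimal usage sketch under stated assumptions: both filenames are placeholders for a locally downloaded LLaVA 1.5 GGUF model and its matching CLIP/mmproj file, and the image URL is illustrative.

```python
from llama_cpp import Llama
from llama_cpp.llama_chat_format import Llava15ChatHandler

# Placeholder paths: any LLaVA 1.5 GGUF model plus its mmproj/CLIP projector file.
chat_handler = Llava15ChatHandler(clip_model_path="./mmproj-model-f16.gguf")
llm = Llama(
    model_path="./llava-v1.5-7b.Q4_K_M.gguf",
    chat_handler=chat_handler,
    n_ctx=2048,  # leave room for the image embedding positions
)

response = llm.create_chat_completion(
    messages=[
        {
            "role": "user",
            "content": [
                {"type": "image_url", "image_url": {"url": "https://example.com/cat.png"}},
                {"type": "text", "text": "What is shown in this image?"},
            ],
        }
    ],
)
print(response["choices"][0]["message"]["content"])
```

The same pattern applies to the `ObsidianChatHandler`, `MoondreamChatHandler`, `Llava16ChatHandler`, `NanoLlavaChatHandler`, and `Llama3VisionAlpha` subclasses defined further down; they only override `CHAT_FORMAT` (and, in some cases, `DEFAULT_SYSTEM_MESSAGE`).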
+ + CHAT_FORMAT = ( + "{% for message in messages %}" + "{% if message.role == 'system' %}" + "{{ message.content }}" + "{% endif %}" + "{% if message.role == 'user' %}" + "{% if message.content is string %}" + "\nUSER: {{ message.content }}" + "{% endif %}" + "{% if message.content is iterable %}" + "\nUSER: " + + "{% for content in message.content %}" + "{% if content.type == 'image_url' and content.image_url is string %}" + "{{ content.image_url }}" + "{% endif %}" + "{% if content.type == 'image_url' and content.image_url is mapping %}" + "{{ content.image_url.url }}" + "{% endif %}" + "{% endfor %}" + + "{% for content in message.content %}" + "{% if content.type == 'text' %}" + "{{ content.text }}" + "{% endif %}" + "{% endfor %}" + + "{% endif %}" + "{% endif %}" + "{% if message.role == 'assistant' and message.content is not none %}" + "\nASSISTANT: {{ message.content }}" + "{% endif %}" + "{% endfor %}" + "{% if add_generation_prompt %}" + "\nASSISTANT: " + "{% endif %}" + ) + + def __init__(self, clip_model_path: str, verbose: bool = True): + import llama_cpp.llava_cpp as llava_cpp + + self.clip_model_path = clip_model_path + self.verbose = verbose + + self._llava_cpp = llava_cpp # TODO: Fix + self._exit_stack = ExitStack() + self._last_image_embed: Optional[llava_cpp.CtypesPointer[llava_cpp.llava_image_embed]] = None + self._last_image_hash: Optional[int] = None + + if not os.path.exists(clip_model_path): + raise ValueError(f"Clip model path does not exist: {clip_model_path}") + + with suppress_stdout_stderr(disable=self.verbose): + clip_ctx = self._llava_cpp.clip_model_load( + self.clip_model_path.encode(), 0 + ) + + if clip_ctx is None: + raise ValueError(f"Failed to load clip model: {clip_model_path}") + + self.clip_ctx = clip_ctx + + def clip_free(): + with suppress_stdout_stderr(disable=self.verbose): + self._llava_cpp.clip_free(self.clip_ctx) + + self._exit_stack.callback(clip_free) + + def last_image_embed_free(): + with suppress_stdout_stderr(disable=self.verbose): + if self._last_image_embed is not None: + self._llava_cpp.llava_image_embed_free(self._last_image_embed) + self._last_image_embed = None + + self._exit_stack.callback(last_image_embed_free) + + def load_image(self, image_url: str) -> bytes: + return self._load_image(image_url) + + def __call__( + self, + *, + llama: llama.Llama, + messages: List[llama_types.ChatCompletionRequestMessage], + functions: Optional[List[llama_types.ChatCompletionFunction]] = None, + function_call: Optional[llama_types.ChatCompletionRequestFunctionCall] = None, + tools: Optional[List[llama_types.ChatCompletionTool]] = None, + tool_choice: Optional[llama_types.ChatCompletionToolChoiceOption] = None, + temperature: float = 0.2, + top_p: float = 0.95, + top_k: int = 40, + min_p: float = 0.05, + typical_p: float = 1.0, + stream: bool = False, + stop: Optional[Union[str, List[str]]] = [], + seed: Optional[int] = None, + response_format: Optional[ + llama_types.ChatCompletionRequestResponseFormat + ] = None, + max_tokens: Optional[int] = None, + presence_penalty: float = 0.0, + frequency_penalty: float = 0.0, + repeat_penalty: float = 1.1, + tfs_z: float = 1.0, + mirostat_mode: int = 0, + mirostat_tau: float = 5.0, + mirostat_eta: float = 0.1, + model: Optional[str] = None, + logits_processor: Optional[llama.LogitsProcessorList] = None, + grammar: Optional[llama.LlamaGrammar] = None, + logit_bias: Optional[Dict[str, float]] = None, + logprobs: Optional[bool] = None, + top_logprobs: Optional[int] = None, + **kwargs, # type: ignore + ) -> 
Union[ + llama_types.CreateChatCompletionResponse, + Iterator[llama_types.CreateChatCompletionStreamResponse], + ]: + assert self.clip_ctx is not None + + system_prompt = _get_system_message(messages) + if system_prompt == "" and self.DEFAULT_SYSTEM_MESSAGE is not None: + messages = [llama_types.ChatCompletionRequestSystemMessage(role="system", content=self.DEFAULT_SYSTEM_MESSAGE)] + messages + + image_urls = self.get_image_urls(messages) + template = jinja2.Template(self.CHAT_FORMAT) + text = template.render(messages=messages, add_generation_prompt=True) + split_text = self.split_text_on_image_urls(text, image_urls) + + def embed_image_bytes(image_bytes: bytes): + if self._last_image_embed is not None and self._last_image_hash is not None and hash(image_bytes) == self._last_image_hash: + return self._last_image_embed + with suppress_stdout_stderr(disable=self.verbose): + embed = ( + self._llava_cpp.llava_image_embed_make_with_bytes( + self.clip_ctx, + llama.context_params.n_threads_batch, + (ctypes.c_uint8 * len(image_bytes)).from_buffer(bytearray(image_bytes)), + len(image_bytes), + ) + ) + self._last_image_embed = embed + self._last_image_hash = hash(image_bytes) + return embed + + # Evaluate prompt + llama.reset() + for i, (type_, value) in enumerate(split_text): + if type_ == "text": + tokens = llama.tokenize(value.encode("utf8"), add_bos=i == 0) + if llama.n_tokens + len(tokens) > llama.n_ctx(): + raise ValueError("Prompt exceeds n_ctx") # TODO: Fix + llama.eval(tokens) + else: + image_bytes = self.load_image(value) + embed = embed_image_bytes(image_bytes) + if llama.n_tokens + embed.contents.n_image_pos > llama.n_ctx(): + raise ValueError("Prompt exceeds n_ctx") # TODO: Fix + n_past = ctypes.c_int(llama.n_tokens) + n_past_p = ctypes.pointer(n_past) + with suppress_stdout_stderr(disable=self.verbose): + self._llava_cpp.llava_eval_image_embed( + llama.ctx, + embed, + llama.n_batch, + n_past_p, + ) + llama.n_tokens = n_past.value + + # Get prompt tokens to avoid a cache miss + prompt = llama.input_ids[: llama.n_tokens].tolist() + + if response_format is not None and response_format["type"] == "json_object": + grammar = _grammar_for_response_format(response_format) + + # Convert legacy functions to tools + if functions is not None: + tools = [ + { + "type": "function", + "function": function, + } + for function in functions + ] + + # Convert legacy function_call to tool_choice + if function_call is not None: + if isinstance(function_call, str) and ( + function_call == "none" or function_call == "auto" + ): + tool_choice = function_call + if isinstance(function_call, dict) and "name" in function_call: + tool_choice = { + "type": "function", + "function": { + "name": function_call["name"], + }, + } + + tool = None + if tool_choice is not None and isinstance(tool_choice, dict) and tools is not None: + name = tool_choice["function"]["name"] + tool = next((t for t in tools if t["function"]["name"] == name), None) + if tool is None: + raise ValueError(f"Tool choice '{name}' not found in tools.") + schema = tool["function"]["parameters"] + try: + # create grammar from json schema + grammar = llama_grammar.LlamaGrammar.from_json_schema( + json.dumps(schema), verbose=llama.verbose + ) + except Exception as e: + grammar = llama_grammar.LlamaGrammar.from_string( + llama_grammar.JSON_GBNF, verbose=llama.verbose + ) + + completion_or_chunks = llama.create_completion( + prompt=prompt, + temperature=temperature, + top_p=top_p, + top_k=top_k, + min_p=min_p, + typical_p=typical_p, + 
logprobs=top_logprobs if logprobs else None, + stream=stream, + stop=stop, + seed=seed, + max_tokens=max_tokens, + presence_penalty=presence_penalty, + frequency_penalty=frequency_penalty, + repeat_penalty=repeat_penalty, + tfs_z=tfs_z, + mirostat_mode=mirostat_mode, + mirostat_tau=mirostat_tau, + mirostat_eta=mirostat_eta, + model=model, + logits_processor=logits_processor, + grammar=grammar, + logit_bias=logit_bias, + ) + if tool is not None: + tool_name = tool["function"]["name"] + return _convert_completion_to_chat_function( + tool_name, completion_or_chunks, stream + ) + return _convert_completion_to_chat(completion_or_chunks, stream=stream) + + @staticmethod + def _load_image(image_url: str) -> bytes: + # TODO: Add Pillow support for other image formats beyond (jpg, png) + if image_url.startswith("data:"): + import base64 + + image_bytes = base64.b64decode(image_url.split(",")[1]) + return image_bytes + else: + import urllib.request + + with urllib.request.urlopen(image_url) as f: + image_bytes = f.read() + return image_bytes + + @staticmethod + def get_image_urls(messages: List[llama_types.ChatCompletionRequestMessage]): + image_urls: List[str] = [] + for message in messages: + if message["role"] == "user": + if message["content"] is None: + continue + for content in message["content"]: + if isinstance(content, dict) and "type" in content: + if content["type"] == "image_url": + if ( + isinstance(content["image_url"], dict) + and "url" in content["image_url"] + ): + image_urls.append(content["image_url"]["url"]) + else: + image_urls.append(content["image_url"]) + return image_urls + + @staticmethod + def split_text_on_image_urls(text: str, image_urls: List[str]): + def find_first(s: str, substrs: List[str]): + for i, substr in enumerate(substrs): + pos = s.find(substr) + if pos != -1: + return pos, i + return None, None + + split_text: List[Tuple[Literal["text", "image_url"], str]] = [] + remaining = text + while remaining: + # Find first image_url + pos, i = find_first(remaining, image_urls) + if pos is not None and i is not None: + if pos > 0: + split_text.append(("text", remaining[:pos])) + split_text.append(("image_url", image_urls[i])) + remaining = remaining[pos + len(image_urls[i]) :] + else: + split_text.append(("text", remaining)) + remaining = "" + return split_text + + @classmethod + def from_pretrained( + cls, + repo_id: str, + filename: Optional[str], + local_dir: Optional[Union[str, os.PathLike[str]]] = None, + local_dir_use_symlinks: Union[bool, Literal["auto"]] = "auto", + cache_dir: Optional[Union[str, os.PathLike[str]]] = None, + **kwargs: Any, + ) -> "Llava15ChatHandler": + import fnmatch + from pathlib import Path + try: + from huggingface_hub import hf_hub_download, HfFileSystem # type: ignore + from huggingface_hub.utils import validate_repo_id # type: ignore + except ImportError: + raise ImportError( + "Llama.from_pretrained requires the huggingface-hub package. " + "You can install it with `pip install huggingface-hub`." 
+ ) + + validate_repo_id(repo_id) + + hffs = HfFileSystem() + + files = [ + file["name"] if isinstance(file, dict) else file + for file in hffs.ls(repo_id) # type: ignore + ] + + # split each file into repo_id, subfolder, filename + file_list: List[str] = [] + for file in files: + rel_path = Path(file).relative_to(repo_id) + file_list.append(str(rel_path)) + + matching_files = [file for file in file_list if fnmatch.fnmatch(file, filename)] # type: ignore + + if len(matching_files) == 0: + raise ValueError( + f"No file found in {repo_id} that match {filename}\n\n" + f"Available Files:\n{json.dumps(file_list)}" + ) + + if len(matching_files) > 1: + raise ValueError( + f"Multiple files found in {repo_id} matching {filename}\n\n" + f"Available Files:\n{json.dumps(files)}" + ) + + (matching_file,) = matching_files + + subfolder = str(Path(matching_file).parent) + filename = Path(matching_file).name + + # download the file + hf_hub_download( + repo_id=repo_id, + filename=filename, + subfolder=subfolder, + local_dir=cast(Union[str, Path, None], local_dir), + local_dir_use_symlinks=local_dir_use_symlinks, + cache_dir=cast(Union[str, Path, None], cache_dir), + ) + + if local_dir is None: + model_path = hf_hub_download( + repo_id=repo_id, + filename=filename, + subfolder=subfolder, + local_dir=local_dir, + local_dir_use_symlinks=local_dir_use_symlinks, + cache_dir=cast(Union[str, Path, None], cache_dir), + local_files_only=True, + ) + else: + model_path = os.path.join(local_dir, filename) + + return cls( + clip_model_path=model_path, + **kwargs, + ) + +class ObsidianChatHandler(Llava15ChatHandler): + # Prompt Format + # The model followed ChatML format. However, with ### as the seperator + + # <|im_start|>user + # What is this sign about?\n + # ### + # <|im_start|>assistant + # The sign is about bullying, and it is placed on a black background with a red background. 
+ # ### + + CHAT_FORMAT = ( + "{% for message in messages %}" + # System message + "{% if message.role == 'system' %}" + "<|im_start|>system\n" + "{{ message.content }}\n" + "###\n" + "{% endif %}" + # User message + "{% if message.role == 'user' %}" + "<|im_start|>user\n" + "{% if message.content is string %}" + "{{ message.content }}" + "{% endif %}" + "{% if message.content is iterable %}" + + "{% for content in message.content %}" + "{% if content.type == 'image_url' and content.image_url is string %}" + "{{ content.image_url }}" + "{% endif %}" + "{% if content.type == 'image_url' and content.image_url is mapping %}" + "{{ content.image_url.url }}" + "{% endif %}" + "{% endfor %}" + + "{% for content in message.content %}" + "{% if content.type == 'text' %}" + "{{ content.text }}" + "{% endif %}" + "{% endfor %}" + + "{% endif %}" + "###\n" + "{% endif %}" + # Assistant message + "{% if message.role == 'assistant' %}" + "<|im_start|>assistant\n" + "{{ message.content }}" + "###\n" + "{% endif %}" + "{% endfor %}" + # Generation prompt + "{% if add_generation_prompt %}" + "<|im_start|>assistant\n" + "{% endif %}" + ) + +class MoondreamChatHandler(Llava15ChatHandler): + # Chat Format: + # f"\n\n{chat_history}Question: {question}\n\nAnswer:" + CHAT_FORMAT = ( + "{% for message in messages %}" + "{% if message.role == 'user' %}" + "{% if message.content is iterable %}" + + # + "{% for content in message.content %}" + "{% if content.type == 'image_url' %}" + "{% if content.image_url is string %}" + "{{ content.image_url }}\n\n" + "{% endif %}" + "{% if content.image_url is mapping %}" + "{{ content.image_url.url }}\n\n" + "{% endif %}" + "{% endif %}" + "{% endfor %}" + + # Question: + "{% for content in message.content %}" + "{% if content.type == 'text' %}" + "Question: {{ content.text }}\n\n" + "{% endif %}" + "{% endfor %}" + + "{% endif %}" + + # Question: + "{% if message.content is string %}" + "Question: {{ message.content }}\n\n" + "{% endif %}" + + "{% endif %}" + + # Answer: + "{% if message.role == 'assistant' %}" + "Answer:{{ message.content }}\n\n" + "{% endif %}" + "{% endfor %}" + + # Generation prompt + "{% if add_generation_prompt %}" + "Answer:" + "{% endif %}" + ) + +class Llava16ChatHandler(Llava15ChatHandler): + DEFAULT_SYSTEM_MESSAGE = "A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions. " + + # Example prompt + # "A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions. USER: \nWhat is shown in this image? 
ASSISTANT:" + + CHAT_FORMAT = ( + "{% for message in messages %}" + "{% if message.role == 'system' %}" + "{{ message.content }}" + "{% endif %}" + "{% if message.role == 'user' %}" + "{% if message.content is iterable %}" + + # + "{% for content in message.content %}" + "{% if content.type == 'image_url' %}" + "{% if content.image_url is string %}" + "{{ content.image_url }}\n" + "{% endif %}" + "{% if content.image_url is mapping %}" + "{{ content.image_url.url }}\n" + "{% endif %}" + "{% endif %}" + "{% endfor %}" + + # Question: + "{% for content in message.content %}" + "{% if content.type == 'text' %}" + "{{ content.text }}" + "{% endif %}" + "{% endfor %}" + + "{% endif %}" + + # Question: + "{% if message.content is string %}" + "{{ message.content }}" + "{% endif %}" + + "{% endif %}" + + # Answer: + "{% if message.role == 'assistant' %}" + "{{ message.content }}" + "{% endif %}" + "{% endfor %}" + + # Generation prompt + "{% if add_generation_prompt %}" + "Answer:" + "{% endif %}" + ) + +class NanoLlavaChatHandler(Llava15ChatHandler): + # Prompt Format + # The model follow the ChatML standard, however, without \n at the end of <|im_end|>: + + # <|im_start|>system + # Answer the question<|im_end|><|im_start|>user + # + # What is the picture about?<|im_end|><|im_start|>assistant + + CHAT_FORMAT = ( + "{% for message in messages %}" + # System message + "{% if message.role == 'system' %}" + "<|im_start|>system\n" + "{{ message.content }}" + "<|im_end|>" + "{% endif %}" + # User message + "{% if message.role == 'user' %}" + "<|im_start|>user\n" + "{% if message.content is string %}" + "{{ message.content }}" + "{% endif %}" + "{% if message.content is iterable %}" + + "{% for content in message.content %}" + "{% if content.type == 'image_url' and content.image_url is string %}" + "{{ content.image_url }}" + "{% endif %}" + "{% if content.type == 'image_url' and content.image_url is mapping %}" + "{{ content.image_url.url }}" + "{% endif %}" + "{% endfor %}" + + "{% for content in message.content %}" + "{% if content.type == 'text' %}" + "{{ content.text }}" + "{% endif %}" + "{% endfor %}" + + "{% endif %}" + "<|im_end|>" + "{% endif %}" + # Assistant message + "{% if message.role == 'assistant' %}" + "<|im_start|>assistant\n" + "{{ message.content }}" + "<|im_end|>" + "{% endif %}" + "{% endfor %}" + # Generation prompt + "{% if add_generation_prompt %}" + "<|im_start|>assistant\n" + "{% endif %}" + ) + +class Llama3VisionAlpha(Llava15ChatHandler): + # question = "" + q + + # prompt = f"<|start_header_id|>user<|end_header_id|>\n\n{question}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n" + DEFAULT_SYSTEM_MESSAGE = None + + CHAT_FORMAT = ( + "{% for message in messages %}" + + "<|start_header_id|>" + + "{% if message.role == 'user' %}" + + "user<|end_header_id|>\n\n" + + "{% if message.content is iterable %}" + + # + "{% for content in message.content %}" + "{% if content.type == 'image_url' %}" + "{% if content.image_url is string %}" + "{{ content.image_url }}" + "{% endif %}" + "{% if content.image_url is mapping %}" + "{{ content.image_url.url }}" + "{% endif %}" + "{% endif %}" + "{% endfor %}" + + # Question: + "{% for content in message.content %}" + "{% if content.type == 'text' %}" + "{{ content.text }}" + "{% endif %}" + "{% endfor %}" + + "{% endif %}" + + # Question: + "{% if message.content is string %}" + "{{ message.content }}" + "{% endif %}" + + "{% endif %}" + + # Answer: + "{% if message.role == 'assistant' %}" + "assistant<|end_header_id|>\n\n" + "{{ 
message.content }}" + "{% endif %}" + + "<|eot_id|>" + + "{% endfor %}" + + # Generation prompt + "{% if add_generation_prompt %}" + "<|start_header_id|>assistant<|end_header_id|>\n\n" + "{% endif %}" + ) + +@register_chat_completion_handler("chatml-function-calling") +def chatml_function_calling( + llama: llama.Llama, + messages: List[llama_types.ChatCompletionRequestMessage], + functions: Optional[List[llama_types.ChatCompletionFunction]] = None, + function_call: Optional[llama_types.ChatCompletionRequestFunctionCall] = None, + tools: Optional[List[llama_types.ChatCompletionTool]] = None, + tool_choice: Optional[llama_types.ChatCompletionToolChoiceOption] = None, + temperature: float = 0.2, + top_p: float = 0.95, + top_k: int = 40, + min_p: float = 0.05, + typical_p: float = 1.0, + stream: bool = False, + stop: Optional[Union[str, List[str]]] = [], + response_format: Optional[llama_types.ChatCompletionRequestResponseFormat] = None, + max_tokens: Optional[int] = None, + presence_penalty: float = 0.0, + frequency_penalty: float = 0.0, + repeat_penalty: float = 1.1, + tfs_z: float = 1.0, + mirostat_mode: int = 0, + mirostat_tau: float = 5.0, + mirostat_eta: float = 0.1, + model: Optional[str] = None, + logits_processor: Optional[llama.LogitsProcessorList] = None, + grammar: Optional[llama.LlamaGrammar] = None, + logprobs: Optional[bool] = None, + top_logprobs: Optional[int] = None, + **kwargs, # type: ignore +) -> Union[ + llama_types.CreateChatCompletionResponse, + Iterator[llama_types.CreateChatCompletionStreamResponse], +]: + print(logprobs) + function_calling_template = ( + "{% for message in messages %}" + "<|im_start|>{{ message.role }}\n" + # System message + "{% if message.role == 'system' %}" + "{{ message.content }}" + "{% if tool_calls %}" + "\n\nYou have access to the following functions:\n" + "{% for tool in tools %}" + "\nfunctions.{{ tool.function.name }}:\n" + "{{ tool.function.parameters | tojson }}" + "\n{% endfor %}" + "\n\nYou can respond to users messages with either a single message or one or more function calls." 
+ "\n\nTo respond with a message begin the message with 'message:', use the following format:" + "\n\nmessage:" + "\n" + "\n\nTo respond with one or more function calls begin the message with 'functions.:', use the following format:" + "\n\nfunctions.:" + '\n{ "arg1": "value1", "arg2": "value2" }' + "\nfunctions.:" + '\n{ "arg1": "value1", "arg2": "value2" }' + "{% endif %}" + "<|im_end|>\n" + "{% endif %}" + # User message + "{% if message.role == 'user' %}" + "{{ message.content }}" + "<|im_end|>\n" + "{% endif %}" + # Assistant message + "{% if message.role == 'assistant' %}" + ## Reglar message + "{% if message.content and message.content | length > 0 %}" + "{% if tool_calls %}" + "message:\n" + "{% endif %}" + "{{ message.content }}" + "<|im_end|>\n" + "{% endif %}" + ## Function calls + "{% if 'tool_calls' in message %}" + "{% for tool_call in message.tool_calls %}" + "functions.{{ tool_call.function.name }}:\n" + "{{ tool_call.function.arguments }}" + "{% endfor %}" + "<|im_end|>\n" + "{% endif %}" + "{% endif %}" + "{% endfor %}" + "{% if add_generation_prompt %}<|im_start|>assistant\n{% endif %}" + ) + template_renderer = jinja2.Environment( + loader=jinja2.BaseLoader(), + autoescape=jinja2.select_autoescape(["html", "xml"]), + undefined=jinja2.StrictUndefined, + ).from_string(function_calling_template) + + # Convert legacy functions to tools + if functions is not None: + tools = [ + { + "type": "function", + "function": function, + } + for function in functions + ] + + # Convert legacy function_call to tool_choice + if function_call is not None: + if isinstance(function_call, str) and ( + function_call == "none" or function_call == "auto" + ): + tool_choice = function_call + if isinstance(function_call, dict) and "name" in function_call: + tool_choice = { + "type": "function", + "function": { + "name": function_call["name"], + }, + } + + stop = [stop, "<|im_end|>"] if isinstance(stop, str) else stop + ["<|im_end|>"] if stop else ["<|im_end|>"] + + # Case 1: No tool choice by user + if ( + tool_choice is None + or (isinstance(tool_choice, str) and tool_choice == "none") + or tools is None + or len(tools) == 0 + ): + prompt = template_renderer.render( + messages=messages, + tools=[], + tool_calls=None, + add_generation_prompt=True, + ) + + if response_format is not None and response_format["type"] == "json_object": + grammar = _grammar_for_response_format(response_format) + + return _convert_completion_to_chat( + llama.create_completion( + prompt=prompt, + temperature=temperature, + top_p=top_p, + top_k=top_k, + min_p=min_p, + typical_p=typical_p, + stream=stream, + stop=stop, + max_tokens=max_tokens, + presence_penalty=presence_penalty, + frequency_penalty=frequency_penalty, + repeat_penalty=repeat_penalty, + tfs_z=tfs_z, + mirostat_mode=mirostat_mode, + mirostat_tau=mirostat_tau, + mirostat_eta=mirostat_eta, + model=model, + logits_processor=logits_processor, + grammar=grammar, + logprobs=top_logprobs if logprobs else None, + ), + stream=stream, + ) + + # Case 2: Tool choice by user + if isinstance(tool_choice, dict): + tool_name = tool_choice["function"]["name"] + tool = next( + (tool for tool in tools if tool["function"]["name"] == tool_name), None + ) + if tool is None: + raise ValueError(f"Tool with name '{tool_name}' not found in tools") + prompt = template_renderer.render( + messages=messages, + tools=tools, + tool_calls=True, + add_generation_prompt=True, + ) + prompt += f"functions.{tool_name}:\n" + try: + grammar = llama_grammar.LlamaGrammar.from_json_schema( + 
json.dumps(tool["function"]["parameters"]), verbose=llama.verbose + ) + except Exception as e: + grammar = llama_grammar.LlamaGrammar.from_string( + llama_grammar.JSON_GBNF, verbose=llama.verbose + ) + if llama.verbose: + print( + "Failed to parse function body as JSON schema, falling back to default grammar" + ) + print(e) + completion_or_chunks = llama.create_completion( + prompt=prompt, + temperature=temperature, + top_p=top_p, + top_k=top_k, + min_p=min_p, + typical_p=typical_p, + stream=stream, + stop=stop, + max_tokens=max_tokens, + presence_penalty=presence_penalty, + frequency_penalty=frequency_penalty, + repeat_penalty=repeat_penalty, + tfs_z=tfs_z, + mirostat_mode=mirostat_mode, + mirostat_tau=mirostat_tau, + mirostat_eta=mirostat_eta, + model=model, + logits_processor=logits_processor, + grammar=grammar, + ) + return _convert_completion_to_chat_function( + tool_name, completion_or_chunks, stream + ) + + # Case 3: Automatic tool choice + assert isinstance(tool_choice, str) and tool_choice == "auto" + function_names = " | ".join( + [f'''"functions.{tool['function']['name']}:"''' for tool in tools] + ) + initial_gbnf_tool_grammar = ( + """root ::= functions | "message:"\n""" + f"""functions ::= {function_names}\n""" + ) + follow_up_gbnf_tool_grammar = ( + """root ::= functions | "<|im_end|>"\n""" + f"""functions ::= {function_names}\n""" + ) + prompt = template_renderer.render( + messages=messages, + tools=tools, + tool_calls=True, + add_generation_prompt=True, + ) + completion_or_chunks = llama.create_completion( + prompt=prompt, + temperature=0, + top_p=top_p, + top_k=top_k, + min_p=min_p, + typical_p=typical_p, + stream=False, + stop=[":"], + max_tokens=None, + presence_penalty=presence_penalty, + frequency_penalty=frequency_penalty, + repeat_penalty=repeat_penalty, + tfs_z=tfs_z, + mirostat_mode=mirostat_mode, + mirostat_tau=mirostat_tau, + mirostat_eta=mirostat_eta, + model=model, + logits_processor=logits_processor, + grammar=llama_grammar.LlamaGrammar.from_string( + initial_gbnf_tool_grammar, verbose=llama.verbose + ), + ) + completion: llama_types.CreateCompletionResponse = completion_or_chunks # type: ignore + text = completion["choices"][0]["text"] + if "message" in text: + return _convert_completion_to_chat( + llama.create_completion( + prompt=prompt + "message:\n", + temperature=temperature, + top_p=top_p, + top_k=top_k, + min_p=min_p, + typical_p=typical_p, + stream=stream, + stop=["<|im_end|>"], + logprobs=top_logprobs if logprobs else None, + max_tokens=None, + presence_penalty=presence_penalty, + frequency_penalty=frequency_penalty, + repeat_penalty=repeat_penalty, + tfs_z=tfs_z, + mirostat_mode=mirostat_mode, + mirostat_tau=mirostat_tau, + mirostat_eta=mirostat_eta, + model=model, + logits_processor=logits_processor, + grammar=llama_grammar.LlamaGrammar.from_string( + follow_up_gbnf_tool_grammar, verbose=llama.verbose + ), + ), + stream=stream, + ) + + # One or more function calls + tool_name = text[len("functions.") :] + tool = next((tool for tool in tools if tool["function"]["name"] == tool_name), None) + if not stream: + completions: List[llama_types.CreateCompletionResponse] = [] + completions_tool_name: List[str] = [] + while tool is not None: + prompt += f"functions.{tool_name}:\n" + try: + grammar = llama_grammar.LlamaGrammar.from_json_schema( + json.dumps(tool["function"]["parameters"]), verbose=llama.verbose + ) + except Exception as e: + grammar = llama_grammar.LlamaGrammar.from_string( + llama_grammar.JSON_GBNF, verbose=llama.verbose + ) + if 
llama.verbose: + print( + "Failed to parse function body as JSON schema, falling back to default grammar" + ) + print(e) + completion_or_chunks = llama.create_completion( + prompt=prompt, + temperature=temperature, + top_p=top_p, + top_k=top_k, + min_p=min_p, + typical_p=typical_p, + stream=False, + stop=stop, + max_tokens=None, + presence_penalty=presence_penalty, + frequency_penalty=frequency_penalty, + repeat_penalty=repeat_penalty, + tfs_z=tfs_z, + mirostat_mode=mirostat_mode, + mirostat_tau=mirostat_tau, + mirostat_eta=mirostat_eta, + model=model, + logits_processor=logits_processor, + grammar=grammar, + ) + completion_or_chunks = cast(llama_types.CreateCompletionResponse, completion_or_chunks) + completions.append(completion_or_chunks) + completions_tool_name.append(tool_name) + prompt += completion_or_chunks["choices"][0]["text"] + prompt += "\n" + + response = llama.create_completion( + prompt=prompt, + temperature=temperature, + top_p=top_p, + top_k=top_k, + min_p=min_p, + typical_p=typical_p, + stream=False, + stop=stop, + max_tokens=None, + presence_penalty=presence_penalty, + frequency_penalty=frequency_penalty, + repeat_penalty=repeat_penalty, + tfs_z=tfs_z, + mirostat_mode=mirostat_mode, + mirostat_tau=mirostat_tau, + mirostat_eta=mirostat_eta, + model=model, + logits_processor=logits_processor, + grammar=llama_grammar.LlamaGrammar.from_string( + follow_up_gbnf_tool_grammar, verbose=llama.verbose + ), + ) + response = cast(llama_types.CreateCompletionResponse, response) + + tool_name = response["choices"][0]["text"][len("functions.") :] + tool = next( + (tool for tool in tools if tool["function"]["name"] == tool_name), None + ) + + # Merge completions + function_call_dict: Union[Dict[str, str], Dict[Literal["function_call"], llama_types.ChatCompletionRequestAssistantMessageFunctionCall]] = { + "function_call": { + "name": tool_name, + "arguments": completions[0]["choices"][0]["text"], + } + } if len(completions) == 1 else {} + return { + "id": "chat" + completion["id"], + "object": "chat.completion", + "created": completion["created"], + "model": completion["model"], + "choices": [ + { + "finish_reason": "tool_calls", + "index": 0, + "logprobs": completion["choices"][0]["logprobs"], + "message": { + "role": "assistant", + "content": None, + "tool_calls": [ + { + "id": "call_" + + f"_{i}_" + + tool_name + + "_" + + completion["id"], + "type": "function", + "function": { + "name": tool_name, + "arguments": completion["choices"][0]["text"], + }, + } + for i, (tool_name, completion) in enumerate( + zip(completions_tool_name, completions) + ) + ], + **function_call_dict + }, + } + ], + "usage": { + "completion_tokens": sum( + completion["usage"]["completion_tokens"] if "usage" in completion else 0 + for completion in completions + ), + "prompt_tokens": sum( + completion["usage"]["prompt_tokens"] if "usage" in completion else 0 + for completion in completions + ), + "total_tokens": sum( + completion["usage"]["total_tokens"] if "usage" in completion else 0 + for completion in completions + ), + }, + } + + raise ValueError("Automatic streaming tool choice is not supported") \ No newline at end of file diff --git a/llama-cpp-python/llama_cpp/llama_cpp.py b/llama-cpp-python/llama_cpp/llama_cpp.py new file mode 100644 index 0000000000000000000000000000000000000000..46aa51662f650f3f841269604bc37a97aea22c7f --- /dev/null +++ b/llama-cpp-python/llama_cpp/llama_cpp.py @@ -0,0 +1,3367 @@ +from __future__ import annotations + +import sys +import os +import ctypes +import functools +import 
pathlib + +from typing import ( + Any, + Callable, + List, + Union, + NewType, + Optional, + TYPE_CHECKING, + TypeVar, + Generic, +) +from typing_extensions import TypeAlias + + +# Load the library +def _load_shared_library(lib_base_name: str): + # Construct the paths to the possible shared library names + _base_path = pathlib.Path(os.path.abspath(os.path.dirname(__file__))) + # Searching for the library in the current directory under the name "libllama" (default name + # for llamacpp) and "llama" (default name for this repo) + _lib_paths: List[pathlib.Path] = [] + # Determine the file extension based on the platform + if sys.platform.startswith("linux"): + _lib_paths += [ + _base_path / f"lib{lib_base_name}.so", + ] + elif sys.platform == "darwin": + _lib_paths += [ + _base_path / f"lib{lib_base_name}.so", + _base_path / f"lib{lib_base_name}.dylib", + ] + elif sys.platform == "win32": + _lib_paths += [ + _base_path / f"{lib_base_name}.dll", + _base_path / f"lib{lib_base_name}.dll", + ] + else: + raise RuntimeError("Unsupported platform") + + if "LLAMA_CPP_LIB" in os.environ: + lib_base_name = os.environ["LLAMA_CPP_LIB"] + _lib = pathlib.Path(lib_base_name) + _base_path = _lib.parent.resolve() + _lib_paths = [_lib.resolve()] + + cdll_args = dict() # type: ignore + # Add the library directory to the DLL search path on Windows (if needed) + if sys.platform == "win32" and sys.version_info >= (3, 8): + os.add_dll_directory(str(_base_path)) + if "CUDA_PATH" in os.environ: + os.add_dll_directory(os.path.join(os.environ["CUDA_PATH"], "bin")) + os.add_dll_directory(os.path.join(os.environ["CUDA_PATH"], "lib")) + if "HIP_PATH" in os.environ: + os.add_dll_directory(os.path.join(os.environ["HIP_PATH"], "bin")) + os.add_dll_directory(os.path.join(os.environ["HIP_PATH"], "lib")) + cdll_args["winmode"] = ctypes.RTLD_GLOBAL + + # Try to load the shared library, handling potential errors + for _lib_path in _lib_paths: + if _lib_path.exists(): + try: + return ctypes.CDLL(str(_lib_path), **cdll_args) # type: ignore + except Exception as e: + raise RuntimeError(f"Failed to load shared library '{_lib_path}': {e}") + + raise FileNotFoundError( + f"Shared library with base name '{lib_base_name}' not found" + ) + + +# Specify the base name of the shared library to load +_lib_base_name = "llama" + +# Load the library +_lib = _load_shared_library(_lib_base_name) + + +# ctypes sane type hint helpers +# +# - Generic Pointer and Array types +# - PointerOrRef type with a type hinted byref function +# +# NOTE: Only use these for static type checking not for runtime checks +# no good will come of that + +if TYPE_CHECKING: + CtypesCData = TypeVar("CtypesCData", bound=ctypes._CData) # type: ignore + + CtypesArray: TypeAlias = ctypes.Array[CtypesCData] # type: ignore + + CtypesPointer: TypeAlias = ctypes._Pointer[CtypesCData] # type: ignore + + CtypesVoidPointer: TypeAlias = ctypes.c_void_p + + class CtypesRef(Generic[CtypesCData]): + pass + + CtypesPointerOrRef: TypeAlias = Union[ + CtypesPointer[CtypesCData], CtypesRef[CtypesCData] + ] + + CtypesFuncPointer: TypeAlias = ctypes._FuncPointer # type: ignore + +F = TypeVar("F", bound=Callable[..., Any]) + + +def ctypes_function_for_shared_library(lib: ctypes.CDLL): + def ctypes_function( + name: str, argtypes: List[Any], restype: Any, enabled: bool = True + ): + def decorator(f: F) -> F: + if enabled: + func = getattr(lib, name) + func.argtypes = argtypes + func.restype = restype + functools.wraps(f)(func) + return func + else: + return f + + return decorator + + return 
ctypes_function + + +ctypes_function = ctypes_function_for_shared_library(_lib) + + +def byref(obj: CtypesCData, offset: Optional[int] = None) -> CtypesRef[CtypesCData]: + """Type-annotated version of ctypes.byref""" + ... + + +byref = ctypes.byref # type: ignore + +# from ggml.h +# // NOTE: always add types at the end of the enum to keep backward compatibility +# enum ggml_type { +# GGML_TYPE_F32 = 0, +# GGML_TYPE_F16 = 1, +# GGML_TYPE_Q4_0 = 2, +# GGML_TYPE_Q4_1 = 3, +# // GGML_TYPE_Q4_2 = 4, support has been removed +# // GGML_TYPE_Q4_3 = 5, support has been removed +# GGML_TYPE_Q5_0 = 6, +# GGML_TYPE_Q5_1 = 7, +# GGML_TYPE_Q8_0 = 8, +# GGML_TYPE_Q8_1 = 9, +# GGML_TYPE_Q2_K = 10, +# GGML_TYPE_Q3_K = 11, +# GGML_TYPE_Q4_K = 12, +# GGML_TYPE_Q5_K = 13, +# GGML_TYPE_Q6_K = 14, +# GGML_TYPE_Q8_K = 15, +# GGML_TYPE_IQ2_XXS = 16, +# GGML_TYPE_IQ2_XS = 17, +# GGML_TYPE_IQ3_XXS = 18, +# GGML_TYPE_IQ1_S = 19, +# GGML_TYPE_IQ4_NL = 20, +# GGML_TYPE_IQ3_S = 21, +# GGML_TYPE_IQ2_S = 22, +# GGML_TYPE_IQ4_XS = 23, +# GGML_TYPE_I8 = 24, +# GGML_TYPE_I16 = 25, +# GGML_TYPE_I32 = 26, +# GGML_TYPE_I64 = 27, +# GGML_TYPE_F64 = 28, +# GGML_TYPE_IQ1_M = 29, +# GGML_TYPE_COUNT, +# }; +GGML_TYPE_F32 = 0 +GGML_TYPE_F16 = 1 +GGML_TYPE_Q4_0 = 2 +GGML_TYPE_Q4_1 = 3 +GGML_TYPE_Q5_0 = 6 +GGML_TYPE_Q5_1 = 7 +GGML_TYPE_Q8_0 = 8 +GGML_TYPE_Q8_1 = 9 +GGML_TYPE_Q2_K = 10 +GGML_TYPE_Q3_K = 11 +GGML_TYPE_Q4_K = 12 +GGML_TYPE_Q5_K = 13 +GGML_TYPE_Q6_K = 14 +GGML_TYPE_Q8_K = 15 +GGML_TYPE_IQ2_XXS = 16 +GGML_TYPE_IQ2_XS = 17 +GGML_TYPE_IQ3_XXS = 18 +GGML_TYPE_IQ1_S = 19 +GGML_TYPE_IQ4_NL = 20 +GGML_TYPE_IQ3_S = 21 +GGML_TYPE_IQ2_S = 22 +GGML_TYPE_IQ4_XS = 23 +GGML_TYPE_I8 = 24 +GGML_TYPE_I16 = 25 +GGML_TYPE_I32 = 26 +GGML_TYPE_I64 = 27 +GGML_TYPE_F64 = 28 +GGML_TYPE_IQ1_M = 29 +GGML_TYPE_COUNT = 30 + +# from ggml-backend.h +# typedef bool (*ggml_backend_sched_eval_callback)(struct ggml_tensor * t, bool ask, void * user_data); +ggml_backend_sched_eval_callback = ctypes.CFUNCTYPE( + ctypes.c_bool, ctypes.c_void_p, ctypes.c_bool, ctypes.c_void_p +) + +# // Abort callback +# // If not NULL, called before ggml computation +# // If it returns true, the computation is aborted +# typedef bool (*ggml_abort_callback)(void * data); +ggml_abort_callback = ctypes.CFUNCTYPE(ctypes.c_bool, ctypes.c_void_p) + +# llama.h bindings + +_lib.llama_max_devices.argtypes = [] +_lib.llama_max_devices.restype = ctypes.c_size_t + +LLAMA_MAX_DEVICES = _lib.llama_max_devices() + +# define LLAMA_DEFAULT_SEED 0xFFFFFFFF +LLAMA_DEFAULT_SEED = 0xFFFFFFFF + +# define LLAMA_MAX_RNG_STATE (64*1024) +LLAMA_MAX_RNG_STATE = 64 * 1024 + +# define LLAMA_FILE_MAGIC_GGLA 0x67676c61u // 'ggla' +LLAMA_FILE_MAGIC_GGLA = 0x67676C61 + +# define LLAMA_FILE_MAGIC_GGSN 0x6767736eu // 'ggsn' +LLAMA_FILE_MAGIC_GGSN = 0x6767736E + +# define LLAMA_FILE_MAGIC_GGSQ 0x67677371u // 'ggsq' +LLAMA_FILE_MAGIC_GGSQ = 0x67677371 + +# define LLAMA_SESSION_MAGIC LLAMA_FILE_MAGIC_GGSN +LLAMA_SESSION_MAGIC = LLAMA_FILE_MAGIC_GGSN +# define LLAMA_SESSION_VERSION 6 +LLAMA_SESSION_VERSION = 6 + +# define LLAMA_STATE_SEQ_MAGIC LLAMA_FILE_MAGIC_GGSQ +LLAMA_STATE_SEQ_MAGIC = LLAMA_FILE_MAGIC_GGSQ +# define LLAMA_STATE_SEQ_VERSION 1 +LLAMA_STATE_SEQ_VERSION = 1 + +# struct llama_model; +llama_model_p = NewType("llama_model_p", int) +llama_model_p_ctypes = ctypes.c_void_p + +# struct llama_context; +llama_context_p = NewType("llama_context_p", int) +llama_context_p_ctypes = ctypes.c_void_p + + +# typedef int32_t llama_pos; +llama_pos = ctypes.c_int32 +# typedef int32_t llama_token; +llama_token = 
ctypes.c_int32 +llama_token_p = ctypes.POINTER(llama_token) +# typedef int32_t llama_seq_id; +llama_seq_id = ctypes.c_int32 + + +# enum llama_vocab_type { +# LLAMA_VOCAB_TYPE_NONE = 0, // For models without vocab +# LLAMA_VOCAB_TYPE_SPM = 1, // LLaMA tokenizer based on byte-level BPE with byte fallback +# LLAMA_VOCAB_TYPE_BPE = 2, // GPT-2 tokenizer based on byte-level BPE +# LLAMA_VOCAB_TYPE_WPM = 3, // BERT tokenizer based on WordPiece +# }; +LLAMA_VOCAB_TYPE_NONE = 0 +"""For models without vocab""" +LLAMA_VOCAB_TYPE_SPM = 1 +"""LLaMA tokenizer based on byte-level BPE with byte fallback""" +LLAMA_VOCAB_TYPE_BPE = 2 +"""GPT-2 tokenizer based on byte-level BPE""" +LLAMA_VOCAB_TYPE_WPM = 3 +"""BERT tokenizer based on WordPiece""" + + +# // pre-tokenization types +# enum llama_vocab_pre_type { +# LLAMA_VOCAB_PRE_TYPE_DEFAULT = 0, +# LLAMA_VOCAB_PRE_TYPE_LLAMA3 = 1, +# LLAMA_VOCAB_PRE_TYPE_DEEPSEEK_LLM = 2, +# LLAMA_VOCAB_PRE_TYPE_DEEPSEEK_CODER = 3, +# LLAMA_VOCAB_PRE_TYPE_FALCON = 4, +# LLAMA_VOCAB_PRE_TYPE_MPT = 5, +# LLAMA_VOCAB_PRE_TYPE_STARCODER = 6, +# LLAMA_VOCAB_PRE_TYPE_GPT2 = 7, +# }; +LLAMA_VOCAB_PRE_TYPE_DEFAULT = 0 +LLAMA_VOCAB_PRE_TYPE_LLAMA3 = 1 +LLAMA_VOCAB_PRE_TYPE_DEEPSEEK_LLM = 2 +LLAMA_VOCAB_PRE_TYPE_DEEPSEEK_CODER = 3 +LLAMA_VOCAB_PRE_TYPE_FALCON = 4 +LLAMA_VOCAB_PRE_TYPE_MPT = 5 +LLAMA_VOCAB_PRE_TYPE_STARCODER = 6 +LLAMA_VOCAB_PRE_TYPE_GPT2 = 7 + + +# // note: these values should be synchronized with ggml_rope +# // TODO: maybe move this enum to ggml.h (ggml_rope_type) +# enum llama_rope_type { +# LLAMA_ROPE_TYPE_NONE = -1, +# LLAMA_ROPE_TYPE_NORM = 0, +# LLAMA_ROPE_TYPE_NEOX = 2, +# LLAMA_ROPE_TYPE_GLM = 4, +# }; +LLAMA_ROPE_TYPE_NONE = -1 +LLAMA_ROPE_TYPE_NORM = 0 +LLAMA_ROPE_TYPE_NEOX = 2 +LLAMA_ROPE_TYPE_GLM = 4 + + +# enum llama_token_type { +# LLAMA_TOKEN_TYPE_UNDEFINED = 0, +# LLAMA_TOKEN_TYPE_NORMAL = 1, +# LLAMA_TOKEN_TYPE_UNKNOWN = 2, +# LLAMA_TOKEN_TYPE_CONTROL = 3, +# LLAMA_TOKEN_TYPE_USER_DEFINED = 4, +# LLAMA_TOKEN_TYPE_UNUSED = 5, +# LLAMA_TOKEN_TYPE_BYTE = 6, +# }; +LLAMA_TOKEN_TYPE_UNDEFINED = 0 +LLAMA_TOKEN_TYPE_NORMAL = 1 +LLAMA_TOKEN_TYPE_UNKNOWN = 2 +LLAMA_TOKEN_TYPE_CONTROL = 3 +LLAMA_TOKEN_TYPE_USER_DEFINED = 4 +LLAMA_TOKEN_TYPE_UNUSED = 5 +LLAMA_TOKEN_TYPE_BYTE = 6 + + +# // model file types +# enum llama_ftype { +# LLAMA_FTYPE_ALL_F32 = 0, +# LLAMA_FTYPE_MOSTLY_F16 = 1, // except 1d tensors +# LLAMA_FTYPE_MOSTLY_Q4_0 = 2, // except 1d tensors +# LLAMA_FTYPE_MOSTLY_Q4_1 = 3, // except 1d tensors +# LLAMA_FTYPE_MOSTLY_Q4_1_SOME_F16 = 4, // tok_embeddings.weight and output.weight are F16 +# // LLAMA_FTYPE_MOSTLY_Q4_2 = 5, // support has been removed +# // LLAMA_FTYPE_MOSTLY_Q4_3 = 6, // support has been removed +# LLAMA_FTYPE_MOSTLY_Q8_0 = 7, // except 1d tensors +# LLAMA_FTYPE_MOSTLY_Q5_0 = 8, // except 1d tensors +# LLAMA_FTYPE_MOSTLY_Q5_1 = 9, // except 1d tensors +# LLAMA_FTYPE_MOSTLY_Q2_K = 10, // except 1d tensors +# LLAMA_FTYPE_MOSTLY_Q3_K_S = 11, // except 1d tensors +# LLAMA_FTYPE_MOSTLY_Q3_K_M = 12, // except 1d tensors +# LLAMA_FTYPE_MOSTLY_Q3_K_L = 13, // except 1d tensors +# LLAMA_FTYPE_MOSTLY_Q4_K_S = 14, // except 1d tensors +# LLAMA_FTYPE_MOSTLY_Q4_K_M = 15, // except 1d tensors +# LLAMA_FTYPE_MOSTLY_Q5_K_S = 16, // except 1d tensors +# LLAMA_FTYPE_MOSTLY_Q5_K_M = 17, // except 1d tensors +# LLAMA_FTYPE_MOSTLY_Q6_K = 18, // except 1d tensors +# LLAMA_FTYPE_MOSTLY_IQ2_XXS = 19, // except 1d tensors +# LLAMA_FTYPE_MOSTLY_IQ2_XS = 20, // except 1d tensors +# LLAMA_FTYPE_MOSTLY_Q2_K_S = 21, // except 1d tensors +# 
LLAMA_FTYPE_MOSTLY_IQ3_XS = 22, // except 1d tensors +# LLAMA_FTYPE_MOSTLY_IQ3_XXS = 23, // except 1d tensors +# LLAMA_FTYPE_MOSTLY_IQ1_S = 24, // except 1d tensors +# LLAMA_FTYPE_MOSTLY_IQ4_NL = 25, // except 1d tensors +# LLAMA_FTYPE_MOSTLY_IQ3_S = 26, // except 1d tensors +# LLAMA_FTYPE_MOSTLY_IQ3_M = 27, // except 1d tensors +# LLAMA_FTYPE_MOSTLY_IQ2_S = 28, // except 1d tensors +# LLAMA_FTYPE_MOSTLY_IQ2_M = 29, // except 1d tensors +# LLAMA_FTYPE_MOSTLY_IQ4_XS = 30, // except 1d tensors +# LLAMA_FTYPE_MOSTLY_IQ1_M = 31, // except 1d tensors + +# LLAMA_FTYPE_GUESSED = 1024, // not specified in the model file +# }; +LLAMA_FTYPE_ALL_F32 = 0 +LLAMA_FTYPE_MOSTLY_F16 = 1 +LLAMA_FTYPE_MOSTLY_Q4_0 = 2 +LLAMA_FTYPE_MOSTLY_Q4_1 = 3 +LLAMA_FTYPE_MOSTLY_Q4_1_SOME_F16 = 4 +LLAMA_FTYPE_MOSTLY_Q8_0 = 7 +LLAMA_FTYPE_MOSTLY_Q5_0 = 8 +LLAMA_FTYPE_MOSTLY_Q5_1 = 9 +LLAMA_FTYPE_MOSTLY_Q2_K = 10 +LLAMA_FTYPE_MOSTLY_Q3_K_S = 11 +LLAMA_FTYPE_MOSTLY_Q3_K_M = 12 +LLAMA_FTYPE_MOSTLY_Q3_K_L = 13 +LLAMA_FTYPE_MOSTLY_Q4_K_S = 14 +LLAMA_FTYPE_MOSTLY_Q4_K_M = 15 +LLAMA_FTYPE_MOSTLY_Q5_K_S = 16 +LLAMA_FTYPE_MOSTLY_Q5_K_M = 17 +LLAMA_FTYPE_MOSTLY_Q6_K = 18 +LLAMA_FTYPE_MOSTLY_IQ2_XXS = 19 +LLAMA_FTYPE_MOSTLY_IQ2_XS = 20 +LLAMA_FTYPE_MOSTLY_Q2_K_S = 21 +LLAMA_FTYPE_MOSTLY_IQ3_XS = 22 +LLAMA_FTYPE_MOSTLY_IQ3_XXS = 23 +LLAMA_FTYPE_MOSTLY_IQ1_S = 24 +LLAMA_FTYPE_MOSTLY_IQ4_NL = 25 +LLAMA_FTYPE_MOSTLY_IQ3_S = 26 +LLAMA_FTYPE_MOSTLY_IQ3_M = 27 +LLAMA_FTYPE_MOSTLY_IQ2_S = 28 +LLAMA_FTYPE_MOSTLY_IQ2_M = 29 +LLAMA_FTYPE_MOSTLY_IQ4_XS = 30 +LLAMA_FTYPE_GUESSED = 1024 + +# enum llama_rope_scaling_type { +# LLAMA_ROPE_SCALING_TYPE_UNSPECIFIED = -1, +# LLAMA_ROPE_SCALING_TYPE_NONE = 0, +# LLAMA_ROPE_SCALING_TYPE_LINEAR = 1, +# LLAMA_ROPE_SCALING_TYPE_YARN = 2, +# LLAMA_ROPE_SCALING_TYPE_MAX_VALUE = LLAMA_ROPE_SCALING_TYPE_YARN, +# }; +LLAMA_ROPE_SCALING_TYPE_UNSPECIFIED = -1 +LLAMA_ROPE_SCALING_TYPE_NONE = 0 +LLAMA_ROPE_SCALING_TYPE_LINEAR = 1 +LLAMA_ROPE_SCALING_TYPE_YARN = 2 +LLAMA_ROPE_SCALING_TYPE_MAX_VALUE = LLAMA_ROPE_SCALING_TYPE_YARN + +# enum llama_pooling_type { +# LLAMA_POOLING_TYPE_UNSPECIFIED = -1, +# LLAMA_POOLING_TYPE_NONE = 0, +# LLAMA_POOLING_TYPE_MEAN = 1, +# LLAMA_POOLING_TYPE_CLS = 2, +# }; +LLAMA_POOLING_TYPE_UNSPECIFIED = -1 +LLAMA_POOLING_TYPE_NONE = 0 +LLAMA_POOLING_TYPE_MEAN = 1 +LLAMA_POOLING_TYPE_CLS = 2 + +# enum llama_split_mode { +# LLAMA_SPLIT_MODE_NONE = 0, // single GPU +# LLAMA_SPLIT_MODE_LAYER = 1, // split layers and KV across GPUs +# LLAMA_SPLIT_MODE_ROW = 2, // split rows across GPUs +# }; +LLAMA_SPLIT_MODE_NONE = 0 +LLAMA_SPLIT_MODE_LAYER = 1 +LLAMA_SPLIT_MODE_ROW = 2 + + +# typedef struct llama_token_data { +# llama_token id; // token id +# float logit; // log-odds of the token +# float p; // probability of the token +# } llama_token_data; +class llama_token_data(ctypes.Structure): + """Used to store token data + + Attributes: + id (llama_token): token id + logit (float): log-odds of the token + p (float): probability of the token""" + + if TYPE_CHECKING: + id: llama_token + logit: float + p: float + + _fields_ = [ + ("id", llama_token), + ("logit", ctypes.c_float), + ("p", ctypes.c_float), + ] + + +llama_token_data_p = ctypes.POINTER(llama_token_data) + + +# typedef struct llama_token_data_array { +# llama_token_data * data; +# size_t size; +# bool sorted; +# } llama_token_data_array; +class llama_token_data_array(ctypes.Structure): + """Used to sample tokens given logits + + Attributes: + data (ctypes.Array[llama_token_data]): token data + size (int): size of the array + sorted (bool): 
whether the array is sorted""" + + if TYPE_CHECKING: + data: CtypesArray[llama_token_data] + size: int + sorted: bool + + _fields_ = [ + ("data", llama_token_data_p), + ("size", ctypes.c_size_t), + ("sorted", ctypes.c_bool), + ] + + +llama_token_data_array_p = ctypes.POINTER(llama_token_data_array) + +# typedef bool (*llama_progress_callback)(float progress, void *ctx); +llama_progress_callback = ctypes.CFUNCTYPE( + ctypes.c_bool, ctypes.c_float, ctypes.c_void_p +) + + +# // Input data for llama_decode +# // A llama_batch object can contain input about one or many sequences +# // The provided arrays (i.e. token, embd, pos, etc.) must have size of n_tokens +# // +# // - token : the token ids of the input (used when embd is NULL) +# // - embd : token embeddings (i.e. float vector of size n_embd) (used when token is NULL) +# // - pos : the positions of the respective token in the sequence +# // - seq_id : the sequence to which the respective token belongs +# // - logits : if zero, the logits (and/or the embeddings) for the respective token will not be output +# // +# typedef struct llama_batch { +# int32_t n_tokens; + +# llama_token * token; +# float * embd; +# llama_pos * pos; +# int32_t * n_seq_id; +# llama_seq_id ** seq_id; +# int8_t * logits; // TODO: rename this to "output" + + +# // NOTE: helpers for smooth API transition - can be deprecated in the future +# // for future-proof code, use the above fields instead and ignore everything below +# // +# // pos[i] = all_pos_0 + i*all_pos_1 +# // +# llama_pos all_pos_0; // used if pos == NULL +# llama_pos all_pos_1; // used if pos == NULL +# llama_seq_id all_seq_id; // used if seq_id == NULL +# } llama_batch; +class llama_batch(ctypes.Structure): + """Input data for llama_decode + + A llama_batch object can contain input about one or many sequences + + The provided arrays (i.e. token, embd, pos, etc.) must have size of n_tokens + + Attributes: + n_tokens (int): number of tokens + token (ctypes.Array[llama_token]): the token ids of the input (used when embd is NULL) + embd (ctypes.Array[ctypes.ctypes.c_float]): token embeddings (i.e. 
float vector of size n_embd) (used when token is NULL) + pos (ctypes.Array[ctypes.Array[llama_pos]]): the positions of the respective token in the sequence + seq_id (ctypes.Array[ctypes.Array[llama_seq_id]]): the sequence to which the respective token belongs + logits (ctypes.Array[ctypes.ctypes.c_int8]): if zero, the logits for the respective token will not be output + """ + + if TYPE_CHECKING: + n_tokens: int + token: CtypesArray[llama_token] + embd: CtypesArray[ctypes.c_float] + pos: CtypesArray[CtypesArray[llama_pos]] + n_seq_id: CtypesArray[ctypes.c_int] + seq_id: CtypesArray[CtypesArray[llama_seq_id]] + logits: CtypesArray[ctypes.c_int8] + + _fields_ = [ + ("n_tokens", ctypes.c_int32), + ("token", ctypes.POINTER(llama_token)), + ("embd", ctypes.POINTER(ctypes.c_float)), + ("pos", ctypes.POINTER(llama_pos)), + ("n_seq_id", ctypes.POINTER(ctypes.c_int32)), + ("seq_id", ctypes.POINTER(ctypes.POINTER(llama_seq_id))), + ("logits", ctypes.POINTER(ctypes.c_int8)), + ("all_pos_0", llama_pos), + ("all_pos_1", llama_pos), + ("all_seq_id", llama_seq_id), + ] + + +# enum llama_model_kv_override_type { +# LLAMA_KV_OVERRIDE_TYPE_INT, +# LLAMA_KV_OVERRIDE_TYPE_FLOAT, +# LLAMA_KV_OVERRIDE_TYPE_BOOL, +# LLAMA_KV_OVERRIDE_TYPE_STR, +# }; +LLAMA_KV_OVERRIDE_TYPE_INT = 0 +LLAMA_KV_OVERRIDE_TYPE_FLOAT = 1 +LLAMA_KV_OVERRIDE_TYPE_BOOL = 2 +LLAMA_KV_OVERRIDE_TYPE_STR = 3 + + +# struct llama_model_kv_override { +# enum llama_model_kv_override_type tag; + +# char key[128]; + + +# union { +# int64_t val_i64; +# double val_f64; +# bool val_bool; +# char val_str[128]; +# }; +# }; +class llama_model_kv_override_value(ctypes.Union): + _fields_ = [ + ("int_value", ctypes.c_int64), + ("float_value", ctypes.c_double), + ("bool_value", ctypes.c_bool), + ("str_value", ctypes.c_char * 128), + ] + + if TYPE_CHECKING: + int_value: int + float_value: float + bool_value: bool + str_value: bytes + + +class llama_model_kv_override(ctypes.Structure): + _fields_ = [ + ("tag", ctypes.c_int), + ("key", ctypes.c_char * 128), + ("value", llama_model_kv_override_value), + ] + + if TYPE_CHECKING: + tag: int + key: bytes + value: Union[int, float, bool, bytes] + + +# struct llama_model_params { +# int32_t n_gpu_layers; // number of layers to store in VRAM +# enum llama_split_mode split_mode; // how to split the model across multiple GPUs + +# // main_gpu interpretation depends on split_mode: +# // LLAMA_SPLIT_NONE: the GPU that is used for the entire model +# // LLAMA_SPLIT_ROW: the GPU that is used for small tensors and intermediate results +# // LLAMA_SPLIT_LAYER: ignored +# int32_t main_gpu; + +# // proportion of the model (layers or rows) to offload to each GPU, size: llama_max_devices() +# const float * tensor_split; + +# // Called with a progress value between 0.0 and 1.0. Pass NULL to disable. +# // If the provided progress_callback returns true, model loading continues. +# // If it returns false, model loading is immediately aborted. +# llama_progress_callback progress_callback; + +# // context pointer passed to the progress callback +# void * progress_callback_user_data; + +# // override key-value pairs of the model meta data +# const struct llama_model_kv_override * kv_overrides; + + +# // Keep the booleans together to avoid misalignment during copy-by-value. 
+# bool vocab_only; // only load the vocabulary, no weights +# bool use_mmap; // use mmap if possible +# bool use_mlock; // force system to keep model in RAM +# bool check_tensors; // validate model tensor data +# }; +class llama_model_params(ctypes.Structure): + """Parameters for llama_model + + Attributes: + n_gpu_layers (int): number of layers to store in VRAM + split_mode (int): how to split the model across multiple GPUs + main_gpu (int): the GPU that is used for the entire model. main_gpu interpretation depends on split_mode: LLAMA_SPLIT_NONE: the GPU that is used for the entire model LLAMA_SPLIT_ROW: the GPU that is used for small tensors and intermediate results LLAMA_SPLIT_LAYER: ignored + tensor_split (ctypes.Array[ctypes.ctypes.c_float]): proportion of the model (layers or rows) to offload to each GPU, size: llama_max_devices() + progress_callback (llama_progress_callback): called with a progress value between 0.0 and 1.0. Pass NULL to disable. If the provided progress_callback returns true, model loading continues. If it returns false, model loading is immediately aborted. + progress_callback_user_data (ctypes.ctypes.c_void_p): context pointer passed to the progress callback + kv_overrides (ctypes.Array[llama_model_kv_override]): override key-value pairs of the model meta data + vocab_only (bool): only load the vocabulary, no weights + use_mmap (bool): use mmap if possible + use_mlock (bool): force system to keep model in RAM + check_tensors (bool): validate model tensor data""" + + if TYPE_CHECKING: + n_gpu_layers: int + split_mode: int + main_gpu: int + tensor_split: CtypesArray[ctypes.c_float] + progress_callback: Callable[[float, ctypes.c_void_p], bool] + progress_callback_user_data: ctypes.c_void_p + kv_overrides: CtypesArray[llama_model_kv_override] + vocab_only: bool + use_mmap: bool + use_mlock: bool + check_tensors: bool + + _fields_ = [ + ("n_gpu_layers", ctypes.c_int32), + ("split_mode", ctypes.c_int), + ("main_gpu", ctypes.c_int32), + ("tensor_split", ctypes.POINTER(ctypes.c_float)), + ("progress_callback", llama_progress_callback), + ("progress_callback_user_data", ctypes.c_void_p), + ("kv_overrides", ctypes.POINTER(llama_model_kv_override)), + ("vocab_only", ctypes.c_bool), + ("use_mmap", ctypes.c_bool), + ("use_mlock", ctypes.c_bool), + ("check_tensors", ctypes.c_bool), + ] + + +# struct llama_context_params { +# uint32_t seed; // RNG seed, -1 for random +# uint32_t n_ctx; // text context, 0 = from model +# uint32_t n_batch; // logical maximum batch size that can be submitted to llama_decode +# uint32_t n_ubatch; // physical maximum batch size +# uint32_t n_seq_max; // max number of sequences (i.e. 
distinct states for recurrent models) +# uint32_t n_threads; // number of threads to use for generation +# uint32_t n_threads_batch; // number of threads to use for batch processing + +# enum llama_rope_scaling_type rope_scaling_type; // RoPE scaling type, from `enum llama_rope_scaling_type` +# enum llama_pooling_type pooling_type; // whether to pool (sum) embedding results by sequence id +# // (ignored if no pooling layer) + +# // ref: https://github.com/ggerganov/llama.cpp/pull/2054 +# float rope_freq_base; // RoPE base frequency, 0 = from model +# float rope_freq_scale; // RoPE frequency scaling factor, 0 = from model +# float yarn_ext_factor; // YaRN extrapolation mix factor, negative = from model +# float yarn_attn_factor; // YaRN magnitude scaling factor +# float yarn_beta_fast; // YaRN low correction dim +# float yarn_beta_slow; // YaRN high correction dim +# uint32_t yarn_orig_ctx; // YaRN original context size +# float defrag_thold; // defragment the KV cache if holes/size > thold, < 0 disabled (default) + +# ggml_backend_sched_eval_callback cb_eval; +# void * cb_eval_user_data; + +# enum ggml_type type_k; // data type for K cache +# enum ggml_type type_v; // data type for V cache + +# // Keep the booleans together to avoid misalignment during copy-by-value. +# bool logits_all; // the llama_decode() call computes all logits, not just the last one (DEPRECATED - set llama_batch.logits instead) +# bool embeddings; // if true, extract embeddings (together with logits) +# bool offload_kqv; // whether to offload the KQV ops (including the KV cache) to GPU +# bool flash_attn; // whether to use flash attention + + +# // Abort callback +# // if it returns true, execution of llama_decode() will be aborted +# // currently works only with CPU execution +# ggml_abort_callback abort_callback; +# void * abort_callback_data; +# }; +class llama_context_params(ctypes.Structure): + """Parameters for llama_context + + Attributes: + seed (int): RNG seed, -1 for random + n_ctx (int): text context, 0 = from model + n_batch (int): logical maximum batch size that can be submitted to llama_decode + n_ubatch (int): physical maximum batch size + n_seq_max (int): max number of sequences (i.e. 
distinct states for recurrent models) + n_threads (int): number of threads to use for generation + n_threads_batch (int): number of threads to use for batch processing + rope_scaling_type (int): RoPE scaling type, from `enum llama_rope_scaling_type` + pooling_type (int): whether to pool (sum) embedding results by sequence id (ignored if no pooling layer) + rope_freq_base (float): RoPE base frequency, 0 = from model + rope_freq_scale (float): RoPE frequency scaling factor, 0 = from model + yarn_ext_factor (float): YaRN extrapolation mix factor, negative = from model + yarn_attn_factor (float): YaRN magnitude scaling factor + yarn_beta_fast (float): YaRN low correction dim + yarn_beta_slow (float): YaRN high correction dim + yarn_orig_ctx (int): YaRN original context size + defrag_thold (float): defragment the KV cache if holes/size > thold, < 0 disabled (default) + cb_eval (ggml_backend_sched_eval_callback): callback for scheduling eval + cb_eval_user_data (ctypes.ctypes.c_void_p): user data for cb_eval + type_k (int): data type for K cache + type_v (int): data type for V cache + logits_all (bool): the llama_eval() call computes all logits, not just the last one (DEPRECATED - set llama_batch.logits instead) + embeddings (bool): if true, extract embeddings (together with logits) + offload_kqv (bool): whether to offload the KQV ops (including the KV cache) to GPU + flash_attn (bool): whether to use flash attention + abort_callback (ggml_abort_callback): abort callback if it returns true, execution of llama_decode() will be aborted + abort_callback_data (ctypes.ctypes.c_void_p): data for abort_callback + """ + + if TYPE_CHECKING: + seed: int + n_ctx: int + n_batch: int + n_ubatch: int + n_seq_max: int + n_threads: int + n_threads_batch: int + rope_scaling_type: int + pooling_type: int + rope_freq_base: float + rope_freq_scale: float + yarn_ext_factor: float + yarn_attn_factor: float + yarn_beta_fast: float + yarn_beta_slow: float + yarn_orig_ctx: int + defrag_thold: float + cb_eval: Callable[[ctypes.c_void_p, bool], bool] + cb_eval_user_data: ctypes.c_void_p + type_k: int + type_v: int + logits_all: bool + embeddings: bool + offload_kqv: bool + flash_attn: bool + abort_callback: Callable[[ctypes.c_void_p], bool] + abort_callback_data: ctypes.c_void_p + + _fields_ = [ + ("seed", ctypes.c_uint32), + ("n_ctx", ctypes.c_uint32), + ("n_batch", ctypes.c_uint32), + ("n_ubatch", ctypes.c_uint32), + ("n_seq_max", ctypes.c_uint32), + ("n_threads", ctypes.c_uint32), + ("n_threads_batch", ctypes.c_uint32), + ("rope_scaling_type", ctypes.c_int), + ("pooling_type", ctypes.c_int), + ("rope_freq_base", ctypes.c_float), + ("rope_freq_scale", ctypes.c_float), + ("yarn_ext_factor", ctypes.c_float), + ("yarn_attn_factor", ctypes.c_float), + ("yarn_beta_fast", ctypes.c_float), + ("yarn_beta_slow", ctypes.c_float), + ("yarn_orig_ctx", ctypes.c_uint32), + ("defrag_thold", ctypes.c_float), + ("cb_eval", ggml_backend_sched_eval_callback), + ("cb_eval_user_data", ctypes.c_void_p), + ("type_k", ctypes.c_int), + ("type_v", ctypes.c_int), + ("logits_all", ctypes.c_bool), + ("embeddings", ctypes.c_bool), + ("offload_kqv", ctypes.c_bool), + ("flash_attn", ctypes.c_bool), + ("abort_callback", ggml_abort_callback), + ("abort_callback_data", ctypes.c_void_p), + ] + + +# // Signature for logging events +# // Note that text includes the new line character at the end for most events. +# // If your logging mechanism cannot handle that, check if the last character is '\n' and strip it +# // if it exists. 
+# // It might not exist for progress report where '.' is output repeatedly. +# typedef void (*llama_log_callback)(enum llama_log_level level, const char * text, void * user_data); +llama_log_callback = ctypes.CFUNCTYPE( + None, ctypes.c_int, ctypes.c_char_p, ctypes.c_void_p +) +"""Signature for logging events +Note that text includes the new line character at the end for most events. +If your logging mechanism cannot handle that, check if the last character is '\n' and strip it +if it exists. +It might not exist for progress report where '.' is output repeatedly.""" + + +# // model quantization parameters +# typedef struct llama_model_quantize_params { +# int32_t nthread; // number of threads to use for quantizing, if <=0 will use std::thread::hardware_concurrency() +# enum llama_ftype ftype; // quantize to this llama_ftype +# enum ggml_type output_tensor_type; // output tensor type +# enum ggml_type token_embedding_type; // itoken embeddings tensor type +# bool allow_requantize; // allow quantizing non-f32/f16 tensors +# bool quantize_output_tensor; // quantize output.weight +# bool only_copy; // only copy tensors - ftype, allow_requantize and quantize_output_tensor are ignored +# bool pure; // quantize all tensors to the default type +# bool keep_split; // quantize to the same number of shards +# void * imatrix; // pointer to importance matrix data +# void * kv_overrides; // pointer to vector containing overrides +# } llama_model_quantize_params; +class llama_model_quantize_params(ctypes.Structure): + """Parameters for llama_model_quantize + + Attributes: + nthread (int): number of threads to use for quantizing, if <=0 will use std::thread::hardware_concurrency() + ftype (int): quantize to this llama_ftype + output_tensor_type (int): output tensor type + token_embedding_type (int): itoken embeddings tensor type + allow_requantize (bool): allow quantizing non-f32/f16 tensors + quantize_output_tensor (bool): quantize output.weight + only_copy (bool): only copy tensors - ftype, allow_requantize and quantize_output_tensor are ignored + pure (bool): quantize all tensors to the default type + keep_split (bool): quantize to the same number of shards + imatrix (ctypes.c_void_p): pointer to importance matrix data + kv_overrides (ctypes.c_void_p): pointer to vector containing overrides + """ + + if TYPE_CHECKING: + nthread: int + ftype: int + output_tensor_type: int + token_embedding_type: int + allow_requantize: bool + quantize_output_tensor: bool + only_copy: bool + pure: bool + keep_split: bool + imatrix: ctypes.c_void_p + kv_overrides: ctypes.c_void_p + + _fields_ = [ + ("nthread", ctypes.c_int32), + ("ftype", ctypes.c_int), + ("output_tensor_type", ctypes.c_int), + ("token_embedding_type", ctypes.c_int), + ("allow_requantize", ctypes.c_bool), + ("quantize_output_tensor", ctypes.c_bool), + ("only_copy", ctypes.c_bool), + ("pure", ctypes.c_bool), + ("keep_split", ctypes.c_bool), + ("imatrix", ctypes.c_void_p), + ("kv_overrides", ctypes.c_void_p), + ] + + +# // grammar types +# struct llama_grammar; +llama_grammar_p = ctypes.c_void_p + +# // grammar element type +# enum llama_gretype { +# // end of rule definition +# LLAMA_GRETYPE_END = 0, + +# // start of alternate definition for rule +# LLAMA_GRETYPE_ALT = 1, + +# // non-terminal element: reference to rule +# LLAMA_GRETYPE_RULE_REF = 2, + +# // terminal element: character (code point) +# LLAMA_GRETYPE_CHAR = 3, + +# // inverse char(s) ([^a], [^a-b] [^abc]) +# LLAMA_GRETYPE_CHAR_NOT = 4, + +# // modifies a preceding LLAMA_GRETYPE_CHAR or 
LLAMA_GRETYPE_CHAR_ALT to +# // be an inclusive range ([a-z]) +# LLAMA_GRETYPE_CHAR_RNG_UPPER = 5, + +# // modifies a preceding LLAMA_GRETYPE_CHAR or +# // LLAMA_GRETYPE_CHAR_RNG_UPPER to add an alternate char to match ([ab], [a-zA]) +# LLAMA_GRETYPE_CHAR_ALT = 6, +# }; +LLAMA_GRETYPE_END = 0 +LLAMA_GRETYPE_ALT = 1 +LLAMA_GRETYPE_RULE_REF = 2 +LLAMA_GRETYPE_CHAR = 3 +LLAMA_GRETYPE_CHAR_NOT = 4 +LLAMA_GRETYPE_CHAR_RNG_UPPER = 5 +LLAMA_GRETYPE_CHAR_ALT = 6 + + +# typedef struct llama_grammar_element { +# enum llama_gretype type; +# uint32_t value; // Unicode code point or rule ID +# } llama_grammar_element; +class llama_grammar_element(ctypes.Structure): + if TYPE_CHECKING: + type: int + value: int + + _fields_ = [ + ("type", ctypes.c_int), + ("value", ctypes.c_uint32), + ] + + +llama_grammar_element_p = ctypes.POINTER(llama_grammar_element) + +# // performance timing information +# struct llama_timings { +# double t_start_ms; +# double t_end_ms; +# double t_load_ms; +# double t_sample_ms; +# double t_p_eval_ms; +# double t_eval_ms; + + +# int32_t n_sample; +# int32_t n_p_eval; +# int32_t n_eval; +# }; +class llama_timings(ctypes.Structure): + if TYPE_CHECKING: + t_start_ms: float + t_end_ms: float + t_load_ms: float + t_sample_ms: float + t_p_eval_ms: float + t_eval_ms: float + n_sample: int + n_p_eval: int + n_eval: int + + _fields_ = [ + ("t_start_ms", ctypes.c_double), + ("t_end_ms", ctypes.c_double), + ("t_load_ms", ctypes.c_double), + ("t_sample_ms", ctypes.c_double), + ("t_p_eval_ms", ctypes.c_double), + ("t_eval_ms", ctypes.c_double), + ("n_sample", ctypes.c_int32), + ("n_p_eval", ctypes.c_int32), + ("n_eval", ctypes.c_int32), + ] + + +# // used in chat template +# typedef struct llama_chat_message { +# const char * role; +# const char * content; +# } llama_chat_message; +class llama_chat_message(ctypes.Structure): + _fields_ = [ + ("role", ctypes.c_char_p), + ("content", ctypes.c_char_p), + ] + + +# // Helpers for getting default parameters +# LLAMA_API struct llama_model_params llama_model_default_params(void); +@ctypes_function( + "llama_model_default_params", + [], + llama_model_params, +) +def llama_model_default_params() -> llama_model_params: + """Get default parameters for llama_model""" + ... + + +# LLAMA_API struct llama_context_params llama_context_default_params(void); +@ctypes_function( + "llama_context_default_params", + [], + llama_context_params, +) +def llama_context_default_params() -> llama_context_params: + """Get default parameters for llama_context""" + ... + + +# LLAMA_API struct llama_model_quantize_params llama_model_quantize_default_params(void); +@ctypes_function( + "llama_model_quantize_default_params", + [], + llama_model_quantize_params, +) +def llama_model_quantize_default_params() -> llama_model_quantize_params: + """Get default parameters for llama_model_quantize""" + ... + + +# // Initialize the llama + ggml backend +# // If numa is true, use NUMA optimizations +# // Call once at the start of the program +# LLAMA_API void llama_backend_init(bool numa); +# LLAMA_API void llama_backend_init(void); +@ctypes_function( + "llama_backend_init", + [], + None, +) +def llama_backend_init(): + """Initialize the llama + ggml backend + If numa is true, use NUMA optimizations + Call once at the start of the program""" + ... 
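
# A minimal usage sketch of the setup calls above: initialize the backend once, then fetch the
# default parameter structs and adjust them before loading a model. The n_ctx value below is an
# arbitrary placeholder, not a recommendation; llama_backend_free() (declared further down) is
# the matching teardown call.
def _example_backend_setup():
    """Illustrative only; not called anywhere in this module."""
    llama_backend_init()
    mparams = llama_model_default_params()
    cparams = llama_context_default_params()
    cparams.n_ctx = 2048  # 0 would mean "use the model's training context size"
    return mparams, cparams
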
+ + +# // numa strategies +# enum ggml_numa_strategy { +# GGML_NUMA_STRATEGY_DISABLED = 0, +# GGML_NUMA_STRATEGY_DISTRIBUTE = 1, +# GGML_NUMA_STRATEGY_ISOLATE = 2, +# GGML_NUMA_STRATEGY_NUMACTL = 3, +# GGML_NUMA_STRATEGY_MIRROR = 4, +# GGML_NUMA_STRATEGY_COUNT +# }; +GGML_NUMA_STRATEGY_DISABLED = 0 +GGML_NUMA_STRATEGY_DISTRIBUTE = 1 +GGML_NUMA_STRATEGY_ISOLATE = 2 +GGML_NUMA_STRATEGY_NUMACTL = 3 +GGML_NUMA_STRATEGY_MIRROR = 4 +GGML_NUMA_STRATEGY_COUNT = 5 + + +# //optional: +# LLAMA_API void llama_numa_init(enum ggml_numa_strategy numa); +@ctypes_function( + "llama_numa_init", + [ctypes.c_int], + None, +) +def llama_numa_init(numa: int, /): ... + + +# // Call once at the end of the program - currently only used for MPI +# LLAMA_API void llama_backend_free(void); +@ctypes_function( + "llama_backend_free", + [], + None, +) +def llama_backend_free(): + """Call once at the end of the program - currently only used for MPI""" + ... + + +# LLAMA_API struct llama_model * llama_load_model_from_file( +# const char * path_model, +# struct llama_model_params params); +@ctypes_function( + "llama_load_model_from_file", + [ctypes.c_char_p, llama_model_params], + llama_model_p_ctypes, +) +def llama_load_model_from_file( + path_model: bytes, params: llama_model_params, / +) -> Optional[llama_model_p]: ... + + +# LLAMA_API void llama_free_model(struct llama_model * model); +@ctypes_function( + "llama_free_model", + [llama_model_p_ctypes], + None, +) +def llama_free_model(model: llama_model_p, /): ... + + +# LLAMA_API struct llama_context * llama_new_context_with_model( +# struct llama_model * model, +# struct llama_context_params params); +@ctypes_function( + "llama_new_context_with_model", + [llama_model_p_ctypes, llama_context_params], + llama_context_p_ctypes, +) +def llama_new_context_with_model( + model: llama_model_p, params: llama_context_params, / +) -> Optional[llama_context_p]: ... + + +# // Frees all allocated memory +# LLAMA_API void llama_free(struct llama_context * ctx); +@ctypes_function( + "llama_free", + [llama_context_p_ctypes], + None, +) +def llama_free(ctx: llama_context_p, /): + """Frees all allocated memory""" + ... + + +# LLAMA_API int64_t llama_time_us(void); +@ctypes_function( + "llama_time_us", + [], + ctypes.c_int64, +) +def llama_time_us() -> int: ... + + +# LLAMA_API size_t llama_max_devices(void); +@ctypes_function("llama_max_devices", [], ctypes.c_size_t) +def llama_max_devices() -> int: ... + + +# LLAMA_API bool llama_supports_mmap (void); +@ctypes_function("llama_supports_mmap", [], ctypes.c_bool) +def llama_supports_mmap() -> bool: ... + + +# LLAMA_API bool llama_supports_mlock (void); +@ctypes_function("llama_supports_mlock", [], ctypes.c_bool) +def llama_supports_mlock() -> bool: ... + + +# LLAMA_API bool llama_supports_gpu_offload(void); +@ctypes_function("llama_supports_gpu_offload", [], ctypes.c_bool) +def llama_supports_gpu_offload() -> bool: ... + + +# LLAMA_API const struct llama_model * llama_get_model(const struct llama_context * ctx); +@ctypes_function("llama_get_model", [llama_context_p_ctypes], llama_model_p_ctypes) +def llama_get_model(ctx: llama_context_p, /) -> Optional[llama_model_p]: ... + + +# LLAMA_API uint32_t llama_n_ctx (const struct llama_context * ctx); +@ctypes_function("llama_n_ctx", [llama_context_p_ctypes], ctypes.c_uint32) +def llama_n_ctx(ctx: llama_context_p, /) -> int: ... 
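
# A usage sketch for the model/context lifecycle functions above. The GGUF path points at the
# quantized model shipped in this repository and the n_gpu_layers / n_ctx values are arbitrary
# placeholders; error handling is reduced to the bare minimum.
def _example_load_model(path_model: bytes = b"./BioMistral-7B.Q4_K_M.gguf"):
    """Illustrative only; not called anywhere in this module."""
    mparams = llama_model_default_params()
    mparams.n_gpu_layers = 0  # CPU-only for the sketch
    model = llama_load_model_from_file(path_model, mparams)
    if model is None:
        raise RuntimeError("failed to load model")
    cparams = llama_context_default_params()
    cparams.n_ctx = 2048
    ctx = llama_new_context_with_model(model, cparams)
    if ctx is None:
        llama_free_model(model)
        raise RuntimeError("failed to create context")
    print("context size:", llama_n_ctx(ctx))
    return model, ctx  # free with llama_free(ctx) and llama_free_model(model) when done
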
+ + +# LLAMA_API uint32_t llama_n_batch (const struct llama_context * ctx); +@ctypes_function("llama_n_batch", [llama_context_p_ctypes], ctypes.c_uint32) +def llama_n_batch(ctx: llama_context_p, /) -> int: ... + + +# LLAMA_API uint32_t llama_n_ubatch (const struct llama_context * ctx); +@ctypes_function("llama_n_ubatch", [llama_context_p_ctypes], ctypes.c_uint32) +def llama_n_ubatch(ctx: llama_context_p, /) -> int: ... + + +# LLAMA_API uint32_t llama_n_seq_max (const struct llama_context * ctx); +@ctypes_function("llama_n_seq_max", [llama_context_p_ctypes], ctypes.c_uint32) +def llama_n_seq_max(ctx: llama_context_p, /) -> int: ... + + +# LLAMA_API enum llama_pooling_type llama_pooling_type(const struct llama_context * ctx); +@ctypes_function("llama_pooling_type", [llama_context_p_ctypes], ctypes.c_int) +def llama_pooling_type(ctx: llama_context_p, /) -> int: ... + + +# LLAMA_API enum llama_vocab_type llama_vocab_type (const struct llama_model * model); +@ctypes_function("llama_vocab_type", [llama_model_p_ctypes], ctypes.c_int) +def llama_vocab_type(model: llama_model_p, /) -> int: ... + + +# LLAMA_API enum llama_rope_type llama_rope_type (const struct llama_model * model); +@ctypes_function("llama_rope_type", [llama_model_p_ctypes], ctypes.c_int) +def llama_rope_type(model: llama_model_p, /) -> int: ... + + +# LLAMA_API int32_t llama_n_vocab (const struct llama_model * model); +@ctypes_function("llama_n_vocab", [llama_model_p_ctypes], ctypes.c_int32) +def llama_n_vocab(model: llama_model_p, /) -> int: ... + + +# LLAMA_API int32_t llama_n_ctx_train(const struct llama_model * model); +@ctypes_function("llama_n_ctx_train", [llama_model_p_ctypes], ctypes.c_int32) +def llama_n_ctx_train(model: llama_model_p, /) -> int: ... + + +# LLAMA_API int32_t llama_n_embd (const struct llama_model * model); +@ctypes_function("llama_n_embd", [llama_model_p_ctypes], ctypes.c_int32) +def llama_n_embd(model: llama_model_p, /) -> int: ... + + +# LLAMA_API int32_t llama_n_layer (const struct llama_model * model); +@ctypes_function("llama_n_layer", [llama_model_p_ctypes], ctypes.c_int32) +def llama_n_layer(model: llama_model_p, /) -> int: ... + + +# // Get the model's RoPE frequency scaling factor +# LLAMA_API float llama_rope_freq_scale_train(const struct llama_model * model); +@ctypes_function("llama_rope_freq_scale_train", [llama_model_p_ctypes], ctypes.c_float) +def llama_rope_freq_scale_train(model: llama_model_p, /) -> float: + """Get the model's RoPE frequency scaling factor""" + ... + + +# // Functions to access the model's GGUF metadata scalar values +# // - The functions return the length of the string on success, or -1 on failure +# // - The output string is always null-terminated and cleared on failure +# // - GGUF array values are not supported by these functions + + +# // Get metadata value as a string by key name +# LLAMA_API int32_t llama_model_meta_val_str(const struct llama_model * model, const char * key, char * buf, size_t buf_size); +@ctypes_function( + "llama_model_meta_val_str", + [ + llama_model_p_ctypes, + ctypes.c_char_p, + ctypes.c_char_p, + ctypes.c_size_t, + ], + ctypes.c_int32, +) +def llama_model_meta_val_str( + model: llama_model_p, + key: Union[ctypes.c_char_p, bytes], + buf: bytes, + buf_size: int, + /, +) -> int: + """Get metadata value as a string by key name""" + ... 
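
# A small sketch of the model-introspection getters above. "general.architecture" is a common
# GGUF metadata key used purely as an example; the buffer size is arbitrary.
def _example_model_hparams(model: llama_model_p):
    """Illustrative only; not called anywhere in this module."""
    print("n_vocab    :", llama_n_vocab(model))
    print("n_ctx_train:", llama_n_ctx_train(model))
    print("n_embd     :", llama_n_embd(model))
    buf = ctypes.create_string_buffer(128)
    if llama_model_meta_val_str(model, b"general.architecture", buf, len(buf)) >= 0:
        print("architecture:", buf.value.decode("utf-8", errors="replace"))
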
+ + +# // Get the number of metadata key/value pairs +# LLAMA_API int32_t llama_model_meta_count(const struct llama_model * model); +@ctypes_function("llama_model_meta_count", [llama_model_p_ctypes], ctypes.c_int32) +def llama_model_meta_count(model: llama_model_p, /) -> int: + """Get the number of metadata key/value pairs""" + ... + + +# // Get metadata key name by index +# LLAMA_API int32_t llama_model_meta_key_by_index(const struct llama_model * model, int32_t i, char * buf, size_t buf_size); +@ctypes_function( + "llama_model_meta_key_by_index", + [ + llama_model_p_ctypes, + ctypes.c_int32, + ctypes.c_char_p, + ctypes.c_size_t, + ], + ctypes.c_int32, +) +def llama_model_meta_key_by_index( + model: llama_model_p, + i: Union[ctypes.c_int, int], + buf: Union[bytes, CtypesArray[ctypes.c_char]], + buf_size: int, + /, +) -> int: + """Get metadata key name by index""" + ... + + +# // Get metadata value as a string by index +# LLAMA_API int32_t llama_model_meta_val_str_by_index(const struct llama_model * model, int32_t i, char * buf, size_t buf_size); +@ctypes_function( + "llama_model_meta_val_str_by_index", + [ + llama_model_p_ctypes, + ctypes.c_int32, + ctypes.c_char_p, + ctypes.c_size_t, + ], + ctypes.c_int32, +) +def llama_model_meta_val_str_by_index( + model: llama_model_p, + i: Union[ctypes.c_int, int], + buf: Union[bytes, CtypesArray[ctypes.c_char]], + buf_size: int, + /, +) -> int: + """Get metadata value as a string by index""" + ... + + +# // Get a string describing the model type +# LLAMA_API int32_t llama_model_desc(const struct llama_model * model, char * buf, size_t buf_size); +@ctypes_function( + "llama_model_desc", + [llama_model_p_ctypes, ctypes.c_char_p, ctypes.c_size_t], + ctypes.c_int32, +) +def llama_model_desc( + model: llama_model_p, + buf: Union[bytes, CtypesArray[ctypes.c_char]], + buf_size: Union[ctypes.c_size_t, int], + /, +) -> int: + """Get a string describing the model type""" + ... + + +# // Returns the total size of all the tensors in the model in bytes +# LLAMA_API uint64_t llama_model_size(const struct llama_model * model); +@ctypes_function("llama_model_size", [llama_model_p_ctypes], ctypes.c_uint64) +def llama_model_size(model: llama_model_p, /) -> int: + """Returns the total size of all the tensors in the model in bytes""" + ... + + +# // Returns the total number of parameters in the model +# LLAMA_API uint64_t llama_model_n_params(const struct llama_model * model); +@ctypes_function("llama_model_n_params", [llama_model_p_ctypes], ctypes.c_uint64) +def llama_model_n_params(model: llama_model_p, /) -> int: + """Returns the total number of parameters in the model""" + ... + + +# // Get a llama model tensor +# LLAMA_API struct ggml_tensor * llama_get_model_tensor(struct llama_model * model, const char * name); +@ctypes_function( + "llama_get_model_tensor", [llama_model_p_ctypes, ctypes.c_char_p], ctypes.c_void_p +) +def llama_get_model_tensor( + model: llama_model_p, name: Union[ctypes.c_char_p, bytes], / +) -> ctypes.c_void_p: + """Get a llama model tensor""" + ... 
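
# A sketch that walks the GGUF metadata using the index-based getters above and prints a short
# model summary. Buffer sizes are arbitrary; values longer than the buffer are truncated.
def _example_dump_model_metadata(model: llama_model_p):
    """Illustrative only; not called anywhere in this module."""
    desc = ctypes.create_string_buffer(1024)
    llama_model_desc(model, desc, len(desc))
    print(desc.value.decode("utf-8", errors="replace"),
          "|", llama_model_n_params(model), "params",
          "|", llama_model_size(model), "bytes")
    for i in range(llama_model_meta_count(model)):
        key = ctypes.create_string_buffer(256)
        val = ctypes.create_string_buffer(2048)
        llama_model_meta_key_by_index(model, i, key, len(key))
        llama_model_meta_val_str_by_index(model, i, val, len(val))
        print(key.value.decode(), "=", val.value.decode("utf-8", errors="replace"))
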
+ + +# // Returns 0 on success +# LLAMA_API uint32_t llama_model_quantize( +# const char * fname_inp, +# const char * fname_out, +# const llama_model_quantize_params * params); +@ctypes_function( + "llama_model_quantize", + [ + ctypes.c_char_p, + ctypes.c_char_p, + ctypes.POINTER(llama_model_quantize_params), + ], + ctypes.c_uint32, +) +def llama_model_quantize( + fname_inp: bytes, + fname_out: bytes, + params: CtypesPointerOrRef[llama_model_quantize_params], + /, +) -> int: + """Returns 0 on success""" + ... + + +# // Apply a LoRA adapter to a loaded model +# // path_base_model is the path to a higher quality model to use as a base for +# // the layers modified by the adapter. Can be NULL to use the current loaded model. +# // The model needs to be reloaded before applying a new adapter, otherwise the adapter +# // will be applied on top of the previous one +# // Returns 0 on success +# LLAMA_API int32_t llama_model_apply_lora_from_file( +# const struct llama_model * model, +# const char * path_lora, +# float scale, +# const char * path_base_model, +# int32_t n_threads); +@ctypes_function( + "llama_model_apply_lora_from_file", + [ + llama_model_p_ctypes, + ctypes.c_char_p, + ctypes.c_float, + ctypes.c_char_p, + ctypes.c_int32, + ], + ctypes.c_int32, +) +def llama_model_apply_lora_from_file( + model: llama_model_p, + path_lora: Union[ctypes.c_char_p, bytes], + scale: Union[ctypes.c_float, float], + path_base_model: Union[ctypes.c_char_p, bytes, None], + n_threads: Union[ctypes.c_int32, int], + /, +) -> int: + """Apply a LoRA adapter to a loaded model + path_base_model is the path to a higher quality model to use as a base for + the layers modified by the adapter. Can be NULL to use the current loaded model. + The model needs to be reloaded before applying a new adapter, otherwise the adapter + will be applied on top of the previous one + Returns 0 on success""" + ... + + +# // Apply a loaded control vector to a llama_context, or if data is NULL, clear +# // the currently loaded vector. +# // n_embd should be the size of a single layer's control, and data should point +# // to an n_embd x n_layers buffer starting from layer 1. +# // il_start and il_end are the layer range the vector should apply to (both inclusive) +# // See llama_control_vector_load in common to load a control vector. +# LLAMA_API int32_t llama_control_vector_apply( +# struct llama_context * lctx, +# const float * data, +# size_t len, +# int32_t n_embd, +# int32_t il_start, +# int32_t il_end); +@ctypes_function( + "llama_control_vector_apply", + [ + llama_context_p_ctypes, + ctypes.POINTER(ctypes.c_float), + ctypes.c_size_t, + ctypes.c_int32, + ctypes.c_int32, + ctypes.c_int32, + ], + ctypes.c_int32, +) +def llama_control_vector_apply( + lctx: llama_context_p, + data: CtypesPointerOrRef[ctypes.c_float], + len: int, + n_embd: int, + il_start: int, + il_end: int, + /, +) -> int: + """Apply a loaded control vector to a llama_context, or if data is NULL, clear + the currently loaded vector. + n_embd should be the size of a single layer's control, and data should point + to an n_embd x n_layers buffer starting from layer 1. + il_start and il_end are the layer range the vector should apply to (both inclusive) + See llama_control_vector_load in common to load a control vector.""" + ... + + +# // +# // KV cache +# // + + +# // Information associated with an individual cell in the KV cache view. +# struct llama_kv_cache_view_cell { +# // The position for this cell. Takes KV cache shifts into account. 
+# // May be negative if the cell is not populated. +# llama_pos pos; +# }; +class llama_kv_cache_view_cell(ctypes.Structure): + """Information associated with an individual cell in the KV cache view. + + Attributes: + pos (llama_pos): The position for this cell. Takes KV cache shifts into account. + May be negative if the cell is not populated.""" + + if TYPE_CHECKING: + pos: llama_pos + + _fields_ = [("pos", llama_pos)] + + +# // An updateable view of the KV cache. +# struct llama_kv_cache_view { +# // Number of KV cache cells. This will be the same as the context size. +# int32_t n_cells; + +# // Maximum number of sequences that can exist in a cell. It's not an error +# // if there are more sequences in a cell than this value, however they will +# // not be visible in the view cells_sequences. +# int32_t n_seq_max; + +# // Number of tokens in the cache. For example, if there are two populated +# // cells, the first with 1 sequence id in it and the second with 2 sequence +# // ids then you'll have 3 tokens. +# int32_t token_count; + +# // Number of populated cache cells. +# int32_t used_cells; + +# // Maximum contiguous empty slots in the cache. +# int32_t max_contiguous; + +# // Index to the start of the max_contiguous slot range. Can be negative +# // when cache is full. +# int32_t max_contiguous_idx; + +# // Information for an individual cell. +# struct llama_kv_cache_view_cell * cells; + + +# // The sequences for each cell. There will be n_seq_max items per cell. +# llama_seq_id * cells_sequences; +# }; +class llama_kv_cache_view(ctypes.Structure): + if TYPE_CHECKING: + n_cells: int + n_max_seq: int + token_count: int + used_cells: int + max_contiguous: int + max_contiguous_idx: int + cells: CtypesArray[llama_kv_cache_view_cell] + cells_sequences: CtypesArray[llama_seq_id] + + _fields_ = [ + ("n_cells", ctypes.c_int32), + ("n_max_seq", ctypes.c_int32), + ("token_count", ctypes.c_int32), + ("used_cells", ctypes.c_int32), + ("max_contiguous", ctypes.c_int32), + ("max_contiguous_idx", ctypes.c_int32), + ("cells", ctypes.POINTER(llama_kv_cache_view_cell)), + ("cells_sequences", ctypes.POINTER(llama_seq_id)), + ] + + +llama_kv_cache_view_p = ctypes.POINTER(llama_kv_cache_view) + + +# // Create an empty KV cache view. (use only for debugging purposes) +# LLAMA_API struct llama_kv_cache_view llama_kv_cache_view_init(const struct llama_context * ctx, int32_t n_seq_max); +@ctypes_function( + "llama_kv_cache_view_init", + [llama_context_p_ctypes, ctypes.c_int32], + llama_kv_cache_view, +) +def llama_kv_cache_view_init( + ctx: llama_context_p, n_seq_max: Union[ctypes.c_int32, int], / +) -> llama_kv_cache_view: + """Create an empty KV cache view. (use only for debugging purposes)""" + ... + + +# // Free a KV cache view. (use only for debugging purposes) +# LLAMA_API void llama_kv_cache_view_free(struct llama_kv_cache_view * view); +@ctypes_function("llama_kv_cache_view_free", [llama_kv_cache_view_p], None) +def llama_kv_cache_view_free(view: "ctypes.pointer[llama_kv_cache_view]", /): # type: ignore + """Free a KV cache view. (use only for debugging purposes)""" + ... + + +# // Update the KV cache view structure with the current state of the KV cache. 
(use only for debugging purposes) +# LLAMA_API void llama_kv_cache_view_update(const struct llama_context * ctx, struct llama_kv_cache_view * view); +@ctypes_function( + "llama_kv_cache_view_update", [llama_context_p_ctypes, llama_kv_cache_view_p], None +) +def llama_kv_cache_view_update(ctx: llama_context_p, view: CtypesPointerOrRef[llama_kv_cache_view], /): # type: ignore + """Update the KV cache view structure with the current state of the KV cache. (use only for debugging purposes)""" + ... + + +# // Returns the number of tokens in the KV cache (slow, use only for debug) +# // If a KV cell has multiple sequences assigned to it, it will be counted multiple times +# LLAMA_API int32_t llama_get_kv_cache_token_count(const struct llama_context * ctx); +@ctypes_function( + "llama_get_kv_cache_token_count", [llama_context_p_ctypes], ctypes.c_int32 +) +def llama_get_kv_cache_token_count(ctx: llama_context_p, /) -> int: + """Returns the number of tokens in the KV cache (slow, use only for debug) + If a KV cell has multiple sequences assigned to it, it will be counted multiple times + """ + ... + + +# // Returns the number of used KV cells (i.e. have at least one sequence assigned to them) +# LLAMA_API int32_t llama_get_kv_cache_used_cells(const struct llama_context * ctx); +@ctypes_function( + "llama_get_kv_cache_used_cells", [llama_context_p_ctypes], ctypes.c_int32 +) +def llama_get_kv_cache_used_cells(ctx: llama_context_p, /) -> int: + """Returns the number of used KV cells (i.e. have at least one sequence assigned to them)""" + ... + + +# // Clear the KV cache - both cell info is erased and KV data is zeroed +# LLAMA_API void llama_kv_cache_clear( +# struct llama_context * ctx); +@ctypes_function("llama_kv_cache_clear", [llama_context_p_ctypes], None) +def llama_kv_cache_clear(ctx: llama_context_p, /): + """Clear the KV cache""" + ... + + +# // Removes all tokens that belong to the specified sequence and have positions in [p0, p1) +# // Returns false if a partial sequence cannot be removed. Removing a whole sequence never fails +# // seq_id < 0 : match any sequence +# // p0 < 0 : [0, p1] +# // p1 < 0 : [p0, inf) +# LLAMA_API bool llama_kv_cache_seq_rm( +# struct llama_context * ctx, +# llama_seq_id seq_id, +# llama_pos p0, +# llama_pos p1); +@ctypes_function( + "llama_kv_cache_seq_rm", + [ + llama_context_p_ctypes, + llama_seq_id, + llama_pos, + llama_pos, + ], + ctypes.c_bool, +) +def llama_kv_cache_seq_rm( + ctx: llama_context_p, + seq_id: Union[llama_seq_id, int], + p0: Union[llama_pos, int], + p1: Union[llama_pos, int], + /, +) -> bool: + """Removes all tokens that belong to the specified sequence and have positions in [p0, p1) + + Returns false if a partial sequence cannot be removed. Removing a whole sequence never fails + + seq_id < 0 : match any sequence + p0 < 0 : [0, p1] + p1 < 0 : [p0, inf)""" + ... 
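
# A housekeeping sketch for the KV-cache helpers above: report cache usage, try to drop one
# sequence's tail, and fall back to a full clear. The sequence id and cut-off position are
# arbitrary placeholders.
def _example_kv_cache_housekeeping(ctx: llama_context_p, seq_id: int = 0, keep_up_to: int = 128):
    """Illustrative only; not called anywhere in this module."""
    print("tokens in cache:", llama_get_kv_cache_token_count(ctx))
    print("used cells     :", llama_get_kv_cache_used_cells(ctx))
    # p1 < 0 means "from p0 to the end of the sequence"
    if not llama_kv_cache_seq_rm(ctx, seq_id, keep_up_to, -1):
        llama_kv_cache_clear(ctx)
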
+ + +# // Copy all tokens that belong to the specified sequence to another sequence +# // Note that this does not allocate extra KV cache memory - it simply assigns the tokens to the new sequence +# // p0 < 0 : [0, p1] +# // p1 < 0 : [p0, inf) +# LLAMA_API void llama_kv_cache_seq_cp( +# struct llama_context * ctx, +# llama_seq_id seq_id_src, +# llama_seq_id seq_id_dst, +# llama_pos p0, +# llama_pos p1); +@ctypes_function( + "llama_kv_cache_seq_cp", + [ + llama_context_p_ctypes, + llama_seq_id, + llama_seq_id, + llama_pos, + llama_pos, + ], + None, +) +def llama_kv_cache_seq_cp( + ctx: llama_context_p, + seq_id_src: Union[llama_seq_id, int], + seq_id_dst: Union[llama_seq_id, int], + p0: Union[llama_pos, int], + p1: Union[llama_pos, int], + /, +): + """Copy all tokens that belong to the specified sequence to another sequence + Note that this does not allocate extra KV cache memory - it simply assigns the tokens to the new sequence + p0 < 0 : [0, p1] + p1 < 0 : [p0, inf)""" + ... + + +# // Removes all tokens that do not belong to the specified sequence +# LLAMA_API void llama_kv_cache_seq_keep( +# struct llama_context * ctx, +# llama_seq_id seq_id); +@ctypes_function( + "llama_kv_cache_seq_keep", [llama_context_p_ctypes, llama_seq_id], None +) +def llama_kv_cache_seq_keep(ctx: llama_context_p, seq_id: Union[llama_seq_id, int], /): + """Removes all tokens that do not belong to the specified sequence""" + ... + + +# // Adds relative position "delta" to all tokens that belong to the specified sequence and have positions in [p0, p1) +# // If the KV cache is RoPEd, the KV data is updated accordingly: +# // - lazily on next llama_decode() +# // - explicitly with llama_kv_cache_update() +# // p0 < 0 : [0, p1] +# // p1 < 0 : [p0, inf) +# LLAMA_API void llama_kv_cache_seq_add( +# struct llama_context * ctx, +# llama_seq_id seq_id, +# llama_pos p0, +# llama_pos p1, +# llama_pos delta); +@ctypes_function( + "llama_kv_cache_seq_add", + [ + llama_context_p_ctypes, + llama_seq_id, + llama_pos, + llama_pos, + llama_pos, + ], + None, +) +def llama_kv_cache_seq_add( + ctx: llama_context_p, + seq_id: Union[llama_seq_id, int], + p0: Union[llama_pos, int], + p1: Union[llama_pos, int], + delta: Union[llama_pos, int], + /, +): + """Adds relative position "delta" to all tokens that belong to the specified sequence and have positions in [p0, p1) + If the KV cache is RoPEd, the KV data is updated accordingly: + - lazily on next llama_decode() + - explicitly with llama_kv_cache_update() + p0 < 0 : [0, p1] + p1 < 0 : [p0, inf)""" + ... + + +# // Integer division of the positions by factor of `d > 1` +# // If the KV cache is RoPEd, the KV data is updated accordingly +# // p0 < 0 : [0, p1] +# // p1 < 0 : [p0, inf) +# LLAMA_API void llama_kv_cache_seq_div( +# struct llama_context * ctx, +# llama_seq_id seq_id, +# llama_pos p0, +# llama_pos p1, +# int d); +@ctypes_function( + "llama_kv_cache_seq_div", + [ + llama_context_p_ctypes, + llama_seq_id, + llama_pos, + llama_pos, + ctypes.c_int, + ], + None, +) +def llama_kv_cache_seq_div( + ctx: llama_context_p, + seq_id: Union[llama_seq_id, int], + p0: Union[llama_pos, int], + p1: Union[llama_pos, int], + d: Union[ctypes.c_int, int], + /, +): + """Integer division of the positions by factor of `d > 1` + If the KV cache is RoPEd, the KV data is updated accordingly + p0 < 0 : [0, p1] + p1 < 0 : [p0, inf)""" + ... 
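
# A sketch of the common "context shift" pattern built from llama_kv_cache_seq_rm (above) and
# llama_kv_cache_seq_add: drop a window of old tokens, then slide the remaining positions back
# so decoding can continue. This assumes a RoPE-based model; n_keep and n_discard are placeholders.
def _example_context_shift(ctx: llama_context_p, n_keep: int, n_discard: int, seq_id: int = 0):
    """Illustrative only; not called anywhere in this module."""
    llama_kv_cache_seq_rm(ctx, seq_id, n_keep, n_keep + n_discard)
    llama_kv_cache_seq_add(ctx, seq_id, n_keep + n_discard, -1, -n_discard)  # p1 < 0 : [p0, inf)
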
+ + +# // Defragment the KV cache +# // This will be applied: +# // - lazily on next llama_decode() +# // - explicitly with llama_kv_cache_update() +# LLAMA_API void llama_kv_cache_defrag(struct llama_context * ctx); +@ctypes_function("llama_kv_cache_defrag", [llama_context_p_ctypes], None) +def llama_kv_cache_defrag(ctx: llama_context_p, /): + """Defragment the KV cache + This will be applied: + - lazily on next llama_decode() + - explicitly with llama_kv_cache_update()""" + ... + + +# // Apply the KV cache updates (such as K-shifts, defragmentation, etc.) +# LLAMA_API void llama_kv_cache_update(struct llama_context * ctx); +@ctypes_function("llama_kv_cache_update", [llama_context_p_ctypes], None) +def llama_kv_cache_update(ctx: llama_context_p, /): + """Apply the KV cache updates (such as K-shifts, defragmentation, etc.)""" + ... + + +# // +# // State / sessions +# // + + +# Returns the maximum size in bytes of the state (rng, logits, embedding +# and kv_cache) - will often be smaller after compacting tokens +# LLAMA_API size_t llama_state_get_size(const struct llama_context * ctx); +@ctypes_function("llama_state_get_size", [llama_context_p_ctypes], ctypes.c_size_t) +def llama_state_get_size(ctx: llama_context_p, /) -> int: + """Returns the maximum size in bytes of the state (rng, logits, embedding + and kv_cache) - will often be smaller after compacting tokens""" + ... + + +# LLAMA_API DEPRECATED(size_t llama_get_state_size(const struct llama_context * ctx), +# "use llama_state_get_size instead"); +@ctypes_function("llama_get_state_size", [llama_context_p_ctypes], ctypes.c_size_t) +def llama_get_state_size(ctx: llama_context_p, /) -> int: + """Returns the maximum size in bytes of the state (rng, logits, embedding + and kv_cache) - will often be smaller after compacting tokens""" + ... + + +# Copies the state to the specified destination address. +# Destination needs to have allocated enough memory. +# Returns the number of bytes copied +# LLAMA_API size_t llama_state_get_data( +# struct llama_context * ctx, +# uint8_t * dst); +@ctypes_function( + "llama_state_get_data", + [ + llama_context_p_ctypes, + ctypes.POINTER(ctypes.c_uint8), + ], + ctypes.c_size_t, +) +def llama_state_get_data( + ctx: llama_context_p, dst: CtypesArray[ctypes.c_uint8], / +) -> int: + """Copies the state to the specified destination address. + Destination needs to have allocated enough memory. + Returns the number of bytes copied""" + ... + + +# LLAMA_API DEPRECATED(size_t llama_copy_state_data( +# struct llama_context * ctx, +# uint8_t * dst), +# "use llama_state_get_data instead"); +@ctypes_function( + "llama_copy_state_data", + [ + llama_context_p_ctypes, + ctypes.POINTER(ctypes.c_uint8), + ], + ctypes.c_size_t, +) +def llama_copy_state_data( + ctx: llama_context_p, dst: CtypesArray[ctypes.c_uint8], / +) -> int: + """Copies the state to the specified destination address. + Destination needs to have allocated enough memory. + Returns the number of bytes copied""" + ... + + +# // Set the state reading from the specified address +# // Returns the number of bytes read +# LLAMA_API size_t llama_state_set_data( +# struct llama_context * ctx, +# const uint8_t * src); +@ctypes_function( + "llama_state_set_data", + [llama_context_p_ctypes, ctypes.POINTER(ctypes.c_uint8)], + ctypes.c_size_t, +) +def llama_state_set_data( + ctx: llama_context_p, src: CtypesArray[ctypes.c_uint8], / +) -> int: + """Set the state reading from the specified address + Returns the number of bytes read""" + ... 
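
# A sketch of in-memory state snapshots using llama_state_get_size / llama_state_get_data /
# llama_state_set_data above. The snapshot is returned as a plain Python bytes object.
def _example_state_snapshot(ctx: llama_context_p) -> bytes:
    """Illustrative only; not called anywhere in this module."""
    size = llama_state_get_size(ctx)
    buf = (ctypes.c_uint8 * size)()
    n_copied = llama_state_get_data(ctx, buf)
    return bytes(buf[:n_copied])


def _example_state_restore(ctx: llama_context_p, snapshot: bytes) -> int:
    """Illustrative only; not called anywhere in this module."""
    buf = (ctypes.c_uint8 * len(snapshot)).from_buffer_copy(snapshot)
    return llama_state_set_data(ctx, buf)  # returns the number of bytes read
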
+ + +# LLAMA_API DEPRECATED(size_t llama_set_state_data( +# struct llama_context * ctx, +# const uint8_t * src), +# "use llama_state_set_data instead"); +@ctypes_function( + "llama_set_state_data", + [llama_context_p_ctypes, ctypes.POINTER(ctypes.c_uint8)], + ctypes.c_size_t, +) +def llama_set_state_data( + ctx: llama_context_p, src: CtypesArray[ctypes.c_uint8], / +) -> int: + """Set the state reading from the specified address""" + ... + + +# Save/load session file +# LLAMA_API bool llama_state_load_file( +# struct llama_context * ctx, +# const char * path_session, +# llama_token * tokens_out, +# size_t n_token_capacity, +# size_t * n_token_count_out); +@ctypes_function( + "llama_state_load_file", + [ + llama_context_p_ctypes, + ctypes.c_char_p, + llama_token_p, + ctypes.c_size_t, + ctypes.POINTER(ctypes.c_size_t), + ], + ctypes.c_bool, +) +def llama_state_load_file( + ctx: llama_context_p, + path_session: bytes, + tokens_out: CtypesArray[llama_token], + n_token_capacity: Union[ctypes.c_size_t, int], + n_token_count_out: CtypesPointerOrRef[ctypes.c_size_t], + /, +) -> bool: ... + + +# LLAMA_API DEPRECATED(bool llama_load_session_file( +# struct llama_context * ctx, +# const char * path_session, +# llama_token * tokens_out, +# size_t n_token_capacity, +# size_t * n_token_count_out), +# "use llama_state_load_file instead"); +@ctypes_function( + "llama_load_session_file", + [ + llama_context_p_ctypes, + ctypes.c_char_p, + llama_token_p, + ctypes.c_size_t, + ctypes.POINTER(ctypes.c_size_t), + ], + ctypes.c_size_t, +) +def llama_load_session_file( + ctx: llama_context_p, + path_session: bytes, + tokens_out: CtypesArray[llama_token], + n_token_capacity: Union[ctypes.c_size_t, int], + n_token_count_out: CtypesPointerOrRef[ctypes.c_size_t], + /, +) -> int: ... + + +# LLAMA_API bool llama_state_save_file( +# struct llama_context * ctx, +# const char * path_session, +# const llama_token * tokens, +# size_t n_token_count); +@ctypes_function( + "llama_state_save_file", + [ + llama_context_p_ctypes, + ctypes.c_char_p, + llama_token_p, + ctypes.c_size_t, + ], + ctypes.c_bool, +) +def llama_state_save_file( + ctx: llama_context_p, + path_session: bytes, + tokens: CtypesArray[llama_token], + n_token_count: Union[ctypes.c_size_t, int], + /, +) -> bool: ... + + +# LLAMA_API DEPRECATED(bool llama_save_session_file( +# struct llama_context * ctx, +# const char * path_session, +# const llama_token * tokens, +# size_t n_token_count), +# "use llama_state_save_file instead"); +@ctypes_function( + "llama_save_session_file", + [ + llama_context_p_ctypes, + ctypes.c_char_p, + llama_token_p, + ctypes.c_size_t, + ], + ctypes.c_size_t, +) +def llama_save_session_file( + ctx: llama_context_p, + path_session: bytes, + tokens: CtypesArray[llama_token], + n_token_count: Union[ctypes.c_size_t, int], + /, +) -> int: ... + + +# // Get the exact size needed to copy the KV cache of a single sequence +# LLAMA_API size_t llama_state_seq_get_size( +# struct llama_context * ctx, +# llama_seq_id seq_id); +@ctypes_function( + "llama_state_seq_get_size", + [llama_context_p_ctypes, llama_seq_id], + ctypes.c_size_t, +) +def llama_state_seq_get_size(ctx: llama_context_p, seq_id: llama_seq_id, /) -> int: + """Get the exact size needed to copy the KV cache of a single sequence""" + ... 
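
# A sketch of session-file persistence with llama_state_save_file / llama_state_load_file above.
# The file path, token capacity and token ids are placeholders.
def _example_save_session(ctx: llama_context_p, token_ids, path: bytes = b"session.bin") -> bool:
    """Illustrative only; not called anywhere in this module."""
    toks = (llama_token * len(token_ids))(*token_ids)
    return llama_state_save_file(ctx, path, toks, len(token_ids))


def _example_load_session(ctx: llama_context_p, path: bytes = b"session.bin", capacity: int = 4096):
    """Illustrative only; not called anywhere in this module."""
    toks = (llama_token * capacity)()
    n_out = ctypes.c_size_t(0)
    if not llama_state_load_file(ctx, path, toks, capacity, ctypes.byref(n_out)):
        return None
    return list(toks[: n_out.value])
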
+ + +# // Copy the KV cache of a single sequence into the specified buffer +# LLAMA_API size_t llama_state_seq_get_data( +# struct llama_context * ctx, +# uint8_t * dst, +# llama_seq_id seq_id); +@ctypes_function( + "llama_state_seq_get_data", + [llama_context_p_ctypes, ctypes.POINTER(ctypes.c_uint8), llama_seq_id], + ctypes.c_size_t, +) +def llama_state_seq_get_data( + ctx: llama_context_p, dst: CtypesArray[ctypes.c_uint8], seq_id: llama_seq_id, / +) -> int: + """Copy the KV cache of a single sequence into the specified buffer""" + ... + + +# // Copy the sequence data (originally copied with `llama_state_seq_get_data`) into the specified sequence +# // Returns: +# // - Positive: Ok +# // - Zero: Failed to load +# LLAMA_API size_t llama_state_seq_set_data( +# struct llama_context * ctx, +# const uint8_t * src, +# llama_seq_id dest_seq_id); +@ctypes_function( + "llama_state_seq_set_data", + [llama_context_p_ctypes, ctypes.POINTER(ctypes.c_uint8), llama_seq_id], + ctypes.c_size_t, +) +def llama_state_seq_set_data( + ctx: llama_context_p, src: CtypesArray[ctypes.c_uint8], dest_seq_id: llama_seq_id, / +) -> int: + """Copy the sequence data (originally copied with `llama_state_seq_get_data`) into the specified sequence""" + ... + + +# LLAMA_API size_t llama_state_seq_save_file( +# struct llama_context * ctx, +# const char * filepath, +# llama_seq_id seq_id, +# const llama_token * tokens, +# size_t n_token_count); +@ctypes_function( + "llama_state_seq_save_file", + [ + llama_context_p_ctypes, + ctypes.c_char_p, + llama_seq_id, + llama_token_p, + ctypes.c_size_t, + ], + ctypes.c_size_t, +) +def llama_state_seq_save_file( + ctx: llama_context_p, + filepath: bytes, + seq_id: llama_seq_id, + tokens: CtypesArray[llama_token], + n_token_count: Union[ctypes.c_size_t, int], + /, +) -> int: ... + + +# LLAMA_API size_t llama_state_seq_load_file( +# struct llama_context * ctx, +# const char * filepath, +# llama_seq_id dest_seq_id, +# llama_token * tokens_out, +# size_t n_token_capacity, +# size_t * n_token_count_out); +@ctypes_function( + "llama_state_seq_load_file", + [ + llama_context_p_ctypes, + ctypes.c_char_p, + llama_seq_id, + llama_token_p, + ctypes.c_size_t, + ctypes.POINTER(ctypes.c_size_t), + ], + ctypes.c_size_t, +) +def llama_state_seq_load_file( + ctx: llama_context_p, + filepath: bytes, + dest_seq_id: llama_seq_id, + tokens_out: CtypesArray[llama_token], + n_token_capacity: Union[ctypes.c_size_t, int], + n_token_count_out: CtypesPointerOrRef[ctypes.c_size_t], + /, +) -> int: ... + + +# // +# // Decoding +# // + + +# // Return batch for single sequence of tokens starting at pos_0 +# // +# // NOTE: this is a helper function to facilitate transition to the new batch API - avoid using it +# // +# LLAMA_API struct llama_batch llama_batch_get_one( +# llama_token * tokens, +# int32_t n_tokens, +# llama_pos pos_0, +# llama_seq_id seq_id); +@ctypes_function( + "llama_batch_get_one", + [ + llama_token_p, + ctypes.c_int, + llama_pos, + llama_seq_id, + ], + llama_batch, +) +def llama_batch_get_one( + tokens: CtypesArray[llama_token], + n_tokens: Union[ctypes.c_int, int], + pos_0: Union[llama_pos, int], + seq_id: llama_seq_id, + /, +) -> llama_batch: + """Return batch for single sequence of tokens starting at pos_0 + + NOTE: this is a helper function to facilitate transition to the new batch API - avoid using it + """ + ... 
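
# A sketch for the transitional llama_batch_get_one helper above: wrap a pre-tokenized prompt in
# a single-sequence batch. The token ids are placeholders (real code would obtain them from the
# tokenizer). The returned batch borrows the ctypes array, so keep it alive until after decoding.
def _example_single_sequence_batch(token_ids=(1, 2, 3)):
    """Illustrative only; not called anywhere in this module."""
    arr = (llama_token * len(token_ids))(*token_ids)
    batch = llama_batch_get_one(arr, len(token_ids), 0, 0)  # pos_0 = 0, seq_id = 0
    return batch, arr
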
+ + +# // Allocates a batch of tokens on the heap that can hold a maximum of n_tokens +# // Each token can be assigned up to n_seq_max sequence ids +# // The batch has to be freed with llama_batch_free() +# // If embd != 0, llama_batch.embd will be allocated with size of n_tokens * embd * sizeof(float) +# // Otherwise, llama_batch.token will be allocated to store n_tokens llama_token +# // The rest of the llama_batch members are allocated with size n_tokens +# // All members are left uninitialized +# LLAMA_API struct llama_batch llama_batch_init( +# int32_t n_tokens, +# int32_t embd, +# int32_t n_seq_max); +@ctypes_function( + "llama_batch_init", [ctypes.c_int32, ctypes.c_int32, ctypes.c_int32], llama_batch +) +def llama_batch_init( + n_tokens: Union[ctypes.c_int32, int], + embd: Union[ctypes.c_int32, int], + n_seq_max: Union[ctypes.c_int32, int], + /, +) -> llama_batch: + """Allocates a batch of tokens on the heap that can hold a maximum of n_tokens + Each token can be assigned up to n_seq_max sequence ids + The batch has to be freed with llama_batch_free() + If embd != 0, llama_batch.embd will be allocated with size of n_tokens * embd * sizeof(float) + Otherwise, llama_batch.token will be allocated to store n_tokens llama_token + The rest of the llama_batch members are allocated with size n_tokens + All members are left uninitialized""" + ... + + +# // Frees a batch of tokens allocated with llama_batch_init() +# LLAMA_API void llama_batch_free(struct llama_batch batch); +@ctypes_function("llama_batch_free", [llama_batch], None) +def llama_batch_free(batch: llama_batch, /): + """Frees a batch of tokens allocated with llama_batch_init()""" + ... + + +# // Positive return values does not mean a fatal error, but rather a warning. +# // 0 - success +# // 1 - could not find a KV slot for the batch (try reducing the size of the batch or increase the context) +# // < 0 - error +# LLAMA_API int32_t llama_decode( +# struct llama_context * ctx, +# struct llama_batch batch); +@ctypes_function("llama_decode", [llama_context_p_ctypes, llama_batch], ctypes.c_int32) +def llama_decode(ctx: llama_context_p, batch: llama_batch, /) -> int: + """Positive return values does not mean a fatal error, but rather a warning. + 0 - success + 1 - could not find a KV slot for the batch (try reducing the size of the batch or increase the context) + < 0 - error""" + ... + + +# // Set the number of threads used for decoding +# // n_threads is the number of threads used for generation (single token) +# // n_threads_batch is the number of threads used for prompt and batch processing (multiple tokens) +# LLAMA_API void llama_set_n_threads(struct llama_context * ctx, uint32_t n_threads, uint32_t n_threads_batch); +@ctypes_function( + "llama_set_n_threads", + [ + llama_context_p_ctypes, + ctypes.c_uint32, + ctypes.c_uint32, + ], + None, +) +def llama_set_n_threads( + ctx: llama_context_p, + n_threads: Union[ctypes.c_uint32, int], + n_threads_batch: Union[ctypes.c_uint32, int], + /, +): + """Set the number of threads used for decoding + n_threads is the number of threads used for generation (single token) + n_threads_batch is the number of threads used for prompt and batch processing (multiple tokens) + """ + ... 
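+
+
+# --- Illustrative usage sketch (editor's addition, not part of the upstream bindings) ---
+# Decodes a prompt with the transitional llama_batch_get_one helper (defined
+# earlier) and interprets llama_decode's return code as documented above.
+# Assumes `ctx` is a valid llama_context and `tokens` is a (llama_token * n)
+# ctypes array of already-tokenized input starting at position 0.
+def _example_decode_prompt(ctx, tokens, n_tokens, seq_id=0):
+    batch = llama_batch_get_one(tokens, n_tokens, 0, seq_id)
+    rc = llama_decode(ctx, batch)
+    if rc < 0:
+        raise RuntimeError("llama_decode failed with error %d" % rc)
+    if rc > 0:
+        # Non-fatal: no KV slot was found; reduce the batch or grow the context.
+        pass
+    return rc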
+ + +# // Set whether to use causal attention or not +# // If set to true, the model will only attend to the past tokens +# LLAMA_API void llama_set_causal_attn(struct llama_context * ctx, bool causal_attn); +@ctypes_function("llama_set_causal_attn", [llama_context_p_ctypes, ctypes.c_bool], None) +def llama_set_causal_attn(ctx: llama_context_p, causal_attn: bool, /): + """Set whether to use causal attention or not + If set to true, the model will only attend to the past tokens""" + ... + + +# // Set abort callback +# LLAMA_API void llama_set_abort_callback(struct llama_context * ctx, ggml_abort_callback abort_callback, void * abort_callback_data); +@ctypes_function( + "llama_set_abort_callback", + [llama_context_p_ctypes, ggml_abort_callback, ctypes.c_void_p], + None, +) +def llama_set_abort_callback( + ctx: llama_context_p, + abort_callback: Callable[[ctypes.c_void_p], None], + abort_callback_data: ctypes.c_void_p, + /, +): + """Set abort callback""" + ... + + +# // Wait until all computations are finished +# // This is automatically done when using one of the functions below to obtain the computation results +# // and is not necessary to call it explicitly in most cases +# LLAMA_API void llama_synchronize(struct llama_context * ctx); +@ctypes_function("llama_synchronize", [llama_context_p_ctypes], None) +def llama_synchronize(ctx: llama_context_p, /): + """Wait until all computations are finished + This is automatically done when using one of the functions below to obtain the computation results + and is not necessary to call it explicitly in most cases""" + ... + + +# // Token logits obtained from the last call to llama_decode() +# // The logits for which llama_batch.logits[i] != 0 are stored contiguously +# // in the order they have appeared in the batch. +# // Rows: number of tokens for which llama_batch.logits[i] != 0 +# // Cols: n_vocab +# LLAMA_API float * llama_get_logits(struct llama_context * ctx); +@ctypes_function( + "llama_get_logits", [llama_context_p_ctypes], ctypes.POINTER(ctypes.c_float) +) +def llama_get_logits(ctx: llama_context_p, /) -> CtypesArray[ctypes.c_float]: + """Token logits obtained from the last call to llama_eval() + The logits for the last token are stored in the last row + Logits for which llama_batch.logits[i] == 0 are undefined + Rows: n_tokens provided with llama_batch + Cols: n_vocab + + Returns: + Pointer to the logits buffer of shape (n_tokens, n_vocab)""" + ... + + +# // Logits for the ith token. For positive indices, Equivalent to: +# // llama_get_logits(ctx) + ctx->output_ids[i]*n_vocab +# // Negative indicies can be used to access logits in reverse order, -1 is the last logit. +# // returns NULL for invalid ids. +# LLAMA_API float * llama_get_logits_ith(struct llama_context * ctx, int32_t i); +@ctypes_function( + "llama_get_logits_ith", + [llama_context_p_ctypes, ctypes.c_int32], + ctypes.POINTER(ctypes.c_float), +) +def llama_get_logits_ith( + ctx: llama_context_p, i: Union[ctypes.c_int32, int], / +) -> CtypesArray[ctypes.c_float]: + """Logits for the ith token. Equivalent to: + llama_get_logits(ctx) + i*n_vocab""" + ... + + +# // Get all output token embeddings. +# // when pooling_type == LLAMA_POOLING_TYPE_NONE or when using a generative model, +# // the embeddings for which llama_batch.logits[i] != 0 are stored contiguously +# // in the order they have appeared in the batch. +# // shape: [n_outputs*n_embd] +# // Otherwise, returns NULL. 
+# LLAMA_API float * llama_get_embeddings(struct llama_context * ctx); +@ctypes_function( + "llama_get_embeddings", [llama_context_p_ctypes], ctypes.POINTER(ctypes.c_float) +) +def llama_get_embeddings(ctx: llama_context_p, /) -> CtypesArray[ctypes.c_float]: + """Get the embeddings for the input + shape: [n_embd] (1-dimensional)""" + ... + + +# // Get the embeddings for the ith token. For positive indices, Equivalent to: +# // llama_get_embeddings(ctx) + ctx->output_ids[i]*n_embd +# // Negative indicies can be used to access embeddings in reverse order, -1 is the last embedding. +# // shape: [n_embd] (1-dimensional) +# // returns NULL for invalid ids. +# LLAMA_API float * llama_get_embeddings_ith(struct llama_context * ctx, int32_t i); +@ctypes_function( + "llama_get_embeddings_ith", + [llama_context_p_ctypes, ctypes.c_int32], + ctypes.POINTER(ctypes.c_float), +) +def llama_get_embeddings_ith( + ctx: llama_context_p, i: Union[ctypes.c_int32, int], / +) -> CtypesArray[ctypes.c_float]: + """Get the embeddings for the ith sequence + llama_get_embeddings(ctx) + i*n_embd""" + ... + + +# // Get the embeddings for a sequence id +# // Returns NULL if pooling_type is LLAMA_POOLING_TYPE_NONE +# // shape: [n_embd] (1-dimensional) +# LLAMA_API float * llama_get_embeddings_seq(struct llama_context * ctx, llama_seq_id seq_id); +@ctypes_function( + "llama_get_embeddings_seq", + [llama_context_p_ctypes, llama_seq_id], + ctypes.POINTER(ctypes.c_float), +) +def llama_get_embeddings_seq( + ctx: llama_context_p, seq_id: Union[llama_seq_id, int], / +) -> CtypesArray[ctypes.c_float]: + """Get the embeddings for a sequence id + Returns NULL if pooling_type is LLAMA_POOLING_TYPE_NONE + shape: [n_embd] (1-dimensional)""" + ... + + +# // +# // Vocab +# // + + +# LLAMA_API const char * llama_token_get_text(const struct llama_model * model, llama_token token); +@ctypes_function( + "llama_token_get_text", [llama_model_p_ctypes, llama_token], ctypes.c_char_p +) +def llama_token_get_text( + model: llama_model_p, token: Union[llama_token, int], / +) -> bytes: ... + + +# LLAMA_API float llama_token_get_score(const struct llama_model * model, llama_token token); +@ctypes_function( + "llama_token_get_score", [llama_model_p_ctypes, llama_token], ctypes.c_float +) +def llama_token_get_score( + model: llama_model_p, token: Union[llama_token, int], / +) -> float: ... + + +# LLAMA_API enum llama_token_type llama_token_get_type(const struct llama_model * model, llama_token token); +@ctypes_function( + "llama_token_get_type", [llama_model_p_ctypes, llama_token], ctypes.c_int +) +def llama_token_get_type( + model: llama_model_p, token: Union[llama_token, int], / +) -> int: ... + + +# // Check if the token is supposed to end generation (end-of-generation, eg. EOS, EOT, etc.) +# LLAMA_API bool llama_token_is_eog(const struct llama_model * model, llama_token token); +@ctypes_function( + "llama_token_is_eog", [llama_model_p_ctypes, llama_token], ctypes.c_bool +) +def llama_token_is_eog(model: llama_model_p, token: Union[llama_token, int], /) -> bool: + """Check if the token is supposed to end generation (end-of-generation, eg. EOS, EOT, etc.)""" + ... + + +# // Special tokens + + +# LLAMA_API llama_token llama_token_bos(const struct llama_model * model); // beginning-of-sentence +@ctypes_function("llama_token_bos", [llama_model_p_ctypes], llama_token) +def llama_token_bos(model: llama_model_p, /) -> int: + """beginning-of-sentence""" + ... 
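+
+
+# --- Illustrative usage sketch (editor's addition, not part of the upstream bindings) ---
+# Reads the pooled embedding of a sequence after llama_decode, using
+# llama_get_embeddings_seq above.  Assumes `ctx`/`model` are valid and that
+# llama_n_embd (defined earlier in this module) is available; returns a plain
+# Python list for convenience.
+def _example_pooled_embedding(ctx, model, seq_id=0):
+    # NULL is returned when pooling_type is LLAMA_POOLING_TYPE_NONE.
+    vec = llama_get_embeddings_seq(ctx, seq_id)
+    if not vec:
+        return None
+    n_embd = llama_n_embd(model)  # defined earlier in this module
+    return [vec[i] for i in range(n_embd)]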
+ + +# LLAMA_API llama_token llama_token_eos(const struct llama_model * model); // end-of-sentence +@ctypes_function("llama_token_eos", [llama_model_p_ctypes], llama_token) +def llama_token_eos(model: llama_model_p, /) -> int: + """end-of-sentence""" + ... + + +# LLAMA_API llama_token llama_token_cls(const struct llama_model * model); // classification +@ctypes_function("llama_token_cls", [llama_model_p_ctypes], llama_token) +def llama_token_cls(model: llama_model_p, /) -> int: + """classification""" + ... + + +# LLAMA_API llama_token llama_token_sep(const struct llama_model * model); // sentence separator +@ctypes_function("llama_token_sep", [llama_model_p_ctypes], llama_token) +def llama_token_sep(model: llama_model_p, /) -> int: + """sentence separator""" + ... + + +# LLAMA_API llama_token llama_token_nl (const struct llama_model * model); // next-line +@ctypes_function("llama_token_nl", [llama_model_p_ctypes], llama_token) +def llama_token_nl(model: llama_model_p, /) -> int: + """next-line""" + ... + + +# // Returns -1 if unknown, 1 for true or 0 for false. +# LLAMA_API int32_t llama_add_bos_token(const struct llama_model * model); +@ctypes_function("llama_add_bos_token", [llama_model_p_ctypes], ctypes.c_int32) +def llama_add_bos_token(model: llama_model_p, /) -> int: + """Returns -1 if unknown, 1 for true or 0 for false.""" + ... + + +# // Returns -1 if unknown, 1 for true or 0 for false. +# LLAMA_API int32_t llama_add_eos_token(const struct llama_model * model); +@ctypes_function("llama_add_eos_token", [llama_model_p_ctypes], ctypes.c_int32) +def llama_add_eos_token(model: llama_model_p, /) -> int: + """Returns -1 if unknown, 1 for true or 0 for false.""" + ... + + +# // Codellama infill tokens +# LLAMA_API llama_token llama_token_prefix(const struct llama_model * model); // Beginning of infill prefix +@ctypes_function("llama_token_prefix", [llama_model_p_ctypes], llama_token) +def llama_token_prefix(model: llama_model_p) -> int: + """codellama infill tokens""" + ... + + +# LLAMA_API llama_token llama_token_middle(const struct llama_model * model); // Beginning of infill middle +@ctypes_function("llama_token_middle", [llama_model_p_ctypes], llama_token) +def llama_token_middle(model: llama_model_p, /) -> int: ... + + +# LLAMA_API llama_token llama_token_suffix(const struct llama_model * model); // Beginning of infill suffix +@ctypes_function("llama_token_suffix", [llama_model_p_ctypes], llama_token) +def llama_token_suffix(model: llama_model_p, /) -> int: ... + + +# LLAMA_API llama_token llama_token_eot (const struct llama_model * model); // End of infill middle +@ctypes_function("llama_token_eot", [llama_model_p_ctypes], llama_token) +def llama_token_eot(model: llama_model_p, /) -> int: ... + + +# // +# // Tokenization +# // + + +# /// @details Convert the provided text into tokens. +# /// @param tokens The tokens pointer must be large enough to hold the resulting tokens. +# /// @return Returns the number of tokens on success, no more than n_tokens_max +# /// @return Returns a negative number on failure - the number of tokens that would have been returned +# /// @param parse_special Allow tokenizing special and/or control tokens which otherwise are not exposed and treated +# /// as plaintext. Does not insert a leading space. 
+# LLAMA_API int32_t llama_tokenize( +# const struct llama_model * model, +# const char * text, +# int32_t text_len, +# llama_token * tokens, +# int32_t n_tokens_max, +# bool add_special, +# bool parse_special); +@ctypes_function( + "llama_tokenize", + [ + llama_model_p_ctypes, + ctypes.c_char_p, + ctypes.c_int32, + llama_token_p, + ctypes.c_int32, + ctypes.c_bool, + ctypes.c_bool, + ], + ctypes.c_int32, +) +def llama_tokenize( + model: llama_model_p, + text: bytes, + text_len: Union[ctypes.c_int, int], + tokens: CtypesArray[llama_token], + n_tokens_max: Union[ctypes.c_int, int], + add_special: Union[ctypes.c_bool, bool], + parse_special: Union[ctypes.c_bool, bool], + /, +) -> int: + """Convert the provided text into tokens. + + Args: + model: The model to use for tokenization. + text: The text to tokenize. + text_len: The length of the text. + tokens: The tokens pointer must be large enough to hold the resulting tokens. + n_max_tokens: The maximum number of tokens to return. + add_special: Allow tokenizing special and/or control tokens which otherwise are not exposed and treated as plaintext. Does not insert a leading space. + parse_special: Allow parsing special tokens. + + Returns: + Returns the number of tokens on success, no more than n_tokens_max + Returns a negative number on failure - the number of tokens that would have been returned + """ + ... + + +# // Token Id -> Piece. +# // Uses the vocabulary in the provided context. +# // Does not write null terminator to the buffer. +# // User code is responsible to remove the leading whitespace of the first non-BOS token when decoding multiple tokens. +# // @param special If true, special tokens are rendered in the output. +# LLAMA_API int32_t llama_token_to_piece( +# const struct llama_model * model, +# llama_token token, +# char * buf, +# int32_t length, +# bool special); +@ctypes_function( + "llama_token_to_piece", + [ + llama_model_p_ctypes, + llama_token, + ctypes.c_char_p, + ctypes.c_int32, + ctypes.c_bool, + ], + ctypes.c_int32, +) +def llama_token_to_piece( + model: llama_model_p, + token: Union[llama_token, int], + buf: Union[ctypes.c_char_p, bytes, CtypesArray[ctypes.c_char]], + length: Union[ctypes.c_int, int], + special: Union[ctypes.c_bool, bool], + /, +) -> int: + """Token Id -> Piece. + Uses the vocabulary in the provided context. + Does not write null terminator to the buffer. + User code is responsible to remove the leading whitespace of the first non-BOS token when decoding multiple tokens. + + Args: + model: The model to use for tokenization. + token: The token to convert. + buf: The buffer to write the token to. + length: The length of the buffer. + special: If true, special tokens are rendered in the output.""" + ... + + +# /// Apply chat template. Inspired by hf apply_chat_template() on python. +# /// Both "model" and "custom_template" are optional, but at least one is required. "custom_template" has higher precedence than "model" +# /// NOTE: This function does not use a jinja parser. It only support a pre-defined list of template. See more: https://github.com/ggerganov/llama.cpp/wiki/Templates-supported-by-llama_chat_apply_template +# /// @param tmpl A Jinja template to use for this chat. If this is nullptr, the model’s default chat template will be used instead. +# /// @param chat Pointer to a list of multiple llama_chat_message +# /// @param n_msg Number of llama_chat_message in this chat +# /// @param add_ass Whether to end the prompt with the token(s) that indicate the start of an assistant message. 
+# /// @param buf A buffer to hold the output formatted prompt. The recommended alloc size is 2 * (total number of characters of all messages) +# /// @param length The size of the allocated buffer +# /// @return The total number of bytes of the formatted prompt. If is it larger than the size of buffer, you may need to re-alloc it and then re-apply the template. +# LLAMA_API int32_t llama_chat_apply_template( +# const struct llama_model * model, +# const char * tmpl, +# const struct llama_chat_message * chat, +# size_t n_msg, +# bool add_ass, +# char * buf, +# int32_t length); +@ctypes_function( + "llama_chat_apply_template", + [ + ctypes.c_void_p, + ctypes.c_char_p, + ctypes.POINTER(llama_chat_message), + ctypes.c_size_t, + ], + ctypes.c_int32, +) +def llama_chat_apply_template( + model: llama_model_p, + tmpl: bytes, + chat: CtypesArray[llama_chat_message], + n_msg: int, + /, +) -> int: ... + + +# // +# // Grammar +# // + + +# LLAMA_API struct llama_grammar * llama_grammar_init( +# const llama_grammar_element ** rules, +# size_t n_rules, +# size_t start_rule_index); +@ctypes_function( + "llama_grammar_init", + [ + ctypes.POINTER(llama_grammar_element_p), + ctypes.c_size_t, + ctypes.c_size_t, + ], + llama_grammar_p, +) +def llama_grammar_init( + rules: CtypesArray[ + CtypesPointer[llama_grammar_element] + ], # NOTE: This might be wrong type sig + n_rules: Union[ctypes.c_size_t, int], + start_rule_index: Union[ctypes.c_size_t, int], + /, +) -> llama_grammar_p: + """Initialize a grammar from a set of rules.""" + ... + + +# LLAMA_API void llama_grammar_free(struct llama_grammar * grammar); +@ctypes_function( + "llama_grammar_free", + [llama_grammar_p], + None, +) +def llama_grammar_free(grammar: llama_grammar_p, /): + """Free a grammar.""" + ... + + +# LLAMA_API struct llama_grammar * llama_grammar_copy(const struct llama_grammar * grammar); +@ctypes_function( + "llama_grammar_copy", + [llama_grammar_p], + llama_grammar_p, +) +def llama_grammar_copy(grammar: llama_grammar_p, /) -> llama_grammar_p: + """Copy a grammar.""" + ... + + +# // +# // Sampling functions +# // + + +# // Sets the current rng seed. +# LLAMA_API void llama_set_rng_seed(struct llama_context * ctx, uint32_t seed); +@ctypes_function( + "llama_set_rng_seed", + [llama_context_p_ctypes, ctypes.c_uint32], + None, +) +def llama_set_rng_seed(ctx: llama_context_p, seed: Union[ctypes.c_uint32, int], /): + """Sets the current rng seed.""" + ... + + +# /// @details Repetition penalty described in CTRL academic paper https://arxiv.org/abs/1909.05858, with negative logit fix. +# /// @details Frequency and presence penalties described in OpenAI API https://platform.openai.com/docs/api-reference/parameter-details. 
+# LLAMA_API void llama_sample_repetition_penalties( +# struct llama_context * ctx, +# llama_token_data_array * candidates, +# const llama_token * last_tokens, +# size_t penalty_last_n, +# float penalty_repeat, +# float penalty_freq, +# float penalty_present); +@ctypes_function( + "llama_sample_repetition_penalties", + [ + llama_context_p_ctypes, + llama_token_data_array_p, + llama_token_p, + ctypes.c_size_t, + ctypes.c_float, + ctypes.c_float, + ctypes.c_float, + ], + None, +) +def llama_sample_repetition_penalties( + ctx: llama_context_p, + candidates: Union[ + CtypesArray[llama_token_data_array], CtypesPointerOrRef[llama_token_data_array] + ], + last_tokens_data: CtypesArray[llama_token], + penalty_last_n: Union[ctypes.c_size_t, int], + penalty_repeat: Union[ctypes.c_float, float], + penalty_freq: Union[ctypes.c_float, float], + penalty_present: Union[ctypes.c_float, float], + /, +): + """Repetition penalty described in CTRL academic paper https://arxiv.org/abs/1909.05858, with negative logit fix. + Frequency and presence penalties described in OpenAI API https://platform.openai.com/docs/api-reference/parameter-details. + """ + ... + + +# /// @details Apply classifier-free guidance to the logits as described in academic paper "Stay on topic with Classifier-Free Guidance" https://arxiv.org/abs/2306.17806 +# /// @param logits Logits extracted from the original generation context. +# /// @param logits_guidance Logits extracted from a separate context from the same model. Other than a negative prompt at the beginning, it should have all generated and user input tokens copied from the main context. +# /// @param scale Guidance strength. 1.0f means no guidance. Higher values mean stronger guidance. +# LLAMA_API void llama_sample_apply_guidance( +# struct llama_context * ctx, +# float * logits, +# float * logits_guidance, +# float scale); +@ctypes_function( + "llama_sample_apply_guidance", + [ + llama_context_p_ctypes, + ctypes.POINTER(ctypes.c_float), + ctypes.POINTER(ctypes.c_float), + ctypes.c_float, + ], + None, +) +def llama_sample_apply_guidance( + ctx: llama_context_p, + logits: CtypesArray[ctypes.c_float], + logits_guidance: CtypesArray[ctypes.c_float], + scale: Union[ctypes.c_float, float], + /, +): + """Apply classifier-free guidance to the logits as described in academic paper "Stay on topic with Classifier-Free Guidance" https://arxiv.org/abs/2306.17806""" + ... + + +# /// @details Sorts candidate tokens by their logits in descending order and calculate probabilities based on logits. +# LLAMA_API void llama_sample_softmax( +# struct llama_context * ctx, +# llama_token_data_array * candidates); +@ctypes_function( + "llama_sample_softmax", + [llama_context_p_ctypes, llama_token_data_array_p], + None, +) +def llama_sample_softmax( + ctx: llama_context_p, + candidates: Union[ + CtypesArray[llama_token_data_array], CtypesPointerOrRef[llama_token_data_array] + ], + /, +): + """Sorts candidate tokens by their logits in descending order and calculate probabilities based on logits.""" + ... 
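+
+
+# --- Illustrative usage sketch (editor's addition, not part of the upstream bindings) ---
+# A tokenize/detokenize round trip with llama_tokenize and llama_token_to_piece
+# defined earlier.  Assumes `model` is a valid llama_model; the buffer sizes and
+# the sample text are arbitrary placeholders for this sketch.
+def _example_tokenize_roundtrip(model, text=b"Hello world"):
+    # Tokenize into a fixed-size buffer; a negative return value means the
+    # buffer was too small (its absolute value is the required token count).
+    max_tokens = 256  # hypothetical capacity
+    tokens = (llama_token * max_tokens)()
+    n = llama_tokenize(model, text, len(text), tokens, max_tokens, True, False)
+    if n < 0:
+        raise RuntimeError("token buffer too small, need %d tokens" % -n)
+    # Convert each token id back to its text piece (32 bytes per piece is
+    # assumed to be enough for this sketch).
+    pieces = []
+    buf = ctypes.create_string_buffer(32)
+    for i in range(n):
+        n_bytes = llama_token_to_piece(model, tokens[i], buf, len(buf), False)
+        pieces.append(buf.raw[:n_bytes])
+    return list(tokens[:n]), b"".join(pieces)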
+ + +# /// @details Top-K sampling described in academic paper "The Curious Case of Neural Text Degeneration" https://arxiv.org/abs/1904.09751 +# LLAMA_API void llama_sample_top_k( +# struct llama_context * ctx, +# llama_token_data_array * candidates, +# int32_t k, +# size_t min_keep); +@ctypes_function( + "llama_sample_top_k", + [llama_context_p_ctypes, llama_token_data_array_p, ctypes.c_int32, ctypes.c_size_t], + None, +) +def llama_sample_top_k( + ctx: llama_context_p, + candidates: Union[ + CtypesArray[llama_token_data_array], CtypesPointerOrRef[llama_token_data_array] + ], + k: Union[ctypes.c_int, int], + min_keep: Union[ctypes.c_size_t, int], + /, +): + """Top-K sampling described in academic paper "The Curious Case of Neural Text Degeneration" https://arxiv.org/abs/1904.09751""" + ... + + +# /// @details Nucleus sampling described in academic paper "The Curious Case of Neural Text Degeneration" https://arxiv.org/abs/1904.09751 +# LLAMA_API void llama_sample_top_p( +# struct llama_context * ctx, +# llama_token_data_array * candidates, +# float p, +# size_t min_keep); +@ctypes_function( + "llama_sample_top_p", + [llama_context_p_ctypes, llama_token_data_array_p, ctypes.c_float, ctypes.c_size_t], + None, +) +def llama_sample_top_p( + ctx: llama_context_p, + candidates: Union[ + CtypesArray[llama_token_data_array], CtypesPointerOrRef[llama_token_data_array] + ], + p: Union[ctypes.c_float, float], + min_keep: Union[ctypes.c_size_t, int], + /, +): + """Nucleus sampling described in academic paper "The Curious Case of Neural Text Degeneration" https://arxiv.org/abs/1904.09751""" + ... + + +# /// @details Minimum P sampling as described in https://github.com/ggerganov/llama.cpp/pull/3841 +# LLAMA_API void llama_sample_min_p( +# struct llama_context * ctx, +# llama_token_data_array * candidates, +# float p, +# size_t min_keep); +@ctypes_function( + "llama_sample_min_p", + [llama_context_p_ctypes, llama_token_data_array_p, ctypes.c_float, ctypes.c_size_t], + None, +) +def llama_sample_min_p( + ctx: llama_context_p, + candidates: Union[ + CtypesArray[llama_token_data_array], CtypesPointerOrRef[llama_token_data_array] + ], + p: Union[ctypes.c_float, float], + min_keep: Union[ctypes.c_size_t, int], + /, +): + """Minimum P sampling as described in https://github.com/ggerganov/llama.cpp/pull/3841""" + ... + + +# /// @details Tail Free Sampling described in https://www.trentonbricken.com/Tail-Free-Sampling/. +# LLAMA_API void llama_sample_tail_free( +# struct llama_context * ctx, +# llama_token_data_array * candidates, +# float z, +# size_t min_keep); +@ctypes_function( + "llama_sample_tail_free", + [llama_context_p_ctypes, llama_token_data_array_p, ctypes.c_float, ctypes.c_size_t], + None, +) +def llama_sample_tail_free( + ctx: llama_context_p, + candidates: Union[ + CtypesArray[llama_token_data_array], CtypesPointerOrRef[llama_token_data_array] + ], + z: Union[ctypes.c_float, float], + min_keep: Union[ctypes.c_size_t, int], + /, +): + """Tail Free Sampling described in https://www.trentonbricken.com/Tail-Free-Sampling/.""" + ... + + +# /// @details Locally Typical Sampling implementation described in the paper https://arxiv.org/abs/2202.00666. 
+# LLAMA_API void llama_sample_typical( +# struct llama_context * ctx, +# llama_token_data_array * candidates, +# float p, +# size_t min_keep); +@ctypes_function( + "llama_sample_typical", + [llama_context_p_ctypes, llama_token_data_array_p, ctypes.c_float, ctypes.c_size_t], + None, +) +def llama_sample_typical( + ctx: llama_context_p, + candidates: Union[ + CtypesArray[llama_token_data_array], CtypesPointerOrRef[llama_token_data_array] + ], + p: Union[ctypes.c_float, float], + min_keep: Union[ctypes.c_size_t, int], + /, +): + """Locally Typical Sampling implementation described in the paper https://arxiv.org/abs/2202.00666.""" + ... + + +# /// @details Dynamic temperature implementation described in the paper https://arxiv.org/abs/2309.02772. +# LLAMA_API void llama_sample_entropy( +# struct llama_context * ctx, +# llama_token_data_array * candidates_p, +# float min_temp, +# float max_temp, +# float exponent_val); +@ctypes_function( + "llama_sample_entropy", + [ + llama_context_p_ctypes, + llama_token_data_array_p, + ctypes.c_float, + ctypes.c_float, + ctypes.c_float, + ], + None, +) +def llama_sample_entropy( + ctx: llama_context_p, + candidates: Union[ + CtypesArray[llama_token_data_array], CtypesPointerOrRef[llama_token_data_array] + ], + min_temp: Union[ctypes.c_float, float], + max_temp: Union[ctypes.c_float, float], + exponent_val: Union[ctypes.c_float, float], + /, +): + """Dynamic temperature implementation described in the paper https://arxiv.org/abs/2309.02772.""" + ... + + +# LLAMA_API void llama_sample_temp( +# struct llama_context * ctx, +# llama_token_data_array * candidates, +# float temp); +@ctypes_function( + "llama_sample_temp", + [llama_context_p_ctypes, llama_token_data_array_p, ctypes.c_float], + None, +) +def llama_sample_temp( + ctx: llama_context_p, + candidates: Union[ + CtypesArray[llama_token_data_array], CtypesPointerOrRef[llama_token_data_array] + ], + temp: Union[ctypes.c_float, float], + /, +): + """Temperature sampling described in academic paper "Generating Long Sequences with Sparse Transformers" https://arxiv.org/abs/1904.10509 + + Parameters: + candidates: A vector of `llama_token_data` containing the candidate tokens, their probabilities (p), and log-odds (logit) for the current position in the generated text. + temp: The temperature value to use for the sampling. A higher value corresponds to more surprising or less predictable text, while a lower value corresponds to less surprising or more predictable text. + """ + ... + + +# /// @details Apply constraints from grammar +# LLAMA_API void llama_sample_grammar( +# struct llama_context * ctx, +# llama_token_data_array * candidates, +# const struct llama_grammar * grammar); +@ctypes_function( + "llama_sample_grammar", + [llama_context_p_ctypes, llama_token_data_array_p, llama_grammar_p], + None, +) +def llama_sample_grammar( + ctx: llama_context_p, + candidates: Union[ + CtypesArray[llama_token_data_array], CtypesPointerOrRef[llama_token_data_array] + ], + grammar, # type: llama_grammar_p + /, +): + """Apply constraints from grammar + + Parameters: + candidates: A vector of `llama_token_data` containing the candidate tokens, their probabilities (p), and log-odds (logit) for the current position in the generated text. + grammar: A grammar object containing the rules and constraints to apply to the generated text. + """ + ... + + +# /// @details Mirostat 1.0 algorithm described in the paper https://arxiv.org/abs/2007.14966. Uses tokens instead of words. 
+# /// @param candidates A vector of `llama_token_data` containing the candidate tokens, their probabilities (p), and log-odds (logit) for the current position in the generated text. +# /// @param tau The target cross-entropy (or surprise) value you want to achieve for the generated text. A higher value corresponds to more surprising or less predictable text, while a lower value corresponds to less surprising or more predictable text. +# /// @param eta The learning rate used to update `mu` based on the error between the target and observed surprisal of the sampled word. A larger learning rate will cause `mu` to be updated more quickly, while a smaller learning rate will result in slower updates. +# /// @param m The number of tokens considered in the estimation of `s_hat`. This is an arbitrary value that is used to calculate `s_hat`, which in turn helps to calculate the value of `k`. In the paper, they use `m = 100`, but you can experiment with different values to see how it affects the performance of the algorithm. +# /// @param mu Maximum cross-entropy. This value is initialized to be twice the target cross-entropy (`2 * tau`) and is updated in the algorithm based on the error between the target and observed surprisal. +# LLAMA_API llama_token llama_sample_token_mirostat( +# struct llama_context * ctx, +# llama_token_data_array * candidates, +# float tau, +# float eta, +# int32_t m, +# float * mu); +@ctypes_function( + "llama_sample_token_mirostat", + [ + llama_context_p_ctypes, + llama_token_data_array_p, + ctypes.c_float, + ctypes.c_float, + ctypes.c_int32, + ctypes.POINTER(ctypes.c_float), + ], + llama_token, +) +def llama_sample_token_mirostat( + ctx: llama_context_p, + candidates: Union[ + CtypesArray[llama_token_data_array], CtypesPointerOrRef[llama_token_data_array] + ], + tau: Union[ctypes.c_float, float], + eta: Union[ctypes.c_float, float], + m: Union[ctypes.c_int, int], + mu: CtypesPointerOrRef[ctypes.c_float], + /, +) -> int: + """Mirostat 1.0 algorithm described in the paper https://arxiv.org/abs/2007.14966. Uses tokens instead of words. + + Parameters: + candidates: A vector of `llama_token_data` containing the candidate tokens, their probabilities (p), and log-odds (logit) for the current position in the generated text. + tau: The target cross-entropy (or surprise) value you want to achieve for the generated text. A higher value corresponds to more surprising or less predictable text, while a lower value corresponds to less surprising or more predictable text. + eta: The learning rate used to update `mu` based on the error between the target and observed surprisal of the sampled word. A larger learning rate will cause `mu` to be updated more quickly, while a smaller learning rate will result in slower updates. + m: The number of tokens considered in the estimation of `s_hat`. This is an arbitrary value that is used to calculate `s_hat`, which in turn helps to calculate the value of `k`. In the paper, they use `m = 100`, but you can experiment with different values to see how it affects the performance of the algorithm. + mu: Maximum cross-entropy. This value is initialized to be twice the target cross-entropy (`2 * tau`) and is updated in the algorithm based on the error between the target and observed surprisal. + """ + ... + + +# /// @details Mirostat 2.0 algorithm described in the paper https://arxiv.org/abs/2007.14966. Uses tokens instead of words. 
+# /// @param candidates A vector of `llama_token_data` containing the candidate tokens, their probabilities (p), and log-odds (logit) for the current position in the generated text. +# /// @param tau The target cross-entropy (or surprise) value you want to achieve for the generated text. A higher value corresponds to more surprising or less predictable text, while a lower value corresponds to less surprising or more predictable text. +# /// @param eta The learning rate used to update `mu` based on the error between the target and observed surprisal of the sampled word. A larger learning rate will cause `mu` to be updated more quickly, while a smaller learning rate will result in slower updates. +# /// @param mu Maximum cross-entropy. This value is initialized to be twice the target cross-entropy (`2 * tau`) and is updated in the algorithm based on the error between the target and observed surprisal. +# LLAMA_API llama_token llama_sample_token_mirostat_v2( +# struct llama_context * ctx, +# llama_token_data_array * candidates, +# float tau, +# float eta, +# float * mu); +@ctypes_function( + "llama_sample_token_mirostat_v2", + [ + llama_context_p_ctypes, + llama_token_data_array_p, + ctypes.c_float, + ctypes.c_float, + ctypes.POINTER(ctypes.c_float), + ], + llama_token, +) +def llama_sample_token_mirostat_v2( + ctx: llama_context_p, + candidates: Union[ + CtypesArray[llama_token_data_array], CtypesPointerOrRef[llama_token_data_array] + ], + tau: Union[ctypes.c_float, float], + eta: Union[ctypes.c_float, float], + mu: CtypesPointerOrRef[ctypes.c_float], + /, +) -> int: + """Mirostat 2.0 algorithm described in the paper https://arxiv.org/abs/2007.14966. Uses tokens instead of words. + + Parameters: + candidates: A vector of `llama_token_data` containing the candidate tokens, their probabilities (p), and log-odds (logit) for the current position in the generated text. + tau: The target cross-entropy (or surprise) value you want to achieve for the generated text. A higher value corresponds to more surprising or less predictable text, while a lower value corresponds to less surprising or more predictable text. + eta: The learning rate used to update `mu` based on the error between the target and observed surprisal of the sampled word. A larger learning rate will cause `mu` to be updated more quickly, while a smaller learning rate will result in slower updates. + mu: Maximum cross-entropy. This value is initialized to be twice the target cross-entropy (`2 * tau`) and is updated in the algorithm based on the error between the target and observed surprisal. + """ + ... + + +# /// @details Selects the token with the highest probability. +# /// Does not compute the token probabilities. Use llama_sample_softmax() instead. +# LLAMA_API llama_token llama_sample_token_greedy( +# struct llama_context * ctx, +# llama_token_data_array * candidates); +@ctypes_function( + "llama_sample_token_greedy", + [llama_context_p_ctypes, llama_token_data_array_p], + llama_token, +) +def llama_sample_token_greedy( + ctx: llama_context_p, + candidates: Union[ + CtypesArray[llama_token_data_array], CtypesPointerOrRef[llama_token_data_array] + ], + /, +) -> int: + """Selects the token with the highest probability.""" + ... + + +# /// @details Randomly selects a token from the candidates based on their probabilities using the RNG of ctx. 
+# LLAMA_API llama_token llama_sample_token( +# struct llama_context * ctx, +# llama_token_data_array * candidates); +@ctypes_function( + "llama_sample_token", + [llama_context_p_ctypes, llama_token_data_array_p], + llama_token, +) +def llama_sample_token( + ctx: llama_context_p, + candidates: Union[ + CtypesArray[llama_token_data_array], CtypesPointerOrRef[llama_token_data_array] + ], + /, +) -> int: + """Randomly selects a token from the candidates based on their probabilities.""" + ... + + +# /// @details Accepts the sampled token into the grammar +# LLAMA_API void llama_grammar_accept_token( +# struct llama_context * ctx, +# struct llama_grammar * grammar, +# llama_token token); +@ctypes_function( + "llama_grammar_accept_token", + [llama_context_p_ctypes, llama_grammar_p, llama_token], + None, +) +def llama_grammar_accept_token( + ctx: llama_context_p, grammar: llama_grammar_p, token: Union[llama_token, int], / +) -> None: + """Accepts the sampled token into the grammar""" + ... + + +# // +# // Beam search +# // + +# struct llama_beam_view { +# const llama_token * tokens; + + +# size_t n_tokens; +# float p; // Cumulative beam probability (renormalized relative to all beams) +# bool eob; // Callback should set this to true when a beam is at end-of-beam. +# }; +class llama_beam_view(ctypes.Structure): + if TYPE_CHECKING: + tokens: CtypesArray[llama_token] + n_tokens: int + p: float + eob: bool + + _fields_ = [ + ("tokens", llama_token_p), + ("n_tokens", ctypes.c_size_t), + ("p", ctypes.c_float), + ("eob", ctypes.c_bool), + ] + + +# // Passed to beam_search_callback function. +# // Whenever 0 < common_prefix_length, this number of tokens should be copied from any of the beams +# // (e.g. beams[0]) as they will be removed (shifted) from all beams in all subsequent callbacks. +# // These pointers are valid only during the synchronous callback, so should not be saved. +# struct llama_beams_state { +# struct llama_beam_view * beam_views; +# size_t n_beams; // Number of elements in beam_views[]. +# size_t common_prefix_length; // Current max length of prefix tokens shared by all beams. +# bool last_call; // True iff this is the last callback invocation. +# }; +class llama_beams_state(ctypes.Structure): + if TYPE_CHECKING: + beam_views: CtypesArray[llama_beam_view] + n_beams: int + common_prefix_length: int + last_call: bool + + _fields_ = [ + ("beam_views", ctypes.POINTER(llama_beam_view)), + ("n_beams", ctypes.c_size_t), + ("common_prefix_length", ctypes.c_size_t), + ("last_call", ctypes.c_bool), + ] + + +# // Type of pointer to the beam_search_callback function. +# // void* callback_data is any custom data passed to llama_beam_search, that is subsequently +# // passed back to beam_search_callback. This avoids having to use global variables in the callback. +# typedef void (*llama_beam_search_callback_fn_t)(void * callback_data, struct llama_beams_state); +llama_beam_search_callback_fn_t = ctypes.CFUNCTYPE( + None, ctypes.c_void_p, llama_beams_state +) + + +# /// @details Deterministically returns entire sentence constructed by a beam search. +# /// @param ctx Pointer to the llama_context. +# /// @param callback Invoked for each iteration of the beam_search loop, passing in beams_state. +# /// @param callback_data A pointer that is simply passed back to callback. +# /// @param n_beams Number of beams to use. +# /// @param n_past Number of tokens already evaluated. +# /// @param n_predict Maximum number of tokens to predict. EOS may occur earlier. 
+# /// @param n_threads Number of threads as passed to llama_eval(). +# LLAMA_API void llama_beam_search( +# struct llama_context * ctx, +# llama_beam_search_callback_fn_t callback, +# void * callback_data, +# size_t n_beams, +# int32_t n_past, +# int32_t n_predict); +@ctypes_function( + "llama_beam_search", + [ + llama_context_p_ctypes, + llama_beam_search_callback_fn_t, + ctypes.c_void_p, + ctypes.c_size_t, + ctypes.c_int32, + ctypes.c_int32, + ], + None, +) +def llama_beam_search( + ctx: llama_context_p, + callback: CtypesFuncPointer, + callback_data: ctypes.c_void_p, + n_beams: Union[ctypes.c_size_t, int], + n_past: Union[ctypes.c_int, int], + n_predict: Union[ctypes.c_int, int], + /, +): ... + + +# /// @details Build a split GGUF final path for this chunk. +# /// llama_split_path(split_path, sizeof(split_path), "/models/ggml-model-q4_0", 2, 4) => split_path = "/models/ggml-model-q4_0-00002-of-00004.gguf" +# // Returns the split_path length. +# LLAMA_API int llama_split_path(char * split_path, size_t maxlen, const char * path_prefix, int split_no, int split_count); +@ctypes_function( + "llama_split_path", + [ctypes.c_char_p, ctypes.c_size_t, ctypes.c_char_p, ctypes.c_int, ctypes.c_int], + ctypes.c_int, +) +def llama_split_path( + split_path: bytes, + maxlen: Union[ctypes.c_size_t, int], + path_prefix: bytes, + split_no: Union[ctypes.c_int, int], + split_count: Union[ctypes.c_int, int], + /, +) -> int: + """Build a split GGUF final path for this chunk.""" + ... + + +# /// @details Extract the path prefix from the split_path if and only if the split_no and split_count match. +# /// llama_split_prefix(split_prefix, 64, "/models/ggml-model-q4_0-00002-of-00004.gguf", 2, 4) => split_prefix = "/models/ggml-model-q4_0" +# // Returns the split_prefix length. +# LLAMA_API int llama_split_prefix(char * split_prefix, size_t maxlen, const char * split_path, int split_no, int split_count); +@ctypes_function( + "llama_split_prefix", + [ctypes.c_char_p, ctypes.c_size_t, ctypes.c_char_p, ctypes.c_int, ctypes.c_int], + ctypes.c_int, +) +def llama_split_prefix( + split_prefix: bytes, + maxlen: Union[ctypes.c_size_t, int], + split_path: bytes, + split_no: Union[ctypes.c_int, int], + split_count: Union[ctypes.c_int, int], + /, +) -> int: + """Extract the path prefix from the split_path if and only if the split_no and split_count match.""" + ... + + +# Performance information + + +# LLAMA_API struct llama_timings llama_get_timings(struct llama_context * ctx); +@ctypes_function( + "llama_get_timings", + [llama_context_p_ctypes], + llama_timings, +) +def llama_get_timings(ctx: llama_context_p, /) -> llama_timings: + """Get performance information""" + ... + + +# LLAMA_API void llama_print_timings(struct llama_context * ctx); +@ctypes_function( + "llama_print_timings", + [llama_context_p_ctypes], + None, +) +def llama_print_timings(ctx: llama_context_p, /): + """Print performance information""" + ... + + +# LLAMA_API void llama_reset_timings(struct llama_context * ctx); +@ctypes_function( + "llama_reset_timings", + [llama_context_p_ctypes], + None, +) +def llama_reset_timings(ctx: llama_context_p, /): + """Reset performance information""" + ... + + +# Print system information +# LLAMA_API const char * llama_print_system_info(void); +@ctypes_function( + "llama_print_system_info", + [], + ctypes.c_char_p, +) +def llama_print_system_info() -> bytes: + """Print system information""" + ... 
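+
+
+# --- Illustrative usage sketch (editor's addition, not part of the upstream bindings) ---
+# Samples one token from the most recent logits using the sampling functions
+# above (top-k, temperature, then a draw with the context RNG).  Assumes
+# `ctx`/`model` are valid, llama_decode has been called, and that llama_n_vocab
+# plus the llama_token_data / llama_token_data_array structs are defined earlier
+# in this module with the field layout from llama.h (id/logit/p, data/size/sorted).
+def _example_sample_next_token(ctx, model, top_k=40, temp=0.8):
+    n_vocab = llama_n_vocab(model)
+    logits = llama_get_logits_ith(ctx, -1)
+    # One candidate per vocabulary entry; probabilities start at zero.
+    data = (llama_token_data * n_vocab)()
+    for i in range(n_vocab):
+        data[i].id = i
+        data[i].logit = logits[i]
+        data[i].p = 0.0
+    candidates = llama_token_data_array()
+    candidates.data = ctypes.cast(data, ctypes.POINTER(llama_token_data))
+    candidates.size = n_vocab
+    candidates.sorted = False
+    candidates_p = ctypes.byref(candidates)
+    # Standard top-k / temperature pipeline, then draw a token.
+    llama_sample_top_k(ctx, candidates_p, top_k, 1)
+    llama_sample_temp(ctx, candidates_p, temp)
+    return llama_sample_token(ctx, candidates_p)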
+ + +# NOTE: THIS IS CURRENTLY BROKEN AS ggml_log_callback IS NOT EXPOSED IN LLAMA.H +# // Set callback for all future logging events. +# // If this is not called, or NULL is supplied, everything is output on stderr. +# LLAMA_API void llama_log_set(ggml_log_callback log_callback, void * user_data); +@ctypes_function( + "llama_log_set", + [ctypes.c_void_p, ctypes.c_void_p], + None, +) +def llama_log_set( + log_callback: Optional[CtypesFuncPointer], + user_data: ctypes.c_void_p, + /, +): + """Set callback for all future logging events. + + If this is not called, or NULL is supplied, everything is output on stderr.""" + ... + + +# LLAMA_API void llama_dump_timing_info_yaml(FILE * stream, const struct llama_context * ctx); +@ctypes_function( + "llama_dump_timing_info_yaml", + [ctypes.c_void_p, llama_context_p_ctypes], + None, +) +def llama_dump_timing_info_yaml(stream: ctypes.c_void_p, ctx: llama_context_p, /): ... diff --git a/llama-cpp-python/llama_cpp/llama_grammar.py b/llama-cpp-python/llama_cpp/llama_grammar.py new file mode 100644 index 0000000000000000000000000000000000000000..d9a3823b4f83b89068121563bb3cc24bec77bfe2 --- /dev/null +++ b/llama-cpp-python/llama_cpp/llama_grammar.py @@ -0,0 +1,1946 @@ +"""Python implementation of llama grammar parser directly translated from C++ source file in vendor/llama.cpp/common/grammar-parser.cpp.""" + +# flake8: noqa +from pathlib import Path +import sys +from ctypes import * # type: ignore +from enum import Enum +from itertools import islice, groupby +from typing import ( + Any, + Callable, + Dict, + Set, + Generic, + List, + Optional, + OrderedDict, + TextIO, + Tuple, + TypeVar, + Union, + overload, +) + +import llama_cpp.llama_cpp as llama_cpp + +# Type aliases +llama_grammar_element = llama_cpp.llama_grammar_element +llama_grammar_element_p = llama_cpp.llama_grammar_element_p +llama_grammar_p = llama_cpp.llama_grammar_p + +# Type variables +Ptr = TypeVar("Ptr", bound="const_char_p") +T = TypeVar("T") +U = TypeVar("U") +V = TypeVar("V") +W = TypeVar("W") + + +class Sentinel: + """Used to mark the end of a iterator of std::vector & std::map.""" + + +class LlamaGrammar: + """Keeps reference counts of all the arguments, so that they are not + garbage collected by Python.""" + + def __del__(self) -> None: + """Free the grammar pointer when the object is deleted.""" + if self.grammar is not None: + llama_cpp.llama_grammar_free(self.grammar) + self.grammar = None + + def __init__( + self, + parsed_grammar: "parse_state", + ) -> None: + """Initialize the grammar pointer from the parsed state.""" + self._grammar_rules = ( + parsed_grammar.c_rules() + ) # type: std.vector[std.vector[LlamaGrammarElement]] + self._n_rules = self._grammar_rules.size() # type: int + self._start_rule_index = parsed_grammar.symbol_ids.at("root") # type: int + self.init() + + @classmethod + def from_string(cls, grammar: str, verbose: bool = True) -> "LlamaGrammar": + """Convert a GBNF grammar to a Llama grammar.""" + parsed_grammar = parse(const_char_p(grammar)) # type: parse_state + if parsed_grammar.rules.empty(): + raise ValueError( + f"{cls.from_string.__name__}: error parsing grammar file: parsed_grammar.rules is empty" + ) + if verbose: + print(f"{cls.from_string.__name__} grammar:", file=sys.stderr) + print_grammar(sys.stderr, parsed_grammar) + print(file=sys.stderr) + return cls(parsed_grammar) + + @classmethod + def from_json_schema( + cls, + json_schema: str, + verbose: bool = True, + ) -> "LlamaGrammar": + """Convert a JSON schema to a Llama grammar.""" + return 
cls.from_string(json_schema_to_gbnf(json_schema), verbose=verbose) + + @classmethod + def from_file(cls, file: Union[str, Path], verbose: bool = True) -> "LlamaGrammar": + try: + with open(file) as f: + grammar = f.read() + except Exception as err: + raise Exception( + f"{cls.from_file.__name__}: error reading grammar file: {err}" + ) + + if grammar: + return cls.from_string(grammar, verbose=verbose) + + raise ValueError( + f"{cls.from_file.__name__}: error parsing grammar file: params_grammer is empty" + ) + + def init(self) -> None: + # Step 1: Convert LlamaGrammarElement to llama_grammar_element + self._element_lists = [ + [ + llama_grammar_element(c_int(elem.type.value), c_uint32(elem.value)) + for elem in subvector + ] + for subvector in self._grammar_rules + ] # type: List[List[llama_grammar_element]] + + # Step 2: Convert each list to llama_grammar_element array and get pointer + self._element_arrays = [ + (llama_grammar_element * len(sublist))(*sublist) + for sublist in self._element_lists + ] # type: List[Array[llama_grammar_element]] + + # Step 3: Get pointer of each array + self._element_array_pointers = [ + cast(subarray, llama_grammar_element_p) for subarray in self._element_arrays + ] # type: List[llama_grammar_element_p] + + # Step 4: Make array of these pointers and get its pointer + self._rules = (llama_grammar_element_p * len(self._element_array_pointers))( + *self._element_array_pointers + ) + self.grammar = llama_cpp.llama_grammar_init( + self._rules, c_size_t(self._n_rules), c_size_t(self._start_rule_index) + ) + + def reset(self) -> None: + if self.grammar is not None: + llama_cpp.llama_grammar_free(self.grammar) + self.init() + + +class LlamaGrammarElement: + def __init__(self, type: "llama_gretype", value: int): + self.type = type + self.value = value # Unicode code point or rule ID + + +class const_char_p: + """C++ implementation of const char *.""" + + def __init__(self, value: Union[str, Ptr], move: Optional[int] = None): + if isinstance(value, const_char_p): + # We're copying an existing const_char_p + self.value = value.value + self.pos = value.pos + (move or 0) + return + + # We're creating a new const_char_p + self.value = value + self.pos = move or 0 + + def __str__(self) -> str: + assert self.value is not None, "null pointer" + return self.value[self.pos :] + + def __getitem__(self, index: int) -> str: + value = str(self) + return value[index] if index < len(value) else "" + + @overload + def __add__(self: Ptr, other: int) -> Ptr: + ... + + @overload + def __add__(self: Ptr, other: Ptr) -> int: + ... + + def __add__(self: Ptr, other: Union[int, Ptr]) -> Union[int, Ptr]: + return ( + self.__class__(self.value, self.pos + other) + if isinstance(other, int) + else self.pos + other.pos + ) + + @overload + def __sub__(self: Ptr, other: int) -> Ptr: + ... + + @overload + def __sub__(self: Ptr, other: Ptr) -> int: + ... 
+ + def __sub__(self: Ptr, other: Union[int, Ptr]) -> Union[int, Ptr]: + return ( + self.__class__(self.value, self.pos - other) + if isinstance(other, int) + else self.pos - other.pos + ) + + def __eq__(self: Ptr, other: Ptr) -> bool: + assert self.value == other.value, "comparing pointers from different strings" + return self.pos == other.pos + + def __lt__(self: Ptr, other: Ptr) -> bool: + assert self.value == other.value, "comparing pointers from different strings" + return self.pos < other.pos + + def __gt__(self: Ptr, other: Ptr) -> bool: + assert self.value == other.value, "comparing pointers from different strings" + return self.pos > other.pos + + +class std: + @staticmethod + def string(ptr: const_char_p, length: Optional[int] = None) -> str: + """C++ implementation of std::string constructor.""" + value = str(ptr) + if length is not None: + value = value[:length] + return value + + class vector(Generic[T], List[T]): + """C++ implementation of std::vector.""" + + class iterator: + def __init__(self, vector: "std.vector[T]", index: int): + self._vector = vector + self._index = index + self._version = vector._version + + def _check_version(self): + if self._version != self._vector._version: + raise RuntimeError("Iterator used after vector was modified.") + + def __iter__(self): + return self + + def __next__(self) -> T: + self._check_version() + if self._index >= self._vector.size(): + raise StopIteration + value = self._vector[self._index] + self._index += 1 + return value + + def __add__(self, value: int) -> "std.vector[T].iterator": + return self.__class__(self._vector, self._index + value) + + def __sub__(self, value: int) -> "std.vector[T].iterator": + return self.__class__(self._vector, self._index - value) + + def __init__(self): + self._version = 0 + + def modify(self): + # This is a bit of a hack to make sure iterators are invalidated + self._version += 1 + + def push_back(self, value: T) -> None: + self.modify() + self.append(value) + + def pop_back(self) -> None: + self.modify() + if not self.empty(): + self.pop() + + def back(self) -> T: + return self[-1] + + def size(self) -> int: + return len(self) + + def clear(self) -> None: + self.modify() + super().clear() + + def empty(self) -> bool: + return self.size() == 0 + + def data(self) -> "std.vector[T]": + return self + + def resize( + self, + new_size: int, + fill_value_factory: Optional[Callable[[], T]] = None, + ) -> None: + if new_size > self.size(): + if fill_value_factory is None: + raise ValueError("A fill value factory function must be provided.") + self.reserve(new_size, fill_value_factory) + elif new_size < self.size(): + self[:] = self[:new_size] + + def reserve(self, capacity: int, fill_value_factory: Callable[[], T]) -> None: + if capacity > self.size(): + fill_value = fill_value_factory() + self.extend([fill_value] * (capacity - self.size())) + + def front(self) -> T: + if not self.empty(): + return self[0] + else: + raise IndexError("Vector is empty.") + + def assign(self, count: int, value: T) -> None: + self.clear() + self.extend([value] * count) + + def insert( + self, + pos: "std.vector[T].iterator", + first: "std.vector[T].iterator", + last: "std.vector[T].iterator", + ) -> None: + self[pos._index : pos._index] = list( + islice(first._vector, first._index, last._index) + ) + + def begin(self) -> "std.vector[T].iterator": + return self.iterator(self, 0) + + def end(self) -> "std.vector[T].iterator": + return self.iterator(self, self.size()) + + class map(Generic[T, U], OrderedDict[T, U]): + """C++ 
implementation of std::map.""" + + class iterator(Generic[V, W]): + def __init__(self, _map: "std.map[T, U]", key: Union[T, Sentinel]): + self._map = _map + self.iter = iter(_map) + self.key = key + self._advance() + + def _sanitize_key(self) -> T: + if isinstance(self.key, Sentinel): + raise StopIteration + return self.key + + def _advance(self) -> None: + try: + while next(self.iter) != self.key: + pass + except StopIteration: + self.key = Sentinel() + + def __next__(self) -> Tuple[T, U]: + key = self._sanitize_key() + if key in self._map: + value = self._map[key] + self._advance() + return key, value + else: + raise StopIteration + + def get(self) -> Tuple[T, U]: + key = self._sanitize_key() + return key, self._map[key] + + @property + def first(self) -> T: + return self._sanitize_key() + + @property + def second(self) -> U: + return self._map[self._sanitize_key()] + + def insert( + self, key: T, value: U + ) -> Tuple["std.map[T, U].iterator[T, U]", bool]: + if key in self: + return self.iterator(self, key), False + else: + self[key] = value + return self.iterator(self, key), True + + def find(self, key: T) -> "std.map[T, U].iterator[T, U]": + if key in self: + return self.iterator(self, key) + else: + return self.end() + + def at(self, key: T) -> U: + if key in self: + return self[key] + else: + raise KeyError("The provided key is not found in the map.") + + def erase(self, iterator: "std.map[T, U].iterator[T, U]") -> None: + key = iterator.first + if key in self: + del self[key] + + def size(self) -> int: + return len(self) + + def empty(self) -> bool: + return self.size() == 0 + + def lower_bound(self, key: T) -> "std.map[T, U].iterator[T, U]": + try: + keys = sorted(list(self.keys())) # type: ignore + for k in keys: + if k >= key: + return self.iterator(self, k) + raise ValueError("No key found that is not less than the input key") + except TypeError: + raise TypeError("Keys of type T cannot be sorted.") + + def begin(self) -> "std.map[T, U].iterator[T, U]": + return self.iterator(self, next(iter(self))) + + def end(self) -> "std.map[T, U].iterator[T, U]": + return self.iterator(self, Sentinel()) + + +# // grammar element type +# enum llama_gretype { +# // end of rule definition +# LLAMA_GRETYPE_END = 0, + +# // start of alternate definition for rule +# LLAMA_GRETYPE_ALT = 1, + +# // non-terminal element: reference to rule +# LLAMA_GRETYPE_RULE_REF = 2, + +# // terminal element: character (code point) +# LLAMA_GRETYPE_CHAR = 3, + +# // inverse char(s) ([^a], [^a-b] [^abc]) +# LLAMA_GRETYPE_CHAR_NOT = 4, + +# // modifies a preceding LLAMA_GRETYPE_CHAR or LLAMA_GRETYPE_CHAR_ALT to +# // be an inclusive range ([a-z]) +# LLAMA_GRETYPE_CHAR_RNG_UPPER = 5, + + +# // modifies a preceding LLAMA_GRETYPE_CHAR or +# // LLAMA_GRETYPE_CHAR_RNG_UPPER to add an alternate char to match ([ab], [a-zA]) +# LLAMA_GRETYPE_CHAR_ALT = 6, +# }; +class llama_gretype(Enum): + """grammar element type""" + + LLAMA_GRETYPE_END = 0 # end of rule definition + LLAMA_GRETYPE_ALT = 1 # start of alternate definition for rule + LLAMA_GRETYPE_RULE_REF = 2 # non-terminal element: reference to rule + LLAMA_GRETYPE_CHAR = 3 # terminal element: character (code point) + LLAMA_GRETYPE_CHAR_NOT = 4 # inverse char(s) ([^a], [^a-b] [^abc]) + LLAMA_GRETYPE_CHAR_RNG_UPPER = 5 # modifies a preceding LLAMA_GRETYPE_CHAR or LLAMA_GRETYPE_CHAR_ALT to be an inclusive range ([a-z]) + LLAMA_GRETYPE_CHAR_ALT = 6 # modifies a preceding LLAMA_GRETYPE_CHAR or LLAMA_GRETYPE_CHAR_RNG_UPPER to add an alternate char to match ([ab], [a-zA]) + + 
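+
+
+# --- Illustrative usage sketch (editor's addition, not part of the upstream module) ---
+# Builds a LlamaGrammar from a tiny GBNF grammar string.  The grammar text below
+# is a made-up example; any grammar must define a "root" rule, since
+# LlamaGrammar.__init__ looks up parsed_grammar.symbol_ids.at("root").
+_EXAMPLE_GBNF = r"""
+root ::= answer
+answer ::= "yes" | "no"
+"""
+
+
+def _example_yes_no_grammar() -> "LlamaGrammar":
+    # verbose=False keeps the parsed grammar from being printed to stderr.
+    return LlamaGrammar.from_string(_EXAMPLE_GBNF, verbose=False)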
+# struct parse_state { +# std::map symbol_ids; +# std::vector> rules; +# std::vector c_rules(); +# }; +class parse_state: + def __init__(self): + self.symbol_ids: std.map[str, int] = std.map() + self.rules: std.vector[std.vector[LlamaGrammarElement]] = std.vector() + + # std::vector parse_state::c_rules() { + # std::vector ret; + # for (const auto & rule : rules) { + # ret.push_back(rule.data()); + # } + # return ret; + # } + def c_rules(self) -> std.vector[std.vector[LlamaGrammarElement]]: + ret = std.vector() # type: std.vector[std.vector[LlamaGrammarElement]] + for rule in self.rules: + ret.push_back(rule.data()) + return ret + + def __repr__(self) -> str: + return ( + f"parse_state(symbol_ids={len(self.symbol_ids)}, rules={len(self.rules)})" + ) + + +# struct llama_grammar { +# const std::vector> rules; +# std::vector> stacks; +# }; +# class llama_grammar: +# def __init__( +# self, +# rules: std.vector[std.vector[llama_grammar_element]], +# stacks: std.vector[std.vector[llama_grammar_element]], +# ): +# self.rules = rules +# self.stacks = stacks + + +# uint32_t get_symbol_id(parse_state & state, const char * src, size_t len) { +# uint32_t next_id = static_cast(state.symbol_ids.size()); +# auto result = state.symbol_ids.insert(std::make_pair(std::string(src, len), next_id)); +# return result.first->second; +# } +def get_symbol_id(state: parse_state, src: const_char_p, len: int) -> int: + next_id = state.symbol_ids.size() # type: int + result = state.symbol_ids.insert(std.string(src, len), next_id) + return result[0].second # type: ignore + + +# uint32_t generate_symbol_id(parse_state & state, const std::string & base_name) { +# uint32_t next_id = static_cast(state.symbol_ids.size()); +# state.symbol_ids[base_name + '_' + std::to_string(next_id)] = next_id; +# return next_id; +# } +def generate_symbol_id(state: parse_state, base_name: str) -> int: + next_id = state.symbol_ids.size() # type: int + state.symbol_ids[base_name + "_" + str(next_id)] = next_id + return next_id + + +# void add_rule( +# parse_state & state, +# uint32_t rule_id, +# const std::vector & rule) { +# if (state.rules.size() <= rule_id) { +# state.rules.resize(rule_id + 1); +# } +# state.rules[rule_id] = rule; +# } +def add_rule( + state: parse_state, + rule_id: int, + rule: std.vector[LlamaGrammarElement], +) -> None: + if state.rules.size() <= rule_id: + state.rules.resize( + rule_id + 1, + fill_value_factory=std.vector[LlamaGrammarElement], + ) + state.rules[rule_id] = rule + + +# std::pair decode_utf8(const char * src) { +# static const int lookup[] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 3, 4 }; +# uint8_t first_byte = static_cast(*src); +# uint8_t highbits = first_byte >> 4; +# int len = lookup[highbits]; +# uint8_t mask = (1 << (8 - len)) - 1; +# uint32_t value = first_byte & mask; +# const char * end = src + len; // may overrun! 
+# const char * pos = src + 1; +# for ( ; pos < end && *pos; pos++) { +# value = (value << 6) + (static_cast(*pos) & 0x3F); +# } +# return std::make_pair(value, pos); +# } +def decode_utf8(src: const_char_p) -> Tuple[int, const_char_p]: + """Decodes a UTF-8 character from the source string.""" + # Get the codepoint of the first character + value = ord(src[0]) + # Move the pointer ahead one character + pos = src + 1 + + return value, pos + + +# bool is_word_char(char c) { +# return ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z') || c == '-' || ('0' <= c && c <= '9'); +# } +def is_word_char(c: str) -> bool: + return ("a" <= c <= "z") or ("A" <= c <= "Z") or c == "-" or ("0" <= c <= "9") + + +# std::pair parse_hex(const char * src, int size) { +# const char * pos = src; +# const char * end = src + size; +# uint32_t value = 0; +# for ( ; pos < end && *pos; pos++) { +# value <<= 4; +# char c = *pos; +# if ('a' <= c && c <= 'f') { +# value += c - 'a' + 10; +# } else if ('A' <= c && c <= 'F') { +# value += c - 'A' + 10; +# } else if ('0' <= c && c <= '9') { +# value += c - '0'; +# } else { +# break; +# } +# } +# if (pos != end) { +# throw std::runtime_error("expecting " + std::to_string(size) + " hex chars at " + src); +# } +# return std::make_pair(value, pos); +# } +def parse_hex(src: const_char_p, size: int) -> Tuple[int, const_char_p]: + pos = const_char_p(src) # type: const_char_p + end = src + size # type: const_char_p + value = 0 # type: int + while pos < end and pos[0]: + value <<= 4 + c = pos[0] # type: str + if "a" <= c <= "f": + value += ord(c) - ord("a") + 10 + elif "A" <= c <= "F": + value += ord(c) - ord("A") + 10 + elif "0" <= c <= "9": + value += ord(c) - ord("0") + else: + break + pos += 1 + if pos != end: + raise RuntimeError("expecting " + str(size) + " hex chars at " + str(src)) + return (value, pos) + + +# std::pair parse_char(const char * src) { +# if (*src == '\\') { +# switch (src[1]) { +# case 'x': return parse_hex(src + 2, 2); +# case 'u': return parse_hex(src + 2, 4); +# case 'U': return parse_hex(src + 2, 8); +# case 't': return std::make_pair('\t', src + 2); +# case 'r': return std::make_pair('\r', src + 2); +# case 'n': return std::make_pair('\n', src + 2); +# case '\\': +# case '"': +# case '[': +# case ']': +# return std::make_pair(src[1], src + 2); +# default: +# throw std::runtime_error(std::string("unknown escape at ") + src); +# } +# } else if (*src) { +# return decode_utf8(src); +# } +# throw std::runtime_error("unexpected end of input"); +# } +def parse_char(src: const_char_p) -> Tuple[int, const_char_p]: + if src[0] == "\\": + case = src[1] # type: str + if case == "x": + return parse_hex(src + 2, 2) + elif case == "u": + return parse_hex(src + 2, 4) + elif case == "U": + return parse_hex(src + 2, 8) + elif case == "t": + return (ord("\t"), src + 2) # implicit cast + elif case == "r": + return (ord("\r"), src + 2) # implicit cast + elif case == "n": + return (ord("\n"), src + 2) # implicit cast + elif case in ("\\", '"', "[", "]"): + return (ord(case), src + 2) # implicit cast + else: + raise RuntimeError("unknown escape at " + str(src)) + elif src[0]: + return decode_utf8(src) + else: + raise RuntimeError("unexpected end of input") + + +# const char * parse_name(const char * src) { +# const char * pos = src; +# while (is_word_char(*pos)) { +# pos++; +# } +# if (pos == src) { +# throw std::runtime_error(std::string("expecting name at ") + src); +# } +# return pos; +# } +def parse_name(src: const_char_p) -> const_char_p: + pos = const_char_p(src) # type: 
const_char_p + while is_word_char(pos[0]): + pos += 1 + if pos == src: + raise RuntimeError("expecting name at " + str(src)) + return pos + + +# const char * parse_space(const char * src, bool newline_ok) { +# const char * pos = src; +# while (*pos == ' ' || *pos == '\t' || *pos == '#' || +# (newline_ok && (*pos == '\r' || *pos == '\n'))) { +# if (*pos == '#') { +# while (*pos && *pos != '\r' && *pos != '\n') { +# pos++; +# } +# } else { +# pos++; +# } +# } +# return pos; +# } +def parse_space(src: const_char_p, newline_ok: bool) -> const_char_p: + pos = const_char_p(src) # type: const_char_p + while pos[0] in (" ", "\t", "#") or (newline_ok and pos[0] in ("\r", "\n")): + if pos[0] == "#": + while pos[0] is not None and pos[0] not in ("\r", "\n"): + pos += 1 + else: + pos += 1 + return pos + + +# const char * parse_sequence( +# parse_state & state, +# const char * src, +# const std::string & rule_name, +# std::vector & out_elements, +# bool is_nested) { +def parse_sequence( + state: parse_state, + src: const_char_p, + rule_name: str, + out_elements: std.vector[LlamaGrammarElement], + is_nested: bool, +) -> const_char_p: + # size_t last_sym_start = out_elements.size(); + # const char * pos = src; + last_sym_start = out_elements.size() # type: int + pos = const_char_p(src) # type: const_char_p + # while (*pos) { + while pos[0]: + # if (*pos == '"') { // literal string + # pos++; + # last_sym_start = out_elements.size(); + # while (*pos != '"') { + # auto char_pair = parse_char(pos); + # pos = char_pair.second; + # out_elements.push_back({LLAMA_GRETYPE_CHAR, char_pair.first}); + # } + # pos = parse_space(pos + 1, is_nested); + if pos[0] == '"': # literal string + pos += 1 + last_sym_start = out_elements.size() + while pos[0] != '"': + char_pair = parse_char(pos) # type: Tuple[int, const_char_p] + pos = char_pair[1] + out_elements.push_back( + LlamaGrammarElement(llama_gretype.LLAMA_GRETYPE_CHAR, char_pair[0]) + ) + pos = parse_space(pos + 1, is_nested) + # } else if (*pos == '[') { // char range(s) + # pos++; + # enum llama_gretype start_type = LLAMA_GRETYPE_CHAR; + elif pos[0] == "[": # char range(s) + pos += 1 + start_type = llama_gretype.LLAMA_GRETYPE_CHAR # type: llama_gretype + # if (*pos == '^') { + # pos++; + # start_type = LLAMA_GRETYPE_CHAR_NOT; + # } + # last_sym_start = out_elements.size(); + if pos[0] == "^": + pos += 1 + start_type = llama_gretype.LLAMA_GRETYPE_CHAR_NOT + last_sym_start = out_elements.size() + # while (*pos != ']') { + # auto char_pair = parse_char(pos); + # pos = char_pair.second; + # enum llama_gretype type = last_sym_start < out_elements.size() + # ? 
LLAMA_GRETYPE_CHAR_ALT + # : start_type; + # out_elements.push_back({type, char_pair.first}); + while pos[0] != "]": + char_pair = parse_char(pos) # type: Tuple[int, const_char_p] + pos = char_pair[1] + type = ( + llama_gretype.LLAMA_GRETYPE_CHAR_ALT + if last_sym_start < out_elements.size() + else start_type + ) # type: llama_gretype + out_elements.push_back(LlamaGrammarElement(type, char_pair[0])) + # if (pos[0] == '-' && pos[1] != ']') { + # auto endchar_pair = parse_char(pos + 1); + # pos = endchar_pair.second; + # out_elements.push_back({LLAMA_GRETYPE_CHAR_RNG_UPPER, endchar_pair.first}); + # } + # } + if pos[0] == "-" and pos[1] != "]": + endchar_pair = parse_char(pos + 1) # type: Tuple[int, const_char_p] + pos = endchar_pair[1] + out_elements.push_back( + LlamaGrammarElement( + llama_gretype.LLAMA_GRETYPE_CHAR_RNG_UPPER, + endchar_pair[0], + ) + ) + # pos = parse_space(pos + 1, is_nested); + pos = parse_space(pos + 1, is_nested) + # } else if (is_word_char(*pos)) { // rule reference + # const char * name_end = parse_name(pos); + # uint32_t ref_rule_id = get_symbol_id(state, pos, name_end - pos); + # pos = parse_space(name_end, is_nested); + # last_sym_start = out_elements.size(); + # out_elements.push_back({LLAMA_GRETYPE_RULE_REF, ref_rule_id}); + elif is_word_char(pos[0]): # rule reference + name_end = parse_name(pos) # type: const_char_p + ref_rule_id = get_symbol_id(state, pos, name_end - pos) # type: int + pos = parse_space(name_end, is_nested) + last_sym_start = out_elements.size() + out_elements.push_back( + LlamaGrammarElement(llama_gretype.LLAMA_GRETYPE_RULE_REF, ref_rule_id) + ) + # } else if (*pos == '(') { // grouping + # // parse nested alternates into synthesized rule + # pos = parse_space(pos + 1, true); + # uint32_t sub_rule_id = generate_symbol_id(state, rule_name); + # pos = parse_alternates(state, pos, rule_name, sub_rule_id, true); + # last_sym_start = out_elements.size(); + # // output reference to synthesized rule + # out_elements.push_back({LLAMA_GRETYPE_RULE_REF, sub_rule_id}); + # if (*pos != ')') { + # throw std::runtime_error(std::string("expecting ')' at ") + pos); + # } + # pos = parse_space(pos + 1, is_nested); + elif pos[0] == "(": # grouping + # parse nested alternates into synthesized rule + pos = parse_space(pos + 1, True) + sub_rule_id = generate_symbol_id(state, rule_name) # type: int + pos = parse_alternates(state, pos, rule_name, sub_rule_id, True) + last_sym_start = out_elements.size() + # output reference to synthesized rule + out_elements.push_back( + LlamaGrammarElement(llama_gretype.LLAMA_GRETYPE_RULE_REF, sub_rule_id) + ) + if pos[0] != ")": + raise RuntimeError("expecting ')' at " + str(pos)) + pos = parse_space(pos + 1, is_nested) + # } else if (*pos == '*' || *pos == '+' || *pos == '?') { // repetition operator + # if (last_sym_start == out_elements.size()) { + # throw std::runtime_error(std::string("expecting preceeding item to */+/? at ") + pos); + # } + elif pos[0] in ("*", "+", "?"): # repetition operator + if last_sym_start == out_elements.size(): + raise RuntimeError("expecting preceding item to */+/? at " + str(pos)) + # // apply transformation to previous symbol (last_sym_start to end) according to + # // rewrite rules: + # // S* --> S' ::= S S' | + # // S+ --> S' ::= S S' | S + # // S? 
--> S' ::= S | + # uint32_t sub_rule_id = generate_symbol_id(state, rule_name); + # std::vector sub_rule; + # // add preceding symbol to generated rule + # sub_rule.insert( + # sub_rule.end(), out_elements.begin() + last_sym_start, out_elements.end()); + sub_rule_id = generate_symbol_id(state, rule_name) # type: int + sub_rule = std.vector[ + LlamaGrammarElement + ]() # type: std.vector[LlamaGrammarElement] + sub_rule.insert( + sub_rule.end(), + out_elements.begin() + last_sym_start, + out_elements.end(), + ) + # if (*pos == '*' || *pos == '+') { + # // cause generated rule to recurse + # sub_rule.push_back({LLAMA_GRETYPE_RULE_REF, sub_rule_id}); + # } + # // mark start of alternate def + # sub_rule.push_back({LLAMA_GRETYPE_ALT, 0}); + if pos[0] in ("*", "+"): + sub_rule.push_back( + LlamaGrammarElement( + llama_gretype.LLAMA_GRETYPE_RULE_REF, sub_rule_id + ) + ) + sub_rule.push_back(LlamaGrammarElement(llama_gretype.LLAMA_GRETYPE_ALT, 0)) + # if (*pos == '+') { + # // add preceding symbol as alternate only for '+' (otherwise empty) + # sub_rule.insert( + # sub_rule.end(), out_elements.begin() + last_sym_start, out_elements.end()); + # } + # sub_rule.push_back({LLAMA_GRETYPE_END, 0}); + # add_rule(state, sub_rule_id, sub_rule); + # // in original rule, replace previous symbol with reference to generated rule + # out_elements.resize(last_sym_start); + # out_elements.push_back({LLAMA_GRETYPE_RULE_REF, sub_rule_id}); + # pos = parse_space(pos + 1, is_nested); + if pos[0] == "+": + # add preceding symbol as alternate only for '+' (otherwise empty) + sub_rule.insert( + sub_rule.end(), + out_elements.begin() + last_sym_start, + out_elements.end(), + ) + sub_rule.push_back(LlamaGrammarElement(llama_gretype.LLAMA_GRETYPE_END, 0)) + add_rule(state, sub_rule_id, sub_rule) + # in original rule, replace previous symbol with reference to generated rule + out_elements.resize(last_sym_start) + out_elements.push_back( + LlamaGrammarElement(llama_gretype.LLAMA_GRETYPE_RULE_REF, sub_rule_id) + ) + pos = parse_space(pos + 1, is_nested) + # } else { + # break; + # } + else: + break + # } + # return pos; + # } + return pos + + +# const char * parse_alternates( +# parse_state & state, +# const char * src, +# const std::string & rule_name, +# uint32_t rule_id, +# bool is_nested) { +# std::vector rule; +# const char * pos = parse_sequence(state, src, rule_name, rule, is_nested); +# while (*pos == '|') { +# rule.push_back({LLAMA_GRETYPE_ALT, 0}); +# pos = parse_space(pos + 1, true); +# pos = parse_sequence(state, pos, rule_name, rule, is_nested); +# } +# rule.push_back({LLAMA_GRETYPE_END, 0}); +# add_rule(state, rule_id, rule); +# return pos; +# } +def parse_alternates( + state: parse_state, + src: const_char_p, + rule_name: str, + rule_id: int, + is_nested: bool, +) -> const_char_p: + rule = std.vector() # type: std.vector[LlamaGrammarElement] + pos = parse_sequence(state, src, rule_name, rule, is_nested) # type: const_char_p + while pos[0] == "|": + rule.push_back(LlamaGrammarElement(llama_gretype.LLAMA_GRETYPE_ALT, 0)) + pos = parse_space(pos + 1, True) + pos = parse_sequence(state, pos, rule_name, rule, is_nested) + rule.push_back(LlamaGrammarElement(llama_gretype.LLAMA_GRETYPE_END, 0)) + add_rule(state, rule_id, rule) + return pos + + +# const char * parse_rule(parse_state & state, const char * src) { +# const char * name_end = parse_name(src); +# const char * pos = parse_space(name_end, false); +# size_t name_len = name_end - src; +# uint32_t rule_id = get_symbol_id(state, src, name_len); +# const 
std::string name(src, name_len); + +# if (!(pos[0] == ':' && pos[1] == ':' && pos[2] == '=')) { +# throw std::runtime_error(std::string("expecting ::= at ") + pos); +# } +# pos = parse_space(pos + 3, true); + +# pos = parse_alternates(state, pos, name, rule_id, false); + + +# if (*pos == '\r') { +# pos += pos[1] == '\n' ? 2 : 1; +# } else if (*pos == '\n') { +# pos++; +# } else if (*pos) { +# throw std::runtime_error(std::string("expecting newline or end at ") + pos); +# } +# return parse_space(pos, true); +# } +def parse_rule(state: parse_state, src: const_char_p) -> const_char_p: + name_end = parse_name(src) # type: const_char_p + pos = parse_space(name_end, False) # type: const_char_p + name_len = name_end - src # type: int + rule_id = get_symbol_id(state, src, name_len) # type: int + name = std.string(src, name_len) # type: str + + if not (pos[0] == ":" and pos[1] == ":" and pos[2] == "="): + raise RuntimeError("expecting ::= at " + str(pos)) + + pos = parse_space(pos + 3, True) # type: const_char_p + pos = parse_alternates(state, pos, name, rule_id, False) # type: const_char_p + + if pos[0] == "\r": + pos += 2 if pos[1] == "\n" else 1 + elif pos[0] == "\n": + pos += 1 + elif pos[0]: + raise RuntimeError("expecting newline or end at " + str(pos)) + return parse_space(pos, True) + + +# parse_state parse(const char * src) { +# try { +# parse_state state; +# const char * pos = parse_space(src, true); +# while (*pos) { +# pos = parse_rule(state, pos); +# } +# return state; +# } catch (const std::exception & err) { +# fprintf(stderr, "%s: error parsing grammar: %s\n", __func__, err.what()); +# return parse_state(); +# } +# } +def parse(src: const_char_p) -> parse_state: + try: + state = parse_state() # type: parse_state + pos = parse_space(src, True) # type: const_char_p + while pos[0]: + pos = parse_rule(state, pos) + return state + except Exception as err: + print(f"{parse.__name__}: error parsing grammar: {err}") + return parse_state() + + +# void print_grammar_char(FILE * file, uint32_t c) { +# if (0x20 <= c && c <= 0x7f) { +# fprintf(file, "%c", static_cast<char>(c)); +# } else { +# // cop out of encoding UTF-8 +# fprintf(file, "<U+%04X>", c); +# } +# } +def print_grammar_char(file: TextIO, c: int) -> None: + if 0x20 <= c and c <= 0x7F: + file.write(chr(c)) + else: + # cop out of encoding UTF-8 + file.write(f"<U+{c:04X}>") + + +# bool is_char_element(llama_grammar_element elem) { +# switch (elem.type) { +# case LLAMA_GRETYPE_CHAR: return true; +# case LLAMA_GRETYPE_CHAR_NOT: return true; +# case LLAMA_GRETYPE_CHAR_ALT: return true; +# case LLAMA_GRETYPE_CHAR_RNG_UPPER: return true; +# default: return false; +# } +# } +def is_char_element(elem: LlamaGrammarElement) -> bool: + return elem.type in ( + llama_gretype.LLAMA_GRETYPE_CHAR, + llama_gretype.LLAMA_GRETYPE_CHAR_NOT, + llama_gretype.LLAMA_GRETYPE_CHAR_ALT, + llama_gretype.LLAMA_GRETYPE_CHAR_RNG_UPPER, + ) + + +# void print_rule( +# FILE * file, +# uint32_t rule_id, +# const std::vector<llama_grammar_element> & rule, +# const std::map<uint32_t, std::string> & symbol_id_names) { +def print_rule( + file: TextIO, + rule_id: int, + rule: std.vector[LlamaGrammarElement], + symbol_id_names: std.map[int, str], +) -> None: + # if (rule.empty() || rule.back().type != LLAMA_GRETYPE_END) { + # throw std::runtime_error( + # "malformed rule, does not end with LLAMA_GRETYPE_END: " + std::to_string(rule_id)); + # } + # fprintf(file, "%s ::= ", symbol_id_names.at(rule_id).c_str()); + if rule.empty() or rule.back().type != llama_gretype.LLAMA_GRETYPE_END: + raise RuntimeError( + "malformed rule, does not end with 
LLAMA_GRETYPE_END: " + str(rule_id) + ) + print(f"{symbol_id_names.at(rule_id)} ::=", file=file, end=" ") + # for (size_t i = 0, end = rule.size() - 1; i < end; i++) { + # llama_grammar_element elem = rule[i]; + # switch (elem.type) { + # case LLAMA_GRETYPE_END: + # throw std::runtime_error( + # "unexpected end of rule: " + std::to_string(rule_id) + "," + + # std::to_string(i)); + # case LLAMA_GRETYPE_ALT: + # fprintf(file, "| "); + # break; + # case LLAMA_GRETYPE_RULE_REF: + # fprintf(file, "%s ", symbol_id_names.at(elem.value).c_str()); + # break; + # case LLAMA_GRETYPE_CHAR: + # fprintf(file, "["); + # print_grammar_char(file, elem.value); + # break; + # case LLAMA_GRETYPE_CHAR_NOT: + # fprintf(file, "[^"); + # print_grammar_char(file, elem.value); + # break; + # case LLAMA_GRETYPE_CHAR_RNG_UPPER: + # if (i == 0 || !is_char_element(rule[i - 1])) { + # throw std::runtime_error( + # "LLAMA_GRETYPE_CHAR_RNG_UPPER without preceding char: " + + # std::to_string(rule_id) + "," + std::to_string(i)); + # } + # fprintf(file, "-"); + # print_grammar_char(file, elem.value); + # break; + # case LLAMA_GRETYPE_CHAR_ALT: + # if (i == 0 || !is_char_element(rule[i - 1])) { + # throw std::runtime_error( + # "LLAMA_GRETYPE_CHAR_ALT without preceding char: " + + # std::to_string(rule_id) + "," + std::to_string(i)); + # } + # print_grammar_char(file, elem.value); + # break; + # } + for i, elem in enumerate(rule[:-1]): + case = elem.type # type: llama_gretype + if case is llama_gretype.LLAMA_GRETYPE_END: + raise RuntimeError("unexpected end of rule: " + str(rule_id) + "," + str(i)) + elif case is llama_gretype.LLAMA_GRETYPE_ALT: + print("| ", file=file, end="") + elif case is llama_gretype.LLAMA_GRETYPE_RULE_REF: + print(f"{symbol_id_names.at(elem.value)} ", file=file, end="") + elif case is llama_gretype.LLAMA_GRETYPE_CHAR: + print("[", file=file, end="") + print_grammar_char(file, elem.value) + elif case is llama_gretype.LLAMA_GRETYPE_CHAR_NOT: + print("[^", file=file, end="") + print_grammar_char(file, elem.value) + elif case is llama_gretype.LLAMA_GRETYPE_CHAR_RNG_UPPER: + if i == 0 or not is_char_element(rule[i - 1]): + raise RuntimeError( + "LLAMA_GRETYPE_CHAR_RNG_UPPER without preceding char: " + + str(rule_id) + + "," + + str(i) + ) + print("-", file=file, end="") + print_grammar_char(file, elem.value) + elif case is llama_gretype.LLAMA_GRETYPE_CHAR_ALT: + if i == 0 or not is_char_element(rule[i - 1]): + raise RuntimeError( + "LLAMA_GRETYPE_CHAR_ALT without preceding char: " + + str(rule_id) + + "," + + str(i) + ) + print_grammar_char(file, elem.value) + # if (is_char_element(elem)) { + # switch (rule[i + 1].type) { + # case LLAMA_GRETYPE_CHAR_ALT: + # case LLAMA_GRETYPE_CHAR_RNG_UPPER: + # break; + # default: + # fprintf(file, "] "); + if is_char_element(elem): + if rule[i + 1].type in ( + llama_gretype.LLAMA_GRETYPE_CHAR_ALT, + llama_gretype.LLAMA_GRETYPE_CHAR_RNG_UPPER, + ): + pass + else: + print("] ", file=file, end="") + # } + # } + # } + # fprintf(file, "\n"); + # } + print(file=file) + + +# void print_grammar(FILE * file, const parse_state & state) { +# try { +# std::map symbol_id_names; +# for (auto kv : state.symbol_ids) { +# symbol_id_names[kv.second] = kv.first; +# } +# for (size_t i = 0, end = state.rules.size(); i < end; i++) { +# // fprintf(file, "%zu: ", i); +# // print_rule_binary(file, state.rules[i]); +# print_rule(file, i, state.rules[i], symbol_id_names); +# // fprintf(file, "\n"); +# } +# } catch (const std::exception & err) { +# fprintf(stderr, "\n%s: error printing grammar: 
%s\n", __func__, err.what()); +# } +# } +def print_grammar(file: TextIO, state: parse_state) -> None: + try: + symbol_id_names = std.map() # type: std.map[int, str] + for kv in state.symbol_ids.items(): + symbol_id_names[kv[1]] = kv[0] + + for i, rule in enumerate(state.rules): + print_rule(file, i, rule, symbol_id_names) + except Exception as err: + print( + f"{print_grammar.__name__}: error printing grammar: {err}", + file=sys.stderr, + ) + + +"""llama.cpp gbnf rules from vendor/llama.cpp/grammars""" + +ARITHMETIC_GBNF = r""" +root ::= (expr "=" ws term "\n")+ +expr ::= term ([-+*/] term)* +term ::= ident | num | "(" ws expr ")" ws +ident ::= [a-z] [a-z0-9_]* ws +num ::= [0-9]+ ws +ws ::= [ \t\n]* +""" + +C_GBNF = r""" +root ::= (declaration)* + +declaration ::= dataType identifier "(" parameter? ")" "{" statement* "}" + +dataType ::= "int" ws | "float" ws | "char" ws +identifier ::= [a-zA-Z_] [a-zA-Z_0-9]* + +parameter ::= dataType identifier + +statement ::= + ( dataType identifier ws "=" ws expression ";" ) | + ( identifier ws "=" ws expression ";" ) | + ( identifier ws "(" argList? ")" ";" ) | + ( "return" ws expression ";" ) | + ( "while" "(" condition ")" "{" statement* "}" ) | + ( "for" "(" forInit ";" ws condition ";" ws forUpdate ")" "{" statement* "}" ) | + ( "if" "(" condition ")" "{" statement* "}" ("else" "{" statement* "}")? ) | + ( singleLineComment ) | + ( multiLineComment ) + +forInit ::= dataType identifier ws "=" ws expression | identifier ws "=" ws expression +forUpdate ::= identifier ws "=" ws expression + +condition ::= expression relationOperator expression +relationOperator ::= ("<=" | "<" | "==" | "!=" | ">=" | ">") + +expression ::= term (("+" | "-") term)* +term ::= factor(("*" | "/") factor)* + +factor ::= identifier | number | unaryTerm | funcCall | parenExpression +unaryTerm ::= "-" factor +funcCall ::= identifier "(" argList? ")" +parenExpression ::= "(" ws expression ws ")" + +argList ::= expression ("," ws expression)* + +number ::= [0-9]+ + +singleLineComment ::= "//" [^\n]* "\n" +multiLineComment ::= "/*" ( [^*] | ("*" [^/]) )* "*/" + +ws ::= ([ \t\n]+) +""" + +CHESS_GBNF = r""" +root ::= object +value ::= object | array | string | number | ("true" | "false" | "null") ws + +object ::= + "{" ws ( + string ":" ws value + ("," ws string ":" ws value)* + )? "}" ws + +array ::= + "[" ws ( + value + ("," ws value)* + )? "]" ws + +string ::= + "\"" ( + [^"\\] | + "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]) # escapes + )* "\"" ws + +number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? ws + +# Optional space: by convention, applied in this grammar after literal chars when allowed +ws ::= ([ \t\n] ws)? +""" + +JAPANESE_GBNF = r""" +root ::= object +value ::= object | array | string | number | ("true" | "false" | "null") ws + +object ::= + "{" ws ( + string ":" ws value + ("," ws string ":" ws value)* + )? "}" ws + +array ::= + "[" ws ( + value + ("," ws value)* + )? "]" ws + +string ::= + "\"" ( + [^"\\] | + "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]) # escapes + )* "\"" ws + +number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? ws + +# Optional space: by convention, applied in this grammar after literal chars when allowed +ws ::= ([ \t\n] ws)? 
+""" + +JSON_ARR_GBNF = r""" +# This is the same as json.gbnf but we restrict whitespaces at the end of the root array +# Useful for generating JSON arrays + +root ::= arr +value ::= object | array | string | number | ("true" | "false" | "null") ws + +arr ::= + "[\n" ws ( + value + (",\n" ws value)* + )? "]" + +object ::= + "{" ws ( + string ":" ws value + ("," ws string ":" ws value)* + )? "}" ws + +array ::= + "[" ws ( + value + ("," ws value)* + )? "]" ws + +string ::= + "\"" ( + [^"\\\x7F\x00-\x1F] | + "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]) # escapes + )* "\"" ws + +number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? ws + +# Optional space: by convention, applied in this grammar after literal chars when allowed +ws ::= ([ \t\n] ws)? +""" + + +JSON_GBNF = r""" +root ::= object +value ::= object | array | string | number | ("true" | "false" | "null") ws + +object ::= + "{" ws ( + string ":" ws value + ("," ws string ":" ws value)* + )? "}" ws + +array ::= + "[" ws ( + value + ("," ws value)* + )? "]" ws + +string ::= + "\"" ( + [^"\\\x7F\x00-\x1F] | + "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]) # escapes + )* "\"" ws + +number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? ws + +ws ::= ([ \t\n] ws)? +""" + +LIST_GBNF = r""" +root ::= item+ + +# Excludes various line break characters +item ::= "- " [^\r\n\x0b\x0c\x85\u2028\u2029]+ "\n" +""" + +"""llama.cpp json-schema to grammar converter from vendor/llama.cpp/examples/json-schema-to-grammar.py""" +import json +import re +from typing import List, Optional + +# whitespace is constrained to a single space char to prevent model "running away" in +# whitespace. Also maybe improves generation quality? +SPACE_RULE = '" "?' + + +INVALID_RULE_CHARS_RE = re.compile(r"[^a-zA-Z0-9-]+") +GRAMMAR_LITERAL_ESCAPE_RE = re.compile(r'[\r\n"]') +GRAMMAR_LITERAL_ESCAPES = {"\r": "\\r", "\n": "\\n", '"': '\\"'} + +# whitespace is constrained to a single space char to prevent model "running away" in +# whitespace. Also maybe improves generation quality? +SPACE_RULE = '" "?' + + +def _build_repetition(item_rule, min_items, max_items, separator_rule=None, item_rule_is_literal=False): + if not separator_rule: + if min_items == 0 and max_items == 1: + return f'{item_rule}?' + elif min_items == 1 and max_items is None: + return f'{item_rule}+' + + result = '' + + if min_items > 0: + if item_rule_is_literal and separator_rule is None: + result = '"' + (item_rule[1:-1] * min_items) + '"' + else: + result = (f' {separator_rule} ' if separator_rule else ' ').join([item_rule] * min_items) + + def opt_repetitions(up_to_n, prefix_with_sep=False): + ''' + - n=4, no sep: '(a (a (a (a)?)?)?)?' + - n=4, sep=',', prefix: '("," a ("," a ("," a ("," a)?)?)?)?' + - n=4, sep=',', no prefix: '(a ("," a ("," a ("," a)?)?)?)?' + ''' + + content = f'{separator_rule} {item_rule}' if prefix_with_sep and separator_rule else item_rule + if up_to_n == 0: + return '' + elif up_to_n == 1: + return f'({content})?' + elif separator_rule and not prefix_with_sep: + return f'({content} {opt_repetitions(up_to_n - 1, prefix_with_sep=True)})?' + else: + return (f'({content} ' * up_to_n).rstrip() + (')?' 
* up_to_n) + + if min_items > 0 and max_items != min_items: + result += ' ' + + if max_items is not None: + result += opt_repetitions(max_items - min_items, prefix_with_sep=min_items > 0) + else: + item_operator = f'({separator_rule + " " if separator_rule else ""}{item_rule})' + + if min_items == 0 and separator_rule: + result = f'({item_rule} {item_operator}*)?' + else: + result += f'{item_operator}*' + + return result + + + +class BuiltinRule: + def __init__(self, content: str, deps: list = None): + self.content = content + self.deps = deps or [] + +_up_to_15_digits = _build_repetition('[0-9]', 0, 15) + +PRIMITIVE_RULES = { + 'boolean' : BuiltinRule('("true" | "false") space', []), + 'decimal-part' : BuiltinRule('[0-9] ' + _up_to_15_digits, []), + 'integral-part': BuiltinRule('[0-9] | [1-9] ' + _up_to_15_digits, []), + 'number' : BuiltinRule('("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space', ['integral-part', 'decimal-part']), + 'integer' : BuiltinRule('("-"? integral-part) space', ['integral-part']), + 'value' : BuiltinRule('object | array | string | number | boolean | null', ['object', 'array', 'string', 'number', 'boolean', 'null']), + 'object' : BuiltinRule('"{" space ( string ":" space value ("," space string ":" space value)* )? "}" space', ['string', 'value']), + 'array' : BuiltinRule('"[" space ( value ("," space value)* )? "]" space', ['value']), + 'uuid' : BuiltinRule(r'"\"" ' + ' "-" '.join('[0-9a-fA-F]' * n for n in [8, 4, 4, 4, 12]) + r' "\"" space', []), + 'char' : BuiltinRule(r'[^"\\] | "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])', []), + 'string' : BuiltinRule(r'"\"" char* "\"" space', ['char']), + 'null' : BuiltinRule('"null" space', []), +} + +# TODO: support "uri", "email" string formats +STRING_FORMAT_RULES = { + 'date' : BuiltinRule('[0-9] [0-9] [0-9] [0-9] "-" ( "0" [1-9] | "1" [0-2] ) "-" ( \"0\" [1-9] | [1-2] [0-9] | "3" [0-1] )', []), + 'time' : BuiltinRule('([01] [0-9] | "2" [0-3]) ":" [0-5] [0-9] ":" [0-5] [0-9] ( "." [0-9] [0-9] [0-9] )? ( "Z" | ( "+" | "-" ) ( [01] [0-9] | "2" [0-3] ) ":" [0-5] [0-9] )', []), + 'date-time' : BuiltinRule('date "T" time', ['date', 'time']), + 'date-string' : BuiltinRule('"\\"" date "\\"" space', ['date']), + 'time-string' : BuiltinRule('"\\"" time "\\"" space', ['time']), + 'date-time-string': BuiltinRule('"\\"" date-time "\\"" space', ['date-time']), +} + +DOTALL = '[\\U00000000-\\U0010FFFF]' +DOT = '[^\\x0A\\x0D]' + +RESERVED_NAMES = set(["root", "dot", *PRIMITIVE_RULES.keys(), *STRING_FORMAT_RULES.keys()]) + + +NON_LITERAL_SET = set('|.()[]{}*+?') +ESCAPED_IN_REGEXPS_BUT_NOT_IN_LITERALS = set('[]()|{}*+?') + + + + +class SchemaConverter: + def __init__(self, *, prop_order, allow_fetch, dotall, raw_pattern): + self._prop_order = prop_order + self._allow_fetch = allow_fetch + self._dotall = dotall + self._raw_pattern = raw_pattern + self._rules = { + 'space': SPACE_RULE, + } + self._refs = {} + self._refs_being_resolved = set() + + def _format_literal(self, literal): + escaped = GRAMMAR_LITERAL_ESCAPE_RE.sub( + lambda m: GRAMMAR_LITERAL_ESCAPES.get(m.group(0)), literal + ) + return f'"{escaped}"' + + def not_literal(self, literal: str, dotall: bool = True, maybe_escaped_underscores = False) -> str: + ''' + not_literal('a') -> '[^a]' + not_literal('abc') -> '([^a] | "a" ([^b] | "b" ([^c])?)?)?' 
+ ''' + assert len(literal) > 0, 'Empty literal not supported' + def recurse(i: int): + c = literal[i] + if maybe_escaped_underscores and c == '_': + yield f'[^{c}\\\\]' + yield ' | ' + yield f'"\\\\"? "{c}"' + else: + yield f'[^{c}]' + if i < len(literal) - 1: + yield ' | ' + yield self._format_literal(c) + yield ' (' + yield from recurse(i + 1) + yield ')?' + + return ''.join(('(', *recurse(0), ')')) + + def _add_rule(self, name, rule): + esc_name = INVALID_RULE_CHARS_RE.sub('-', name) + if esc_name not in self._rules or self._rules[esc_name] == rule: + key = esc_name + else: + i = 0 + while f'{esc_name}{i}' in self._rules and self._rules[f'{esc_name}{i}'] != rule: + i += 1 + key = f'{esc_name}{i}' + self._rules[key] = rule + return key + + def resolve_refs(self, schema: dict, url: str): + ''' + Resolves all $ref fields in the given schema, fetching any remote schemas, + replacing $ref with absolute reference URL and populating self._refs with the + respective referenced (sub)schema dictionaries. + ''' + def visit(n: dict): + if isinstance(n, list): + return [visit(x) for x in n] + elif isinstance(n, dict): + ref = n.get('$ref') + if ref is not None and ref not in self._refs: + if ref.startswith('https://'): + assert self._allow_fetch, 'Fetching remote schemas is not allowed (use --allow-fetch for force)' + import requests + + frag_split = ref.split('#') + base_url = frag_split[0] + + target = self._refs.get(base_url) + if target is None: + target = self.resolve_refs(requests.get(ref).json(), base_url) + self._refs[base_url] = target + + if len(frag_split) == 1 or frag_split[-1] == '': + return target + elif ref.startswith('#/'): + target = schema + ref = f'{url}{ref}' + n['$ref'] = ref + else: + raise ValueError(f'Unsupported ref {ref}') + + for sel in ref.split('#')[-1].split('/')[1:]: + assert target is not None and sel in target, f'Error resolving ref {ref}: {sel} not in {target}' + target = target[sel] + + self._refs[ref] = target + else: + for v in n.values(): + visit(v) + + return n + return visit(schema) + + def _generate_union_rule(self, name, alt_schemas): + return ' | '.join(( + self.visit(alt_schema, f'{name}{"-" if name else "alternative-"}{i}') + for i, alt_schema in enumerate(alt_schemas) + )) + + def _visit_pattern(self, pattern, name): + ''' + Transforms a regular expression pattern into a GBNF rule. + + Input: https://json-schema.org/understanding-json-schema/reference/regular_expressions + Output: https://github.com/ggerganov/llama.cpp/blob/master/grammars/README.md + + Unsupported features: negative/positive lookaheads, greedy/non-greedy modifiers. + + Mostly a 1:1 translation, except for {x} / {x,} / {x,y} quantifiers for which + we define sub-rules to keep the output lean. + ''' + + assert pattern.startswith('^') and pattern.endswith('$'), 'Pattern must start with "^" and end with "$"' + pattern = pattern[1:-1] + sub_rule_ids = {} + + i = 0 + length = len(pattern) + + def to_rule(s: Tuple[str, bool]) -> str: + (txt, is_literal) = s + return "\"" + txt + "\"" if is_literal else txt + + def transform() -> Tuple[str, bool]: + ''' + Parse a unit at index i (advancing it), and return its string representation + whether it's a literal. + ''' + nonlocal i + nonlocal pattern + nonlocal sub_rule_ids + + start = i + # For each component of this sequence, store its string representation and whether it's a literal. 
+ # We only need a flat structure here to apply repetition operators to the last item, and + # to merge literals at the and (we're parsing grouped ( sequences ) recursively and don't treat '|' specially + # (GBNF's syntax is luckily very close to regular expressions!) + seq: list[Tuple[str, bool]] = [] + + def get_dot(): + if self._dotall: + rule = DOTALL + else: + # Accept any character... except \n and \r line break chars (\x0A and \xOD) + rule = DOT + return self._add_rule(f'dot', rule) + + def join_seq(): + nonlocal seq + ret = [] + for is_literal, g in groupby(seq, lambda x: x[1]): + if is_literal: + ret.append((''.join(x[0] for x in g), True)) + else: + ret.extend(g) + if len(ret) == 1: + return ret[0] + return (' '.join(to_rule(x) for x in seq), False) + + while i < length: + c = pattern[i] + if c == '.': + seq.append((get_dot(), False)) + i += 1 + elif c == '(': + i += 1 + if i < length: + assert pattern[i] != '?', f'Unsupported pattern syntax "{pattern[i]}" at index {i} of /{pattern}/' + seq.append((f'({to_rule(transform())})', False)) + elif c == ')': + i += 1 + assert start > 0 and pattern[start-1] == '(', f'Unbalanced parentheses; start = {start}, i = {i}, pattern = {pattern}' + return join_seq() + elif c == '[': + square_brackets = c + i += 1 + while i < length and pattern[i] != ']': + if pattern[i] == '\\': + square_brackets += pattern[i:i+2] + i += 2 + else: + square_brackets += pattern[i] + i += 1 + assert i < length, f'Unbalanced square brackets; start = {start}, i = {i}, pattern = {pattern}' + square_brackets += ']' + i += 1 + seq.append((square_brackets, False)) + elif c == '|': + seq.append(('|', False)) + i += 1 + elif c in ('*', '+', '?'): + seq[-1] = (to_rule(seq[-1]) + c, False) + i += 1 + elif c == '{': + curly_brackets = c + i += 1 + while i < length and pattern[i] != '}': + curly_brackets += pattern[i] + i += 1 + assert i < length, f'Unbalanced curly brackets; start = {start}, i = {i}, pattern = {pattern}' + curly_brackets += '}' + i += 1 + nums = [s.strip() for s in curly_brackets[1:-1].split(',')] + min_times = 0 + max_times = None + try: + if len(nums) == 1: + min_times = int(nums[0]) + max_times = min_times + else: + assert len(nums) == 2 + min_times = int(nums[0]) if nums[0] else 0 + max_times = int(nums[1]) if nums[1] else None + except ValueError: + raise ValueError(f'Invalid quantifier {curly_brackets} in /{pattern}/') + + (sub, sub_is_literal) = seq[-1] + + if not sub_is_literal: + id = sub_rule_ids.get(sub) + if id is None: + id = self._add_rule(f'{name}-{len(sub_rule_ids) + 1}', sub) + sub_rule_ids[sub] = id + sub = id + + seq[-1] = (_build_repetition(f'"{sub}"' if sub_is_literal else sub, min_times, max_times, item_rule_is_literal=sub_is_literal), False) + else: + literal = '' + while i < length: + if pattern[i] == '\\' and i < length - 1: + next = pattern[i + 1] + if next in ESCAPED_IN_REGEXPS_BUT_NOT_IN_LITERALS: + i += 1 + literal += pattern[i] + i += 1 + else: + literal += pattern[i:i+2] + i += 2 + elif pattern[i] == '"' and not self._raw_pattern: + literal += '\\"' + i += 1 + elif pattern[i] not in NON_LITERAL_SET and \ + (i == length - 1 or literal == '' or pattern[i+1] == '.' 
or pattern[i+1] not in NON_LITERAL_SET): + literal += pattern[i] + i += 1 + else: + break + if literal: + seq.append((literal, True)) + + return join_seq() + + return self._add_rule( + name, + to_rule(transform()) if self._raw_pattern \ + else "\"\\\"\" " + to_rule(transform()) + " \"\\\"\" space") + + + def _resolve_ref(self, ref): + ref_name = ref.split('/')[-1] + if ref_name not in self._rules and ref not in self._refs_being_resolved: + self._refs_being_resolved.add(ref) + resolved = self._refs[ref] + ref_name = self.visit(resolved, ref_name) + self._refs_being_resolved.remove(ref) + return ref_name + + def _generate_constant_rule(self, value): + return self._format_literal(json.dumps(value)) + + def visit(self, schema, name): + schema_type = schema.get('type') + schema_format = schema.get('format') + rule_name = name + '-' if name in RESERVED_NAMES else name or 'root' + + if (ref := schema.get('$ref')) is not None: + return self._add_rule(rule_name, self._resolve_ref(ref)) + + elif 'oneOf' in schema or 'anyOf' in schema: + return self._add_rule(rule_name, self._generate_union_rule(name, schema.get('oneOf') or schema['anyOf'])) + + elif isinstance(schema_type, list): + return self._add_rule(rule_name, self._generate_union_rule(name, [{'type': t} for t in schema_type])) + + elif 'const' in schema: + return self._add_rule(rule_name, self._generate_constant_rule(schema['const'])) + + elif 'enum' in schema: + rule = ' | '.join((self._generate_constant_rule(v) for v in schema['enum'])) + return self._add_rule(rule_name, rule) + + elif schema_type in (None, 'object') and \ + ('properties' in schema or \ + ('additionalProperties' in schema and schema['additionalProperties'] is not True)): + required = set(schema.get('required', [])) + properties = list(schema.get('properties', {}).items()) + return self._add_rule(rule_name, self._build_object_rule(properties, required, name, schema.get('additionalProperties'))) + + elif schema_type in (None, 'object') and 'allOf' in schema: + required = set() + properties = [] + hybrid_name = name + def add_component(comp_schema, is_required): + if (ref := comp_schema.get('$ref')) is not None: + comp_schema = self._refs[ref] + + if 'properties' in comp_schema: + for prop_name, prop_schema in comp_schema['properties'].items(): + properties.append((prop_name, prop_schema)) + if is_required: + required.add(prop_name) + + for t in schema['allOf']: + if 'anyOf' in t: + for tt in t['anyOf']: + add_component(tt, is_required=False) + else: + add_component(t, is_required=True) + + return self._add_rule(rule_name, self._build_object_rule(properties, required, hybrid_name, additional_properties=[])) + + elif schema_type in (None, 'array') and ('items' in schema or 'prefixItems' in schema): + items = schema.get('items') or schema['prefixItems'] + if isinstance(items, list): + return self._add_rule( + rule_name, + '"[" space ' + + ' "," space '.join( + self.visit(item, f'{name}{"-" if name else ""}tuple-{i}') + for i, item in enumerate(items)) + + ' "]" space') + else: + item_rule_name = self.visit(items, f'{name}{"-" if name else ""}item') + min_items = schema.get("minItems", 0) + max_items = schema.get("maxItems") + return self._add_rule(rule_name, '"[" space ' + _build_repetition(item_rule_name, min_items, max_items, separator_rule='"," space') + ' "]" space') + + elif schema_type in (None, 'string') and 'pattern' in schema: + return self._visit_pattern(schema['pattern'], rule_name) + + elif schema_type in (None, 'string') and re.match(r'^uuid[1-5]?$', schema_format or 
''): + return self._add_primitive( + 'root' if rule_name == 'root' else schema_format, + PRIMITIVE_RULES['uuid'] + ) + + elif schema_type in (None, 'string') and f'{schema_format}-string' in STRING_FORMAT_RULES: + prim_name = f'{schema_format}-string' + return self._add_rule(rule_name, self._add_primitive(prim_name, STRING_FORMAT_RULES[prim_name])) + + elif schema_type == 'string' and ('minLength' in schema or 'maxLength' in schema): + char_rule = self._add_primitive('char', PRIMITIVE_RULES['char']) + min_len = schema.get('minLength', 0) + max_len = schema.get('maxLength') + + return self._add_rule(rule_name, r'"\"" ' + _build_repetition(char_rule, min_len, max_len) + r' "\"" space') + + elif (schema_type == 'object') or (len(schema) == 0): + return self._add_rule(rule_name, self._add_primitive('object', PRIMITIVE_RULES['object'])) + + else: + assert schema_type in PRIMITIVE_RULES, f'Unrecognized schema: {schema}' + # TODO: support minimum, maximum, exclusiveMinimum, exclusiveMaximum at least for zero + return self._add_primitive('root' if rule_name == 'root' else schema_type, PRIMITIVE_RULES[schema_type]) + + def _add_primitive(self, name: str, rule: BuiltinRule): + n = self._add_rule(name, rule.content) + + for dep in rule.deps: + dep_rule = PRIMITIVE_RULES.get(dep) or STRING_FORMAT_RULES.get(dep) + assert dep_rule, f'Rule {dep} not known' + if dep not in self._rules: + self._add_primitive(dep, dep_rule) + return n + + def _build_object_rule(self, properties: List[Tuple[str, Any]], required: Set[str], name: str, additional_properties: Union[bool, Any]): + prop_order = self._prop_order + # sort by position in prop_order (if specified) then by original order + sorted_props = [kv[0] for _, kv in sorted(enumerate(properties), key=lambda ikv: (prop_order.get(ikv[1][0], len(prop_order)), ikv[0]))] + + prop_kv_rule_names = {} + for prop_name, prop_schema in properties: + prop_rule_name = self.visit(prop_schema, f'{name}{"-" if name else ""}{prop_name}') + prop_kv_rule_names[prop_name] = self._add_rule( + f'{name}{"-" if name else ""}{prop_name}-kv', + fr'{self._format_literal(json.dumps(prop_name))} space ":" space {prop_rule_name}' + ) + required_props = [k for k in sorted_props if k in required] + optional_props = [k for k in sorted_props if k not in required] + + if additional_properties == True or isinstance(additional_properties, dict): + sub_name = f'{name}{"-" if name else ""}additional' + value_rule = self.visit({} if additional_properties == True else additional_properties, f'{sub_name}-value') + prop_kv_rule_names["*"] = self._add_rule( + f'{sub_name}-kv', + self._add_primitive('string', PRIMITIVE_RULES['string']) + f' ":" space {value_rule}' + ) + optional_props.append("*") + + rule = '"{" space ' + rule += ' "," space '.join(prop_kv_rule_names[k] for k in required_props) + + if optional_props: + rule += ' (' + if required_props: + rule += ' "," space ( ' + + def get_recursive_refs(ks, first_is_optional): + [k, *rest] = ks + kv_rule_name = prop_kv_rule_names[k] + if k == '*': + res = self._add_rule( + f'{name}{"-" if name else ""}additional-kvs', + f'{kv_rule_name} ( "," space ' + kv_rule_name + ' )*' + ) + elif first_is_optional: + res = f'( "," space {kv_rule_name} )?' 
+ else: + res = kv_rule_name + if len(rest) > 0: + res += ' ' + self._add_rule( + f'{name}{"-" if name else ""}{k}-rest', + get_recursive_refs(rest, first_is_optional=True) + ) + return res + + rule += ' | '.join( + get_recursive_refs(optional_props[i:], first_is_optional=False) + for i in range(len(optional_props)) + ) + if required_props: + rule += ' )' + rule += ' )?' + + rule += ' "}" space' + + return rule + + def format_grammar(self): + return '\n'.join( + f'{name} ::= {rule}' + for name, rule in sorted(self._rules.items(), key=lambda kv: kv[0]) + ) +def json_schema_to_gbnf(schema: str, prop_order: Optional[List[str]] = None): + prop_order = prop_order or [] + schema = json.loads(schema) + prop_order = {name: idx for idx, name in enumerate(prop_order)} + converter = SchemaConverter(prop_order=prop_order, allow_fetch=False, dotall=False, raw_pattern=False) + schema = converter.resolve_refs(schema, "stdin") + converter.visit(schema, "") + return converter.format_grammar() diff --git a/llama-cpp-python/llama_cpp/llama_speculative.py b/llama-cpp-python/llama_cpp/llama_speculative.py new file mode 100644 index 0000000000000000000000000000000000000000..39dfb903ba43d89c83c7b5b2f7d93502716bb16a --- /dev/null +++ b/llama-cpp-python/llama_cpp/llama_speculative.py @@ -0,0 +1,64 @@ +import abc + +from typing import Any + +import numpy as np +import numpy.typing as npt + + +class LlamaDraftModel(abc.ABC): + @abc.abstractmethod + def __call__( + self, input_ids: npt.NDArray[np.intc], /, **kwargs: Any + ) -> npt.NDArray[np.intc]: + raise NotImplementedError() + + +class LlamaPromptLookupDecoding(LlamaDraftModel): + """Based on https://github.com/apoorvumang/prompt-lookup-decoding""" + + def __init__(self, max_ngram_size: int = 2, num_pred_tokens: int = 10): + self.max_ngram_size = max_ngram_size + self.num_pred_tokens = num_pred_tokens + + @staticmethod + def find_candidate_pred_tokens( + input_ids: npt.NDArray[np.intc], + max_ngram_size: int, + num_pred_tokens: int, + ): + input_length = input_ids.shape[0] + + for ngram_size in range(min(max_ngram_size, input_length - 1), 0, -1): + # Create sliding windows of size ngram_size + windows = np.lib.stride_tricks.sliding_window_view(input_ids, (ngram_size,)) + + # Convert ngram to an array for comparison + ngram_array = input_ids[-ngram_size:] + + # Find where the windows match the ngram + matches = np.all(windows == ngram_array, axis=1) + + # Get the indices of matches + match_indices = np.nonzero(matches)[0] + + # Iterate through match indices to find a valid continuation + for idx in match_indices: + start_idx = idx + ngram_size + end_idx = start_idx + num_pred_tokens + end_idx = min(end_idx, input_length) + + if start_idx < end_idx: + return input_ids[start_idx:end_idx] + + # If no match is found, return an empty array + return np.array([], dtype=np.intc) + + def __call__( + self, input_ids: npt.NDArray[np.intc], /, **kwargs: Any + ) -> npt.NDArray[np.intc]: + return self.find_candidate_pred_tokens( + input_ids=input_ids, + max_ngram_size=self.max_ngram_size, + num_pred_tokens=self.num_pred_tokens, + ) diff --git a/llama-cpp-python/llama_cpp/llama_tokenizer.py b/llama-cpp-python/llama_cpp/llama_tokenizer.py new file mode 100644 index 0000000000000000000000000000000000000000..5a8b13b7a55a0ccc0cc593b23b7c7a450b2822d0 --- /dev/null +++ b/llama-cpp-python/llama_cpp/llama_tokenizer.py @@ -0,0 +1,103 @@ +from __future__ import annotations + +import abc +from typing import ( + List, + Optional, + Any, +) + +import llama_cpp +from llama_cpp.llama_types 
import List + + +class BaseLlamaTokenizer(abc.ABC): + @abc.abstractmethod + def tokenize( + self, text: bytes, add_bos: bool = True, special: bool = True + ) -> List[int]: + """Tokenize the text into tokens. + + Args: + text: The text to tokenize. + add_bos: Whether to add a beginning of sequence token. + special: Whether to tokenize text literally or as special tokens.""" + raise NotImplementedError + + @abc.abstractmethod + def detokenize( + self, tokens: List[int], prev_tokens: Optional[List[int]] = None + ) -> bytes: + """Detokenize the tokens into text. + + Args: + tokens: The tokens to detokenize. + prev_tokens: If tokens is a continuation of a previous sequence, the previous tokens.""" + raise NotImplementedError + + +class LlamaTokenizer(BaseLlamaTokenizer): + def __init__(self, llama: llama_cpp.Llama): + self._model = llama._model # type: ignore + + def tokenize( + self, text: bytes, add_bos: bool = True, special: bool = True + ) -> List[int]: + return self._model.tokenize(text, add_bos=add_bos, special=special) + + def detokenize( + self, tokens: List[int], prev_tokens: Optional[List[int]] = None + ) -> bytes: + return self._model.detokenize(tokens) + + def encode( + self, text: str, add_bos: bool = True, special: bool = True + ) -> List[int]: + return self.tokenize( + text.encode("utf-8", errors="ignore"), add_bos=add_bos, special=special + ) + + def decode(self, tokens: List[int]) -> str: + return self.detokenize(tokens).decode("utf-8", errors="ignore") + + @classmethod + def from_ggml_file(cls, path: str) -> "LlamaTokenizer": + return cls(llama_cpp.Llama(model_path=path, vocab_only=True)) + + +class LlamaHFTokenizer(BaseLlamaTokenizer): + def __init__(self, hf_tokenizer: Any): + self.hf_tokenizer = hf_tokenizer + + def tokenize( + self, text: bytes, add_bos: bool = True, special: bool = True + ) -> List[int]: + return self.hf_tokenizer.encode( + text.decode("utf-8", errors="ignore"), add_special_tokens=special + ) + + def detokenize( + self, tokens: List[int], prev_tokens: Optional[List[int]] = None + ) -> bytes: + if prev_tokens is not None: + text = self.hf_tokenizer.decode(prev_tokens + tokens).encode("utf-8", errors="ignore") + prev_text = self.hf_tokenizer.decode(prev_tokens).encode( + "utf-8", errors="ignore" + ) + return text[len(prev_text) :] + else: + return self.hf_tokenizer.decode(tokens).encode("utf-8", errors="ignore") + + @classmethod + def from_pretrained(cls, pretrained_model_name_or_path: str) -> "LlamaHFTokenizer": + try: + from transformers import AutoTokenizer + except ImportError: + raise ImportError( + "The `transformers` library is required to use the `HFTokenizer`." + "You can install it with `pip install transformers`." + ) + hf_tokenizer = AutoTokenizer.from_pretrained( + pretrained_model_name_or_path=pretrained_model_name_or_path + ) + return cls(hf_tokenizer) diff --git a/llama-cpp-python/llama_cpp/llama_types.py b/llama-cpp-python/llama_cpp/llama_types.py new file mode 100644 index 0000000000000000000000000000000000000000..4677785ae166612a8bdc0a238e05b0dae0878ac9 --- /dev/null +++ b/llama-cpp-python/llama_cpp/llama_types.py @@ -0,0 +1,295 @@ +"""Types and request signatures for OpenAI compatibility + +NOTE: These types may change to match the OpenAI OpenAPI specification. 
+ +Based on the OpenAI OpenAPI specification: +https://github.com/openai/openai-openapi/blob/master/openapi.yaml + +""" +from typing import Any, List, Optional, Dict, Union +from typing_extensions import TypedDict, NotRequired, Literal + + +# NOTE: Defining this correctly using annotations seems to break pydantic validation. +# This is a workaround until we can figure out how to do this correctly +# JsonType = Union[None, int, str, bool, List["JsonType"], Dict[str, "JsonType"]] +JsonType = Union[None, int, str, bool, List[Any], Dict[str, Any]] + + +class EmbeddingUsage(TypedDict): + prompt_tokens: int + total_tokens: int + + +class Embedding(TypedDict): + index: int + object: str + embedding: Union[List[float], List[List[float]]] + + +class CreateEmbeddingResponse(TypedDict): + object: Literal["list"] + model: str + data: List[Embedding] + usage: EmbeddingUsage + + +class CompletionLogprobs(TypedDict): + text_offset: List[int] + token_logprobs: List[Optional[float]] + tokens: List[str] + top_logprobs: List[Optional[Dict[str, float]]] + + +class CompletionChoice(TypedDict): + text: str + index: int + logprobs: Optional[CompletionLogprobs] + finish_reason: Optional[Literal["stop", "length"]] + + +class CompletionUsage(TypedDict): + prompt_tokens: int + completion_tokens: int + total_tokens: int + + +class CreateCompletionResponse(TypedDict): + id: str + object: Literal["text_completion"] + created: int + model: str + choices: List[CompletionChoice] + usage: NotRequired[CompletionUsage] + + +class ChatCompletionResponseFunctionCall(TypedDict): + name: str + arguments: str + + +class ChatCompletionResponseMessage(TypedDict): + content: Optional[str] + tool_calls: NotRequired["ChatCompletionMessageToolCalls"] + role: Literal["assistant", "function"] # NOTE: "function" may be incorrect here + function_call: NotRequired[ChatCompletionResponseFunctionCall] # DEPRECATED + + +class ChatCompletionFunction(TypedDict): + name: str + description: NotRequired[str] + parameters: Dict[str, JsonType] # TODO: make this more specific + + +class ChatCompletionResponseChoice(TypedDict): + index: int + message: "ChatCompletionResponseMessage" + logprobs: Optional[CompletionLogprobs] + finish_reason: Optional[str] + + +class CreateChatCompletionResponse(TypedDict): + id: str + object: Literal["chat.completion"] + created: int + model: str + choices: List["ChatCompletionResponseChoice"] + usage: CompletionUsage + + +class ChatCompletionMessageToolCallChunkFunction(TypedDict): + name: Optional[str] + arguments: str + + +class ChatCompletionMessageToolCallChunk(TypedDict): + index: int + id: NotRequired[str] + type: Literal["function"] + function: ChatCompletionMessageToolCallChunkFunction + + +class ChatCompletionStreamResponseDeltaEmpty(TypedDict): + pass + + +class ChatCompletionStreamResponseDeltaFunctionCall(TypedDict): + name: str + arguments: str + + +class ChatCompletionStreamResponseDelta(TypedDict): + content: NotRequired[Optional[str]] + function_call: NotRequired[ + Optional[ChatCompletionStreamResponseDeltaFunctionCall] + ] # DEPRECATED + tool_calls: NotRequired[Optional[List[ChatCompletionMessageToolCallChunk]]] + role: NotRequired[Optional[Literal["system", "user", "assistant", "tool"]]] + + +class ChatCompletionStreamResponseChoice(TypedDict): + index: int + delta: Union[ + ChatCompletionStreamResponseDelta, ChatCompletionStreamResponseDeltaEmpty + ] + finish_reason: Optional[Literal["stop", "length", "tool_calls", "function_call"]] + logprobs: NotRequired[Optional[CompletionLogprobs]] + + +class 
CreateChatCompletionStreamResponse(TypedDict): + id: str + model: str + object: Literal["chat.completion.chunk"] + created: int + choices: List[ChatCompletionStreamResponseChoice] + + +class ChatCompletionFunctions(TypedDict): + name: str + description: NotRequired[str] + parameters: Dict[str, JsonType] # TODO: make this more specific + + +class ChatCompletionFunctionCallOption(TypedDict): + name: str + + +class ChatCompletionRequestResponseFormat(TypedDict): + type: Literal["text", "json_object"] + schema: NotRequired[JsonType] # https://docs.endpoints.anyscale.com/guides/json_mode/ + + +class ChatCompletionRequestMessageContentPartText(TypedDict): + type: Literal["text"] + text: str + + +class ChatCompletionRequestMessageContentPartImageImageUrl(TypedDict): + url: str + detail: NotRequired[Literal["auto", "low", "high"]] + + +class ChatCompletionRequestMessageContentPartImage(TypedDict): + type: Literal["image_url"] + image_url: Union[str, ChatCompletionRequestMessageContentPartImageImageUrl] + + +ChatCompletionRequestMessageContentPart = Union[ + ChatCompletionRequestMessageContentPartText, + ChatCompletionRequestMessageContentPartImage, +] + + +class ChatCompletionRequestSystemMessage(TypedDict): + role: Literal["system"] + content: Optional[str] + + +class ChatCompletionRequestUserMessage(TypedDict): + role: Literal["user"] + content: Optional[Union[str, List[ChatCompletionRequestMessageContentPart]]] + + +class ChatCompletionMessageToolCallFunction(TypedDict): + name: str + arguments: str + + +class ChatCompletionMessageToolCall(TypedDict): + id: str + type: Literal["function"] + function: ChatCompletionMessageToolCallFunction + + +ChatCompletionMessageToolCalls = List[ChatCompletionMessageToolCall] + + +class ChatCompletionRequestAssistantMessageFunctionCall(TypedDict): + name: str + arguments: str + + +class ChatCompletionRequestAssistantMessage(TypedDict): + role: Literal["assistant"] + content: Optional[str] + tool_calls: NotRequired[ChatCompletionMessageToolCalls] + function_call: NotRequired[ + ChatCompletionRequestAssistantMessageFunctionCall + ] # DEPRECATED + + +class ChatCompletionRequestToolMessage(TypedDict): + role: Literal["tool"] + content: Optional[str] + tool_call_id: str + + +class ChatCompletionRequestFunctionMessage(TypedDict): + role: Literal["function"] + content: Optional[str] + name: str + + +ChatCompletionRequestMessage = Union[ + ChatCompletionRequestSystemMessage, + ChatCompletionRequestUserMessage, + ChatCompletionRequestAssistantMessage, + ChatCompletionRequestUserMessage, + ChatCompletionRequestToolMessage, + ChatCompletionRequestFunctionMessage, +] + + +class ChatCompletionRequestFunctionCallOption(TypedDict): + name: str + + +ChatCompletionRequestFunctionCall = Union[ + Literal["none", "auto"], ChatCompletionRequestFunctionCallOption +] + +ChatCompletionFunctionParameters = Dict[str, JsonType] # TODO: make this more specific + + +class ChatCompletionToolFunction(TypedDict): + name: str + description: NotRequired[str] + parameters: ChatCompletionFunctionParameters + + +class ChatCompletionTool(TypedDict): + type: Literal["function"] + function: ChatCompletionToolFunction + + +class ChatCompletionNamedToolChoiceFunction(TypedDict): + name: str + + +class ChatCompletionNamedToolChoice(TypedDict): + type: Literal["function"] + function: ChatCompletionNamedToolChoiceFunction + + +ChatCompletionToolChoiceOption = Union[ + Literal["none", "auto"], ChatCompletionNamedToolChoice +] + + +# NOTE: The following type names are not part of the OpenAI OpenAPI 
specification +# and will be removed in a future major release. + +EmbeddingData = Embedding +CompletionChunk = CreateCompletionResponse +Completion = CreateCompletionResponse +CreateCompletionStreamResponse = CreateCompletionResponse +ChatCompletionMessage = ChatCompletionResponseMessage +ChatCompletionChoice = ChatCompletionResponseChoice +ChatCompletion = CreateChatCompletionResponse +ChatCompletionChunkDeltaEmpty = ChatCompletionStreamResponseDeltaEmpty +ChatCompletionChunkChoice = ChatCompletionStreamResponseChoice +ChatCompletionChunkDelta = ChatCompletionStreamResponseDelta +ChatCompletionChunk = CreateChatCompletionStreamResponse +ChatCompletionStreamResponse = CreateChatCompletionStreamResponse +ChatCompletionResponseFunction = ChatCompletionFunction +ChatCompletionFunctionCall = ChatCompletionResponseFunctionCall diff --git a/llama-cpp-python/llama_cpp/llava_cpp.py b/llama-cpp-python/llama_cpp/llava_cpp.py new file mode 100644 index 0000000000000000000000000000000000000000..3ded1f25d366457046d90f6743200e9e2e0ff9db --- /dev/null +++ b/llama-cpp-python/llama_cpp/llava_cpp.py @@ -0,0 +1,242 @@ +from __future__ import annotations + +import sys +import os +import ctypes +import functools +from ctypes import ( + c_bool, + c_char_p, + c_int, + c_uint8, + c_float, + c_void_p, + POINTER, + _Pointer, # type: ignore + Structure, +) +import pathlib +from typing import ( + List, + Union, + NewType, + Optional, + TypeVar, + Callable, + Any, + TYPE_CHECKING, + Generic, +) +from typing_extensions import TypeAlias + +import llama_cpp.llama_cpp as llama_cpp + + +# Load the library +def _load_shared_library(lib_base_name: str): + # Construct the paths to the possible shared library names + _base_path = pathlib.Path(os.path.abspath(os.path.dirname(__file__))) + # Searching for the library in the current directory under the name "libllama" (default name + # for llamacpp) and "llama" (default name for this repo) + _lib_paths: List[pathlib.Path] = [] + # Determine the file extension based on the platform + if sys.platform.startswith("linux"): + _lib_paths += [ + _base_path / f"lib{lib_base_name}.so", + ] + elif sys.platform == "darwin": + _lib_paths += [ + _base_path / f"lib{lib_base_name}.so", + _base_path / f"lib{lib_base_name}.dylib", + ] + elif sys.platform == "win32": + _lib_paths += [ + _base_path / f"{lib_base_name}.dll", + _base_path / f"lib{lib_base_name}.dll", + ] + else: + raise RuntimeError("Unsupported platform") + + if "LLAVA_CPP_LIB" in os.environ: + lib_base_name = os.environ["LLAVA_CPP_LIB"] + _lib = pathlib.Path(lib_base_name) + _base_path = _lib.parent.resolve() + _lib_paths = [_lib.resolve()] + + cdll_args = dict() # type: ignore + # Add the library directory to the DLL search path on Windows (if needed) + if sys.platform == "win32" and sys.version_info >= (3, 8): + os.add_dll_directory(str(_base_path)) + if "CUDA_PATH" in os.environ: + os.add_dll_directory(os.path.join(os.environ["CUDA_PATH"], "bin")) + os.add_dll_directory(os.path.join(os.environ["CUDA_PATH"], "lib")) + cdll_args["winmode"] = ctypes.RTLD_GLOBAL + + # Try to load the shared library, handling potential errors + for _lib_path in _lib_paths: + if _lib_path.exists(): + try: + return ctypes.CDLL(str(_lib_path), **cdll_args) # type: ignore + except Exception as e: + raise RuntimeError(f"Failed to load shared library '{_lib_path}': {e}") + + raise FileNotFoundError( + f"Shared library with base name '{lib_base_name}' not found" + ) + + +# Specify the base name of the shared library to load +_libllava_base_name = "llava" + 
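One practical note on the loader above: when the `LLAVA_CPP_LIB` environment variable is set, it replaces the bundled search paths entirely, which is useful for testing a self-built llava library. A minimal sketch, assuming you have compiled `libllava.so` yourself (the path below is purely illustrative):

```python
# Sketch only: point the llava loader at a custom build of libllava.
# Assumption: /opt/llama.cpp/build/libllava.so exists; the variable must be set
# before llama_cpp.llava_cpp is imported, because loading happens at import time.
import os

os.environ["LLAVA_CPP_LIB"] = "/opt/llama.cpp/build/libllava.so"

import llama_cpp.llava_cpp as llava_cpp  # _load_shared_library now uses only that path
```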
+# Load the library +_libllava = _load_shared_library(_libllava_base_name) + +# ctypes helper + +if TYPE_CHECKING: + CtypesCData = TypeVar("CtypesCData", bound=ctypes._CData) # type: ignore + + CtypesArray: TypeAlias = ctypes.Array[CtypesCData] # type: ignore + + CtypesPointer: TypeAlias = ctypes._Pointer[CtypesCData] # type: ignore + + CtypesVoidPointer: TypeAlias = ctypes.c_void_p + + class CtypesRef(Generic[CtypesCData]): + pass + + CtypesPointerOrRef: TypeAlias = Union[ + CtypesPointer[CtypesCData], CtypesRef[CtypesCData] + ] + + CtypesFuncPointer: TypeAlias = ctypes._FuncPointer # type: ignore + +F = TypeVar("F", bound=Callable[..., Any]) + + +def ctypes_function_for_shared_library(lib: ctypes.CDLL): + def ctypes_function( + name: str, argtypes: List[Any], restype: Any, enabled: bool = True + ): + def decorator(f: F) -> F: + if enabled: + func = getattr(lib, name) + func.argtypes = argtypes + func.restype = restype + functools.wraps(f)(func) + return func + else: + return f + + return decorator + + return ctypes_function + + +ctypes_function = ctypes_function_for_shared_library(_libllava) + + +################################################ +# llava.h +################################################ + +# struct clip_ctx; +clip_ctx_p = NewType("clip_ctx_p", int) +clip_ctx_p_ctypes = c_void_p + + +# struct llava_image_embed { +# float * embed; +# int n_image_pos; +# }; +class llava_image_embed(Structure): + _fields_ = [ + ("embed", POINTER(c_float)), + ("n_image_pos", c_int), + ] + + +# /** sanity check for clip <-> llava embed size match */ +# LLAVA_API bool llava_validate_embed_size(const llama_context * ctx_llama, const clip_ctx * ctx_clip); +@ctypes_function( + "llava_validate_embed_size", + [llama_cpp.llama_context_p_ctypes, clip_ctx_p_ctypes], + c_bool, +) +def llava_validate_embed_size( + ctx_llama: llama_cpp.llama_context_p, ctx_clip: clip_ctx_p, / +) -> bool: ... + + +# /** build an image embed from image file bytes */ +# LLAVA_API struct llava_image_embed * llava_image_embed_make_with_bytes(struct clip_ctx * ctx_clip, int n_threads, const unsigned char * image_bytes, int image_bytes_length); +@ctypes_function( + "llava_image_embed_make_with_bytes", + [clip_ctx_p_ctypes, c_int, POINTER(c_uint8), c_int], + POINTER(llava_image_embed), +) +def llava_image_embed_make_with_bytes( + ctx_clip: clip_ctx_p, + n_threads: Union[c_int, int], + image_bytes: CtypesArray[c_uint8], + image_bytes_length: Union[c_int, int], + /, +) -> "_Pointer[llava_image_embed]": ... + + +# /** build an image embed from a path to an image filename */ +# LLAVA_API struct llava_image_embed * llava_image_embed_make_with_filename(struct clip_ctx * ctx_clip, int n_threads, const char * image_path); +@ctypes_function( + "llava_image_embed_make_with_filename", + [clip_ctx_p_ctypes, c_int, c_char_p], + POINTER(llava_image_embed), +) +def llava_image_embed_make_with_filename( + ctx_clip: clip_ctx_p, n_threads: Union[c_int, int], image_path: bytes, / +) -> "_Pointer[llava_image_embed]": ... + + +# LLAVA_API void llava_image_embed_free(struct llava_image_embed * embed); +# /** free an embedding made with llava_image_embed_make_* */ +@ctypes_function("llava_image_embed_free", [POINTER(llava_image_embed)], None) +def llava_image_embed_free(embed: "_Pointer[llava_image_embed]", /): ... + + +# /** write the image represented by embed into the llama context with batch size n_batch, starting at context pos n_past. on completion, n_past points to the next position in the context after the image embed. 
*/ +# LLAVA_API bool llava_eval_image_embed(struct llama_context * ctx_llama, const struct llava_image_embed * embed, int n_batch, int * n_past); +@ctypes_function( + "llava_eval_image_embed", + [ + llama_cpp.llama_context_p_ctypes, + POINTER(llava_image_embed), + c_int, + POINTER(c_int), + ], + c_bool, +) +def llava_eval_image_embed( + ctx_llama: llama_cpp.llama_context_p, + embed: "_Pointer[llava_image_embed]", + n_batch: Union[c_int, int], + n_past: "_Pointer[c_int]", + /, +) -> bool: ... + + +################################################ +# clip.h +################################################ + + +# /** load mmproj model */ +# CLIP_API struct clip_ctx * clip_model_load (const char * fname, int verbosity); +@ctypes_function("clip_model_load", [c_char_p, c_int], clip_ctx_p_ctypes) +def clip_model_load( + fname: bytes, verbosity: Union[c_int, int], / +) -> Optional[clip_ctx_p]: ... + + +# /** free mmproj model */ +# CLIP_API void clip_free(struct clip_ctx * ctx); +@ctypes_function("clip_free", [clip_ctx_p_ctypes], None) +def clip_free(ctx: clip_ctx_p, /): ... diff --git a/llama-cpp-python/llama_cpp/py.typed b/llama-cpp-python/llama_cpp/py.typed new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/llama-cpp-python/llama_cpp/server/__init__.py b/llama-cpp-python/llama_cpp/server/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/llama-cpp-python/llama_cpp/server/__main__.py b/llama-cpp-python/llama_cpp/server/__main__.py new file mode 100644 index 0000000000000000000000000000000000000000..a6f1f4e9caed0c617c89f1144189ad846e1436f1 --- /dev/null +++ b/llama-cpp-python/llama_cpp/server/__main__.py @@ -0,0 +1,97 @@ +"""Example FastAPI server for llama.cpp. + +To run this example: + +```bash +pip install fastapi uvicorn sse-starlette pydantic-settings +export MODEL=../models/7B/... +``` + +Then run: +``` +uvicorn llama_cpp.server.app:create_app --reload +``` + +or + +``` +python3 -m llama_cpp.server +``` + +Then visit http://localhost:8000/docs to see the interactive API docs. + +""" +from __future__ import annotations + +import os +import sys +import argparse + +import uvicorn + +from llama_cpp.server.app import create_app +from llama_cpp.server.settings import ( + Settings, + ServerSettings, + ModelSettings, + ConfigFileSettings, +) +from llama_cpp.server.cli import add_args_from_model, parse_model_from_args + + +def main(): + description = "🦙 Llama.cpp python server. 
Host your own LLMs!🚀" + parser = argparse.ArgumentParser(description=description) + + add_args_from_model(parser, Settings) + parser.add_argument( + "--config_file", + type=str, + help="Path to a config file to load.", + ) + server_settings: ServerSettings | None = None + model_settings: list[ModelSettings] = [] + args = parser.parse_args() + try: + # Load server settings from config_file if provided + config_file = os.environ.get("CONFIG_FILE", args.config_file) + if config_file: + if not os.path.exists(config_file): + raise ValueError(f"Config file {config_file} not found!") + with open(config_file, "rb") as f: + # Check if yaml file + if config_file.endswith(".yaml") or config_file.endswith(".yml"): + import yaml + import json + + config_file_settings = ConfigFileSettings.model_validate_json( + json.dumps(yaml.safe_load(f)) + ) + else: + config_file_settings = ConfigFileSettings.model_validate_json(f.read()) + server_settings = ServerSettings.model_validate(config_file_settings) + model_settings = config_file_settings.models + else: + server_settings = parse_model_from_args(ServerSettings, args) + model_settings = [parse_model_from_args(ModelSettings, args)] + except Exception as e: + print(e, file=sys.stderr) + parser.print_help() + sys.exit(1) + assert server_settings is not None + assert model_settings is not None + app = create_app( + server_settings=server_settings, + model_settings=model_settings, + ) + uvicorn.run( + app, + host=os.getenv("HOST", server_settings.host), + port=int(os.getenv("PORT", server_settings.port)), + ssl_keyfile=server_settings.ssl_keyfile, + ssl_certfile=server_settings.ssl_certfile, + ) + + +if __name__ == "__main__": + main() diff --git a/llama-cpp-python/llama_cpp/server/app.py b/llama-cpp-python/llama_cpp/server/app.py new file mode 100644 index 0000000000000000000000000000000000000000..b6ed9b1b6b6e46a1f356de9e3db3fa53212e83b6 --- /dev/null +++ b/llama-cpp-python/llama_cpp/server/app.py @@ -0,0 +1,558 @@ +from __future__ import annotations + +import os +import json + +from threading import Lock +from functools import partial +from typing import Iterator, List, Optional, Union, Dict + +import llama_cpp + +import anyio +from anyio.streams.memory import MemoryObjectSendStream +from starlette.concurrency import run_in_threadpool, iterate_in_threadpool +from fastapi import Depends, FastAPI, APIRouter, Request, HTTPException, status, Body +from fastapi.middleware import Middleware +from fastapi.middleware.cors import CORSMiddleware +from fastapi.security import HTTPBearer +from sse_starlette.sse import EventSourceResponse +from starlette_context.plugins import RequestIdPlugin # type: ignore +from starlette_context.middleware import RawContextMiddleware + +from llama_cpp.server.model import ( + LlamaProxy, +) +from llama_cpp.server.settings import ( + ConfigFileSettings, + Settings, + ModelSettings, + ServerSettings, +) +from llama_cpp.server.types import ( + CreateCompletionRequest, + CreateEmbeddingRequest, + CreateChatCompletionRequest, + ModelList, + TokenizeInputRequest, + TokenizeInputResponse, + TokenizeInputCountResponse, + DetokenizeInputRequest, + DetokenizeInputResponse, +) +from llama_cpp.server.errors import RouteErrorHandler + + +router = APIRouter(route_class=RouteErrorHandler) + +_server_settings: Optional[ServerSettings] = None + + +def set_server_settings(server_settings: ServerSettings): + global _server_settings + _server_settings = server_settings + + +def get_server_settings(): + yield _server_settings + + +_llama_proxy: 
Optional[LlamaProxy] = None + +llama_outer_lock = Lock() +llama_inner_lock = Lock() + + +def set_llama_proxy(model_settings: List[ModelSettings]): + global _llama_proxy + _llama_proxy = LlamaProxy(models=model_settings) + + +def get_llama_proxy(): + # NOTE: This double lock allows the currently streaming llama model to + # check if any other requests are pending in the same thread and cancel + # the stream if so. + llama_outer_lock.acquire() + release_outer_lock = True + try: + llama_inner_lock.acquire() + try: + llama_outer_lock.release() + release_outer_lock = False + yield _llama_proxy + finally: + llama_inner_lock.release() + finally: + if release_outer_lock: + llama_outer_lock.release() + + +_ping_message_factory = None + +def set_ping_message_factory(factory): + global _ping_message_factory + _ping_message_factory = factory + + +def create_app( + settings: Settings | None = None, + server_settings: ServerSettings | None = None, + model_settings: List[ModelSettings] | None = None, +): + config_file = os.environ.get("CONFIG_FILE", None) + if config_file is not None: + if not os.path.exists(config_file): + raise ValueError(f"Config file {config_file} not found!") + with open(config_file, "rb") as f: + # Check if yaml file + if config_file.endswith(".yaml") or config_file.endswith(".yml"): + import yaml + + config_file_settings = ConfigFileSettings.model_validate_json( + json.dumps(yaml.safe_load(f)) + ) + else: + config_file_settings = ConfigFileSettings.model_validate_json(f.read()) + server_settings = ServerSettings.model_validate(config_file_settings) + model_settings = config_file_settings.models + + if server_settings is None and model_settings is None: + if settings is None: + settings = Settings() + server_settings = ServerSettings.model_validate(settings) + model_settings = [ModelSettings.model_validate(settings)] + + assert ( + server_settings is not None and model_settings is not None + ), "server_settings and model_settings must be provided together" + + set_server_settings(server_settings) + middleware = [Middleware(RawContextMiddleware, plugins=(RequestIdPlugin(),))] + app = FastAPI( + middleware=middleware, + title="🦙 llama.cpp Python API", + version=llama_cpp.__version__, + ) + app.add_middleware( + CORSMiddleware, + allow_origins=["*"], + allow_credentials=True, + allow_methods=["*"], + allow_headers=["*"], + ) + app.include_router(router) + + assert model_settings is not None + set_llama_proxy(model_settings=model_settings) + + if server_settings.disable_ping_events: + set_ping_message_factory(lambda: bytes()) + + return app + + +async def get_event_publisher( + request: Request, + inner_send_chan: MemoryObjectSendStream, + iterator: Iterator, +): + async with inner_send_chan: + try: + async for chunk in iterate_in_threadpool(iterator): + await inner_send_chan.send(dict(data=json.dumps(chunk))) + if await request.is_disconnected(): + raise anyio.get_cancelled_exc_class()() + if ( + next(get_server_settings()).interrupt_requests + and llama_outer_lock.locked() + ): + await inner_send_chan.send(dict(data="[DONE]")) + raise anyio.get_cancelled_exc_class()() + await inner_send_chan.send(dict(data="[DONE]")) + except anyio.get_cancelled_exc_class() as e: + print("disconnected") + with anyio.move_on_after(1, shield=True): + print(f"Disconnected from client (via refresh/close) {request.client}") + raise e + + +def _logit_bias_tokens_to_input_ids( + llama: llama_cpp.Llama, + logit_bias: Dict[str, float], +) -> Dict[str, float]: + to_bias: Dict[str, float] = {} + for token, 
score in logit_bias.items(): + token = token.encode("utf-8") + for input_id in llama.tokenize(token, add_bos=False, special=True): + to_bias[str(input_id)] = score + return to_bias + + +# Setup Bearer authentication scheme +bearer_scheme = HTTPBearer(auto_error=False) + + +async def authenticate( + settings: Settings = Depends(get_server_settings), + authorization: Optional[str] = Depends(bearer_scheme), +): + # Skip API key check if it's not set in settings + if settings.api_key is None: + return True + + # check bearer credentials against the api_key + if authorization and authorization.credentials == settings.api_key: + # api key is valid + return authorization.credentials + + # raise http error 401 + raise HTTPException( + status_code=status.HTTP_401_UNAUTHORIZED, + detail="Invalid API key", + ) + + +openai_v1_tag = "OpenAI V1" + + +@router.post( + "/v1/completions", + summary="Completion", + dependencies=[Depends(authenticate)], + response_model=Union[ + llama_cpp.CreateCompletionResponse, + str, + ], + responses={ + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "anyOf": [ + {"$ref": "#/components/schemas/CreateCompletionResponse"} + ], + "title": "Completion response, when stream=False", + } + }, + "text/event-stream": { + "schema": { + "type": "string", + "title": "Server Side Streaming response, when stream=True. " + + "See SSE format: https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format", # noqa: E501 + "example": """data: {... see CreateCompletionResponse ...} \\n\\n data: ... \\n\\n ... data: [DONE]""", + } + }, + }, + } + }, + tags=[openai_v1_tag], +) +@router.post( + "/v1/engines/copilot-codex/completions", + include_in_schema=False, + dependencies=[Depends(authenticate)], + tags=[openai_v1_tag], +) +async def create_completion( + request: Request, + body: CreateCompletionRequest, + llama_proxy: LlamaProxy = Depends(get_llama_proxy), +) -> llama_cpp.Completion: + if isinstance(body.prompt, list): + assert len(body.prompt) <= 1 + body.prompt = body.prompt[0] if len(body.prompt) > 0 else "" + + llama = llama_proxy( + body.model + if request.url.path != "/v1/engines/copilot-codex/completions" + else "copilot-codex" + ) + + exclude = { + "n", + "best_of", + "logit_bias_type", + "user", + } + kwargs = body.model_dump(exclude=exclude) + + if body.logit_bias is not None: + kwargs["logit_bias"] = ( + _logit_bias_tokens_to_input_ids(llama, body.logit_bias) + if body.logit_bias_type == "tokens" + else body.logit_bias + ) + + if body.grammar is not None: + kwargs["grammar"] = llama_cpp.LlamaGrammar.from_string(body.grammar) + + iterator_or_completion: Union[ + llama_cpp.CreateCompletionResponse, + Iterator[llama_cpp.CreateCompletionStreamResponse], + ] = await run_in_threadpool(llama, **kwargs) + + if isinstance(iterator_or_completion, Iterator): + # EAFP: It's easier to ask for forgiveness than permission + first_response = await run_in_threadpool(next, iterator_or_completion) + + # If no exception was raised from first_response, we can assume that + # the iterator is valid and we can use it to stream the response. 
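For context on the streaming branch here: with `stream=True` the endpoint replies with server-sent events, one `data: {...}` chunk at a time and a final `data: [DONE]`, as described in the route's OpenAPI docs above. A rough client-side sketch, not part of the server itself (host, port, prompt, and the use of the `requests` package are all assumptions):

```python
# Sketch: consume the /v1/completions SSE stream produced by the route above.
import json
import requests

resp = requests.post(
    "http://localhost:8000/v1/completions",
    json={"prompt": "Q: What is a GGUF file?\nA:", "max_tokens": 64, "stream": True},
    stream=True,
)
for raw in resp.iter_lines():
    if not raw.startswith(b"data: "):
        continue  # skip blank keep-alive lines and SSE comments
    payload = raw[len(b"data: "):].decode("utf-8")
    if payload == "[DONE]":
        break
    chunk = json.loads(payload)
    print(chunk["choices"][0]["text"], end="", flush=True)
```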
+ def iterator() -> Iterator[llama_cpp.CreateCompletionStreamResponse]: + yield first_response + yield from iterator_or_completion + + send_chan, recv_chan = anyio.create_memory_object_stream(10) + return EventSourceResponse( + recv_chan, + data_sender_callable=partial( # type: ignore + get_event_publisher, + request=request, + inner_send_chan=send_chan, + iterator=iterator(), + ), + sep="\n", + ping_message_factory=_ping_message_factory, + ) + else: + return iterator_or_completion + + +@router.post( + "/v1/embeddings", + summary="Embedding", + dependencies=[Depends(authenticate)], + tags=[openai_v1_tag], +) +async def create_embedding( + request: CreateEmbeddingRequest, + llama_proxy: LlamaProxy = Depends(get_llama_proxy), +): + return await run_in_threadpool( + llama_proxy(request.model).create_embedding, + **request.model_dump(exclude={"user"}), + ) + + +@router.post( + "/v1/chat/completions", + summary="Chat", + dependencies=[Depends(authenticate)], + response_model=Union[llama_cpp.ChatCompletion, str], + responses={ + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "anyOf": [ + { + "$ref": "#/components/schemas/CreateChatCompletionResponse" + } + ], + "title": "Completion response, when stream=False", + } + }, + "text/event-stream": { + "schema": { + "type": "string", + "title": "Server Side Streaming response, when stream=True" + + "See SSE format: https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format", # noqa: E501 + "example": """data: {... see CreateChatCompletionResponse ...} \\n\\n data: ... \\n\\n ... data: [DONE]""", + } + }, + }, + } + }, + tags=[openai_v1_tag], +) +async def create_chat_completion( + request: Request, + body: CreateChatCompletionRequest = Body( + openapi_examples={ + "normal": { + "summary": "Chat Completion", + "value": { + "model": "gpt-3.5-turbo", + "messages": [ + {"role": "system", "content": "You are a helpful assistant."}, + {"role": "user", "content": "What is the capital of France?"}, + ], + }, + }, + "json_mode": { + "summary": "JSON Mode", + "value": { + "model": "gpt-3.5-turbo", + "messages": [ + {"role": "system", "content": "You are a helpful assistant."}, + {"role": "user", "content": "Who won the world series in 2020"}, + ], + "response_format": { "type": "json_object" } + }, + }, + "tool_calling": { + "summary": "Tool Calling", + "value": { + "model": "gpt-3.5-turbo", + "messages": [ + {"role": "system", "content": "You are a helpful assistant."}, + {"role": "user", "content": "Extract Jason is 30 years old."}, + ], + "tools": [ + { + "type": "function", + "function": { + "name": "User", + "description": "User record", + "parameters": { + "type": "object", + "properties": { + "name": {"type": "string"}, + "age": {"type": "number"}, + }, + "required": ["name", "age"], + }, + } + } + ], + "tool_choice": { + "type": "function", + "function": { + "name": "User", + } + } + }, + }, + "logprobs": { + "summary": "Logprobs", + "value": { + "model": "gpt-3.5-turbo", + "messages": [ + {"role": "system", "content": "You are a helpful assistant."}, + {"role": "user", "content": "What is the capital of France?"}, + ], + "logprobs": True, + "top_logprobs": 10 + }, + }, + } + ), + llama_proxy: LlamaProxy = Depends(get_llama_proxy), +) -> llama_cpp.ChatCompletion: + exclude = { + "n", + "logit_bias_type", + "user", + } + kwargs = body.model_dump(exclude=exclude) + llama = llama_proxy(body.model) + if body.logit_bias is not None: + 
kwargs["logit_bias"] = ( + _logit_bias_tokens_to_input_ids(llama, body.logit_bias) + if body.logit_bias_type == "tokens" + else body.logit_bias + ) + + if body.grammar is not None: + kwargs["grammar"] = llama_cpp.LlamaGrammar.from_string(body.grammar) + + iterator_or_completion: Union[ + llama_cpp.ChatCompletion, Iterator[llama_cpp.ChatCompletionChunk] + ] = await run_in_threadpool(llama.create_chat_completion, **kwargs) + + if isinstance(iterator_or_completion, Iterator): + # EAFP: It's easier to ask for forgiveness than permission + first_response = await run_in_threadpool(next, iterator_or_completion) + + # If no exception was raised from first_response, we can assume that + # the iterator is valid and we can use it to stream the response. + def iterator() -> Iterator[llama_cpp.ChatCompletionChunk]: + yield first_response + yield from iterator_or_completion + + send_chan, recv_chan = anyio.create_memory_object_stream(10) + return EventSourceResponse( + recv_chan, + data_sender_callable=partial( # type: ignore + get_event_publisher, + request=request, + inner_send_chan=send_chan, + iterator=iterator(), + ), + sep="\n", + ping_message_factory=_ping_message_factory, + ) + else: + return iterator_or_completion + + +@router.get( + "/v1/models", + summary="Models", + dependencies=[Depends(authenticate)], + tags=[openai_v1_tag], +) +async def get_models( + llama_proxy: LlamaProxy = Depends(get_llama_proxy), +) -> ModelList: + return { + "object": "list", + "data": [ + { + "id": model_alias, + "object": "model", + "owned_by": "me", + "permissions": [], + } + for model_alias in llama_proxy + ], + } + + +extras_tag = "Extras" + + +@router.post( + "/extras/tokenize", + summary="Tokenize", + dependencies=[Depends(authenticate)], + tags=[extras_tag], +) +async def tokenize( + body: TokenizeInputRequest, + llama_proxy: LlamaProxy = Depends(get_llama_proxy), +) -> TokenizeInputResponse: + tokens = llama_proxy(body.model).tokenize(body.input.encode("utf-8"), special=True) + + return TokenizeInputResponse(tokens=tokens) + + +@router.post( + "/extras/tokenize/count", + summary="Tokenize Count", + dependencies=[Depends(authenticate)], + tags=[extras_tag], +) +async def count_query_tokens( + body: TokenizeInputRequest, + llama_proxy: LlamaProxy = Depends(get_llama_proxy), +) -> TokenizeInputCountResponse: + tokens = llama_proxy(body.model).tokenize(body.input.encode("utf-8"), special=True) + + return TokenizeInputCountResponse(count=len(tokens)) + + +@router.post( + "/extras/detokenize", + summary="Detokenize", + dependencies=[Depends(authenticate)], + tags=[extras_tag], +) +async def detokenize( + body: DetokenizeInputRequest, + llama_proxy: LlamaProxy = Depends(get_llama_proxy), +) -> DetokenizeInputResponse: + text = llama_proxy(body.model).detokenize(body.tokens).decode("utf-8") + + return DetokenizeInputResponse(text=text) diff --git a/llama-cpp-python/llama_cpp/server/cli.py b/llama-cpp-python/llama_cpp/server/cli.py new file mode 100644 index 0000000000000000000000000000000000000000..3dd00767671c5e9dac5a2ab8f4f1331531294b60 --- /dev/null +++ b/llama-cpp-python/llama_cpp/server/cli.py @@ -0,0 +1,97 @@ +from __future__ import annotations + +import argparse + +from typing import List, Literal, Union, Any, Type, TypeVar + +from pydantic import BaseModel + + +def _get_base_type(annotation: Type[Any]) -> Type[Any]: + if getattr(annotation, "__origin__", None) is Literal: + assert hasattr(annotation, "__args__") and len(annotation.__args__) >= 1 # type: ignore + return type(annotation.__args__[0]) # type: 
ignore + elif getattr(annotation, "__origin__", None) is Union: + assert hasattr(annotation, "__args__") and len(annotation.__args__) >= 1 # type: ignore + non_optional_args: List[Type[Any]] = [ + arg for arg in annotation.__args__ if arg is not type(None) # type: ignore + ] + if non_optional_args: + return _get_base_type(non_optional_args[0]) + elif ( + getattr(annotation, "__origin__", None) is list + or getattr(annotation, "__origin__", None) is List + ): + assert hasattr(annotation, "__args__") and len(annotation.__args__) >= 1 # type: ignore + return _get_base_type(annotation.__args__[0]) # type: ignore + return annotation + + +def _contains_list_type(annotation: Type[Any] | None) -> bool: + origin = getattr(annotation, "__origin__", None) + + if origin is list or origin is List: + return True + elif origin in (Literal, Union): + return any(_contains_list_type(arg) for arg in annotation.__args__) # type: ignore + else: + return False + + +def _parse_bool_arg(arg: str | bytes | bool) -> bool: + if isinstance(arg, bytes): + arg = arg.decode("utf-8") + + true_values = {"1", "on", "t", "true", "y", "yes"} + false_values = {"0", "off", "f", "false", "n", "no"} + + arg_str = str(arg).lower().strip() + + if arg_str in true_values: + return True + elif arg_str in false_values: + return False + else: + raise ValueError(f"Invalid boolean argument: {arg}") + + +def add_args_from_model(parser: argparse.ArgumentParser, model: Type[BaseModel]): + """Add arguments from a pydantic model to an argparse parser.""" + + for name, field in model.model_fields.items(): + description = field.description + if field.default and description and not field.is_required(): + description += f" (default: {field.default})" + base_type = ( + _get_base_type(field.annotation) if field.annotation is not None else str + ) + list_type = _contains_list_type(field.annotation) + if base_type is not bool: + parser.add_argument( + f"--{name}", + dest=name, + nargs="*" if list_type else None, + type=base_type, + help=description, + ) + if base_type is bool: + parser.add_argument( + f"--{name}", + dest=name, + type=_parse_bool_arg, + help=f"{description}", + ) + + +T = TypeVar("T", bound=Type[BaseModel]) + + +def parse_model_from_args(model: T, args: argparse.Namespace) -> T: + """Parse a pydantic model from an argparse namespace.""" + return model( + **{ + k: v + for k, v in vars(args).items() + if v is not None and k in model.model_fields + } + ) diff --git a/llama-cpp-python/llama_cpp/server/errors.py b/llama-cpp-python/llama_cpp/server/errors.py new file mode 100644 index 0000000000000000000000000000000000000000..fbf9fd80d5f6f8048cca04acc2d8749971298cfc --- /dev/null +++ b/llama-cpp-python/llama_cpp/server/errors.py @@ -0,0 +1,210 @@ +from __future__ import annotations + +import sys +import traceback +import time +from re import compile, Match, Pattern +from typing import Callable, Coroutine, Optional, Tuple, Union, Dict +from typing_extensions import TypedDict + + +from fastapi import ( + Request, + Response, + HTTPException, +) +from fastapi.responses import JSONResponse +from fastapi.routing import APIRoute + +from llama_cpp.server.types import ( + CreateCompletionRequest, + CreateEmbeddingRequest, + CreateChatCompletionRequest, +) + + +class ErrorResponse(TypedDict): + """OpenAI style error response""" + + message: str + type: str + param: Optional[str] + code: Optional[str] + + +class ErrorResponseFormatters: + """Collection of formatters for error responses. 
+ + Args: + request (Union[CreateCompletionRequest, CreateChatCompletionRequest]): + Request body + match (Match[str]): Match object from regex pattern + + Returns: + Tuple[int, ErrorResponse]: Status code and error response + """ + + @staticmethod + def context_length_exceeded( + request: Union["CreateCompletionRequest", "CreateChatCompletionRequest"], + match, # type: Match[str] # type: ignore + ) -> Tuple[int, ErrorResponse]: + """Formatter for context length exceeded error""" + + context_window = int(match.group(2)) + prompt_tokens = int(match.group(1)) + completion_tokens = request.max_tokens + if hasattr(request, "messages"): + # Chat completion + message = ( + "This model's maximum context length is {} tokens. " + "However, you requested {} tokens " + "({} in the messages, {} in the completion). " + "Please reduce the length of the messages or completion." + ) + else: + # Text completion + message = ( + "This model's maximum context length is {} tokens, " + "however you requested {} tokens " + "({} in your prompt; {} for the completion). " + "Please reduce your prompt; or completion length." + ) + return 400, ErrorResponse( + message=message.format( + context_window, + (completion_tokens or 0) + prompt_tokens, + prompt_tokens, + completion_tokens, + ), # type: ignore + type="invalid_request_error", + param="messages", + code="context_length_exceeded", + ) + + @staticmethod + def model_not_found( + request: Union["CreateCompletionRequest", "CreateChatCompletionRequest"], + match, # type: Match[str] # type: ignore + ) -> Tuple[int, ErrorResponse]: + """Formatter for model_not_found error""" + + model_path = str(match.group(1)) + message = f"The model `{model_path}` does not exist" + return 400, ErrorResponse( + message=message, + type="invalid_request_error", + param=None, + code="model_not_found", + ) + + +class RouteErrorHandler(APIRoute): + """Custom APIRoute that handles application errors and exceptions""" + + # key: regex pattern for original error message from llama_cpp + # value: formatter function + pattern_and_formatters: Dict[ + "Pattern[str]", + Callable[ + [ + Union["CreateCompletionRequest", "CreateChatCompletionRequest"], + "Match[str]", + ], + Tuple[int, ErrorResponse], + ], + ] = { + compile( + r"Requested tokens \((\d+)\) exceed context window of (\d+)" + ): ErrorResponseFormatters.context_length_exceeded, + compile( + r"Model path does not exist: (.+)" + ): ErrorResponseFormatters.model_not_found, + } + + def error_message_wrapper( + self, + error: Exception, + body: Optional[ + Union[ + "CreateChatCompletionRequest", + "CreateCompletionRequest", + "CreateEmbeddingRequest", + ] + ] = None, + ) -> Tuple[int, ErrorResponse]: + """Wraps error message in OpenAI style error response""" + print(f"Exception: {str(error)}", file=sys.stderr) + traceback.print_exc(file=sys.stderr) + if body is not None and isinstance( + body, + ( + CreateCompletionRequest, + CreateChatCompletionRequest, + ), + ): + # When text completion or chat completion + for pattern, callback in self.pattern_and_formatters.items(): + match = pattern.search(str(error)) + if match is not None: + return callback(body, match) + + # Wrap other errors as internal server error + return 500, ErrorResponse( + message=str(error), + type="internal_server_error", + param=None, + code=None, + ) + + def get_route_handler( + self, + ) -> Callable[[Request], Coroutine[None, None, Response]]: + """Defines custom route handler that catches exceptions and formats + in OpenAI style error response""" + + 
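To make the mapping above concrete, here is a standalone sketch of how the first entry in `pattern_and_formatters` recognizes a context-overflow message (the error string is an illustrative example, not captured output):

```python
# Sketch: the regex that routes a llama_cpp overflow error to context_length_exceeded.
import re

pattern = re.compile(r"Requested tokens \((\d+)\) exceed context window of (\d+)")
match = pattern.search("Requested tokens (5000) exceed context window of 4096")
assert match is not None
prompt_tokens = int(match.group(1))   # 5000, reported back in the error message
context_window = int(match.group(2))  # 4096, the model's maximum context length
```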
original_route_handler = super().get_route_handler() + + async def custom_route_handler(request: Request) -> Response: + try: + start_sec = time.perf_counter() + response = await original_route_handler(request) + elapsed_time_ms = int((time.perf_counter() - start_sec) * 1000) + response.headers["openai-processing-ms"] = f"{elapsed_time_ms}" + return response + except HTTPException as unauthorized: + # api key check failed + raise unauthorized + except Exception as exc: + json_body = await request.json() + try: + if "messages" in json_body: + # Chat completion + body: Optional[ + Union[ + CreateChatCompletionRequest, + CreateCompletionRequest, + CreateEmbeddingRequest, + ] + ] = CreateChatCompletionRequest(**json_body) + elif "prompt" in json_body: + # Text completion + body = CreateCompletionRequest(**json_body) + else: + # Embedding + body = CreateEmbeddingRequest(**json_body) + except Exception: + # Invalid request body + body = None + + # Get proper error message from the exception + ( + status_code, + error_message, + ) = self.error_message_wrapper(error=exc, body=body) + return JSONResponse( + {"error": error_message}, + status_code=status_code, + ) + + return custom_route_handler diff --git a/llama-cpp-python/llama_cpp/server/model.py b/llama-cpp-python/llama_cpp/server/model.py new file mode 100644 index 0000000000000000000000000000000000000000..f00292410944dd5716a0efc77dd44207f940ec2b --- /dev/null +++ b/llama-cpp-python/llama_cpp/server/model.py @@ -0,0 +1,276 @@ +from __future__ import annotations + +import json + +from typing import Dict, Optional, Union, List + +import llama_cpp +import llama_cpp.llama_speculative as llama_speculative +import llama_cpp.llama_tokenizer as llama_tokenizer + +from llama_cpp.server.settings import ModelSettings + + +class LlamaProxy: + def __init__(self, models: List[ModelSettings]) -> None: + assert len(models) > 0, "No models provided!" 
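For orientation, a minimal sketch of driving `LlamaProxy` directly, outside the FastAPI app (the GGUF path and alias are assumptions; note that constructing the proxy eagerly loads the default model):

```python
# Sketch: resolve a model alias to a loaded llama_cpp.Llama via LlamaProxy.
from llama_cpp.server.model import LlamaProxy
from llama_cpp.server.settings import ModelSettings

settings = ModelSettings(
    model="./models/model.Q4_K_M.gguf",  # assumption: any local GGUF file
    model_alias="local-gguf",
    n_ctx=2048,
)
proxy = LlamaProxy(models=[settings])
llama = proxy("local-gguf")  # returns the llama_cpp.Llama registered under that alias
out = llama.create_completion("The heart pumps", max_tokens=16)
print(out["choices"][0]["text"])
```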
+ + self._model_settings_dict: dict[str, ModelSettings] = {} + for model in models: + if not model.model_alias: + model.model_alias = model.model + self._model_settings_dict[model.model_alias] = model + + self._current_model: Optional[llama_cpp.Llama] = None + self._current_model_alias: Optional[str] = None + + self._default_model_settings: ModelSettings = models[0] + self._default_model_alias: str = self._default_model_settings.model_alias # type: ignore + + # Load default model + self._current_model = self.load_llama_from_model_settings( + self._default_model_settings + ) + self._current_model_alias = self._default_model_alias + + def __call__(self, model: Optional[str] = None) -> llama_cpp.Llama: + if model is None: + model = self._default_model_alias + + if model not in self._model_settings_dict: + model = self._default_model_alias + + if model == self._current_model_alias: + if self._current_model is not None: + return self._current_model + + self._current_model = None + + settings = self._model_settings_dict[model] + self._current_model = self.load_llama_from_model_settings(settings) + self._current_model_alias = model + return self._current_model + + def __getitem__(self, model: str): + return self._model_settings_dict[model].model_dump() + + def __setitem__(self, model: str, settings: Union[ModelSettings, str, bytes]): + if isinstance(settings, (bytes, str)): + settings = ModelSettings.model_validate_json(settings) + self._model_settings_dict[model] = settings + + def __iter__(self): + for model in self._model_settings_dict: + yield model + + def free(self): + if self._current_model: + del self._current_model + + @staticmethod + def load_llama_from_model_settings(settings: ModelSettings) -> llama_cpp.Llama: + chat_handler = None + if settings.chat_format == "llava-1-5": + assert settings.clip_model_path is not None, "clip model not found" + if settings.hf_model_repo_id is not None: + chat_handler = ( + llama_cpp.llama_chat_format.Llava15ChatHandler.from_pretrained( + repo_id=settings.hf_model_repo_id, + filename=settings.clip_model_path, + verbose=settings.verbose, + ) + ) + else: + chat_handler = llama_cpp.llama_chat_format.Llava15ChatHandler( + clip_model_path=settings.clip_model_path, verbose=settings.verbose + ) + elif settings.chat_format == "obsidian": + assert settings.clip_model_path is not None, "clip model not found" + if settings.hf_model_repo_id is not None: + chat_handler = ( + llama_cpp.llama_chat_format.ObsidianChatHandler.from_pretrained( + repo_id=settings.hf_model_repo_id, + filename=settings.clip_model_path, + verbose=settings.verbose, + ) + ) + else: + chat_handler = llama_cpp.llama_chat_format.ObsidianChatHandler( + clip_model_path=settings.clip_model_path, verbose=settings.verbose + ) + elif settings.chat_format == "llava-1-6": + assert settings.clip_model_path is not None, "clip model not found" + if settings.hf_model_repo_id is not None: + chat_handler = ( + llama_cpp.llama_chat_format.Llava16ChatHandler.from_pretrained( + repo_id=settings.hf_model_repo_id, + filename=settings.clip_model_path, + verbose=settings.verbose, + ) + ) + else: + chat_handler = llama_cpp.llama_chat_format.Llava16ChatHandler( + clip_model_path=settings.clip_model_path, verbose=settings.verbose + ) + elif settings.chat_format == "moondream": + assert settings.clip_model_path is not None, "clip model not found" + if settings.hf_model_repo_id is not None: + chat_handler = ( + llama_cpp.llama_chat_format.MoondreamChatHandler.from_pretrained( + repo_id=settings.hf_model_repo_id, + 
filename=settings.clip_model_path, + verbose=settings.verbose, + ) + ) + else: + chat_handler = llama_cpp.llama_chat_format.MoondreamChatHandler( + clip_model_path=settings.clip_model_path, verbose=settings.verbose + ) + elif settings.chat_format == "nanollava": + assert settings.clip_model_path is not None, "clip model not found" + if settings.hf_model_repo_id is not None: + chat_handler = ( + llama_cpp.llama_chat_format.NanoLlavaChatHandler.from_pretrained( + repo_id=settings.hf_model_repo_id, + filename=settings.clip_model_path, + verbose=settings.verbose, + ) + ) + else: + chat_handler = llama_cpp.llama_chat_format.NanoLlavaChatHandler( + clip_model_path=settings.clip_model_path, verbose=settings.verbose + ) + elif settings.chat_format == "llama-3-vision-alpha": + assert settings.clip_model_path is not None, "clip model not found" + if settings.hf_model_repo_id is not None: + chat_handler = ( + llama_cpp.llama_chat_format.Llama3VisionAlpha.from_pretrained( + repo_id=settings.hf_model_repo_id, + filename=settings.clip_model_path, + verbose=settings.verbose, + ) + ) + else: + chat_handler = llama_cpp.llama_chat_format.Llama3VisionAlpha( + clip_model_path=settings.clip_model_path, verbose=settings.verbose + ) + elif settings.chat_format == "hf-autotokenizer": + assert ( + settings.hf_pretrained_model_name_or_path is not None + ), "hf_pretrained_model_name_or_path must be set for hf-autotokenizer" + chat_handler = ( + llama_cpp.llama_chat_format.hf_autotokenizer_to_chat_completion_handler( + settings.hf_pretrained_model_name_or_path + ) + ) + elif settings.chat_format == "hf-tokenizer-config": + assert ( + settings.hf_tokenizer_config_path is not None + ), "hf_tokenizer_config_path must be set for hf-tokenizer-config" + chat_handler = llama_cpp.llama_chat_format.hf_tokenizer_config_to_chat_completion_handler( + json.load(open(settings.hf_tokenizer_config_path)) + ) + + tokenizer: Optional[llama_cpp.BaseLlamaTokenizer] = None + if settings.hf_pretrained_model_name_or_path is not None: + tokenizer = llama_tokenizer.LlamaHFTokenizer.from_pretrained( + settings.hf_pretrained_model_name_or_path + ) + + draft_model = None + if settings.draft_model is not None: + draft_model = llama_speculative.LlamaPromptLookupDecoding( + num_pred_tokens=settings.draft_model_num_pred_tokens + ) + + kv_overrides: Optional[Dict[str, Union[bool, int, float]]] = None + if settings.kv_overrides is not None: + assert isinstance(settings.kv_overrides, list) + kv_overrides = {} + for kv in settings.kv_overrides: + key, value = kv.split("=") + if ":" in value: + value_type, value = value.split(":") + if value_type == "bool": + kv_overrides[key] = value.lower() in ["true", "1"] + elif value_type == "int": + kv_overrides[key] = int(value) + elif value_type == "float": + kv_overrides[key] = float(value) + else: + raise ValueError(f"Unknown value type {value_type}") + + import functools + + kwargs = {} + + if settings.hf_model_repo_id is not None: + create_fn = functools.partial( + llama_cpp.Llama.from_pretrained, + repo_id=settings.hf_model_repo_id, + filename=settings.model, + ) + else: + create_fn = llama_cpp.Llama + kwargs["model_path"] = settings.model + + _model = create_fn( + **kwargs, + # Model Params + n_gpu_layers=settings.n_gpu_layers, + main_gpu=settings.main_gpu, + tensor_split=settings.tensor_split, + vocab_only=settings.vocab_only, + use_mmap=settings.use_mmap, + use_mlock=settings.use_mlock, + kv_overrides=kv_overrides, + # Context Params + seed=settings.seed, + n_ctx=settings.n_ctx, + 
n_batch=settings.n_batch, + n_threads=settings.n_threads, + n_threads_batch=settings.n_threads_batch, + rope_scaling_type=settings.rope_scaling_type, + rope_freq_base=settings.rope_freq_base, + rope_freq_scale=settings.rope_freq_scale, + yarn_ext_factor=settings.yarn_ext_factor, + yarn_attn_factor=settings.yarn_attn_factor, + yarn_beta_fast=settings.yarn_beta_fast, + yarn_beta_slow=settings.yarn_beta_slow, + yarn_orig_ctx=settings.yarn_orig_ctx, + mul_mat_q=settings.mul_mat_q, + logits_all=settings.logits_all, + embedding=settings.embedding, + offload_kqv=settings.offload_kqv, + flash_attn=settings.flash_attn, + # Sampling Params + last_n_tokens_size=settings.last_n_tokens_size, + # LoRA Params + lora_base=settings.lora_base, + lora_path=settings.lora_path, + # Backend Params + numa=settings.numa, + # Chat Format Params + chat_format=settings.chat_format, + chat_handler=chat_handler, + # Speculative Decoding + draft_model=draft_model, + # KV Cache Quantization + type_k=settings.type_k, + type_v=settings.type_v, + # Tokenizer + tokenizer=tokenizer, + # Misc + verbose=settings.verbose, + ) + if settings.cache: + if settings.cache_type == "disk": + if settings.verbose: + print(f"Using disk cache with size {settings.cache_size}") + cache = llama_cpp.LlamaDiskCache(capacity_bytes=settings.cache_size) + else: + if settings.verbose: + print(f"Using ram cache with size {settings.cache_size}") + cache = llama_cpp.LlamaRAMCache(capacity_bytes=settings.cache_size) + _model.set_cache(cache) + return _model diff --git a/llama-cpp-python/llama_cpp/server/settings.py b/llama-cpp-python/llama_cpp/server/settings.py new file mode 100644 index 0000000000000000000000000000000000000000..ed05a889f0cb69d919210314d4286884dcb3a0e2 --- /dev/null +++ b/llama-cpp-python/llama_cpp/server/settings.py @@ -0,0 +1,227 @@ +from __future__ import annotations + +import multiprocessing + +from typing import Optional, List, Literal, Union, Dict, cast +from typing_extensions import Self + +from pydantic import Field, model_validator +from pydantic_settings import BaseSettings + +import llama_cpp + +# Disable warning for model and model_alias settings +BaseSettings.model_config["protected_namespaces"] = () + + +class ModelSettings(BaseSettings): + """Model settings used to load a Llama model.""" + + model: str = Field( + description="The path to the model to use for generating completions." + ) + model_alias: Optional[str] = Field( + default=None, + description="The alias of the model to use for generating completions.", + ) + # Model Params + n_gpu_layers: int = Field( + default=0, + ge=-1, + description="The number of layers to put on the GPU. The rest will be on the CPU. Set -1 to move all to GPU.", + ) + split_mode: int = Field( + default=llama_cpp.LLAMA_SPLIT_MODE_LAYER, + description="The split mode to use.", + ) + main_gpu: int = Field( + default=0, + ge=0, + description="Main GPU to use.", + ) + tensor_split: Optional[List[float]] = Field( + default=None, + description="Split layers across multiple GPUs in proportion.", + ) + vocab_only: bool = Field( + default=False, description="Whether to only return the vocabulary." + ) + use_mmap: bool = Field( + default=llama_cpp.llama_supports_mmap(), + description="Use mmap.", + ) + use_mlock: bool = Field( + default=llama_cpp.llama_supports_mlock(), + description="Use mlock.", + ) + kv_overrides: Optional[List[str]] = Field( + default=None, + description="List of model kv overrides in the format key=type:value where type is one of (bool, int, float). 
Valid true values are (true, TRUE, 1), otherwise false.", + ) + # Context Params + seed: int = Field( + default=llama_cpp.LLAMA_DEFAULT_SEED, description="Random seed. -1 for random." + ) + n_ctx: int = Field(default=2048, ge=0, description="The context size.") + n_batch: int = Field( + default=512, ge=1, description="The batch size to use per eval." + ) + n_threads: int = Field( + default=max(multiprocessing.cpu_count() // 2, 1), + ge=1, + description="The number of threads to use. Use -1 for max cpu threads", + ) + n_threads_batch: int = Field( + default=max(multiprocessing.cpu_count(), 1), + ge=0, + description="The number of threads to use when batch processing. Use -1 for max cpu threads", + ) + rope_scaling_type: int = Field( + default=llama_cpp.LLAMA_ROPE_SCALING_TYPE_UNSPECIFIED + ) + rope_freq_base: float = Field(default=0.0, description="RoPE base frequency") + rope_freq_scale: float = Field( + default=0.0, description="RoPE frequency scaling factor" + ) + yarn_ext_factor: float = Field(default=-1.0) + yarn_attn_factor: float = Field(default=1.0) + yarn_beta_fast: float = Field(default=32.0) + yarn_beta_slow: float = Field(default=1.0) + yarn_orig_ctx: int = Field(default=0) + mul_mat_q: bool = Field( + default=True, description="if true, use experimental mul_mat_q kernels" + ) + logits_all: bool = Field(default=True, description="Whether to return logits.") + embedding: bool = Field(default=True, description="Whether to use embeddings.") + offload_kqv: bool = Field( + default=True, description="Whether to offload kqv to the GPU." + ) + flash_attn: bool = Field( + default=False, description="Whether to use flash attention." + ) + # Sampling Params + last_n_tokens_size: int = Field( + default=64, + ge=0, + description="Last n tokens to keep for repeat penalty calculation.", + ) + # LoRA Params + lora_base: Optional[str] = Field( + default=None, + description="Optional path to base model, useful if using a quantized base model and you want to apply LoRA to an f16 model.", + ) + lora_path: Optional[str] = Field( + default=None, + description="Path to a LoRA file to apply to the model.", + ) + # Backend Params + numa: Union[bool, int] = Field( + default=False, + description="Enable NUMA support.", + ) + # Chat Format Params + chat_format: Optional[str] = Field( + default=None, + description="Chat format to use.", + ) + clip_model_path: Optional[str] = Field( + default=None, + description="Path to a CLIP model to use for multi-modal chat completion.", + ) + # Cache Params + cache: bool = Field( + default=False, + description="Use a cache to reduce processing times for evaluated prompts.", + ) + cache_type: Literal["ram", "disk"] = Field( + default="ram", + description="The type of cache to use. Only used if cache is True.", + ) + cache_size: int = Field( + default=2 << 30, + description="The size of the cache in bytes. Only used if cache is True.", + ) + # Tokenizer Options + hf_tokenizer_config_path: Optional[str] = Field( + default=None, + description="The path to a HuggingFace tokenizer_config.json file.", + ) + hf_pretrained_model_name_or_path: Optional[str] = Field( + default=None, + description="The model name or path to a pretrained HuggingFace tokenizer model. 
Same as you would pass to AutoTokenizer.from_pretrained().", + ) + # Loading from HuggingFace Model Hub + hf_model_repo_id: Optional[str] = Field( + default=None, + description="The model repo id to use for the HuggingFace tokenizer model.", + ) + # Speculative Decoding + draft_model: Optional[str] = Field( + default=None, + description="Method to use for speculative decoding. One of (prompt-lookup-decoding).", + ) + draft_model_num_pred_tokens: int = Field( + default=10, + description="Number of tokens to predict using the draft model.", + ) + # KV Cache Quantization + type_k: Optional[int] = Field( + default=None, + description="Type of the key cache quantization.", + ) + type_v: Optional[int] = Field( + default=None, + description="Type of the value cache quantization.", + ) + # Misc + verbose: bool = Field( + default=True, description="Whether to print debug information." + ) + + @model_validator(mode="before") # pre=True to ensure this runs before any other validation + def set_dynamic_defaults(self) -> Self: + # If n_threads or n_threads_batch is -1, set it to multiprocessing.cpu_count() + cpu_count = multiprocessing.cpu_count() + values = cast(Dict[str, int], self) + if values.get('n_threads', 0) == -1: + values['n_threads'] = cpu_count + if values.get('n_threads_batch', 0) == -1: + values['n_threads_batch'] = cpu_count + return self + + +class ServerSettings(BaseSettings): + """Server settings used to configure the FastAPI and Uvicorn server.""" + + # Uvicorn Settings + host: str = Field(default="localhost", description="Listen address") + port: int = Field(default=8000, description="Listen port") + ssl_keyfile: Optional[str] = Field( + default=None, description="SSL key file for HTTPS" + ) + ssl_certfile: Optional[str] = Field( + default=None, description="SSL certificate file for HTTPS" + ) + # FastAPI Settings + api_key: Optional[str] = Field( + default=None, + description="API key for authentication. If set all requests need to be authenticated.", + ) + interrupt_requests: bool = Field( + default=True, + description="Whether to interrupt requests when a new request is received.", + ) + disable_ping_events: bool = Field( + default=False, + description="Disable EventSource pings (may be needed for some clients).", + ) + + +class Settings(ServerSettings, ModelSettings): + pass + + +class ConfigFileSettings(ServerSettings): + """Configuration file format settings.""" + + models: List[ModelSettings] = Field(default=[], description="Model configs") diff --git a/llama-cpp-python/llama_cpp/server/types.py b/llama-cpp-python/llama_cpp/server/types.py new file mode 100644 index 0000000000000000000000000000000000000000..a20b3940f2e3ccedf56523d4f6c0450387e45134 --- /dev/null +++ b/llama-cpp-python/llama_cpp/server/types.py @@ -0,0 +1,308 @@ +from __future__ import annotations + +from typing import List, Optional, Union, Dict +from typing_extensions import TypedDict, Literal + +from pydantic import BaseModel, Field + +import llama_cpp + + +model_field = Field( + description="The model to use for generating completions.", default=None +) + +max_tokens_field = Field( + default=16, ge=1, description="The maximum number of tokens to generate." +) + +temperature_field = Field( + default=0.8, + description="Adjust the randomness of the generated text.\n\n" + + "Temperature is a hyperparameter that controls the randomness of the generated text. It affects the probability distribution of the model's output tokens. 
A higher temperature (e.g., 1.5) makes the output more random and creative, while a lower temperature (e.g., 0.5) makes the output more focused, deterministic, and conservative. The default value is 0.8, which provides a balance between randomness and determinism. At the extreme, a temperature of 0 will always pick the most likely next token, leading to identical outputs in each run.", +) + +top_p_field = Field( + default=0.95, + ge=0.0, + le=1.0, + description="Limit the next token selection to a subset of tokens with a cumulative probability above a threshold P.\n\n" + + "Top-p sampling, also known as nucleus sampling, is another text generation method that selects the next token from a subset of tokens that together have a cumulative probability of at least p. This method provides a balance between diversity and quality by considering both the probabilities of tokens and the number of tokens to sample from. A higher value for top_p (e.g., 0.95) will lead to more diverse text, while a lower value (e.g., 0.5) will generate more focused and conservative text.", +) + +min_p_field = Field( + default=0.05, + ge=0.0, + le=1.0, + description="Sets a minimum base probability threshold for token selection.\n\n" + + "The Min-P sampling method was designed as an alternative to Top-P, and aims to ensure a balance of quality and variety. The parameter min_p represents the minimum probability for a token to be considered, relative to the probability of the most likely token. For example, with min_p=0.05 and the most likely token having a probability of 0.9, logits with a value less than 0.045 are filtered out.", +) + +stop_field = Field( + default=None, + description="A list of tokens at which to stop generation. If None, no stop tokens are used.", +) + +stream_field = Field( + default=False, + description="Whether to stream the results as they are generated. Useful for chatbots.", +) + +top_k_field = Field( + default=40, + ge=0, + description="Limit the next token selection to the K most probable tokens.\n\n" + + "Top-k sampling is a text generation method that selects the next token only from the top k most likely tokens predicted by the model. It helps reduce the risk of generating low-probability or nonsensical tokens, but it may also limit the diversity of the output. A higher value for top_k (e.g., 100) will consider more tokens and lead to more diverse text, while a lower value (e.g., 10) will focus on the most probable tokens and generate more conservative text.", +) + +repeat_penalty_field = Field( + default=1.1, + ge=0.0, + description="A penalty applied to each token that is already generated. This helps prevent the model from repeating itself.\n\n" + + "Repeat penalty is a hyperparameter used to penalize the repetition of token sequences during text generation. It helps prevent the model from generating repetitive or monotonous text. 
A higher value (e.g., 1.5) will penalize repetitions more strongly, while a lower value (e.g., 0.9) will be more lenient.", +) + +presence_penalty_field = Field( + default=0.0, + ge=-2.0, + le=2.0, + description="Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.", +) + +frequency_penalty_field = Field( + default=0.0, + ge=-2.0, + le=2.0, + description="Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.", +) + +mirostat_mode_field = Field( + default=0, + ge=0, + le=2, + description="Enable Mirostat constant-perplexity algorithm of the specified version (1 or 2; 0 = disabled)", +) + +mirostat_tau_field = Field( + default=5.0, + ge=0.0, + le=10.0, + description="Mirostat target entropy, i.e. the target perplexity - lower values produce focused and coherent text, larger values produce more diverse and less coherent text", +) + +mirostat_eta_field = Field( + default=0.1, ge=0.001, le=1.0, description="Mirostat learning rate" +) + +grammar = Field( + default=None, + description="A CBNF grammar (as string) to be used for formatting the model's output.", +) + + +class CreateCompletionRequest(BaseModel): + prompt: Union[str, List[str]] = Field( + default="", description="The prompt to generate completions for." + ) + suffix: Optional[str] = Field( + default=None, + description="A suffix to append to the generated text. If None, no suffix is appended. Useful for chatbots.", + ) + max_tokens: Optional[int] = Field( + default=16, ge=0, description="The maximum number of tokens to generate." + ) + temperature: float = temperature_field + top_p: float = top_p_field + min_p: float = min_p_field + echo: bool = Field( + default=False, + description="Whether to echo the prompt in the generated text. Useful for chatbots.", + ) + stop: Optional[Union[str, List[str]]] = stop_field + stream: bool = stream_field + logprobs: Optional[int] = Field( + default=None, + ge=0, + description="The number of logprobs to generate. 
If None, no logprobs are generated.", + ) + presence_penalty: Optional[float] = presence_penalty_field + frequency_penalty: Optional[float] = frequency_penalty_field + logit_bias: Optional[Dict[str, float]] = Field(None) + seed: Optional[int] = Field(None) + + # ignored or currently unsupported + model: Optional[str] = model_field + n: Optional[int] = 1 + best_of: Optional[int] = 1 + user: Optional[str] = Field(default=None) + + # llama.cpp specific parameters + top_k: int = top_k_field + repeat_penalty: float = repeat_penalty_field + logit_bias_type: Optional[Literal["input_ids", "tokens"]] = Field(None) + mirostat_mode: int = mirostat_mode_field + mirostat_tau: float = mirostat_tau_field + mirostat_eta: float = mirostat_eta_field + grammar: Optional[str] = None + + model_config = { + "json_schema_extra": { + "examples": [ + { + "prompt": "\n\n### Instructions:\nWhat is the capital of France?\n\n### Response:\n", + "stop": ["\n", "###"], + } + ] + } + } + + +class CreateEmbeddingRequest(BaseModel): + model: Optional[str] = model_field + input: Union[str, List[str]] = Field(description="The input to embed.") + user: Optional[str] = Field(default=None) + + model_config = { + "json_schema_extra": { + "examples": [ + { + "input": "The food was delicious and the waiter...", + } + ] + } + } + + +class ChatCompletionRequestMessage(BaseModel): + role: Literal["system", "user", "assistant", "function"] = Field( + default="user", description="The role of the message." + ) + content: Optional[str] = Field( + default="", description="The content of the message." + ) + + +class CreateChatCompletionRequest(BaseModel): + messages: List[llama_cpp.ChatCompletionRequestMessage] = Field( + default=[], description="A list of messages to generate completions for." + ) + functions: Optional[List[llama_cpp.ChatCompletionFunction]] = Field( + default=None, + description="A list of functions to apply to the generated completions.", + ) + function_call: Optional[llama_cpp.ChatCompletionRequestFunctionCall] = Field( + default=None, + description="A function to apply to the generated completions.", + ) + tools: Optional[List[llama_cpp.ChatCompletionTool]] = Field( + default=None, + description="A list of tools to apply to the generated completions.", + ) + tool_choice: Optional[llama_cpp.ChatCompletionToolChoiceOption] = Field( + default=None, + description="A tool to apply to the generated completions.", + ) # TODO: verify + max_tokens: Optional[int] = Field( + default=None, + description="The maximum number of tokens to generate. Defaults to inf", + ) + logprobs: Optional[bool] = Field( + default=False, + description="Whether to output the logprobs or not. Default is False" + ) + top_logprobs: Optional[int] = Field( + default=None, + ge=0, + description="The number of logprobs to generate. If None, no logprobs are generated.
logprobs needs to be set to True.", + ) + temperature: float = temperature_field + top_p: float = top_p_field + min_p: float = min_p_field + stop: Optional[Union[str, List[str]]] = stop_field + stream: bool = stream_field + presence_penalty: Optional[float] = presence_penalty_field + frequency_penalty: Optional[float] = frequency_penalty_field + logit_bias: Optional[Dict[str, float]] = Field(None) + seed: Optional[int] = Field(None) + response_format: Optional[llama_cpp.ChatCompletionRequestResponseFormat] = Field( + default=None, + ) + + # ignored or currently unsupported + model: Optional[str] = model_field + n: Optional[int] = 1 + user: Optional[str] = Field(None) + + # llama.cpp specific parameters + top_k: int = top_k_field + repeat_penalty: float = repeat_penalty_field + logit_bias_type: Optional[Literal["input_ids", "tokens"]] = Field(None) + mirostat_mode: int = mirostat_mode_field + mirostat_tau: float = mirostat_tau_field + mirostat_eta: float = mirostat_eta_field + grammar: Optional[str] = None + + model_config = { + "json_schema_extra": { + "examples": [ + { + "messages": [ + ChatCompletionRequestMessage( + role="system", content="You are a helpful assistant." + ).model_dump(), + ChatCompletionRequestMessage( + role="user", content="What is the capital of France?" + ).model_dump(), + ] + } + ] + } + } + + +class ModelData(TypedDict): + id: str + object: Literal["model"] + owned_by: str + permissions: List[str] + + +class ModelList(TypedDict): + object: Literal["list"] + data: List[ModelData] + + +class TokenizeInputRequest(BaseModel): + model: Optional[str] = model_field + input: str = Field(description="The input to tokenize.") + + model_config = { + "json_schema_extra": {"examples": [{"input": "How many tokens in this query?"}]} + } + + +class TokenizeInputResponse(BaseModel): + tokens: List[int] = Field(description="A list of tokens.") + + model_config = {"json_schema_extra": {"example": {"tokens": [123, 321, 222]}}} + + +class TokenizeInputCountResponse(BaseModel): + count: int = Field(description="The number of tokens in the input.") + + model_config = {"json_schema_extra": {"example": {"count": 5}}} + + +class DetokenizeInputRequest(BaseModel): + model: Optional[str] = model_field + tokens: List[int] = Field(description="A list of tokens to detokenize.") + + model_config = {"json_schema_extra": {"example": [{"tokens": [123, 321, 222]}]}} + + +class DetokenizeInputResponse(BaseModel): + text: str = Field(description="The detokenized text.") + + model_config = { + "json_schema_extra": {"example": {"text": "How many tokens in this query?"}} + } diff --git a/llama-cpp-python/mkdocs.yml b/llama-cpp-python/mkdocs.yml new file mode 100644 index 0000000000000000000000000000000000000000..79a9e67a1aee09c6d182f240ba5eef32feabcbce --- /dev/null +++ b/llama-cpp-python/mkdocs.yml @@ -0,0 +1,74 @@ +site_name: llama-cpp-python +repo_url: https://github.com/abetlen/llama-cpp-python + +theme: + name: material + palette: + + # Palette toggle for light mode + - scheme: default + primary: indigo + toggle: + icon: material/brightness-7 + name: Switch to dark mode + + # Palette toggle for dark mode + - scheme: slate + primary: indigo + toggle: + icon: material/brightness-4 + name: Switch to light mode + +plugins: + - search + - mkdocstrings: + handlers: + python: + options: + members_order: source + group_by_category: false + signature_crossrefs: true + show_signature: true + docstring_section_style: list + show_root_heading: true + heading_level: 3 + preload_modules: + - typing + - 
typing_extensions + - ctypes + import: + - https://docs.python.org/3/objects.inv + - https://numpy.org/doc/stable/objects.inv + +watch: + - llama_cpp + - README.md + +nav: + - "Getting Started": "index.md" + - "Installation Guides": + - "macOS (Metal)": "install/macos.md" + - "API Reference": "api-reference.md" + - "OpenAI Compatible Web Server": "server.md" + - "Changelog": "changelog.md" + +markdown_extensions: + - attr_list + - pymdownx.emoji: + emoji_index: !!python/name:materialx.emoji.twemoji + emoji_generator: !!python/name:materialx.emoji.to_svg + - pymdownx.highlight: + anchor_linenums: true + line_spans: __span + pygments_lang_class: true + - pymdownx.inlinehilite + - pymdownx.magiclink: + repo_url_shorthand: true + user: abetlen + repo: llama-cpp-python + - pymdownx.snippets + - pymdownx.superfences + - pymdownx.tabbed: + alternate_style: true + - pymdownx.tilde + - tables diff --git a/llama-cpp-python/pyproject.toml b/llama-cpp-python/pyproject.toml new file mode 100644 index 0000000000000000000000000000000000000000..8345cb1f09a81f0003defeee8151f6050d45db50 --- /dev/null +++ b/llama-cpp-python/pyproject.toml @@ -0,0 +1,76 @@ +[build-system] +requires = ["scikit-build-core[pyproject]>=0.9.2"] +build-backend = "scikit_build_core.build" + +[project] +name = "llama_cpp_python" +dynamic = ["version"] +description = "Python bindings for the llama.cpp library" +readme = "README.md" +license = { text = "MIT" } +authors = [ + { name = "Andrei Betlen", email = "abetlen@gmail.com" }, +] +dependencies = [ + "typing-extensions>=4.5.0", + "numpy>=1.20.0", + "diskcache>=5.6.1", + "jinja2>=2.11.3", +] +requires-python = ">=3.8" +classifiers = [ + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", +] + + +[project.optional-dependencies] +server = [ + "uvicorn>=0.22.0", + "fastapi>=0.100.0", + "pydantic-settings>=2.0.1", + "sse-starlette>=1.6.1", + "starlette-context>=0.3.6,<0.4", + "PyYAML>=5.1", +] +test = [ + "pytest>=7.4.0", + "httpx>=0.24.1", + "scipy>=1.10", +] +dev = [ + "black>=23.3.0", + "twine>=4.0.2", + "mkdocs>=1.4.3", + "mkdocstrings[python]>=0.22.0", + "mkdocs-material>=9.1.18", + "pytest>=7.4.0", + "httpx>=0.24.1", +] +all = [ + "llama_cpp_python[server,test,dev]", +] + +[tool.scikit-build] +wheel.packages = ["llama_cpp"] +cmake.verbose = true +cmake.minimum-version = "3.21" +minimum-version = "0.5.1" +sdist.include = [".git", "vendor/llama.cpp/.git"] + +[tool.scikit-build.metadata.version] +provider = "scikit_build_core.metadata.regex" +input = "llama_cpp/__init__.py" + +[project.urls] +Homepage = "https://github.com/abetlen/llama-cpp-python" +Issues = "https://github.com/abetlen/llama-cpp-python/issues" +Documentation = "https://llama-cpp-python.readthedocs.io/en/latest/" +Changelog = "https://llama-cpp-python.readthedocs.io/en/latest/changelog/" + +[tool.pytest.ini_options] +testpaths = "tests" diff --git a/llama-cpp-python/scripts/releases-to-pep-503.sh b/llama-cpp-python/scripts/releases-to-pep-503.sh new file mode 100644 index 0000000000000000000000000000000000000000..00ae56721ee0afd4c81e3aa4c731f753f0b955ea --- /dev/null +++ b/llama-cpp-python/scripts/releases-to-pep-503.sh @@ -0,0 +1,58 @@ +#!/bin/bash + +# Get output directory or default to index/whl/cpu +output_dir=${1:-"index/whl/cpu"} + +# Create output directory +mkdir -p $output_dir + +# Change to output directory 
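+# Everything below runs inside $output_dir, so the generated index.html files land there.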
+pushd $output_dir + +# Create an index html file +echo "" > index.html +echo "" >> index.html +echo " " >> index.html +echo " " >> index.html +echo " llama-cpp-python" >> index.html +echo "
" >> index.html +echo " " >> index.html +echo "" >> index.html +echo "" >> index.html + +# Create llama-cpp-python directory +mkdir -p llama-cpp-python + +# Change to llama-cpp-python directory +pushd llama-cpp-python + +# Create an index html file +echo "" > index.html +echo "" >> index.html +echo " " >> index.html +echo "

Links for llama-cpp-python

" >> index.html + +# Get all releases +releases=$(curl -s https://api.github.com/repos/abetlen/llama-cpp-python/releases | jq -r .[].tag_name) + +# Get pattern from second arg or default to valid python package version pattern +pattern=${2:-"^[v]?[0-9]+\.[0-9]+\.[0-9]+$"} + +# Filter releases by pattern +releases=$(echo $releases | tr ' ' '\n' | grep -E $pattern) + +# For each release, get all assets +for release in $releases; do + assets=$(curl -s https://api.github.com/repos/abetlen/llama-cpp-python/releases/tags/$release | jq -r .assets) + echo "

$release

" >> index.html + for asset in $(echo $assets | jq -r .[].browser_download_url); do + if [[ $asset == *".whl" ]]; then + echo " $asset" >> index.html + echo "
" >> index.html + fi + done +done + +echo " " >> index.html +echo "" >> index.html +echo "" >> index.html diff --git a/llama-cpp-python/tests/test_llama.py b/llama-cpp-python/tests/test_llama.py new file mode 100644 index 0000000000000000000000000000000000000000..469ef91cabfc25afbd00d1a0c8a597c9015d3551 --- /dev/null +++ b/llama-cpp-python/tests/test_llama.py @@ -0,0 +1,293 @@ +import ctypes + +import numpy as np +import pytest +from scipy.special import log_softmax + +import llama_cpp + +MODEL = "./vendor/llama.cpp/models/ggml-vocab-llama-spm.gguf" + + +def test_llama_cpp_tokenization(): + llama = llama_cpp.Llama(model_path=MODEL, vocab_only=True, verbose=False) + + assert llama + assert llama._ctx.ctx is not None + + text = b"Hello World" + + tokens = llama.tokenize(text) + assert tokens[0] == llama.token_bos() + assert tokens == [1, 15043, 2787] + detokenized = llama.detokenize(tokens) + assert detokenized == text + + tokens = llama.tokenize(text, add_bos=False) + assert tokens[0] != llama.token_bos() + assert tokens == [15043, 2787] + + detokenized = llama.detokenize(tokens) + assert detokenized != text + + text = b"Hello World" + tokens = llama.tokenize(text) + assert tokens[-1] != llama.token_eos() + assert tokens == [1, 15043, 2787, 829, 29879, 29958] + + tokens = llama.tokenize(text, special=True) + assert tokens[-1] == llama.token_eos() + assert tokens == [1, 15043, 2787, 2] + + text = b"" + tokens = llama.tokenize(text, add_bos=True, special=True) + assert tokens[-1] != llama.token_eos() + assert tokens == [llama.token_bos()] + assert text == llama.detokenize(tokens) + + +@pytest.fixture +def mock_llama(monkeypatch): + def setup_mock(llama: llama_cpp.Llama, output_text: str): + n_ctx = llama.n_ctx() + n_vocab = llama.n_vocab() + output_tokens = llama.tokenize( + output_text.encode("utf-8"), add_bos=True, special=True + ) + logits = (ctypes.c_float * (n_vocab * n_ctx))(-100.0) + for i in range(n_ctx): + output_idx = i + 1 # logits for first tokens predict second token + if output_idx < len(output_tokens): + logits[i * n_vocab + output_tokens[output_idx]] = 100.0 + else: + logits[i * n_vocab + llama.token_eos()] = 100.0 + n = 0 + last_n_tokens = 0 + + def mock_decode(ctx: llama_cpp.llama_context_p, batch: llama_cpp.llama_batch): + # Test some basic invariants of this mocking technique + assert ctx == llama._ctx.ctx, "context does not match mock_llama" + assert batch.n_tokens > 0, "no tokens in batch" + assert all( + batch.n_seq_id[i] == 1 for i in range(batch.n_tokens) + ), "n_seq >1 not supported by mock_llama" + assert all( + batch.seq_id[i][0] == 0 for i in range(batch.n_tokens) + ), "n_seq >1 not supported by mock_llama" + assert batch.logits[ + batch.n_tokens - 1 + ], "logits not allocated for last token" + # Update the mock context state + nonlocal n + nonlocal last_n_tokens + n = max(batch.pos[i] for i in range(batch.n_tokens)) + 1 + last_n_tokens = batch.n_tokens + return 0 + + def mock_get_logits(ctx: llama_cpp.llama_context_p): + # Test some basic invariants of this mocking technique + assert ctx == llama._ctx.ctx, "context does not match mock_llama" + assert n > 0, "mock_llama_decode not called" + assert last_n_tokens > 0, "mock_llama_decode not called" + # Return view of logits for last_n_tokens + return (ctypes.c_float * (last_n_tokens * n_vocab)).from_address( + ctypes.addressof(logits) + + (n - last_n_tokens) * n_vocab * ctypes.sizeof(ctypes.c_float) + ) + + monkeypatch.setattr("llama_cpp.llama_cpp.llama_decode", mock_decode) + 
monkeypatch.setattr("llama_cpp.llama_cpp.llama_get_logits", mock_get_logits) + + def mock_kv_cache_clear(ctx: llama_cpp.llama_context_p): + # Test some basic invariants of this mocking technique + assert ctx == llama._ctx.ctx, "context does not match mock_llama" + return + + def mock_kv_cache_seq_rm( + ctx: llama_cpp.llama_context_p, + seq_id: llama_cpp.llama_seq_id, + pos0: llama_cpp.llama_pos, + pos1: llama_cpp.llama_pos, + ): + # Test some basic invariants of this mocking technique + assert ctx == llama._ctx.ctx, "context does not match mock_llama" + return + + def mock_kv_cache_seq_cp( + ctx: llama_cpp.llama_context_p, + seq_id_src: llama_cpp.llama_seq_id, + seq_id_dst: llama_cpp.llama_seq_id, + pos0: llama_cpp.llama_pos, + pos1: llama_cpp.llama_pos, + ): + # Test some basic invariants of this mocking technique + assert ctx == llama._ctx.ctx, "context does not match mock_llama" + return + + def mock_kv_cache_seq_keep( + ctx: llama_cpp.llama_context_p, + seq_id: llama_cpp.llama_seq_id, + ): + # Test some basic invariants of this mocking technique + assert ctx == llama._ctx.ctx, "context does not match mock_llama" + return + + def mock_kv_cache_seq_add( + ctx: llama_cpp.llama_context_p, + seq_id: llama_cpp.llama_seq_id, + pos0: llama_cpp.llama_pos, + pos1: llama_cpp.llama_pos, + ): + # Test some basic invariants of this mocking technique + assert ctx == llama._ctx.ctx, "context does not match mock_llama" + return + + monkeypatch.setattr("llama_cpp.llama_cpp.llama_kv_cache_clear", mock_kv_cache_clear) + monkeypatch.setattr("llama_cpp.llama_cpp.llama_kv_cache_seq_rm", mock_kv_cache_seq_rm) + monkeypatch.setattr("llama_cpp.llama_cpp.llama_kv_cache_seq_cp", mock_kv_cache_seq_cp) + monkeypatch.setattr("llama_cpp.llama_cpp.llama_kv_cache_seq_keep", mock_kv_cache_seq_keep) + monkeypatch.setattr("llama_cpp.llama_cpp.llama_kv_cache_seq_add", mock_kv_cache_seq_add) + + return setup_mock + + +def test_llama_patch(mock_llama): + n_ctx = 128 + llama = llama_cpp.Llama(model_path=MODEL, vocab_only=True, n_ctx=n_ctx) + n_vocab = llama_cpp.llama_n_vocab(llama._model.model) + assert n_vocab == 32000 + + text = "The quick brown fox" + output_text = " jumps over the lazy dog." 
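+    # mock_llama gives the expected next token (or EOS once output_text is exhausted) a far
+    # larger logit than any other token, so sampling reproduces all_text and the completion
+    # API can be exercised without loading real model weights.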
+ all_text = text + output_text + + ## Test basic completion from bos until eos + mock_llama(llama, all_text) + completion = llama.create_completion("", max_tokens=36) + assert completion["choices"][0]["text"] == all_text + assert completion["choices"][0]["finish_reason"] == "stop" + + ## Test basic completion until eos + mock_llama(llama, all_text) + completion = llama.create_completion(text, max_tokens=20) + assert completion["choices"][0]["text"] == output_text + assert completion["choices"][0]["finish_reason"] == "stop" + + ## Test streaming completion until eos + mock_llama(llama, all_text) + chunks = list(llama.create_completion(text, max_tokens=20, stream=True)) + assert "".join(chunk["choices"][0]["text"] for chunk in chunks) == output_text + assert chunks[-1]["choices"][0]["finish_reason"] == "stop" + + ## Test basic completion until stop sequence + mock_llama(llama, all_text) + completion = llama.create_completion(text, max_tokens=20, stop=["lazy"]) + assert completion["choices"][0]["text"] == " jumps over the " + assert completion["choices"][0]["finish_reason"] == "stop" + + ## Test streaming completion until stop sequence + mock_llama(llama, all_text) + chunks = list( + llama.create_completion(text, max_tokens=20, stream=True, stop=["lazy"]) + ) + assert ( + "".join(chunk["choices"][0]["text"] for chunk in chunks) == " jumps over the " + ) + assert chunks[-1]["choices"][0]["finish_reason"] == "stop" + + ## Test basic completion until length + mock_llama(llama, all_text) + completion = llama.create_completion(text, max_tokens=2) + assert completion["choices"][0]["text"] == " jumps" + assert completion["choices"][0]["finish_reason"] == "length" + + ## Test streaming completion until length + mock_llama(llama, all_text) + chunks = list(llama.create_completion(text, max_tokens=2, stream=True)) + assert "".join(chunk["choices"][0]["text"] for chunk in chunks) == " jumps" + assert chunks[-1]["choices"][0]["finish_reason"] == "length" + + +def test_llama_pickle(): + import pickle + import tempfile + + fp = tempfile.TemporaryFile() + llama = llama_cpp.Llama(model_path=MODEL, vocab_only=True) + pickle.dump(llama, fp) + fp.seek(0) + llama = pickle.load(fp) + + assert llama + assert llama.ctx is not None + + text = b"Hello World" + + assert llama.detokenize(llama.tokenize(text)) == text + + +def test_utf8(mock_llama): + llama = llama_cpp.Llama(model_path=MODEL, vocab_only=True, logits_all=True) + + output_text = "😀" + + ## Test basic completion with utf8 multibyte + mock_llama(llama, output_text) + completion = llama.create_completion("", max_tokens=4) + assert completion["choices"][0]["text"] == output_text + + ## Test basic completion with incomplete utf8 multibyte + mock_llama(llama, output_text) + completion = llama.create_completion("", max_tokens=1) + assert completion["choices"][0]["text"] == "" + + +def test_llama_server(): + from fastapi.testclient import TestClient + from llama_cpp.server.app import create_app, Settings + + settings = Settings( + model=MODEL, + vocab_only=True, + ) + app = create_app(settings) + client = TestClient(app) + response = client.get("/v1/models") + assert response.json() == { + "object": "list", + "data": [ + { + "id": MODEL, + "object": "model", + "owned_by": "me", + "permissions": [], + } + ], + } + + +@pytest.mark.parametrize( + "size_and_axis", + [ + ((32_000,), -1), # last token's next-token logits + ((10, 32_000), -1), # many tokens' next-token logits, or batch of last tokens + ((4, 10, 32_000), -1), # batch of texts + ], +) 
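+# Each (shape, axis) case above is crossed with list vs. ndarray input to check that
+# Llama.logits_to_logprobs matches scipy's log_softmax for both input types.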
+@pytest.mark.parametrize("convert_to_list", [True, False]) +def test_logits_to_logprobs(size_and_axis, convert_to_list: bool, atol: float = 1e-7): + size, axis = size_and_axis + logits: np.ndarray = -np.random.uniform(low=0, high=60, size=size) + logits = logits.astype(np.single) + if convert_to_list: + # Currently, logits are converted from arrays to lists. This may change soon + logits = logits.tolist() + log_probs = llama_cpp.Llama.logits_to_logprobs(logits, axis=axis) + log_probs_correct = log_softmax(logits, axis=axis) + assert log_probs.dtype == np.single + assert log_probs.shape == size + assert np.allclose(log_probs, log_probs_correct, atol=atol) + + +def test_llama_cpp_version(): + assert llama_cpp.__version__ diff --git a/llama-cpp-python/tests/test_llama_chat_format.py b/llama-cpp-python/tests/test_llama_chat_format.py new file mode 100644 index 0000000000000000000000000000000000000000..c10aee42e0da547428df7cac9845e246badf1803 --- /dev/null +++ b/llama-cpp-python/tests/test_llama_chat_format.py @@ -0,0 +1,88 @@ +import json + +import jinja2 + +from llama_cpp import ( + ChatCompletionRequestUserMessage, +) +import llama_cpp.llama_types as llama_types +import llama_cpp.llama_chat_format as llama_chat_format + +from llama_cpp.llama_chat_format import hf_tokenizer_config_to_chat_formatter + +def test_mistral_instruct(): + chat_template = "{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] == 'user' %}{{ '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ message['content'] + eos_token}}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}" + chat_formatter = jinja2.Template(chat_template) + messages = [ + llama_types.ChatCompletionRequestUserMessage(role="user", content="Instruction"), + llama_types.ChatCompletionRequestAssistantMessage(role="assistant", content="Model answer"), + llama_types.ChatCompletionRequestUserMessage(role="user", content="Follow-up instruction"), + ] + response = llama_chat_format.format_mistral_instruct( + messages=messages, + ) + reference = chat_formatter.render( + messages=messages, + bos_token="", + eos_token="", + ) + assert response.prompt == reference + + +mistral_7b_tokenizer_config = """{ + "add_bos_token": true, + "add_eos_token": false, + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "additional_special_tokens": [], + "bos_token": "", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "legacy": true, + "model_max_length": 1000000000000000019884624838656, + "pad_token": null, + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "LlamaTokenizer", + "unk_token": "", + "use_default_system_prompt": false, + "chat_template": "{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] == 'user' 
%}{{ '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ message['content'] + eos_token}}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}" +}""" + + +def test_hf_tokenizer_config_str_to_chat_formatter(): + tokenizer_config = json.loads(mistral_7b_tokenizer_config) + chat_formatter = hf_tokenizer_config_to_chat_formatter( + tokenizer_config + ) + chat_formatter_respoonse = chat_formatter( + messages=[ + ChatCompletionRequestUserMessage(role="user", content="Hello, world!"), + ] + ) + + assert chat_formatter_respoonse.prompt == ("[INST] Hello, world! [/INST]" "") diff --git a/llama-cpp-python/tests/test_llama_grammar.py b/llama-cpp-python/tests/test_llama_grammar.py new file mode 100644 index 0000000000000000000000000000000000000000..cb221880a66e3c1f2ca15a9df52ac4bcb765e7d4 --- /dev/null +++ b/llama-cpp-python/tests/test_llama_grammar.py @@ -0,0 +1,78 @@ +import llama_cpp +import json + +tree = """ +leaf ::= "." +node ::= leaf | "(" node node ")" +root ::= node +""" + + +def test_grammar_from_string(): + grammar = llama_cpp.LlamaGrammar.from_string(tree) + assert grammar._n_rules == 3 + assert grammar._start_rule_index == 2 + assert grammar.grammar is not None + + +def test_composed_pydantic_grammar(): + """ + from pydantic import BaseModel + + class A(BaseModel): + a: int + + class B(BaseModel): + a: A + b: int + """ + + # This schema corresponds to the grammar in the comment above. + # We don't use the pydantic models directly to avoid the dependency. + schema = { + "$defs": { + "A": { + "properties": {"a": {"title": "A", "type": "integer"}}, + "required": ["a"], + "title": "A", + "type": "object", + } + }, + "properties": { + "a": {"$ref": "#/$defs/A"}, + "b": {"title": "B", "type": "integer"}, + }, + "required": ["a", "b"], + "title": "B", + "type": "object", + } + + grammar = llama_cpp.LlamaGrammar.from_json_schema(json.dumps(schema)) + + assert grammar.grammar is not None + + +def test_grammar_anyof(): + sch = { + "properties": { + "temperature": { + "description": "The temperature mentioned", + "type": "number", + }, + "unit": { + "anyOf": [ + { + "description": "Unit for temperature", + "enum": ["celsius", "fahrenheit"], + "type": "string", + }, + {"type": "null"}, + ], + }, + }, + "type": "object", + } + + grammar = llama_cpp.LlamaGrammar.from_json_schema(json.dumps(sch)) + + assert grammar.grammar is not None \ No newline at end of file diff --git a/llama-cpp-python/tests/test_llama_speculative.py b/llama-cpp-python/tests/test_llama_speculative.py new file mode 100644 index 0000000000000000000000000000000000000000..b5d450567b052dcb5a687b21fcc5d67d2229cca7 --- /dev/null +++ b/llama-cpp-python/tests/test_llama_speculative.py @@ -0,0 +1,16 @@ +import numpy as np + +from llama_cpp.llama_speculative import LlamaPromptLookupDecoding + +def test_find_candidate_pred_tokens(): + find_candidate_pred_tokens = LlamaPromptLookupDecoding.find_candidate_pred_tokens + + # Test Case 1: Matching ngram is found + input_ids1 = np.array([1, 2, 3, 1, 2, 3, 1, 2, 3]) + result1 = find_candidate_pred_tokens(input_ids1, max_ngram_size=3, num_pred_tokens=2) + assert np.array_equal(result1, np.array([1, 2])) + + # Test Case 2: Matching ngram is not found + input_ids2 = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9]) + result2 = find_candidate_pred_tokens(input_ids2, max_ngram_size=3, num_pred_tokens=2) + assert np.array_equal(result2, np.array([])) diff --git a/llama-cpp-python/vendor/llama.cpp/.clang-tidy 
b/llama-cpp-python/vendor/llama.cpp/.clang-tidy new file mode 100644 index 0000000000000000000000000000000000000000..952c0cca82580e2068864c6ee2344c167a4aff02 --- /dev/null +++ b/llama-cpp-python/vendor/llama.cpp/.clang-tidy @@ -0,0 +1,24 @@ +--- +Checks: > + bugprone-*, + -bugprone-easily-swappable-parameters, + -bugprone-implicit-widening-of-multiplication-result, + -bugprone-misplaced-widening-cast, + -bugprone-narrowing-conversions, + readability-*, + -readability-avoid-unconditional-preprocessor-if, + -readability-function-cognitive-complexity, + -readability-identifier-length, + -readability-implicit-bool-conversion, + -readability-magic-numbers, + -readability-uppercase-literal-suffix, + -readability-simplify-boolean-expr, + clang-analyzer-*, + -clang-analyzer-security.insecureAPI.DeprecatedOrUnsafeBufferHandling, + performance-*, + portability-*, + misc-*, + -misc-const-correctness, + -misc-non-private-member-variables-in-classes, + -misc-no-recursion, +FormatStyle: none diff --git a/llama-cpp-python/vendor/llama.cpp/.devops/cloud-v-pipeline b/llama-cpp-python/vendor/llama.cpp/.devops/cloud-v-pipeline new file mode 100644 index 0000000000000000000000000000000000000000..f3a4944f8a419fdaaca79296006511e0bbfd0401 --- /dev/null +++ b/llama-cpp-python/vendor/llama.cpp/.devops/cloud-v-pipeline @@ -0,0 +1,22 @@ +node('x86_runner1'){ // Running on x86 runner containing latest vector qemu, latest vector gcc and all the necessary libraries + stage('Cleanup'){ + cleanWs() // Cleaning previous CI build in workspace + } + stage('checkout repo'){ + retry(5){ // Retry if the cloning fails due to some reason + checkout scm // Clone the repo on Runner + } + } + stage('Compiling llama.cpp'){ + sh'''#!/bin/bash + make RISCV=1 RISCV_CROSS_COMPILE=1 # Compiling llama for RISC-V + ''' + } + stage('Running llama.cpp'){ + sh'''#!/bin/bash + module load gnu-bin2/0.1 # loading latest versions of vector qemu and vector gcc + qemu-riscv64 -L /softwares/gnu-bin2/sysroot -cpu rv64,v=true,vlen=256,elen=64,vext_spec=v1.0 ./main -m /home/alitariq/codellama-7b.Q4_K_M.gguf -p "Anything" -n 9 > llama_log.txt # Running llama.cpp on vector qemu-riscv64 + cat llama_log.txt # Printing results + ''' + } +} diff --git a/llama-cpp-python/vendor/llama.cpp/.devops/full-cuda.Dockerfile b/llama-cpp-python/vendor/llama.cpp/.devops/full-cuda.Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..059fd26950607a9a911e0a8d9e2c6055fb118f19 --- /dev/null +++ b/llama-cpp-python/vendor/llama.cpp/.devops/full-cuda.Dockerfile @@ -0,0 +1,36 @@ +ARG UBUNTU_VERSION=22.04 + +# This needs to generally match the container host's environment. +ARG CUDA_VERSION=11.7.1 + +# Target the CUDA build image +ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION} + +FROM ${BASE_CUDA_DEV_CONTAINER} as build + +# Unless otherwise specified, we make a fat build. +ARG CUDA_DOCKER_ARCH=all + +RUN apt-get update && \ + apt-get install -y build-essential python3 python3-pip git libcurl4-openssl-dev + +COPY requirements.txt requirements.txt +COPY requirements requirements + +RUN pip install --upgrade pip setuptools wheel \ + && pip install -r requirements.txt + +WORKDIR /app + +COPY . . 
+ +# Set nvcc architecture +ENV CUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH} +# Enable CUDA +ENV LLAMA_CUDA=1 +# Enable cURL +ENV LLAMA_CURL=1 + +RUN make + +ENTRYPOINT ["/app/.devops/tools.sh"] diff --git a/llama-cpp-python/vendor/llama.cpp/.devops/full-rocm.Dockerfile b/llama-cpp-python/vendor/llama.cpp/.devops/full-rocm.Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..6ecf3bcc7cb83817ef03ee60b46cdb25ece6cf81 --- /dev/null +++ b/llama-cpp-python/vendor/llama.cpp/.devops/full-rocm.Dockerfile @@ -0,0 +1,50 @@ +ARG UBUNTU_VERSION=22.04 + +# This needs to generally match the container host's environment. +ARG ROCM_VERSION=5.6 + +# Target the CUDA build image +ARG BASE_ROCM_DEV_CONTAINER=rocm/dev-ubuntu-${UBUNTU_VERSION}:${ROCM_VERSION}-complete + +FROM ${BASE_ROCM_DEV_CONTAINER} as build + +# Unless otherwise specified, we make a fat build. +# List from https://github.com/ggerganov/llama.cpp/pull/1087#issuecomment-1682807878 +# This is mostly tied to rocBLAS supported archs. +ARG ROCM_DOCKER_ARCH=\ + gfx803 \ + gfx900 \ + gfx906 \ + gfx908 \ + gfx90a \ + gfx1010 \ + gfx1030 \ + gfx1100 \ + gfx1101 \ + gfx1102 + +COPY requirements.txt requirements.txt +COPY requirements requirements + +RUN pip install --upgrade pip setuptools wheel \ + && pip install -r requirements.txt + +WORKDIR /app + +COPY . . + +# Set nvcc architecture +ENV GPU_TARGETS=${ROCM_DOCKER_ARCH} +# Enable ROCm +ENV LLAMA_HIPBLAS=1 +ENV CC=/opt/rocm/llvm/bin/clang +ENV CXX=/opt/rocm/llvm/bin/clang++ + +# Enable cURL +ENV LLAMA_CURL=1 +RUN apt-get update && \ + apt-get install -y libcurl4-openssl-dev + +RUN make + +ENTRYPOINT ["/app/.devops/tools.sh"] diff --git a/llama-cpp-python/vendor/llama.cpp/.devops/full.Dockerfile b/llama-cpp-python/vendor/llama.cpp/.devops/full.Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..432fb5dadcbca5f73f7d87e70190a26aa663019b --- /dev/null +++ b/llama-cpp-python/vendor/llama.cpp/.devops/full.Dockerfile @@ -0,0 +1,25 @@ +ARG UBUNTU_VERSION=22.04 + +FROM ubuntu:$UBUNTU_VERSION as build + +RUN apt-get update && \ + apt-get install -y build-essential python3 python3-pip git libcurl4-openssl-dev + +COPY requirements.txt requirements.txt +COPY requirements requirements + +RUN pip install --upgrade pip setuptools wheel \ + && pip install -r requirements.txt + +WORKDIR /app + +COPY . . + +ENV LLAMA_CURL=1 + + +RUN make + +ENV LC_ALL=C.utf8 + +ENTRYPOINT ["/app/.devops/tools.sh"] diff --git a/llama-cpp-python/vendor/llama.cpp/.devops/llama-cpp-clblast.srpm.spec b/llama-cpp-python/vendor/llama.cpp/.devops/llama-cpp-clblast.srpm.spec new file mode 100644 index 0000000000000000000000000000000000000000..774f63ddd5c4ea0c7753dae1ec615b13ecdda46d --- /dev/null +++ b/llama-cpp-python/vendor/llama.cpp/.devops/llama-cpp-clblast.srpm.spec @@ -0,0 +1,84 @@ +# SRPM for building from source and packaging an RPM for RPM-based distros. +# https://docs.fedoraproject.org/en-US/quick-docs/creating-rpm-packages +# Built and maintained by John Boero - boeroboy@gmail.com +# In honor of Seth Vidal https://www.redhat.com/it/blog/thank-you-seth-vidal + +# Notes for llama.cpp: +# 1. Tags are currently based on hash - which will not sort asciibetically. +# We need to declare standard versioning if people want to sort latest releases. +# 2. Builds for CUDA/OpenCL support are separate, with different depenedencies. +# 3. NVidia's developer repo must be enabled with nvcc, cublas, clblas, etc installed. 
+# Example: https://developer.download.nvidia.com/compute/cuda/repos/fedora37/x86_64/cuda-fedora37.repo +# 4. OpenCL/CLBLAST support simply requires the ICD loader and basic opencl libraries. +# It is up to the user to install the correct vendor-specific support. + +Name: llama.cpp-clblast +Version: %( date "+%%Y%%m%%d" ) +Release: 1%{?dist} +Summary: OpenCL Inference of LLaMA model in C/C++ +License: MIT +Source0: https://github.com/ggerganov/llama.cpp/archive/refs/heads/master.tar.gz +BuildRequires: coreutils make gcc-c++ git mesa-libOpenCL-devel clblast-devel +Requires: clblast +URL: https://github.com/ggerganov/llama.cpp + +%define debug_package %{nil} +%define source_date_epoch_from_changelog 0 + +%description +CPU inference for Meta's Lllama2 models using default options. + +%prep +%setup -n llama.cpp-master + +%build +make -j LLAMA_CLBLAST=1 + +%install +mkdir -p %{buildroot}%{_bindir}/ +cp -p main %{buildroot}%{_bindir}/llamaclblast +cp -p server %{buildroot}%{_bindir}/llamaclblastserver +cp -p simple %{buildroot}%{_bindir}/llamaclblastsimple + +mkdir -p %{buildroot}/usr/lib/systemd/system +%{__cat} < %{buildroot}/usr/lib/systemd/system/llamaclblast.service +[Unit] +Description=Llama.cpp server, CPU only (no GPU support in this build). +After=syslog.target network.target local-fs.target remote-fs.target nss-lookup.target + +[Service] +Type=simple +EnvironmentFile=/etc/sysconfig/llama +ExecStart=/usr/bin/llamaclblastserver $LLAMA_ARGS +ExecReload=/bin/kill -s HUP $MAINPID +Restart=never + +[Install] +WantedBy=default.target +EOF + +mkdir -p %{buildroot}/etc/sysconfig +%{__cat} < %{buildroot}/etc/sysconfig/llama +LLAMA_ARGS="-m /opt/llama2/ggml-model-f32.bin" +EOF + +%clean +rm -rf %{buildroot} +rm -rf %{_builddir}/* + +%files +%{_bindir}/llamaclblast +%{_bindir}/llamaclblastserver +%{_bindir}/llamaclblastsimple +/usr/lib/systemd/system/llamaclblast.service +%config /etc/sysconfig/llama + + +%pre + +%post + +%preun +%postun + +%changelog diff --git a/llama-cpp-python/vendor/llama.cpp/.devops/llama-cpp-cuda.srpm.spec b/llama-cpp-python/vendor/llama.cpp/.devops/llama-cpp-cuda.srpm.spec new file mode 100644 index 0000000000000000000000000000000000000000..ba9cb7cbb824fc03f8274bf26494d1a5d946ad73 --- /dev/null +++ b/llama-cpp-python/vendor/llama.cpp/.devops/llama-cpp-cuda.srpm.spec @@ -0,0 +1,83 @@ +# SRPM for building from source and packaging an RPM for RPM-based distros. +# https://docs.fedoraproject.org/en-US/quick-docs/creating-rpm-packages +# Built and maintained by John Boero - boeroboy@gmail.com +# In honor of Seth Vidal https://www.redhat.com/it/blog/thank-you-seth-vidal + +# Notes for llama.cpp: +# 1. Tags are currently based on hash - which will not sort asciibetically. +# We need to declare standard versioning if people want to sort latest releases. +# 2. Builds for CUDA/OpenCL support are separate, with different depenedencies. +# 3. NVidia's developer repo must be enabled with nvcc, cublas, clblas, etc installed. +# Example: https://developer.download.nvidia.com/compute/cuda/repos/fedora37/x86_64/cuda-fedora37.repo +# 4. OpenCL/CLBLAST support simply requires the ICD loader and basic opencl libraries. +# It is up to the user to install the correct vendor-specific support. 
+ +Name: llama.cpp-cuda +Version: %( date "+%%Y%%m%%d" ) +Release: 1%{?dist} +Summary: CPU Inference of LLaMA model in pure C/C++ (no CUDA/OpenCL) +License: MIT +Source0: https://github.com/ggerganov/llama.cpp/archive/refs/heads/master.tar.gz +BuildRequires: coreutils make gcc-c++ git cuda-toolkit +Requires: cuda-toolkit +URL: https://github.com/ggerganov/llama.cpp + +%define debug_package %{nil} +%define source_date_epoch_from_changelog 0 + +%description +CPU inference for Meta's Lllama2 models using default options. + +%prep +%setup -n llama.cpp-master + +%build +make -j LLAMA_CUDA=1 + +%install +mkdir -p %{buildroot}%{_bindir}/ +cp -p main %{buildroot}%{_bindir}/llamacppcuda +cp -p server %{buildroot}%{_bindir}/llamacppcudaserver +cp -p simple %{buildroot}%{_bindir}/llamacppcudasimple + +mkdir -p %{buildroot}/usr/lib/systemd/system +%{__cat} < %{buildroot}/usr/lib/systemd/system/llamacuda.service +[Unit] +Description=Llama.cpp server, CPU only (no GPU support in this build). +After=syslog.target network.target local-fs.target remote-fs.target nss-lookup.target + +[Service] +Type=simple +EnvironmentFile=/etc/sysconfig/llama +ExecStart=/usr/bin/llamacppcudaserver $LLAMA_ARGS +ExecReload=/bin/kill -s HUP $MAINPID +Restart=never + +[Install] +WantedBy=default.target +EOF + +mkdir -p %{buildroot}/etc/sysconfig +%{__cat} < %{buildroot}/etc/sysconfig/llama +LLAMA_ARGS="-m /opt/llama2/ggml-model-f32.bin" +EOF + +%clean +rm -rf %{buildroot} +rm -rf %{_builddir}/* + +%files +%{_bindir}/llamacppcuda +%{_bindir}/llamacppcudaserver +%{_bindir}/llamacppcudasimple +/usr/lib/systemd/system/llamacuda.service +%config /etc/sysconfig/llama + +%pre + +%post + +%preun +%postun + +%changelog diff --git a/llama-cpp-python/vendor/llama.cpp/.devops/llama-cpp.srpm.spec b/llama-cpp-python/vendor/llama.cpp/.devops/llama-cpp.srpm.spec new file mode 100644 index 0000000000000000000000000000000000000000..1d9e4f425b43add0b46fbc90cd79160f85915ec2 --- /dev/null +++ b/llama-cpp-python/vendor/llama.cpp/.devops/llama-cpp.srpm.spec @@ -0,0 +1,85 @@ +# SRPM for building from source and packaging an RPM for RPM-based distros. +# https://docs.fedoraproject.org/en-US/quick-docs/creating-rpm-packages +# Built and maintained by John Boero - boeroboy@gmail.com +# In honor of Seth Vidal https://www.redhat.com/it/blog/thank-you-seth-vidal + +# Notes for llama.cpp: +# 1. Tags are currently based on hash - which will not sort asciibetically. +# We need to declare standard versioning if people want to sort latest releases. +# In the meantime, YYYYMMDD format will be used. +# 2. Builds for CUDA/OpenCL support are separate, with different depenedencies. +# 3. NVidia's developer repo must be enabled with nvcc, cublas, clblas, etc installed. +# Example: https://developer.download.nvidia.com/compute/cuda/repos/fedora37/x86_64/cuda-fedora37.repo +# 4. OpenCL/CLBLAST support simply requires the ICD loader and basic opencl libraries. +# It is up to the user to install the correct vendor-specific support. + +Name: llama.cpp +Version: %( date "+%%Y%%m%%d" ) +Release: 1%{?dist} +Summary: CPU Inference of LLaMA model in pure C/C++ (no CUDA/OpenCL) +License: MIT +Source0: https://github.com/ggerganov/llama.cpp/archive/refs/heads/master.tar.gz +BuildRequires: coreutils make gcc-c++ git libstdc++-devel +Requires: libstdc++ +URL: https://github.com/ggerganov/llama.cpp + +%define debug_package %{nil} +%define source_date_epoch_from_changelog 0 + +%description +CPU inference for Meta's Lllama2 models using default options. 
+Models are not included in this package and must be downloaded separately. + +%prep +%setup -n llama.cpp-master + +%build +make -j + +%install +mkdir -p %{buildroot}%{_bindir}/ +cp -p main %{buildroot}%{_bindir}/llama +cp -p server %{buildroot}%{_bindir}/llamaserver +cp -p simple %{buildroot}%{_bindir}/llamasimple + +mkdir -p %{buildroot}/usr/lib/systemd/system +%{__cat} < %{buildroot}/usr/lib/systemd/system/llama.service +[Unit] +Description=Llama.cpp server, CPU only (no GPU support in this build). +After=syslog.target network.target local-fs.target remote-fs.target nss-lookup.target + +[Service] +Type=simple +EnvironmentFile=/etc/sysconfig/llama +ExecStart=/usr/bin/llamaserver $LLAMA_ARGS +ExecReload=/bin/kill -s HUP $MAINPID +Restart=never + +[Install] +WantedBy=default.target +EOF + +mkdir -p %{buildroot}/etc/sysconfig +%{__cat} < %{buildroot}/etc/sysconfig/llama +LLAMA_ARGS="-m /opt/llama2/ggml-model-f32.bin" +EOF + +%clean +rm -rf %{buildroot} +rm -rf %{_builddir}/* + +%files +%{_bindir}/llama +%{_bindir}/llamaserver +%{_bindir}/llamasimple +/usr/lib/systemd/system/llama.service +%config /etc/sysconfig/llama + +%pre + +%post + +%preun +%postun + +%changelog diff --git a/llama-cpp-python/vendor/llama.cpp/.devops/main-cuda.Dockerfile b/llama-cpp-python/vendor/llama.cpp/.devops/main-cuda.Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..b937a482988b6cd2f87acbcdafa7a9a1fada78c3 --- /dev/null +++ b/llama-cpp-python/vendor/llama.cpp/.devops/main-cuda.Dockerfile @@ -0,0 +1,32 @@ +ARG UBUNTU_VERSION=22.04 +# This needs to generally match the container host's environment. +ARG CUDA_VERSION=11.7.1 +# Target the CUDA build image +ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION} +# Target the CUDA runtime image +ARG BASE_CUDA_RUN_CONTAINER=nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION} + +FROM ${BASE_CUDA_DEV_CONTAINER} as build + +# Unless otherwise specified, we make a fat build. +ARG CUDA_DOCKER_ARCH=all + +RUN apt-get update && \ + apt-get install -y build-essential git + +WORKDIR /app + +COPY . . + +# Set nvcc architecture +ENV CUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH} +# Enable CUDA +ENV LLAMA_CUDA=1 + +RUN make + +FROM ${BASE_CUDA_RUN_CONTAINER} as runtime + +COPY --from=build /app/main /main + +ENTRYPOINT [ "/main" ] diff --git a/llama-cpp-python/vendor/llama.cpp/.devops/main-intel.Dockerfile b/llama-cpp-python/vendor/llama.cpp/.devops/main-intel.Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..274b91b71bfbae8065a29caf634252d0449011be --- /dev/null +++ b/llama-cpp-python/vendor/llama.cpp/.devops/main-intel.Dockerfile @@ -0,0 +1,26 @@ +ARG ONEAPI_VERSION=2024.0.1-devel-ubuntu22.04 + +FROM intel/oneapi-basekit:$ONEAPI_VERSION as build + +ARG LLAMA_SYCL_F16=OFF +RUN apt-get update && \ + apt-get install -y git + +WORKDIR /app + +COPY . . 
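+# FP16 SYCL kernels are opt-in: build with --build-arg LLAMA_SYCL_F16=ON to pass -DLLAMA_SYCL_F16=ON to cmake below.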
+ +RUN if [ "${LLAMA_SYCL_F16}" = "ON" ]; then \ + echo "LLAMA_SYCL_F16 is set" && \ + export OPT_SYCL_F16="-DLLAMA_SYCL_F16=ON"; \ + fi && \ + cmake -B build -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx ${OPT_SYCL_F16} && \ + cmake --build build --config Release --target main + +FROM intel/oneapi-basekit:$ONEAPI_VERSION as runtime + +COPY --from=build /app/build/bin/main /main + +ENV LC_ALL=C.utf8 + +ENTRYPOINT [ "/main" ] diff --git a/llama-cpp-python/vendor/llama.cpp/.devops/main-rocm.Dockerfile b/llama-cpp-python/vendor/llama.cpp/.devops/main-rocm.Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..0a706dc73227d0b73aecac0a58baaaeccbd791bd --- /dev/null +++ b/llama-cpp-python/vendor/llama.cpp/.devops/main-rocm.Dockerfile @@ -0,0 +1,45 @@ +ARG UBUNTU_VERSION=22.04 + +# This needs to generally match the container host's environment. +ARG ROCM_VERSION=5.6 + +# Target the CUDA build image +ARG BASE_ROCM_DEV_CONTAINER=rocm/dev-ubuntu-${UBUNTU_VERSION}:${ROCM_VERSION}-complete + +FROM ${BASE_ROCM_DEV_CONTAINER} as build + +# Unless otherwise specified, we make a fat build. +# List from https://github.com/ggerganov/llama.cpp/pull/1087#issuecomment-1682807878 +# This is mostly tied to rocBLAS supported archs. +ARG ROCM_DOCKER_ARCH=\ + gfx803 \ + gfx900 \ + gfx906 \ + gfx908 \ + gfx90a \ + gfx1010 \ + gfx1030 \ + gfx1100 \ + gfx1101 \ + gfx1102 + +COPY requirements.txt requirements.txt +COPY requirements requirements + +RUN pip install --upgrade pip setuptools wheel \ + && pip install -r requirements.txt + +WORKDIR /app + +COPY . . + +# Set nvcc architecture +ENV GPU_TARGETS=${ROCM_DOCKER_ARCH} +# Enable ROCm +ENV LLAMA_HIPBLAS=1 +ENV CC=/opt/rocm/llvm/bin/clang +ENV CXX=/opt/rocm/llvm/bin/clang++ + +RUN make + +ENTRYPOINT [ "/app/main" ] diff --git a/llama-cpp-python/vendor/llama.cpp/.devops/main-vulkan.Dockerfile b/llama-cpp-python/vendor/llama.cpp/.devops/main-vulkan.Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..6c2b2ed5b05d88f33ee3e9914352685473342b94 --- /dev/null +++ b/llama-cpp-python/vendor/llama.cpp/.devops/main-vulkan.Dockerfile @@ -0,0 +1,27 @@ +ARG UBUNTU_VERSION=jammy + +FROM ubuntu:$UBUNTU_VERSION as build + +# Install build tools +RUN apt update && apt install -y git build-essential cmake wget + +# Install Vulkan SDK +RUN wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | apt-key add - && \ + wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list && \ + apt update -y && \ + apt-get install -y vulkan-sdk + +# Build it +WORKDIR /app +COPY . . +RUN cmake -B build -DLLAMA_VULKAN=1 && \ + cmake --build build --config Release --target main + +# Clean up +WORKDIR / +RUN cp /app/build/bin/main /main && \ + rm -rf /app + +ENV LC_ALL=C.utf8 + +ENTRYPOINT [ "/main" ] diff --git a/llama-cpp-python/vendor/llama.cpp/.devops/main.Dockerfile b/llama-cpp-python/vendor/llama.cpp/.devops/main.Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..3ab1decd6c2b5515eed5f7254026cc7f08acb081 --- /dev/null +++ b/llama-cpp-python/vendor/llama.cpp/.devops/main.Dockerfile @@ -0,0 +1,20 @@ +ARG UBUNTU_VERSION=22.04 + +FROM ubuntu:$UBUNTU_VERSION as build + +RUN apt-get update && \ + apt-get install -y build-essential git + +WORKDIR /app + +COPY . . 
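+# Plain CPU build; only the resulting /app/main binary is copied into the runtime stage below.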
+ +RUN make + +FROM ubuntu:$UBUNTU_VERSION as runtime + +COPY --from=build /app/main /main + +ENV LC_ALL=C.utf8 + +ENTRYPOINT [ "/main" ] diff --git a/llama-cpp-python/vendor/llama.cpp/.devops/nix/apps.nix b/llama-cpp-python/vendor/llama.cpp/.devops/nix/apps.nix new file mode 100644 index 0000000000000000000000000000000000000000..b8a12cc0a0463063a6083acd16e4bf6491420a77 --- /dev/null +++ b/llama-cpp-python/vendor/llama.cpp/.devops/nix/apps.nix @@ -0,0 +1,22 @@ +{ + perSystem = + { config, lib, ... }: + { + apps = + let + inherit (config.packages) default; + binaries = [ + "llama" + "llama-embedding" + "llama-server" + "quantize" + "train-text-from-scratch" + ]; + mkApp = name: { + type = "app"; + program = "${default}/bin/${name}"; + }; + in + lib.genAttrs binaries mkApp; + }; +} diff --git a/llama-cpp-python/vendor/llama.cpp/.devops/nix/devshells.nix b/llama-cpp-python/vendor/llama.cpp/.devops/nix/devshells.nix new file mode 100644 index 0000000000000000000000000000000000000000..1862f0f085100117c75a66088cd614c9eee80202 --- /dev/null +++ b/llama-cpp-python/vendor/llama.cpp/.devops/nix/devshells.nix @@ -0,0 +1,13 @@ +{ + perSystem = + { config, lib, ... }: + { + devShells = + lib.concatMapAttrs + (name: package: { + ${name} = package.passthru.shell; + ${name + "-extra"} = package.passthru.shell-extra; + }) + config.packages; + }; +} diff --git a/llama-cpp-python/vendor/llama.cpp/.devops/nix/docker.nix b/llama-cpp-python/vendor/llama.cpp/.devops/nix/docker.nix new file mode 100644 index 0000000000000000000000000000000000000000..d607b4575772c5330e962649ff6e14e5562ecfad --- /dev/null +++ b/llama-cpp-python/vendor/llama.cpp/.devops/nix/docker.nix @@ -0,0 +1,37 @@ +{ + lib, + dockerTools, + buildEnv, + llama-cpp, + interactive ? true, + coreutils, +}: + +# A tar that can be fed into `docker load`: +# +# $ nix build .#llamaPackages.docker +# $ docker load < result + +# For details and variations cf. +# - https://nixos.org/manual/nixpkgs/unstable/#ssec-pkgs-dockerTools-buildLayeredImage +# - https://discourse.nixos.org/t/a-faster-dockertools-buildimage-prototype/16922 +# - https://nixery.dev/ + +# Approximate (compressed) sizes, at the time of writing, are: +# +# .#llamaPackages.docker: 125M; +# .#llamaPackagesCuda.docker: 537M; +# .#legacyPackages.aarch64-linux.llamaPackagesXavier.docker: 415M. + +dockerTools.buildLayeredImage { + name = llama-cpp.pname; + tag = "latest"; + + contents = + [ llama-cpp ] + ++ lib.optionals interactive [ + coreutils + dockerTools.binSh + dockerTools.caCertificates + ]; +} diff --git a/llama-cpp-python/vendor/llama.cpp/.devops/nix/jetson-support.nix b/llama-cpp-python/vendor/llama.cpp/.devops/nix/jetson-support.nix new file mode 100644 index 0000000000000000000000000000000000000000..78e2e40e03864e3df046389f7b751a1fd4575656 --- /dev/null +++ b/llama-cpp-python/vendor/llama.cpp/.devops/nix/jetson-support.nix @@ -0,0 +1,39 @@ +{ inputs, ... }: +{ + perSystem = + { + config, + system, + lib, + pkgsCuda, + ... 
+ }: + { + legacyPackages = + let + caps.llamaPackagesXavier = "7.2"; + caps.llamaPackagesOrin = "8.7"; + caps.llamaPackagesTX2 = "6.2"; + caps.llamaPackagesNano = "5.3"; + + pkgsFor = + cap: + import inputs.nixpkgs { + inherit system; + config = { + cudaSupport = true; + cudaCapabilities = [ cap ]; + cudaEnableForwardCompat = false; + inherit (pkgsCuda.config) allowUnfreePredicate; + }; + }; + in + builtins.mapAttrs (name: cap: (pkgsFor cap).callPackage ./scope.nix { }) caps; + + packages = lib.optionalAttrs (system == "aarch64-linux") { + jetson-xavier = config.legacyPackages.llamaPackagesXavier.llama-cpp; + jetson-orin = config.legacyPackages.llamaPackagesOrin.llama-cpp; + jetson-nano = config.legacyPackages.llamaPackagesNano.llama-cpp; + }; + }; +} diff --git a/llama-cpp-python/vendor/llama.cpp/.devops/nix/nixpkgs-instances.nix b/llama-cpp-python/vendor/llama.cpp/.devops/nix/nixpkgs-instances.nix new file mode 100644 index 0000000000000000000000000000000000000000..4a2f81c4bfd044835a89bbfca09e039172dee354 --- /dev/null +++ b/llama-cpp-python/vendor/llama.cpp/.devops/nix/nixpkgs-instances.nix @@ -0,0 +1,47 @@ +{ inputs, ... }: +{ + # The _module.args definitions are passed on to modules as arguments. E.g. + # the module `{ pkgs ... }: { /* config */ }` implicitly uses + # `_module.args.pkgs` (defined in this case by flake-parts). + perSystem = + { system, ... }: + { + _module.args = { + # Note: bringing up https://zimbatm.com/notes/1000-instances-of-nixpkgs + # again, the below creates several nixpkgs instances which the + # flake-centric CLI will be forced to evaluate e.g. on `nix flake show`. + # + # This is currently "slow" and "expensive", on a certain scale. + # This also isn't "right" in that this hinders dependency injection at + # the level of flake inputs. This might get removed in the foreseeable + # future. + # + # Note that you can use these expressions without Nix + # (`pkgs.callPackage ./devops/nix/scope.nix { }` is the entry point). + + pkgsCuda = import inputs.nixpkgs { + inherit system; + # Ensure dependencies use CUDA consistently (e.g. that openmpi, ucc, + # and ucx are built with CUDA support) + config.cudaSupport = true; + config.allowUnfreePredicate = + p: + builtins.all + ( + license: + license.free + || builtins.elem license.shortName [ + "CUDA EULA" + "cuDNN EULA" + ] + ) + (p.meta.licenses or [ p.meta.license ]); + }; + # Ensure dependencies use ROCm consistently + pkgsRocm = import inputs.nixpkgs { + inherit system; + config.rocmSupport = true; + }; + }; + }; +} diff --git a/llama-cpp-python/vendor/llama.cpp/.devops/nix/package.nix b/llama-cpp-python/vendor/llama.cpp/.devops/nix/package.nix new file mode 100644 index 0000000000000000000000000000000000000000..2c0ae4e2a071b2c6bd0d9c95de4a8f1fe027eac8 --- /dev/null +++ b/llama-cpp-python/vendor/llama.cpp/.devops/nix/package.nix @@ -0,0 +1,319 @@ +{ + lib, + glibc, + config, + stdenv, + mkShell, + runCommand, + cmake, + ninja, + pkg-config, + git, + python3, + mpi, + blas, + cudaPackages, + darwin, + rocmPackages, + vulkan-headers, + vulkan-loader, + clblast, + useBlas ? builtins.all (x: !x) [ + useCuda + useMetalKit + useOpenCL + useRocm + useVulkan + ] && blas.meta.available, + useCuda ? config.cudaSupport, + useMetalKit ? stdenv.isAarch64 && stdenv.isDarwin && !useOpenCL, + useMpi ? false, # Increases the runtime closure size by ~700M + useOpenCL ? false, + useRocm ? config.rocmSupport, + useVulkan ? false, + llamaVersion ? 
"0.0.0", # Arbitrary version, substituted by the flake + + # It's necessary to consistently use backendStdenv when building with CUDA support, + # otherwise we get libstdc++ errors downstream. + effectiveStdenv ? if useCuda then cudaPackages.backendStdenv else stdenv, + enableStatic ? effectiveStdenv.hostPlatform.isStatic, + precompileMetalShaders ? false +}@inputs: + +let + inherit (lib) + cmakeBool + cmakeFeature + optionals + strings + versionOlder + ; + + stdenv = throw "Use effectiveStdenv instead"; + + suffices = + lib.optionals useBlas [ "BLAS" ] + ++ lib.optionals useCuda [ "CUDA" ] + ++ lib.optionals useMetalKit [ "MetalKit" ] + ++ lib.optionals useMpi [ "MPI" ] + ++ lib.optionals useOpenCL [ "OpenCL" ] + ++ lib.optionals useRocm [ "ROCm" ] + ++ lib.optionals useVulkan [ "Vulkan" ]; + + pnameSuffix = + strings.optionalString (suffices != [ ]) + "-${strings.concatMapStringsSep "-" strings.toLower suffices}"; + descriptionSuffix = + strings.optionalString (suffices != [ ]) + ", accelerated with ${strings.concatStringsSep ", " suffices}"; + + executableSuffix = effectiveStdenv.hostPlatform.extensions.executable; + + # TODO: package the Python in this repository in a Nix-like way. + # It'd be nice to migrate to buildPythonPackage, as well as ensure this repo + # is PEP 517-compatible, and ensure the correct .dist-info is generated. + # https://peps.python.org/pep-0517/ + # + # TODO: Package up each Python script or service appropriately, by making + # them into "entrypoints" + llama-python = python3.withPackages ( + ps: [ + ps.numpy + ps.sentencepiece + ] + ); + + # TODO(Green-Sky): find a better way to opt-into the heavy ml python runtime + llama-python-extra = python3.withPackages ( + ps: [ + ps.numpy + ps.sentencepiece + ps.tiktoken + ps.torchWithoutCuda + ps.transformers + ] + ); + + xcrunHost = runCommand "xcrunHost" {} '' + mkdir -p $out/bin + ln -s /usr/bin/xcrun $out/bin + ''; + + # apple_sdk is supposed to choose sane defaults, no need to handle isAarch64 + # separately + darwinBuildInputs = + with darwin.apple_sdk.frameworks; + [ + Accelerate + CoreVideo + CoreGraphics + ] + ++ optionals useMetalKit [ MetalKit ]; + + cudaBuildInputs = with cudaPackages; [ + cuda_cccl.dev # + + # A temporary hack for reducing the closure size, remove once cudaPackages + # have stopped using lndir: https://github.com/NixOS/nixpkgs/issues/271792 + cuda_cudart.dev + cuda_cudart.lib + cuda_cudart.static + libcublas.dev + libcublas.lib + libcublas.static + ]; + + rocmBuildInputs = with rocmPackages; [ + clr + hipblas + rocblas + ]; + + vulkanBuildInputs = [ + vulkan-headers + vulkan-loader + ]; +in + +effectiveStdenv.mkDerivation ( + finalAttrs: { + pname = "llama-cpp${pnameSuffix}"; + version = llamaVersion; + + # Note: none of the files discarded here are visible in the sandbox or + # affect the output hash. This also means they can be modified without + # triggering a rebuild. + src = lib.cleanSourceWith { + filter = + name: type: + let + noneOf = builtins.all (x: !x); + baseName = baseNameOf name; + in + noneOf [ + (lib.hasSuffix ".nix" name) # Ignore *.nix files when computing outPaths + (lib.hasSuffix ".md" name) # Ignore *.md changes whe computing outPaths + (lib.hasPrefix "." 
baseName) # Skip hidden files and directories + (baseName == "flake.lock") + ]; + src = lib.cleanSource ../../.; + }; + + postPatch = '' + substituteInPlace ./ggml-metal.m \ + --replace '[bundle pathForResource:@"ggml-metal" ofType:@"metal"];' "@\"$out/bin/ggml-metal.metal\";" + substituteInPlace ./ggml-metal.m \ + --replace '[bundle pathForResource:@"default" ofType:@"metallib"];' "@\"$out/bin/default.metallib\";" + ''; + + # With PR#6015 https://github.com/ggerganov/llama.cpp/pull/6015, + # `default.metallib` may be compiled with Metal compiler from XCode + # and we need to escape sandbox on MacOS to access Metal compiler. + # `xcrun` is used find the path of the Metal compiler, which is varible + # and not on $PATH + # see https://github.com/ggerganov/llama.cpp/pull/6118 for discussion + __noChroot = effectiveStdenv.isDarwin && useMetalKit && precompileMetalShaders; + + nativeBuildInputs = + [ + cmake + ninja + pkg-config + git + ] + ++ optionals useCuda [ + cudaPackages.cuda_nvcc + + # TODO: Replace with autoAddDriverRunpath + # once https://github.com/NixOS/nixpkgs/pull/275241 has been merged + cudaPackages.autoAddOpenGLRunpathHook + ] + ++ optionals (effectiveStdenv.hostPlatform.isGnu && enableStatic) [ + glibc.static + ] ++ optionals (effectiveStdenv.isDarwin && useMetalKit && precompileMetalShaders) [ + xcrunHost + ]; + + buildInputs = + optionals effectiveStdenv.isDarwin darwinBuildInputs + ++ optionals useCuda cudaBuildInputs + ++ optionals useMpi [ mpi ] + ++ optionals useOpenCL [ clblast ] + ++ optionals useRocm rocmBuildInputs + ++ optionals useBlas [ blas ] + ++ optionals useVulkan vulkanBuildInputs; + + cmakeFlags = + [ + (cmakeBool "LLAMA_NATIVE" false) + (cmakeBool "LLAMA_BUILD_SERVER" true) + (cmakeBool "BUILD_SHARED_LIBS" (!enableStatic)) + (cmakeBool "CMAKE_SKIP_BUILD_RPATH" true) + (cmakeBool "LLAMA_BLAS" useBlas) + (cmakeBool "LLAMA_CLBLAST" useOpenCL) + (cmakeBool "LLAMA_CUDA" useCuda) + (cmakeBool "LLAMA_HIPBLAS" useRocm) + (cmakeBool "LLAMA_METAL" useMetalKit) + (cmakeBool "LLAMA_MPI" useMpi) + (cmakeBool "LLAMA_VULKAN" useVulkan) + (cmakeBool "LLAMA_STATIC" enableStatic) + ] + ++ optionals useCuda [ + ( + with cudaPackages.flags; + cmakeFeature "CMAKE_CUDA_ARCHITECTURES" ( + builtins.concatStringsSep ";" (map dropDot cudaCapabilities) + ) + ) + ] + ++ optionals useRocm [ + (cmakeFeature "CMAKE_C_COMPILER" "hipcc") + (cmakeFeature "CMAKE_CXX_COMPILER" "hipcc") + + # Build all targets supported by rocBLAS. When updating search for TARGET_LIST_ROCM + # in https://github.com/ROCmSoftwarePlatform/rocBLAS/blob/develop/CMakeLists.txt + # and select the line that matches the current nixpkgs version of rocBLAS. + # Should likely use `rocmPackages.clr.gpuTargets`. + "-DAMDGPU_TARGETS=gfx803;gfx900;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack+;gfx90a:xnack-;gfx940;gfx941;gfx942;gfx1010;gfx1012;gfx1030;gfx1100;gfx1101;gfx1102" + ] + ++ optionals useMetalKit [ + (lib.cmakeFeature "CMAKE_C_FLAGS" "-D__ARM_FEATURE_DOTPROD=1") + (cmakeBool "LLAMA_METAL_EMBED_LIBRARY" (!precompileMetalShaders)) + ]; + + # TODO(SomeoneSerge): It's better to add proper install targets at the CMake level, + # if they haven't been added yet. + postInstall = '' + mv $out/bin/main${executableSuffix} $out/bin/llama${executableSuffix} + mv $out/bin/server${executableSuffix} $out/bin/llama-server${executableSuffix} + mkdir -p $out/include + cp $src/llama.h $out/include/ + ''; + + # Define the shells here, but don't add in the inputsFrom to avoid recursion. 
+ passthru = { + inherit + useBlas + useCuda + useMetalKit + useMpi + useOpenCL + useRocm + useVulkan + ; + + shell = mkShell { + name = "shell-${finalAttrs.finalPackage.name}"; + description = "contains numpy and sentencepiece"; + buildInputs = [ llama-python ]; + inputsFrom = [ finalAttrs.finalPackage ]; + shellHook = '' + addToSearchPath "LD_LIBRARY_PATH" "${lib.getLib effectiveStdenv.cc.cc}/lib" + ''; + }; + + shell-extra = mkShell { + name = "shell-extra-${finalAttrs.finalPackage.name}"; + description = "contains numpy, sentencepiece, torchWithoutCuda, and transformers"; + buildInputs = [ llama-python-extra ]; + inputsFrom = [ finalAttrs.finalPackage ]; + }; + }; + + meta = { + # Configurations we don't want even the CI to evaluate. Results in the + # "unsupported platform" messages. This is mostly a no-op, because + # cudaPackages would've refused to evaluate anyway. + badPlatforms = optionals (useCuda || useOpenCL) lib.platforms.darwin; + + # Configurations that are known to result in build failures. Can be + # overridden by importing Nixpkgs with `allowBroken = true`. + broken = (useMetalKit && !effectiveStdenv.isDarwin); + + description = "Inference of LLaMA model in pure C/C++${descriptionSuffix}"; + homepage = "https://github.com/ggerganov/llama.cpp/"; + license = lib.licenses.mit; + + # Accommodates `nix run` and `lib.getExe` + mainProgram = "llama"; + + # These people might respond, on the best effort basis, if you ping them + # in case of Nix-specific regressions or for reviewing Nix-specific PRs. + # Consider adding yourself to this list if you want to ensure this flake + # stays maintained and you're willing to invest your time. Do not add + # other people without their consent. Consider removing people after + # they've been unreachable for long periods of time. + + # Note that lib.maintainers is defined in Nixpkgs, but you may just add + # an attrset following the same format as in + # https://github.com/NixOS/nixpkgs/blob/f36a80e54da29775c78d7eff0e628c2b4e34d1d7/maintainers/maintainer-list.nix + maintainers = with lib.maintainers; [ + philiptaron + SomeoneSerge + ]; + + # Extend `badPlatforms` instead + platforms = lib.platforms.all; + }; + } +) diff --git a/llama-cpp-python/vendor/llama.cpp/.devops/nix/scope.nix b/llama-cpp-python/vendor/llama.cpp/.devops/nix/scope.nix new file mode 100644 index 0000000000000000000000000000000000000000..78530c9e8a2301e4716b7e2c30da5f64dd289692 --- /dev/null +++ b/llama-cpp-python/vendor/llama.cpp/.devops/nix/scope.nix @@ -0,0 +1,19 @@ +{ + lib, + newScope, + llamaVersion ? "0.0.0", +}: + +# We're using `makeScope` instead of just writing out an attrset +# because it allows users to apply overlays later using `overrideScope'`. +# Cf. https://noogle.dev/f/lib/makeScope + +lib.makeScope newScope ( + self: { + inherit llamaVersion; + llama-cpp = self.callPackage ./package.nix { }; + docker = self.callPackage ./docker.nix { }; + docker-min = self.callPackage ./docker.nix { interactive = false; }; + sif = self.callPackage ./sif.nix { }; + } +) diff --git a/llama-cpp-python/vendor/llama.cpp/.devops/nix/sif.nix b/llama-cpp-python/vendor/llama.cpp/.devops/nix/sif.nix new file mode 100644 index 0000000000000000000000000000000000000000..7a5e1dd0ffc4c61e9b88b25d14a10afbd4f8cda9 --- /dev/null +++ b/llama-cpp-python/vendor/llama.cpp/.devops/nix/sif.nix @@ -0,0 +1,27 @@ +{ + lib, + singularity-tools, + llama-cpp, + bashInteractive, + interactive ? 
false, +}: + +let + optionalInt = cond: x: if cond then x else 0; +in +singularity-tools.buildImage rec { + inherit (llama-cpp) name; + contents = [ llama-cpp ] ++ lib.optionals interactive [ bashInteractive ]; + + # These are excessive (but safe) for most variants. Building singularity + # images requires superuser privileges, so we build them inside a VM in a + # writable image of pre-determined size. + # + # ROCm is currently affected by https://github.com/NixOS/nixpkgs/issues/276846 + # + # Expected image sizes: + # - cpu/blas: 150M, + # - cuda, all gencodes: 560M, + diskSize = 4096 + optionalInt llama-cpp.useRocm 16384; + memSize = diskSize; +} diff --git a/llama-cpp-python/vendor/llama.cpp/.devops/server-cuda.Dockerfile b/llama-cpp-python/vendor/llama.cpp/.devops/server-cuda.Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..59a52ba21a3f1491b9c5ce1ba25b24bf7230d2be --- /dev/null +++ b/llama-cpp-python/vendor/llama.cpp/.devops/server-cuda.Dockerfile @@ -0,0 +1,37 @@ +ARG UBUNTU_VERSION=22.04 +# This needs to generally match the container host's environment. +ARG CUDA_VERSION=11.7.1 +# Target the CUDA build image +ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION} +# Target the CUDA runtime image +ARG BASE_CUDA_RUN_CONTAINER=nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION} + +FROM ${BASE_CUDA_DEV_CONTAINER} as build + +# Unless otherwise specified, we make a fat build. +ARG CUDA_DOCKER_ARCH=all + +RUN apt-get update && \ + apt-get install -y build-essential git libcurl4-openssl-dev + +WORKDIR /app + +COPY . . + +# Set nvcc architecture +ENV CUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH} +# Enable CUDA +ENV LLAMA_CUDA=1 +# Enable cURL +ENV LLAMA_CURL=1 + +RUN make + +FROM ${BASE_CUDA_RUN_CONTAINER} as runtime + +RUN apt-get update && \ + apt-get install -y libcurl4-openssl-dev + +COPY --from=build /app/server /server + +ENTRYPOINT [ "/server" ] diff --git a/llama-cpp-python/vendor/llama.cpp/.devops/server-intel.Dockerfile b/llama-cpp-python/vendor/llama.cpp/.devops/server-intel.Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..a8e451fa917ca9cb983474eb7e0f51fdd9b15aad --- /dev/null +++ b/llama-cpp-python/vendor/llama.cpp/.devops/server-intel.Dockerfile @@ -0,0 +1,29 @@ +ARG ONEAPI_VERSION=2024.0.1-devel-ubuntu22.04 + +FROM intel/oneapi-basekit:$ONEAPI_VERSION as build + +ARG LLAMA_SYCL_F16=OFF +RUN apt-get update && \ + apt-get install -y git libcurl4-openssl-dev + +WORKDIR /app + +COPY . . + +RUN if [ "${LLAMA_SYCL_F16}" = "ON" ]; then \ + echo "LLAMA_SYCL_F16 is set" && \ + export OPT_SYCL_F16="-DLLAMA_SYCL_F16=ON"; \ + fi && \ + cmake -B build -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_CURL=ON ${OPT_SYCL_F16} && \ + cmake --build build --config Release --target server + +FROM intel/oneapi-basekit:$ONEAPI_VERSION as runtime + +RUN apt-get update && \ + apt-get install -y libcurl4-openssl-dev + +COPY --from=build /app/build/bin/server /server + +ENV LC_ALL=C.utf8 + +ENTRYPOINT [ "/server" ] diff --git a/llama-cpp-python/vendor/llama.cpp/.devops/server-rocm.Dockerfile b/llama-cpp-python/vendor/llama.cpp/.devops/server-rocm.Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..c02a31dd8c756089b99fba4753ddc2396e8503e4 --- /dev/null +++ b/llama-cpp-python/vendor/llama.cpp/.devops/server-rocm.Dockerfile @@ -0,0 +1,50 @@ +ARG UBUNTU_VERSION=22.04 + +# This needs to generally match the container host's environment. 
+ARG ROCM_VERSION=5.6 + +# Target the CUDA build image +ARG BASE_ROCM_DEV_CONTAINER=rocm/dev-ubuntu-${UBUNTU_VERSION}:${ROCM_VERSION}-complete + +FROM ${BASE_ROCM_DEV_CONTAINER} as build + +# Unless otherwise specified, we make a fat build. +# List from https://github.com/ggerganov/llama.cpp/pull/1087#issuecomment-1682807878 +# This is mostly tied to rocBLAS supported archs. +ARG ROCM_DOCKER_ARCH=\ + gfx803 \ + gfx900 \ + gfx906 \ + gfx908 \ + gfx90a \ + gfx1010 \ + gfx1030 \ + gfx1100 \ + gfx1101 \ + gfx1102 + +COPY requirements.txt requirements.txt +COPY requirements requirements + +RUN pip install --upgrade pip setuptools wheel \ + && pip install -r requirements.txt + +WORKDIR /app + +COPY . . + +# Set nvcc architecture +ENV GPU_TARGETS=${ROCM_DOCKER_ARCH} +# Enable ROCm +ENV LLAMA_HIPBLAS=1 +ENV CC=/opt/rocm/llvm/bin/clang +ENV CXX=/opt/rocm/llvm/bin/clang++ + +# Enable cURL +ENV LLAMA_CURL=1 +RUN apt-get update && \ + apt-get install -y libcurl4-openssl-dev + +RUN make + +ENTRYPOINT [ "/app/server" ] diff --git a/llama-cpp-python/vendor/llama.cpp/.devops/server-vulkan.Dockerfile b/llama-cpp-python/vendor/llama.cpp/.devops/server-vulkan.Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..6e757e171efeec46b8cf0091194e62650b7f247f --- /dev/null +++ b/llama-cpp-python/vendor/llama.cpp/.devops/server-vulkan.Dockerfile @@ -0,0 +1,31 @@ +ARG UBUNTU_VERSION=jammy + +FROM ubuntu:$UBUNTU_VERSION as build + +# Install build tools +RUN apt update && apt install -y git build-essential cmake wget + +# Install Vulkan SDK +RUN wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | apt-key add - && \ + wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list && \ + apt update -y && \ + apt-get install -y vulkan-sdk + +# Install cURL +RUN apt-get update && \ + apt-get install -y libcurl4-openssl-dev + +# Build it +WORKDIR /app +COPY . . +RUN cmake -B build -DLLAMA_VULKAN=1 -DLLAMA_CURL=1 && \ + cmake --build build --config Release --target server + +# Clean up +WORKDIR / +RUN cp /app/build/bin/server /server && \ + rm -rf /app + +ENV LC_ALL=C.utf8 + +ENTRYPOINT [ "/server" ] diff --git a/llama-cpp-python/vendor/llama.cpp/.devops/server.Dockerfile b/llama-cpp-python/vendor/llama.cpp/.devops/server.Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..be964e0e83648ab5a70246a789aedbc4841b892e --- /dev/null +++ b/llama-cpp-python/vendor/llama.cpp/.devops/server.Dockerfile @@ -0,0 +1,25 @@ +ARG UBUNTU_VERSION=22.04 + +FROM ubuntu:$UBUNTU_VERSION as build + +RUN apt-get update && \ + apt-get install -y build-essential git libcurl4-openssl-dev + +WORKDIR /app + +COPY . . 
+ +ENV LLAMA_CURL=1 + +RUN make + +FROM ubuntu:$UBUNTU_VERSION as runtime + +RUN apt-get update && \ + apt-get install -y libcurl4-openssl-dev + +COPY --from=build /app/server /server + +ENV LC_ALL=C.utf8 + +ENTRYPOINT [ "/server" ] diff --git a/llama-cpp-python/vendor/llama.cpp/.devops/tools.sh b/llama-cpp-python/vendor/llama.cpp/.devops/tools.sh new file mode 100644 index 0000000000000000000000000000000000000000..3a7d274e46619188d9f886e44605f17d0e4d55c1 --- /dev/null +++ b/llama-cpp-python/vendor/llama.cpp/.devops/tools.sh @@ -0,0 +1,45 @@ +#!/bin/bash +set -e + +# Read the first argument into a variable +arg1="$1" + +# Shift the arguments to remove the first one +shift + +if [[ "$arg1" == '--convert' || "$arg1" == '-c' ]]; then + python3 ./convert.py "$@" +elif [[ "$arg1" == '--quantize' || "$arg1" == '-q' ]]; then + ./quantize "$@" +elif [[ "$arg1" == '--run' || "$arg1" == '-r' ]]; then + ./main "$@" +elif [[ "$arg1" == '--finetune' || "$arg1" == '-f' ]]; then + ./finetune "$@" +elif [[ "$arg1" == '--all-in-one' || "$arg1" == '-a' ]]; then + echo "Converting PTH to GGML..." + for i in `ls $1/$2/ggml-model-f16.bin*`; do + if [ -f "${i/f16/q4_0}" ]; then + echo "Skip model quantization, it already exists: ${i/f16/q4_0}" + else + echo "Converting PTH to GGML: $i into ${i/f16/q4_0}..." + ./quantize "$i" "${i/f16/q4_0}" q4_0 + fi + done +elif [[ "$arg1" == '--server' || "$arg1" == '-s' ]]; then + ./server "$@" +else + echo "Unknown command: $arg1" + echo "Available commands: " + echo " --run (-r): Run a model previously converted into ggml" + echo " ex: -m /models/7B/ggml-model-q4_0.bin -p \"Building a website can be done in 10 simple steps:\" -n 512" + echo " --convert (-c): Convert a llama model into ggml" + echo " ex: --outtype f16 \"/models/7B/\" " + echo " --quantize (-q): Optimize with quantization process ggml" + echo " ex: \"/models/7B/ggml-model-f16.bin\" \"/models/7B/ggml-model-q4_0.bin\" 2" + echo " --finetune (-f): Run finetune command to create a lora finetune of the model" + echo " See documentation for finetune for command-line parameters" + echo " --all-in-one (-a): Execute --convert & --quantize" + echo " ex: \"/models/\" 7B" + echo " --server (-s): Run a model on the server" + echo " ex: -m /models/7B/ggml-model-q4_0.bin -c 2048 -ngl 43 -mg 1 --port 8080" +fi diff --git a/llama-cpp-python/vendor/llama.cpp/.dockerignore b/llama-cpp-python/vendor/llama.cpp/.dockerignore new file mode 100644 index 0000000000000000000000000000000000000000..633bbc3a971c117193b64bddcc38c451ba984faa --- /dev/null +++ b/llama-cpp-python/vendor/llama.cpp/.dockerignore @@ -0,0 +1,20 @@ +*.o +*.a +.cache/ +.git/ +.github/ +.gitignore +.vs/ +.vscode/ +.DS_Store + +build*/ + +models/* + +/main +/quantize + +arm_neon.h +compile_commands.json +Dockerfile diff --git a/llama-cpp-python/vendor/llama.cpp/.ecrc b/llama-cpp-python/vendor/llama.cpp/.ecrc new file mode 100644 index 0000000000000000000000000000000000000000..a3351f4e6442dfdf9f97280722730e26721fedb2 --- /dev/null +++ b/llama-cpp-python/vendor/llama.cpp/.ecrc @@ -0,0 +1,6 @@ +{ + "Exclude": ["^\\.gitmodules$"], + "Disable": { + "IndentSize": true + } +} diff --git a/llama-cpp-python/vendor/llama.cpp/.editorconfig b/llama-cpp-python/vendor/llama.cpp/.editorconfig new file mode 100644 index 0000000000000000000000000000000000000000..16d16b3b55bf5575e956bebed8de6b635bee4d8c --- /dev/null +++ b/llama-cpp-python/vendor/llama.cpp/.editorconfig @@ -0,0 +1,28 @@ +# https://EditorConfig.org + +# Top-most EditorConfig file +root = true + +# Unix-style newlines 
with a newline ending every file, utf-8 charset +[*] +end_of_line = lf +insert_final_newline = true +trim_trailing_whitespace = true +charset = utf-8 +indent_style = space +indent_size = 4 + +[Makefile] +indent_style = tab + +[scripts/*.mk] +indent_style = tab + +[prompts/*.txt] +insert_final_newline = unset + +[examples/server/public/*] +indent_size = 2 + +[examples/llama.swiftui/llama.swiftui.xcodeproj/*] +indent_style = tab diff --git a/llama-cpp-python/vendor/llama.cpp/.flake8 b/llama-cpp-python/vendor/llama.cpp/.flake8 new file mode 100644 index 0000000000000000000000000000000000000000..18fba2c1574a6952001ea1003bd0bb96ce7c53ea --- /dev/null +++ b/llama-cpp-python/vendor/llama.cpp/.flake8 @@ -0,0 +1,3 @@ +[flake8] +max-line-length = 125 +ignore = W503 diff --git a/llama-cpp-python/vendor/llama.cpp/.github/ISSUE_TEMPLATE/bug.md b/llama-cpp-python/vendor/llama.cpp/.github/ISSUE_TEMPLATE/bug.md new file mode 100644 index 0000000000000000000000000000000000000000..49812832ca542bc7ad46519d17681c92202dd079 --- /dev/null +++ b/llama-cpp-python/vendor/llama.cpp/.github/ISSUE_TEMPLATE/bug.md @@ -0,0 +1,11 @@ +--- +name: Bug template +about: Used to report bugs in llama.cpp +labels: ["bug-unconfirmed"] +assignees: '' + +--- + +Please include information about your system, the steps to reproduce the bug, and the version of llama.cpp that you are using. If possible, please provide a minimal code example that reproduces the bug. + +If the bug concerns the server, please try to reproduce it first using the [server test scenario framework](https://github.com/ggerganov/llama.cpp/tree/master/examples/server/tests). diff --git a/llama-cpp-python/vendor/llama.cpp/.github/ISSUE_TEMPLATE/enhancement.md b/llama-cpp-python/vendor/llama.cpp/.github/ISSUE_TEMPLATE/enhancement.md new file mode 100644 index 0000000000000000000000000000000000000000..dcffda7500f527a7b66f6599cb03582c5e95dd86 --- /dev/null +++ b/llama-cpp-python/vendor/llama.cpp/.github/ISSUE_TEMPLATE/enhancement.md @@ -0,0 +1,28 @@ +--- +name: Enhancement template +about: Used to request enhancements for llama.cpp +labels: ["enhancement"] +assignees: '' + +--- + +# Prerequisites + +Please answer the following questions for yourself before submitting an issue. + +- [ ] I am running the latest code. Development is very rapid so there are no tagged versions as of now. +- [ ] I carefully followed the [README.md](https://github.com/ggerganov/llama.cpp/blob/master/README.md). +- [ ] I [searched using keywords relevant to my issue](https://docs.github.com/en/issues/tracking-your-work-with-issues/filtering-and-searching-issues-and-pull-requests) to make sure that I am creating a new issue that is not already open (or closed). +- [ ] I reviewed the [Discussions](https://github.com/ggerganov/llama.cpp/discussions), and have a new bug or useful enhancement to share. + +# Feature Description + +Please provide a detailed written description of what you were trying to do, and what you expected `llama.cpp` to do as an enhancement. + +# Motivation + +Please provide a detailed written description of reasons why this feature is necessary and how it is useful to `llama.cpp` users. + +# Possible Implementation + +If you have an idea as to how it can be implemented, please write a detailed description. Feel free to give links to external sources or share visuals that might be helpful to understand the details better. 
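The `tools.sh` dispatcher shown earlier maps a single Docker entrypoint onto the convert, quantize, run, finetune and server binaries. As a rough sketch of how those subcommands are typically invoked from the host (the image tag `ghcr.io/ggerganov/llama.cpp:full` and the mounted model directory are assumptions for illustration, not taken from this diff):

```sh
# Hypothetical host-side invocations of the tools.sh dispatcher; adjust the
# image tag and the mounted model directory to your own setup.
MODELS=/path/to/models   # assumption: host directory containing the weights

# Convert a checkpoint to GGML/GGUF (tools.sh --convert -> convert.py)
docker run -v "$MODELS":/models ghcr.io/ggerganov/llama.cpp:full \
  --convert --outtype f16 /models/7B/

# Quantize the converted model (tools.sh --quantize -> ./quantize)
docker run -v "$MODELS":/models ghcr.io/ggerganov/llama.cpp:full \
  --quantize /models/7B/ggml-model-f16.bin /models/7B/ggml-model-q4_0.bin q4_0

# Serve it over HTTP (tools.sh --server -> ./server)
docker run -p 8080:8080 -v "$MODELS":/models ghcr.io/ggerganov/llama.cpp:full \
  --server -m /models/7B/ggml-model-q4_0.bin -c 2048 --port 8080 --host 0.0.0.0
```

The arguments mirror the examples printed by the script's own help text; only the image tag and host paths are placeholders.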
diff --git a/llama-cpp-python/vendor/llama.cpp/.github/workflows/bench.yml b/llama-cpp-python/vendor/llama.cpp/.github/workflows/bench.yml new file mode 100644 index 0000000000000000000000000000000000000000..3e968d17909a42e2db61e58bd6913e3b66945305 --- /dev/null +++ b/llama-cpp-python/vendor/llama.cpp/.github/workflows/bench.yml @@ -0,0 +1,298 @@ +# Benchmark +name: Benchmark + +on: + workflow_dispatch: + inputs: + gpu-series: + description: 'Azure GPU series to run with' + required: true + type: choice + options: + - Standard_NC4as_T4_v3 + - Standard_NC24ads_A100_v4 + - Standard_NC80adis_H100_v5 + sha: + description: 'Commit SHA1 to build' + required: false + type: string + duration: + description: 'Duration of the bench' + type: string + default: 10m + + push: + branches: + - master + paths: ['llama.cpp', 'ggml.c', 'ggml-backend.c', 'ggml-quants.c', '**/*.cu', 'examples/server/*.h*', 'examples/server/*.cpp'] + pull_request_target: + types: [opened, synchronize, reopened] + paths: ['llama.cpp', 'ggml.c', 'ggml-backend.c', 'ggml-quants.c', '**/*.cu', 'examples/server/*.h*', 'examples/server/*.cpp'] + schedule: + - cron: '04 2 * * *' + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }}-${{ github.head_ref || github.run_id }}-${{ github.event.inputs.sha }} + cancel-in-progress: true + +jobs: + bench-server-baseline: + runs-on: Standard_NC4as_T4_v3 + env: + RUNNER_LABEL: Standard_NC4as_T4_v3 # FIXME Do not find a way to not duplicate it + N_USERS: 8 + DURATION: 10m + + strategy: + matrix: + model: [phi-2] + ftype: [q4_0, q8_0, f16] + include: + - model: phi-2 + ftype: q4_0 + pr_comment_enabled: "true" + + if: ${{ github.event.inputs.gpu-series == 'Standard_NC4as_T4_v3' || github.event.schedule || github.event.pull_request || github.head_ref == 'master' || github.ref_name == 'master' || github.event.push.ref == 'refs/heads/master' }} + steps: + - name: Clone + id: checkout + uses: actions/checkout@v4 + with: + fetch-depth: 0 + ref: ${{ github.event.inputs.sha || github.event.pull_request.head.sha || github.sha || github.head_ref || github.ref_name }} + + - name: Install python env + id: pipenv + run: | + cd examples/server/bench + python3 -m venv venv + source venv/bin/activate + pip install -r requirements.txt + + - name: Prometheus + id: install_prometheus + run: | + wget --quiet https://github.com/prometheus/prometheus/releases/download/v2.51.0/prometheus-2.51.0.linux-amd64.tar.gz + tar xzf prometheus*.tar.gz --strip-components=1 + ./prometheus --config.file=examples/server/bench/prometheus.yml & + while ! 
nc -z localhost 9090; do
+            sleep 0.1
+          done
+
+      - name: Set up Go
+        uses: actions/setup-go@v5
+        with:
+          go-version: '1.21'
+
+      - name: Install k6 and xk6-sse
+        id: k6_installation
+        run: |
+          cd examples/server/bench
+          go install go.k6.io/xk6/cmd/xk6@latest
+          xk6 build master \
+            --with github.com/phymbert/xk6-sse
+
+      - name: Build
+        id: cmake_build
+        run: |
+          set -eux
+          cmake -B build \
+              -DLLAMA_NATIVE=OFF \
+              -DLLAMA_BUILD_SERVER=ON \
+              -DLLAMA_CURL=ON \
+              -DLLAMA_CUBLAS=ON \
+              -DCUDAToolkit_ROOT=/usr/local/cuda \
+              -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc \
+              -DCMAKE_CUDA_ARCHITECTURES=75 \
+              -DLLAMA_FATAL_WARNINGS=OFF \
+              -DLLAMA_ALL_WARNINGS=OFF \
+              -DCMAKE_BUILD_TYPE=Release;
+          cmake --build build --config Release -j $(nproc) --target server
+
+      - name: Download the dataset
+        id: download_dataset
+        run: |
+          cd examples/server/bench
+          wget --quiet https://huggingface.co/datasets/anon8231489123/ShareGPT_Vicuna_unfiltered/resolve/main/ShareGPT_V3_unfiltered_cleaned_split.json
+
+      - name: Server bench
+        id: server_bench
+        run: |
+          set -eux
+
+          cd examples/server/bench
+          source venv/bin/activate
+          python bench.py \
+            --runner-label ${{ env.RUNNER_LABEL }} \
+            --name ${{ github.job }} \
+            --branch ${{ github.head_ref || github.ref_name }} \
+            --commit ${{ github.event.inputs.sha || github.event.pull_request.head.sha || github.sha }} \
+            --scenario script.js \
+            --duration ${{ github.event.inputs.duration || env.DURATION }} \
+            --hf-repo ggml-org/models \
+            --hf-file ${{ matrix.model }}/ggml-model-${{ matrix.ftype }}.gguf \
+            --model-path-prefix /models \
+            --parallel ${{ env.N_USERS }} \
+            -ngl 33 \
+            --batch-size 2048 \
+            --ubatch-size 256 \
+            --ctx-size 16384 \
+            --n-prompts 1000 \
+            --max-prompt-tokens 1024 \
+            --max-tokens 2048
+
+          cat results.github.env >> $GITHUB_ENV
+
+          # Remove dataset as we do not want it in the artefact
+          rm ShareGPT_V3_unfiltered_cleaned_split.json
+
+      - uses: actions/upload-artifact@v4
+        with:
+          name: bench-server-${{ github.job }}-${{ env.RUNNER_LABEL }}-${{ matrix.model }}-${{ matrix.ftype }}
+          compression-level: 9
+          path: |
+            examples/server/bench/*.jpg
+            examples/server/bench/*.json
+            examples/server/bench/*.log
+
+      - name: Commit status
+        uses: Sibz/github-status-action@v1
+        with:
+          authToken: ${{secrets.GITHUB_TOKEN}}
+          sha: ${{ inputs.sha || github.event.pull_request.head.sha || github.sha }}
+          context: bench-server-${{ github.job }}-${{ env.RUNNER_LABEL }}-${{ matrix.model }}-${{ matrix.ftype }}
+          description: |
+            ${{ env.BENCH_RESULTS }}
+          state: 'success'
+
+      - name: Upload benchmark images
+        uses: devicons/public-upload-to-imgur@v2.2.2
+        continue-on-error: true # Important as it looks unstable: 503
+        id: imgur_step
+        with:
+          client_id: ${{secrets.IMGUR_CLIENT_ID}}
+          path: |
+            examples/server/bench/prompt_tokens_seconds.jpg
+            examples/server/bench/predicted_tokens_seconds.jpg
+            examples/server/bench/kv_cache_usage_ratio.jpg
+            examples/server/bench/requests_processing.jpg
+
+      - name: Extract mermaid
+        id: set_mermaid
+        run: |
+          set -eux
+
+          cd examples/server/bench
+          PROMPT_TOKENS_SECONDS=$(cat prompt_tokens_seconds.mermaid)
+          echo "PROMPT_TOKENS_SECONDS<<EOF" >> $GITHUB_ENV
+          echo "$PROMPT_TOKENS_SECONDS" >> $GITHUB_ENV
+          echo "EOF" >> $GITHUB_ENV
+
+          PREDICTED_TOKENS_SECONDS=$(cat predicted_tokens_seconds.mermaid)
+          echo "PREDICTED_TOKENS_SECONDS<<EOF" >> $GITHUB_ENV
+          echo "$PREDICTED_TOKENS_SECONDS" >> $GITHUB_ENV
+          echo "EOF" >> $GITHUB_ENV
+
+          KV_CACHE_USAGE_RATIO=$(cat kv_cache_usage_ratio.mermaid)
+          echo "KV_CACHE_USAGE_RATIO<<EOF" >> $GITHUB_ENV
+          echo "$KV_CACHE_USAGE_RATIO" >> $GITHUB_ENV
+          echo "EOF" >> $GITHUB_ENV
+
+          REQUESTS_PROCESSING=$(cat requests_processing.mermaid)
+          echo "REQUESTS_PROCESSING<<EOF" >> $GITHUB_ENV
+          echo "$REQUESTS_PROCESSING" >> $GITHUB_ENV
+          echo "EOF" >> $GITHUB_ENV
+
+      - name: Extract image url
+        id: extract_image_url
+        continue-on-error: true
+        run: |
+          set -eux
+
+          echo "IMAGE_O=${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[0] }}" >> $GITHUB_ENV
+          echo "IMAGE_1=${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[1] }}" >> $GITHUB_ENV
+          echo "IMAGE_2=${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[2] }}" >> $GITHUB_ENV
+          echo "IMAGE_3=${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[3] }}" >> $GITHUB_ENV
+
+      - name: Comment PR
+        uses: mshick/add-pr-comment@v2
+        id: comment_pr
+        if: ${{ github.event.pull_request != '' && matrix.pr_comment_enabled == 'true' }}
+        with:
+          message-id: bench-server-${{ github.job }}-${{ env.RUNNER_LABEL }}-${{ matrix.model }}-${{ matrix.ftype }}
+          message: |

+            📈 **llama.cpp server** for _${{ github.job }}_ on _${{ env.RUNNER_LABEL }}_ for `${{ matrix.model }}`-`${{ matrix.ftype }}`: **${{ env.BENCH_ITERATIONS}} iterations** 🚀
+
+            <details>
+
+            <summary>Expand details for performance related PR only</summary>
+
+            - Concurrent users: ${{ env.N_USERS }}, duration: ${{ github.event.inputs.duration || env.DURATION }}
+            - HTTP request : avg=${{ env.HTTP_REQ_DURATION_AVG }}ms p(95)=${{ env.HTTP_REQ_DURATION_P_95_ }}ms fails=${{ env.HTTP_REQ_FAILED_PASSES }}, finish reason: stop=${{ env.LLAMACPP_COMPLETIONS_STOP_RATE_PASSES }} truncated=${{ env.LLAMACPP_COMPLETIONS_TRUNCATED_RATE_PASSES }}
+            - Prompt processing (pp): avg=${{ env.LLAMACPP_PROMPT_PROCESSING_SECOND_AVG }}tk/s p(95)=${{ env.LLAMACPP_PROMPT_PROCESSING_SECOND_P_95_ }}tk/s
+            - Token generation (tg): avg=${{ env.LLAMACPP_TOKENS_SECOND_AVG }}tk/s p(95)=${{ env.LLAMACPP_TOKENS_SECOND_P_95_ }}tk/s
+            - ${{ env.BENCH_GRAPH_XLABEL }}
+
+            prompt_tokens_seconds
+
+            <img src="${{ env.IMAGE_O }}" alt="prompt_tokens_seconds" />
+
+            <details>
+
+            <summary>More</summary>
+
+            ```mermaid
+            ${{ env.PROMPT_TOKENS_SECONDS }}
+            ```
+
+            </details>
+
+            predicted_tokens_seconds
+
+            <img src="${{ env.IMAGE_1 }}" alt="predicted_tokens_seconds" />
+
+            <details>
+
+            <summary>More</summary>
+
+            ```mermaid
+            ${{ env.PREDICTED_TOKENS_SECONDS }}
+            ```
+
+            </details>
+
+            <details>
+
+            <summary>Details</summary>
+
+            kv_cache_usage_ratio
+
+            <img src="${{ env.IMAGE_2 }}" alt="kv_cache_usage_ratio" />
+
+            <details>
+
+            <summary>More</summary>
+
+            ```mermaid
+            ${{ env.KV_CACHE_USAGE_RATIO }}
+            ```
+
+            </details>
+
+            requests_processing
+
+            <img src="${{ env.IMAGE_3 }}" alt="requests_processing" />
+
+            <details>
+
+            <summary>More</summary>
+
+            ```mermaid
+            ${{ env.REQUESTS_PROCESSING }}
+            ```
+
+            </details>
+
+            </details>
+            </details>
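Taken together, the benchmark job above builds the server, drives it with k6/xk6-sse through `bench.py`, and publishes the resulting plots and metrics. A minimal local sketch of the same run, assuming the Python requirements and the xk6-sse build from the steps above are already in place (the `--runner-label`, `--name`, branch/commit values and the model file are placeholders, not prescribed by the workflow):

```sh
# Local approximation of the "Server bench" step above; flag values marked as
# placeholders should be adapted to the local checkout and model.
cd examples/server/bench
python3 -m venv venv && source venv/bin/activate
pip install -r requirements.txt

python bench.py \
  --runner-label local \
  --name local-bench \
  --branch "$(git rev-parse --abbrev-ref HEAD)" \
  --commit "$(git rev-parse HEAD)" \
  --scenario script.js \
  --duration 10m \
  --hf-repo ggml-org/models \
  --hf-file phi-2/ggml-model-q4_0.gguf \
  --model-path-prefix /models \
  --parallel 8 \
  -ngl 33 --batch-size 2048 --ubatch-size 256 --ctx-size 16384 \
  --n-prompts 1000 --max-prompt-tokens 1024 --max-tokens 2048
```

The plots, JSON and log files that the workflow uploads as artifacts are collected from this same `examples/server/bench/` directory after the run.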
diff --git a/llama-cpp-python/vendor/llama.cpp/.github/workflows/build.yml b/llama-cpp-python/vendor/llama.cpp/.github/workflows/build.yml new file mode 100644 index 0000000000000000000000000000000000000000..2d747e688437af34a3bff5a1cc9ca0753e278b36 --- /dev/null +++ b/llama-cpp-python/vendor/llama.cpp/.github/workflows/build.yml @@ -0,0 +1,1261 @@ +name: CI + +on: + workflow_dispatch: # allows manual triggering + inputs: + create_release: + description: 'Create new release' + required: true + type: boolean + push: + branches: + - master + paths: ['.github/workflows/**', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.swift', '**/*.m'] + pull_request: + types: [opened, synchronize, reopened] + paths: ['**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.swift', '**/*.m'] + +concurrency: + group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }} + cancel-in-progress: true + +env: + BRANCH_NAME: ${{ github.head_ref || github.ref_name }} + GGML_NLOOP: 3 + GGML_N_THREADS: 1 + +jobs: + macOS-latest-cmake-arm64: + runs-on: macos-14 + + steps: + - name: Clone + id: checkout + uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Dependencies + id: depends + continue-on-error: true + run: | + brew update + + - name: Build + id: cmake_build + run: | + sysctl -a + mkdir build + cd build + cmake -DLLAMA_FATAL_WARNINGS=ON -DLLAMA_METAL_EMBED_LIBRARY=ON -DLLAMA_CURL=ON .. + cmake --build . --config Release -j $(sysctl -n hw.logicalcpu) + + - name: Test + id: cmake_test + run: | + cd build + ctest -L 'main|curl' --verbose --timeout 900 + + - name: Determine tag name + id: tag + shell: bash + run: | + BUILD_NUMBER="$(git rev-list --count HEAD)" + SHORT_HASH="$(git rev-parse --short=7 HEAD)" + if [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then + echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT + else + SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-') + echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT + fi + + - name: Pack artifacts + id: pack_artifacts + if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }} + run: | + cp LICENSE ./build/bin/ + zip -r llama-${{ steps.tag.outputs.name }}-bin-macos-arm64.zip ./build/bin/* + + - name: Upload artifacts + if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }} + uses: actions/upload-artifact@v4 + with: + path: llama-${{ steps.tag.outputs.name }}-bin-macos-arm64.zip + name: llama-bin-macos-arm64.zip + + macOS-latest-cmake-x64: + runs-on: macos-latest + + steps: + - name: Clone + id: checkout + uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Dependencies + id: depends + continue-on-error: true + run: | + brew update + + - name: Build + id: cmake_build + run: | + sysctl -a + mkdir build + cd build + # Metal is disabled due to intermittent failures with Github runners not having a GPU: + # https://github.com/ggerganov/llama.cpp/actions/runs/8635935781/job/23674807267#step:5:2313 + cmake -DLLAMA_FATAL_WARNINGS=ON -DLLAMA_METAL=OFF -DLLAMA_CURL=ON .. + cmake --build . 
--config Release -j $(sysctl -n hw.logicalcpu) + + - name: Test + id: cmake_test + run: | + cd build + ctest -L main --verbose --timeout 900 + + - name: Determine tag name + id: tag + shell: bash + run: | + BUILD_NUMBER="$(git rev-list --count HEAD)" + SHORT_HASH="$(git rev-parse --short=7 HEAD)" + if [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then + echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT + else + SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-') + echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT + fi + + - name: Pack artifacts + id: pack_artifacts + if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }} + run: | + cp LICENSE ./build/bin/ + zip -r llama-${{ steps.tag.outputs.name }}-bin-macos-x64.zip ./build/bin/* + + - name: Upload artifacts + if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }} + uses: actions/upload-artifact@v4 + with: + path: llama-${{ steps.tag.outputs.name }}-bin-macos-x64.zip + name: llama-bin-macos-x64.zip + + ubuntu-focal-make: + runs-on: ubuntu-20.04 + env: + LLAMA_NODE_AVAILABLE: true + LLAMA_PYTHON_AVAILABLE: true + + steps: + - name: Clone + id: checkout + uses: actions/checkout@v4 + + - name: Dependencies + id: depends + run: | + sudo apt-get update + sudo apt-get install build-essential gcc-8 + + - uses: actions/setup-node@v4 + with: + node-version: "20" + + - uses: actions/setup-python@v5 + with: + python-version: "3.11" + + - name: Build + id: make_build + env: + LLAMA_FATAL_WARNINGS: 1 + run: | + CC=gcc-8 make -j $(nproc) + + - name: Test + id: make_test + run: | + CC=gcc-8 make tests -j $(nproc) + make test -j $(nproc) + + ubuntu-focal-make-curl: + runs-on: ubuntu-20.04 + + steps: + - name: Clone + id: checkout + uses: actions/checkout@v4 + + - name: Dependencies + id: depends + run: | + sudo apt-get update + sudo apt-get install build-essential gcc-8 libcurl4-openssl-dev + + - name: Build + id: make_build + env: + LLAMA_FATAL_WARNINGS: 1 + LLAMA_CURL: 1 + run: | + CC=gcc-8 make -j $(nproc) + + ubuntu-latest-cmake: + runs-on: ubuntu-latest + + steps: + - name: Clone + id: checkout + uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Dependencies + id: depends + run: | + sudo apt-get update + sudo apt-get install build-essential libcurl4-openssl-dev + + - name: Build + id: cmake_build + run: | + mkdir build + cd build + cmake .. -DLLAMA_FATAL_WARNINGS=ON -DLLAMA_CURL=ON + cmake --build . 
--config Release -j $(nproc) + + - name: Test + id: cmake_test + run: | + cd build + ctest -L 'main|curl' --verbose --timeout 900 + + - name: Test llama2c conversion + id: llama2c_test + run: | + cd build + echo "Fetch tokenizer" + wget https://huggingface.co/karpathy/tinyllamas/resolve/main/stories260K/tok512.bin + echo "Fetch llama2c model" + wget https://huggingface.co/karpathy/tinyllamas/resolve/main/stories260K/stories260K.bin + ./bin/convert-llama2c-to-ggml --copy-vocab-from-model ./tok512.bin --llama2c-model stories260K.bin --llama2c-output-model stories260K.gguf + ./bin/main -m stories260K.gguf -p "One day, Lily met a Shoggoth" -n 500 -c 256 + + - name: Determine tag name + id: tag + shell: bash + run: | + BUILD_NUMBER="$(git rev-list --count HEAD)" + SHORT_HASH="$(git rev-parse --short=7 HEAD)" + if [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then + echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT + else + SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-') + echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT + fi + + - name: Pack artifacts + id: pack_artifacts + if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }} + run: | + cp LICENSE ./build/bin/ + zip -r llama-${{ steps.tag.outputs.name }}-bin-ubuntu-x64.zip ./build/bin/* + + - name: Upload artifacts + if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }} + uses: actions/upload-artifact@v4 + with: + path: llama-${{ steps.tag.outputs.name }}-bin-ubuntu-x64.zip + name: llama-bin-ubuntu-x64.zip + +# ubuntu-latest-cmake-sanitizer: +# runs-on: ubuntu-latest +# +# continue-on-error: true +# +# strategy: +# matrix: +# sanitizer: [ADDRESS, THREAD, UNDEFINED] +# build_type: [Debug, Release] +# +# steps: +# - name: Clone +# id: checkout +# uses: actions/checkout@v4 +# +# - name: Dependencies +# id: depends +# run: | +# sudo apt-get update +# sudo apt-get install build-essential +# +# - name: Build +# id: cmake_build +# run: | +# mkdir build +# cd build +# cmake .. -DLLAMA_FATAL_WARNINGS=ON -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} +# cmake --build . --config ${{ matrix.build_type }} -j $(nproc) +# +# - name: Test +# id: cmake_test +# run: | +# cd build +# ctest -L main --verbose --timeout 900 + + ubuntu-latest-cmake-mpi: + runs-on: ubuntu-latest + + continue-on-error: true + + strategy: + matrix: + mpi_library: [mpich, libopenmpi-dev] + + steps: + - name: Clone + id: checkout + uses: actions/checkout@v4 + + - name: Dependencies + id: depends + run: | + sudo apt-get update + sudo apt-get install build-essential ${{ matrix.mpi_library }} + + - name: Build + id: cmake_build + run: | + mkdir build + cd build + cmake -DLLAMA_MPI=ON .. + cmake --build . --config Release -j $(nproc) + + - name: Test + id: cmake_test + run: | + cd build + ctest -L main --verbose + + ubuntu-22-cmake-vulkan: + runs-on: ubuntu-22.04 + + steps: + - name: Clone + id: checkout + uses: actions/checkout@v4 + + - name: Dependencies + id: depends + run: | + sudo apt-get update + sudo apt-get install build-essential libvulkan-dev + + - name: Build + id: cmake_build + run: | + mkdir build + cd build + cmake -DLLAMA_VULKAN=ON .. + cmake --build . 
--config Release -j $(nproc) + + ubuntu-22-cmake-sycl: + runs-on: ubuntu-22.04 + + continue-on-error: true + + steps: + - uses: actions/checkout@v2 + + - name: add oneAPI to apt + shell: bash + run: | + cd /tmp + wget https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB + sudo apt-key add GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB + rm GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB + sudo add-apt-repository "deb https://apt.repos.intel.com/oneapi all main" + + - name: install oneAPI dpcpp compiler + shell: bash + run: | + sudo apt update + sudo apt install intel-oneapi-compiler-dpcpp-cpp + + - name: install oneAPI MKL library + shell: bash + run: | + sudo apt install intel-oneapi-mkl-devel + + - name: Clone + id: checkout + uses: actions/checkout@v4 + + - name: Build + id: cmake_build + run: | + source /opt/intel/oneapi/setvars.sh + mkdir build + cd build + cmake -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx .. + cmake --build . --config Release -j $(nproc) + + ubuntu-22-cmake-sycl-fp16: + runs-on: ubuntu-22.04 + + continue-on-error: true + + steps: + - uses: actions/checkout@v2 + + - name: add oneAPI to apt + shell: bash + run: | + cd /tmp + wget https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB + sudo apt-key add GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB + rm GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB + sudo add-apt-repository "deb https://apt.repos.intel.com/oneapi all main" + + - name: install oneAPI dpcpp compiler + shell: bash + run: | + sudo apt update + sudo apt install intel-oneapi-compiler-dpcpp-cpp + + - name: install oneAPI MKL library + shell: bash + run: | + sudo apt install intel-oneapi-mkl-devel + + - name: Clone + id: checkout + uses: actions/checkout@v4 + + - name: Build + id: cmake_build + run: | + source /opt/intel/oneapi/setvars.sh + mkdir build + cd build + cmake -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_SYCL_F16=ON .. + cmake --build . --config Release -j $(nproc) + + # TODO: build with LLAMA_NO_METAL because test-backend-ops fail on "Apple Paravirtual device" and I don't know + # how to debug it. + # ref: https://github.com/ggerganov/llama.cpp/actions/runs/7131777249/job/19420981052#step:5:1124 + macOS-latest-make: + runs-on: macos-latest + + steps: + - name: Clone + id: checkout + uses: actions/checkout@v4 + + - name: Dependencies + id: depends + continue-on-error: true + run: | + brew update + + - name: Build + id: make_build + env: + LLAMA_FATAL_WARNINGS: 1 + run: | + LLAMA_NO_METAL=1 make -j $(sysctl -n hw.logicalcpu) + + - name: Test + id: make_test + run: | + LLAMA_NO_METAL=1 make tests -j $(sysctl -n hw.logicalcpu) + LLAMA_NO_METAL=1 make test -j $(sysctl -n hw.logicalcpu) + + # TODO: build with LLAMA_METAL=OFF because test-backend-ops fail on "Apple Paravirtual device" and I don't know + # how to debug it. + # ref: https://github.com/ggerganov/llama.cpp/actions/runs/7132125951/job/19422043567?pr=4359#step:5:6584 + # would be great if we fix these + macOS-latest-cmake: + runs-on: macos-latest + + steps: + - name: Clone + id: checkout + uses: actions/checkout@v4 + + - name: Dependencies + id: depends + continue-on-error: true + run: | + brew update + + - name: Build + id: cmake_build + run: | + sysctl -a + mkdir build + cd build + cmake -DLLAMA_FATAL_WARNINGS=ON -DLLAMA_METAL=OFF .. + cmake --build . 
--config Release -j $(sysctl -n hw.logicalcpu) + + - name: Test + id: cmake_test + run: | + cd build + ctest -L main --verbose --timeout 900 + + macOS-latest-cmake-ios: + runs-on: macos-latest + + steps: + - name: Clone + id: checkout + uses: actions/checkout@v1 + + - name: Dependencies + id: depends + continue-on-error: true + run: | + brew update + + - name: Build + id: cmake_build + run: | + sysctl -a + mkdir build + cd build + cmake -G Xcode .. \ + -DLLAMA_METAL_EMBED_LIBRARY=ON \ + -DLLAMA_BUILD_EXAMPLES=OFF \ + -DLLAMA_BUILD_TESTS=OFF \ + -DLLAMA_BUILD_SERVER=OFF \ + -DCMAKE_SYSTEM_NAME=iOS \ + -DCMAKE_OSX_DEPLOYMENT_TARGET=14.0 + cmake --build . --config Release -j $(sysctl -n hw.logicalcpu) + + macOS-latest-cmake-tvos: + runs-on: macos-latest + + steps: + - name: Clone + id: checkout + uses: actions/checkout@v1 + + - name: Dependencies + id: depends + continue-on-error: true + run: | + brew update + + - name: Build + id: cmake_build + run: | + sysctl -a + mkdir build + cd build + cmake -G Xcode .. \ + -DLLAMA_METAL_EMBED_LIBRARY=ON \ + -DLLAMA_BUILD_EXAMPLES=OFF \ + -DLLAMA_BUILD_TESTS=OFF \ + -DLLAMA_BUILD_SERVER=OFF \ + -DCMAKE_SYSTEM_NAME=tvOS \ + -DCMAKE_OSX_DEPLOYMENT_TARGET=14.0 + cmake --build . --config Release -j $(sysctl -n hw.logicalcpu) + + macOS-latest-swift: + runs-on: macos-latest + + strategy: + matrix: + destination: ['generic/platform=macOS', 'generic/platform=iOS', 'generic/platform=tvOS'] + + steps: + - name: Clone + id: checkout + uses: actions/checkout@v1 + + - name: Dependencies + id: depends + continue-on-error: true + run: | + brew update + + - name: xcodebuild for swift package + id: xcodebuild + run: | + xcodebuild -scheme llama -destination "${{ matrix.destination }}" + + - name: Build Swift Example + id: make_build_swift_example + run: | + make swift + + windows-msys2: + runs-on: windows-latest + + strategy: + fail-fast: false + matrix: + include: + - { sys: UCRT64, env: ucrt-x86_64, build: Release } + - { sys: CLANG64, env: clang-x86_64, build: Release } + + steps: + - name: Clone + uses: actions/checkout@v4 + + - name: Setup ${{ matrix.sys }} + uses: msys2/setup-msys2@v2 + with: + update: true + msystem: ${{matrix.sys}} + install: >- + base-devel + mingw-w64-${{matrix.env}}-toolchain + mingw-w64-${{matrix.env}}-cmake + mingw-w64-${{matrix.env}}-openblas + + - name: Build using make + shell: msys2 {0} + run: | + make -j $(nproc) + + - name: Clean after building using make + shell: msys2 {0} + run: | + make clean + + - name: Build using make w/ OpenBLAS + shell: msys2 {0} + run: | + make LLAMA_OPENBLAS=1 -j $(nproc) + + - name: Build using CMake + shell: msys2 {0} + run: | + cmake -B build + cmake --build build --config ${{ matrix.build }} -j $(nproc) + + - name: Clean after building using CMake + shell: msys2 {0} + run: | + rm -rf build + + - name: Build using CMake w/ OpenBLAS + shell: msys2 {0} + run: | + cmake -B build -DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS + cmake --build build --config ${{ matrix.build }} -j $(nproc) + + windows-latest-cmake: + runs-on: windows-latest + + env: + OPENBLAS_VERSION: 0.3.23 + OPENCL_VERSION: 2023.04.17 + CLBLAST_VERSION: 1.6.0 + SDE_VERSION: 9.33.0-2024-01-07 + VULKAN_VERSION: 1.3.261.1 + + strategy: + matrix: + include: + - build: 'noavx' + defines: '-DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_AVX=OFF -DLLAMA_AVX2=OFF -DLLAMA_FMA=OFF -DBUILD_SHARED_LIBS=ON' + - build: 'avx2' + defines: '-DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DBUILD_SHARED_LIBS=ON' + - build: 'avx' + defines: '-DLLAMA_NATIVE=OFF 
-DLLAMA_BUILD_SERVER=ON -DLLAMA_AVX2=OFF -DBUILD_SHARED_LIBS=ON' + - build: 'avx512' + defines: '-DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_AVX512=ON -DBUILD_SHARED_LIBS=ON' + - build: 'clblast' + defines: '-DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_CLBLAST=ON -DBUILD_SHARED_LIBS=ON -DCMAKE_PREFIX_PATH="$env:RUNNER_TEMP/clblast"' + - build: 'openblas' + defines: '-DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_BLAS=ON -DBUILD_SHARED_LIBS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS -DBLAS_INCLUDE_DIRS="$env:RUNNER_TEMP/openblas/include" -DBLAS_LIBRARIES="$env:RUNNER_TEMP/openblas/lib/openblas.lib"' + - build: 'kompute' + defines: '-DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_KOMPUTE=ON -DKOMPUTE_OPT_DISABLE_VULKAN_VERSION_CHECK=ON -DBUILD_SHARED_LIBS=ON' + - build: 'vulkan' + defines: '-DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_VULKAN=ON -DBUILD_SHARED_LIBS=ON' + - build: 'arm64' + defines: '-A ARM64 -DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DBUILD_SHARED_LIBS=ON' + + steps: + - name: Clone + id: checkout + uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Clone Kompute submodule + id: clone_kompute + if: ${{ matrix.build == 'kompute' }} + run: | + git submodule update --init kompute + + - name: Download OpenCL SDK + id: get_opencl + if: ${{ matrix.build == 'clblast' }} + run: | + curl.exe -o $env:RUNNER_TEMP/opencl.zip -L "https://github.com/KhronosGroup/OpenCL-SDK/releases/download/v${env:OPENCL_VERSION}/OpenCL-SDK-v${env:OPENCL_VERSION}-Win-x64.zip" + mkdir $env:RUNNER_TEMP/opencl + tar.exe -xvf $env:RUNNER_TEMP/opencl.zip --strip-components=1 -C $env:RUNNER_TEMP/opencl + + - name: Download CLBlast + id: get_clblast + if: ${{ matrix.build == 'clblast' }} + run: | + curl.exe -o $env:RUNNER_TEMP/clblast.7z -L "https://github.com/CNugteren/CLBlast/releases/download/${env:CLBLAST_VERSION}/CLBlast-${env:CLBLAST_VERSION}-windows-x64.7z" + curl.exe -o $env:RUNNER_TEMP/CLBlast.LICENSE.txt -L "https://github.com/CNugteren/CLBlast/raw/${env:CLBLAST_VERSION}/LICENSE" + 7z x "-o${env:RUNNER_TEMP}" $env:RUNNER_TEMP/clblast.7z + rename-item $env:RUNNER_TEMP/CLBlast-${env:CLBLAST_VERSION}-windows-x64 clblast + foreach ($f in (gci -Recurse -Path "$env:RUNNER_TEMP/clblast" -Filter '*.cmake')) { + $txt = Get-Content -Path $f -Raw + $txt.Replace('C:/vcpkg/packages/opencl_x64-windows/', "$($env:RUNNER_TEMP.Replace('\','/'))/opencl/") | Set-Content -Path $f -Encoding UTF8 + } + + - name: Download OpenBLAS + id: get_openblas + if: ${{ matrix.build == 'openblas' }} + run: | + curl.exe -o $env:RUNNER_TEMP/openblas.zip -L "https://github.com/xianyi/OpenBLAS/releases/download/v${env:OPENBLAS_VERSION}/OpenBLAS-${env:OPENBLAS_VERSION}-x64.zip" + curl.exe -o $env:RUNNER_TEMP/OpenBLAS.LICENSE.txt -L "https://github.com/xianyi/OpenBLAS/raw/v${env:OPENBLAS_VERSION}/LICENSE" + mkdir $env:RUNNER_TEMP/openblas + tar.exe -xvf $env:RUNNER_TEMP/openblas.zip -C $env:RUNNER_TEMP/openblas + $vcdir = $(vswhere -latest -products * -requires Microsoft.VisualStudio.Component.VC.Tools.x86.x64 -property installationPath) + $msvc = $(join-path $vcdir $('VC\Tools\MSVC\'+$(gc -raw $(join-path $vcdir 'VC\Auxiliary\Build\Microsoft.VCToolsVersion.default.txt')).Trim())) + $lib = $(join-path $msvc 'bin\Hostx64\x64\lib.exe') + & $lib /machine:x64 "/def:${env:RUNNER_TEMP}/openblas/lib/libopenblas.def" "/out:${env:RUNNER_TEMP}/openblas/lib/openblas.lib" /name:openblas.dll + + - name: Install Vulkan SDK + id: get_vulkan + if: ${{ matrix.build == 'kompute' || matrix.build == 'vulkan' }} + run: | + curl.exe -o 
$env:RUNNER_TEMP/VulkanSDK-Installer.exe -L "https://sdk.lunarg.com/sdk/download/${env:VULKAN_VERSION}/windows/VulkanSDK-${env:VULKAN_VERSION}-Installer.exe" + & "$env:RUNNER_TEMP\VulkanSDK-Installer.exe" --accept-licenses --default-answer --confirm-command install + Add-Content $env:GITHUB_ENV "VULKAN_SDK=C:\VulkanSDK\${env:VULKAN_VERSION}" + Add-Content $env:GITHUB_PATH "C:\VulkanSDK\${env:VULKAN_VERSION}\bin" + + - name: Build + id: cmake_build + run: | + mkdir build + cd build + cmake .. ${{ matrix.defines }} + cmake --build . --config Release -j ${env:NUMBER_OF_PROCESSORS} + + - name: Add clblast.dll + id: add_clblast_dll + if: ${{ matrix.build == 'clblast' }} + run: | + cp $env:RUNNER_TEMP/clblast/lib/clblast.dll ./build/bin/Release + cp $env:RUNNER_TEMP/CLBlast.LICENSE.txt ./build/bin/Release/CLBlast-${env:CLBLAST_VERSION}.txt + + - name: Add libopenblas.dll + id: add_libopenblas_dll + if: ${{ matrix.build == 'openblas' }} + run: | + cp $env:RUNNER_TEMP/openblas/bin/libopenblas.dll ./build/bin/Release/openblas.dll + cp $env:RUNNER_TEMP/OpenBLAS.LICENSE.txt ./build/bin/Release/OpenBLAS-${env:OPENBLAS_VERSION}.txt + + - name: Check AVX512F support + id: check_avx512f + if: ${{ matrix.build == 'avx512' }} + continue-on-error: true + run: | + cd build + $vcdir = $(vswhere -latest -products * -requires Microsoft.VisualStudio.Component.VC.Tools.x86.x64 -property installationPath) + $msvc = $(join-path $vcdir $('VC\Tools\MSVC\'+$(gc -raw $(join-path $vcdir 'VC\Auxiliary\Build\Microsoft.VCToolsVersion.default.txt')).Trim())) + $cl = $(join-path $msvc 'bin\Hostx64\x64\cl.exe') + echo 'int main(void){unsigned int a[4];__cpuid(a,7);return !(a[1]&65536);}' >> avx512f.c + & $cl /O2 /GS- /kernel avx512f.c /link /nodefaultlib /entry:main + .\avx512f.exe && echo "AVX512F: YES" && ( echo HAS_AVX512F=1 >> $env:GITHUB_ENV ) || echo "AVX512F: NO" + + - name: Test + id: cmake_test + # not all machines have native AVX-512 + if: ${{ matrix.build != 'arm64' && matrix.build != 'clblast' && matrix.build != 'kompute' && matrix.build != 'vulkan' && (matrix.build != 'avx512' || env.HAS_AVX512F == '1') }} + run: | + cd build + ctest -L main -C Release --verbose --timeout 900 + + - name: Test (Intel SDE) + id: cmake_test_sde + if: ${{ matrix.build == 'avx512' && env.HAS_AVX512F == '0' }} # use Intel SDE for AVX-512 emulation + run: | + curl.exe -o $env:RUNNER_TEMP/sde.tar.xz -L "https://downloadmirror.intel.com/813591/sde-external-${env:SDE_VERSION}-win.tar.xz" + # for some weird reason windows tar doesn't like sde tar.xz + 7z x "-o${env:RUNNER_TEMP}" $env:RUNNER_TEMP/sde.tar.xz + 7z x "-o${env:RUNNER_TEMP}" $env:RUNNER_TEMP/sde.tar + $sde = $(join-path $env:RUNNER_TEMP sde-external-${env:SDE_VERSION}-win/sde.exe) + cd build + & $sde -future -- ctest -L main -C Release --verbose --timeout 900 + + - name: Determine tag name + id: tag + shell: bash + run: | + BUILD_NUMBER="$(git rev-list --count HEAD)" + SHORT_HASH="$(git rev-parse --short=7 HEAD)" + if [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then + echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT + else + SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-') + echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT + fi + + - name: Pack artifacts + id: pack_artifacts + if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }} + run: | + Copy-Item LICENSE .\build\bin\Release\llama.cpp.txt + 7z a llama-${{ steps.tag.outputs.name }}-bin-win-${{ matrix.build }}-x64.zip 
.\build\bin\Release\* + + - name: Upload artifacts + if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }} + uses: actions/upload-artifact@v4 + with: + path: llama-${{ steps.tag.outputs.name }}-bin-win-${{ matrix.build }}-x64.zip + name: llama-bin-win-${{ matrix.build }}-x64.zip + + windows-latest-cmake-cuda: + runs-on: windows-latest + + strategy: + matrix: + cuda: ['12.2.0', '11.7.1'] + build: ['cuda'] + + steps: + - name: Clone + id: checkout + uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - uses: Jimver/cuda-toolkit@v0.2.11 + id: cuda-toolkit + with: + cuda: ${{ matrix.cuda }} + method: 'network' + sub-packages: '["nvcc", "cudart", "cublas", "cublas_dev", "thrust", "visual_studio_integration"]' + + - name: Build + id: cmake_build + run: | + mkdir build + cd build + cmake .. -DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_CUDA=ON -DBUILD_SHARED_LIBS=ON + cmake --build . --config Release -j ${env:NUMBER_OF_PROCESSORS} + + - name: Determine tag name + id: tag + shell: bash + run: | + BUILD_NUMBER="$(git rev-list --count HEAD)" + SHORT_HASH="$(git rev-parse --short=7 HEAD)" + if [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then + echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT + else + SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-') + echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT + fi + + - name: Pack artifacts + id: pack_artifacts + if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }} + run: | + 7z a llama-${{ steps.tag.outputs.name }}-bin-win-${{ matrix.build }}-cu${{ matrix.cuda }}-x64.zip .\build\bin\Release\* + + - name: Upload artifacts + if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }} + uses: actions/upload-artifact@v4 + with: + path: llama-${{ steps.tag.outputs.name }}-bin-win-${{ matrix.build }}-cu${{ matrix.cuda }}-x64.zip + name: llama-bin-win-cu${{ matrix.cuda }}-x64.zip + + - name: Copy and pack Cuda runtime + run: | + echo "Cuda install location: ${{steps.cuda-toolkit.outputs.CUDA_PATH}}" + $dst='.\build\bin\cudart\' + robocopy "${{steps.cuda-toolkit.outputs.CUDA_PATH}}\bin" $dst cudart64_*.dll cublas64_*.dll cublasLt64_*.dll + 7z a cudart-llama-bin-win-cu${{ matrix.cuda }}-x64.zip $dst\* + + - name: Upload Cuda runtime + if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }} + uses: actions/upload-artifact@v4 + with: + path: cudart-llama-bin-win-cu${{ matrix.cuda }}-x64.zip + name: cudart-llama-bin-win-cu${{ matrix.cuda }}-x64.zip + + windows-latest-cmake-sycl: + runs-on: windows-latest + + defaults: + run: + shell: bash + + env: + WINDOWS_BASEKIT_URL: https://registrationcenter-download.intel.com/akdlm/IRC_NAS/62641e01-1e8d-4ace-91d6-ae03f7f8a71f/w_BaseKit_p_2024.0.0.49563_offline.exe + WINDOWS_DPCPP_MKL: intel.oneapi.win.cpp-dpcpp-common:intel.oneapi.win.mkl.devel + + steps: + - name: Clone + id: checkout + uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Install + run: scripts/install-oneapi.bat $WINDOWS_BASEKIT_URL $WINDOWS_DPCPP_MKL + + - name: Build + id: cmake_build + run: examples/sycl/win-build-sycl.bat + + - name: Determine tag name + id: tag + shell: bash + run: | + BUILD_NUMBER="$(git rev-list --count HEAD)" + SHORT_HASH="$(git rev-parse --short=7 HEAD)" + if [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then 
+ echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT + else + SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-') + echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT + fi + + - name: Pack artifacts + id: pack_artifacts + if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }} + run: | + 7z a llama-${{ steps.tag.outputs.name }}-bin-win-sycl-x64.zip ./build/bin/* + + - name: Upload artifacts + if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }} + uses: actions/upload-artifact@v4 + with: + path: llama-${{ steps.tag.outputs.name }}-bin-win-sycl-x64.zip + name: llama-bin-win-sycl-x64.zip + + ios-xcode-build: + runs-on: macos-latest + + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Build Xcode project + run: xcodebuild -project examples/llama.swiftui/llama.swiftui.xcodeproj -scheme llama.swiftui -sdk iphoneos CODE_SIGNING_REQUIRED=NO CODE_SIGN_IDENTITY= -destination 'generic/platform=iOS' build + + android-build: + runs-on: ubuntu-latest + + steps: + - name: Clone + uses: actions/checkout@v4 + + - name: Set up JDK + uses: actions/setup-java@v3 + with: + java-version: 17 + distribution: zulu + + - name: Setup Android SDK + uses: android-actions/setup-android@v3 + with: + log-accepted-android-sdk-licenses: false + + - name: Build + run: | + cd examples/llama.android + + ./gradlew build --no-daemon + +# freeBSD-latest: +# runs-on: macos-12 +# steps: +# - name: Clone +# uses: actions/checkout@v4 +# +# - name: Build +# uses: cross-platform-actions/action@v0.19.0 +# with: +# operating_system: freebsd +# version: '13.2' +# hypervisor: 'qemu' +# run: | +# sudo pkg update +# sudo pkg install -y gmake automake autoconf pkgconf llvm15 clinfo clover opencl clblast openblas +# gmake CC=/usr/local/bin/clang15 CXX=/usr/local/bin/clang++15 -j `sysctl -n hw.ncpu` + + release: + if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }} + + runs-on: ubuntu-latest + + needs: + - ubuntu-focal-make + - ubuntu-latest-cmake + - macOS-latest-make + - macOS-latest-cmake + - windows-latest-cmake + - windows-latest-cmake-cuda + - macOS-latest-cmake-arm64 + - macOS-latest-cmake-x64 + + steps: + - name: Clone + id: checkout + uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Determine tag name + id: tag + shell: bash + run: | + BUILD_NUMBER="$(git rev-list --count HEAD)" + SHORT_HASH="$(git rev-parse --short=7 HEAD)" + if [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then + echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT + else + SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-') + echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT + fi + + - name: Download artifacts + id: download-artifact + uses: actions/download-artifact@v4 + with: + path: ./artifact + + - name: Move artifacts + id: move_artifacts + run: mkdir -p ./artifact/release && mv ./artifact/*/*.zip ./artifact/release + + - name: Create release + id: create_release + uses: anzz1/action-create-release@v1 + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + with: + tag_name: ${{ steps.tag.outputs.name }} + + - name: Upload release + id: upload_release + uses: actions/github-script@v3 + with: + github-token: ${{secrets.GITHUB_TOKEN}} + script: | + const path = require('path'); + const fs = require('fs'); + const release_id = '${{ steps.create_release.outputs.id }}'; + 
for (let file of await fs.readdirSync('./artifact/release')) { + if (path.extname(file) === '.zip') { + console.log('uploadReleaseAsset', file); + await github.repos.uploadReleaseAsset({ + owner: context.repo.owner, + repo: context.repo.repo, + release_id: release_id, + name: file, + data: await fs.readFileSync(`./artifact/release/${file}`) + }); + } + } + +# ubuntu-latest-gcc: +# runs-on: ubuntu-latest +# +# strategy: +# matrix: +# build: [Debug, Release] +# +# steps: +# - name: Clone +# uses: actions/checkout@v4 +# +# - name: Dependencies +# run: | +# sudo apt-get update +# sudo apt-get install build-essential +# sudo apt-get install cmake +# +# - name: Configure +# run: cmake . -DCMAKE_BUILD_TYPE=${{ matrix.build }} +# +# - name: Build +# run: | +# make +# +# ubuntu-latest-clang: +# runs-on: ubuntu-latest +# +# strategy: +# matrix: +# build: [Debug, Release] +# +# steps: +# - name: Clone +# uses: actions/checkout@v4 +# +# - name: Dependencies +# run: | +# sudo apt-get update +# sudo apt-get install build-essential +# sudo apt-get install cmake +# +# - name: Configure +# run: cmake . -DCMAKE_BUILD_TYPE=${{ matrix.build }} -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_C_COMPILER=clang +# +# - name: Build +# run: | +# make +# +# ubuntu-latest-gcc-sanitized: +# runs-on: ubuntu-latest +# +# strategy: +# matrix: +# sanitizer: [ADDRESS, THREAD, UNDEFINED] +# +# steps: +# - name: Clone +# uses: actions/checkout@v4 +# +# - name: Dependencies +# run: | +# sudo apt-get update +# sudo apt-get install build-essential +# sudo apt-get install cmake +# +# - name: Configure +# run: cmake . -DCMAKE_BUILD_TYPE=Debug -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON +# +# - name: Build +# run: | +# make +# +# windows: +# runs-on: windows-latest +# +# strategy: +# matrix: +# build: [Release] +# arch: [Win32, x64] +# include: +# - arch: Win32 +# s2arc: x86 +# - arch: x64 +# s2arc: x64 +# +# steps: +# - name: Clone +# uses: actions/checkout@v4 +# +# - name: Add msbuild to PATH +# uses: microsoft/setup-msbuild@v1 +# +# - name: Configure +# run: > +# cmake -S . -B ./build -A ${{ matrix.arch }} +# -DCMAKE_BUILD_TYPE=${{ matrix.build }} +# +# - name: Build +# run: | +# cd ./build +# msbuild ALL_BUILD.vcxproj -t:build -p:configuration=${{ matrix.build }} -p:platform=${{ matrix.arch }} +# +# - name: Upload binaries +# uses: actions/upload-artifact@v4 +# with: +# name: llama-bin-${{ matrix.arch }} +# path: build/bin/${{ matrix.build }} +# +# windows-blas: +# runs-on: windows-latest +# +# strategy: +# matrix: +# build: [Release] +# arch: [Win32, x64] +# blas: [ON] +# include: +# - arch: Win32 +# obzip: https://github.com/xianyi/OpenBLAS/releases/download/v0.3.21/OpenBLAS-0.3.21-x86.zip +# s2arc: x86 +# - arch: x64 +# obzip: https://github.com/xianyi/OpenBLAS/releases/download/v0.3.21/OpenBLAS-0.3.21-x64.zip +# s2arc: x64 +# +# steps: +# - name: Clone +# uses: actions/checkout@v4 +# +# - name: Add msbuild to PATH +# uses: microsoft/setup-msbuild@v1 +# +# - name: Fetch OpenBLAS +# if: matrix.blas == 'ON' +# run: | +# C:/msys64/usr/bin/wget.exe -qO blas.zip ${{ matrix.obzip }} +# 7z x blas.zip -oblas -y +# copy blas/include/cblas.h . +# copy blas/include/openblas_config.h . +# echo "blasdir=$env:GITHUB_WORKSPACE/blas" >> $env:GITHUB_ENV +# +# - name: Configure +# run: > +# cmake -S . 
-B ./build -A ${{ matrix.arch }} +# -DCMAKE_BUILD_TYPE=${{ matrix.build }} +# -DLLAMA_SUPPORT_OPENBLAS=${{ matrix.blas }} +# -DCMAKE_LIBRARY_PATH="$env:blasdir/lib" +# +# - name: Build +# run: | +# cd ./build +# msbuild ALL_BUILD.vcxproj -t:build -p:configuration=${{ matrix.build }} -p:platform=${{ matrix.arch }} +# +# - name: Copy libopenblas.dll +# if: matrix.blas == 'ON' +# run: copy "$env:blasdir/bin/libopenblas.dll" build/bin/${{ matrix.build }} +# +# - name: Upload binaries +# if: matrix.blas == 'ON' +# uses: actions/upload-artifact@v4 +# with: +# name: llama-blas-bin-${{ matrix.arch }} +# path: build/bin/${{ matrix.build }} +# +# emscripten: +# runs-on: ubuntu-latest +# +# strategy: +# matrix: +# build: [Release] +# +# steps: +# - name: Clone +# uses: actions/checkout@v4 +# +# - name: Dependencies +# run: | +# wget -q https://github.com/emscripten-core/emsdk/archive/master.tar.gz +# tar -xvf master.tar.gz +# emsdk-master/emsdk update +# emsdk-master/emsdk install latest +# emsdk-master/emsdk activate latest +# +# - name: Configure +# run: echo "tmp" +# +# - name: Build +# run: | +# pushd emsdk-master +# source ./emsdk_env.sh +# popd +# emcmake cmake . -DCMAKE_BUILD_TYPE=${{ matrix.build }} +# make diff --git a/llama-cpp-python/vendor/llama.cpp/.github/workflows/close-issue.yml b/llama-cpp-python/vendor/llama.cpp/.github/workflows/close-issue.yml new file mode 100644 index 0000000000000000000000000000000000000000..69c9f4f69e53b7a601b92ac827f794a9aa55b9bf --- /dev/null +++ b/llama-cpp-python/vendor/llama.cpp/.github/workflows/close-issue.yml @@ -0,0 +1,23 @@ +name: Close inactive issues +on: + schedule: + - cron: "42 0 * * *" + +jobs: + close-issues: + runs-on: ubuntu-latest + permissions: + issues: write + pull-requests: write + steps: + - uses: actions/stale@v5 + with: + exempt-issue-labels: "refactor,help wanted,good first issue,research,bug" + days-before-issue-stale: 30 + days-before-issue-close: 14 + stale-issue-label: "stale" + close-issue-message: "This issue was closed because it has been inactive for 14 days since being marked as stale." 
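+ # a value of -1 disables the stale bot for pull requests, so only issues are marked stale and closed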
+ days-before-pr-stale: -1 + days-before-pr-close: -1 + operations-per-run: 10000 + repo-token: ${{ secrets.GITHUB_TOKEN }} diff --git a/llama-cpp-python/vendor/llama.cpp/.github/workflows/code-coverage.yml b/llama-cpp-python/vendor/llama.cpp/.github/workflows/code-coverage.yml new file mode 100644 index 0000000000000000000000000000000000000000..f12c558f81baed63a3e841371ba9ed7fc00aff09 --- /dev/null +++ b/llama-cpp-python/vendor/llama.cpp/.github/workflows/code-coverage.yml @@ -0,0 +1,40 @@ +name: Code Coverage +on: [push, pull_request] + +env: + GGML_NLOOP: 3 + GGML_N_THREADS: 1 + +concurrency: + group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }} + cancel-in-progress: true + +jobs: + run: + runs-on: ubuntu-20.04 + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Dependencies + run: | + sudo apt-get update + sudo apt-get install build-essential gcc-8 lcov + + - name: Build + run: CC=gcc-8 make -j LLAMA_CODE_COVERAGE=1 tests + + - name: Run tests + run: CC=gcc-8 make test + + - name: Generate coverage report + run: | + make coverage + make lcov-report + + - name: Upload coverage to Codecov + uses: codecov/codecov-action@v3 + env: + CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} + with: + files: lcov-report/coverage.info diff --git a/llama-cpp-python/vendor/llama.cpp/.github/workflows/docker.yml b/llama-cpp-python/vendor/llama.cpp/.github/workflows/docker.yml new file mode 100644 index 0000000000000000000000000000000000000000..9b03d19bc77c616cf11946baf917c594851086f3 --- /dev/null +++ b/llama-cpp-python/vendor/llama.cpp/.github/workflows/docker.yml @@ -0,0 +1,117 @@ +# This workflow uses actions that are not certified by GitHub. +# They are provided by a third-party and are governed by +# separate terms of service, privacy policy, and support +# documentation. + +# GitHub recommends pinning actions to a commit SHA. +# To get a newer version, you will need to update the SHA. +# You can also reference a tag or branch, but the action may change without warning. + +name: Publish Docker image + +on: + pull_request: + push: + branches: + - master + +concurrency: + group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }} + cancel-in-progress: true + +jobs: + push_to_registry: + name: Push Docker image to Docker Hub + if: github.event.pull_request.draft == false + + runs-on: ubuntu-latest + env: + COMMIT_SHA: ${{ github.sha }} + strategy: + matrix: + config: + - { tag: "light", dockerfile: ".devops/main.Dockerfile", platforms: "linux/amd64,linux/arm64" } + - { tag: "full", dockerfile: ".devops/full.Dockerfile", platforms: "linux/amd64,linux/arm64" } + - { tag: "server", dockerfile: ".devops/server.Dockerfile", platforms: "linux/amd64,linux/arm64" } + # NOTE(canardletter): The CUDA builds on arm64 are very slow, so I + # have disabled them for now until the reason why + # is understood. 
+ - { tag: "light-cuda", dockerfile: ".devops/main-cuda.Dockerfile", platforms: "linux/amd64" } + - { tag: "full-cuda", dockerfile: ".devops/full-cuda.Dockerfile", platforms: "linux/amd64" } + - { tag: "server-cuda", dockerfile: ".devops/server-cuda.Dockerfile", platforms: "linux/amd64" } + - { tag: "light-rocm", dockerfile: ".devops/main-rocm.Dockerfile", platforms: "linux/amd64,linux/arm64" } + - { tag: "full-rocm", dockerfile: ".devops/full-rocm.Dockerfile", platforms: "linux/amd64,linux/arm64" } + - { tag: "server-rocm", dockerfile: ".devops/server-rocm.Dockerfile", platforms: "linux/amd64,linux/arm64" } + - { tag: "light-intel", dockerfile: ".devops/main-intel.Dockerfile", platforms: "linux/amd64" } + - { tag: "server-intel", dockerfile: ".devops/server-intel.Dockerfile", platforms: "linux/amd64" } + steps: + - name: Check out the repo + uses: actions/checkout@v4 + + - name: Set up QEMU + uses: docker/setup-qemu-action@v2 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v2 + + - name: Log in to Docker Hub + uses: docker/login-action@v2 + with: + registry: ghcr.io + username: ${{ github.repository_owner }} + password: ${{ secrets.GITHUB_TOKEN }} + + # https://github.com/jlumbroso/free-disk-space/tree/54081f138730dfa15788a46383842cd2f914a1be#example + - name: Free Disk Space (Ubuntu) + uses: jlumbroso/free-disk-space@main + with: + # this might remove tools that are actually needed, + # if set to "true" but frees about 6 GB + tool-cache: false + + # all of these default to true, but feel free to set to + # "false" if necessary for your workflow + android: true + dotnet: true + haskell: true + large-packages: true + docker-images: true + swap-storage: true + + - name: Determine tag name + id: tag + shell: bash + run: | + BUILD_NUMBER="$(git rev-list --count HEAD)" + SHORT_HASH="$(git rev-parse --short=7 HEAD)" + if [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then + echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT + else + SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-') + echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT + fi + + - name: Downcase github.repository_owner + run: | + echo "repository_owner_lowercase=${GITHUB_REPOSITORY_OWNER@L}" >> $GITHUB_ENV + env: + GITHUB_REPOSITORY_OWNER: '${{ github.repository_owner }}' + + - name: Build and push Docker image (versioned) + if: github.event_name == 'push' + uses: docker/build-push-action@v4 + with: + context: . + push: true + platforms: ${{ matrix.config.platforms }} + tags: "ghcr.io/${{ env.repository_owner_lowercase }}/llama.cpp:${{ matrix.config.tag }}-${{ env.COMMIT_SHA }}" + file: ${{ matrix.config.dockerfile }} + + - name: Build and push Docker image (tagged) + uses: docker/build-push-action@v4 + with: + context: . 
+ push: ${{ github.event_name == 'push' }} + platforms: ${{ matrix.config.platforms }} + tags: "ghcr.io/${{ env.repository_owner_lowercase }}/llama.cpp:${{ matrix.config.tag }},ghcr.io/${{ env.repository_owner_lowercase }}/llama.cpp:${{ matrix.config.tag }}-${{ steps.tag.outputs.name }}" + file: ${{ matrix.config.dockerfile }} diff --git a/llama-cpp-python/vendor/llama.cpp/.github/workflows/editorconfig.yml b/llama-cpp-python/vendor/llama.cpp/.github/workflows/editorconfig.yml new file mode 100644 index 0000000000000000000000000000000000000000..ae86e9927526521eadcef6b2ed6ecd26ef8e815b --- /dev/null +++ b/llama-cpp-python/vendor/llama.cpp/.github/workflows/editorconfig.yml @@ -0,0 +1,27 @@ +name: EditorConfig Checker + +on: + workflow_dispatch: # allows manual triggering + inputs: + create_release: + description: 'Create new release' + required: true + type: boolean + push: + branches: + - master + pull_request: + branches: + - master + +concurrency: + group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }} + cancel-in-progress: true + +jobs: + editorconfig: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: editorconfig-checker/action-editorconfig-checker@main + - run: editorconfig-checker diff --git a/llama-cpp-python/vendor/llama.cpp/.github/workflows/gguf-publish.yml b/llama-cpp-python/vendor/llama.cpp/.github/workflows/gguf-publish.yml new file mode 100644 index 0000000000000000000000000000000000000000..3ca4d30581074cc1b69d7b7306eef170f0baa0d6 --- /dev/null +++ b/llama-cpp-python/vendor/llama.cpp/.github/workflows/gguf-publish.yml @@ -0,0 +1,44 @@ +# This workflow will upload a Python Package using Twine when a GGUF release is created +# For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries + +# See `gguf-py/README.md` for how to make a release. + +# This workflow uses actions that are not certified by GitHub. +# They are provided by a third-party and are governed by +# separate terms of service, privacy policy, and support +# documentation. + +name: Upload Python Package + +on: + workflow_dispatch: + push: + # Pattern matched against refs/tags + tags: + - 'gguf-v*' # Push events to every version tag + + +jobs: + deploy: + + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.9.x' + - name: Install dependencies + run: | + cd gguf-py + python -m pip install poetry + poetry install + + - name: Build package + run: cd gguf-py && poetry build + - name: Publish package + uses: pypa/gh-action-pypi-publish@release/v1 + with: + password: ${{ secrets.PYPI_API_TOKEN }} + packages-dir: gguf-py/dist diff --git a/llama-cpp-python/vendor/llama.cpp/.github/workflows/nix-ci-aarch64.yml b/llama-cpp-python/vendor/llama.cpp/.github/workflows/nix-ci-aarch64.yml new file mode 100644 index 0000000000000000000000000000000000000000..4aa4b2379dccf20a00d965fe2d7f076f72c2ddd3 --- /dev/null +++ b/llama-cpp-python/vendor/llama.cpp/.github/workflows/nix-ci-aarch64.yml @@ -0,0 +1,65 @@ +name: Nix aarch64 builds + +on: + workflow_dispatch: # allows manual triggering + schedule: + # Rebuild daily rather than on every push because QEMU is expensive (e.g. + # 1.5h instead of minutes with the cold cache). 
+ # + # randint(0, 59), randint(0, 23) + - cron: '26 12 * * *' + # But also rebuild if we touched any of the Nix expressions: + push: + branches: + - master + paths: ['**/*.nix', 'flake.lock'] + pull_request: + types: [opened, synchronize, reopened] + paths: ['**/*.nix', 'flake.lock'] + +concurrency: + group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }} + cancel-in-progress: true + +jobs: + nix-build-aarch64: + runs-on: ubuntu-latest + steps: + - name: Checkout repository + uses: actions/checkout@v4 + - name: Install QEMU + # Copy-paste from https://github.com/orgs/community/discussions/8305#discussioncomment-5888654 + run: | + sudo apt-get update + sudo apt-get install -y qemu-user-static qemu-system-aarch64 + sudo usermod -a -G kvm $USER + - name: Install Nix + uses: DeterminateSystems/nix-installer-action@v9 + with: + github-token: ${{ secrets.GITHUB_TOKEN }} + extra-conf: | + extra-platforms = aarch64-linux + extra-system-features = nixos-test kvm + extra-substituters = https://llama-cpp.cachix.org https://cuda-maintainers.cachix.org + extra-trusted-public-keys = llama-cpp.cachix.org-1:H75X+w83wUKTIPSO1KWy9ADUrzThyGs8P5tmAbkWhQc= cuda-maintainers.cachix.org-1:0dq3bujKpuEPMCX6U4WylrUDZ9JyUG0VpVZa7CNfq5E= + - uses: DeterminateSystems/magic-nix-cache-action@v2 + with: + upstream-cache: https://${{ matrix.cachixName }}.cachix.org + - name: Set-up cachix to push the results to + uses: cachix/cachix-action@v13 + with: + authToken: '${{ secrets.CACHIX_AUTH_TOKEN }}' + name: llama-cpp + - name: Show all output paths + run: > + nix run github:nix-community/nix-eval-jobs + -- --gc-roots-dir gcroot + --flake + ".#packages.aarch64-linux" + - name: Build + run: > + nix run github:Mic92/nix-fast-build + -- --skip-cached --no-nom + --systems aarch64-linux + --flake + ".#checks.aarch64-linux" diff --git a/llama-cpp-python/vendor/llama.cpp/.github/workflows/nix-ci.yml b/llama-cpp-python/vendor/llama.cpp/.github/workflows/nix-ci.yml new file mode 100644 index 0000000000000000000000000000000000000000..8955f38d020a60309826439a6ac7daefac9da38a --- /dev/null +++ b/llama-cpp-python/vendor/llama.cpp/.github/workflows/nix-ci.yml @@ -0,0 +1,72 @@ +name: Nix CI + +on: + workflow_dispatch: # allows manual triggering + push: + branches: + - master + pull_request: + types: [opened, synchronize, reopened] + +concurrency: + group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }} + cancel-in-progress: true + +jobs: + nix-eval: + strategy: + fail-fast: false + matrix: + os: [ ubuntu-latest, macos-latest ] + runs-on: ${{ matrix.os }} + steps: + - name: Checkout repository + uses: actions/checkout@v4 + - name: Install Nix + uses: DeterminateSystems/nix-installer-action@v9 + with: + github-token: ${{ secrets.GITHUB_TOKEN }} + extra-conf: | + extra-substituters = https://llama-cpp.cachix.org https://cuda-maintainers.cachix.org + extra-trusted-public-keys = llama-cpp.cachix.org-1:H75X+w83wUKTIPSO1KWy9ADUrzThyGs8P5tmAbkWhQc= cuda-maintainers.cachix.org-1:0dq3bujKpuEPMCX6U4WylrUDZ9JyUG0VpVZa7CNfq5E= + - uses: DeterminateSystems/magic-nix-cache-action@v2 + with: + upstream-cache: https://${{ matrix.cachixName }}.cachix.org + - name: List all flake outputs + run: nix flake show --all-systems + - name: Show all output paths + run: > + nix run github:nix-community/nix-eval-jobs + -- --gc-roots-dir gcroot + --flake + ".#packages.$(nix eval --raw --impure --expr builtins.currentSystem)" + nix-build: + strategy: + fail-fast: false + matrix: + os: [ ubuntu-latest, 
macos-latest ] + runs-on: ${{ matrix.os }} + steps: + - name: Checkout repository + uses: actions/checkout@v4 + - name: Install Nix + uses: DeterminateSystems/nix-installer-action@v9 + with: + github-token: ${{ secrets.GITHUB_TOKEN }} + extra-conf: | + extra-substituters = https://llama-cpp.cachix.org https://cuda-maintainers.cachix.org + extra-trusted-public-keys = llama-cpp.cachix.org-1:H75X+w83wUKTIPSO1KWy9ADUrzThyGs8P5tmAbkWhQc= cuda-maintainers.cachix.org-1:0dq3bujKpuEPMCX6U4WylrUDZ9JyUG0VpVZa7CNfq5E= + - uses: DeterminateSystems/magic-nix-cache-action@v2 + with: + upstream-cache: https://${{ matrix.cachixName }}.cachix.org + - name: Set-up cachix to push the results to + uses: cachix/cachix-action@v13 + with: + authToken: '${{ secrets.CACHIX_AUTH_TOKEN }}' + name: llama-cpp + - name: Build + run: > + nix run github:Mic92/nix-fast-build + -- --skip-cached --no-nom + --flake + ".#checks.$(nix eval --raw --impure --expr builtins.currentSystem)" diff --git a/llama-cpp-python/vendor/llama.cpp/.github/workflows/nix-flake-update.yml b/llama-cpp-python/vendor/llama.cpp/.github/workflows/nix-flake-update.yml new file mode 100644 index 0000000000000000000000000000000000000000..3a6a96e263e59f6468b04385163f2c04c11f10fb --- /dev/null +++ b/llama-cpp-python/vendor/llama.cpp/.github/workflows/nix-flake-update.yml @@ -0,0 +1,22 @@ +name: update-flake-lock +on: + workflow_dispatch: + schedule: + - cron: '0 0 * * 0' # runs weekly on Sunday at 00:00 + +jobs: + lockfile: + runs-on: ubuntu-latest + steps: + - name: Checkout repository + uses: actions/checkout@v4 + - name: Install Nix + uses: DeterminateSystems/nix-installer-action@main + - name: Update flake.lock + uses: DeterminateSystems/update-flake-lock@main + with: + pr-title: "nix: update flake.lock" + pr-labels: | + nix + pr-reviewers: philiptaron,SomeoneSerge + token: ${{ secrets.FLAKE_TOKEN }} diff --git a/llama-cpp-python/vendor/llama.cpp/.github/workflows/nix-publish-flake.yml b/llama-cpp-python/vendor/llama.cpp/.github/workflows/nix-publish-flake.yml new file mode 100644 index 0000000000000000000000000000000000000000..2c3c1ebdaeff1461bb610e9d03932f50cdd1faeb --- /dev/null +++ b/llama-cpp-python/vendor/llama.cpp/.github/workflows/nix-publish-flake.yml @@ -0,0 +1,36 @@ +# Make the flake discoverable on https://flakestry.dev and https://flakehub.com/flakes +name: "Publish a flake to flakestry & flakehub" +on: + push: + tags: + - "*" + workflow_dispatch: + inputs: + tag: + description: "The existing tag to publish" + type: "string" + required: true +jobs: + flakestry-publish: + runs-on: ubuntu-latest + permissions: + id-token: "write" + contents: "read" + steps: + - uses: flakestry/flakestry-publish@main + with: + version: "${{ inputs.tag || github.ref_name }}" + flakehub-publish: + runs-on: "ubuntu-latest" + permissions: + id-token: "write" + contents: "read" + steps: + - uses: "actions/checkout@v4" + with: + ref: "${{ (inputs.tag != null) && format('refs/tags/{0}', inputs.tag) || '' }}" + - uses: "DeterminateSystems/nix-installer-action@main" + - uses: "DeterminateSystems/flakehub-push@main" + with: + visibility: "public" + tag: "${{ inputs.tag }}" diff --git a/llama-cpp-python/vendor/llama.cpp/.github/workflows/python-check-requirements.yml b/llama-cpp-python/vendor/llama.cpp/.github/workflows/python-check-requirements.yml new file mode 100644 index 0000000000000000000000000000000000000000..4e0374fc63d95e2443678bcf6818535d4cf24ceb --- /dev/null +++ b/llama-cpp-python/vendor/llama.cpp/.github/workflows/python-check-requirements.yml @@ -0,0 
+1,35 @@ +name: Python check requirements.txt + +on: + push: + paths: + - '.github/workflows/python-check-requirements.yml' + - 'scripts/check-requirements.sh' + - 'convert*.py' + - 'requirements.txt' + - 'requirements/*.txt' + pull_request: + paths: + - '.github/workflows/python-check-requirements.yml' + - 'scripts/check-requirements.sh' + - 'convert*.py' + - 'requirements.txt' + - 'requirements/*.txt' + +concurrency: + group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }} + cancel-in-progress: true + +jobs: + python-check-requirements: + runs-on: ubuntu-latest + name: check-requirements + steps: + - name: Check out source repository + uses: actions/checkout@v4 + - name: Set up Python environment + uses: actions/setup-python@v5 + with: + python-version: "3.11" + - name: Run check-requirements.sh script + run: bash scripts/check-requirements.sh diff --git a/llama-cpp-python/vendor/llama.cpp/.github/workflows/python-lint.yml b/llama-cpp-python/vendor/llama.cpp/.github/workflows/python-lint.yml new file mode 100644 index 0000000000000000000000000000000000000000..5be17f1576ebbc0c1f85450b27a641ad385895e4 --- /dev/null +++ b/llama-cpp-python/vendor/llama.cpp/.github/workflows/python-lint.yml @@ -0,0 +1,24 @@ +name: flake8 Lint + +on: [push, pull_request] + +concurrency: + group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }} + cancel-in-progress: true + +jobs: + flake8-lint: + runs-on: ubuntu-latest + name: Lint + steps: + - name: Check out source repository + uses: actions/checkout@v4 + - name: Set up Python environment + uses: actions/setup-python@v5 + with: + python-version: "3.11" + - name: flake8 Lint + uses: py-actions/flake8@v2 + with: + ignore: "E203,E211,E221,E225,E231,E241,E251,E261,E266,E501,E701,E704,W503" + exclude: "examples/*,examples/*/**,*/**/__init__.py,convert-hf-to-gguf-update.py" diff --git a/llama-cpp-python/vendor/llama.cpp/.github/workflows/server.yml b/llama-cpp-python/vendor/llama.cpp/.github/workflows/server.yml new file mode 100644 index 0000000000000000000000000000000000000000..afac89c5b80b19e226ed651f16e491db4f6e5b3e --- /dev/null +++ b/llama-cpp-python/vendor/llama.cpp/.github/workflows/server.yml @@ -0,0 +1,175 @@ +# Server build and tests +name: Server + +on: + workflow_dispatch: # allows manual triggering + inputs: + sha: + description: 'Commit SHA1 to build' + required: false + type: string + slow_tests: + description: 'Run slow tests' + required: true + type: boolean + push: + branches: + - master + paths: ['.github/workflows/server.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.swift', '**/*.m', 'examples/server/**.*'] + pull_request_target: + types: [opened, synchronize, reopened] + paths: ['.github/workflows/server.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.swift', '**/*.m', 'examples/server/**.*'] + schedule: + - cron: '2 4 * * *' + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }}-${{ github.head_ref || github.run_id }} + cancel-in-progress: true + +jobs: + server: + runs-on: ubuntu-latest + + strategy: + matrix: + # TODO: temporary disabled due to linux kernel issues + #sanitizer: [ADDRESS, THREAD, UNDEFINED] + sanitizer: [UNDEFINED] + build_type: [Debug] + include: + - build_type: Release + sanitizer: "" + fail-fast: false # While -DLLAMA_SANITIZE_THREAD=ON is broken + + steps: + - name: Dependencies + id: depends + run: | + sudo apt-get update + sudo apt-get -y 
install \ + build-essential \ + xxd \ + git \ + cmake \ + curl \ + wget \ + language-pack-en \ + libcurl4-openssl-dev + + - name: Clone + id: checkout + uses: actions/checkout@v4 + with: + fetch-depth: 0 + ref: ${{ github.event.inputs.sha || github.event.pull_request.head.sha || github.sha || github.head_ref || github.ref_name }} + + - name: Python setup + id: setup_python + uses: actions/setup-python@v5 + with: + python-version: '3.11' + + - name: Tests dependencies + id: test_dependencies + run: | + pip install -r examples/server/tests/requirements.txt + + - name: Verify server deps + id: verify_server_deps + run: | + git config --global --add safe.directory $(realpath .) + cd examples/server + git ls-files --others --modified + git status + ./deps.sh + git status + not_ignored_files="$(git ls-files --others --modified)" + echo "Modified files: ${not_ignored_files}" + if [ -n "${not_ignored_files}" ]; then + echo "Repository is dirty or server deps are not built as expected" + echo "${not_ignored_files}" + exit 1 + fi + + - name: Build + id: cmake_build + run: | + cmake -B build \ + -DLLAMA_NATIVE=OFF \ + -DLLAMA_BUILD_SERVER=ON \ + -DLLAMA_CURL=ON \ + -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} \ + -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON ; + cmake --build build --config ${{ matrix.build_type }} -j $(nproc) --target server + + + - name: Tests + id: server_integration_tests + if: ${{ !matrix.disabled_on_pr || !github.event.pull_request }} + run: | + cd examples/server/tests + PORT=8888 ./tests.sh + + - name: Slow tests + id: server_integration_tests_slow + if: ${{ (github.event.schedule || github.event.inputs.slow_tests == 'true') && matrix.build_type == 'Release' }} + run: | + cd examples/server/tests + PORT=8888 ./tests.sh --stop --no-skipped --no-capture --tags slow + + + server-windows: + runs-on: windows-latest + + steps: + - name: Clone + id: checkout + uses: actions/checkout@v4 + with: + fetch-depth: 0 + ref: ${{ github.event.inputs.sha || github.event.pull_request.head.sha || github.sha || github.head_ref || github.ref_name }} + + - name: libCURL + id: get_libcurl + env: + CURL_VERSION: 8.6.0_6 + run: | + curl.exe -o $env:RUNNER_TEMP/curl.zip -L "https://curl.se/windows/dl-${env:CURL_VERSION}/curl-${env:CURL_VERSION}-win64-mingw.zip" + mkdir $env:RUNNER_TEMP/libcurl + tar.exe -xvf $env:RUNNER_TEMP/curl.zip --strip-components=1 -C $env:RUNNER_TEMP/libcurl + + - name: Build + id: cmake_build + run: | + cmake -B build -DLLAMA_CURL=ON -DCURL_LIBRARY="$env:RUNNER_TEMP/libcurl/lib/libcurl.dll.a" -DCURL_INCLUDE_DIR="$env:RUNNER_TEMP/libcurl/include" + cmake --build build --config Release -j ${env:NUMBER_OF_PROCESSORS} --target server + + - name: Python setup + id: setup_python + uses: actions/setup-python@v5 + with: + python-version: '3.11' + + - name: Tests dependencies + id: test_dependencies + run: | + pip install -r examples/server/tests/requirements.txt + + - name: Copy Libcurl + id: prepare_libcurl + run: | + cp $env:RUNNER_TEMP/libcurl/bin/libcurl-x64.dll ./build/bin/Release/libcurl-x64.dll + + - name: Tests + id: server_integration_tests + if: ${{ !matrix.disabled_on_pr || !github.event.pull_request }} + run: | + cd examples/server/tests + behave.exe --summary --stop --no-capture --exclude 'issues|wrong_usages|passkey' --tags llama.cpp + + - name: Slow tests + id: server_integration_tests_slow + if: ${{ (github.event.schedule || github.event.inputs.slow_tests == 'true') && matrix.build_type == 'Release' }} + run: | + cd examples/server/tests + behave.exe --stop --no-skipped 
--no-capture --tags slow diff --git a/llama-cpp-python/vendor/llama.cpp/.github/workflows/zig-build.yml b/llama-cpp-python/vendor/llama.cpp/.github/workflows/zig-build.yml new file mode 100644 index 0000000000000000000000000000000000000000..747c35cc07a96f53233fd8da609f819758f8dcc8 --- /dev/null +++ b/llama-cpp-python/vendor/llama.cpp/.github/workflows/zig-build.yml @@ -0,0 +1,29 @@ +name: Zig CI + +on: + pull_request: + push: + branches: + - master + +concurrency: + group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }} + cancel-in-progress: true + +jobs: + build: + strategy: + fail-fast: false + matrix: + runs-on: [ubuntu-latest, macos-latest, windows-latest] + runs-on: ${{ matrix.runs-on }} + steps: + - uses: actions/checkout@v4 + with: + submodules: recursive + fetch-depth: 0 + - uses: goto-bus-stop/setup-zig@v2 + with: + version: 0.11.0 + - name: Build Summary + run: zig build --summary all -freference-trace diff --git a/llama-cpp-python/vendor/llama.cpp/.gitignore b/llama-cpp-python/vendor/llama.cpp/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..50ae0973ae3b306be0f817a30bdc63c8254186e6 --- /dev/null +++ b/llama-cpp-python/vendor/llama.cpp/.gitignore @@ -0,0 +1,126 @@ +*.o +*.a +*.so +*.gguf +*.gguf.json +*.bin +*.exe +*.dll +*.log +*.gcov +*.gcno +*.gcda +*.dot +*.bat +*.tmp +*.metallib +*.etag +*.lastModified +.DS_Store +.build/ +.cache/ +.ccls-cache/ +.direnv/ +.envrc +.swiftpm +.venv +.clang-tidy +.vs/ +.vscode/ +.idea/ + +ggml-metal-embed.metal + +lcov-report/ +gcovr-report/ + +build* +!build.zig +cmake-build-* +out/ +tmp/ + +models/* +models-mnt + +/Pipfile +/baby-llama +/beam-search +/benchmark-matmult +/convert-llama2c-to-ggml +/embd-input-test +/embedding +/eval-callback +/gguf +/gguf-llama-simple +/gguf-split +/gritlm +/imatrix +/infill +/libllama.so +/llama-bench +/llava-cli +/lookahead +/lookup +/lookup-create +/lookup-merge +/lookup-stats +/main +/metal +/passkey +/perplexity +/q8dot +/quantize +/quantize-stats +/result +/save-load-state +/server +/simple +/batched +/batched-bench +/export-lora +/finetune +/retrieval +/speculative +/parallel +/train-text-from-scratch +/tokenize +/vdot +/common/build-info.cpp +arm_neon.h +compile_commands.json +CMakeSettings.json + +__pycache__ +dist + +zig-out/ +zig-cache/ + +ppl-*.txt +qnt-*.txt +perf-*.txt + +examples/jeopardy/results.txt +examples/server/*.html.hpp +examples/server/*.js.hpp +examples/server/*.mjs.hpp + +poetry.lock +poetry.toml +nppBackup + +# Test binaries +/tests/test-grammar-parser +/tests/test-llama-grammar +/tests/test-double-float +/tests/test-grad0 +/tests/test-opt +/tests/test-quantize-fns +/tests/test-quantize-perf +/tests/test-sampling +/tests/test-tokenizer-0 +/tests/test-tokenizer-1-spm +/tests/test-tokenizer-1-bpe +/tests/test-rope +/tests/test-backend-ops diff --git a/llama-cpp-python/vendor/llama.cpp/.gitmodules b/llama-cpp-python/vendor/llama.cpp/.gitmodules new file mode 100644 index 0000000000000000000000000000000000000000..b7e8b8ff2f64efced25885bbbd3073f8d3f12ac0 --- /dev/null +++ b/llama-cpp-python/vendor/llama.cpp/.gitmodules @@ -0,0 +1,3 @@ +[submodule "kompute"] + path = kompute + url = https://github.com/nomic-ai/kompute.git diff --git a/llama-cpp-python/vendor/llama.cpp/.pre-commit-config.yaml b/llama-cpp-python/vendor/llama.cpp/.pre-commit-config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..65796fe2e423b3fd2d89f6f1938da5ac5da6bf0d --- /dev/null +++ 
b/llama-cpp-python/vendor/llama.cpp/.pre-commit-config.yaml @@ -0,0 +1,15 @@ +# See https://pre-commit.com for more information +# See https://pre-commit.com/hooks.html for more hooks +exclude: prompts/.*.txt +repos: +- repo: https://github.com/pre-commit/pre-commit-hooks + rev: v3.2.0 + hooks: + - id: trailing-whitespace + - id: end-of-file-fixer + - id: check-yaml + - id: check-added-large-files +- repo: https://github.com/PyCQA/flake8 + rev: 6.0.0 + hooks: + - id: flake8 diff --git a/llama-cpp-python/vendor/llama.cpp/AUTHORS b/llama-cpp-python/vendor/llama.cpp/AUTHORS new file mode 100644 index 0000000000000000000000000000000000000000..b029f13da3b56ba2d500fd5141aa53e6f90673f4 --- /dev/null +++ b/llama-cpp-python/vendor/llama.cpp/AUTHORS @@ -0,0 +1,655 @@ +# date: Tue Apr 9 09:17:14 EEST 2024 +# this file is auto-generated by scripts/gen-authors.sh + +0cc4m +0xspringtime <110655352+0xspringtime@users.noreply.github.com> +2f38b454 +3ooabkhxtn <31479382+3ooabkhxtn@users.noreply.github.com> +44670 <44670@users.noreply.github.com> +AN Long +AT +Aarni Koskela +Aaron Miller +Aaryaman Vasishta +Abhilash Majumder <30946547+abhilash1910@users.noreply.github.com> +Abhishek Gopinath K <31348521+overtunned@users.noreply.github.com> +Adithya Balaji +AdithyanI +Adrian +Adrian Hesketh +AidanBeltonS <87009434+AidanBeltonS@users.noreply.github.com> +Aisuko +Alberto <57916483+albbus-stack@users.noreply.github.com> +Alex +Alex Azarov +Alex Azarov +Alex Klinkhamer +Alex Klinkhamer +Alex Nguyen +Alex Petenchea +Alex Renda +Alex von Gluck IV +Alexey Parfenov +Ali Chraghi <63465728+alichraghi@users.noreply.github.com> +Ali Nehzat +Ali Tariq +Alon +AlpinDale <52078762+AlpinDale@users.noreply.github.com> +AmirAli Mirian <37371367+amiralimi@users.noreply.github.com> +Ananta Bastola +Anas Ahouzi <112881240+aahouzi@users.noreply.github.com> +András Salamon +Andrei +Andrew Canis +Andrew Duffy +Andrew Godfrey +Arik Poznanski +Artem +Artyom Lebedev +Asbjørn Olling +Ásgeir Bjarni Ingvarsson +Ashok Gelal <401055+ashokgelal@users.noreply.github.com> +Ashraful Islam +Atsushi Tatsuma +Austin <77757836+teleprint-me@users.noreply.github.com> +AustinMroz +BADR +Bach Le +Bailey Chittle <39804642+bachittle@users.noreply.github.com> +BarfingLemurs <128182951+BarfingLemurs@users.noreply.github.com> +Behnam M <58621210+ibehnam@users.noreply.github.com> +Ben Garney +Ben Siraphob +Ben Williams +Benjamin Lecaillon <84293038+blecaillon@users.noreply.github.com> +Bernat Vadell +Bodo Graumann +Bono Lv +Borislav Stanimirov +Branden Butler +Brian +Bruce MacDonald +CJ Pais +CRD716 +Cameron +Cameron Kaiser +Casey Primozic +Casey Primozic +CausalLM <148736309+CausalLM@users.noreply.github.com> +Cebtenzzre +Chad Brewbaker +Cheng Shao +Chris Kuehl +Christian Demsar +Christian Demsar +Christian Falch <875252+chrfalch@users.noreply.github.com> +Christian Kögler +Clark Saben <76020733+csaben@users.noreply.github.com> +Clint Herron +Cuong Trinh Manh +DAN™ +Damian Stewart +Dane Madsen +DaniAndTheWeb <57776841+DaniAndTheWeb@users.noreply.github.com> +Daniel Bevenius +Daniel Drake +Daniel Hiltgen +Daniel Illescas Romero +DannyDaemonic +Dat Quoc Nguyen <2412555+datquocnguyen@users.noreply.github.com> +Dave Della Costa +David Friehs +David Kennedy +David Pflug +David Renshaw +David Sommers <12738+databyte@users.noreply.github.com> +David Yang +Dawid Wysocki <62249621+TortillaZHawaii@users.noreply.github.com> +Dean +Deins +Didzis Gosko +Don Mahurin +DooWoong Lee (David) +Doomsdayrs <38189170+Doomsdayrs@users.noreply.github.com> +Douglas Hanley +Dr. 
Tom Murphy VII Ph.D <499244+tom7@users.noreply.github.com> +Ebey Abraham +Ed Lee +Ed Lepedus +Edward Taylor +Elbios <141279586+Elbios@users.noreply.github.com> +Engininja2 <139037756+Engininja2@users.noreply.github.com> +Equim +Eric Sommerlade +Eric Zhang <34133756+EZForever@users.noreply.github.com> +Erik Garrison +Erik Scholz +Ettore Di Giacinto +Evan Jones +Evan Miller +Eve <139727413+netrunnereve@users.noreply.github.com> +Evgeny Kurnevsky +Ewout ter Hoeven +ExtReMLapin <3909752+ExtReMLapin@users.noreply.github.com> +FK +Fabian +Fabio R. Sluzala +Faez Shakil +FantasyGmm <16450052+FantasyGmm@users.noreply.github.com> +Fattire <528174+fat-tire@users.noreply.github.com> +Felix +Finn Voorhees +Firat +Folko-Ven <71110216+Folko-Ven@users.noreply.github.com> +Foul-Tarnished <107711110+Foul-Tarnished@users.noreply.github.com> +Francisco Melo <43780565+francis2tm@users.noreply.github.com> +FrankHB +Frederik Vogel +Gabe Goodhart +GainLee +Galunid +Gary Linscott +Gary Mulder +Genkagaku.GPT +Georgi Gerganov +Gilad S +GiviMAD +Govlzkoy +Guillaume "Vermeille" Sanchez +Guillaume Wenzek +Guoteng <32697156+SolenoidWGT@users.noreply.github.com> +Gustavo Rocha Dias <91472747+gustrd@users.noreply.github.com> +Halalaluyafail3 <55773281+Halalaluyafail3@users.noreply.github.com> +Haohui Mai +Haoxiang Fei +Harald Fernengel +Hatsune Miku <129688334+at8u@users.noreply.github.com> +Henk Poley +Henri Vasserman +Henrik Forstén +Herman Semenov +Hesen Peng +Hoang Nguyen +Hongyu Ouyang <96765450+casavaca@users.noreply.github.com> +Howard Su +Hua Jiang +Huawei Lin +Ian Bull +Ian Bull +Ian Scrivener +Ido S +IgnacioFDM +Igor Okulist +Ikko Eltociear Ashimine +Ilya Kurdyukov <59548320+ilyakurdyukov@users.noreply.github.com> +Ionoclast Laboratories +Isaac McFadyen +IsaacDynamo <61521674+IsaacDynamo@users.noreply.github.com> +Ivan Komarov +Ivan Stepanov +JH23X <165871467+JH23X@users.noreply.github.com> +Jack Mousseau +JackJollimore <130917767+JackJollimore@users.noreply.github.com> +Jag Chadha +Jakub N +James Reynolds +Jan Boon +Jan Boon +Jan Ploski +Jannis Schönleber +Jared Van Bortel +Jared Van Bortel +Jason McCartney +Jean-Christophe Hoelt +Jean-Michaël Celerier +Jed Fox +Jeffrey Quesnelle +Jesse Jojo Johnson +Jhen-Jie Hong +Jiahao Li +Jian Liao +JidongZhang-THU <1119708529@qq.com> +Jinwoo Jeong <33892306+williamjeong2@users.noreply.github.com> +Jiří Podivín <66251151+jpodivin@users.noreply.github.com> +Johannes Gäßler +Johannes Rudolph +John <78893154+cmp-nct@users.noreply.github.com> +John Balis +John Smith <67539080+kingsidelee@users.noreply.github.com> +JohnnyB +Jonas Wunderlich <32615971+jonas-w@users.noreply.github.com> +Jorge A <161275481+jorgealias@users.noreply.github.com> +Jose Maldonado <63384398+yukiteruamano@users.noreply.github.com> +Joseph Stahl <1269177+josephst@users.noreply.github.com> +Joyce +Juan Calderon-Perez <835733+gaby@users.noreply.github.com> +Judd +Julius Arkenberg +Jun Jie <71215065+junnjiee16@users.noreply.github.com> +Juraj Bednar +Justin Parker +Justin Suess +Justine Tunney +Juuso Alasuutari +KASR +Kamil Tomšík +Karsten Weiss +Karthick +Karthik Kumar Viswanathan <195178+guilt@users.noreply.github.com> +Karthik Sethuraman +Kasumi <90275229+kasumi-1@users.noreply.github.com> +Kawrakow <48489457+ikawrakow@users.noreply.github.com> +Keiichi Tabata +Kenvix ⭐ +Kerfuffle <44031344+KerfuffleV2@users.noreply.github.com> +Kevin Ji <1146876+kevinji@users.noreply.github.com> +Kevin Kwok +Kevin Lo +Kolen Cheung +Konstantin Herud +Konstantin Zhuravlyov +Kunshang Ji +Kyle Liang +Kyle Mistele +Kylin 
<56434533+KyL0N@users.noreply.github.com> +Lars Grammel +Laura +Lee <44310445+lx200916@users.noreply.github.com> +Lee Drake +Leng Yue +LeonEricsson <70749762+LeonEricsson@users.noreply.github.com> +Leonardo Neumann +Li Tan +Linwei Wang +LoganDark +LostRuins <39025047+LostRuins@users.noreply.github.com> +Luciano +Luo Tian +M. Yusuf Sarıgöz +Maarten ter Huurne +Mack Straight +Maël Kerbiriou +MaggotHATE +Marc Köhlbrugge +Marco Matthies <71844+marcom@users.noreply.github.com> +Marcus Dunn <51931484+MarcusDunn@users.noreply.github.com> +Marian Cepok +Mark Fairbairn +Marko Tasic +Martin Krasser +Martin Schwaighofer +Marvin Gießing +Mateusz Charytoniuk +Matheus C. França +Matheus Gabriel Alves Silva +Mathieu Nayrolles +Mathijs de Bruin +Matt Clayton <156335168+mattjcly@users.noreply.github.com> +Matt Pulver +Matteo Boschini <12133566+mbosc@users.noreply.github.com> +Matthew Tejo +Matvey Soloviev +Maxime <672982+maximegmd@users.noreply.github.com> +Maximilian Winter +Meng Zhang +Meng, Hengyu +Merrick Christensen +Michael Coppola +Michael Hueschen +Michael Kesper +Michael Klimenko +Michael Podvitskiy +Michael Potter +Michaël de Vries +Mihai +Mike +Minsoo Cheong <54794500+mscheong01@users.noreply.github.com> +Mirko185 +Mirror Azure <54669636+MirrorAzure@users.noreply.github.com> +Miwa / Ensan <63481257+ensan-hcl@users.noreply.github.com> +Mohammadreza Hendiani +Murilo Santana +Musab Gultekin +Nam D. Tran <42194884+namtranase@users.noreply.github.com> +NawafAlansari <72708095+NawafAlansari@users.noreply.github.com> +Nebula +Neo Zhang Jianyu +Neuman Vong +Nexesenex <124105151+Nexesenex@users.noreply.github.com> +Niall Coates <1349685+Niall-@users.noreply.github.com> +Nicolai Weitkemper +Nigel Bosch +Niklas Korz +Nindaleth +Oleksandr Nikitin +Oleksii Maryshchenko +Olivier Chafik +Ondřej Čertík +Ouadie EL FAROUKI +Paul Tsochantaris +Pavol Rusnak +Pedro Cuenca +Peter Sugihara +Phil H <5756783+phiharri@users.noreply.github.com> +Philip Taron +Phillip Kravtsov +Pierre Alexandre SCHEMBRI +Pierrick Hymbert +Przemysław Pawełczyk +Qin Yue Chen <71813199+chenqiny@users.noreply.github.com> +Qingyou Meng +Qu Zongfu <43257352+yancaoweidaode@users.noreply.github.com> +RJ Adriaansen +Radoslav Gerganov +Radosław Gryta +Rahul Vivek Nair <68507071+RahulVivekNair@users.noreply.github.com> +Rand Xie +Randall Fitzgerald +Reinforce-II +Riceball LEE +Richard Kiss +Richard Roberson +Rick G <26732651+TheFlipbook@users.noreply.github.com> +Rickard Edén +Rickard Hallerbäck +Rickey Bowers Jr +Riley Stewart +Rinne +Rinne +Robert Brisita <986796+rbrisita@users.noreply.github.com> +Robert Sung-wook Shin +Robey Holderith +Robyn +Roger Meier +Roland <14355895+rbur0425@users.noreply.github.com> +Romain D <90720+Artefact2@users.noreply.github.com> +Romain Neutron +Roman Parykin +Ron Evans +Ron Jailall +Ronny Brendel +Ronsor +Rowan Hart +Rune <43761327+Rune-AI@users.noreply.github.com> +Ryan Landay +Ryder Wishart +Rőczey Barnabás <31726601+An0nie@users.noreply.github.com> +SakuraUmi +Salvador E. 
Tropea +Sam Spilsbury +Sami Farin <3876865+Safari77@users.noreply.github.com> +Samuel Maynard +Sang-Kil Park +Seb C <47074056+Sebby37@users.noreply.github.com> +Sebastián A +SebastianApel <13675545+SebastianApel@users.noreply.github.com> +Senemu <10880819+Senemu@users.noreply.github.com> +Sergey Alirzaev +Sergio López +SeungWon Jeong <65549245+redlion0929@users.noreply.github.com> +ShadovvBeast +Shakhar Dasgupta +Shangning Xu <32517059+xushangning@users.noreply.github.com> +Shijie <821898965@qq.com> +Shintarou Okada +Shouzheng Liu <61452103+lshzh-ww@users.noreply.github.com> +Shouzheng Liu +Sigbjørn Skjæret +Simon Willison +Siwen Yu +Sky Yan +Slaren <2141330+slaren@users.noreply.github.com> +Slava Primenko +SoftwareRenderer <138734813+SoftwareRenderer@users.noreply.github.com> +Someone +Someone Serge +Sourab Mangrulkar <13534540+pacman100@users.noreply.github.com> +Spencer Sutton +Srinivas Billa +Stefan Sydow +Stephan Walter +Stephen Nichols +Steve Grubb +Steven Roussey +Steward Garcia <57494570+FSSRepo@users.noreply.github.com> +Suaj Carrot <72162667+SuajCarrot@users.noreply.github.com> +SuperUserNameMan +Tai Duc Nguyen +Taikono-Himazin +Tameem <113388789+AhmadTameem@users.noreply.github.com> +Tamotsu Takahashi +Thái Hoàng Tâm <75922889+RoyalHeart@users.noreply.github.com> +Thatcher Chamberlin +Theia Vogel +Thérence <13496987+Royalphax@users.noreply.github.com> +Thibault Terrasson +Thomas Klausner +Tim Miller +Timmy Knight +Timothy Cronin <40186632+4imothy@users.noreply.github.com> +Ting Lou +Ting Sun +Tobias Lütke +Tom C +Tom Jobbins <784313+TheBloke@users.noreply.github.com> +Tomas +Tomáš Pazdiora +Tristan Ross +Tungsten842 <886724vf@anonaddy.me> +Tungsten842 +Tushar +UEXTM.com <84163508+uextm@users.noreply.github.com> +Uzo Nweke +Vaibhav Srivastav +Val Kharitonov +Valentin Konovalov +Valentyn Bezshapkin <61702053+valentynbez@users.noreply.github.com> +Victor Z. 
Peng +Vlad +Vladimir +Vladimir Malyutin +Vladimir Zorin +Volodymyr Vitvitskyi <72226+signalpillar@users.noreply.github.com> +WangHaoranRobin <56047610+WangHaoranRobin@users.noreply.github.com> +Weird Constructor +Welby Seely +Wentai Zhang +WillCorticesAI <150854901+WillCorticesAI@users.noreply.github.com> +Willy Tarreau +Wu Jian Ping +Wu Jian Ping +Xiake Sun +Xiang (Kevin) Li +Xiao-Yong Jin +XiaotaoChen +Xiaoyi Chen +Xingchen Song(宋星辰) +Xuan Son Nguyen +Yann Follet <131855179+YannFollet@users.noreply.github.com> +Yiming Cui +Yishuo Wang +Yueh-Po Peng <94939112+y10ab1@users.noreply.github.com> +Yui +Yusuf Kağan Hanoğlu +Yuval Peled <31162840+Yuval-Peled@users.noreply.github.com> +ZHAOKAI WANG +Zane Shannon +Zay <95888118+isaiahbjork@users.noreply.github.com> +Zenix +Zhang Peiyuan +ZhouYuChen +Ziad Ben Hadj-Alouane +Ziang Wu <97337387+ZiangWu-77@users.noreply.github.com> +Zsapi +a-n-n-a-l-e-e <150648636+a-n-n-a-l-e-e@users.noreply.github.com> +adel boussaken +afrideva <95653597+afrideva@users.noreply.github.com> +akawrykow <142945436+akawrykow@users.noreply.github.com> +alexpinel <93524949+alexpinel@users.noreply.github.com> +alonfaraj +andrijdavid +anon998 <131767832+anon998@users.noreply.github.com> +anzz1 +apaz +apcameron <37645737+apcameron@users.noreply.github.com> +arcrank +arlo-phoenix <140345165+arlo-phoenix@users.noreply.github.com> +at8u <129688334+at8u@users.noreply.github.com> +automaticcat +bandoti <141645996+bandoti@users.noreply.github.com> +beiller +bhubbb <79117352+bhubbb@users.noreply.github.com> +bmwl +bobqianic <129547291+bobqianic@users.noreply.github.com> +bryanSwk <93190252+bryanSwk@users.noreply.github.com> +bsilvereagle +bssrdf +byte-6174 <88070277+byte-6174@users.noreply.github.com> +cebtenzzre +chaihahaha +chiranko <96988916+chiranko@users.noreply.github.com> +clibdev <52199778+clibdev@users.noreply.github.com> +clyang +cocktailpeanut <121128867+cocktailpeanut@users.noreply.github.com> +coezbek +comex +compilade <113953597+compilade@users.noreply.github.com> +crasm +crasm +daboe01 +david raistrick +ddpasa <112642920+ddpasa@users.noreply.github.com> +deepdiffuser <112834445+deepdiffuser@users.noreply.github.com> +divinity76 +dotpy314 <33351922+dotpy314@users.noreply.github.com> +drbh +ds5t5 <145942675+ds5t5@users.noreply.github.com> +dylan +eastriver +ebraminio +eiery <19350831+eiery@users.noreply.github.com> +eric8607242 +fraxy-v <65565042+fraxy-v@users.noreply.github.com> +github-actions[bot] +gliptic +goerch +grahameth <96447521+grahameth@users.noreply.github.com> +gwjr <502526+gwjr@users.noreply.github.com> +h-h-h-h <13482553+h-h-h-h@users.noreply.github.com> +hankcs +hoangmit +hongbo.mo <352280764@qq.com> +howlger +howlger +hutli <6594598+hutli@users.noreply.github.com> +hutli +hutli +hxer7963 +hydai +iSma +iacore <74560659+iacore@users.noreply.github.com> +igarnier +iohub +jacobi petrucciani <8117202+jpetrucciani@users.noreply.github.com> +jameswu2014 <545426914@qq.com> +jneem +johnson442 <56517414+johnson442@users.noreply.github.com> +jon-chuang <9093549+jon-chuang@users.noreply.github.com> +jp-x-g +jwj7140 <32943891+jwj7140@users.noreply.github.com> +kaizau +kalomaze <66376113+kalomaze@users.noreply.github.com> +kang +katsu560 <118887472+katsu560@users.noreply.github.com> +kchro3 <62481661+kchro3@users.noreply.github.com> +khimaros +kiltyj +klosax <131523366+klosax@users.noreply.github.com> +kunal-vaishnavi <115581922+kunal-vaishnavi@users.noreply.github.com> +kunnis +kuronekosaiko +kuvaus <22169537+kuvaus@users.noreply.github.com> +kwin1412 
<42286931+kwin1412@users.noreply.github.com> +l3utterfly +ldwang +le.chang +leejet +limitedAtonement +lon <114724657+longregen@users.noreply.github.com> +m3ndax +maddes8cht <55592906+maddes8cht@users.noreply.github.com> +makomk +manikbhandari +mdrokz +mgroeber9110 <45620825+mgroeber9110@users.noreply.github.com> +minarchist +mj-shifu <77107165+mj-shifu@users.noreply.github.com> +mmyjona +momonga <115213907+mmnga@users.noreply.github.com> +moritzbrantner <31051084+moritzbrantner@users.noreply.github.com> +mzcu +nanahi <130121847+na-na-hi@users.noreply.github.com> +ngc92 <7938269+ngc92@users.noreply.github.com> +nhamanasu <45545786+nhamanasu@users.noreply.github.com> +niansa/tuxifan +niansa/tuxifan +ningshanwutuobang +nold +nopperl <54780682+nopperl@users.noreply.github.com> +nusu-github <29514220+nusu-github@users.noreply.github.com> +olexiyb +oobabooga <112222186+oobabooga@users.noreply.github.com> +opparco +ostix360 <55257054+ostix360@users.noreply.github.com> +perserk +postmasters +pudepiedj +qingfengfenga <41416092+qingfengfenga@users.noreply.github.com> +qouoq +qunash +rabidcopy +rankaiyx +rhjdvsgsgks <26178113+rhjdvsgsgks@users.noreply.github.com> +rhuddleston +rimoliga <53384203+rimoliga@users.noreply.github.com> +runfuture +sandyiscool +semidark +sharpHL <132747147+sharpHL@users.noreply.github.com> +shibe2 +singularity <12184989+singularity-s0@users.noreply.github.com> +sjinzh +slaren <2141330+slaren@users.noreply.github.com> +slaren +snadampal <87143774+snadampal@users.noreply.github.com> +staviq +stduhpf +swittk +takov751 <40316768+takov751@users.noreply.github.com> +tarcey +texmex76 <40733439+texmex76@users.noreply.github.com> +thement <40525767+thement@users.noreply.github.com> +tjohnman +tslmy +ubik2 +uint256_t +uint256_t +unbounded +valiray <133289098+valiray@users.noreply.github.com> +vodkaslime <646329483@qq.com> +vvhg1 <94630311+vvhg1@users.noreply.github.com> +vxiiduu <73044267+vxiiduu@users.noreply.github.com> +wbpxre150 <100937007+wbpxre150@users.noreply.github.com> +whoreson <139810751+whoreson@users.noreply.github.com> +wonjun Jang +wzy <32936898+Freed-Wu@users.noreply.github.com> +xaedes +xaedes +xloem <0xloem@gmail.com> +yangli2 +yuiseki +zakkor +zhouwg <6889919+zhouwg@users.noreply.github.com> +zrm +源文雨 <41315874+fumiama@users.noreply.github.com> +Нияз Гарифзянов <112617865+garrnizon@users.noreply.github.com> diff --git a/llama-cpp-python/vendor/llama.cpp/CMakeLists.txt b/llama-cpp-python/vendor/llama.cpp/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..477c5b57c20e7deb7fdfebd277c5ad05d1c7b397 --- /dev/null +++ b/llama-cpp-python/vendor/llama.cpp/CMakeLists.txt @@ -0,0 +1,1316 @@ +cmake_minimum_required(VERSION 3.14) # for add_link_options and implicit target directories. 
+project("llama.cpp" C CXX) +include(CheckIncludeFileCXX) + +set(CMAKE_EXPORT_COMPILE_COMMANDS ON) + +if (NOT XCODE AND NOT MSVC AND NOT CMAKE_BUILD_TYPE) + set(CMAKE_BUILD_TYPE Release CACHE STRING "Build type" FORCE) + set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release" "MinSizeRel" "RelWithDebInfo") +endif() + +set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin) + +if (CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR) + set(LLAMA_STANDALONE ON) + + # configure project version + # TODO +else() + set(LLAMA_STANDALONE OFF) +endif() + +if (EMSCRIPTEN) + set(BUILD_SHARED_LIBS_DEFAULT OFF) + + option(LLAMA_WASM_SINGLE_FILE "llama: embed WASM inside the generated llama.js" ON) +else() + if (MINGW) + set(BUILD_SHARED_LIBS_DEFAULT OFF) + else() + set(BUILD_SHARED_LIBS_DEFAULT ON) + endif() +endif() + + +# +# Option list +# + +if (APPLE) + set(LLAMA_METAL_DEFAULT ON) +else() + set(LLAMA_METAL_DEFAULT OFF) +endif() + +set(LLAMA_LLAMAFILE_DEFAULT ON) + +# general +option(BUILD_SHARED_LIBS "build shared libraries" OFF) +option(LLAMA_STATIC "llama: static link libraries" OFF) +option(LLAMA_NATIVE "llama: enable -march=native flag" ON) +option(LLAMA_LTO "llama: enable link time optimization" OFF) +option(LLAMA_CCACHE "llama: use ccache if available" ON) + +# debug +option(LLAMA_ALL_WARNINGS "llama: enable all compiler warnings" ON) +option(LLAMA_ALL_WARNINGS_3RD_PARTY "llama: enable all compiler warnings in 3rd party libs" OFF) +option(LLAMA_GPROF "llama: enable gprof" OFF) + +# build +option(LLAMA_FATAL_WARNINGS "llama: enable -Werror flag" OFF) + +# sanitizers +option(LLAMA_SANITIZE_THREAD "llama: enable thread sanitizer" OFF) +option(LLAMA_SANITIZE_ADDRESS "llama: enable address sanitizer" OFF) +option(LLAMA_SANITIZE_UNDEFINED "llama: enable undefined sanitizer" OFF) + +# instruction set specific +if (LLAMA_NATIVE) + set(INS_ENB OFF) +else() + set(INS_ENB ON) +endif() + +option(LLAMA_AVX "llama: enable AVX" ${INS_ENB}) +option(LLAMA_AVX2 "llama: enable AVX2" ${INS_ENB}) +option(LLAMA_AVX512 "llama: enable AVX512" OFF) +option(LLAMA_AVX512_VBMI "llama: enable AVX512-VBMI" OFF) +option(LLAMA_AVX512_VNNI "llama: enable AVX512-VNNI" OFF) +option(LLAMA_FMA "llama: enable FMA" ${INS_ENB}) +# in MSVC F16C is implied with AVX2/AVX512 +if (NOT MSVC) + option(LLAMA_F16C "llama: enable F16C" ${INS_ENB}) +endif() + +if (WIN32) + set(LLAMA_WIN_VER "0x602" CACHE STRING "llama: Windows Version") +endif() + +# 3rd party libs +option(LLAMA_ACCELERATE "llama: enable Accelerate framework" ON) +option(LLAMA_BLAS "llama: use BLAS" OFF) +option(LLAMA_LLAMAFILE "llama: use llamafile SGEMM" ${LLAMA_LLAMAFILE_DEFAULT}) +set(LLAMA_BLAS_VENDOR "Generic" CACHE STRING "llama: BLAS library vendor") +option(LLAMA_CUDA "llama: use CUDA" OFF) +option(LLAMA_CUBLAS "llama: use CUDA (deprecated, use LLAMA_CUDA)" OFF) +option(LLAMA_CUDA_FORCE_DMMV "llama: use dmmv instead of mmvq CUDA kernels" OFF) +option(LLAMA_CUDA_FORCE_MMQ "llama: use mmq kernels instead of cuBLAS" OFF) +set(LLAMA_CUDA_DMMV_X "32" CACHE STRING "llama: x stride for dmmv CUDA kernels") +set(LLAMA_CUDA_MMV_Y "1" CACHE STRING "llama: y block size for mmv CUDA kernels") +option(LLAMA_CUDA_F16 "llama: use 16 bit floats for some calculations" OFF) +set(LLAMA_CUDA_KQUANTS_ITER "2" CACHE STRING "llama: iters./thread per block for Q2_K/Q6_K") +set(LLAMA_CUDA_PEER_MAX_BATCH_SIZE "128" CACHE STRING + "llama: max. 
batch size for using peer access") +option(LLAMA_CUDA_NO_PEER_COPY "llama: do not use peer to peer copies" OFF) +option(LLAMA_CURL "llama: use libcurl to download model from an URL" OFF) +option(LLAMA_HIPBLAS "llama: use hipBLAS" OFF) +option(LLAMA_HIP_UMA "llama: use HIP unified memory architecture" OFF) +option(LLAMA_CLBLAST "llama: use CLBlast" OFF) +option(LLAMA_VULKAN "llama: use Vulkan" OFF) +option(LLAMA_VULKAN_CHECK_RESULTS "llama: run Vulkan op checks" OFF) +option(LLAMA_VULKAN_DEBUG "llama: enable Vulkan debug output" OFF) +option(LLAMA_VULKAN_VALIDATE "llama: enable Vulkan validation" OFF) +option(LLAMA_VULKAN_RUN_TESTS "llama: run Vulkan tests" OFF) +option(LLAMA_METAL "llama: use Metal" ${LLAMA_METAL_DEFAULT}) +option(LLAMA_METAL_NDEBUG "llama: disable Metal debugging" OFF) +option(LLAMA_METAL_SHADER_DEBUG "llama: compile Metal with -fno-fast-math" OFF) +option(LLAMA_METAL_EMBED_LIBRARY "llama: embed Metal library" OFF) +set(LLAMA_METAL_MACOSX_VERSION_MIN "" CACHE STRING + "llama: metal minimum macOS version") +set(LLAMA_METAL_STD "" CACHE STRING "llama: metal standard version (-std flag)") +option(LLAMA_KOMPUTE "llama: use Kompute" OFF) +option(LLAMA_MPI "llama: use MPI" OFF) +option(LLAMA_QKK_64 "llama: use super-block size of 64 for k-quants" OFF) +option(LLAMA_SYCL "llama: use SYCL" OFF) +option(LLAMA_SYCL_F16 "llama: use 16 bit floats for sycl calculations" OFF) +set(LLAMA_SYCL_TARGET "INTEL" CACHE STRING "llama: sycl target device") +option(LLAMA_CPU_HBM "llama: use memkind for CPU HBM" OFF) +set(LLAMA_SCHED_MAX_COPIES "4" CACHE STRING "llama: max input copies for pipeline parallelism") + +option(LLAMA_BUILD_TESTS "llama: build tests" ${LLAMA_STANDALONE}) +option(LLAMA_BUILD_EXAMPLES "llama: build examples" ${LLAMA_STANDALONE}) +option(LLAMA_BUILD_SERVER "llama: build server example" ON) + +# add perf arguments +option(LLAMA_PERF "llama: enable perf" OFF) + +# Required for relocatable CMake package +include(${CMAKE_CURRENT_SOURCE_DIR}/scripts/build-info.cmake) + +# +# Compile flags +# + +if (LLAMA_SYCL) + set(CMAKE_CXX_STANDARD 17) +else() + set(CMAKE_CXX_STANDARD 11) +endif() + +set(CMAKE_CXX_STANDARD_REQUIRED true) +set(CMAKE_C_STANDARD 11) +set(CMAKE_C_STANDARD_REQUIRED true) +set(THREADS_PREFER_PTHREAD_FLAG ON) + +find_package(Threads REQUIRED) +include(CheckCXXCompilerFlag) + +add_compile_definitions(GGML_SCHED_MAX_COPIES=${LLAMA_SCHED_MAX_COPIES}) + +# enable libstdc++ assertions for debug builds +if (CMAKE_SYSTEM_NAME MATCHES "Linux") + add_compile_definitions($<$:_GLIBCXX_ASSERTIONS>) +endif() + +if (NOT MSVC) + if (LLAMA_SANITIZE_THREAD) + add_compile_options(-fsanitize=thread) + link_libraries (-fsanitize=thread) + endif() + + if (LLAMA_SANITIZE_ADDRESS) + add_compile_options(-fsanitize=address -fno-omit-frame-pointer) + link_libraries (-fsanitize=address) + endif() + + if (LLAMA_SANITIZE_UNDEFINED) + add_compile_options(-fsanitize=undefined) + link_libraries (-fsanitize=undefined) + endif() +endif() + +if (APPLE AND LLAMA_ACCELERATE) + find_library(ACCELERATE_FRAMEWORK Accelerate) + if (ACCELERATE_FRAMEWORK) + message(STATUS "Accelerate framework found") + + add_compile_definitions(GGML_USE_ACCELERATE) + add_compile_definitions(ACCELERATE_NEW_LAPACK) + add_compile_definitions(ACCELERATE_LAPACK_ILP64) + set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} ${ACCELERATE_FRAMEWORK}) + else() + message(WARNING "Accelerate framework not found") + endif() +endif() + +if (LLAMA_METAL) + find_library(FOUNDATION_LIBRARY Foundation REQUIRED) + find_library(METAL_FRAMEWORK Metal 
REQUIRED) + find_library(METALKIT_FRAMEWORK MetalKit REQUIRED) + + message(STATUS "Metal framework found") + set(GGML_HEADERS_METAL ggml-metal.h) + set(GGML_SOURCES_METAL ggml-metal.m) + + add_compile_definitions(GGML_USE_METAL) + if (LLAMA_METAL_NDEBUG) + add_compile_definitions(GGML_METAL_NDEBUG) + endif() + + # copy ggml-common.h and ggml-metal.metal to bin directory + configure_file(ggml-common.h ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-common.h COPYONLY) + configure_file(ggml-metal.metal ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-metal.metal COPYONLY) + + if (LLAMA_METAL_EMBED_LIBRARY) + enable_language(ASM) + add_compile_definitions(GGML_METAL_EMBED_LIBRARY) + + set(METALLIB_COMMON "${CMAKE_CURRENT_SOURCE_DIR}/ggml-common.h") + set(METALLIB_SOURCE "${CMAKE_CURRENT_SOURCE_DIR}/ggml-metal.metal") + + file(MAKE_DIRECTORY "${CMAKE_BINARY_DIR}/autogenerated") + + # merge ggml-common.h and ggml-metal.metal into a single file + set(METALLIB_EMBED_ASM "${CMAKE_BINARY_DIR}/autogenerated/ggml-metal-embed.s") + set(METALLIB_SOURCE_EMBED "${CMAKE_BINARY_DIR}/autogenerated/ggml-metal-embed.metal") + + add_custom_command( + OUTPUT ${METALLIB_EMBED_ASM} + COMMAND echo "Embedding Metal library" + COMMAND sed -e '/\#include \"ggml-common.h\"/r ${METALLIB_COMMON}' -e '/\#include \"ggml-common.h\"/d' < ${METALLIB_SOURCE} > ${METALLIB_SOURCE_EMBED} + COMMAND echo ".section __DATA,__ggml_metallib" > ${METALLIB_EMBED_ASM} + COMMAND echo ".globl _ggml_metallib_start" >> ${METALLIB_EMBED_ASM} + COMMAND echo "_ggml_metallib_start:" >> ${METALLIB_EMBED_ASM} + COMMAND echo ".incbin \\\"${METALLIB_SOURCE_EMBED}\\\"" >> ${METALLIB_EMBED_ASM} + COMMAND echo ".globl _ggml_metallib_end" >> ${METALLIB_EMBED_ASM} + COMMAND echo "_ggml_metallib_end:" >> ${METALLIB_EMBED_ASM} + DEPENDS ggml-metal.metal ggml-common.h + COMMENT "Generate assembly for embedded Metal library" + ) + + set(GGML_SOURCES_METAL ${GGML_SOURCES_METAL} ${METALLIB_EMBED_ASM}) + else() + if (LLAMA_METAL_SHADER_DEBUG) + # custom command to do the following: + # xcrun -sdk macosx metal -fno-fast-math -c ggml-metal.metal -o ggml-metal.air + # xcrun -sdk macosx metallib ggml-metal.air -o default.metallib + # + # note: this is the only way I found to disable fast-math in Metal. 
it's ugly, but at least it works + # disabling fast math is needed in order to pass tests/test-backend-ops + # note: adding -fno-inline fixes the tests when using MTL_SHADER_VALIDATION=1 + # note: unfortunately, we have to call it default.metallib instead of ggml.metallib + # ref: https://github.com/ggerganov/whisper.cpp/issues/1720 + set(XC_FLAGS -fno-fast-math -fno-inline -g) + else() + set(XC_FLAGS -O3) + endif() + + # Append macOS metal versioning flags + if (LLAMA_METAL_MACOSX_VERSION_MIN) + message(STATUS "Adding -mmacosx-version-min=${LLAMA_METAL_MACOSX_VERSION_MIN} flag to metal compilation") + list(APPEND XC_FLAGS -mmacosx-version-min=${LLAMA_METAL_MACOSX_VERSION_MIN}) + endif() + if (LLAMA_METAL_STD) + message(STATUS "Adding -std=${LLAMA_METAL_STD} flag to metal compilation") + list(APPEND XC_FLAGS -std=${LLAMA_METAL_STD}) + endif() + + add_custom_command( + OUTPUT ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/default.metallib + COMMAND xcrun -sdk macosx metal ${XC_FLAGS} -c ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-metal.metal -o ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-metal.air + COMMAND xcrun -sdk macosx metallib ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-metal.air -o ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/default.metallib + COMMAND rm -f ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-metal.air + COMMAND rm -f ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-common.h + COMMAND rm -f ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-metal.metal + DEPENDS ggml-metal.metal ggml-common.h + COMMENT "Compiling Metal kernels" + ) + + add_custom_target( + ggml-metal ALL + DEPENDS ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/default.metallib + ) + endif() # LLAMA_METAL_EMBED_LIBRARY + + set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} + ${FOUNDATION_LIBRARY} + ${METAL_FRAMEWORK} + ${METALKIT_FRAMEWORK} + ) +endif() + +if (LLAMA_BLAS) + if (LLAMA_STATIC) + set(BLA_STATIC ON) + endif() + if ($(CMAKE_VERSION) VERSION_GREATER_EQUAL 3.22) + set(BLA_SIZEOF_INTEGER 8) + endif() + + set(BLA_VENDOR ${LLAMA_BLAS_VENDOR}) + find_package(BLAS) + + if (BLAS_FOUND) + message(STATUS "BLAS found, Libraries: ${BLAS_LIBRARIES}") + + if ("${BLAS_INCLUDE_DIRS}" STREQUAL "") + # BLAS_INCLUDE_DIRS is missing in FindBLAS.cmake. 
+ # see https://gitlab.kitware.com/cmake/cmake/-/issues/20268 + find_package(PkgConfig REQUIRED) + if (${LLAMA_BLAS_VENDOR} MATCHES "Generic") + pkg_check_modules(DepBLAS REQUIRED blas) + elseif (${LLAMA_BLAS_VENDOR} MATCHES "OpenBLAS") + # As of openblas v0.3.22, the 64-bit is named openblas64.pc + pkg_check_modules(DepBLAS openblas64) + if (NOT DepBLAS_FOUND) + pkg_check_modules(DepBLAS REQUIRED openblas) + endif() + elseif (${LLAMA_BLAS_VENDOR} MATCHES "FLAME") + pkg_check_modules(DepBLAS REQUIRED blis) + elseif (${LLAMA_BLAS_VENDOR} MATCHES "ATLAS") + pkg_check_modules(DepBLAS REQUIRED blas-atlas) + elseif (${LLAMA_BLAS_VENDOR} MATCHES "FlexiBLAS") + pkg_check_modules(DepBLAS REQUIRED flexiblas_api) + elseif (${LLAMA_BLAS_VENDOR} MATCHES "Intel") + # all Intel* libraries share the same include path + pkg_check_modules(DepBLAS REQUIRED mkl-sdl) + elseif (${LLAMA_BLAS_VENDOR} MATCHES "NVHPC") + # this doesn't provide pkg-config + # suggest to assign BLAS_INCLUDE_DIRS on your own + if ("${NVHPC_VERSION}" STREQUAL "") + message(WARNING "Better to set NVHPC_VERSION") + else() + set(DepBLAS_FOUND ON) + set(DepBLAS_INCLUDE_DIRS "/opt/nvidia/hpc_sdk/${CMAKE_SYSTEM_NAME}_${CMAKE_SYSTEM_PROCESSOR}/${NVHPC_VERSION}/math_libs/include") + endif() + endif() + if (DepBLAS_FOUND) + set(BLAS_INCLUDE_DIRS ${DepBLAS_INCLUDE_DIRS}) + else() + message(WARNING "BLAS_INCLUDE_DIRS neither been provided nor been automatically" + " detected by pkgconfig, trying to find cblas.h from possible paths...") + find_path(BLAS_INCLUDE_DIRS + NAMES cblas.h + HINTS + /usr/include + /usr/local/include + /usr/include/openblas + /opt/homebrew/opt/openblas/include + /usr/local/opt/openblas/include + /usr/include/x86_64-linux-gnu/openblas/include + ) + endif() + endif() + + message(STATUS "BLAS found, Includes: ${BLAS_INCLUDE_DIRS}") + + add_compile_options(${BLAS_LINKER_FLAGS}) + + add_compile_definitions(GGML_USE_OPENBLAS) + + if (${BLAS_INCLUDE_DIRS} MATCHES "mkl" AND (${LLAMA_BLAS_VENDOR} MATCHES "Generic" OR ${LLAMA_BLAS_VENDOR} MATCHES "Intel")) + add_compile_definitions(GGML_BLAS_USE_MKL) + endif() + + set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} ${BLAS_LIBRARIES}) + set(LLAMA_EXTRA_INCLUDES ${LLAMA_EXTRA_INCLUDES} ${BLAS_INCLUDE_DIRS}) + else() + message(WARNING "BLAS not found, please refer to " + "https://cmake.org/cmake/help/latest/module/FindBLAS.html#blas-lapack-vendors" + " to set correct LLAMA_BLAS_VENDOR") + endif() +endif() + +if (LLAMA_LLAMAFILE) + add_compile_definitions(GGML_USE_LLAMAFILE) + + set(GGML_HEADERS_LLAMAFILE sgemm.h) + set(GGML_SOURCES_LLAMAFILE sgemm.cpp) +endif() + +if (LLAMA_QKK_64) + add_compile_definitions(GGML_QKK_64) +endif() + +if (LLAMA_CUBLAS) + message(WARNING "LLAMA_CUBLAS is deprecated and will be removed in the future.\nUse LLAMA_CUDA instead") + set(LLAMA_CUDA ON) +endif() + +if (LLAMA_CUDA) + cmake_minimum_required(VERSION 3.17) + + find_package(CUDAToolkit) + if (CUDAToolkit_FOUND) + message(STATUS "CUDA found") + + enable_language(CUDA) + + set(GGML_HEADERS_CUDA ggml-cuda.h) + + file(GLOB GGML_SOURCES_CUDA "ggml-cuda/*.cu") + list(APPEND GGML_SOURCES_CUDA "ggml-cuda.cu") + + add_compile_definitions(GGML_USE_CUDA) + if (LLAMA_CUDA_FORCE_DMMV) + add_compile_definitions(GGML_CUDA_FORCE_DMMV) + endif() + if (LLAMA_CUDA_FORCE_MMQ) + add_compile_definitions(GGML_CUDA_FORCE_MMQ) + endif() + add_compile_definitions(GGML_CUDA_DMMV_X=${LLAMA_CUDA_DMMV_X}) + add_compile_definitions(GGML_CUDA_MMV_Y=${LLAMA_CUDA_MMV_Y}) + if (DEFINED LLAMA_CUDA_DMMV_Y) + 
add_compile_definitions(GGML_CUDA_MMV_Y=${LLAMA_CUDA_DMMV_Y}) # for backwards compatibility + endif() + if (LLAMA_CUDA_F16 OR LLAMA_CUDA_DMMV_F16) + add_compile_definitions(GGML_CUDA_F16) + endif() + add_compile_definitions(K_QUANTS_PER_ITERATION=${LLAMA_CUDA_KQUANTS_ITER}) + add_compile_definitions(GGML_CUDA_PEER_MAX_BATCH_SIZE=${LLAMA_CUDA_PEER_MAX_BATCH_SIZE}) + if (LLAMA_CUDA_NO_PEER_COPY) + add_compile_definitions(GGML_CUDA_NO_PEER_COPY) + endif() + + if (LLAMA_STATIC) + if (WIN32) + # As of 12.3.1 CUDA Tookit for Windows does not offer a static cublas library + set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} CUDA::cudart_static CUDA::cublas CUDA::cublasLt) + else () + set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} CUDA::cudart_static CUDA::cublas_static CUDA::cublasLt_static) + endif() + else() + set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} CUDA::cudart CUDA::cublas CUDA::cublasLt) + endif() + + set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} CUDA::cuda_driver) + + if (NOT DEFINED CMAKE_CUDA_ARCHITECTURES) + # 52 == lowest CUDA 12 standard + # 60 == f16 CUDA intrinsics + # 61 == integer CUDA intrinsics + # 70 == compute capability at which unrolling a loop in mul_mat_q kernels is faster + if (LLAMA_CUDA_F16 OR LLAMA_CUDA_DMMV_F16) + set(CMAKE_CUDA_ARCHITECTURES "60;61;70") # needed for f16 CUDA intrinsics + else() + set(CMAKE_CUDA_ARCHITECTURES "52;61;70") # lowest CUDA 12 standard + lowest for integer intrinsics + #set(CMAKE_CUDA_ARCHITECTURES "") # use this to compile much faster, but only F16 models work + endif() + endif() + message(STATUS "Using CUDA architectures: ${CMAKE_CUDA_ARCHITECTURES}") + + else() + message(WARNING "CUDA not found") + endif() +endif() + +if (LLAMA_MPI) + cmake_minimum_required(VERSION 3.10) + find_package(MPI) + if (MPI_C_FOUND) + message(STATUS "MPI found") + + set(GGML_HEADERS_MPI ggml-mpi.h) + set(GGML_SOURCES_MPI ggml-mpi.c) + + add_compile_definitions(GGML_USE_MPI) + add_compile_definitions(${MPI_C_COMPILE_DEFINITIONS}) + + if (NOT MSVC) + add_compile_options(-Wno-cast-qual) + endif() + + set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} ${MPI_C_LIBRARIES}) + set(LLAMA_EXTRA_INCLUDES ${LLAMA_EXTRA_INCLUDES} ${MPI_C_INCLUDE_DIRS}) + + # Even if you're only using the C header, C++ programs may bring in MPI + # C++ functions, so more linkage is needed + if (MPI_CXX_FOUND) + set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} ${MPI_CXX_LIBRARIES}) + endif() + else() + message(WARNING "MPI not found") + endif() +endif() + +if (LLAMA_CLBLAST) + find_package(CLBlast) + if (CLBlast_FOUND) + message(STATUS "CLBlast found") + + set(GGML_HEADERS_OPENCL ggml-opencl.h) + set(GGML_SOURCES_OPENCL ggml-opencl.cpp) + + add_compile_definitions(GGML_USE_CLBLAST) + + set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} clblast) + else() + message(WARNING "CLBlast not found") + endif() +endif() + +if (LLAMA_VULKAN) + find_package(Vulkan) + if (Vulkan_FOUND) + message(STATUS "Vulkan found") + + set(GGML_HEADERS_VULKAN ggml-vulkan.h) + set(GGML_SOURCES_VULKAN ggml-vulkan.cpp) + + add_compile_definitions(GGML_USE_VULKAN) + + if (LLAMA_VULKAN_CHECK_RESULTS) + add_compile_definitions(GGML_VULKAN_CHECK_RESULTS) + endif() + + if (LLAMA_VULKAN_DEBUG) + add_compile_definitions(GGML_VULKAN_DEBUG) + endif() + + if (LLAMA_VULKAN_VALIDATE) + add_compile_definitions(GGML_VULKAN_VALIDATE) + endif() + + if (LLAMA_VULKAN_RUN_TESTS) + add_compile_definitions(GGML_VULKAN_RUN_TESTS) + endif() + + set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} Vulkan::Vulkan) + else() + message(WARNING "Vulkan not found") + endif() +endif() + +if (LLAMA_HIPBLAS) + 
list(APPEND CMAKE_PREFIX_PATH /opt/rocm) + + if (NOT ${CMAKE_C_COMPILER_ID} MATCHES "Clang") + message(WARNING "Only LLVM is supported for HIP, hint: CC=/opt/rocm/llvm/bin/clang") + endif() + + if (NOT ${CMAKE_CXX_COMPILER_ID} MATCHES "Clang") + message(WARNING "Only LLVM is supported for HIP, hint: CXX=/opt/rocm/llvm/bin/clang++") + endif() + + find_package(hip REQUIRED) + find_package(hipblas REQUIRED) + find_package(rocblas REQUIRED) + + message(STATUS "HIP and hipBLAS found") + + set(GGML_HEADERS_ROCM ggml-cuda.h) + + file(GLOB GGML_SOURCES_ROCM "ggml-cuda/*.cu") + list(APPEND GGML_SOURCES_ROCM "ggml-cuda.cu") + + add_compile_definitions(GGML_USE_HIPBLAS GGML_USE_CUDA) + + if (LLAMA_HIP_UMA) + add_compile_definitions(GGML_HIP_UMA) + endif() + + if (LLAMA_CUDA_FORCE_DMMV) + add_compile_definitions(GGML_CUDA_FORCE_DMMV) + endif() + + if (LLAMA_CUDA_FORCE_MMQ) + add_compile_definitions(GGML_CUDA_FORCE_MMQ) + endif() + + if (LLAMA_CUDA_NO_PEER_COPY) + add_compile_definitions(GGML_CUDA_NO_PEER_COPY) + endif() + + add_compile_definitions(GGML_CUDA_DMMV_X=${LLAMA_CUDA_DMMV_X}) + add_compile_definitions(GGML_CUDA_MMV_Y=${LLAMA_CUDA_MMV_Y}) + add_compile_definitions(K_QUANTS_PER_ITERATION=${LLAMA_CUDA_KQUANTS_ITER}) + + set_source_files_properties(${GGML_SOURCES_ROCM} PROPERTIES LANGUAGE CXX) + + if (LLAMA_STATIC) + message(FATAL_ERROR "Static linking not supported for HIP/ROCm") + endif() + + set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} hip::device PUBLIC hip::host roc::rocblas roc::hipblas) +endif() + +if (LLAMA_SYCL) + if (NOT LLAMA_SYCL_TARGET MATCHES "^(INTEL|NVIDIA)$") + message(FATAL_ERROR "Invalid backend chosen, supported options are INTEL or NVIDIA") + endif() + + if ( NOT DEFINED ENV{ONEAPI_ROOT}) + message(FATAL_ERROR "Not detect ENV {ONEAPI_ROOT}, please install oneAPI & source it, like: source /opt/intel/oneapi/setvars.sh") + endif() + #todo: AOT + + find_package(IntelSYCL REQUIRED) + + message(STATUS "SYCL found") + + add_compile_definitions(GGML_USE_SYCL) + + if (LLAMA_SYCL_F16) + add_compile_definitions(GGML_SYCL_F16) + endif() + + add_compile_options(-I./) #include DPCT + add_compile_options(-I/${SYCL_INCLUDE_DIR}) + + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-narrowing") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsycl -L${MKLROOT}/lib") + if (LLAMA_SYCL_TARGET STREQUAL "NVIDIA") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsycl-targets=nvptx64-nvidia-cuda") + endif() + + set(GGML_HEADERS_SYCL ggml-sycl.h) + set(GGML_SOURCES_SYCL ggml-sycl.cpp) + + if (WIN32) + set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} -fsycl sycl7 OpenCL mkl_sycl_blas_dll.lib mkl_intel_ilp64_dll.lib mkl_sequential_dll.lib mkl_core_dll.lib) + else() + if (LLAMA_SYCL_TARGET STREQUAL "INTEL") + set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} -fsycl OpenCL mkl_core pthread m dl mkl_sycl_blas mkl_intel_ilp64 mkl_tbb_thread) + elseif (LLAMA_SYCL_TARGET STREQUAL "NVIDIA") + set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} -fsycl pthread m dl onemkl) + endif() + endif() +endif() + +if (LLAMA_KOMPUTE) + add_compile_definitions(VULKAN_HPP_DISPATCH_LOADER_DYNAMIC=1) + find_package(Vulkan COMPONENTS glslc REQUIRED) + find_program(glslc_executable NAMES glslc HINTS Vulkan::glslc) + if (NOT glslc_executable) + message(FATAL_ERROR "glslc not found") + endif() + + function(compile_shader) + set(options) + set(oneValueArgs) + set(multiValueArgs SOURCES) + cmake_parse_arguments(compile_shader "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) + foreach(source ${compile_shader_SOURCES}) + 
get_filename_component(filename ${source} NAME) + set(spv_file ${filename}.spv) + add_custom_command( + OUTPUT ${spv_file} + DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/${source} + ${CMAKE_CURRENT_SOURCE_DIR}/kompute-shaders/common.comp + ${CMAKE_CURRENT_SOURCE_DIR}/kompute-shaders/op_getrows.comp + ${CMAKE_CURRENT_SOURCE_DIR}/kompute-shaders/op_mul_mv_q_n_pre.comp + ${CMAKE_CURRENT_SOURCE_DIR}/kompute-shaders/op_mul_mv_q_n.comp + COMMAND ${glslc_executable} --target-env=vulkan1.2 -o ${spv_file} ${CMAKE_CURRENT_SOURCE_DIR}/${source} + COMMENT "Compiling ${source} to ${spv_file}" + ) + + get_filename_component(RAW_FILE_NAME ${spv_file} NAME) + set(FILE_NAME "shader${RAW_FILE_NAME}") + string(REPLACE ".comp.spv" ".h" HEADER_FILE ${FILE_NAME}) + string(TOUPPER ${HEADER_FILE} HEADER_FILE_DEFINE) + string(REPLACE "." "_" HEADER_FILE_DEFINE "${HEADER_FILE_DEFINE}") + set(OUTPUT_HEADER_FILE "${HEADER_FILE}") + message(STATUS "${HEADER_FILE} generating ${HEADER_FILE_DEFINE}") + if(CMAKE_GENERATOR MATCHES "Visual Studio") + add_custom_command( + OUTPUT ${OUTPUT_HEADER_FILE} + COMMAND ${CMAKE_COMMAND} -E echo "/*THIS FILE HAS BEEN AUTOMATICALLY GENERATED - DO NOT EDIT*/" > ${OUTPUT_HEADER_FILE} + COMMAND ${CMAKE_COMMAND} -E echo \"\#ifndef ${HEADER_FILE_DEFINE}\" >> ${OUTPUT_HEADER_FILE} + COMMAND ${CMAKE_COMMAND} -E echo \"\#define ${HEADER_FILE_DEFINE}\" >> ${OUTPUT_HEADER_FILE} + COMMAND ${CMAKE_COMMAND} -E echo "namespace kp {" >> ${OUTPUT_HEADER_FILE} + COMMAND ${CMAKE_COMMAND} -E echo "namespace shader_data {" >> ${OUTPUT_HEADER_FILE} + COMMAND ${CMAKE_BINARY_DIR}/bin/$<CONFIG>/xxd -i ${RAW_FILE_NAME} >> ${OUTPUT_HEADER_FILE} + COMMAND ${CMAKE_COMMAND} -E echo "}}" >> ${OUTPUT_HEADER_FILE} + COMMAND ${CMAKE_COMMAND} -E echo \"\#endif // define ${HEADER_FILE_DEFINE}\" >> ${OUTPUT_HEADER_FILE} + DEPENDS ${spv_file} xxd + COMMENT "Converting to hpp: ${FILE_NAME} ${CMAKE_BINARY_DIR}/bin/$<CONFIG>/xxd" + ) + else() + add_custom_command( + OUTPUT ${OUTPUT_HEADER_FILE} + COMMAND ${CMAKE_COMMAND} -E echo "/*THIS FILE HAS BEEN AUTOMATICALLY GENERATED - DO NOT EDIT*/" > ${OUTPUT_HEADER_FILE} + COMMAND ${CMAKE_COMMAND} -E echo \"\#ifndef ${HEADER_FILE_DEFINE}\" >> ${OUTPUT_HEADER_FILE} + COMMAND ${CMAKE_COMMAND} -E echo \"\#define ${HEADER_FILE_DEFINE}\" >> ${OUTPUT_HEADER_FILE} + COMMAND ${CMAKE_COMMAND} -E echo "namespace kp {" >> ${OUTPUT_HEADER_FILE} + COMMAND ${CMAKE_COMMAND} -E echo "namespace shader_data {" >> ${OUTPUT_HEADER_FILE} + COMMAND ${CMAKE_BINARY_DIR}/bin/xxd -i ${RAW_FILE_NAME} >> ${OUTPUT_HEADER_FILE} + COMMAND ${CMAKE_COMMAND} -E echo "}}" >> ${OUTPUT_HEADER_FILE} + COMMAND ${CMAKE_COMMAND} -E echo \"\#endif // define ${HEADER_FILE_DEFINE}\" >> ${OUTPUT_HEADER_FILE} + DEPENDS ${spv_file} xxd + COMMENT "Converting to hpp: ${FILE_NAME} ${CMAKE_BINARY_DIR}/bin/xxd" + ) + endif() + endforeach() + endfunction() + + if (EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/kompute/CMakeLists.txt") + message(STATUS "Kompute found") + set(KOMPUTE_OPT_LOG_LEVEL Error CACHE STRING "Kompute log level") + add_subdirectory(kompute) + + # Compile our shaders + compile_shader(SOURCES + kompute-shaders/op_scale.comp + kompute-shaders/op_scale_8.comp + kompute-shaders/op_add.comp + kompute-shaders/op_addrow.comp + kompute-shaders/op_mul.comp + kompute-shaders/op_silu.comp + kompute-shaders/op_relu.comp + kompute-shaders/op_gelu.comp + kompute-shaders/op_softmax.comp + kompute-shaders/op_norm.comp + kompute-shaders/op_rmsnorm.comp + kompute-shaders/op_diagmask.comp + kompute-shaders/op_mul_mat_mat_f32.comp + kompute-shaders/op_mul_mat_f16.comp
+ kompute-shaders/op_mul_mat_q8_0.comp + kompute-shaders/op_mul_mat_q4_0.comp + kompute-shaders/op_mul_mat_q4_1.comp + kompute-shaders/op_mul_mat_q6_k.comp + kompute-shaders/op_getrows_f16.comp + kompute-shaders/op_getrows_q4_0.comp + kompute-shaders/op_getrows_q4_1.comp + kompute-shaders/op_getrows_q6_k.comp + kompute-shaders/op_rope_f16.comp + kompute-shaders/op_rope_f32.comp + kompute-shaders/op_cpy_f16_f16.comp + kompute-shaders/op_cpy_f16_f32.comp + kompute-shaders/op_cpy_f32_f16.comp + kompute-shaders/op_cpy_f32_f32.comp + ) + + # Create a custom target for our generated shaders + add_custom_target(generated_shaders DEPENDS + shaderop_scale.h + shaderop_scale_8.h + shaderop_add.h + shaderop_addrow.h + shaderop_mul.h + shaderop_silu.h + shaderop_relu.h + shaderop_gelu.h + shaderop_softmax.h + shaderop_norm.h + shaderop_rmsnorm.h + shaderop_diagmask.h + shaderop_mul_mat_mat_f32.h + shaderop_mul_mat_f16.h + shaderop_mul_mat_q8_0.h + shaderop_mul_mat_q4_0.h + shaderop_mul_mat_q4_1.h + shaderop_mul_mat_q6_k.h + shaderop_getrows_f16.h + shaderop_getrows_q4_0.h + shaderop_getrows_q4_1.h + shaderop_getrows_q6_k.h + shaderop_rope_f16.h + shaderop_rope_f32.h + shaderop_cpy_f16_f16.h + shaderop_cpy_f16_f32.h + shaderop_cpy_f32_f16.h + shaderop_cpy_f32_f32.h + ) + + # Create a custom command that depends on the generated_shaders + add_custom_command( + OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/ggml-kompute.stamp + COMMAND ${CMAKE_COMMAND} -E touch ${CMAKE_CURRENT_BINARY_DIR}/ggml-kompute.stamp + DEPENDS generated_shaders + COMMENT "Ensuring shaders are generated before compiling ggml-kompute.cpp" + ) + + # Add the stamp to the main sources to ensure dependency tracking + set(GGML_SOURCES_KOMPUTE ggml-kompute.cpp ${CMAKE_CURRENT_BINARY_DIR}/ggml-kompute.stamp) + set(GGML_HEADERS_KOMPUTE ggml-kompute.h ${CMAKE_CURRENT_BINARY_DIR}/ggml-kompute.stamp) + + add_compile_definitions(GGML_USE_KOMPUTE) + + set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} kompute) + set(LLAMA_EXTRA_INCLUDES ${LLAMA_EXTRA_INCLUDES} ${CMAKE_BINARY_DIR}) + else() + message(WARNING "Kompute not found") + endif() +endif() + +if (LLAMA_CPU_HBM) + find_library(memkind memkind REQUIRED) + + add_compile_definitions(GGML_USE_CPU_HBM) + + target_link_libraries(ggml PUBLIC memkind) +endif() + +if (LLAMA_PERF) + add_compile_definitions(GGML_PERF) +endif() + +function(get_flags CCID CCVER) + set(C_FLAGS "") + set(CXX_FLAGS "") + + if (CCID MATCHES "Clang") + set(C_FLAGS -Wunreachable-code-break -Wunreachable-code-return) + set(CXX_FLAGS -Wunreachable-code-break -Wunreachable-code-return -Wmissing-prototypes -Wextra-semi) + + if ( + (CCID STREQUAL "Clang" AND CCVER VERSION_GREATER_EQUAL 3.8.0) OR + (CCID STREQUAL "AppleClang" AND CCVER VERSION_GREATER_EQUAL 7.3.0) + ) + list(APPEND C_FLAGS -Wdouble-promotion) + endif() + elseif (CCID STREQUAL "GNU") + set(C_FLAGS -Wdouble-promotion) + set(CXX_FLAGS -Wno-array-bounds) + + if (CCVER VERSION_GREATER_EQUAL 7.1.0) + list(APPEND CXX_FLAGS -Wno-format-truncation) + endif() + if (CCVER VERSION_GREATER_EQUAL 8.1.0) + list(APPEND CXX_FLAGS -Wextra-semi) + endif() + endif() + + set(GF_C_FLAGS ${C_FLAGS} PARENT_SCOPE) + set(GF_CXX_FLAGS ${CXX_FLAGS} PARENT_SCOPE) +endfunction() + +if (LLAMA_FATAL_WARNINGS) + if (CMAKE_CXX_COMPILER_ID MATCHES "GNU" OR CMAKE_CXX_COMPILER_ID MATCHES "Clang") + list(APPEND C_FLAGS -Werror) + list(APPEND CXX_FLAGS -Werror) + elseif (CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") + add_compile_options(/WX) + endif() +endif() + +if (LLAMA_ALL_WARNINGS) + if (NOT MSVC) + list(APPEND WARNING_FLAGS 
-Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function) + list(APPEND C_FLAGS -Wshadow -Wstrict-prototypes -Wpointer-arith -Wmissing-prototypes + -Werror=implicit-int -Werror=implicit-function-declaration) + list(APPEND CXX_FLAGS -Wmissing-declarations -Wmissing-noreturn) + + list(APPEND C_FLAGS ${WARNING_FLAGS}) + list(APPEND CXX_FLAGS ${WARNING_FLAGS}) + + get_flags(${CMAKE_CXX_COMPILER_ID} ${CMAKE_CXX_COMPILER_VERSION}) + + add_compile_options("$<$<COMPILE_LANGUAGE:C>:${C_FLAGS};${GF_C_FLAGS}>" + "$<$<COMPILE_LANGUAGE:CXX>:${CXX_FLAGS};${GF_CXX_FLAGS}>") + else() + # todo : msvc + set(C_FLAGS "") + set(CXX_FLAGS "") + endif() +endif() + +set(CUDA_CXX_FLAGS "") + +if (LLAMA_CUDA) + set(CUDA_FLAGS -use_fast_math) + + if (LLAMA_FATAL_WARNINGS) + list(APPEND CUDA_FLAGS -Werror all-warnings) + endif() + + if (LLAMA_ALL_WARNINGS AND NOT MSVC) + set(NVCC_CMD ${CMAKE_CUDA_COMPILER} .c) + if (NOT CMAKE_CUDA_HOST_COMPILER STREQUAL "") + list(APPEND NVCC_CMD -ccbin ${CMAKE_CUDA_HOST_COMPILER}) + endif() + + execute_process( + COMMAND ${NVCC_CMD} -Xcompiler --version + OUTPUT_VARIABLE CUDA_CCFULLVER + ERROR_QUIET + ) + + if (NOT CUDA_CCFULLVER MATCHES clang) + set(CUDA_CCID "GNU") + execute_process( + COMMAND ${NVCC_CMD} -Xcompiler "-dumpfullversion -dumpversion" + OUTPUT_VARIABLE CUDA_CCVER + ERROR_QUIET + ) + else() + if (CUDA_CCFULLVER MATCHES Apple) + set(CUDA_CCID "AppleClang") + else() + set(CUDA_CCID "Clang") + endif() + string(REGEX REPLACE "^.* version ([0-9.]*).*$" "\\1" CUDA_CCVER ${CUDA_CCFULLVER}) + endif() + + message("-- CUDA host compiler is ${CUDA_CCID} ${CUDA_CCVER}") + + get_flags(${CUDA_CCID} ${CUDA_CCVER}) + list(APPEND CUDA_CXX_FLAGS ${CXX_FLAGS} ${GF_CXX_FLAGS}) # This is passed to -Xcompiler later + endif() + + if (NOT MSVC) + list(APPEND CUDA_CXX_FLAGS -Wno-pedantic) + endif() +endif() + +if (WIN32) + add_compile_definitions(_CRT_SECURE_NO_WARNINGS) + + if (BUILD_SHARED_LIBS) + set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON) + endif() +endif() + +if (LLAMA_LTO) + include(CheckIPOSupported) + check_ipo_supported(RESULT result OUTPUT output) + if (result) + set(CMAKE_INTERPROCEDURAL_OPTIMIZATION TRUE) + else() + message(WARNING "IPO is not supported: ${output}") + endif() +endif() + +if (LLAMA_CCACHE) + find_program(LLAMA_CCACHE_FOUND ccache) + if (LLAMA_CCACHE_FOUND) + set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE ccache) + set(ENV{CCACHE_SLOPPINESS} time_macros) + message(STATUS "ccache found, compilation results will be cached.
Disable with LLAMA_CCACHE=OFF.") + else() + message(STATUS "Warning: ccache not found - consider installing it for faster compilation or disable this warning with LLAMA_CCACHE=OFF") + endif () +endif() + +# this version of Apple ld64 is buggy +execute_process( + COMMAND ${CMAKE_C_COMPILER} ${CMAKE_EXE_LINKER_FLAGS} -Wl,-v + ERROR_VARIABLE output + OUTPUT_QUIET +) + +if (output MATCHES "dyld-1015\.7") + add_compile_definitions(HAVE_BUGGY_APPLE_LINKER) +endif() + +# Architecture specific +# TODO: probably these flags need to be tweaked on some architectures +# feel free to update the Makefile for your architecture and send a pull request or issue +message(STATUS "CMAKE_SYSTEM_PROCESSOR: ${CMAKE_SYSTEM_PROCESSOR}") +if (MSVC) + string(TOLOWER "${CMAKE_GENERATOR_PLATFORM}" CMAKE_GENERATOR_PLATFORM_LWR) + message(STATUS "CMAKE_GENERATOR_PLATFORM: ${CMAKE_GENERATOR_PLATFORM}") +else () + set(CMAKE_GENERATOR_PLATFORM_LWR "") +endif () + +if (NOT MSVC) + if (LLAMA_STATIC) + add_link_options(-static) + if (MINGW) + add_link_options(-static-libgcc -static-libstdc++) + endif() + endif() + if (LLAMA_GPROF) + add_compile_options(-pg) + endif() +endif() + +set(ARCH_FLAGS "") + +if (CMAKE_OSX_ARCHITECTURES STREQUAL "arm64" OR CMAKE_GENERATOR_PLATFORM_LWR STREQUAL "arm64" OR + (NOT CMAKE_OSX_ARCHITECTURES AND NOT CMAKE_GENERATOR_PLATFORM_LWR AND + CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64|arm.*|ARM64)$")) + message(STATUS "ARM detected") + if (MSVC) + add_compile_definitions(__aarch64__) # MSVC defines _M_ARM64 instead + add_compile_definitions(__ARM_NEON) + add_compile_definitions(__ARM_FEATURE_FMA) + + set(CMAKE_REQUIRED_FLAGS_PREV ${CMAKE_REQUIRED_FLAGS}) + string(JOIN " " CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS} "/arch:armv8.2") + check_cxx_source_compiles("#include <arm_neon.h>\nint main() { int8x16_t _a, _b; int32x4_t _s = vdotq_s32(_s, _a, _b); return 0; }" GGML_COMPILER_SUPPORT_DOTPROD) + if (GGML_COMPILER_SUPPORT_DOTPROD) + add_compile_definitions(__ARM_FEATURE_DOTPROD) + endif () + check_cxx_source_compiles("#include <arm_neon.h>\nint main() { float16_t _a; float16x8_t _s = vdupq_n_f16(_a); return 0; }" GGML_COMPILER_SUPPORT_FP16_VECTOR_ARITHMETIC) + if (GGML_COMPILER_SUPPORT_FP16_VECTOR_ARITHMETIC) + add_compile_definitions(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + endif () + set(CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS_PREV}) + else() + check_cxx_compiler_flag(-mfp16-format=ieee COMPILER_SUPPORTS_FP16_FORMAT_I3E) + if (NOT "${COMPILER_SUPPORTS_FP16_FORMAT_I3E}" STREQUAL "") + list(APPEND ARCH_FLAGS -mfp16-format=ieee) + endif() + if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "armv6") + # Raspberry Pi 1, Zero + list(APPEND ARCH_FLAGS -mfpu=neon-fp-armv8 -mno-unaligned-access) + endif() + if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "armv7") + if ("${CMAKE_SYSTEM_NAME}" STREQUAL "Android") + # Android armeabi-v7a + list(APPEND ARCH_FLAGS -mfpu=neon-vfpv4 -mno-unaligned-access -funsafe-math-optimizations) + else() + # Raspberry Pi 2 + list(APPEND ARCH_FLAGS -mfpu=neon-fp-armv8 -mno-unaligned-access -funsafe-math-optimizations) + endif() + endif() + if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "armv8") + # Android arm64-v8a + # Raspberry Pi 3, 4, Zero 2 (32-bit) + list(APPEND ARCH_FLAGS -mno-unaligned-access) + endif() + endif() +elseif (CMAKE_OSX_ARCHITECTURES STREQUAL "x86_64" OR CMAKE_GENERATOR_PLATFORM_LWR MATCHES "^(x86_64|i686|amd64|x64|win32)$" OR + (NOT CMAKE_OSX_ARCHITECTURES AND NOT CMAKE_GENERATOR_PLATFORM_LWR AND + CMAKE_SYSTEM_PROCESSOR MATCHES "^(x86_64|i686|AMD64)$")) + message(STATUS "x86 detected") + if (MSVC) + #
instruction set detection for MSVC only + if (LLAMA_NATIVE) + include(cmake/FindSIMD.cmake) + endif () + if (LLAMA_AVX512) + list(APPEND ARCH_FLAGS /arch:AVX512) + # MSVC has no compile-time flags enabling specific + # AVX512 extensions, neither it defines the + # macros corresponding to the extensions. + # Do it manually. + if (LLAMA_AVX512_VBMI) + add_compile_definitions($<$<COMPILE_LANGUAGE:C>:__AVX512VBMI__>) + add_compile_definitions($<$<COMPILE_LANGUAGE:CXX>:__AVX512VBMI__>) + endif() + if (LLAMA_AVX512_VNNI) + add_compile_definitions($<$<COMPILE_LANGUAGE:C>:__AVX512VNNI__>) + add_compile_definitions($<$<COMPILE_LANGUAGE:CXX>:__AVX512VNNI__>) + endif() + elseif (LLAMA_AVX2) + list(APPEND ARCH_FLAGS /arch:AVX2) + elseif (LLAMA_AVX) + list(APPEND ARCH_FLAGS /arch:AVX) + endif() + else() + if (LLAMA_NATIVE) + list(APPEND ARCH_FLAGS -march=native) + endif() + if (LLAMA_F16C) + list(APPEND ARCH_FLAGS -mf16c) + endif() + if (LLAMA_FMA) + list(APPEND ARCH_FLAGS -mfma) + endif() + if (LLAMA_AVX) + list(APPEND ARCH_FLAGS -mavx) + endif() + if (LLAMA_AVX2) + list(APPEND ARCH_FLAGS -mavx2) + endif() + if (LLAMA_AVX512) + list(APPEND ARCH_FLAGS -mavx512f) + list(APPEND ARCH_FLAGS -mavx512bw) + endif() + if (LLAMA_AVX512_VBMI) + list(APPEND ARCH_FLAGS -mavx512vbmi) + endif() + if (LLAMA_AVX512_VNNI) + list(APPEND ARCH_FLAGS -mavx512vnni) + endif() + endif() +elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "ppc64") + message(STATUS "PowerPC detected") + if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "ppc64le") + list(APPEND ARCH_FLAGS -mcpu=powerpc64le) + else() + list(APPEND ARCH_FLAGS -mcpu=native -mtune=native) + #TODO: Add targets for Power8/Power9 (Altivec/VSX) and Power10(MMA) and query for big endian systems (ppc64/le/be) + endif() +else() + message(STATUS "Unknown architecture") +endif() + +add_compile_options("$<$<COMPILE_LANGUAGE:CXX>:${ARCH_FLAGS}>") +add_compile_options("$<$<COMPILE_LANGUAGE:C>:${ARCH_FLAGS}>") + +if (LLAMA_CUDA) + list(APPEND CUDA_CXX_FLAGS ${ARCH_FLAGS}) + list(JOIN CUDA_CXX_FLAGS " " CUDA_CXX_FLAGS_JOINED) # pass host compiler flags as a single argument + if (NOT CUDA_CXX_FLAGS_JOINED STREQUAL "") + list(APPEND CUDA_FLAGS -Xcompiler ${CUDA_CXX_FLAGS_JOINED}) + endif() + add_compile_options("$<$<COMPILE_LANGUAGE:CUDA>:${CUDA_FLAGS}>") +endif() + +if (MINGW) + # Target Windows 8 for PrefetchVirtualMemory + add_compile_definitions(_WIN32_WINNT=${LLAMA_WIN_VER}) +endif() + +# +# POSIX conformance +# + +# clock_gettime came in POSIX.1b (1993) +# CLOCK_MONOTONIC came in POSIX.1-2001 / SUSv3 as optional +# posix_memalign came in POSIX.1-2001 / SUSv3 +# M_PI is an XSI extension since POSIX.1-2001 / SUSv3, came in XPG1 (1985) +add_compile_definitions(_XOPEN_SOURCE=600) + +# Somehow in OpenBSD whenever POSIX conformance is specified +# some string functions rely on locale_t availability, +# which was introduced in POSIX.1-2008, forcing us to go higher +if (CMAKE_SYSTEM_NAME MATCHES "OpenBSD") + remove_definitions(-D_XOPEN_SOURCE=600) + add_compile_definitions(_XOPEN_SOURCE=700) +endif() + +# Data types, macros and functions related to controlling CPU affinity and +# some memory allocation are available on Linux through GNU extensions in libc +if (CMAKE_SYSTEM_NAME MATCHES "Linux") + add_compile_definitions(_GNU_SOURCE) +endif() + +# RLIMIT_MEMLOCK came in BSD, is not specified in POSIX.1, +# and on macOS its availability depends on enabling Darwin extensions +# similarly on DragonFly, enabling BSD extensions is necessary +if ( + CMAKE_SYSTEM_NAME MATCHES "Darwin" OR + CMAKE_SYSTEM_NAME MATCHES "iOS" OR + CMAKE_SYSTEM_NAME MATCHES "tvOS" OR + CMAKE_SYSTEM_NAME MATCHES "DragonFly" +) + add_compile_definitions(_DARWIN_C_SOURCE) +endif() +
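The POSIX-conformance comments above justify `_XOPEN_SOURCE=600` by the interfaces it has to expose (clock_gettime, posix_memalign). The following is a minimal sketch, not part of the vendored CMakeLists.txt, of how that assumption could be verified at configure time on a Unix-like toolchain; the result variables `HAVE_CLOCK_GETTIME` and `HAVE_POSIX_MEMALIGN` are hypothetical names introduced only for this illustration.

```cmake
# Illustrative configure-time probe (not part of the vendored build):
# compile small test programs with the same feature-test macro the build
# defines and confirm the cited POSIX.1-2001 interfaces are visible.
include(CheckSymbolExists)

set(CMAKE_REQUIRED_DEFINITIONS -D_XOPEN_SOURCE=600)
check_symbol_exists(clock_gettime  "time.h"   HAVE_CLOCK_GETTIME)
check_symbol_exists(posix_memalign "stdlib.h" HAVE_POSIX_MEMALIGN)
unset(CMAKE_REQUIRED_DEFINITIONS)

if (NOT HAVE_CLOCK_GETTIME OR NOT HAVE_POSIX_MEMALIGN)
    message(WARNING "POSIX.1-2001 interfaces not visible under _XOPEN_SOURCE=600 on this platform")
endif()
```

CheckSymbolExists honors `CMAKE_REQUIRED_DEFINITIONS`, so the probe sees exactly the macro added by `add_compile_definitions(_XOPEN_SOURCE=600)` above.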
+# alloca is a non-standard interface that is not visible on BSDs when +# POSIX conformance is specified, but not all of them provide a clean way +# to enable it in such cases +if (CMAKE_SYSTEM_NAME MATCHES "FreeBSD") + add_compile_definitions(__BSD_VISIBLE) +endif() +if (CMAKE_SYSTEM_NAME MATCHES "NetBSD") + add_compile_definitions(_NETBSD_SOURCE) +endif() +if (CMAKE_SYSTEM_NAME MATCHES "OpenBSD") + add_compile_definitions(_BSD_SOURCE) +endif() + +# +# libraries +# + +# ggml + +add_library(ggml OBJECT + ggml.c + ggml.h + ggml-alloc.c + ggml-alloc.h + ggml-backend.c + ggml-backend.h + ggml-quants.c + ggml-quants.h + ${GGML_SOURCES_CUDA} ${GGML_HEADERS_CUDA} + ${GGML_SOURCES_OPENCL} ${GGML_HEADERS_OPENCL} + ${GGML_SOURCES_METAL} ${GGML_HEADERS_METAL} + ${GGML_SOURCES_MPI} ${GGML_HEADERS_MPI} + ${GGML_SOURCES_EXTRA} ${GGML_HEADERS_EXTRA} + ${GGML_SOURCES_SYCL} ${GGML_HEADERS_SYCL} + ${GGML_SOURCES_KOMPUTE} ${GGML_HEADERS_KOMPUTE} + ${GGML_SOURCES_VULKAN} ${GGML_HEADERS_VULKAN} + ${GGML_SOURCES_ROCM} ${GGML_HEADERS_ROCM} + ${GGML_SOURCES_LLAMAFILE} ${GGML_HEADERS_LLAMAFILE} + ) + +target_include_directories(ggml PUBLIC . ${LLAMA_EXTRA_INCLUDES}) +target_compile_features (ggml PUBLIC c_std_11) # don't bump + +target_link_libraries(ggml PUBLIC Threads::Threads ${LLAMA_EXTRA_LIBS}) + +add_library(ggml_static STATIC $<TARGET_OBJECTS:ggml>) + +if (BUILD_SHARED_LIBS) + set_target_properties(ggml PROPERTIES POSITION_INDEPENDENT_CODE ON) + add_library(ggml_shared SHARED $<TARGET_OBJECTS:ggml>) + target_link_libraries(ggml_shared PUBLIC Threads::Threads ${LLAMA_EXTRA_LIBS}) + install(TARGETS ggml_shared LIBRARY) +endif() + +# llama + +add_library(llama + llama.cpp + llama.h + unicode.h + unicode.cpp + unicode-data.cpp + ) + +target_include_directories(llama PUBLIC .) +target_compile_features (llama PUBLIC cxx_std_11) # don't bump + +target_link_libraries(llama PRIVATE + ggml + ${LLAMA_EXTRA_LIBS} + ) + +if (BUILD_SHARED_LIBS) + set_target_properties(llama PROPERTIES POSITION_INDEPENDENT_CODE ON) + target_compile_definitions(llama PRIVATE LLAMA_SHARED LLAMA_BUILD) + if (LLAMA_METAL) + set_target_properties(llama PROPERTIES RESOURCE "${CMAKE_CURRENT_SOURCE_DIR}/ggml-metal.metal") + endif() +endif() + + +# +# install +# + +include(GNUInstallDirs) +include(CMakePackageConfigHelpers) + +set(LLAMA_INCLUDE_INSTALL_DIR ${CMAKE_INSTALL_INCLUDEDIR} + CACHE PATH "Location of header files") +set(LLAMA_LIB_INSTALL_DIR ${CMAKE_INSTALL_LIBDIR} + CACHE PATH "Location of library files") +set(LLAMA_BIN_INSTALL_DIR ${CMAKE_INSTALL_BINDIR} + CACHE PATH "Location of binary files") +set(LLAMA_BUILD_NUMBER ${BUILD_NUMBER}) +set(LLAMA_BUILD_COMMIT ${BUILD_COMMIT}) +set(LLAMA_INSTALL_VERSION 0.0.${BUILD_NUMBER}) +get_directory_property(LLAMA_TRANSIENT_DEFINES COMPILE_DEFINITIONS) + +configure_package_config_file( + ${CMAKE_CURRENT_SOURCE_DIR}/scripts/LlamaConfig.cmake.in + ${CMAKE_CURRENT_BINARY_DIR}/LlamaConfig.cmake + INSTALL_DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/Llama + PATH_VARS LLAMA_INCLUDE_INSTALL_DIR + LLAMA_LIB_INSTALL_DIR + LLAMA_BIN_INSTALL_DIR ) + +write_basic_package_version_file( + ${CMAKE_CURRENT_BINARY_DIR}/LlamaConfigVersion.cmake + VERSION ${LLAMA_INSTALL_VERSION} + COMPATIBILITY SameMajorVersion) + +install(FILES ${CMAKE_CURRENT_BINARY_DIR}/LlamaConfig.cmake + ${CMAKE_CURRENT_BINARY_DIR}/LlamaConfigVersion.cmake + DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/Llama) + +set(GGML_PUBLIC_HEADERS "ggml.h" "ggml-alloc.h" "ggml-backend.h" + "${GGML_HEADERS_CUDA}" "${GGML_HEADERS_OPENCL}" + "${GGML_HEADERS_METAL}" "${GGML_HEADERS_MPI}"
"${GGML_HEADERS_EXTRA}") + +set_target_properties(ggml PROPERTIES PUBLIC_HEADER "${GGML_PUBLIC_HEADERS}") +install(TARGETS ggml PUBLIC_HEADER) + +set_target_properties(llama PROPERTIES PUBLIC_HEADER ${CMAKE_CURRENT_SOURCE_DIR}/llama.h) +install(TARGETS llama LIBRARY PUBLIC_HEADER) + +install( + FILES convert.py + PERMISSIONS + OWNER_READ + OWNER_WRITE + OWNER_EXECUTE + GROUP_READ + GROUP_EXECUTE + WORLD_READ + WORLD_EXECUTE + DESTINATION ${CMAKE_INSTALL_BINDIR}) +install( + FILES convert-lora-to-ggml.py + PERMISSIONS + OWNER_READ + OWNER_WRITE + OWNER_EXECUTE + GROUP_READ + GROUP_EXECUTE + WORLD_READ + WORLD_EXECUTE + DESTINATION ${CMAKE_INSTALL_BINDIR}) +if (LLAMA_METAL) + install( + FILES ggml-metal.metal + PERMISSIONS + OWNER_READ + OWNER_WRITE + GROUP_READ + WORLD_READ + DESTINATION ${CMAKE_INSTALL_BINDIR}) + if (NOT LLAMA_METAL_EMBED_LIBRARY) + install( + FILES ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/default.metallib + DESTINATION ${CMAKE_INSTALL_BINDIR} + ) + endif() +endif() + +# +# programs, examples and tests +# + +add_subdirectory(common) + +if (LLAMA_BUILD_TESTS AND NOT CMAKE_JS_VERSION) + include(CTest) + add_subdirectory(tests) +endif () + +if (LLAMA_BUILD_EXAMPLES) + add_subdirectory(examples) + add_subdirectory(pocs) +endif() diff --git a/llama-cpp-python/vendor/llama.cpp/LICENSE b/llama-cpp-python/vendor/llama.cpp/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..acb96ce78e0486e9dc7602cdcdd2f491c34f335a --- /dev/null +++ b/llama-cpp-python/vendor/llama.cpp/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2023-2024 The ggml authors + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/llama-cpp-python/vendor/llama.cpp/Makefile b/llama-cpp-python/vendor/llama.cpp/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..0a73f2a582a204b7c3668e4a586a3afadca4f375 --- /dev/null +++ b/llama-cpp-python/vendor/llama.cpp/Makefile @@ -0,0 +1,1036 @@ +# Define the default target now so that it is always the first target +BUILD_TARGETS = \ + main quantize quantize-stats perplexity imatrix embedding vdot q8dot train-text-from-scratch convert-llama2c-to-ggml \ + simple batched batched-bench save-load-state server gguf gguf-split eval-callback llama-bench libllava.a llava-cli baby-llama beam-search \ + retrieval speculative infill tokenize benchmark-matmult parallel finetune export-lora lookahead lookup passkey gritlm tests/test-c.o + +# Binaries only useful for tests +TEST_TARGETS = \ + tests/test-autorelease \ + tests/test-backend-ops \ + tests/test-double-float \ + tests/test-grad0 \ + tests/test-grammar-integration \ + tests/test-grammar-parser \ + tests/test-json-schema-to-grammar \ + tests/test-llama-grammar \ + tests/test-model-load-cancel \ + tests/test-opt \ + tests/test-quantize-fns \ + tests/test-quantize-perf \ + tests/test-rope \ + tests/test-sampling \ + tests/test-tokenizer-0 \ + tests/test-tokenizer-1-bpe \ + tests/test-tokenizer-1-spm + +# Code coverage output files +COV_TARGETS = *.gcno tests/*.gcno *.gcda tests/*.gcda *.gcov tests/*.gcov lcov-report gcovr-report + +ifndef UNAME_S +UNAME_S := $(shell uname -s) +endif + +ifndef UNAME_P +UNAME_P := $(shell uname -p) +endif + +ifndef UNAME_M +UNAME_M := $(shell uname -m) +endif + +# In GNU make default CXX is g++ instead of c++. Let's fix that so that users +# of non-gcc compilers don't have to provide g++ alias or wrapper. +DEFCC := cc +DEFCXX := c++ +ifeq ($(origin CC),default) +CC := $(DEFCC) +endif +ifeq ($(origin CXX),default) +CXX := $(DEFCXX) +endif + +# Mac OS + Arm can report x86_64 +# ref: https://github.com/ggerganov/whisper.cpp/issues/66#issuecomment-1282546789 +ifeq ($(UNAME_S),Darwin) + ifndef LLAMA_NO_METAL + LLAMA_METAL := 1 + endif + + ifneq ($(UNAME_P),arm) + SYSCTL_M := $(shell sysctl -n hw.optional.arm64 2>/dev/null) + ifeq ($(SYSCTL_M),1) + # UNAME_P := arm + # UNAME_M := arm64 + warn := $(warning Your arch is announced as x86_64, but it seems to actually be ARM64. Not fixing that can lead to bad performance. For more info see: https://github.com/ggerganov/whisper.cpp/issues/66\#issuecomment-1282546789) + endif + endif +endif + +default: $(BUILD_TARGETS) + +test: $(TEST_TARGETS) + @failures=0; \ + for test_target in $(TEST_TARGETS); do \ + if [ "$$test_target" = "tests/test-tokenizer-0" ]; then \ + ./$$test_target $(CURDIR)/models/ggml-vocab-llama-spm.gguf; \ + ./$$test_target $(CURDIR)/models/ggml-vocab-llama-bpe.gguf; \ + ./$$test_target $(CURDIR)/models/ggml-vocab-phi-3.gguf; \ + ./$$test_target $(CURDIR)/models/ggml-vocab-falcon.gguf; \ + ./$$test_target $(CURDIR)/models/ggml-vocab-deepseek-coder.gguf; \ + ./$$test_target $(CURDIR)/models/ggml-vocab-deepseek-llm.gguf; \ + ./$$test_target $(CURDIR)/models/ggml-vocab-bert-bge.gguf; \ + ./$$test_target $(CURDIR)/models/ggml-vocab-starcoder.gguf; \ + ./$$test_target $(CURDIR)/models/ggml-vocab-gpt-2.gguf; \ + elif [ "$$test_target" = "tests/test-tokenizer-1-spm" ]; then \ + continue; \ + elif [ "$$test_target" = "tests/test-tokenizer-1-bpe" ]; then \ + continue; \ + else \ + echo "Running test $$test_target..."; \ + ./$$test_target; \ + fi; \ + if [ $$? 
-ne 0 ]; then \ + printf 'Test %s FAILED!\n\n' $$test_target; \ + failures=$$(( failures + 1 )); \ + else \ + printf 'Test %s passed.\n\n' $$test_target; \ + fi; \ + done; \ + if [ $$failures -gt 0 ]; then \ + printf '\n%s tests failed.\n' $$failures; \ + exit 1; \ + fi + @echo 'All tests passed.' + +all: $(BUILD_TARGETS) $(TEST_TARGETS) + +coverage: ## Run code coverage + gcov -pb tests/*.cpp + +lcov-report: coverage ## Generate lcov report + mkdir -p lcov-report + lcov --capture --directory . --output-file lcov-report/coverage.info + genhtml lcov-report/coverage.info --output-directory lcov-report + +gcovr-report: coverage ## Generate gcovr report + mkdir -p gcovr-report + gcovr --root . --html --html-details --output gcovr-report/coverage.html + +ifdef RISCV_CROSS_COMPILE +CC := riscv64-unknown-linux-gnu-gcc +CXX := riscv64-unknown-linux-gnu-g++ +endif + +# +# Compile flags +# + +# keep standard at C11 and C++11 +MK_CPPFLAGS = -I. -Icommon +MK_CFLAGS = -std=c11 -fPIC +MK_CXXFLAGS = -std=c++11 -fPIC +MK_NVCCFLAGS = -std=c++11 + +# -Ofast tends to produce faster code, but may not be available for some compilers. +ifdef LLAMA_FAST +MK_CFLAGS += -Ofast +HOST_CXXFLAGS += -Ofast +MK_NVCCFLAGS += -O3 +else +MK_CFLAGS += -O3 +MK_CXXFLAGS += -O3 +MK_NVCCFLAGS += -O3 +endif + +ifndef LLAMA_NO_CCACHE +CCACHE := $(shell which ccache) +ifdef CCACHE +export CCACHE_SLOPPINESS = time_macros +$(info I ccache found, compilation results will be cached. Disable with LLAMA_NO_CCACHE.) +CC := $(CCACHE) $(CC) +CXX := $(CCACHE) $(CXX) +else +$(info I ccache not found. Consider installing it for faster compilation.) +endif # CCACHE +endif # LLAMA_NO_CCACHE + +# clock_gettime came in POSIX.1b (1993) +# CLOCK_MONOTONIC came in POSIX.1-2001 / SUSv3 as optional +# posix_memalign came in POSIX.1-2001 / SUSv3 +# M_PI is an XSI extension since POSIX.1-2001 / SUSv3, came in XPG1 (1985) +MK_CPPFLAGS += -D_XOPEN_SOURCE=600 + +# Somehow in OpenBSD whenever POSIX conformance is specified +# some string functions rely on locale_t availability, +# which was introduced in POSIX.1-2008, forcing us to go higher +ifeq ($(UNAME_S),OpenBSD) + MK_CPPFLAGS += -U_XOPEN_SOURCE -D_XOPEN_SOURCE=700 +endif + +# Data types, macros and functions related to controlling CPU affinity and +# some memory allocation are available on Linux through GNU extensions in libc +ifeq ($(UNAME_S),Linux) + MK_CPPFLAGS += -D_GNU_SOURCE +endif + +# RLIMIT_MEMLOCK came in BSD, is not specified in POSIX.1, +# and on macOS its availability depends on enabling Darwin extensions +# similarly on DragonFly, enabling BSD extensions is necessary +ifeq ($(UNAME_S),Darwin) + MK_CPPFLAGS += -D_DARWIN_C_SOURCE +endif +ifeq ($(UNAME_S),DragonFly) + MK_CPPFLAGS += -D__BSD_VISIBLE +endif + +# alloca is a non-standard interface that is not visible on BSDs when +# POSIX conformance is specified, but not all of them provide a clean way +# to enable it in such cases +ifeq ($(UNAME_S),FreeBSD) + MK_CPPFLAGS += -D__BSD_VISIBLE +endif +ifeq ($(UNAME_S),NetBSD) + MK_CPPFLAGS += -D_NETBSD_SOURCE +endif +ifeq ($(UNAME_S),OpenBSD) + MK_CPPFLAGS += -D_BSD_SOURCE +endif + +ifdef LLAMA_SCHED_MAX_COPIES + MK_CPPFLAGS += -DGGML_SCHED_MAX_COPIES=$(LLAMA_SCHED_MAX_COPIES) +endif + +ifdef LLAMA_DEBUG + MK_CFLAGS += -O0 -g + MK_CXXFLAGS += -O0 -g + MK_LDFLAGS += -g + + ifeq ($(UNAME_S),Linux) + MK_CPPFLAGS += -D_GLIBCXX_ASSERTIONS + endif +else + MK_CPPFLAGS += -DNDEBUG +endif + +ifdef LLAMA_SANITIZE_THREAD + MK_CFLAGS += -fsanitize=thread -g + MK_CXXFLAGS += -fsanitize=thread -g + MK_LDFLAGS 
+= -fsanitize=thread -g +endif + +ifdef LLAMA_SANITIZE_ADDRESS + MK_CFLAGS += -fsanitize=address -fno-omit-frame-pointer -g + MK_CXXFLAGS += -fsanitize=address -fno-omit-frame-pointer -g + MK_LDFLAGS += -fsanitize=address -fno-omit-frame-pointer -g +endif + +ifdef LLAMA_SANITIZE_UNDEFINED + MK_CFLAGS += -fsanitize=undefined -g + MK_CXXFLAGS += -fsanitize=undefined -g + MK_LDFLAGS += -fsanitize=undefined -g +endif + +ifdef LLAMA_SERVER_VERBOSE + MK_CPPFLAGS += -DSERVER_VERBOSE=$(LLAMA_SERVER_VERBOSE) +endif + +ifdef LLAMA_SERVER_SSL + MK_CPPFLAGS += -DCPPHTTPLIB_OPENSSL_SUPPORT + MK_LDFLAGS += -lssl -lcrypto +endif + +ifdef LLAMA_CODE_COVERAGE + MK_CXXFLAGS += -fprofile-arcs -ftest-coverage -dumpbase '' +endif + +ifdef LLAMA_DISABLE_LOGS + MK_CPPFLAGS += -DLOG_DISABLE_LOGS +endif # LLAMA_DISABLE_LOGS + +# warnings +WARN_FLAGS = -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function +MK_CFLAGS += $(WARN_FLAGS) -Wshadow -Wstrict-prototypes -Wpointer-arith -Wmissing-prototypes -Werror=implicit-int \ + -Werror=implicit-function-declaration +MK_CXXFLAGS += $(WARN_FLAGS) -Wmissing-declarations -Wmissing-noreturn + +ifeq ($(LLAMA_FATAL_WARNINGS),1) + MK_CFLAGS += -Werror + MK_CXXFLAGS += -Werror +endif + +# this version of Apple ld64 is buggy +ifneq '' '$(findstring dyld-1015.7,$(shell $(CC) $(LDFLAGS) -Wl,-v 2>&1))' + MK_CPPFLAGS += -DHAVE_BUGGY_APPLE_LINKER +endif + +# OS specific +# TODO: support Windows +ifneq '' '$(filter $(UNAME_S),Linux Darwin FreeBSD NetBSD OpenBSD Haiku)' + MK_CFLAGS += -pthread + MK_CXXFLAGS += -pthread +endif + +# detect Windows +ifneq ($(findstring _NT,$(UNAME_S)),) + _WIN32 := 1 +endif + +# library name prefix +ifneq ($(_WIN32),1) + LIB_PRE := lib +endif + +# Dynamic Shared Object extension +ifneq ($(_WIN32),1) + DSO_EXT := .so +else + DSO_EXT := .dll +endif + +# Windows Sockets 2 (Winsock) for network-capable apps +ifeq ($(_WIN32),1) + LWINSOCK2 := -lws2_32 +endif + +ifdef LLAMA_GPROF + MK_CFLAGS += -pg + MK_CXXFLAGS += -pg +endif +ifdef LLAMA_PERF + MK_CPPFLAGS += -DGGML_PERF +endif + +# Architecture specific +# TODO: probably these flags need to be tweaked on some architectures +# feel free to update the Makefile for your architecture and send a pull request or issue + +ifndef RISCV + +ifeq ($(UNAME_M),$(filter $(UNAME_M),x86_64 i686 amd64)) + # Use all CPU extensions that are available: + MK_CFLAGS += -march=native -mtune=native + HOST_CXXFLAGS += -march=native -mtune=native + + # Usage AVX-only + #MK_CFLAGS += -mfma -mf16c -mavx + #MK_CXXFLAGS += -mfma -mf16c -mavx + + # Usage SSSE3-only (Not is SSE3!) + #MK_CFLAGS += -mssse3 + #MK_CXXFLAGS += -mssse3 +endif + +ifneq '' '$(findstring mingw,$(shell $(CC) -dumpmachine))' + # The stack is only 16-byte aligned on Windows, so don't let gcc emit aligned moves. + # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=54412 + # https://github.com/ggerganov/llama.cpp/issues/2922 + MK_CFLAGS += -Xassembler -muse-unaligned-vector-move + MK_CXXFLAGS += -Xassembler -muse-unaligned-vector-move + + # Target Windows 8 for PrefetchVirtualMemory + MK_CPPFLAGS += -D_WIN32_WINNT=0x602 +endif + +ifneq ($(filter aarch64%,$(UNAME_M)),) + # Apple M1, M2, etc. 
+ # Raspberry Pi 3, 4, Zero 2 (64-bit) + # Nvidia Jetson + MK_CFLAGS += -mcpu=native + MK_CXXFLAGS += -mcpu=native + JETSON_RELEASE_INFO = $(shell jetson_release) + ifdef JETSON_RELEASE_INFO + ifneq ($(filter TX2%,$(JETSON_RELEASE_INFO)),) + JETSON_EOL_MODULE_DETECT = 1 + CC = aarch64-unknown-linux-gnu-gcc + cxx = aarch64-unknown-linux-gnu-g++ + endif + endif +endif + +ifneq ($(filter armv6%,$(UNAME_M)),) + # Raspberry Pi 1, Zero + MK_CFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access + MK_CXXFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access +endif + +ifneq ($(filter armv7%,$(UNAME_M)),) + # Raspberry Pi 2 + MK_CFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access -funsafe-math-optimizations + MK_CXXFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access -funsafe-math-optimizations +endif + +ifneq ($(filter armv8%,$(UNAME_M)),) + # Raspberry Pi 3, 4, Zero 2 (32-bit) + MK_CFLAGS += -mfp16-format=ieee -mno-unaligned-access + MK_CXXFLAGS += -mfp16-format=ieee -mno-unaligned-access +endif + +ifneq ($(filter ppc64%,$(UNAME_M)),) + POWER9_M := $(shell grep "POWER9" /proc/cpuinfo) + ifneq (,$(findstring POWER9,$(POWER9_M))) + MK_CFLAGS += -mcpu=power9 + MK_CXXFLAGS += -mcpu=power9 + endif +endif + +ifneq ($(filter ppc64le%,$(UNAME_M)),) + MK_CFLAGS += -mcpu=powerpc64le + MK_CXXFLAGS += -mcpu=powerpc64le + CUDA_POWER_ARCH = 1 +endif + +else + MK_CFLAGS += -march=rv64gcv -mabi=lp64d + MK_CXXFLAGS += -march=rv64gcv -mabi=lp64d +endif + +ifdef LLAMA_QKK_64 + MK_CPPFLAGS += -DGGML_QKK_64 +endif + +ifndef LLAMA_NO_ACCELERATE + # Mac OS - include Accelerate framework. + # `-framework Accelerate` works both with Apple Silicon and Mac Intel + ifeq ($(UNAME_S),Darwin) + MK_CPPFLAGS += -DGGML_USE_ACCELERATE + MK_CPPFLAGS += -DACCELERATE_NEW_LAPACK + MK_CPPFLAGS += -DACCELERATE_LAPACK_ILP64 + MK_LDFLAGS += -framework Accelerate + endif +endif # LLAMA_NO_ACCELERATE + +ifdef LLAMA_MPI + MK_CPPFLAGS += -DGGML_USE_MPI + MK_CFLAGS += -Wno-cast-qual + MK_CXXFLAGS += -Wno-cast-qual + OBJS += ggml-mpi.o +endif # LLAMA_MPI + +ifdef LLAMA_OPENBLAS + MK_CPPFLAGS += -DGGML_USE_OPENBLAS $(shell pkg-config --cflags-only-I openblas) + MK_CFLAGS += $(shell pkg-config --cflags-only-other openblas) + MK_LDFLAGS += $(shell pkg-config --libs openblas) +endif # LLAMA_OPENBLAS + +ifndef LLAMA_NO_LLAMAFILE + MK_CPPFLAGS += -DGGML_USE_LLAMAFILE + OBJS += sgemm.o +endif + +ifdef LLAMA_BLIS + MK_CPPFLAGS += -DGGML_USE_OPENBLAS -I/usr/local/include/blis -I/usr/include/blis + MK_LDFLAGS += -lblis -L/usr/local/lib +endif # LLAMA_BLIS + +ifdef LLAMA_CUBLAS +# LLAMA_CUBLAS is deprecated and will be removed in the future + LLAMA_CUDA := 1 +endif + +ifdef LLAMA_CUDA + ifneq ('', '$(wildcard /opt/cuda)') + CUDA_PATH ?= /opt/cuda + else + CUDA_PATH ?= /usr/local/cuda + endif + MK_CPPFLAGS += -DGGML_USE_CUDA -I$(CUDA_PATH)/include -I$(CUDA_PATH)/targets/$(UNAME_M)-linux/include + MK_LDFLAGS += -lcuda -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L$(CUDA_PATH)/lib64 -L/usr/lib64 -L$(CUDA_PATH)/targets/$(UNAME_M)-linux/lib -L/usr/lib/wsl/lib + OBJS += ggml-cuda.o + OBJS += $(patsubst %.cu,%.o,$(wildcard ggml-cuda/*.cu)) + MK_NVCCFLAGS += -use_fast_math +ifdef LLAMA_FATAL_WARNINGS + MK_NVCCFLAGS += -Werror all-warnings +endif # LLAMA_FATAL_WARNINGS +ifndef JETSON_EOL_MODULE_DETECT + MK_NVCCFLAGS += --forward-unknown-to-host-compiler +endif # JETSON_EOL_MODULE_DETECT +ifdef LLAMA_DEBUG + MK_NVCCFLAGS += -lineinfo +endif # LLAMA_DEBUG +ifdef LLAMA_CUDA_NVCC + NVCC 
= $(CCACHE) $(LLAMA_CUDA_NVCC) +else + NVCC = $(CCACHE) nvcc +endif #LLAMA_CUDA_NVCC +ifdef CUDA_DOCKER_ARCH + MK_NVCCFLAGS += -Wno-deprecated-gpu-targets -arch=$(CUDA_DOCKER_ARCH) +else ifndef CUDA_POWER_ARCH + MK_NVCCFLAGS += -arch=native +endif # CUDA_DOCKER_ARCH +ifdef LLAMA_CUDA_FORCE_DMMV + MK_NVCCFLAGS += -DGGML_CUDA_FORCE_DMMV +endif # LLAMA_CUDA_FORCE_DMMV +ifdef LLAMA_CUDA_FORCE_MMQ + MK_NVCCFLAGS += -DGGML_CUDA_FORCE_MMQ +endif # LLAMA_CUDA_FORCE_MMQ +ifdef LLAMA_CUDA_DMMV_X + MK_NVCCFLAGS += -DGGML_CUDA_DMMV_X=$(LLAMA_CUDA_DMMV_X) +else + MK_NVCCFLAGS += -DGGML_CUDA_DMMV_X=32 +endif # LLAMA_CUDA_DMMV_X +ifdef LLAMA_CUDA_MMV_Y + MK_NVCCFLAGS += -DGGML_CUDA_MMV_Y=$(LLAMA_CUDA_MMV_Y) +else ifdef LLAMA_CUDA_DMMV_Y + MK_NVCCFLAGS += -DGGML_CUDA_MMV_Y=$(LLAMA_CUDA_DMMV_Y) # for backwards compatibility +else + MK_NVCCFLAGS += -DGGML_CUDA_MMV_Y=1 +endif # LLAMA_CUDA_MMV_Y +ifdef LLAMA_CUDA_F16 + MK_NVCCFLAGS += -DGGML_CUDA_F16 +endif # LLAMA_CUDA_F16 +ifdef LLAMA_CUDA_DMMV_F16 + MK_NVCCFLAGS += -DGGML_CUDA_F16 +endif # LLAMA_CUDA_DMMV_F16 +ifdef LLAMA_CUDA_KQUANTS_ITER + MK_NVCCFLAGS += -DK_QUANTS_PER_ITERATION=$(LLAMA_CUDA_KQUANTS_ITER) +else + MK_NVCCFLAGS += -DK_QUANTS_PER_ITERATION=2 +endif +ifdef LLAMA_CUDA_PEER_MAX_BATCH_SIZE + MK_NVCCFLAGS += -DGGML_CUDA_PEER_MAX_BATCH_SIZE=$(LLAMA_CUDA_PEER_MAX_BATCH_SIZE) +else + MK_NVCCFLAGS += -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 +endif # LLAMA_CUDA_PEER_MAX_BATCH_SIZE +ifdef LLAMA_CUDA_NO_PEER_COPY + MK_NVCCFLAGS += -DGGML_CUDA_NO_PEER_COPY +endif # LLAMA_CUDA_NO_PEER_COPY +ifdef LLAMA_CUDA_CCBIN + MK_NVCCFLAGS += -ccbin $(LLAMA_CUDA_CCBIN) +endif + +ifdef JETSON_EOL_MODULE_DETECT +define NVCC_COMPILE + $(NVCC) -I. -Icommon -D_XOPEN_SOURCE=600 -D_GNU_SOURCE -DNDEBUG -DGGML_USE_CUDA -I/usr/local/cuda/include -I/opt/cuda/include -I/usr/local/cuda/targets/aarch64-linux/include -std=c++11 -O3 $(NVCCFLAGS) $(CPPFLAGS) -Xcompiler "$(CUDA_CXXFLAGS)" -c $< -o $@ +endef # NVCC_COMPILE +else +define NVCC_COMPILE + $(NVCC) $(NVCCFLAGS) $(CPPFLAGS) -Xcompiler "$(CUDA_CXXFLAGS)" -c $< -o $@ +endef # NVCC_COMPILE +endif # JETSON_EOL_MODULE_DETECT + +ggml-cuda/%.o: ggml-cuda/%.cu ggml-cuda/%.cuh ggml.h ggml-common.h ggml-cuda/common.cuh + $(NVCC_COMPILE) + +ggml-cuda.o: ggml-cuda.cu ggml-cuda.h ggml.h ggml-backend.h ggml-backend-impl.h ggml-common.h $(wildcard ggml-cuda/*.cuh) + $(NVCC_COMPILE) +endif # LLAMA_CUDA + +ifdef LLAMA_CLBLAST + MK_CPPFLAGS += -DGGML_USE_CLBLAST $(shell pkg-config --cflags-only-I clblast OpenCL) + MK_CFLAGS += $(shell pkg-config --cflags-only-other clblast OpenCL) + MK_CXXFLAGS += $(shell pkg-config --cflags-only-other clblast OpenCL) + + # Mac provides OpenCL as a framework + ifeq ($(UNAME_S),Darwin) + MK_LDFLAGS += -lclblast -framework OpenCL + else + MK_LDFLAGS += $(shell pkg-config --libs clblast OpenCL) + endif + OBJS += ggml-opencl.o + +ggml-opencl.o: ggml-opencl.cpp ggml-opencl.h + $(CXX) $(CXXFLAGS) -c $< -o $@ +endif # LLAMA_CLBLAST + +ifdef LLAMA_VULKAN + MK_CPPFLAGS += -DGGML_USE_VULKAN + MK_LDFLAGS += -lvulkan + OBJS += ggml-vulkan.o + +ifdef LLAMA_VULKAN_CHECK_RESULTS + MK_CPPFLAGS += -DGGML_VULKAN_CHECK_RESULTS +endif + +ifdef LLAMA_VULKAN_DEBUG + MK_CPPFLAGS += -DGGML_VULKAN_DEBUG +endif + +ifdef LLAMA_VULKAN_VALIDATE + MK_CPPFLAGS += -DGGML_VULKAN_VALIDATE +endif + +ifdef LLAMA_VULKAN_RUN_TESTS + MK_CPPFLAGS += -DGGML_VULKAN_RUN_TESTS +endif + +ggml-vulkan.o: ggml-vulkan.cpp ggml-vulkan.h + $(CXX) $(CXXFLAGS) -c $< -o $@ +endif # LLAMA_VULKAN + +ifdef LLAMA_HIPBLAS + ifeq ($(wildcard /opt/rocm),) + ROCM_PATH ?= 
/usr + GPU_TARGETS ?= $(shell $(shell which amdgpu-arch)) + else + ROCM_PATH ?= /opt/rocm + GPU_TARGETS ?= $(shell $(ROCM_PATH)/llvm/bin/amdgpu-arch) + endif + HIPCC ?= $(CCACHE) $(ROCM_PATH)/bin/hipcc + LLAMA_CUDA_DMMV_X ?= 32 + LLAMA_CUDA_MMV_Y ?= 1 + LLAMA_CUDA_KQUANTS_ITER ?= 2 + MK_CPPFLAGS += -DGGML_USE_HIPBLAS -DGGML_USE_CUDA +ifdef LLAMA_HIP_UMA + MK_CPPFLAGS += -DGGML_HIP_UMA +endif # LLAMA_HIP_UMA + MK_LDFLAGS += -L$(ROCM_PATH)/lib -Wl,-rpath=$(ROCM_PATH)/lib + MK_LDFLAGS += -lhipblas -lamdhip64 -lrocblas + HIPFLAGS += $(addprefix --offload-arch=,$(GPU_TARGETS)) + HIPFLAGS += -DGGML_CUDA_DMMV_X=$(LLAMA_CUDA_DMMV_X) + HIPFLAGS += -DGGML_CUDA_MMV_Y=$(LLAMA_CUDA_MMV_Y) + HIPFLAGS += -DK_QUANTS_PER_ITERATION=$(LLAMA_CUDA_KQUANTS_ITER) +ifdef LLAMA_CUDA_FORCE_DMMV + HIPFLAGS += -DGGML_CUDA_FORCE_DMMV +endif # LLAMA_CUDA_FORCE_DMMV +ifdef LLAMA_CUDA_NO_PEER_COPY + HIPFLAGS += -DGGML_CUDA_NO_PEER_COPY +endif # LLAMA_CUDA_NO_PEER_COPY + OBJS += ggml-cuda.o + OBJS += $(patsubst %.cu,%.o,$(wildcard ggml-cuda/*.cu)) + +ggml-cuda.o: ggml-cuda.cu ggml-cuda.h ggml.h ggml-backend.h ggml-backend-impl.h ggml-common.h $(wildcard ggml-cuda/*.cuh) + $(HIPCC) $(CXXFLAGS) $(HIPFLAGS) -x hip -c -o $@ $< + +ggml-cuda/%.o: ggml-cuda/%.cu ggml-cuda/%.cuh ggml.h ggml-common.h ggml-cuda/common.cuh + $(HIPCC) $(CXXFLAGS) $(HIPFLAGS) -x hip -c -o $@ $< + +endif # LLAMA_HIPBLAS + +ifdef LLAMA_METAL + MK_CPPFLAGS += -DGGML_USE_METAL + MK_LDFLAGS += -framework Foundation -framework Metal -framework MetalKit + OBJS += ggml-metal.o +ifdef LLAMA_METAL_NDEBUG + MK_CPPFLAGS += -DGGML_METAL_NDEBUG +endif +ifdef LLAMA_METAL_EMBED_LIBRARY + MK_CPPFLAGS += -DGGML_METAL_EMBED_LIBRARY + OBJS += ggml-metal-embed.o +endif +endif # LLAMA_METAL + +ifdef LLAMA_METAL +ggml-metal.o: ggml-metal.m ggml-metal.h ggml.h + $(CC) $(CFLAGS) -c $< -o $@ + +ifdef LLAMA_METAL_EMBED_LIBRARY +ggml-metal-embed.o: ggml-metal.metal ggml-common.h + @echo "Embedding Metal library" + @sed -e '/#include "ggml-common.h"/r ggml-common.h' -e '/#include "ggml-common.h"/d' < ggml-metal.metal > ggml-metal-embed.metal + $(eval TEMP_ASSEMBLY=$(shell mktemp)) + @echo ".section __DATA, __ggml_metallib" > $(TEMP_ASSEMBLY) + @echo ".globl _ggml_metallib_start" >> $(TEMP_ASSEMBLY) + @echo "_ggml_metallib_start:" >> $(TEMP_ASSEMBLY) + @echo ".incbin \"ggml-metal-embed.metal\"" >> $(TEMP_ASSEMBLY) + @echo ".globl _ggml_metallib_end" >> $(TEMP_ASSEMBLY) + @echo "_ggml_metallib_end:" >> $(TEMP_ASSEMBLY) + @$(AS) $(TEMP_ASSEMBLY) -o $@ + @rm -f ${TEMP_ASSEMBLY} +endif +endif # LLAMA_METAL + +ifdef LLAMA_MPI +ggml-mpi.o: ggml-mpi.c ggml-mpi.h + $(CC) $(CFLAGS) -c $< -o $@ +endif # LLAMA_MPI + +ifndef LLAMA_NO_LLAMAFILE +sgemm.o: sgemm.cpp sgemm.h ggml.h + $(CXX) $(CXXFLAGS) -c $< -o $@ +endif + +GF_CC := $(CC) +include scripts/get-flags.mk + +# combine build flags with cmdline overrides +override CPPFLAGS := $(MK_CPPFLAGS) $(CPPFLAGS) +override CFLAGS := $(CPPFLAGS) $(MK_CFLAGS) $(GF_CFLAGS) $(CFLAGS) +BASE_CXXFLAGS := $(MK_CXXFLAGS) $(CXXFLAGS) +override CXXFLAGS := $(BASE_CXXFLAGS) $(HOST_CXXFLAGS) $(GF_CXXFLAGS) $(CPPFLAGS) +override NVCCFLAGS := $(MK_NVCCFLAGS) $(NVCCFLAGS) +override LDFLAGS := $(MK_LDFLAGS) $(LDFLAGS) + +# identify CUDA host compiler +ifdef LLAMA_CUDA +GF_CC := $(NVCC) $(NVCCFLAGS) 2>/dev/null .c -Xcompiler +include scripts/get-flags.mk +CUDA_CXXFLAGS := $(BASE_CXXFLAGS) $(GF_CXXFLAGS) -Wno-pedantic +endif + +ifdef LLAMA_CURL +override CXXFLAGS := $(CXXFLAGS) -DLLAMA_USE_CURL +override LDFLAGS := $(LDFLAGS) -lcurl +endif + +# +# Print build 
information +# + +$(info I llama.cpp build info: ) +$(info I UNAME_S: $(UNAME_S)) +$(info I UNAME_P: $(UNAME_P)) +$(info I UNAME_M: $(UNAME_M)) +$(info I CFLAGS: $(CFLAGS)) +$(info I CXXFLAGS: $(CXXFLAGS)) +$(info I NVCCFLAGS: $(NVCCFLAGS)) +$(info I LDFLAGS: $(LDFLAGS)) +$(info I CC: $(shell $(CC) --version | head -n 1)) +$(info I CXX: $(shell $(CXX) --version | head -n 1)) +ifdef LLAMA_CUDA +$(info I NVCC: $(shell $(NVCC) --version | tail -n 1)) +CUDA_VERSION := $(shell $(NVCC) --version | grep -oP 'release (\K[0-9]+\.[0-9])') +ifeq ($(shell awk -v "v=$(CUDA_VERSION)" 'BEGIN { print (v < 11.7) }'),1) +ifndef CUDA_DOCKER_ARCH +ifndef CUDA_POWER_ARCH +$(error I ERROR: For CUDA versions < 11.7 a target CUDA architecture must be explicitly provided via environment variable CUDA_DOCKER_ARCH, e.g. by running "export CUDA_DOCKER_ARCH=compute_XX" on Unix-like systems, where XX is the minimum compute capability that the code needs to run on. A list with compute capabilities can be found here: https://developer.nvidia.com/cuda-gpus ) +endif # CUDA_POWER_ARCH +endif # CUDA_DOCKER_ARCH +endif # eq ($(shell echo "$(CUDA_VERSION) < 11.7" | bc),1) +endif # LLAMA_CUDA +$(info ) + +ifdef LLAMA_CUBLAS +$(info !!!!) +$(info LLAMA_CUBLAS is deprecated and will be removed in the future. Use LLAMA_CUDA instead.) +$(info !!!!) +$(info ) +endif + +# +# Build library +# + +ggml.o: ggml.c ggml.h ggml-cuda.h + $(CC) $(CFLAGS) -c $< -o $@ + +ggml-alloc.o: ggml-alloc.c ggml.h ggml-alloc.h + $(CC) $(CFLAGS) -c $< -o $@ + +ggml-backend.o: ggml-backend.c ggml.h ggml-backend.h + $(CC) $(CFLAGS) -c $< -o $@ + +ggml-quants.o: ggml-quants.c ggml.h ggml-quants.h ggml-common.h + $(CC) $(CFLAGS) -c $< -o $@ + +unicode.o: unicode.cpp unicode.h + $(CXX) $(CXXFLAGS) -c $< -o $@ + +unicode-data.o: unicode-data.cpp unicode-data.h + $(CXX) $(CXXFLAGS) -c $< -o $@ + +OBJS += ggml-alloc.o ggml-backend.o ggml-quants.o unicode.o unicode-data.o + +llama.o: llama.cpp unicode.h ggml.h ggml-alloc.h ggml-backend.h ggml-cuda.h ggml-metal.h llama.h + $(CXX) $(CXXFLAGS) -c $< -o $@ + +COMMON_H_DEPS = common/common.h common/sampling.h common/log.h llama.h +COMMON_DEPS = common.o sampling.o grammar-parser.o build-info.o json-schema-to-grammar.o + +common.o: common/common.cpp $(COMMON_H_DEPS) + $(CXX) $(CXXFLAGS) -c $< -o $@ + +sampling.o: common/sampling.cpp $(COMMON_H_DEPS) + $(CXX) $(CXXFLAGS) -c $< -o $@ + +console.o: common/console.cpp common/console.h + $(CXX) $(CXXFLAGS) -c $< -o $@ + +grammar-parser.o: common/grammar-parser.cpp common/grammar-parser.h + $(CXX) $(CXXFLAGS) -c $< -o $@ + +json-schema-to-grammar.o: common/json-schema-to-grammar.cpp common/json-schema-to-grammar.h + $(CXX) $(CXXFLAGS) -c $< -o $@ + +train.o: common/train.cpp common/train.h + $(CXX) $(CXXFLAGS) -c $< -o $@ + +ngram-cache.o: common/ngram-cache.cpp common/ngram-cache.h + $(CXX) $(CXXFLAGS) -c $< -o $@ + +libllama.so: llama.o ggml.o $(OBJS) + $(CXX) $(CXXFLAGS) -shared -fPIC -o $@ $^ $(LDFLAGS) + +libllama.a: llama.o ggml.o $(OBJS) $(COMMON_DEPS) + ar rcs libllama.a llama.o ggml.o $(OBJS) $(COMMON_DEPS) + +clean: + rm -vrf *.o tests/*.o *.so *.a *.dll benchmark-matmult lookup-create lookup-merge lookup-stats common/build-info.cpp *.dot $(COV_TARGETS) $(BUILD_TARGETS) $(TEST_TARGETS) + rm -vrf ggml-cuda/*.o + find examples pocs -type f -name "*.o" -delete + +# +# Examples +# + +# $< is the first prerequisite, i.e. the source file. +# Explicitly compile this to an object file so that it can be cached with ccache. 
+# The source file is then filtered out from $^ (the list of all prerequisites) and the object file is added instead. + +# Helper function that replaces .c, .cpp, and .cu file endings with .o: +GET_OBJ_FILE = $(patsubst %.c,%.o,$(patsubst %.cpp,%.o,$(patsubst %.cu,%.o,$(1)))) + +main: examples/main/main.cpp ggml.o llama.o $(COMMON_DEPS) console.o grammar-parser.o $(OBJS) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + @echo + @echo '==== Run ./main -h for help. ====' + @echo + +infill: examples/infill/infill.cpp ggml.o llama.o $(COMMON_DEPS) console.o grammar-parser.o $(OBJS) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + +simple: examples/simple/simple.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + +tokenize: examples/tokenize/tokenize.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + +batched: examples/batched/batched.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + +batched-bench: examples/batched-bench/batched-bench.cpp build-info.o ggml.o llama.o $(COMMON_DEPS) $(OBJS) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + +quantize: examples/quantize/quantize.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + +quantize-stats: examples/quantize-stats/quantize-stats.cpp build-info.o ggml.o llama.o $(OBJS) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + +perplexity: examples/perplexity/perplexity.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + +imatrix: examples/imatrix/imatrix.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + +embedding: examples/embedding/embedding.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + +gritlm: examples/gritlm/gritlm.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + +save-load-state: examples/save-load-state/save-load-state.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + +server: examples/server/server.cpp examples/server/utils.hpp examples/server/httplib.h common/json.hpp examples/server/index.html.hpp examples/server/index.js.hpp examples/server/completion.js.hpp 
examples/server/json-schema-to-grammar.mjs.hpp common/stb_image.h ggml.o llama.o $(COMMON_DEPS) grammar-parser.o $(OBJS) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h %.hpp $<,$^) -Iexamples/server $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) $(LWINSOCK2) + +# Portable equivalent of `cd examples/server/public && xxd -i $(notdir $<) ../$(notdir $<).hpp`: +examples/server/%.hpp: examples/server/public/% Makefile + @( export NAME=$(subst .,_,$(subst -,_,$(notdir $<))) && \ + echo "unsigned char $${NAME}[] = {" && \ + cat $< | od -v -t x1 -An | sed -E 's/([0-9a-fA-F]+)/0x\1, /g' && \ + echo "};" && \ + echo "unsigned int $${NAME}_len = $(shell cat $< | wc -c );" \ + ) > $@ + +gguf: examples/gguf/gguf.cpp ggml.o $(OBJS) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + +gguf-split: examples/gguf-split/gguf-split.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + +eval-callback: examples/eval-callback/eval-callback.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + +train-text-from-scratch: examples/train-text-from-scratch/train-text-from-scratch.cpp ggml.o llama.o $(COMMON_DEPS) train.o $(OBJS) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + +convert-llama2c-to-ggml: examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp ggml.o llama.o $(OBJS) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + +llama-bench: examples/llama-bench/llama-bench.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + +libllava.a: examples/llava/llava.cpp examples/llava/llava.h examples/llava/clip.cpp examples/llava/clip.h common/stb_image.h common/base64.hpp ggml.o llama.o $(COMMON_DEPS) $(OBJS) + $(CXX) $(CXXFLAGS) -static -fPIC -c $< -o $@ -Wno-cast-qual + +llava-cli: examples/llava/llava-cli.cpp examples/llava/clip.h examples/llava/clip.cpp examples/llava/llava.h examples/llava/llava.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) -c examples/llava/clip.cpp -o $(call GET_OBJ_FILE, examples/llava/clip.cpp) -Wno-cast-qual + $(CXX) $(CXXFLAGS) -c examples/llava/llava.cpp -o $(call GET_OBJ_FILE, examples/llava/llava.cpp) + $(CXX) $(CXXFLAGS) $(filter-out %.h $< examples/llava/clip.cpp examples/llava/llava.cpp,$^) $(call GET_OBJ_FILE, $<) $(call GET_OBJ_FILE, examples/llava/clip.cpp) $(call GET_OBJ_FILE, examples/llava/llava.cpp) -o $@ $(LDFLAGS) + +baby-llama: examples/baby-llama/baby-llama.cpp ggml.o llama.o $(COMMON_DEPS) train.o $(OBJS) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + +beam-search: examples/beam-search/beam-search.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + +finetune: 
examples/finetune/finetune.cpp ggml.o llama.o $(COMMON_DEPS) train.o $(OBJS) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + +export-lora: examples/export-lora/export-lora.cpp ggml.o common/common.h $(OBJS) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + +retrieval: examples/retrieval/retrieval.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + +speculative: examples/speculative/speculative.cpp ggml.o llama.o $(COMMON_DEPS) grammar-parser.o $(OBJS) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + +parallel: examples/parallel/parallel.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + +lookahead: examples/lookahead/lookahead.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + +lookup: examples/lookup/lookup.cpp ggml.o llama.o ngram-cache.o $(COMMON_DEPS) $(OBJS) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + $(CXX) $(CXXFLAGS) -c examples/lookup/lookup-create.cpp -o $(call GET_OBJ_FILE, examples/lookup/lookup-create.cpp) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, examples/lookup/lookup-create.cpp) -o lookup-create $(LDFLAGS) + $(CXX) $(CXXFLAGS) -c examples/lookup/lookup-merge.cpp -o $(call GET_OBJ_FILE, examples/lookup/lookup-merge.cpp) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, examples/lookup/lookup-merge.cpp) -o lookup-merge $(LDFLAGS) + $(CXX) $(CXXFLAGS) -c examples/lookup/lookup-stats.cpp -o $(call GET_OBJ_FILE, examples/lookup/lookup-stats.cpp) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, examples/lookup/lookup-stats.cpp) -o lookup-stats $(LDFLAGS) + +passkey: examples/passkey/passkey.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + +gbnf-validator: examples/gbnf-validator/gbnf-validator.cpp ggml.o llama.o $(COMMON_DEPS) grammar-parser.o $(OBJS) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + +ifeq ($(UNAME_S),Darwin) +swift: examples/batched.swift + (cd examples/batched.swift; make build) +endif + +common/build-info.cpp: $(wildcard .git/index) scripts/build-info.sh + @sh scripts/build-info.sh "$(CC)" > $@.tmp + @if ! 
cmp -s $@.tmp $@; then \ + mv $@.tmp $@; \ + else \ + rm $@.tmp; \ + fi + +build-info.o: common/build-info.cpp + $(CXX) $(CXXFLAGS) -c $(filter-out %.h,$^) -o $@ + +# +# Tests +# + +tests: $(TEST_TARGETS) + +benchmark-matmult: examples/benchmark/benchmark-matmult.cpp build-info.o ggml.o $(OBJS) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + +run-benchmark-matmult: benchmark-matmult + ./$@ + +.PHONY: run-benchmark-matmult swift + +vdot: pocs/vdot/vdot.cpp ggml.o $(OBJS) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + +q8dot: pocs/vdot/q8dot.cpp ggml.o $(OBJS) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + +tests/test-llama-grammar: tests/test-llama-grammar.cpp ggml.o grammar-parser.o $(OBJS) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + +tests/test-grammar-parser: tests/test-grammar-parser.cpp ggml.o llama.o grammar-parser.o $(OBJS) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + +tests/test-grammar-integration: tests/test-grammar-integration.cpp ggml.o llama.o grammar-parser.o $(OBJS) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + +tests/test-double-float: tests/test-double-float.cpp ggml.o $(OBJS) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + +tests/test-json-schema-to-grammar: tests/test-json-schema-to-grammar.cpp json-schema-to-grammar.o ggml.o llama.o grammar-parser.o $(OBJS) + $(CXX) $(CXXFLAGS) -Iexamples/server -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + +tests/test-grad0: tests/test-grad0.cpp ggml.o $(OBJS) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + +tests/test-opt: tests/test-opt.cpp ggml.o $(OBJS) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + +tests/test-quantize-fns: tests/test-quantize-fns.cpp ggml.o $(OBJS) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + +tests/test-quantize-perf: tests/test-quantize-perf.cpp ggml.o $(OBJS) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + +tests/test-sampling: tests/test-sampling.cpp ggml.o llama.o $(OBJS) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + +tests/test-tokenizer-0: tests/test-tokenizer-0.cpp ggml.o llama.o $(COMMON_DEPS) console.o $(OBJS) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + +tests/test-tokenizer-1-bpe: tests/test-tokenizer-1-bpe.cpp ggml.o llama.o $(COMMON_DEPS) console.o $(OBJS) + $(CXX) $(CXXFLAGS) -c $< -o 
$(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + +tests/test-tokenizer-1-spm: tests/test-tokenizer-1-spm.cpp ggml.o llama.o $(COMMON_DEPS) console.o $(OBJS) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + +tests/test-rope: tests/test-rope.cpp ggml.o $(OBJS) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + +tests/test-c.o: tests/test-c.c llama.h + $(CC) $(CFLAGS) -c $(filter-out %.h,$^) -o $@ + +tests/test-backend-ops: tests/test-backend-ops.cpp ggml.o $(OBJS) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + +tests/test-model-load-cancel: tests/test-model-load-cancel.cpp ggml.o llama.o tests/get-model.cpp $(COMMON_DEPS) $(OBJS) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + +tests/test-autorelease: tests/test-autorelease.cpp ggml.o llama.o tests/get-model.cpp $(COMMON_DEPS) $(OBJS) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) + +tests/test-chat-template: tests/test-chat-template.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS) + $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) diff --git a/llama-cpp-python/vendor/llama.cpp/Package.swift b/llama-cpp-python/vendor/llama.cpp/Package.swift new file mode 100644 index 0000000000000000000000000000000000000000..183e647575b42802edbaa110ca27bbcfd54e721c --- /dev/null +++ b/llama-cpp-python/vendor/llama.cpp/Package.swift @@ -0,0 +1,78 @@ +// swift-tools-version:5.5 + +import PackageDescription + +var sources = [ + "ggml.c", + "sgemm.cpp", + "llama.cpp", + "unicode.cpp", + "unicode-data.cpp", + "ggml-alloc.c", + "ggml-backend.c", + "ggml-quants.c", +] + +var resources: [Resource] = [] +var linkerSettings: [LinkerSetting] = [] +var cSettings: [CSetting] = [ + .unsafeFlags(["-Wno-shorten-64-to-32", "-O3", "-DNDEBUG"]), + .unsafeFlags(["-fno-objc-arc"]), + // NOTE: NEW_LAPACK will required iOS version 16.4+ + // We should consider add this in the future when we drop support for iOS 14 + // (ref: ref: https://developer.apple.com/documentation/accelerate/1513264-cblas_sgemm?language=objc) + // .define("ACCELERATE_NEW_LAPACK"), + // .define("ACCELERATE_LAPACK_ILP64") +] + +#if canImport(Darwin) +sources.append("ggml-metal.m") +resources.append(.process("ggml-metal.metal")) +linkerSettings.append(.linkedFramework("Accelerate")) +cSettings.append( + contentsOf: [ + .define("GGML_USE_ACCELERATE"), + .define("GGML_USE_METAL") + ] +) +#endif + +#if os(Linux) + cSettings.append(.define("_GNU_SOURCE")) +#endif + +let package = Package( + name: "llama", + platforms: [ + .macOS(.v12), + .iOS(.v14), + .watchOS(.v4), + .tvOS(.v14) + ], + products: [ + .library(name: "llama", targets: ["llama"]), + ], + targets: [ + .target( + name: "llama", + path: ".", + exclude: [ + "cmake", + "examples", + "scripts", + "models", + "tests", + "CMakeLists.txt", + "ggml-cuda.cu", + "ggml-cuda.h", + "Makefile" + ], + sources: sources, + resources: resources, + publicHeadersPath: "spm-headers", + cSettings: cSettings, + linkerSettings: linkerSettings + ) + ], + 
cxxLanguageStandard: .cxx11 +) diff --git a/llama-cpp-python/vendor/llama.cpp/README-sycl.md b/llama-cpp-python/vendor/llama.cpp/README-sycl.md new file mode 100644 index 0000000000000000000000000000000000000000..cfa248a95b5ffc52dfd870e9e5e1162b90f6aeae --- /dev/null +++ b/llama-cpp-python/vendor/llama.cpp/README-sycl.md @@ -0,0 +1,568 @@ +# llama.cpp for SYCL + +- [Background](#background) +- [News](#news) +- [OS](#os) +- [Hardware](#hardware) +- [Docker](#docker) +- [Linux](#linux) +- [Windows](#windows) +- [Environment Variable](#environment-variable) +- [Known Issue](#known-issues) +- [Q&A](#qa) +- [TODO](#todo) + +## Background + +**SYCL** is a high-level parallel programming model designed to improve developers productivity writing code across various hardware accelerators such as CPUs, GPUs, and FPGAs. It is a single-source language designed for heterogeneous computing and based on standard C++17. + +**oneAPI** is an open ecosystem and a standard-based specification, supporting multiple architectures including but not limited to intel CPUs, GPUs and FPGAs. The key components of the oneAPI ecosystem include: + +- **DPCPP** *(Data Parallel C++)*: The primary oneAPI SYCL implementation, which includes the icpx/icx Compilers. +- **oneAPI Libraries**: A set of highly optimized libraries targeting multiple domains *(e.g. oneMKL - Math Kernel Library)*. +- **oneAPI LevelZero**: A high performance low level interface for fine-grained control over intel iGPUs and dGPUs. +- **Nvidia & AMD Plugins**: These are plugins extending oneAPI's DPCPP support to SYCL on Nvidia and AMD GPU targets. + +### Llama.cpp + SYCL + +The llama.cpp SYCL backend is designed to support **Intel GPU** firstly. Based on the cross-platform feature of SYCL, it could support other vendor GPUs: Nvidia GPU (*AMD GPU coming*). + +When targeting **Intel CPU**, it is recommended to use llama.cpp for [Intel oneMKL](README.md#intel-onemkl) backend. + +It has the similar design of other llama.cpp BLAS-based paths such as *OpenBLAS, cuBLAS, CLBlast etc..*. In beginning work, the oneAPI's [SYCLomatic](https://github.com/oneapi-src/SYCLomatic) open-source migration tool (Commercial release [Intel® DPC++ Compatibility Tool](https://www.intel.com/content/www/us/en/developer/tools/oneapi/dpc-compatibility-tool.html)) was used for this purpose. + +## News + +- 2024.4 + - Support data types: GGML_TYPE_IQ4_NL, GGML_TYPE_IQ4_XS, GGML_TYPE_IQ3_XXS, GGML_TYPE_IQ3_S, GGML_TYPE_IQ2_XXS, GGML_TYPE_IQ2_XS, GGML_TYPE_IQ2_S, GGML_TYPE_IQ1_S, GGML_TYPE_IQ1_M. + +- 2024.3 + - Release binary files of Windows. + - A blog is published: **Run LLM on all Intel GPUs Using llama.cpp**: [intel.com](https://www.intel.com/content/www/us/en/developer/articles/technical/run-llm-on-all-gpus-using-llama-cpp-artical.html) or [medium.com](https://medium.com/@jianyu_neo/run-llm-on-all-intel-gpus-using-llama-cpp-fd2e2dcbd9bd). + - New base line is ready: [tag b2437](https://github.com/ggerganov/llama.cpp/tree/b2437). + - Support multiple cards: **--split-mode**: [none|layer]; not support [row], it's on developing. + - Support to assign main GPU by **--main-gpu**, replace $GGML_SYCL_DEVICE. + - Support detecting all GPUs with level-zero and same top **Max compute units**. + - Support OPs + - hardsigmoid + - hardswish + - pool2d + +- 2024.1 + - Create SYCL backend for Intel GPU. 
+ - Support Windows build + +## OS + +| OS | Status | Verified | +|---------|---------|------------------------------------| +| Linux | Support | Ubuntu 22.04, Fedora Silverblue 39 | +| Windows | Support | Windows 11 | + + +## Hardware + +### Intel GPU + +**Verified devices** + +| Intel GPU | Status | Verified Model | +|-------------------------------|---------|---------------------------------------| +| Intel Data Center Max Series | Support | Max 1550, 1100 | +| Intel Data Center Flex Series | Support | Flex 170 | +| Intel Arc Series | Support | Arc 770, 730M | +| Intel built-in Arc GPU | Support | built-in Arc GPU in Meteor Lake | +| Intel iGPU | Support | iGPU in i5-1250P, i7-1260P, i7-1165G7 | + +*Notes:* + +- **Memory** + - The device memory is a limitation when running a large model. The loaded model size, *`llm_load_tensors: buffer_size`*, is displayed in the log when running `./bin/main`. + + - Please make sure the GPU shared memory from the host is large enough to account for the model's size. For e.g. the *llama-2-7b.Q4_0* requires at least 8.0GB for integrated GPU and 4.0GB for discrete GPU. + +- **Execution Unit (EU)** + - If the iGPU has less than 80 EUs, the inference speed will likely be too slow for practical use. + +### Other Vendor GPU + +**Verified devices** + +| Nvidia GPU | Status | Verified Model | +|--------------------------|---------|----------------| +| Ampere Series | Support | A100, A4000 | +| Ampere Series *(Mobile)* | Support | RTX 40 Series | + +## Docker +The docker build option is currently limited to *intel GPU* targets. + +### Build image +```sh +# Using FP16 +docker build -t llama-cpp-sycl --build-arg="LLAMA_SYCL_F16=ON" -f .devops/main-intel.Dockerfile . +``` + +*Notes*: + +To build in default FP32 *(Slower than FP16 alternative)*, you can remove the `--build-arg="LLAMA_SYCL_F16=ON"` argument from the previous command. + +You can also use the `.devops/server-intel.Dockerfile`, which builds the *"server"* alternative. + +### Run container + +```sh +# First, find all the DRI cards +ls -la /dev/dri +# Then, pick the card that you want to use (here for e.g. /dev/dri/card1). +docker run -it --rm -v "$(pwd):/app:Z" --device /dev/dri/renderD128:/dev/dri/renderD128 --device /dev/dri/card1:/dev/dri/card1 llama-cpp-sycl -m "/app/models/YOUR_MODEL_FILE" -p "Building a website can be done in 10 simple steps:" -n 400 -e -ngl 33 +``` + +*Notes:* +- Docker has been tested successfully on native Linux. WSL support has not been verified yet. +- You may need to install Intel GPU driver on the **host** machine *(Please refer to the [Linux configuration](#linux) for details)*. + +## Linux + +### I. Setup Environment + +1. **Install GPU drivers** + + - **Intel GPU** + +Intel data center GPUs drivers installation guide and download page can be found here: [Get intel dGPU Drivers](https://dgpu-docs.intel.com/driver/installation.html#ubuntu-install-steps). + +*Note*: for client GPUs *(iGPU & Arc A-Series)*, please refer to the [client iGPU driver installation](https://dgpu-docs.intel.com/driver/client/overview.html). + +Once installed, add the user(s) to the `video` and `render` groups. + +```sh +sudo usermod -aG render $USER +sudo usermod -aG video $USER +``` + +*Note*: logout/re-login for the changes to take effect. 
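+
+As an optional sanity check (a minimal sketch, assuming a typical Linux shell where the `groups` utility is available), you can confirm after re-login that the membership change actually took effect before moving on to the driver checks below:
+
+```sh
+# Print only the relevant groups for the current user; both "render" and
+# "video" should appear in the output after logging back in.
+groups | tr ' ' '\n' | grep -E '^(render|video)$'
+```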
+ +Verify installation through `clinfo`: + +```sh +sudo apt install clinfo +sudo clinfo -l +``` + +Sample output: + +```sh +Platform #0: Intel(R) OpenCL Graphics + `-- Device #0: Intel(R) Arc(TM) A770 Graphics + +Platform #0: Intel(R) OpenCL HD Graphics + `-- Device #0: Intel(R) Iris(R) Xe Graphics [0x9a49] +``` + +- **Nvidia GPU** + +In order to target Nvidia GPUs through SYCL, please make sure the CUDA/CUBLAS native requirements *-found [here](README.md#cuda)-* are installed. + +2. **Install Intel® oneAPI Base toolkit** + +- **For Intel GPU** + +The base toolkit can be obtained from the official [Intel® oneAPI Base Toolkit](https://www.intel.com/content/www/us/en/developer/tools/oneapi/base-toolkit.html) page. + +Please follow the instructions for downloading and installing the Toolkit for Linux, and preferably keep the default installation values unchanged, notably the installation path *(`/opt/intel/oneapi` by default)*. + +Following guidelines/code snippets assume the default installation values. Otherwise, please make sure the necessary changes are reflected where applicable. + +Upon a successful installation, SYCL is enabled for the available intel devices, along with relevant libraries such as oneAPI MKL for intel GPUs. + +- **Adding support to Nvidia GPUs** + +**oneAPI Plugin**: In order to enable SYCL support on Nvidia GPUs, please install the [Codeplay oneAPI Plugin for Nvidia GPUs](https://developer.codeplay.com/products/oneapi/nvidia/download). User should also make sure the plugin version matches the installed base toolkit one *(previous step)* for a seamless "oneAPI on Nvidia GPU" setup. + + +**oneMKL for cuBlas**: The current oneMKL releases *(shipped with the oneAPI base-toolkit)* do not contain the cuBLAS backend. A build from source of the upstream [oneMKL](https://github.com/oneapi-src/oneMKL) with the *cuBLAS* backend enabled is thus required to run it on Nvidia GPUs. + +```sh +git clone https://github.com/oneapi-src/oneMKL +cd oneMKL +cmake -B buildWithCublas -DCMAKE_CXX_COMPILER=icpx -DCMAKE_C_COMPILER=icx -DENABLE_MKLGPU_BACKEND=OFF -DENABLE_MKLCPU_BACKEND=OFF -DENABLE_CUBLAS_BACKEND=ON -DTARGET_DOMAINS=blas +cmake --build buildWithCublas --config Release +``` + + +3. **Verify installation and environment** + +In order to check the available SYCL devices on the machine, please use the `sycl-ls` command. +```sh +source /opt/intel/oneapi/setvars.sh +sycl-ls +``` + +- **Intel GPU** + +When targeting an intel GPU, the user should expect one or more level-zero devices among the available SYCL devices. 
Please make sure that at least one GPU is present, for instance [`ext_oneapi_level_zero:gpu:0`] in the sample output below: + +``` +[opencl:acc:0] Intel(R) FPGA Emulation Platform for OpenCL(TM), Intel(R) FPGA Emulation Device OpenCL 1.2 [2023.16.10.0.17_160000] +[opencl:cpu:1] Intel(R) OpenCL, 13th Gen Intel(R) Core(TM) i7-13700K OpenCL 3.0 (Build 0) [2023.16.10.0.17_160000] +[opencl:gpu:2] Intel(R) OpenCL Graphics, Intel(R) Arc(TM) A770 Graphics OpenCL 3.0 NEO [23.30.26918.50] +[ext_oneapi_level_zero:gpu:0] Intel(R) Level-Zero, Intel(R) Arc(TM) A770 Graphics 1.3 [1.3.26918] +``` + +- **Nvidia GPU** + +Similarly, user targeting Nvidia GPUs should expect at least one SYCL-CUDA device [`ext_oneapi_cuda:gpu`] as bellow: +``` +[opencl:acc:0] Intel(R) FPGA Emulation Platform for OpenCL(TM), Intel(R) FPGA Emulation Device OpenCL 1.2 [2023.16.12.0.12_195853.xmain-hotfix] +[opencl:cpu:1] Intel(R) OpenCL, Intel(R) Xeon(R) Gold 6326 CPU @ 2.90GHz OpenCL 3.0 (Build 0) [2023.16.12.0.12_195853.xmain-hotfix] +[ext_oneapi_cuda:gpu:0] NVIDIA CUDA BACKEND, NVIDIA A100-PCIE-40GB 8.0 [CUDA 12.2] +``` + +### II. Build llama.cpp + +#### Intel GPU +```sh +# Export relevant ENV variables +source /opt/intel/oneapi/setvars.sh + +# Build LLAMA with MKL BLAS acceleration for intel GPU + +# Option 1: Use FP32 (recommended for better performance in most cases) +cmake -B build -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx + +# Option 2: Use FP16 +cmake -B build -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_SYCL_F16=ON + +# build all binary +cmake --build build --config Release -j -v +``` + +#### Nvidia GPU +```sh +# Export relevant ENV variables +export LD_LIBRARY_PATH=/path/to/oneMKL/buildWithCublas/lib:$LD_LIBRARY_PATH +export LIBRARY_PATH=/path/to/oneMKL/buildWithCublas/lib:$LIBRARY_PATH +export CPLUS_INCLUDE_DIR=/path/to/oneMKL/buildWithCublas/include:$CPLUS_INCLUDE_DIR +export CPLUS_INCLUDE_DIR=/path/to/oneMKL/include:$CPLUS_INCLUDE_DIR + +# Build LLAMA with Nvidia BLAS acceleration through SYCL + +# Option 1: Use FP32 (recommended for better performance in most cases) +cmake -B build -DLLAMA_SYCL=ON -DLLAMA_SYCL_TARGET=NVIDIA -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx + +# Option 2: Use FP16 +cmake -B build -DLLAMA_SYCL=ON -DLLAMA_SYCL_TARGET=NVIDIA -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_SYCL_F16=ON + +# build all binary +cmake --build build --config Release -j -v + +``` + +### III. Run the inference + +1. Retrieve and prepare model + +You can refer to the general [*Prepare and Quantize*](README.md#prepare-and-quantize) guide for model prepration, or simply download [llama-2-7b.Q4_0.gguf](https://huggingface.co/TheBloke/Llama-2-7B-GGUF/blob/main/llama-2-7b.Q4_0.gguf) model as example. + +2. Enable oneAPI running environment + +```sh +source /opt/intel/oneapi/setvars.sh +``` + +3. 
List devices information + +Similar to the native `sycl-ls`, available SYCL devices can be queried as follow: + +```sh +./build/bin/ls-sycl-device +``` +A example of such log in a system with 1 *intel CPU* and 1 *intel GPU* can look like the following: +``` +found 6 SYCL devices: +| | | |Compute |Max compute|Max work|Max sub| | +|ID| Device Type| Name|capability|units |group |group |Global mem size| +|--|------------------|---------------------------------------------|----------|-----------|--------|-------|---------------| +| 0|[level_zero:gpu:0]| Intel(R) Arc(TM) A770 Graphics| 1.3| 512| 1024| 32| 16225243136| +| 1|[level_zero:gpu:1]| Intel(R) UHD Graphics 770| 1.3| 32| 512| 32| 53651849216| +| 2| [opencl:gpu:0]| Intel(R) Arc(TM) A770 Graphics| 3.0| 512| 1024| 32| 16225243136| +| 3| [opencl:gpu:1]| Intel(R) UHD Graphics 770| 3.0| 32| 512| 32| 53651849216| +| 4| [opencl:cpu:0]| 13th Gen Intel(R) Core(TM) i7-13700K| 3.0| 24| 8192| 64| 67064815616| +| 5| [opencl:acc:0]| Intel(R) FPGA Emulation Device| 1.2| 24|67108864| 64| 67064815616| +``` + +| Attribute | Note | +|------------------------|-------------------------------------------------------------| +| compute capability 1.3 | Level-zero driver/runtime, recommended | +| compute capability 3.0 | OpenCL driver/runtime, slower than level-zero in most cases | + +4. Launch inference + +There are two device selection modes: + +- Single device: Use one device target specified by the user. +- Multiple devices: Automatically select the devices with the same largest Max compute-units. + +| Device selection | Parameter | +|------------------|----------------------------------------| +| Single device | --split-mode none --main-gpu DEVICE_ID | +| Multiple devices | --split-mode layer (default) | + +Examples: + +- Use device 0: + +```sh +ZES_ENABLE_SYSMAN=1 ./build/bin/main -m models/llama-2-7b.Q4_0.gguf -p "Building a website can be done in 10 simple steps:" -n 400 -e -ngl 33 -sm none -mg 0 +``` +or run by script: + +```sh +./examples/sycl/run_llama2.sh 0 +``` + +- Use multiple devices: + +```sh +ZES_ENABLE_SYSMAN=1 ./build/bin/main -m models/llama-2-7b.Q4_0.gguf -p "Building a website can be done in 10 simple steps:" -n 400 -e -ngl 33 -sm layer +``` + +Otherwise, you can run the script: + +```sh +./examples/sycl/run_llama2.sh +``` + +*Notes:* + +- Upon execution, verify the selected device(s) ID(s) in the output log, which can for instance be displayed as follow: + +```sh +detect 1 SYCL GPUs: [0] with top Max compute units:512 +``` +Or +```sh +use 1 SYCL GPUs: [0] with Max compute units:512 +``` + +## Windows + +### I. Setup Environment + +1. Install GPU driver + +Intel GPU drivers instructions guide and download page can be found here: [Get intel GPU Drivers](https://www.intel.com/content/www/us/en/products/docs/discrete-gpus/arc/software/drivers.html). + +2. Install Visual Studio + +If you already have a recent version of Microsoft Visual Studio, you can skip this step. Otherwise, please refer to the official download page for [Microsoft Visual Studio](https://visualstudio.microsoft.com/). + +3. Install Intel® oneAPI Base toolkit + +The base toolkit can be obtained from the official [Intel® oneAPI Base Toolkit](https://www.intel.com/content/www/us/en/developer/tools/oneapi/base-toolkit.html) page. + +Please follow the instructions for downloading and installing the Toolkit for Windows, and preferably keep the default installation values unchanged, notably the installation path *(`C:\Program Files (x86)\Intel\oneAPI` by default)*. 
+ +Following guidelines/code snippets assume the default installation values. Otherwise, please make sure the necessary changes are reflected where applicable. + +b. Enable oneAPI running environment: + +- Type "oneAPI" in the search bar, then open the `Intel oneAPI command prompt for Intel 64 for Visual Studio 2022` App. + +- On the command prompt, enable the runtime environment with the following: +``` +"C:\Program Files (x86)\Intel\oneAPI\setvars.bat" intel64 +``` + +c. Verify installation + +In the oneAPI command line, run the following to print the available SYCL devices: + +``` +sycl-ls +``` + +There should be one or more *level-zero* GPU devices displayed as **[ext_oneapi_level_zero:gpu]**. Below is example of such output detecting an *intel Iris Xe* GPU as a Level-zero SYCL device: + +Output (example): +``` +[opencl:acc:0] Intel(R) FPGA Emulation Platform for OpenCL(TM), Intel(R) FPGA Emulation Device OpenCL 1.2 [2023.16.10.0.17_160000] +[opencl:cpu:1] Intel(R) OpenCL, 11th Gen Intel(R) Core(TM) i7-1185G7 @ 3.00GHz OpenCL 3.0 (Build 0) [2023.16.10.0.17_160000] +[opencl:gpu:2] Intel(R) OpenCL Graphics, Intel(R) Iris(R) Xe Graphics OpenCL 3.0 NEO [31.0.101.5186] +[ext_oneapi_level_zero:gpu:0] Intel(R) Level-Zero, Intel(R) Iris(R) Xe Graphics 1.3 [1.3.28044] +``` + +4. Install build tools + +a. Download & install cmake for Windows: https://cmake.org/download/ + +b. Download & install mingw-w64 make for Windows provided by w64devkit + +- Download the 1.19.0 version of [w64devkit](https://github.com/skeeto/w64devkit/releases/download/v1.19.0/w64devkit-1.19.0.zip). + +- Extract `w64devkit` on your pc. + +- Add the **bin** folder path in the Windows system PATH environment (for e.g. `C:\xxx\w64devkit\bin\`). + +### II. Build llama.cpp + +On the oneAPI command line window, step into the llama.cpp main directory and run the following: + +``` +@call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" intel64 --force + +# Option 1: Use FP32 (recommended for better performance in most cases) +cmake -B build -G "MinGW Makefiles" -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icx -DCMAKE_BUILD_TYPE=Release + +# Option 2: Or FP16 +cmake -B build -G "MinGW Makefiles" -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icx -DCMAKE_BUILD_TYPE=Release -DLLAMA_SYCL_F16=ON + +cmake --build build --config Release -j +``` + +Otherwise, run the `win-build-sycl.bat` wrapper which encapsulates the former instructions: +```sh +.\examples\sycl\win-build-sycl.bat +``` + +*Notes:* + +- By default, calling `make` will build all target binary files. In case of a minimal experimental setup, the user can build the inference executable only through `make main`. + +### III. Run the inference + +1. Retrieve and prepare model + +You can refer to the general [*Prepare and Quantize*](README#prepare-and-quantize) guide for model prepration, or simply download [llama-2-7b.Q4_0.gguf](https://huggingface.co/TheBloke/Llama-2-7B-GGUF/blob/main/llama-2-7b.Q4_0.gguf) model as example. + +2. Enable oneAPI running environment + +On the oneAPI command line window, run the following and step into the llama.cpp directory: +``` +"C:\Program Files (x86)\Intel\oneAPI\setvars.bat" intel64 +``` + +3. 
List devices information + +Similar to the native `sycl-ls`, available SYCL devices can be queried as follow: + +``` +build\bin\ls-sycl-device.exe +``` + +The output of this command in a system with 1 *intel CPU* and 1 *intel GPU* would look like the following: +``` +found 6 SYCL devices: +| | | |Compute |Max compute|Max work|Max sub| | +|ID| Device Type| Name|capability|units |group |group |Global mem size| +|--|------------------|---------------------------------------------|----------|-----------|--------|-------|---------------| +| 0|[level_zero:gpu:0]| Intel(R) Arc(TM) A770 Graphics| 1.3| 512| 1024| 32| 16225243136| +| 1|[level_zero:gpu:1]| Intel(R) UHD Graphics 770| 1.3| 32| 512| 32| 53651849216| +| 2| [opencl:gpu:0]| Intel(R) Arc(TM) A770 Graphics| 3.0| 512| 1024| 32| 16225243136| +| 3| [opencl:gpu:1]| Intel(R) UHD Graphics 770| 3.0| 32| 512| 32| 53651849216| +| 4| [opencl:cpu:0]| 13th Gen Intel(R) Core(TM) i7-13700K| 3.0| 24| 8192| 64| 67064815616| +| 5| [opencl:acc:0]| Intel(R) FPGA Emulation Device| 1.2| 24|67108864| 64| 67064815616| + +``` + +| Attribute | Note | +|------------------------|-----------------------------------------------------------| +| compute capability 1.3 | Level-zero running time, recommended | +| compute capability 3.0 | OpenCL running time, slower than level-zero in most cases | + + +4. Launch inference + +There are two device selection modes: + +- Single device: Use one device assigned by user. +- Multiple devices: Automatically choose the devices with the same biggest Max compute units. + +| Device selection | Parameter | +|------------------|----------------------------------------| +| Single device | --split-mode none --main-gpu DEVICE_ID | +| Multiple devices | --split-mode layer (default) | + +Examples: + +- Use device 0: + +``` +build\bin\main.exe -m models\llama-2-7b.Q4_0.gguf -p "Building a website can be done in 10 simple steps:\nStep 1:" -n 400 -e -ngl 33 -s 0 -sm none -mg 0 +``` + +- Use multiple devices: + +``` +build\bin\main.exe -m models\llama-2-7b.Q4_0.gguf -p "Building a website can be done in 10 simple steps:\nStep 1:" -n 400 -e -ngl 33 -s 0 -sm layer +``` +Otherwise, run the following wrapper script: + +``` +.\examples\sycl\win-run-llama2.bat +``` + +Note: + +- Upon execution, verify the selected device(s) ID(s) in the output log, which can for instance be displayed as follow: + +```sh +detect 1 SYCL GPUs: [0] with top Max compute units:512 +``` +Or +```sh +use 1 SYCL GPUs: [0] with Max compute units:512 +``` + +## Environment Variable + +#### Build + +| Name | Value | Function | +|--------------------|-----------------------------------|---------------------------------------------| +| LLAMA_SYCL | ON (mandatory) | Enable build with SYCL code path. | +| LLAMA_SYCL_TARGET | INTEL *(default)* \| NVIDIA | Set the SYCL target device type. | +| LLAMA_SYCL_F16 | OFF *(default)* \|ON *(optional)* | Enable FP16 build with SYCL code path. | +| CMAKE_C_COMPILER | icx | Set *icx* compiler for SYCL code path. | +| CMAKE_CXX_COMPILER | icpx *(Linux)*, icx *(Windows)* | Set `icpx/icx` compiler for SYCL code path. | + +#### Runtime + +| Name | Value | Function | +|-------------------|------------------|---------------------------------------------------------------------------------------------------------------------------| +| GGML_SYCL_DEBUG | 0 (default) or 1 | Enable log function by macro: GGML_SYCL_DEBUG | +| ZES_ENABLE_SYSMAN | 0 (default) or 1 | Support to get free memory of GPU by sycl::aspect::ext_intel_free_memory.
Recommended to use when --split-mode = layer | + +## Known Issues + +- `Split-mode:[row]` is not supported. + +## Q&A + +- Error: `error while loading shared libraries: libsycl.so.7: cannot open shared object file: No such file or directory`. + + - Potential cause: Unavailable oneAPI installation or not set ENV variables. + - Solution: Install *oneAPI base toolkit* and enable its ENV through: `source /opt/intel/oneapi/setvars.sh`. + +- General compiler error: + + - Remove **build** folder or try a clean-build. + +- I can **not** see `[ext_oneapi_level_zero:gpu]` afer installing the GPU driver on Linux. + + Please double-check with `sudo sycl-ls`. + + If it's present in the list, please add video/render group to your user then **logout/login** or restart your system: + + ``` + sudo usermod -aG render $USER + sudo usermod -aG video $USER + ``` + Otherwise, please double-check the GPU driver installation steps. + +### **GitHub contribution**: +Please add the **[SYCL]** prefix/tag in issues/PRs titles to help the SYCL-team check/address them without delay. + +## TODO + +- Support row layer split for multiple card runs. diff --git a/llama-cpp-python/vendor/llama.cpp/README.md b/llama-cpp-python/vendor/llama.cpp/README.md new file mode 100644 index 0000000000000000000000000000000000000000..514ef3af15d5b96c2c7846b17108bffaeb6e67f8 --- /dev/null +++ b/llama-cpp-python/vendor/llama.cpp/README.md @@ -0,0 +1,1140 @@ +# llama.cpp + +![llama](https://user-images.githubusercontent.com/1991296/230134379-7181e485-c521-4d23-a0d6-f7b3b61ba524.png) + +[![License: MIT](https://img.shields.io/badge/license-MIT-blue.svg)](https://opensource.org/licenses/MIT) + +[Roadmap](https://github.com/users/ggerganov/projects/7) / [Project status](https://github.com/ggerganov/llama.cpp/discussions/3471) / [Manifesto](https://github.com/ggerganov/llama.cpp/discussions/205) / [ggml](https://github.com/ggerganov/ggml) + +Inference of Meta's [LLaMA](https://arxiv.org/abs/2302.13971) model (and others) in pure C/C++ + +### Recent API changes + +- [2024 Apr 21] `llama_token_to_piece` can now optionally render special tokens https://github.com/ggerganov/llama.cpp/pull/6807 +- [2024 Apr 4] State and session file functions reorganized under `llama_state_*` https://github.com/ggerganov/llama.cpp/pull/6341 +- [2024 Mar 26] Logits and embeddings API updated for compactness https://github.com/ggerganov/llama.cpp/pull/6122 +- [2024 Mar 13] Add `llama_synchronize()` + `llama_context_params.n_ubatch` https://github.com/ggerganov/llama.cpp/pull/6017 +- [2024 Mar 8] `llama_kv_cache_seq_rm()` returns a `bool` instead of `void`, and new `llama_n_seq_max()` returns the upper limit of acceptable `seq_id` in batches (relevant when dealing with multiple sequences) https://github.com/ggerganov/llama.cpp/pull/5328 +- [2024 Mar 4] Embeddings API updated https://github.com/ggerganov/llama.cpp/pull/5796 +- [2024 Mar 3] `struct llama_context_params` https://github.com/ggerganov/llama.cpp/pull/5849 + +### Hot topics + +- **BPE pre-tokenization support has been added: https://github.com/ggerganov/llama.cpp/pull/6920** +- MoE memory layout has been updated - reconvert models for `mmap` support and regenerate `imatrix` https://github.com/ggerganov/llama.cpp/pull/6387 +- Model sharding instructions using `gguf-split` https://github.com/ggerganov/llama.cpp/discussions/6404 +- Fix major bug in Metal batched inference https://github.com/ggerganov/llama.cpp/pull/6225 +- Multi-GPU pipeline parallelism support https://github.com/ggerganov/llama.cpp/pull/6017 +- 
Looking for contributions to add Deepseek support: https://github.com/ggerganov/llama.cpp/issues/5981 +- Quantization blind testing: https://github.com/ggerganov/llama.cpp/discussions/5962 +- Initial Mamba support has been added: https://github.com/ggerganov/llama.cpp/pull/5328 + +---- + +
+<details>
+  <summary>Table of Contents</summary>
+  <ol>
+    <li><a href="#description">Description</a></li>
+    <li><a href="#usage">Usage</a></li>
+    <li><a href="#contributing">Contributing</a></li>
+    <li><a href="#coding-guidelines">Coding guidelines</a></li>
+    <li><a href="#docs">Docs</a></li>
+  </ol>
+</details>
+ +## Description + +The main goal of `llama.cpp` is to enable LLM inference with minimal setup and state-of-the-art performance on a wide +variety of hardware - locally and in the cloud. + +- Plain C/C++ implementation without any dependencies +- Apple silicon is a first-class citizen - optimized via ARM NEON, Accelerate and Metal frameworks +- AVX, AVX2 and AVX512 support for x86 architectures +- 1.5-bit, 2-bit, 3-bit, 4-bit, 5-bit, 6-bit, and 8-bit integer quantization for faster inference and reduced memory use +- Custom CUDA kernels for running LLMs on NVIDIA GPUs (support for AMD GPUs via HIP) +- Vulkan, SYCL, and (partial) OpenCL backend support +- CPU+GPU hybrid inference to partially accelerate models larger than the total VRAM capacity + +Since its [inception](https://github.com/ggerganov/llama.cpp/issues/33#issuecomment-1465108022), the project has +improved significantly thanks to many contributions. It is the main playground for developing new features for the +[ggml](https://github.com/ggerganov/ggml) library. + +**Supported platforms:** + +- [X] Mac OS +- [X] Linux +- [X] Windows (via CMake) +- [X] Docker +- [X] FreeBSD + +**Supported models:** + +Typically finetunes of the base models below are supported as well. + +- [X] LLaMA 🦙 +- [x] LLaMA 2 🦙🦙 +- [x] LLaMA 3 🦙🦙🦙 +- [X] [Mistral 7B](https://huggingface.co/mistralai/Mistral-7B-v0.1) +- [x] [Mixtral MoE](https://huggingface.co/models?search=mistral-ai/Mixtral) +- [x] [DBRX](https://huggingface.co/databricks/dbrx-instruct) +- [X] [Falcon](https://huggingface.co/models?search=tiiuae/falcon) +- [X] [Chinese LLaMA / Alpaca](https://github.com/ymcui/Chinese-LLaMA-Alpaca) and [Chinese LLaMA-2 / Alpaca-2](https://github.com/ymcui/Chinese-LLaMA-Alpaca-2) +- [X] [Vigogne (French)](https://github.com/bofenghuang/vigogne) +- [X] [Koala](https://bair.berkeley.edu/blog/2023/04/03/koala/) +- [X] [Baichuan 1 & 2](https://huggingface.co/models?search=baichuan-inc/Baichuan) + [derivations](https://huggingface.co/hiyouga/baichuan-7b-sft) +- [X] [Aquila 1 & 2](https://huggingface.co/models?search=BAAI/Aquila) +- [X] [Starcoder models](https://github.com/ggerganov/llama.cpp/pull/3187) +- [X] [Refact](https://huggingface.co/smallcloudai/Refact-1_6B-fim) +- [X] [Persimmon 8B](https://github.com/ggerganov/llama.cpp/pull/3410) +- [X] [MPT](https://github.com/ggerganov/llama.cpp/pull/3417) +- [X] [Bloom](https://github.com/ggerganov/llama.cpp/pull/3553) +- [x] [Yi models](https://huggingface.co/models?search=01-ai/Yi) +- [X] [StableLM models](https://huggingface.co/stabilityai) +- [x] [Deepseek models](https://huggingface.co/models?search=deepseek-ai/deepseek) +- [x] [Qwen models](https://huggingface.co/models?search=Qwen/Qwen) +- [x] [PLaMo-13B](https://github.com/ggerganov/llama.cpp/pull/3557) +- [x] [Phi models](https://huggingface.co/models?search=microsoft/phi) +- [x] [GPT-2](https://huggingface.co/gpt2) +- [x] [Orion 14B](https://github.com/ggerganov/llama.cpp/pull/5118) +- [x] [InternLM2](https://huggingface.co/models?search=internlm2) +- [x] [CodeShell](https://github.com/WisdomShell/codeshell) +- [x] [Gemma](https://ai.google.dev/gemma) +- [x] [Mamba](https://github.com/state-spaces/mamba) +- [x] [Grok-1](https://huggingface.co/keyfan/grok-1-hf) +- [x] [Xverse](https://huggingface.co/models?search=xverse) +- [x] [Command-R models](https://huggingface.co/models?search=CohereForAI/c4ai-command-r) +- [x] [SEA-LION](https://huggingface.co/models?search=sea-lion) +- [x] [GritLM-7B](https://huggingface.co/GritLM/GritLM-7B) + 
[GritLM-8x7B](https://huggingface.co/GritLM/GritLM-8x7B) +- [x] [OLMo](https://allenai.org/olmo) + +(instructions for supporting more models: [HOWTO-add-model.md](./docs/HOWTO-add-model.md)) + +**Multimodal models:** + +- [x] [LLaVA 1.5 models](https://huggingface.co/collections/liuhaotian/llava-15-653aac15d994e992e2677a7e), [LLaVA 1.6 models](https://huggingface.co/collections/liuhaotian/llava-16-65b9e40155f60fd046a5ccf2) +- [x] [BakLLaVA](https://huggingface.co/models?search=SkunkworksAI/Bakllava) +- [x] [Obsidian](https://huggingface.co/NousResearch/Obsidian-3B-V0.5) +- [x] [ShareGPT4V](https://huggingface.co/models?search=Lin-Chen/ShareGPT4V) +- [x] [MobileVLM 1.7B/3B models](https://huggingface.co/models?search=mobileVLM) +- [x] [Yi-VL](https://huggingface.co/models?search=Yi-VL) +- [x] [Mini CPM](https://huggingface.co/models?search=MiniCPM) +- [x] [Moondream](https://huggingface.co/vikhyatk/moondream2) + +**HTTP server** + +[llama.cpp web server](./examples/server) is a lightweight [OpenAI API](https://github.com/openai/openai-openapi) compatible HTTP server that can be used to serve local models and easily connect them to existing clients. + +**Bindings:** + +- Python: [abetlen/llama-cpp-python](https://github.com/abetlen/llama-cpp-python) +- Go: [go-skynet/go-llama.cpp](https://github.com/go-skynet/go-llama.cpp) +- Node.js: [withcatai/node-llama-cpp](https://github.com/withcatai/node-llama-cpp) +- JS/TS (llama.cpp server client): [lgrammel/modelfusion](https://modelfusion.dev/integration/model-provider/llamacpp) +- JavaScript/Wasm (works in browser): [tangledgroup/llama-cpp-wasm](https://github.com/tangledgroup/llama-cpp-wasm) +- Typescript/Wasm (nicer API, available on npm): [ngxson/wllama](https://github.com/ngxson/wllama) +- Ruby: [yoshoku/llama_cpp.rb](https://github.com/yoshoku/llama_cpp.rb) +- Rust (more features): [edgenai/llama_cpp-rs](https://github.com/edgenai/llama_cpp-rs) +- Rust (nicer API): [mdrokz/rust-llama.cpp](https://github.com/mdrokz/rust-llama.cpp) +- Rust (more direct bindings): [utilityai/llama-cpp-rs](https://github.com/utilityai/llama-cpp-rs) +- C#/.NET: [SciSharp/LLamaSharp](https://github.com/SciSharp/LLamaSharp) +- Scala 3: [donderom/llm4s](https://github.com/donderom/llm4s) +- Clojure: [phronmophobic/llama.clj](https://github.com/phronmophobic/llama.clj) +- React Native: [mybigday/llama.rn](https://github.com/mybigday/llama.rn) +- Java: [kherud/java-llama.cpp](https://github.com/kherud/java-llama.cpp) +- Zig: [deins/llama.cpp.zig](https://github.com/Deins/llama.cpp.zig) +- Flutter/Dart: [netdur/llama_cpp_dart](https://github.com/netdur/llama_cpp_dart) +- PHP (API bindings and features built on top of llama.cpp): [distantmagic/resonance](https://github.com/distantmagic/resonance) [(more info)](https://github.com/ggerganov/llama.cpp/pull/6326) + +**UI:** + +Unless otherwise noted these projects are open-source with permissive licensing: + +- [iohub/collama](https://github.com/iohub/coLLaMA) +- [janhq/jan](https://github.com/janhq/jan) (AGPL) +- [nat/openplayground](https://github.com/nat/openplayground) +- [Faraday](https://faraday.dev/) (proprietary) +- [LMStudio](https://lmstudio.ai/) (proprietary) +- [LocalAI](https://github.com/mudler/LocalAI) (MIT) +- [LostRuins/koboldcpp](https://github.com/LostRuins/koboldcpp) (AGPL) +- [Mozilla-Ocho/llamafile](https://github.com/Mozilla-Ocho/llamafile) +- [nomic-ai/gpt4all](https://github.com/nomic-ai/gpt4all) +- [ollama/ollama](https://github.com/ollama/ollama) +- 
[oobabooga/text-generation-webui](https://github.com/oobabooga/text-generation-webui) (AGPL) +- [psugihara/FreeChat](https://github.com/psugihara/FreeChat) +- [cztomsik/ava](https://github.com/cztomsik/ava) (MIT) +- [ptsochantaris/emeltal](https://github.com/ptsochantaris/emeltal) +- [pythops/tenere](https://github.com/pythops/tenere) (AGPL) +- [RecurseChat](https://recurse.chat/) (proprietary) +- [semperai/amica](https://github.com/semperai/amica) +- [withcatai/catai](https://github.com/withcatai/catai) +- [Mobile-Artificial-Intelligence/maid](https://github.com/Mobile-Artificial-Intelligence/maid) (MIT) +- [Msty](https://msty.app) (proprietary) +- [LLMFarm](https://github.com/guinmoon/LLMFarm?tab=readme-ov-file) (MIT) +- [KanTV](https://github.com/zhouwg/kantv?tab=readme-ov-file)(Apachev2.0 or later) +- [Dot](https://github.com/alexpinel/Dot) (GPL) +- [MindMac](https://mindmac.app) (proprietary) +- [KodiBot](https://github.com/firatkiral/kodibot) (GPL) +- [eva](https://github.com/ylsdamxssjxxdd/eva) (MIT) +- [AI Sublime Text plugin](https://github.com/yaroslavyaroslav/OpenAI-sublime-text) (MIT) + +*(to have a project listed here, it should clearly state that it depends on `llama.cpp`)* + +--- + +Here is a typical run using LLaMA v2 13B on M2 Ultra: + +``` +$ make -j && ./main -m models/llama-13b-v2/ggml-model-q4_0.gguf -p "Building a website can be done in 10 simple steps:\nStep 1:" -n 400 -e +I llama.cpp build info: +I UNAME_S: Darwin +I UNAME_P: arm +I UNAME_M: arm64 +I CFLAGS: -I. -O3 -std=c11 -fPIC -DNDEBUG -Wall -Wextra -Wpedantic -Wcast-qual -Wdouble-promotion -Wshadow -Wstrict-prototypes -Wpointer-arith -Wmissing-prototypes -pthread -DGGML_USE_K_QUANTS -DGGML_USE_ACCELERATE +I CXXFLAGS: -I. -I./common -O3 -std=c++11 -fPIC -DNDEBUG -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-multichar -pthread -DGGML_USE_K_QUANTS +I LDFLAGS: -framework Accelerate +I CC: Apple clang version 14.0.3 (clang-1403.0.22.14.1) +I CXX: Apple clang version 14.0.3 (clang-1403.0.22.14.1) + +make: Nothing to be done for `default'. 
+main: build = 1041 (cf658ad) +main: seed = 1692823051 +llama_model_loader: loaded meta data with 16 key-value pairs and 363 tensors from models/llama-13b-v2/ggml-model-q4_0.gguf (version GGUF V1 (latest)) +llama_model_loader: - type f32: 81 tensors +llama_model_loader: - type q4_0: 281 tensors +llama_model_loader: - type q6_K: 1 tensors +llm_load_print_meta: format = GGUF V1 (latest) +llm_load_print_meta: arch = llama +llm_load_print_meta: vocab type = SPM +llm_load_print_meta: n_vocab = 32000 +llm_load_print_meta: n_merges = 0 +llm_load_print_meta: n_ctx_train = 4096 +llm_load_print_meta: n_ctx = 512 +llm_load_print_meta: n_embd = 5120 +llm_load_print_meta: n_head = 40 +llm_load_print_meta: n_head_kv = 40 +llm_load_print_meta: n_layer = 40 +llm_load_print_meta: n_rot = 128 +llm_load_print_meta: n_gqa = 1 +llm_load_print_meta: f_norm_eps = 1.0e-05 +llm_load_print_meta: f_norm_rms_eps = 1.0e-05 +llm_load_print_meta: n_ff = 13824 +llm_load_print_meta: freq_base = 10000.0 +llm_load_print_meta: freq_scale = 1 +llm_load_print_meta: model type = 13B +llm_load_print_meta: model ftype = mostly Q4_0 +llm_load_print_meta: model size = 13.02 B +llm_load_print_meta: general.name = LLaMA v2 +llm_load_print_meta: BOS token = 1 '' +llm_load_print_meta: EOS token = 2 '' +llm_load_print_meta: UNK token = 0 '' +llm_load_print_meta: LF token = 13 '<0x0A>' +llm_load_tensors: ggml ctx size = 0.11 MB +llm_load_tensors: mem required = 7024.01 MB (+ 400.00 MB per state) +................................................................................................... +llama_new_context_with_model: kv self size = 400.00 MB +llama_new_context_with_model: compute buffer total size = 75.41 MB + +system_info: n_threads = 16 / 24 | AVX = 0 | AVX2 = 0 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 0 | NEON = 1 | ARM_FMA = 1 | F16C = 0 | FP16_VA = 1 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 0 | VSX = 0 | +sampling: repeat_last_n = 64, repeat_penalty = 1.100000, presence_penalty = 0.000000, frequency_penalty = 0.000000, top_k = 40, tfs_z = 1.000000, top_p = 0.950000, typical_p = 1.000000, temp = 0.800000, mirostat = 0, mirostat_lr = 0.100000, mirostat_ent = 5.000000 +generate: n_ctx = 512, n_batch = 512, n_predict = 400, n_keep = 0 + + + Building a website can be done in 10 simple steps: +Step 1: Find the right website platform. +Step 2: Choose your domain name and hosting plan. +Step 3: Design your website layout. +Step 4: Write your website content and add images. +Step 5: Install security features to protect your site from hackers or spammers +Step 6: Test your website on multiple browsers, mobile devices, operating systems etc… +Step 7: Test it again with people who are not related to you personally – friends or family members will work just fine! +Step 8: Start marketing and promoting the website via social media channels or paid ads +Step 9: Analyze how many visitors have come to your site so far, what type of people visit more often than others (e.g., men vs women) etc… +Step 10: Continue to improve upon all aspects mentioned above by following trends in web design and staying up-to-date on new technologies that can enhance user experience even further! +How does a Website Work? +A website works by having pages, which are made of HTML code. This code tells your computer how to display the content on each page you visit – whether it’s an image or text file (like PDFs). 
In order for someone else’s browser not only be able but also want those same results when accessing any given URL; some additional steps need taken by way of programming scripts that will add functionality such as making links clickable! +The most common type is called static HTML pages because they remain unchanged over time unless modified manually (either through editing files directly or using an interface such as WordPress). They are usually served up via HTTP protocols – this means anyone can access them without having any special privileges like being part of a group who is allowed into restricted areas online; however, there may still exist some limitations depending upon where one lives geographically speaking. +How to +llama_print_timings: load time = 576.45 ms +llama_print_timings: sample time = 283.10 ms / 400 runs ( 0.71 ms per token, 1412.91 tokens per second) +llama_print_timings: prompt eval time = 599.83 ms / 19 tokens ( 31.57 ms per token, 31.68 tokens per second) +llama_print_timings: eval time = 24513.59 ms / 399 runs ( 61.44 ms per token, 16.28 tokens per second) +llama_print_timings: total time = 25431.49 ms +``` + +And here is another demo of running both LLaMA-7B and [whisper.cpp](https://github.com/ggerganov/whisper.cpp) on a single M1 Pro MacBook: + +https://user-images.githubusercontent.com/1991296/224442907-7693d4be-acaa-4e01-8b4f-add84093ffff.mp4 + +## Usage + +Here are the end-to-end binary build and model conversion steps for most supported models. + +### Get the Code + +```bash +git clone https://github.com/ggerganov/llama.cpp +cd llama.cpp +``` + +### Build + +In order to build llama.cpp you have three different options. + +- Using `make`: + - On Linux or MacOS: + + ```bash + make + ``` + + **Note**: for `Debug` builds, run `make LLAMA_DEBUG=1` + + - On Windows: + + 1. Download the latest fortran version of [w64devkit](https://github.com/skeeto/w64devkit/releases). + 2. Extract `w64devkit` on your pc. + 3. Run `w64devkit.exe`. + 4. Use the `cd` command to reach the `llama.cpp` folder. + 5. From here you can run: + ```bash + make + ``` + +- Using `CMake`: + + ```bash + cmake -B build + cmake --build build --config Release + ``` + + **Note**: for `Debug` builds, there are two cases: + + - Single-config generators (e.g. default = `Unix Makefiles`; note that they just ignore the `--config` flag): + + ```bash + cmake -B build -DCMAKE_BUILD_TYPE=Debug + cmake --build build + ``` + + - Multi-config generators (`-G` param set to Visual Studio, XCode...): + + ```bash + cmake -B build -G "Xcode" + cmake --build build --config Debug + ``` + +- Using `Zig` (version 0.11 or later): + + Building for optimization levels and CPU features can be accomplished using standard build arguments, for example AVX2, FMA, F16C, + it's also possible to cross compile for other operating systems and architectures: + + ```bash + zig build -Doptimize=ReleaseFast -Dtarget=x86_64-windows-gnu -Dcpu=x86_64+avx2+fma+f16c + ``` + + The `zig targets` command will give you valid options to use. + +- Using `gmake` (FreeBSD): + + 1. Install and activate [DRM in FreeBSD](https://wiki.freebsd.org/Graphics) + 2. Add your user to **video** group + 3. Install compilation dependencies. 
+ + ```bash + sudo pkg install gmake automake autoconf pkgconf llvm15 clinfo clover \ + opencl clblast openblas + + gmake CC=/usr/local/bin/clang15 CXX=/usr/local/bin/clang++15 -j4 + ``` + + **Notes:** With this packages you can build llama.cpp with OPENBLAS and + CLBLAST support for use OpenCL GPU acceleration in FreeBSD. Please read + the instructions for use and activate this options in this document below. + +### Metal Build + +On MacOS, Metal is enabled by default. Using Metal makes the computation run on the GPU. +To disable the Metal build at compile time use the `LLAMA_NO_METAL=1` flag or the `LLAMA_METAL=OFF` cmake option. + +When built with Metal support, you can explicitly disable GPU inference with the `--n-gpu-layers|-ngl 0` command-line +argument. + +### MPI Build + +MPI lets you distribute the computation over a cluster of machines. Because of the serial nature of LLM prediction, this won't yield any end-to-end speed-ups, but it will let you run larger models than would otherwise fit into RAM on a single machine. + +First you will need MPI libraries installed on your system. The two most popular (only?) options are [MPICH](https://www.mpich.org) and [OpenMPI](https://www.open-mpi.org). Either can be installed with a package manager (`apt`, Homebrew, MacPorts, etc). + +Next you will need to build the project with `LLAMA_MPI` set to true on all machines; if you're building with `make`, you will also need to specify an MPI-capable compiler (when building with CMake, this is configured automatically): + +- Using `make`: + + ```bash + make CC=mpicc CXX=mpicxx LLAMA_MPI=1 + ``` + +- Using `CMake`: + + ```bash + cmake -S . -B build -DLLAMA_MPI=ON + ``` + +Once the programs are built, download/convert the weights on all of the machines in your cluster. The paths to the weights and programs should be identical on all machines. + +Next, ensure password-less SSH access to each machine from the primary host, and create a `hostfile` with a list of the hostnames and their relative "weights" (slots). If you want to use localhost for computation, use its local subnet IP address rather than the loopback address or "localhost". + +Here is an example hostfile: + +``` +192.168.0.1:2 +malvolio.local:1 +``` + +The above will distribute the computation across 2 processes on the first host and 1 process on the second host. Each process will use roughly an equal amount of RAM. Try to keep these numbers small, as inter-process (intra-host) communication is expensive. + +Finally, you're ready to run a computation using `mpirun`: + +```bash +mpirun -hostfile hostfile -n 3 ./main -m ./models/7B/ggml-model-q4_0.gguf -n 128 +``` + +### BLAS Build + +Building the program with BLAS support may lead to some performance improvements in prompt processing using batch sizes higher than 32 (the default is 512). Support with CPU-only BLAS implementations doesn't affect the normal generation performance. We may see generation performance improvements with GPU-involved BLAS implementations, e.g. cuBLAS, hipBLAS and CLBlast. There are currently several different BLAS implementations available for build and use: + +- #### Accelerate Framework: + + This is only available on Mac PCs and it's enabled by default. You can just build using the normal instructions. + +- #### OpenBLAS: + + This provides BLAS acceleration using only the CPU. Make sure to have OpenBLAS installed on your machine. + + - Using `make`: + - On Linux: + ```bash + make LLAMA_OPENBLAS=1 + ``` + + - On Windows: + + 1. 
Download the latest fortran version of [w64devkit](https://github.com/skeeto/w64devkit/releases). + 2. Download the latest version of [OpenBLAS for Windows](https://github.com/xianyi/OpenBLAS/releases). + 3. Extract `w64devkit` on your pc. + 4. From the OpenBLAS zip that you just downloaded copy `libopenblas.a`, located inside the `lib` folder, inside `w64devkit\x86_64-w64-mingw32\lib`. + 5. From the same OpenBLAS zip copy the content of the `include` folder inside `w64devkit\x86_64-w64-mingw32\include`. + 6. Run `w64devkit.exe`. + 7. Use the `cd` command to reach the `llama.cpp` folder. + 8. From here you can run: + + ```bash + make LLAMA_OPENBLAS=1 + ``` + + - Using `CMake` on Linux: + + ```bash + cmake -B build -DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS + cmake --build build --config Release + ``` + +- #### BLIS + + Check [BLIS.md](docs/BLIS.md) for more information. + +- #### SYCL + SYCL is a higher-level programming model to improve programming productivity on various hardware accelerators. + + llama.cpp based on SYCL is used to **support Intel GPU** (Data Center Max series, Flex series, Arc series, Built-in GPU and iGPU). + + For detailed info, please refer to [llama.cpp for SYCL](README-sycl.md). + +- #### Intel oneMKL + Building through oneAPI compilers will make avx_vnni instruction set available for intel processors that do not support avx512 and avx512_vnni. Please note that this build config **does not support Intel GPU**. For Intel GPU support, please refer to [llama.cpp for SYCL](./README-sycl.md). + + - Using manual oneAPI installation: + By default, `LLAMA_BLAS_VENDOR` is set to `Generic`, so if you already sourced intel environment script and assign `-DLLAMA_BLAS=ON` in cmake, the mkl version of Blas will automatically been selected. Otherwise please install oneAPI and follow the below steps: + ```bash + source /opt/intel/oneapi/setvars.sh # You can skip this step if in oneapi-basekit docker image, only required for manual installation + cmake -B build -DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=Intel10_64lp -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_NATIVE=ON + cmake --build build --config Release + ``` + + - Using oneAPI docker image: + If you do not want to source the environment vars and install oneAPI manually, you can also build the code using intel docker container: [oneAPI-basekit](https://hub.docker.com/r/intel/oneapi-basekit). Then, you can use the commands given above. + + Check [Optimizing and Running LLaMA2 on Intel® CPU](https://www.intel.com/content/www/us/en/content-details/791610/optimizing-and-running-llama2-on-intel-cpu.html) for more information. + +- #### CUDA + + This provides GPU acceleration using the CUDA cores of your Nvidia GPU. Make sure to have the CUDA toolkit installed. You can download it from your Linux distro's package manager (e.g. `apt install nvidia-cuda-toolkit`) or from here: [CUDA Toolkit](https://developer.nvidia.com/cuda-downloads). + + For Jetson user, if you have Jetson Orin, you can try this: [Offical Support](https://www.jetson-ai-lab.com/tutorial_text-generation.html). If you are using an old model(nano/TX2), need some additional operations before compiling. + + - Using `make`: + ```bash + make LLAMA_CUDA=1 + ``` + - Using `CMake`: + + ```bash + cmake -B build -DLLAMA_CUDA=ON + cmake --build build --config Release + ``` + + The environment variable [`CUDA_VISIBLE_DEVICES`](https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#env-vars) can be used to specify which GPU(s) will be used. 
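+
+  For instance, a minimal sketch of selecting a single GPU at run time (the model path and layer count below are illustrative):
+
+  ```bash
+  # run on GPU 0 only and offload all layers of a 7B model to it
+  CUDA_VISIBLE_DEVICES=0 ./main -m ./models/7B/ggml-model-q4_0.gguf -p "Building a website can be done in 10 simple steps:" -n 128 -ngl 33
+  ```
+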
The following compilation options are also available to tweak performance: + + | Option | Legal values | Default | Description | + |--------------------------------|------------------------|---------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| + | LLAMA_CUDA_FORCE_DMMV | Boolean | false | Force the use of dequantization + matrix vector multiplication kernels instead of using kernels that do matrix vector multiplication on quantized data. By default the decision is made based on compute capability (MMVQ for 6.1/Pascal/GTX 1000 or higher). Does not affect k-quants. | + | LLAMA_CUDA_DMMV_X | Positive integer >= 32 | 32 | Number of values in x direction processed by the CUDA dequantization + matrix vector multiplication kernel per iteration. Increasing this value can improve performance on fast GPUs. Power of 2 heavily recommended. Does not affect k-quants. | + | LLAMA_CUDA_MMV_Y | Positive integer | 1 | Block size in y direction for the CUDA mul mat vec kernels. Increasing this value can improve performance on fast GPUs. Power of 2 recommended. | + | LLAMA_CUDA_F16 | Boolean | false | If enabled, use half-precision floating point arithmetic for the CUDA dequantization + mul mat vec kernels and for the q4_1 and q5_1 matrix matrix multiplication kernels. Can improve performance on relatively recent GPUs. | + | LLAMA_CUDA_KQUANTS_ITER | 1 or 2 | 2 | Number of values processed per iteration and per CUDA thread for Q2_K and Q6_K quantization formats. Setting this value to 1 can improve performance for slow GPUs. | + | LLAMA_CUDA_PEER_MAX_BATCH_SIZE | Positive integer | 128 | Maximum batch size for which to enable peer access between multiple GPUs. Peer access requires either Linux or NVLink. When using NVLink enabling peer access for larger batch sizes is potentially beneficial. | + +- #### hipBLAS + + This provides BLAS acceleration on HIP-supported AMD GPUs. + Make sure to have ROCm installed. + You can download it from your Linux distro's package manager or from here: [ROCm Quick Start (Linux)](https://rocm.docs.amd.com/projects/install-on-linux/en/latest/tutorial/quick-start.html#rocm-install-quick). + + - Using `make`: + ```bash + make LLAMA_HIPBLAS=1 + ``` + - Using `CMake` for Linux (assuming a gfx1030-compatible AMD GPU): + ```bash + CC=/opt/rocm/llvm/bin/clang CXX=/opt/rocm/llvm/bin/clang++ \ + cmake -B build -DLLAMA_HIPBLAS=ON -DAMDGPU_TARGETS=gfx1030 -DCMAKE_BUILD_TYPE=Release \ + && cmake --build build --config Release -- -j 16 + ``` + On Linux it is also possible to use unified memory architecture (UMA) to share main memory between the CPU and integrated GPU by setting `-DLLAMA_HIP_UMA=ON"`. + However, this hurts performance for non-integrated GPUs (but enables working with integrated GPUs). + + - Using `make` (example for target gfx1030, build with 16 CPU threads): + ```bash + make -j16 LLAMA_HIPBLAS=1 LLAMA_HIP_UMA=1 AMDGPU_TARGETS=gfx1030 + ``` + + - Using `CMake` for Windows (using x64 Native Tools Command Prompt for VS, and assuming a gfx1100-compatible AMD GPU): + ```bash + set PATH=%HIP_PATH%\bin;%PATH% + mkdir build + cd build + cmake -G Ninja -DAMDGPU_TARGETS=gfx1100 -DLLAMA_HIPBLAS=ON -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_BUILD_TYPE=Release .. + cmake --build . 
+ ``` + Make sure that `AMDGPU_TARGETS` is set to the GPU arch you want to compile for. The above example uses `gfx1100` that corresponds to Radeon RX 7900XTX/XT/GRE. You can find a list of targets [here](https://llvm.org/docs/AMDGPUUsage.html#processors) + Find your gpu version string by matching the most significant version information from `rocminfo | grep gfx | head -1 | awk '{print $2}'` with the list of processors, e.g. `gfx1035` maps to `gfx1030`. + + + The environment variable [`HIP_VISIBLE_DEVICES`](https://rocm.docs.amd.com/en/latest/understand/gpu_isolation.html#hip-visible-devices) can be used to specify which GPU(s) will be used. + If your GPU is not officially supported you can use the environment variable [`HSA_OVERRIDE_GFX_VERSION`] set to a similar GPU, for example 10.3.0 on RDNA2 (e.g. gfx1030, gfx1031, or gfx1035) or 11.0.0 on RDNA3. + The following compilation options are also available to tweak performance (yes, they refer to CUDA, not HIP, because it uses the same code as the cuBLAS version above): + + | Option | Legal values | Default | Description | + |-------------------------|------------------------|---------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| + | LLAMA_CUDA_DMMV_X | Positive integer >= 32 | 32 | Number of values in x direction processed by the HIP dequantization + matrix vector multiplication kernel per iteration. Increasing this value can improve performance on fast GPUs. Power of 2 heavily recommended. Does not affect k-quants. | + | LLAMA_CUDA_MMV_Y | Positive integer | 1 | Block size in y direction for the HIP mul mat vec kernels. Increasing this value can improve performance on fast GPUs. Power of 2 recommended. Does not affect k-quants. | + | LLAMA_CUDA_KQUANTS_ITER | 1 or 2 | 2 | Number of values processed per iteration and per HIP thread for Q2_K and Q6_K quantization formats. Setting this value to 1 can improve performance for slow GPUs. | + +- #### CLBlast + + OpenCL acceleration is provided by the matrix multiplication kernels from the [CLBlast](https://github.com/CNugteren/CLBlast) project and custom kernels for ggml that can generate tokens on the GPU. + + You will need the [OpenCL SDK](https://github.com/KhronosGroup/OpenCL-SDK). + - For Ubuntu, Debian, and Fedora the packages `opencl-headers`, `ocl-icd` may be needed. + + - For Windows, a pre-built SDK is available on the [OpenCL Releases](https://github.com/KhronosGroup/OpenCL-SDK/releases) page. + + -
+ Installing the OpenCL SDK from source + + ```sh + git clone --recurse-submodules https://github.com/KhronosGroup/OpenCL-SDK.git + cd OpenCL-SDK + cmake -B build -DBUILD_DOCS=OFF \ + -DBUILD_EXAMPLES=OFF \ + -DBUILD_TESTING=OFF \ + -DOPENCL_SDK_BUILD_SAMPLES=OFF \ + -DOPENCL_SDK_TEST_SAMPLES=OFF + cmake --build build + cmake --install build --prefix /some/path + ``` +
+ + ##### Installing CLBlast + + Pre-built CLBlast binaries may be found on the [CLBlast Releases](https://github.com/CNugteren/CLBlast/releases) page. For Unix variants, it may also be found in your operating system's packages. + + Linux packaging: + Fedora Linux: + ```bash + sudo dnf install clblast + ``` + + Alternatively, they may be built from source. + + -
+ Windows: + + ```cmd + set OPENCL_SDK_ROOT="C:/OpenCL-SDK-v2023.04.17-Win-x64" + git clone https://github.com/CNugteren/CLBlast.git + cd CLBlast + cmake -B build -DBUILD_SHARED_LIBS=OFF -DOVERRIDE_MSVC_FLAGS_TO_MT=OFF -DTUNERS=OFF -DOPENCL_ROOT=%OPENCL_SDK_ROOT% -G "Visual Studio 17 2022" -A x64 + cmake --build build --config Release + cmake --install build --prefix C:/CLBlast + ``` + + (note: `--config Release` at build time is the default and only relevant for Visual Studio builds - or multi-config Ninja builds) + + -
+ Unix: + + ```sh + git clone https://github.com/CNugteren/CLBlast.git + cd CLBlast + cmake -B build -DBUILD_SHARED_LIBS=OFF -DTUNERS=OFF + cmake --build build --config Release + cmake --install build --prefix /some/path + ``` + + Where `/some/path` is where the built library will be installed (default is `/usr/local`). +
+ + ##### Building Llama with CLBlast + + - Build with make: + ```sh + make LLAMA_CLBLAST=1 + ``` + - CMake (Unix): + ```sh + cmake -B build -DLLAMA_CLBLAST=ON -DCLBlast_DIR=/some/path + cmake --build build --config Release + ``` + - CMake (Windows): + ```cmd + set CL_BLAST_CMAKE_PKG="C:/CLBlast/lib/cmake/CLBlast" + git clone https://github.com/ggerganov/llama.cpp + cd llama.cpp + cmake -B build -DBUILD_SHARED_LIBS=OFF -DLLAMA_CLBLAST=ON -DCMAKE_PREFIX_PATH=%CL_BLAST_CMAKE_PKG% -G "Visual Studio 17 2022" -A x64 + cmake --build build --config Release + cmake --install build --prefix C:/LlamaCPP + ``` + + ##### Running Llama with CLBlast + + The CLBlast build supports `--gpu-layers|-ngl` like the CUDA version does. + + To select the correct platform (driver) and device (GPU), you can use the environment variables `GGML_OPENCL_PLATFORM` and `GGML_OPENCL_DEVICE`. + The selection can be a number (starting from 0) or a text string to search: + + ```sh + GGML_OPENCL_PLATFORM=1 ./main ... + GGML_OPENCL_DEVICE=2 ./main ... + GGML_OPENCL_PLATFORM=Intel ./main ... + GGML_OPENCL_PLATFORM=AMD GGML_OPENCL_DEVICE=1 ./main ... + ``` + + The default behavior is to find the first GPU device, but when it is an integrated GPU on a laptop, for instance, the selectors are useful. + Using the variables it is possible to select a CPU-based driver as well, if so desired. + + You can get a list of platforms and devices from the `clinfo -l` command, etc. + +- #### Vulkan + + **With docker**: + + You don't need to install Vulkan SDK. It will be installed inside the container. + + ```sh + # Build the image + docker build -t llama-cpp-vulkan -f .devops/main-vulkan.Dockerfile . + + # Then, use it: + docker run -it --rm -v "$(pwd):/app:Z" --device /dev/dri/renderD128:/dev/dri/renderD128 --device /dev/dri/card1:/dev/dri/card1 llama-cpp-vulkan -m "/app/models/YOUR_MODEL_FILE" -p "Building a website can be done in 10 simple steps:" -n 400 -e -ngl 33 + ``` + + **Without docker**: + + Firstly, you need to make sure you have installed [Vulkan SDK](https://vulkan.lunarg.com/doc/view/latest/linux/getting_started_ubuntu.html) + + For example, on Ubuntu 22.04 (jammy), use the command below: + + ```bash + wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | apt-key add - + wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list + apt update -y + apt-get install -y vulkan-sdk + # To verify the installation, use the command below: + vulkaninfo + ``` + + Alternatively your package manager might be able to provide the appropiate libraries. For example for Ubuntu 22.04 you can install `libvulkan-dev` instead. + + Then, build llama.cpp using the cmake command below: + + ```bash + cmake -B build -DLLAMA_VULKAN=1 + cmake --build build --config Release + # Test the output binary (with "-ngl 33" to offload all layers to GPU) + ./bin/main -m "PATH_TO_MODEL" -p "Hi you how are you" -n 50 -e -ngl 33 -t 4 + + # You should see in the output, ggml_vulkan detected your GPU. For example: + # ggml_vulkan: Using Intel(R) Graphics (ADL GT2) | uma: 1 | fp16: 1 | warp size: 32 + ``` + +### Prepare and Quantize + +To obtain the official LLaMA 2 weights please see the Obtaining and using the Facebook LLaMA 2 model section. There is also a large selection of pre-quantized `gguf` models available on Hugging Face. 
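+
+For example, one way to fetch such a pre-quantized file is with the `huggingface-cli` tool from the `huggingface_hub` Python package (a sketch only; the tool is not part of llama.cpp, and the repository and file name below are illustrative):
+
+```bash
+pip install huggingface_hub
+# download a single pre-quantized GGUF file into ./models
+huggingface-cli download TheBloke/Llama-2-7B-GGUF llama-2-7b.Q4_0.gguf --local-dir ./models
+```
+
+To convert and quantize models yourself, the full workflow looks like this:
+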
+ +```bash +# obtain the official LLaMA model weights and place them in ./models +ls ./models +llama-2-7b tokenizer_checklist.chk tokenizer.model +# [Optional] for models using BPE tokenizers +ls ./models + vocab.json +# [Optional] for PyTorch .bin models like Mistral-7B +ls ./models + + +# install Python dependencies +python3 -m pip install -r requirements.txt + +# convert the model to ggml FP16 format +python3 convert.py models/mymodel/ + +# [Optional] for models using BPE tokenizers +python convert.py models/mymodel/ --vocab-type bpe + +# quantize the model to 4-bits (using Q4_K_M method) +./quantize ./models/mymodel/ggml-model-f16.gguf ./models/mymodel/ggml-model-Q4_K_M.gguf Q4_K_M + +# update the gguf filetype to current version if older version is now unsupported +./quantize ./models/mymodel/ggml-model-Q4_K_M.gguf ./models/mymodel/ggml-model-Q4_K_M-v2.gguf COPY +``` + +### Run the quantized model + +```bash +# start inference on a gguf model +./main -m ./models/mymodel/ggml-model-Q4_K_M.gguf -n 128 +``` + +When running the larger models, make sure you have enough disk space to store all the intermediate files. + +### Running on Windows with prebuilt binaries + +You will find prebuilt Windows binaries on the release page. + +Simply download and extract the latest zip package of choice: (e.g. `llama-b1380-bin-win-avx2-x64.zip`) + +From the unzipped folder, open a terminal/cmd window here and place a pre-converted `.gguf` model file. Test out the main example like so: + +``` +.\main -m llama-2-7b.Q4_0.gguf -n 128 +``` + +### Memory/Disk Requirements + +As the models are currently fully loaded into memory, you will need adequate disk space to save them and sufficient RAM to load them. At the moment, memory and disk requirements are the same. + +| Model | Original size | Quantized size (Q4_0) | +|------:|--------------:|----------------------:| +| 7B | 13 GB | 3.9 GB | +| 13B | 24 GB | 7.8 GB | +| 30B | 60 GB | 19.5 GB | +| 65B | 120 GB | 38.5 GB | + +### Quantization + +Several quantization methods are supported. They differ in the resulting model disk size and inference speed. 
+ +*(outdated)* + +| Model | Measure | F16 | Q4_0 | Q4_1 | Q5_0 | Q5_1 | Q8_0 | +|------:|--------------|-------:|-------:|-------:|-------:|-------:|-------:| +| 7B | perplexity | 5.9066 | 6.1565 | 6.0912 | 5.9862 | 5.9481 | 5.9070 | +| 7B | file size | 13.0G | 3.5G | 3.9G | 4.3G | 4.7G | 6.7G | +| 7B | ms/tok @ 4th | 127 | 55 | 54 | 76 | 83 | 72 | +| 7B | ms/tok @ 8th | 122 | 43 | 45 | 52 | 56 | 67 | +| 7B | bits/weight | 16.0 | 4.5 | 5.0 | 5.5 | 6.0 | 8.5 | +| 13B | perplexity | 5.2543 | 5.3860 | 5.3608 | 5.2856 | 5.2706 | 5.2548 | +| 13B | file size | 25.0G | 6.8G | 7.6G | 8.3G | 9.1G | 13G | +| 13B | ms/tok @ 4th | - | 103 | 105 | 148 | 160 | 131 | +| 13B | ms/tok @ 8th | - | 73 | 82 | 98 | 105 | 128 | +| 13B | bits/weight | 16.0 | 4.5 | 5.0 | 5.5 | 6.0 | 8.5 | + +- [k-quants](https://github.com/ggerganov/llama.cpp/pull/1684) +- recent k-quants improvements and new i-quants + - [#2707](https://github.com/ggerganov/llama.cpp/pull/2707) + - [#2807](https://github.com/ggerganov/llama.cpp/pull/2807) + - [#4773 - 2-bit i-quants (inference)](https://github.com/ggerganov/llama.cpp/pull/4773) + - [#4856 - 2-bit i-quants (inference)](https://github.com/ggerganov/llama.cpp/pull/4856) + - [#4861 - importance matrix](https://github.com/ggerganov/llama.cpp/pull/4861) + - [#4872 - MoE models](https://github.com/ggerganov/llama.cpp/pull/4872) + - [#4897 - 2-bit quantization](https://github.com/ggerganov/llama.cpp/pull/4897) + - [#4930 - imatrix for all k-quants](https://github.com/ggerganov/llama.cpp/pull/4930) + - [#4951 - imatrix on the GPU](https://github.com/ggerganov/llama.cpp/pull/4957) + - [#4969 - imatrix for legacy quants](https://github.com/ggerganov/llama.cpp/pull/4969) + - [#4996 - k-qunats tuning](https://github.com/ggerganov/llama.cpp/pull/4996) + - [#5060 - Q3_K_XS](https://github.com/ggerganov/llama.cpp/pull/5060) + - [#5196 - 3-bit i-quants](https://github.com/ggerganov/llama.cpp/pull/5196) + - [quantization tuning](https://github.com/ggerganov/llama.cpp/pull/5320), [another one](https://github.com/ggerganov/llama.cpp/pull/5334), and [another one](https://github.com/ggerganov/llama.cpp/pull/5361) + +### Perplexity (measuring model quality) + +You can use the `perplexity` example to measure perplexity over a given prompt (lower perplexity is better). +For more information, see [https://huggingface.co/docs/transformers/perplexity](https://huggingface.co/docs/transformers/perplexity). + +The perplexity measurements in table above are done against the `wikitext2` test dataset (https://paperswithcode.com/dataset/wikitext-2), with context length of 512. +The time per token is measured on a MacBook M1 Pro 32GB RAM using 4 and 8 threads. + +#### How to run + +1. Download/extract: https://huggingface.co/datasets/ggml-org/ci/resolve/main/wikitext-2-raw-v1.zip +2. Run `./perplexity -m models/7B/ggml-model-q4_0.gguf -f wiki.test.raw` +3. Output: +``` +perplexity : calculating perplexity over 655 chunks +24.43 seconds per pass - ETA 4.45 hours +[1]4.5970,[2]5.1807,[3]6.0382,... +``` +And after 4.45 hours, you will have the final perplexity. + +### Interactive mode + +If you want a more ChatGPT-like experience, you can run in interactive mode by passing `-i` as a parameter. +In this mode, you can always interrupt generation by pressing Ctrl+C and entering one or more lines of text, which will be converted into tokens and appended to the current context. You can also specify a *reverse prompt* with the parameter `-r "reverse prompt string"`. 
This will result in user input being prompted whenever the exact tokens of the reverse prompt string are encountered in the generation. A typical use is to use a prompt that makes LLaMA emulate a chat between multiple users, say Alice and Bob, and pass `-r "Alice:"`. + +Here is an example of a few-shot interaction, invoked with the command + +```bash +# default arguments using a 7B model +./examples/chat.sh + +# advanced chat with a 13B model +./examples/chat-13B.sh + +# custom arguments using a 13B model +./main -m ./models/13B/ggml-model-q4_0.gguf -n 256 --repeat_penalty 1.0 --color -i -r "User:" -f prompts/chat-with-bob.txt +``` + +Note the use of `--color` to distinguish between user input and generated text. Other parameters are explained in more detail in the [README](examples/main/README.md) for the `main` example program. + +![image](https://user-images.githubusercontent.com/1991296/224575029-2af3c7dc-5a65-4f64-a6bb-517a532aea38.png) + +### Persistent Interaction + +The prompt, user inputs, and model generations can be saved and resumed across calls to `./main` by leveraging `--prompt-cache` and `--prompt-cache-all`. The `./examples/chat-persistent.sh` script demonstrates this with support for long-running, resumable chat sessions. To use this example, you must provide a file to cache the initial chat prompt and a directory to save the chat session, and may optionally provide the same variables as `chat-13B.sh`. The same prompt cache can be reused for new chat sessions. Note that both prompt cache and chat directory are tied to the initial prompt (`PROMPT_TEMPLATE`) and the model file. + +```bash +# Start a new chat +PROMPT_CACHE_FILE=chat.prompt.bin CHAT_SAVE_DIR=./chat/default ./examples/chat-persistent.sh + +# Resume that chat +PROMPT_CACHE_FILE=chat.prompt.bin CHAT_SAVE_DIR=./chat/default ./examples/chat-persistent.sh + +# Start a different chat with the same prompt/model +PROMPT_CACHE_FILE=chat.prompt.bin CHAT_SAVE_DIR=./chat/another ./examples/chat-persistent.sh + +# Different prompt cache for different prompt/model +PROMPT_TEMPLATE=./prompts/chat-with-bob.txt PROMPT_CACHE_FILE=bob.prompt.bin \ + CHAT_SAVE_DIR=./chat/bob ./examples/chat-persistent.sh +``` + +### Constrained output with grammars + +`llama.cpp` supports grammars to constrain model output. For example, you can force the model to output JSON only: + +```bash +./main -m ./models/13B/ggml-model-q4_0.gguf -n 256 --grammar-file grammars/json.gbnf -p 'Request: schedule a call at 8pm; Command:' +``` + +The `grammars/` folder contains a handful of sample grammars. To write your own, check out the [GBNF Guide](./grammars/README.md). + +For authoring more complex JSON grammars, you can also check out https://grammar.intrinsiclabs.ai/, a browser app that lets you write TypeScript interfaces which it compiles to GBNF grammars that you can save for local use. Note that the app is built and maintained by members of the community, please file any issues or FRs on [its repo](http://github.com/intrinsiclabsai/gbnfgen) and not this one. + +### Instruct mode + +1. First, download and place the `ggml` model into the `./models` folder +2. Run the `main` tool like this: + +``` +./examples/alpaca.sh +``` + +Sample run: + +``` +== Running in interactive mode. == + - Press Ctrl+C to interject at any time. + - Press Return to return control to LLaMA. + - If you want to submit another line, end your input in '\'. + + Below is an instruction that describes a task. Write a response that appropriately completes the request. 
+ +> How many letters are there in the English alphabet? +There 26 letters in the English Alphabet +> What is the most common way of transportation in Amsterdam? +The majority (54%) are using public transit. This includes buses, trams and metros with over 100 lines throughout the city which make it very accessible for tourists to navigate around town as well as locals who commute by tram or metro on a daily basis +> List 5 words that start with "ca". +cadaver, cauliflower, cabbage (vegetable), catalpa (tree) and Cailleach. +> +``` + +### Obtaining and using the Facebook LLaMA 2 model + +- Refer to [Facebook's LLaMA download page](https://ai.meta.com/resources/models-and-libraries/llama-downloads/) if you want to access the model data. +- Alternatively, if you want to save time and space, you can download already converted and quantized models from [TheBloke](https://huggingface.co/TheBloke), including: + - [LLaMA 2 7B base](https://huggingface.co/TheBloke/Llama-2-7B-GGUF) + - [LLaMA 2 13B base](https://huggingface.co/TheBloke/Llama-2-13B-GGUF) + - [LLaMA 2 70B base](https://huggingface.co/TheBloke/Llama-2-70B-GGUF) + - [LLaMA 2 7B chat](https://huggingface.co/TheBloke/Llama-2-7B-chat-GGUF) + - [LLaMA 2 13B chat](https://huggingface.co/TheBloke/Llama-2-13B-chat-GGUF) + - [LLaMA 2 70B chat](https://huggingface.co/TheBloke/Llama-2-70B-chat-GGUF) + +### Seminal papers and background on the models + +If your issue is with model generation quality, then please at least scan the following links and papers to understand the limitations of LLaMA models. This is especially important when choosing an appropriate model size and appreciating both the significant and subtle differences between LLaMA models and ChatGPT: +- LLaMA: + - [Introducing LLaMA: A foundational, 65-billion-parameter large language model](https://ai.facebook.com/blog/large-language-model-llama-meta-ai/) + - [LLaMA: Open and Efficient Foundation Language Models](https://arxiv.org/abs/2302.13971) +- GPT-3 + - [Language Models are Few-Shot Learners](https://arxiv.org/abs/2005.14165) +- GPT-3.5 / InstructGPT / ChatGPT: + - [Aligning language models to follow instructions](https://openai.com/research/instruction-following) + - [Training language models to follow instructions with human feedback](https://arxiv.org/abs/2203.02155) + +### Android + +#### Building the Project using Android NDK +You can easily run `llama.cpp` on Android device with [termux](https://termux.dev/). + +First, install the essential packages for termux: +``` +pkg install clang wget git cmake +``` +Second, obtain the [Android NDK](https://developer.android.com/ndk) and then build with CMake: + +You can execute the following commands on your computer to avoid downloading the NDK to your mobile. Of course, you can also do this in Termux. + +``` +$ mkdir build-android +$ cd build-android +$ export NDK= +$ cmake -DCMAKE_TOOLCHAIN_FILE=$NDK/build/cmake/android.toolchain.cmake -DANDROID_ABI=arm64-v8a -DANDROID_PLATFORM=android-23 -DCMAKE_C_FLAGS=-march=armv8.4a+dotprod .. +$ make +``` +Install [termux](https://termux.dev/) on your device and run `termux-setup-storage` to get access to your SD card. +Finally, copy these built `llama` binaries and the model file to your device storage. 
Because the file permissions in the Android sdcard cannot be changed, you can copy the executable files to the `/data/data/com.termux/files/home/bin` path, and then execute the following commands in Termux to add executable permission: + +(Assumed that you have pushed the built executable files to the /sdcard/llama.cpp/bin path using `adb push`) +``` +$cp -r /sdcard/llama.cpp/bin /data/data/com.termux/files/home/ +$cd /data/data/com.termux/files/home/bin +$chmod +x ./* +``` + +Download model [llama-2-7b-chat.Q4_K_M.gguf](https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/blob/main/llama-2-7b-chat.Q4_K_M.gguf), and push it to `/sdcard/llama.cpp/`, then move it to `/data/data/com.termux/files/home/model/` + +``` +$mv /sdcard/llama.cpp/llama-2-7b-chat.Q4_K_M.gguf /data/data/com.termux/files/home/model/ +``` + +Now, you can start chatting: +``` +$cd /data/data/com.termux/files/home/bin +$./main -m ../model/llama-2-7b-chat.Q4_K_M.gguf -n 128 -cml +``` + +Here is a demo of an interactive session running on Pixel 5 phone: + +https://user-images.githubusercontent.com/271616/225014776-1d567049-ad71-4ef2-b050-55b0b3b9274c.mp4 + +#### Building the Project using Termux (F-Droid) +Termux from F-Droid offers an alternative route to execute the project on an Android device. This method empowers you to construct the project right from within the terminal, negating the requirement for a rooted device or SD Card. + +Outlined below are the directives for installing the project using OpenBLAS and CLBlast. This combination is specifically designed to deliver peak performance on recent devices that feature a GPU. + +If you opt to utilize OpenBLAS, you'll need to install the corresponding package. +``` +apt install libopenblas +``` + +Subsequently, if you decide to incorporate CLBlast, you'll first need to install the requisite OpenCL packages: +``` +apt install ocl-icd opencl-headers opencl-clhpp clinfo +``` + +In order to compile CLBlast, you'll need to first clone the respective Git repository, which can be found at this URL: https://github.com/CNugteren/CLBlast. Alongside this, clone this repository into your home directory. Once this is done, navigate to the CLBlast folder and execute the commands detailed below: +``` +cmake . +make +cp libclblast.so* $PREFIX/lib +cp ./include/clblast.h ../llama.cpp +``` + +Following the previous steps, navigate to the LlamaCpp directory. To compile it with OpenBLAS and CLBlast, execute the command provided below: +``` +cp /data/data/com.termux/files/usr/include/openblas/cblas.h . +cp /data/data/com.termux/files/usr/include/openblas/openblas_config.h . +make LLAMA_CLBLAST=1 //(sometimes you need to run this command twice) +``` + +Upon completion of the aforementioned steps, you will have successfully compiled the project. To run it using CLBlast, a slight adjustment is required: a command must be issued to direct the operations towards your device's physical GPU, rather than the virtual one. The necessary command is detailed below: +``` +GGML_OPENCL_PLATFORM=0 +GGML_OPENCL_DEVICE=0 +export LD_LIBRARY_PATH=/vendor/lib64:$LD_LIBRARY_PATH +``` + +(Note: some Android devices, like the Zenfone 8, need the following command instead - "export LD_LIBRARY_PATH=/system/vendor/lib64:$LD_LIBRARY_PATH". Source: https://www.reddit.com/r/termux/comments/kc3ynp/opencl_working_in_termux_more_in_comments/ ) + +For easy and swift re-execution, consider documenting this final part in a .sh script file. This will enable you to rerun the process with minimal hassle. 
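+
+A minimal sketch of such a script (the shebang is the usual Termux shell path; the model file name and layer count are illustrative):
+
+```bash
+#!/data/data/com.termux/files/usr/bin/sh
+# export the selectors so that ./main targets the physical GPU
+export GGML_OPENCL_PLATFORM=0
+export GGML_OPENCL_DEVICE=0
+export LD_LIBRARY_PATH=/vendor/lib64:$LD_LIBRARY_PATH   # some devices (e.g. Zenfone 8) need /system/vendor/lib64 instead
+
+cd ~/llama.cpp
+./main -m models/llama-2-7b-chat.Q4_K_M.gguf -n 128 -ngl 33
+```
+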
+
+Place your desired model into the `~/llama.cpp/models/` directory and execute the `./main (...)` script.
+
+### Docker
+
+#### Prerequisites
+* Docker must be installed and running on your system.
+* Create a folder to store big models & intermediate files (e.g. `/llama/models`)
+
+#### Images
+We have three Docker images available for this project:
+
+1. `ghcr.io/ggerganov/llama.cpp:full`: This image includes both the main executable file and the tools to convert LLaMA models into ggml and convert into 4-bit quantization. (platforms: `linux/amd64`, `linux/arm64`)
+2. `ghcr.io/ggerganov/llama.cpp:light`: This image only includes the main executable file. (platforms: `linux/amd64`, `linux/arm64`)
+3. `ghcr.io/ggerganov/llama.cpp:server`: This image only includes the server executable file. (platforms: `linux/amd64`, `linux/arm64`)
+
+Additionally, there are the following images, similar to the above:
+
+- `ghcr.io/ggerganov/llama.cpp:full-cuda`: Same as `full` but compiled with CUDA support. (platforms: `linux/amd64`)
+- `ghcr.io/ggerganov/llama.cpp:light-cuda`: Same as `light` but compiled with CUDA support. (platforms: `linux/amd64`)
+- `ghcr.io/ggerganov/llama.cpp:server-cuda`: Same as `server` but compiled with CUDA support. (platforms: `linux/amd64`)
+- `ghcr.io/ggerganov/llama.cpp:full-rocm`: Same as `full` but compiled with ROCm support. (platforms: `linux/amd64`, `linux/arm64`)
+- `ghcr.io/ggerganov/llama.cpp:light-rocm`: Same as `light` but compiled with ROCm support. (platforms: `linux/amd64`, `linux/arm64`)
+- `ghcr.io/ggerganov/llama.cpp:server-rocm`: Same as `server` but compiled with ROCm support. (platforms: `linux/amd64`, `linux/arm64`)
+
+The GPU-enabled images are not currently tested by CI beyond being built. They are not built with any variation from the ones in the Dockerfiles defined in [.devops/](.devops/) and the GitHub Action defined in [.github/workflows/docker.yml](.github/workflows/docker.yml). If you need different settings (for example, a different CUDA or ROCm library), you'll need to build the images locally for now.
+
+#### Usage
+
+The easiest way to download the models, convert them to ggml and optimize them is with the `--all-in-one` command, which is included in the full Docker image.
+
+Replace `/path/to/models` below with the actual path where you downloaded the models.
+
+```bash
+docker run -v /path/to/models:/models ghcr.io/ggerganov/llama.cpp:full --all-in-one "/models/" 7B
+```
+
+On completion, you are ready to play!
+
+```bash
+docker run -v /path/to/models:/models ghcr.io/ggerganov/llama.cpp:full --run -m /models/7B/ggml-model-q4_0.gguf -p "Building a website can be done in 10 simple steps:" -n 512
+```
+
+or with a light image:
+
+```bash
+docker run -v /path/to/models:/models ghcr.io/ggerganov/llama.cpp:light -m /models/7B/ggml-model-q4_0.gguf -p "Building a website can be done in 10 simple steps:" -n 512
+```
+
+or with a server image:
+
+```bash
+docker run -v /path/to/models:/models -p 8000:8000 ghcr.io/ggerganov/llama.cpp:server -m /models/7B/ggml-model-q4_0.gguf --port 8000 --host 0.0.0.0 -n 512
+```
+
+### Docker With CUDA
+
+Assuming one has the [nvidia-container-toolkit](https://github.com/NVIDIA/nvidia-container-toolkit) properly installed on Linux, or is using a GPU-enabled cloud, `cuBLAS` should be accessible inside the container.
+
+#### Building Locally
+
+```bash
+docker build -t local/llama.cpp:full-cuda -f .devops/full-cuda.Dockerfile .
+docker build -t local/llama.cpp:light-cuda -f .devops/main-cuda.Dockerfile .
+docker build -t local/llama.cpp:server-cuda -f .devops/server-cuda.Dockerfile . +``` + +You may want to pass in some different `ARGS`, depending on the CUDA environment supported by your container host, as well as the GPU architecture. + +The defaults are: + +- `CUDA_VERSION` set to `11.7.1` +- `CUDA_DOCKER_ARCH` set to `all` + +The resulting images, are essentially the same as the non-CUDA images: + +1. `local/llama.cpp:full-cuda`: This image includes both the main executable file and the tools to convert LLaMA models into ggml and convert into 4-bit quantization. +2. `local/llama.cpp:light-cuda`: This image only includes the main executable file. +3. `local/llama.cpp:server-cuda`: This image only includes the server executable file. + +#### Usage + +After building locally, Usage is similar to the non-CUDA examples, but you'll need to add the `--gpus` flag. You will also want to use the `--n-gpu-layers` flag. + +```bash +docker run --gpus all -v /path/to/models:/models local/llama.cpp:full-cuda --run -m /models/7B/ggml-model-q4_0.gguf -p "Building a website can be done in 10 simple steps:" -n 512 --n-gpu-layers 1 +docker run --gpus all -v /path/to/models:/models local/llama.cpp:light-cuda -m /models/7B/ggml-model-q4_0.gguf -p "Building a website can be done in 10 simple steps:" -n 512 --n-gpu-layers 1 +docker run --gpus all -v /path/to/models:/models local/llama.cpp:server-cuda -m /models/7B/ggml-model-q4_0.gguf --port 8000 --host 0.0.0.0 -n 512 --n-gpu-layers 1 +``` + +### Contributing + +- Contributors can open PRs +- Collaborators can push to branches in the `llama.cpp` repo and merge PRs into the `master` branch +- Collaborators will be invited based on contributions +- Any help with managing issues and PRs is very appreciated! +- Make sure to read this: [Inference at the edge](https://github.com/ggerganov/llama.cpp/discussions/205) +- A bit of backstory for those who are interested: [Changelog podcast](https://changelog.com/podcast/532) + +### Coding guidelines + +- Avoid adding third-party dependencies, extra files, extra headers, etc. +- Always consider cross-compatibility with other operating systems and architectures +- Avoid fancy looking modern STL constructs, use basic `for` loops, avoid templates, keep it simple +- There are no strict rules for the code style, but try to follow the patterns in the code (indentation, spaces, etc.). Vertical alignment makes things more readable and easier to batch edit +- Clean-up any trailing whitespaces, use 4 spaces for indentation, brackets on the same line, `void * ptr`, `int & a` +- See [good first issues](https://github.com/ggerganov/llama.cpp/issues?q=is%3Aissue+is%3Aopen+label%3A%22good+first+issue%22) for tasks suitable for first contributions +- Tensors store data in row-major order. 
We refer to dimension 0 as columns, 1 as rows, 2 as matrices +- Matrix multiplication is unconventional: [`C = ggml_mul_mat(ctx, A, B)`](https://github.com/ggerganov/llama.cpp/blob/880e352277fc017df4d5794f0c21c44e1eae2b84/ggml.h#L1058-L1064) means $C^T = A B^T \Leftrightarrow C = B A^T.$ + +![matmul](media/matmul.png) + +### Docs + +- [main](./examples/main/README.md) +- [server](./examples/server/README.md) +- [jeopardy](./examples/jeopardy/README.md) +- [BLIS](./docs/BLIS.md) +- [Performance troubleshooting](./docs/token_generation_performance_tips.md) +- [GGML tips & tricks](https://github.com/ggerganov/llama.cpp/wiki/GGML-Tips-&-Tricks) +- [GBNF grammars](./grammars/README.md) diff --git a/llama-cpp-python/vendor/llama.cpp/SECURITY.md b/llama-cpp-python/vendor/llama.cpp/SECURITY.md new file mode 100644 index 0000000000000000000000000000000000000000..f4322c6ee4d18c3fb9f694fcc01f2be8640adb50 --- /dev/null +++ b/llama-cpp-python/vendor/llama.cpp/SECURITY.md @@ -0,0 +1,67 @@ +# Security Policy + + - [**Using llama.cpp securely**](#using-llamacpp-securely) + - [Untrusted models](#untrusted-models) + - [Untrusted inputs](#untrusted-inputs) + - [Data privacy](#data-privacy) + - [Untrusted environments or networks](#untrusted-environments-or-networks) + - [Multi-Tenant environments](#multi-tenant-environments) + - [**Reporting a vulnerability**](#reporting-a-vulnerability) + +## Using llama.cpp securely + +### Untrusted models +Be careful when running untrusted models. This classification includes models created by unknown developers or utilizing data obtained from unknown sources. + +*Always execute untrusted models within a secure, isolated environment such as a sandbox* (e.g., containers, virtual machines). This helps protect your system from potentially malicious code. + +> [!NOTE] +> The trustworthiness of a model is not binary. You must always determine the proper level of caution depending on the specific model and how it matches your use case and risk tolerance. + +### Untrusted inputs + +Some models accept various input formats (text, images, audio, etc.). The libraries converting these inputs have varying security levels, so it's crucial to isolate the model and carefully pre-process inputs to mitigate script injection risks. + +For maximum security when handling untrusted inputs, you may need to employ the following: + +* Sandboxing: Isolate the environment where the inference happens. +* Pre-analysis: Check how the model performs by default when exposed to prompt injection (e.g. using [fuzzing for prompt injection](https://github.com/FonduAI/awesome-prompt-injection?tab=readme-ov-file#tools)). This will give you leads on how hard you will have to work on the next topics. +* Updates: Keep both LLaMA C++ and your libraries updated with the latest security patches. +* Input Sanitation: Before feeding data to the model, sanitize inputs rigorously. This involves techniques such as: + * Validation: Enforce strict rules on allowed characters and data types. + * Filtering: Remove potentially malicious scripts or code fragments. + * Encoding: Convert special characters into safe representations. + * Verification: Run tooling that identifies potential script injections (e.g. [models that detect prompt injection attempts](https://python.langchain.com/docs/guides/safety/hugging_face_prompt_injection)). + +### Data privacy + +To protect sensitive data from potential leaks or unauthorized access, it is crucial to sandbox the model execution. 
This means running the model in a secure, isolated environment, which helps mitigate many attack vectors.
+
+### Untrusted environments or networks
+
+If you can't run your models in a secure and isolated environment, or if they must be exposed to an untrusted network, make sure to take the following security precautions:
+* Confirm that the hash of any downloaded artifact (e.g. pre-trained model weights) matches a known-good value (see the sketch at the end of this file)
+* Encrypt your data if sending it over the network.
+
+### Multi-Tenant environments
+
+If you intend to run multiple models in parallel with shared memory, it is your responsibility to ensure the models do not interact or access each other's data. The primary areas of concern are tenant isolation, resource allocation, model sharing and hardware attacks.
+
+1. Tenant Isolation: Models should run separately with strong isolation methods to prevent unwanted data access. Separating networks is crucial for isolation, as it prevents unauthorized access to data or models and stops malicious users from sending graphs to execute under another tenant's identity.
+
+2. Resource Allocation: A denial of service caused by one model can impact the overall system health. Implement safeguards like rate limits, access controls, and health monitoring.
+
+3. Model Sharing: In a multi-tenant model-sharing design, tenants and users must understand the security risks of running code provided by others. Since there are no reliable methods to detect malicious models, sandboxing the model execution is the recommended approach to mitigate the risk.
+
+4. Hardware Attacks: GPUs and TPUs can also be attacked. [Research](https://scholar.google.com/scholar?q=gpu+side+channel) has shown that side-channel attacks on GPUs are possible, which can leak data from other models or processes running on the same system at the same time.
+
+## Reporting a vulnerability
+
+Beware that none of the topics under [Using llama.cpp securely](#using-llamacpp-securely) are considered vulnerabilities of LLaMA C++.
+
+However, if you have discovered a security vulnerability in this project, please report it privately. **Do not disclose it as a public issue.** This gives us time to work with you to fix the issue before public exposure, reducing the chance that the exploit will be used before a patch is released.
+
+Please disclose it as a private [security advisory](https://github.com/ggerganov/llama.cpp/security/advisories/new).
+
+This project is maintained by a team of volunteers on a reasonable-effort basis. As such, please give us at least 90 days to work on a fix before public exposure.
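+
+As a concrete illustration of the artifact-hash recommendation above, a minimal sketch (the file name and digest are placeholders for values published by the model provider):
+
+```bash
+# compare downloaded weights against a known-good SHA-256 digest
+echo "<known-good-sha256>  llama-2-7b.Q4_0.gguf" | sha256sum --check -
+```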
diff --git a/llama-cpp-python/vendor/llama.cpp/build.zig b/llama-cpp-python/vendor/llama.cpp/build.zig new file mode 100644 index 0000000000000000000000000000000000000000..96783574fe7406795ba14a2ce3f4471f4291f560 --- /dev/null +++ b/llama-cpp-python/vendor/llama.cpp/build.zig @@ -0,0 +1,172 @@ +// Compatible with Zig Version 0.11.0 +const std = @import("std"); +const ArrayList = std.ArrayList; +const Compile = std.Build.Step.Compile; +const ConfigHeader = std.Build.Step.ConfigHeader; +const Mode = std.builtin.Mode; +const CrossTarget = std.zig.CrossTarget; + +const Maker = struct { + builder: *std.build.Builder, + target: CrossTarget, + optimize: Mode, + enable_lto: bool, + + include_dirs: ArrayList([]const u8), + cflags: ArrayList([]const u8), + cxxflags: ArrayList([]const u8), + objs: ArrayList(*Compile), + + fn addInclude(m: *Maker, dir: []const u8) !void { + try m.include_dirs.append(dir); + } + fn addProjectInclude(m: *Maker, path: []const []const u8) !void { + try m.addInclude(try m.builder.build_root.join(m.builder.allocator, path)); + } + fn addCFlag(m: *Maker, flag: []const u8) !void { + try m.cflags.append(flag); + } + fn addCxxFlag(m: *Maker, flag: []const u8) !void { + try m.cxxflags.append(flag); + } + fn addFlag(m: *Maker, flag: []const u8) !void { + try m.addCFlag(flag); + try m.addCxxFlag(flag); + } + + fn init(builder: *std.build.Builder) !Maker { + const target = builder.standardTargetOptions(.{}); + const zig_version = @import("builtin").zig_version_string; + const commit_hash = try std.ChildProcess.exec( + .{ .allocator = builder.allocator, .argv = &.{ "git", "rev-parse", "HEAD" } }, + ); + try std.fs.cwd().writeFile("common/build-info.cpp", builder.fmt( + \\int LLAMA_BUILD_NUMBER = {}; + \\char const *LLAMA_COMMIT = "{s}"; + \\char const *LLAMA_COMPILER = "Zig {s}"; + \\char const *LLAMA_BUILD_TARGET = "{s}"; + \\ + , .{ 0, commit_hash.stdout[0 .. 
commit_hash.stdout.len - 1], zig_version, try target.allocDescription(builder.allocator) })); + var m = Maker{ + .builder = builder, + .target = target, + .optimize = builder.standardOptimizeOption(.{}), + .enable_lto = false, + .include_dirs = ArrayList([]const u8).init(builder.allocator), + .cflags = ArrayList([]const u8).init(builder.allocator), + .cxxflags = ArrayList([]const u8).init(builder.allocator), + .objs = ArrayList(*Compile).init(builder.allocator), + }; + + try m.addCFlag("-std=c11"); + try m.addCxxFlag("-std=c++11"); + try m.addProjectInclude(&.{}); + try m.addProjectInclude(&.{"common"}); + return m; + } + + fn obj(m: *const Maker, name: []const u8, src: []const u8) *Compile { + const o = m.builder.addObject(.{ .name = name, .target = m.target, .optimize = m.optimize }); + if (o.target.getAbi() != .msvc) + o.defineCMacro("_GNU_SOURCE", null); + + if (std.mem.endsWith(u8, src, ".c")) { + o.addCSourceFiles(&.{src}, m.cflags.items); + o.linkLibC(); + } else { + o.addCSourceFiles(&.{src}, m.cxxflags.items); + if (o.target.getAbi() == .msvc) { + o.linkLibC(); // need winsdk + crt + } else { + // linkLibCpp already add (libc++ + libunwind + libc) + o.linkLibCpp(); + } + } + for (m.include_dirs.items) |i| o.addIncludePath(.{ .path = i }); + o.want_lto = m.enable_lto; + return o; + } + + fn exe(m: *const Maker, name: []const u8, src: []const u8, deps: []const *Compile) *Compile { + const e = m.builder.addExecutable(.{ .name = name, .target = m.target, .optimize = m.optimize }); + e.addCSourceFiles(&.{src}, m.cxxflags.items); + for (deps) |d| e.addObject(d); + for (m.objs.items) |o| e.addObject(o); + for (m.include_dirs.items) |i| e.addIncludePath(.{ .path = i }); + + // https://github.com/ziglang/zig/issues/15448 + if (e.target.getAbi() == .msvc) { + e.linkLibC(); // need winsdk + crt + } else { + // linkLibCpp already add (libc++ + libunwind + libc) + e.linkLibCpp(); + } + m.builder.installArtifact(e); + e.want_lto = m.enable_lto; + return e; + } +}; + +pub fn build(b: *std.build.Builder) !void { + var make = try Maker.init(b); + make.enable_lto = b.option(bool, "lto", "Enable LTO optimization, (default: false)") orelse false; + + const ggml = make.obj("ggml", "ggml.c"); + const sgemm = make.obj("sgemm", "sgemm.cpp"); + const ggml_alloc = make.obj("ggml-alloc", "ggml-alloc.c"); + const ggml_backend = make.obj("ggml-backend", "ggml-backend.c"); + const ggml_quants = make.obj("ggml-quants", "ggml-quants.c"); + const unicode = make.obj("unicode", "unicode.cpp"); + const unicode_data = make.obj("unicode-data", "unicode-data.cpp"); + const llama = make.obj("llama", "llama.cpp"); + const buildinfo = make.obj("common", "common/build-info.cpp"); + const common = make.obj("common", "common/common.cpp"); + const console = make.obj("console", "common/console.cpp"); + const sampling = make.obj("sampling", "common/sampling.cpp"); + const grammar_parser = make.obj("grammar-parser", "common/grammar-parser.cpp"); + const json_schema_to_grammar = make.obj("json-schema-to-grammar", "common/json-schema-to-grammar.cpp"); + const train = make.obj("train", "common/train.cpp"); + const clip = make.obj("clip", "examples/llava/clip.cpp"); + const llava = make.obj("llava", "examples/llava/llava.cpp"); + + _ = make.exe("main", "examples/main/main.cpp", &.{ ggml, sgemm, ggml_alloc, ggml_backend, ggml_quants, llama, unicode, unicode_data, common, json_schema_to_grammar, buildinfo, sampling, console, grammar_parser }); + _ = make.exe("quantize", "examples/quantize/quantize.cpp", &.{ ggml, sgemm, ggml_alloc, 
ggml_backend, ggml_quants, llama, unicode, unicode_data, common, json_schema_to_grammar, buildinfo }); + _ = make.exe("perplexity", "examples/perplexity/perplexity.cpp", &.{ ggml, sgemm, ggml_alloc, ggml_backend, ggml_quants, llama, unicode, unicode_data, common, json_schema_to_grammar, buildinfo }); + _ = make.exe("embedding", "examples/embedding/embedding.cpp", &.{ ggml, sgemm, ggml_alloc, ggml_backend, ggml_quants, llama, unicode, unicode_data, common, json_schema_to_grammar, buildinfo }); + _ = make.exe("finetune", "examples/finetune/finetune.cpp", &.{ ggml, sgemm, ggml_alloc, ggml_backend, ggml_quants, llama, unicode, unicode_data, common, json_schema_to_grammar, buildinfo, train }); + _ = make.exe("train-text-from-scratch", "examples/train-text-from-scratch/train-text-from-scratch.cpp", &.{ ggml, sgemm, ggml_alloc, ggml_backend, ggml_quants, llama, unicode, unicode_data, common, json_schema_to_grammar, buildinfo, train }); + + const server = make.exe("server", "examples/server/server.cpp", &.{ ggml, sgemm, ggml_alloc, ggml_backend, ggml_quants, llama, unicode, unicode_data, common, json_schema_to_grammar, buildinfo, sampling, grammar_parser, clip, llava }); + if (server.target.isWindows()) { + server.linkSystemLibrary("ws2_32"); + } + + const server_assets = [_][]const u8{ "index.html", "index.js", "completion.js", "json-schema-to-grammar.mjs" }; + for (server_assets) |asset| { + const input_path = b.fmt("examples/server/public/{s}", .{asset}); + const output_path = b.fmt("examples/server/{s}.hpp", .{asset}); + + // Portable equivalent of `b.addSystemCommand(&.{ "xxd", "-n", asset, "-i", input_path, output_path }) })`: + + const input = try std.fs.cwd().readFileAlloc(b.allocator, input_path, std.math.maxInt(usize)); + defer b.allocator.free(input); + + var buf = std.ArrayList(u8).init(b.allocator); + defer buf.deinit(); + + for (input) |byte| { + try std.fmt.format(buf.writer(), "0x{X:0>2}, ", .{byte}); + } + + var name = try std.mem.replaceOwned(u8, b.allocator, asset, "-", "_"); + defer b.allocator.free(name); + std.mem.replaceScalar(u8, name, '.', '_'); + + try std.fs.cwd().writeFile(output_path, b.fmt( + "unsigned char {s}[] = {{{s}}};\nunsigned int {s}_len = {d};\n", + .{ name, buf.items, name, input.len }, + )); + + std.debug.print("Dumped hex of \"{s}\" ({s}) to {s}\n", .{ input_path, name, output_path }); + } +} diff --git a/llama-cpp-python/vendor/llama.cpp/ci/README.md b/llama-cpp-python/vendor/llama.cpp/ci/README.md new file mode 100644 index 0000000000000000000000000000000000000000..4064705190697a7da430e1b85863afe3c71ea123 --- /dev/null +++ b/llama-cpp-python/vendor/llama.cpp/ci/README.md @@ -0,0 +1,29 @@ +# CI + +In addition to [Github Actions](https://github.com/ggerganov/llama.cpp/actions) `llama.cpp` uses a custom CI framework: + +https://github.com/ggml-org/ci + +It monitors the `master` branch for new commits and runs the +[ci/run.sh](https://github.com/ggerganov/llama.cpp/blob/master/ci/run.sh) script on dedicated cloud instances. This allows us +to execute heavier workloads compared to just using Github Actions. Also with time, the cloud instances will be scaled +to cover various hardware architectures, including GPU and Apple Silicon instances. + +Collaborators can optionally trigger the CI run by adding the `ggml-ci` keyword to their commit message. +Only the branches of this repo are monitored for this keyword. 
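+
+For example, a commit that should run the heavier CI might look like this (the commit message itself is illustrative; only the `ggml-ci` keyword matters):
+
+```bash
+git commit -m "ci : test new backend kernels (ggml-ci)"
+```
+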
+
+It is good practice, before publishing changes, to execute the full CI locally on your machine:
+
+```bash
+mkdir tmp
+
+# CPU-only build
+bash ./ci/run.sh ./tmp/results ./tmp/mnt
+
+# with CUDA support
+GG_BUILD_CUDA=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
+
+# with SYCL support
+source /opt/intel/oneapi/setvars.sh
+GG_BUILD_SYCL=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
+```
diff --git a/llama-cpp-python/vendor/llama.cpp/ci/run.sh b/llama-cpp-python/vendor/llama.cpp/ci/run.sh new file mode 100644 index 0000000000000000000000000000000000000000..bf21b6b31c52d540df60f1a7f637c0c8a5f301d4 --- /dev/null +++ b/llama-cpp-python/vendor/llama.cpp/ci/run.sh @@ -0,0 +1,712 @@
+#!/bin/bash
+#
+# sample usage:
+#
+#   mkdir tmp
+#
+#   # CPU-only build
+#   bash ./ci/run.sh ./tmp/results ./tmp/mnt
+#
+#   # with CUDA support
+#   GG_BUILD_CUDA=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
+#
+#   # with SYCL support
+#   GG_BUILD_SYCL=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
+#
+
+if [ -z "$2" ]; then
+    echo "usage: $0 <output-dir> <mnt-dir>"
+    exit 1
+fi
+
+mkdir -p "$1"
+mkdir -p "$2"
+
+OUT=$(realpath "$1")
+MNT=$(realpath "$2")
+
+rm -f "$OUT"/*.log
+rm -f "$OUT"/*.exit
+rm -f "$OUT"/*.md
+
+sd=`dirname $0`
+cd $sd/../
+SRC=`pwd`
+
+CMAKE_EXTRA="-DLLAMA_FATAL_WARNINGS=ON"
+
+if [ ! -z ${GG_BUILD_METAL} ]; then
+    CMAKE_EXTRA="${CMAKE_EXTRA} -DLLAMA_METAL_SHADER_DEBUG=ON"
+fi
+
+if [ ! -z ${GG_BUILD_CUDA} ]; then
+    CMAKE_EXTRA="${CMAKE_EXTRA} -DLLAMA_CUDA=1"
+fi
+
+if [ ! -z ${GG_BUILD_SYCL} ]; then
+    if [ -z ${ONEAPI_ROOT} ]; then
+        echo "ONEAPI_ROOT not detected; please install the oneAPI base toolkit and enable it with:"
+        echo "source /opt/intel/oneapi/setvars.sh"
+        exit 1
+    fi
+
+    CMAKE_EXTRA="${CMAKE_EXTRA} -DLLAMA_SYCL=1 -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_SYCL_F16=ON"
+fi
+## helpers
+
+# download a file if it does not exist or if it is outdated
+function gg_wget {
+    local out=$1
+    local url=$2
+
+    local cwd=`pwd`
+
+    mkdir -p $out
+    cd $out
+
+    # should not re-download if file is the same
+    wget -nv -N $url
+
+    cd $cwd
+}
+
+function gg_printf {
+    printf -- "$@" >> $OUT/README.md
+}
+
+function gg_run {
+    ci=$1
+
+    set -o pipefail
+    set -x
+
+    gg_run_$ci | tee $OUT/$ci.log
+    cur=$?
+    echo "$cur" > $OUT/$ci.exit
+
+    set +x
+    set +o pipefail
+
+    gg_sum_$ci
+
+    ret=$((ret | cur))
+}
+
+## ci
+
+# ctest_debug
+
+function gg_run_ctest_debug {
+    cd ${SRC}
+
+    rm -rf build-ci-debug && mkdir build-ci-debug && cd build-ci-debug
+
+    set -e
+
+    (time cmake -DCMAKE_BUILD_TYPE=Debug ${CMAKE_EXTRA} .. ) 2>&1 | tee -a $OUT/${ci}-cmake.log
+    (time make -j ) 2>&1 | tee -a $OUT/${ci}-make.log
+
+    (time ctest --output-on-failure -L main -E test-opt ) 2>&1 | tee -a $OUT/${ci}-ctest.log
+
+    set +e
+}
+
+function gg_sum_ctest_debug {
+    gg_printf '### %s\n\n' "${ci}"
+
+    gg_printf 'Runs ctest in debug mode\n'
+    gg_printf '- status: %s\n' "$(cat $OUT/${ci}.exit)"
+    gg_printf '```\n'
+    gg_printf '%s\n' "$(cat $OUT/${ci}-ctest.log)"
+    gg_printf '```\n'
+    gg_printf '\n'
+}
+
+# ctest_release
+
+function gg_run_ctest_release {
+    cd ${SRC}
+
+    rm -rf build-ci-release && mkdir build-ci-release && cd build-ci-release
+
+    set -e
+
+    (time cmake -DCMAKE_BUILD_TYPE=Release ${CMAKE_EXTRA} ..
) 2>&1 | tee -a $OUT/${ci}-cmake.log + (time make -j ) 2>&1 | tee -a $OUT/${ci}-make.log + + if [ -z ${GG_BUILD_LOW_PERF} ]; then + (time ctest --output-on-failure -L main ) 2>&1 | tee -a $OUT/${ci}-ctest.log + else + (time ctest --output-on-failure -L main -E test-opt ) 2>&1 | tee -a $OUT/${ci}-ctest.log + fi + + set +e +} + +function gg_sum_ctest_release { + gg_printf '### %s\n\n' "${ci}" + + gg_printf 'Runs ctest in release mode\n' + gg_printf '- status: %s\n' "$(cat $OUT/${ci}.exit)" + gg_printf '```\n' + gg_printf '%s\n' "$(cat $OUT/${ci}-ctest.log)" + gg_printf '```\n' +} + +# test_scripts_debug + +function gg_run_test_scripts_debug { + cd ${SRC} + + set -e + + # TODO: too slow, run on dedicated node + #(cd ./examples/gguf-split && time bash tests.sh "$SRC/build-ci-debug/bin" "$MNT/models") 2>&1 | tee -a $OUT/${ci}-scripts.log + #(cd ./examples/quantize && time bash tests.sh "$SRC/build-ci-debug/bin" "$MNT/models") 2>&1 | tee -a $OUT/${ci}-scripts.log + + set +e +} + +function gg_sum_test_scripts_debug { + gg_printf '### %s\n\n' "${ci}" + + gg_printf 'Runs test scripts in debug mode\n' + gg_printf '- status: %s\n' "$(cat $OUT/${ci}.exit)" + gg_printf '```\n' + gg_printf '%s\n' "$(cat $OUT/${ci}-scripts.log)" + gg_printf '```\n' + gg_printf '\n' +} + +# test_scripts_release + +function gg_run_test_scripts_release { + cd ${SRC} + + set -e + + (cd ./examples/gguf-split && time bash tests.sh "$SRC/build-ci-release/bin" "$MNT/models") 2>&1 | tee -a $OUT/${ci}-scripts.log + (cd ./examples/quantize && time bash tests.sh "$SRC/build-ci-release/bin" "$MNT/models") 2>&1 | tee -a $OUT/${ci}-scripts.log + + set +e +} + +function gg_sum_test_scripts_release { + gg_printf '### %s\n\n' "${ci}" + + gg_printf 'Runs test scripts in release mode\n' + gg_printf '- status: %s\n' "$(cat $OUT/${ci}.exit)" + gg_printf '```\n' + gg_printf '%s\n' "$(cat $OUT/${ci}-scripts.log)" + gg_printf '```\n' + gg_printf '\n' +} + +function gg_get_model { + local gguf_3b="$MNT/models/open-llama/3B-v2/ggml-model-f16.gguf" + local gguf_7b="$MNT/models/open-llama/7B-v2/ggml-model-f16.gguf" + if [[ -s $gguf_3b ]]; then + echo -n "$gguf_3b" + elif [[ -s $gguf_7b ]]; then + echo -n "$gguf_7b" + else + echo >&2 "No model found. Can't run gg_run_ctest_with_model." + exit 1 + fi +} + +function gg_run_ctest_with_model_debug { + cd ${SRC} + + local model; model=$(gg_get_model) + cd build-ci-debug + set -e + (LLAMACPP_TEST_MODELFILE="$model" time ctest --output-on-failure -L model) 2>&1 | tee -a $OUT/${ci}-ctest.log + set +e + cd .. +} + +function gg_run_ctest_with_model_release { + cd ${SRC} + + local model; model=$(gg_get_model) + cd build-ci-release + set -e + (LLAMACPP_TEST_MODELFILE="$model" time ctest --output-on-failure -L model) 2>&1 | tee -a $OUT/${ci}-ctest.log + set +e + cd .. 
+} + +function gg_sum_ctest_with_model_debug { + gg_printf '### %s\n\n' "${ci}" + + gg_printf 'Runs ctest with model files in debug mode\n' + gg_printf '- status: %s\n' "$(cat $OUT/${ci}.exit)" + gg_printf '```\n' + gg_printf '%s\n' "$(cat $OUT/${ci}-ctest.log)" + gg_printf '```\n' +} + +function gg_sum_ctest_with_model_release { + gg_printf '### %s\n\n' "${ci}" + + gg_printf 'Runs ctest with model files in release mode\n' + gg_printf '- status: %s\n' "$(cat $OUT/${ci}.exit)" + gg_printf '```\n' + gg_printf '%s\n' "$(cat $OUT/${ci}-ctest.log)" + gg_printf '```\n' +} + +# open_llama_3b_v2 + +function gg_run_open_llama_3b_v2 { + cd ${SRC} + + gg_wget models-mnt/open-llama/3B-v2/ https://huggingface.co/openlm-research/open_llama_3b_v2/raw/main/config.json + gg_wget models-mnt/open-llama/3B-v2/ https://huggingface.co/openlm-research/open_llama_3b_v2/resolve/main/tokenizer.model + gg_wget models-mnt/open-llama/3B-v2/ https://huggingface.co/openlm-research/open_llama_3b_v2/raw/main/tokenizer_config.json + gg_wget models-mnt/open-llama/3B-v2/ https://huggingface.co/openlm-research/open_llama_3b_v2/raw/main/special_tokens_map.json + gg_wget models-mnt/open-llama/3B-v2/ https://huggingface.co/openlm-research/open_llama_3b_v2/resolve/main/pytorch_model.bin + gg_wget models-mnt/open-llama/3B-v2/ https://huggingface.co/openlm-research/open_llama_3b_v2/raw/main/generation_config.json + + gg_wget models-mnt/wikitext/ https://huggingface.co/datasets/ggml-org/ci/resolve/main/wikitext-2-raw-v1.zip + unzip -o models-mnt/wikitext/wikitext-2-raw-v1.zip -d models-mnt/wikitext/ + head -n 60 models-mnt/wikitext/wikitext-2-raw/wiki.test.raw > models-mnt/wikitext/wikitext-2-raw/wiki.test-60.raw + + path_models="../models-mnt/open-llama/3B-v2" + path_wiki="../models-mnt/wikitext/wikitext-2-raw" + + rm -rf build-ci-release && mkdir build-ci-release && cd build-ci-release + + set -e + + (time cmake -DCMAKE_BUILD_TYPE=Release ${CMAKE_EXTRA} -DLLAMA_QKK_64=1 .. 
) 2>&1 | tee -a $OUT/${ci}-cmake.log + (time make -j ) 2>&1 | tee -a $OUT/${ci}-make.log + + python3 ../convert.py ${path_models} + + model_f16="${path_models}/ggml-model-f16.gguf" + model_q8_0="${path_models}/ggml-model-q8_0.gguf" + model_q4_0="${path_models}/ggml-model-q4_0.gguf" + model_q4_1="${path_models}/ggml-model-q4_1.gguf" + model_q5_0="${path_models}/ggml-model-q5_0.gguf" + model_q5_1="${path_models}/ggml-model-q5_1.gguf" + model_q2_k="${path_models}/ggml-model-q2_k.gguf" + model_q3_k="${path_models}/ggml-model-q3_k.gguf" + model_q4_k="${path_models}/ggml-model-q4_k.gguf" + model_q5_k="${path_models}/ggml-model-q5_k.gguf" + model_q6_k="${path_models}/ggml-model-q6_k.gguf" + + wiki_test_60="${path_wiki}/wiki.test-60.raw" + + ./bin/quantize ${model_f16} ${model_q8_0} q8_0 + ./bin/quantize ${model_f16} ${model_q4_0} q4_0 + ./bin/quantize ${model_f16} ${model_q4_1} q4_1 + ./bin/quantize ${model_f16} ${model_q5_0} q5_0 + ./bin/quantize ${model_f16} ${model_q5_1} q5_1 + ./bin/quantize ${model_f16} ${model_q2_k} q2_k + ./bin/quantize ${model_f16} ${model_q3_k} q3_k + ./bin/quantize ${model_f16} ${model_q4_k} q4_k + ./bin/quantize ${model_f16} ${model_q5_k} q5_k + ./bin/quantize ${model_f16} ${model_q6_k} q6_k + + (time ./bin/main --model ${model_f16} -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-f16.log + (time ./bin/main --model ${model_q8_0} -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q8_0.log + (time ./bin/main --model ${model_q4_0} -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q4_0.log + (time ./bin/main --model ${model_q4_1} -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q4_1.log + (time ./bin/main --model ${model_q5_0} -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q5_0.log + (time ./bin/main --model ${model_q5_1} -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q5_1.log + (time ./bin/main --model ${model_q2_k} -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q2_k.log + (time ./bin/main --model ${model_q3_k} -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q3_k.log + (time ./bin/main --model ${model_q4_k} -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q4_k.log + (time ./bin/main --model ${model_q5_k} -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q5_k.log + (time ./bin/main --model ${model_q6_k} -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q6_k.log + + (time ./bin/perplexity --model ${model_f16} -f ${wiki_test_60} -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-tg-f16.log + (time ./bin/perplexity --model ${model_q8_0} -f ${wiki_test_60} -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-tg-q8_0.log + (time ./bin/perplexity --model ${model_q4_0} -f ${wiki_test_60} -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-tg-q4_0.log + (time ./bin/perplexity --model ${model_q4_1} -f ${wiki_test_60} -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-tg-q4_1.log + (time ./bin/perplexity --model ${model_q5_0} -f ${wiki_test_60} -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-tg-q5_0.log + (time ./bin/perplexity --model ${model_q5_1} -f ${wiki_test_60} -c 128 -b 
128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-tg-q5_1.log + (time ./bin/perplexity --model ${model_q2_k} -f ${wiki_test_60} -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-tg-q2_k.log + (time ./bin/perplexity --model ${model_q3_k} -f ${wiki_test_60} -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-tg-q3_k.log + (time ./bin/perplexity --model ${model_q4_k} -f ${wiki_test_60} -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-tg-q4_k.log + (time ./bin/perplexity --model ${model_q5_k} -f ${wiki_test_60} -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-tg-q5_k.log + (time ./bin/perplexity --model ${model_q6_k} -f ${wiki_test_60} -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-tg-q6_k.log + + (time ./bin/imatrix --model ${model_f16} -f ${wiki_test_60} -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-imatrix.log + + (time ./bin/save-load-state --model ${model_q4_0} ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log + (time ./bin/save-load-state -fa --model ${model_q4_0} ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log + + function check_ppl { + qnt="$1" + ppl=$(echo "$2" | grep -oE "[0-9]+\.[0-9]+" | tail -n 1) + + if [ $(echo "$ppl > 20.0" | bc) -eq 1 ]; then + printf ' - %s @ %s (FAIL: ppl > 20.0)\n' "$qnt" "$ppl" + return 20 + fi + + printf ' - %s @ %s OK\n' "$qnt" "$ppl" + return 0 + } + + check_ppl "f16" "$(cat $OUT/${ci}-tg-f16.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log + check_ppl "q8_0" "$(cat $OUT/${ci}-tg-q8_0.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log + check_ppl "q4_0" "$(cat $OUT/${ci}-tg-q4_0.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log + check_ppl "q4_1" "$(cat $OUT/${ci}-tg-q4_1.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log + check_ppl "q5_0" "$(cat $OUT/${ci}-tg-q5_0.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log + check_ppl "q5_1" "$(cat $OUT/${ci}-tg-q5_1.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log + check_ppl "q2_k" "$(cat $OUT/${ci}-tg-q2_k.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log + check_ppl "q3_k" "$(cat $OUT/${ci}-tg-q3_k.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log + check_ppl "q4_k" "$(cat $OUT/${ci}-tg-q4_k.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log + check_ppl "q5_k" "$(cat $OUT/${ci}-tg-q5_k.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log + check_ppl "q6_k" "$(cat $OUT/${ci}-tg-q6_k.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log + + cat $OUT/${ci}-imatrix.log | grep "Final" >> $OUT/${ci}-imatrix-sum.log + + # lora + function compare_ppl { + qnt="$1" + ppl1=$(echo "$2" | grep -oE "[0-9]+\.[0-9]+" | tail -n 1) + ppl2=$(echo "$3" | grep -oE "[0-9]+\.[0-9]+" | tail -n 1) + + if [ $(echo "$ppl1 < $ppl2" | bc) -eq 1 ]; then + printf ' - %s @ %s (FAIL: %s > %s)\n' "$qnt" "$ppl" "$ppl1" "$ppl2" + return 20 + fi + + printf ' - %s @ %s %s OK\n' "$qnt" "$ppl1" "$ppl2" + return 0 + } + + path_lora="../models-mnt/open-llama/3B-v2/lora" + path_shakespeare="../models-mnt/shakespeare" + + shakespeare="${path_shakespeare}/shakespeare.txt" + lora_shakespeare="${path_lora}/ggml-adapter-model.bin" + + gg_wget ${path_lora} https://huggingface.co/slaren/open_llama_3b_v2_shakespeare_lora/resolve/main/adapter_config.json + gg_wget ${path_lora} https://huggingface.co/slaren/open_llama_3b_v2_shakespeare_lora/resolve/main/adapter_model.bin + gg_wget ${path_shakespeare} https://huggingface.co/slaren/open_llama_3b_v2_shakespeare_lora/resolve/main/shakespeare.txt + + python3 ../convert-lora-to-ggml.py ${path_lora} + + # f16 + (time ./bin/perplexity --model ${model_f16} -f ${shakespeare} -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a 
$OUT/${ci}-ppl-shakespeare-f16.log + (time ./bin/perplexity --model ${model_f16} -f ${shakespeare} --lora ${lora_shakespeare} -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-ppl-shakespeare-lora-f16.log + compare_ppl "f16 shakespeare" "$(cat $OUT/${ci}-ppl-shakespeare-f16.log | grep "^\[1\]")" "$(cat $OUT/${ci}-ppl-shakespeare-lora-f16.log | grep "^\[1\]")" | tee -a $OUT/${ci}-lora-ppl.log + + # q8_0 + (time ./bin/perplexity --model ${model_q8_0} -f ${shakespeare} -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-ppl-shakespeare-q8_0.log + (time ./bin/perplexity --model ${model_q8_0} -f ${shakespeare} --lora ${lora_shakespeare} -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-ppl-shakespeare-lora-q8_0.log + compare_ppl "q8_0 shakespeare" "$(cat $OUT/${ci}-ppl-shakespeare-q8_0.log | grep "^\[1\]")" "$(cat $OUT/${ci}-ppl-shakespeare-lora-q8_0.log | grep "^\[1\]")" | tee -a $OUT/${ci}-lora-ppl.log + + # q8_0 + f16 lora-base + (time ./bin/perplexity --model ${model_q8_0} -f ${shakespeare} --lora ${lora_shakespeare} --lora-base ${model_f16} -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-ppl-shakespeare-lora-q8_0-f16.log + compare_ppl "q8_0 / f16 base shakespeare" "$(cat $OUT/${ci}-ppl-shakespeare-q8_0.log | grep "^\[1\]")" "$(cat $OUT/${ci}-ppl-shakespeare-lora-q8_0-f16.log | grep "^\[1\]")" | tee -a $OUT/${ci}-lora-ppl.log + + set +e +} + +function gg_sum_open_llama_3b_v2 { + gg_printf '### %s\n\n' "${ci}" + + gg_printf 'OpenLLaMA 3B-v2:\n' + gg_printf '- status: %s\n' "$(cat $OUT/${ci}.exit)" + gg_printf '- perplexity:\n%s\n' "$(cat $OUT/${ci}-ppl.log)" + gg_printf '- imatrix:\n```\n%s\n```\n' "$(cat $OUT/${ci}-imatrix-sum.log)" + gg_printf '- lora:\n%s\n' "$(cat $OUT/${ci}-lora-ppl.log)" + gg_printf '- f16: \n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-f16.log)" + gg_printf '- q8_0:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q8_0.log)" + gg_printf '- q4_0:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q4_0.log)" + gg_printf '- q4_1:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q4_1.log)" + gg_printf '- q5_0:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q5_0.log)" + gg_printf '- q5_1:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q5_1.log)" + gg_printf '- q2_k:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q2_k.log)" + gg_printf '- q3_k:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q3_k.log)" + gg_printf '- q4_k:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q4_k.log)" + gg_printf '- q5_k:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q5_k.log)" + gg_printf '- q6_k:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q6_k.log)" + gg_printf '- save-load-state: \n```\n%s\n```\n' "$(cat $OUT/${ci}-save-load-state.log)" + gg_printf '- shakespeare (f16):\n```\n%s\n```\n' "$(cat $OUT/${ci}-ppl-shakespeare-f16.log)" + gg_printf '- shakespeare (f16 lora):\n```\n%s\n```\n' "$(cat $OUT/${ci}-ppl-shakespeare-lora-f16.log)" + gg_printf '- shakespeare (q8_0):\n```\n%s\n```\n' "$(cat $OUT/${ci}-ppl-shakespeare-q8_0.log)" + gg_printf '- shakespeare (q8_0 lora):\n```\n%s\n```\n' "$(cat $OUT/${ci}-ppl-shakespeare-lora-q8_0.log)" + gg_printf '- shakespeare (q8_0 / f16 base lora):\n```\n%s\n```\n' "$(cat $OUT/${ci}-ppl-shakespeare-lora-q8_0-f16.log)" +} + +# open_llama_7b_v2 +# requires: GG_BUILD_CUDA + +function gg_run_open_llama_7b_v2 { + cd ${SRC} + + gg_wget models-mnt/open-llama/7B-v2/ https://huggingface.co/openlm-research/open_llama_7b_v2/raw/main/config.json + gg_wget models-mnt/open-llama/7B-v2/ https://huggingface.co/openlm-research/open_llama_7b_v2/resolve/main/tokenizer.model + gg_wget models-mnt/open-llama/7B-v2/ 
https://huggingface.co/openlm-research/open_llama_7b_v2/raw/main/tokenizer_config.json + gg_wget models-mnt/open-llama/7B-v2/ https://huggingface.co/openlm-research/open_llama_7b_v2/raw/main/special_tokens_map.json + gg_wget models-mnt/open-llama/7B-v2/ https://huggingface.co/openlm-research/open_llama_7b_v2/raw/main/pytorch_model.bin.index.json + gg_wget models-mnt/open-llama/7B-v2/ https://huggingface.co/openlm-research/open_llama_7b_v2/resolve/main/pytorch_model-00001-of-00002.bin + gg_wget models-mnt/open-llama/7B-v2/ https://huggingface.co/openlm-research/open_llama_7b_v2/resolve/main/pytorch_model-00002-of-00002.bin + gg_wget models-mnt/open-llama/7B-v2/ https://huggingface.co/openlm-research/open_llama_7b_v2/raw/main/generation_config.json + + gg_wget models-mnt/wikitext/ https://huggingface.co/datasets/ggml-org/ci/resolve/main/wikitext-2-raw-v1.zip + unzip -o models-mnt/wikitext/wikitext-2-raw-v1.zip -d models-mnt/wikitext/ + + path_models="../models-mnt/open-llama/7B-v2" + path_wiki="../models-mnt/wikitext/wikitext-2-raw" + + rm -rf build-ci-release && mkdir build-ci-release && cd build-ci-release + + set -e + + (time cmake -DCMAKE_BUILD_TYPE=Release ${CMAKE_EXTRA} -DLLAMA_CUDA=1 .. ) 2>&1 | tee -a $OUT/${ci}-cmake.log + (time make -j ) 2>&1 | tee -a $OUT/${ci}-make.log + + python3 ../convert.py ${path_models} + + model_f16="${path_models}/ggml-model-f16.gguf" + model_q8_0="${path_models}/ggml-model-q8_0.gguf" + model_q4_0="${path_models}/ggml-model-q4_0.gguf" + model_q4_1="${path_models}/ggml-model-q4_1.gguf" + model_q5_0="${path_models}/ggml-model-q5_0.gguf" + model_q5_1="${path_models}/ggml-model-q5_1.gguf" + model_q2_k="${path_models}/ggml-model-q2_k.gguf" + model_q3_k="${path_models}/ggml-model-q3_k.gguf" + model_q4_k="${path_models}/ggml-model-q4_k.gguf" + model_q5_k="${path_models}/ggml-model-q5_k.gguf" + model_q6_k="${path_models}/ggml-model-q6_k.gguf" + + wiki_test="${path_wiki}/wiki.test.raw" + + ./bin/quantize ${model_f16} ${model_q8_0} q8_0 + ./bin/quantize ${model_f16} ${model_q4_0} q4_0 + ./bin/quantize ${model_f16} ${model_q4_1} q4_1 + ./bin/quantize ${model_f16} ${model_q5_0} q5_0 + ./bin/quantize ${model_f16} ${model_q5_1} q5_1 + ./bin/quantize ${model_f16} ${model_q2_k} q2_k + ./bin/quantize ${model_f16} ${model_q3_k} q3_k + ./bin/quantize ${model_f16} ${model_q4_k} q4_k + ./bin/quantize ${model_f16} ${model_q5_k} q5_k + ./bin/quantize ${model_f16} ${model_q6_k} q6_k + + (time ./bin/main --model ${model_f16} -t 1 -ngl 999 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-f16.log + (time ./bin/main --model ${model_q8_0} -t 1 -ngl 999 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q8_0.log + (time ./bin/main --model ${model_q4_0} -t 1 -ngl 999 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q4_0.log + (time ./bin/main --model ${model_q4_1} -t 1 -ngl 999 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q4_1.log + (time ./bin/main --model ${model_q5_0} -t 1 -ngl 999 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q5_0.log + (time ./bin/main --model ${model_q5_1} -t 1 -ngl 999 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q5_1.log + (time ./bin/main --model ${model_q2_k} -t 1 -ngl 999 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a 
$OUT/${ci}-tg-q2_k.log + (time ./bin/main --model ${model_q3_k} -t 1 -ngl 999 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q3_k.log + (time ./bin/main --model ${model_q4_k} -t 1 -ngl 999 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q4_k.log + (time ./bin/main --model ${model_q5_k} -t 1 -ngl 999 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q5_k.log + (time ./bin/main --model ${model_q6_k} -t 1 -ngl 999 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q6_k.log + + (time ./bin/perplexity --model ${model_f16} -f ${wiki_test} -t 1 -ngl 999 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-f16.log + (time ./bin/perplexity --model ${model_q8_0} -f ${wiki_test} -t 1 -ngl 999 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q8_0.log + (time ./bin/perplexity --model ${model_q4_0} -f ${wiki_test} -t 1 -ngl 999 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q4_0.log + (time ./bin/perplexity --model ${model_q4_1} -f ${wiki_test} -t 1 -ngl 999 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q4_1.log + (time ./bin/perplexity --model ${model_q5_0} -f ${wiki_test} -t 1 -ngl 999 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q5_0.log + (time ./bin/perplexity --model ${model_q5_1} -f ${wiki_test} -t 1 -ngl 999 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q5_1.log + (time ./bin/perplexity --model ${model_q2_k} -f ${wiki_test} -t 1 -ngl 999 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q2_k.log + (time ./bin/perplexity --model ${model_q3_k} -f ${wiki_test} -t 1 -ngl 999 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q3_k.log + (time ./bin/perplexity --model ${model_q4_k} -f ${wiki_test} -t 1 -ngl 999 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q4_k.log + (time ./bin/perplexity --model ${model_q5_k} -f ${wiki_test} -t 1 -ngl 999 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q5_k.log + (time ./bin/perplexity --model ${model_q6_k} -f ${wiki_test} -t 1 -ngl 999 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q6_k.log + + (time ./bin/imatrix --model ${model_f16} -f ${wiki_test} -t 1 -ngl 999 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-imatrix.log + + (time ./bin/save-load-state -ngl 10 --model ${model_q4_0} ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log + (time ./bin/save-load-state -fa -ngl 10 --model ${model_q4_0} ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log + (time ./bin/save-load-state -ngl 99 --model ${model_q4_0} ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log + (time ./bin/save-load-state -fa -ngl 99 --model ${model_q4_0} ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log + + function check_ppl { + qnt="$1" + ppl=$(echo "$2" | grep -oE "[0-9]+\.[0-9]+" | tail -n 1) + + if [ $(echo "$ppl > 20.0" | bc) -eq 1 ]; then + printf ' - %s @ %s (FAIL: ppl > 20.0)\n' "$qnt" "$ppl" + return 20 + fi + + printf ' - %s @ %s OK\n' "$qnt" "$ppl" + return 0 + } + + check_ppl "f16" "$(cat $OUT/${ci}-tg-f16.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log + check_ppl "q8_0" "$(cat $OUT/${ci}-tg-q8_0.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log + check_ppl "q4_0" "$(cat $OUT/${ci}-tg-q4_0.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log + check_ppl "q4_1" "$(cat $OUT/${ci}-tg-q4_1.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log + check_ppl "q5_0" "$(cat $OUT/${ci}-tg-q5_0.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log 
+ check_ppl "q5_1" "$(cat $OUT/${ci}-tg-q5_1.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log + check_ppl "q2_k" "$(cat $OUT/${ci}-tg-q2_k.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log + check_ppl "q3_k" "$(cat $OUT/${ci}-tg-q3_k.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log + check_ppl "q4_k" "$(cat $OUT/${ci}-tg-q4_k.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log + check_ppl "q5_k" "$(cat $OUT/${ci}-tg-q5_k.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log + check_ppl "q6_k" "$(cat $OUT/${ci}-tg-q6_k.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log + + cat $OUT/${ci}-imatrix.log | grep "Final" >> $OUT/${ci}-imatrix-sum.log + + # lora + function compare_ppl { + qnt="$1" + ppl1=$(echo "$2" | grep -oE "[0-9]+\.[0-9]+" | tail -n 1) + ppl2=$(echo "$3" | grep -oE "[0-9]+\.[0-9]+" | tail -n 1) + + if [ $(echo "$ppl1 < $ppl2" | bc) -eq 1 ]; then + printf ' - %s @ %s (FAIL: %s > %s)\n' "$qnt" "$ppl" "$ppl1" "$ppl2" + return 20 + fi + + printf ' - %s @ %s %s OK\n' "$qnt" "$ppl1" "$ppl2" + return 0 + } + + path_lora="../models-mnt/open-llama/7B-v2/lora" + path_shakespeare="../models-mnt/shakespeare" + + shakespeare="${path_shakespeare}/shakespeare.txt" + lora_shakespeare="${path_lora}/ggml-adapter-model.bin" + + gg_wget ${path_lora} https://huggingface.co/slaren/open_llama_7b_v2_shakespeare_lora/resolve/main/adapter_config.json + gg_wget ${path_lora} https://huggingface.co/slaren/open_llama_7b_v2_shakespeare_lora/resolve/main/adapter_model.bin + gg_wget ${path_shakespeare} https://huggingface.co/slaren/open_llama_7b_v2_shakespeare_lora/resolve/main/shakespeare.txt + + python3 ../convert-lora-to-ggml.py ${path_lora} + + # f16 + (time ./bin/perplexity --model ${model_f16} -f ${shakespeare} -t 1 -ngl 999 -c 2048 -b 512 --chunks 3 ) 2>&1 | tee -a $OUT/${ci}-ppl-shakespeare-f16.log + (time ./bin/perplexity --model ${model_f16} -f ${shakespeare} --lora ${lora_shakespeare} -t 1 -ngl 999 -c 2048 -b 512 --chunks 3 ) 2>&1 | tee -a $OUT/${ci}-ppl-shakespeare-lora-f16.log + compare_ppl "f16 shakespeare" "$(cat $OUT/${ci}-ppl-shakespeare-f16.log | grep "^\[1\]")" "$(cat $OUT/${ci}-ppl-shakespeare-lora-f16.log | grep "^\[1\]")" | tee -a $OUT/${ci}-lora-ppl.log + + # currently not supported by the CUDA backend + # q8_0 + #(time ./bin/perplexity --model ${model_q8_0} -f ${shakespeare} -t 1 -ngl 999 -c 2048 -b 512 --chunks 3 ) 2>&1 | tee -a $OUT/${ci}-ppl-shakespeare-q8_0.log + #(time ./bin/perplexity --model ${model_q8_0} -f ${shakespeare} --lora ${lora_shakespeare} -t 1 -ngl 999 -c 2048 -b 512 --chunks 3 ) 2>&1 | tee -a $OUT/${ci}-ppl-shakespeare-lora-q8_0.log + #compare_ppl "q8_0 shakespeare" "$(cat $OUT/${ci}-ppl-shakespeare-q8_0.log | grep "^\[1\]")" "$(cat $OUT/${ci}-ppl-shakespeare-lora-q8_0.log | grep "^\[1\]")" | tee -a $OUT/${ci}-lora-ppl.log + + # q8_0 + f16 lora-base + #(time ./bin/perplexity --model ${model_q8_0} -f ${shakespeare} --lora ${lora_shakespeare} --lora-base ${model_f16} -t 1 -ngl 999 -c 2048 -b 512 --chunks 3 ) 2>&1 | tee -a $OUT/${ci}-ppl-shakespeare-lora-q8_0-f16.log + #compare_ppl "q8_0 / f16 shakespeare" "$(cat $OUT/${ci}-ppl-shakespeare-q8_0.log | grep "^\[1\]")" "$(cat $OUT/${ci}-ppl-shakespeare-lora-q8_0-f16.log | grep "^\[1\]")" | tee -a $OUT/${ci}-lora-ppl.log + + set +e +} + +function gg_sum_open_llama_7b_v2 { + gg_printf '### %s\n\n' "${ci}" + + gg_printf 'OpenLLaMA 7B-v2:\n' + gg_printf '- status: %s\n' "$(cat $OUT/${ci}.exit)" + gg_printf '- perplexity:\n%s\n' "$(cat $OUT/${ci}-ppl.log)" + gg_printf '- imatrix:\n```\n%s\n```\n' "$(cat $OUT/${ci}-imatrix-sum.log)" + 
gg_printf '- lora:\n%s\n' "$(cat $OUT/${ci}-lora-ppl.log)" + gg_printf '- f16: \n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-f16.log)" + gg_printf '- q8_0:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q8_0.log)" + gg_printf '- q4_0:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q4_0.log)" + gg_printf '- q4_1:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q4_1.log)" + gg_printf '- q5_0:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q5_0.log)" + gg_printf '- q5_1:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q5_1.log)" + gg_printf '- q2_k:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q2_k.log)" + gg_printf '- q3_k:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q3_k.log)" + gg_printf '- q4_k:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q4_k.log)" + gg_printf '- q5_k:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q5_k.log)" + gg_printf '- q6_k:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q6_k.log)" + gg_printf '- save-load-state: \n```\n%s\n```\n' "$(cat $OUT/${ci}-save-load-state.log)" + gg_printf '- shakespeare (f16):\n```\n%s\n```\n' "$(cat $OUT/${ci}-ppl-shakespeare-f16.log)" + gg_printf '- shakespeare (f16 lora):\n```\n%s\n```\n' "$(cat $OUT/${ci}-ppl-shakespeare-lora-f16.log)" + #gg_printf '- shakespeare (q8_0):\n```\n%s\n```\n' "$(cat $OUT/${ci}-ppl-shakespeare-q8_0.log)" + #gg_printf '- shakespeare (q8_0 lora):\n```\n%s\n```\n' "$(cat $OUT/${ci}-ppl-shakespeare-lora-q8_0.log)" + #gg_printf '- shakespeare (q8_0 / f16 base lora):\n```\n%s\n```\n' "$(cat $OUT/${ci}-ppl-shakespeare-lora-q8_0-f16.log)" +} + +# bge-small + +function gg_run_embd_bge_small { + cd ${SRC} + + gg_wget models-mnt/bge-small/ https://huggingface.co/BAAI/bge-small-en-v1.5/raw/main/config.json + gg_wget models-mnt/bge-small/ https://huggingface.co/BAAI/bge-small-en-v1.5/raw/main/tokenizer.json + gg_wget models-mnt/bge-small/ https://huggingface.co/BAAI/bge-small-en-v1.5/raw/main/tokenizer_config.json + gg_wget models-mnt/bge-small/ https://huggingface.co/BAAI/bge-small-en-v1.5/raw/main/special_tokens_map.json + gg_wget models-mnt/bge-small/ https://huggingface.co/BAAI/bge-small-en-v1.5/resolve/main/pytorch_model.bin + gg_wget models-mnt/bge-small/ https://huggingface.co/BAAI/bge-small-en-v1.5/raw/main/sentence_bert_config.json + gg_wget models-mnt/bge-small/ https://huggingface.co/BAAI/bge-small-en-v1.5/raw/main/vocab.txt + gg_wget models-mnt/bge-small/ https://huggingface.co/BAAI/bge-small-en-v1.5/raw/main/modules.json + gg_wget models-mnt/bge-small/ https://huggingface.co/BAAI/bge-small-en-v1.5/raw/main/config.json + + gg_wget models-mnt/bge-small/1_Pooling https://huggingface.co/BAAI/bge-small-en-v1.5/raw/main/1_Pooling/config.json + + path_models="../models-mnt/bge-small" + + rm -rf build-ci-release && mkdir build-ci-release && cd build-ci-release + + set -e + + (time cmake -DCMAKE_BUILD_TYPE=Release ${CMAKE_EXTRA} .. 
) 2>&1 | tee -a $OUT/${ci}-cmake.log + (time make -j ) 2>&1 | tee -a $OUT/${ci}-make.log + + python3 ../convert-hf-to-gguf.py ${path_models} + + model_f16="${path_models}/ggml-model-f16.gguf" + model_q8_0="${path_models}/ggml-model-q8_0.gguf" + + ./bin/quantize ${model_f16} ${model_q8_0} q8_0 + + (time ./bin/embedding --model ${model_f16} -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-f16.log + (time ./bin/embedding --model ${model_q8_0} -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q8_0.log + + set +e +} + +function gg_sum_embd_bge_small { + gg_printf '### %s\n\n' "${ci}" + + gg_printf 'BGE Small (BERT):\n' + gg_printf '- status: %s\n' "$(cat $OUT/${ci}.exit)" + gg_printf '- f16: \n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-f16.log)" + gg_printf '- q8_0:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q8_0.log)" +} + +## main + +if [ -z ${GG_BUILD_LOW_PERF} ]; then + # Create symlink: ./llama.cpp/models-mnt -> $MNT/models/models-mnt + rm -rf ${SRC}/models-mnt + mnt_models=${MNT}/models + mkdir -p ${mnt_models} + ln -sfn ${mnt_models} ${SRC}/models-mnt + + # Create a fresh python3 venv and enter it + python3 -m venv "$MNT/venv" + source "$MNT/venv/bin/activate" + + pip install -r ${SRC}/requirements.txt --disable-pip-version-check + pip install --editable gguf-py --disable-pip-version-check +fi + +ret=0 + +test $ret -eq 0 && gg_run ctest_debug +test $ret -eq 0 && gg_run ctest_release + +if [ -z ${GG_BUILD_LOW_PERF} ]; then + test $ret -eq 0 && gg_run embd_bge_small + + test $ret -eq 0 && gg_run test_scripts_debug + test $ret -eq 0 && gg_run test_scripts_release + + if [ -z ${GG_BUILD_VRAM_GB} ] || [ ${GG_BUILD_VRAM_GB} -ge 8 ]; then + if [ -z ${GG_BUILD_CUDA} ]; then + test $ret -eq 0 && gg_run open_llama_3b_v2 + else + test $ret -eq 0 && gg_run open_llama_7b_v2 + fi + test $ret -eq 0 && gg_run ctest_with_model_debug + test $ret -eq 0 && gg_run ctest_with_model_release + fi +fi + +exit $ret diff --git a/llama-cpp-python/vendor/llama.cpp/cmake/FindSIMD.cmake b/llama-cpp-python/vendor/llama.cpp/cmake/FindSIMD.cmake new file mode 100644 index 0000000000000000000000000000000000000000..33377ec44de12cb9f44386eedea76cb08aed4440 --- /dev/null +++ b/llama-cpp-python/vendor/llama.cpp/cmake/FindSIMD.cmake @@ -0,0 +1,100 @@ +include(CheckCSourceRuns) + +set(AVX_CODE " + #include + int main() + { + __m256 a; + a = _mm256_set1_ps(0); + return 0; + } +") + +set(AVX512_CODE " + #include + int main() + { + __m512i a = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0); + __m512i b = a; + __mmask64 equality_mask = _mm512_cmp_epi8_mask(a, b, _MM_CMPINT_EQ); + return 0; + } +") + +set(AVX2_CODE " + #include + int main() + { + __m256i a = {0}; + a = _mm256_abs_epi16(a); + __m256i x; + _mm256_extract_epi64(x, 0); // we rely on this in our AVX2 code + return 0; + } +") + +set(FMA_CODE " + #include + int main() + { + __m256 acc = _mm256_setzero_ps(); + const __m256 d = _mm256_setzero_ps(); + const __m256 p = _mm256_setzero_ps(); + acc = _mm256_fmadd_ps( d, p, acc ); + return 0; + } +") + +macro(check_sse type flags) + set(__FLAG_I 1) + set(CMAKE_REQUIRED_FLAGS_SAVE ${CMAKE_REQUIRED_FLAGS}) + foreach (__FLAG ${flags}) + if (NOT ${type}_FOUND) + set(CMAKE_REQUIRED_FLAGS ${__FLAG}) + check_c_source_runs("${${type}_CODE}" HAS_${type}_${__FLAG_I}) + if (HAS_${type}_${__FLAG_I}) + set(${type}_FOUND TRUE CACHE BOOL "${type} 
support") + set(${type}_FLAGS "${__FLAG}" CACHE STRING "${type} flags") + endif() + math(EXPR __FLAG_I "${__FLAG_I}+1") + endif() + endforeach() + set(CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS_SAVE}) + + if (NOT ${type}_FOUND) + set(${type}_FOUND FALSE CACHE BOOL "${type} support") + set(${type}_FLAGS "" CACHE STRING "${type} flags") + endif() + + mark_as_advanced(${type}_FOUND ${type}_FLAGS) +endmacro() + +# flags are for MSVC only! +check_sse("AVX" " ;/arch:AVX") +if (NOT ${AVX_FOUND}) + set(LLAMA_AVX OFF) +else() + set(LLAMA_AVX ON) +endif() + +check_sse("AVX2" " ;/arch:AVX2") +check_sse("FMA" " ;/arch:AVX2") +if ((NOT ${AVX2_FOUND}) OR (NOT ${FMA_FOUND})) + set(LLAMA_AVX2 OFF) +else() + set(LLAMA_AVX2 ON) +endif() + +check_sse("AVX512" " ;/arch:AVX512") +if (NOT ${AVX512_FOUND}) + set(LLAMA_AVX512 OFF) +else() + set(LLAMA_AVX512 ON) +endif() diff --git a/llama-cpp-python/vendor/llama.cpp/codecov.yml b/llama-cpp-python/vendor/llama.cpp/codecov.yml new file mode 100644 index 0000000000000000000000000000000000000000..a301c5b2c769437b20d78d126e335a586a27eff6 --- /dev/null +++ b/llama-cpp-python/vendor/llama.cpp/codecov.yml @@ -0,0 +1,14 @@ +comment: off + +coverage: + status: + project: + default: + target: auto + threshold: 0 + base: auto + patch: + default: + target: auto + threshold: 0 + base: auto diff --git a/llama-cpp-python/vendor/llama.cpp/common/CMakeLists.txt b/llama-cpp-python/vendor/llama.cpp/common/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..0ec8d6d8d03b5319b7095ab84ccd60cc74ff44a8 --- /dev/null +++ b/llama-cpp-python/vendor/llama.cpp/common/CMakeLists.txt @@ -0,0 +1,87 @@ +# common + + +# Build info header +# + +if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/../.git") + set(GIT_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../.git") + + # Is git submodule + if(NOT IS_DIRECTORY "${GIT_DIR}") + file(READ ${GIT_DIR} REAL_GIT_DIR_LINK) + string(REGEX REPLACE "gitdir: (.*)\n$" "\\1" REAL_GIT_DIR ${REAL_GIT_DIR_LINK}) + string(FIND "${REAL_GIT_DIR}" "/" SLASH_POS) + if (SLASH_POS EQUAL 0) + set(GIT_DIR "${REAL_GIT_DIR}") + else() + set(GIT_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../${REAL_GIT_DIR}") + endif() + endif() + + if(EXISTS "${GIT_DIR}/index") + set(GIT_INDEX "${GIT_DIR}/index") + else() + message(WARNING "Git index not found in git repository.") + set(GIT_INDEX "") + endif() +else() + message(WARNING "Git repository not found; to enable automatic generation of build info, make sure Git is installed and the project is a Git repository.") + set(GIT_INDEX "") +endif() + +# Add a custom command to rebuild build-info.cpp when .git/index changes +add_custom_command( + OUTPUT "${CMAKE_CURRENT_SOURCE_DIR}/build-info.cpp" + COMMENT "Generating build details from Git" + COMMAND ${CMAKE_COMMAND} -DMSVC=${MSVC} -DCMAKE_C_COMPILER_VERSION=${CMAKE_C_COMPILER_VERSION} + -DCMAKE_C_COMPILER_ID=${CMAKE_C_COMPILER_ID} -DCMAKE_VS_PLATFORM_NAME=${CMAKE_VS_PLATFORM_NAME} + -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} -P "${CMAKE_CURRENT_SOURCE_DIR}/../scripts/gen-build-info-cpp.cmake" + WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/.." 
+ DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/build-info.cpp.in" ${GIT_INDEX} + VERBATIM +) +set(TARGET build_info) +add_library(${TARGET} OBJECT build-info.cpp) +if (BUILD_SHARED_LIBS) + set_target_properties(${TARGET} PROPERTIES POSITION_INDEPENDENT_CODE ON) +endif() + +set(TARGET common) + +add_library(${TARGET} STATIC + base64.hpp + common.h + common.cpp + sampling.h + sampling.cpp + console.h + console.cpp + grammar-parser.h + grammar-parser.cpp + json.hpp + json-schema-to-grammar.cpp + train.h + train.cpp + ngram-cache.h + ngram-cache.cpp + ) + +if (BUILD_SHARED_LIBS) + set_target_properties(${TARGET} PROPERTIES POSITION_INDEPENDENT_CODE ON) +endif() + +set(LLAMA_COMMON_EXTRA_LIBS build_info) + +# Use curl to download model url +if (LLAMA_CURL) + find_package(CURL REQUIRED) + add_definitions(-DLLAMA_USE_CURL) + include_directories(${CURL_INCLUDE_DIRS}) + find_library(CURL_LIBRARY curl REQUIRED) + set(LLAMA_COMMON_EXTRA_LIBS ${LLAMA_COMMON_EXTRA_LIBS} ${CURL_LIBRARY}) +endif () + +target_include_directories(${TARGET} PUBLIC .) +target_compile_features(${TARGET} PUBLIC cxx_std_11) +target_link_libraries(${TARGET} PRIVATE ${LLAMA_COMMON_EXTRA_LIBS} PUBLIC llama) diff --git a/llama-cpp-python/vendor/llama.cpp/common/base64.hpp b/llama-cpp-python/vendor/llama.cpp/common/base64.hpp new file mode 100644 index 0000000000000000000000000000000000000000..563247a6e5f7dba837c07a509026d8b36e61387c --- /dev/null +++ b/llama-cpp-python/vendor/llama.cpp/common/base64.hpp @@ -0,0 +1,392 @@ +/* +This is free and unencumbered software released into the public domain. + +Anyone is free to copy, modify, publish, use, compile, sell, or +distribute this software, either in source code form or as a compiled +binary, for any purpose, commercial or non-commercial, and by any +means. + +In jurisdictions that recognize copyright laws, the author or authors +of this software dedicate any and all copyright interest in the +software to the public domain. We make this dedication for the benefit +of the public at large and to the detriment of our heirs and +successors. We intend this dedication to be an overt act of +relinquishment in perpetuity of all present and future rights to this +software under copyright law. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +OTHER DEALINGS IN THE SOFTWARE. + +For more information, please refer to +*/ + +#ifndef PUBLIC_DOMAIN_BASE64_HPP_ +#define PUBLIC_DOMAIN_BASE64_HPP_ + +#include +#include +#include +#include + +class base64_error : public std::runtime_error +{ +public: + using std::runtime_error::runtime_error; +}; + +class base64 +{ +public: + enum class alphabet + { + /** the alphabet is detected automatically */ + auto_, + /** the standard base64 alphabet is used */ + standard, + /** like `standard` except that the characters `+` and `/` are replaced by `-` and `_` respectively*/ + url_filename_safe + }; + + enum class decoding_behavior + { + /** if the input is not padded, the remaining bits are ignored */ + moderate, + /** if a padding character is encounter decoding is finished */ + loose + }; + + /** + Encodes all the elements from `in_begin` to `in_end` to `out`. 
+ + @warning The source and destination cannot overlap. The destination must be able to hold at least + `required_encode_size(std::distance(in_begin, in_end))`, otherwise the behavior depends on the output iterator. + + @tparam Input_iterator the source; the returned elements are cast to `std::uint8_t` and should not be greater than + 8 bits + @tparam Output_iterator the destination; the elements written to it are from the type `char` + @param in_begin the beginning of the source + @param in_end the ending of the source + @param out the destination iterator + @param alphabet which alphabet should be used + @returns the iterator to the next element past the last element copied + @throws see `Input_iterator` and `Output_iterator` + */ + template + static Output_iterator encode(Input_iterator in_begin, Input_iterator in_end, Output_iterator out, + alphabet alphabet = alphabet::standard) + { + constexpr auto pad = '='; + const char* alpha = alphabet == alphabet::url_filename_safe + ? "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_" + : "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; + + while (in_begin != in_end) { + std::uint8_t i0 = 0, i1 = 0, i2 = 0; + + // first character + i0 = static_cast(*in_begin); + ++in_begin; + + *out = alpha[i0 >> 2 & 0x3f]; + ++out; + + // part of first character and second + if (in_begin != in_end) { + i1 = static_cast(*in_begin); + ++in_begin; + + *out = alpha[((i0 & 0x3) << 4) | (i1 >> 4 & 0x0f)]; + ++out; + } else { + *out = alpha[(i0 & 0x3) << 4]; + ++out; + + // last padding + *out = pad; + ++out; + + // last padding + *out = pad; + ++out; + + break; + } + + // part of second character and third + if (in_begin != in_end) { + i2 = static_cast(*in_begin); + ++in_begin; + + *out = alpha[((i1 & 0xf) << 2) | (i2 >> 6 & 0x03)]; + ++out; + } else { + *out = alpha[(i1 & 0xf) << 2]; + ++out; + + // last padding + *out = pad; + ++out; + + break; + } + + // rest of third + *out = alpha[i2 & 0x3f]; + ++out; + } + + return out; + } + /** + Encodes a string. + + @param str the string that should be encoded + @param alphabet which alphabet should be used + @returns the encoded base64 string + @throws see base64::encode() + */ + static std::string encode(const std::string& str, alphabet alphabet = alphabet::standard) + { + std::string result; + + result.reserve(required_encode_size(str.length()) + 1); + + encode(str.begin(), str.end(), std::back_inserter(result), alphabet); + + return result; + } + /** + Encodes a char array. + + @param buffer the char array + @param size the size of the array + @param alphabet which alphabet should be used + @returns the encoded string + */ + static std::string encode(const char* buffer, std::size_t size, alphabet alphabet = alphabet::standard) + { + std::string result; + + result.reserve(required_encode_size(size) + 1); + + encode(buffer, buffer + size, std::back_inserter(result), alphabet); + + return result; + } + /** + Decodes all the elements from `in_begin` to `in_end` to `out`. `in_begin` may point to the same location as `out`, + in other words: inplace decoding is possible. + + @warning The destination must be able to hold at least `required_decode_size(std::distance(in_begin, in_end))`, + otherwise the behavior depends on the output iterator. 
+ + @tparam Input_iterator the source; the returned elements are cast to `char` + @tparam Output_iterator the destination; the elements written to it are from the type `std::uint8_t` + @param in_begin the beginning of the source + @param in_end the ending of the source + @param out the destination iterator + @param alphabet which alphabet should be used + @param behavior the behavior when an error was detected + @returns the iterator to the next element past the last element copied + @throws base64_error depending on the set behavior + @throws see `Input_iterator` and `Output_iterator` + */ + template + static Output_iterator decode(Input_iterator in_begin, Input_iterator in_end, Output_iterator out, + alphabet alphabet = alphabet::auto_, + decoding_behavior behavior = decoding_behavior::moderate) + { + //constexpr auto pad = '='; + std::uint8_t last = 0; + auto bits = 0; + + while (in_begin != in_end) { + auto c = *in_begin; + ++in_begin; + + if (c == '=') { + break; + } + + auto part = _base64_value(alphabet, c); + + // enough bits for one byte + if (bits + 6 >= 8) { + *out = (last << (8 - bits)) | (part >> (bits - 2)); + ++out; + + bits -= 2; + } else { + bits += 6; + } + + last = part; + } + + // check padding + if (behavior != decoding_behavior::loose) { + while (in_begin != in_end) { + auto c = *in_begin; + ++in_begin; + + if (c != '=') { + throw base64_error("invalid base64 character."); + } + } + } + + return out; + } + /** + Decodes a string. + + @param str the base64 encoded string + @param alphabet which alphabet should be used + @param behavior the behavior when an error was detected + @returns the decoded string + @throws see base64::decode() + */ + static std::string decode(const std::string& str, alphabet alphabet = alphabet::auto_, + decoding_behavior behavior = decoding_behavior::moderate) + { + std::string result; + + result.reserve(max_decode_size(str.length())); + + decode(str.begin(), str.end(), std::back_inserter(result), alphabet, behavior); + + return result; + } + /** + Decodes a string. + + @param buffer the base64 encoded buffer + @param size the size of the buffer + @param alphabet which alphabet should be used + @param behavior the behavior when an error was detected + @returns the decoded string + @throws see base64::decode() + */ + static std::string decode(const char* buffer, std::size_t size, alphabet alphabet = alphabet::auto_, + decoding_behavior behavior = decoding_behavior::moderate) + { + std::string result; + + result.reserve(max_decode_size(size)); + + decode(buffer, buffer + size, std::back_inserter(result), alphabet, behavior); + + return result; + } + /** + Decodes a string inplace. + + @param[in,out] str the base64 encoded string + @param alphabet which alphabet should be used + @param behavior the behavior when an error was detected + @throws base64::decode_inplace() + */ + static void decode_inplace(std::string& str, alphabet alphabet = alphabet::auto_, + decoding_behavior behavior = decoding_behavior::moderate) + { + str.resize(decode(str.begin(), str.end(), str.begin(), alphabet, behavior) - str.begin()); + } + /** + Decodes a char array inplace. 
+ + @param[in,out] str the string array + @param size the length of the array + @param alphabet which alphabet should be used + @param behavior the behavior when an error was detected + @returns the pointer to the next element past the last element decoded + @throws base64::decode_inplace() + */ + static char* decode_inplace(char* str, std::size_t size, alphabet alphabet = alphabet::auto_, + decoding_behavior behavior = decoding_behavior::moderate) + { + return decode(str, str + size, str, alphabet, behavior); + } + /** + Returns the required decoding size for a given size. The value is calculated with the following formula: + + $$ + \lceil \frac{size}{4} \rceil \cdot 3 + $$ + + @param size the size of the encoded input + @returns the size of the resulting decoded buffer; this the absolute maximum + */ + static std::size_t max_decode_size(std::size_t size) noexcept + { + return (size / 4 + (size % 4 ? 1 : 0)) * 3; + } + /** + Returns the required encoding size for a given size. The value is calculated with the following formula: + + $$ + \lceil \frac{size}{3} \rceil \cdot 4 + $$ + + @param size the size of the decoded input + @returns the size of the resulting encoded buffer + */ + static std::size_t required_encode_size(std::size_t size) noexcept + { + return (size / 3 + (size % 3 ? 1 : 0)) * 4; + } + +private: + static std::uint8_t _base64_value(alphabet& alphabet, char c) + { + if (c >= 'A' && c <= 'Z') { + return c - 'A'; + } else if (c >= 'a' && c <= 'z') { + return c - 'a' + 26; + } else if (c >= '0' && c <= '9') { + return c - '0' + 52; + } + + // comes down to alphabet + if (alphabet == alphabet::standard) { + if (c == '+') { + return 62; + } else if (c == '/') { + return 63; + } + } else if (alphabet == alphabet::url_filename_safe) { + if (c == '-') { + return 62; + } else if (c == '_') { + return 63; + } + } // auto detect + else { + if (c == '+') { + alphabet = alphabet::standard; + + return 62; + } else if (c == '/') { + alphabet = alphabet::standard; + + return 63; + } else if (c == '-') { + alphabet = alphabet::url_filename_safe; + + return 62; + } else if (c == '_') { + alphabet = alphabet::url_filename_safe; + + return 63; + } + } + + throw base64_error("invalid base64 character."); + } +}; + +#endif // !PUBLIC_DOMAIN_BASE64_HPP_ diff --git a/llama-cpp-python/vendor/llama.cpp/common/build-info.cpp.in b/llama-cpp-python/vendor/llama.cpp/common/build-info.cpp.in new file mode 100644 index 0000000000000000000000000000000000000000..0b945aa68fff3e77b881328e4fd63f6e2f9017ee --- /dev/null +++ b/llama-cpp-python/vendor/llama.cpp/common/build-info.cpp.in @@ -0,0 +1,4 @@ +int LLAMA_BUILD_NUMBER = @BUILD_NUMBER@; +char const *LLAMA_COMMIT = "@BUILD_COMMIT@"; +char const *LLAMA_COMPILER = "@BUILD_COMPILER@"; +char const *LLAMA_BUILD_TARGET = "@BUILD_TARGET@"; diff --git a/llama-cpp-python/vendor/llama.cpp/common/common.cpp b/llama-cpp-python/vendor/llama.cpp/common/common.cpp new file mode 100644 index 0000000000000000000000000000000000000000..243b88abf1aab44dcce3be44b2e10322ceab4e61 --- /dev/null +++ b/llama-cpp-python/vendor/llama.cpp/common/common.cpp @@ -0,0 +1,2990 @@ +#include "common.h" +#include "json.hpp" +#include "json-schema-to-grammar.h" +#include "llama.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#if defined(__APPLE__) && defined(__MACH__) +#include +#include +#endif + +#if defined(_WIN32) +#define WIN32_LEAN_AND_MEAN +#ifndef NOMINMAX +# define 
NOMINMAX +#endif +#include +#include +#include +#include +#else +#include +#include +#include +#endif +#if defined(LLAMA_USE_CURL) +#include +#include +#include +#include +#endif + +#if defined(_MSC_VER) +#pragma warning(disable: 4244 4267) // possible loss of data +#endif + +#if (defined(GGML_USE_CUDA) || defined(GGML_USE_SYCL)) +#define GGML_USE_CUDA_SYCL +#endif + +#if (defined(GGML_USE_CUDA) || defined(GGML_USE_SYCL)) || defined(GGML_USE_VULKAN) +#define GGML_USE_CUDA_SYCL_VULKAN +#endif + +#if defined(LLAMA_USE_CURL) +#ifdef __linux__ +#include +#elif defined(_WIN32) +#define PATH_MAX MAX_PATH +#else +#include +#endif +#define LLAMA_CURL_MAX_URL_LENGTH 2084 // Maximum URL Length in Chrome: 2083 +#endif // LLAMA_USE_CURL + +using json = nlohmann::ordered_json; + +int32_t get_num_physical_cores() { +#ifdef __linux__ + // enumerate the set of thread siblings, num entries is num cores + std::unordered_set siblings; + for (uint32_t cpu=0; cpu < UINT32_MAX; ++cpu) { + std::ifstream thread_siblings("/sys/devices/system/cpu" + + std::to_string(cpu) + "/topology/thread_siblings"); + if (!thread_siblings.is_open()) { + break; // no more cpus + } + std::string line; + if (std::getline(thread_siblings, line)) { + siblings.insert(line); + } + } + if (!siblings.empty()) { + return static_cast(siblings.size()); + } +#elif defined(__APPLE__) && defined(__MACH__) + int32_t num_physical_cores; + size_t len = sizeof(num_physical_cores); + int result = sysctlbyname("hw.perflevel0.physicalcpu", &num_physical_cores, &len, NULL, 0); + if (result == 0) { + return num_physical_cores; + } + result = sysctlbyname("hw.physicalcpu", &num_physical_cores, &len, NULL, 0); + if (result == 0) { + return num_physical_cores; + } +#elif defined(_WIN32) + //TODO: Implement +#endif + unsigned int n_threads = std::thread::hardware_concurrency(); + return n_threads > 0 ? (n_threads <= 4 ? n_threads : n_threads / 2) : 4; +} + +#if defined(__x86_64__) && defined(__linux__) && !defined(__ANDROID__) +#include + +static void cpuid(unsigned leaf, unsigned subleaf, + unsigned *eax, unsigned *ebx, unsigned *ecx, unsigned *edx) { + __asm__("movq\t%%rbx,%%rsi\n\t" + "cpuid\n\t" + "xchgq\t%%rbx,%%rsi" + : "=a"(*eax), "=S"(*ebx), "=c"(*ecx), "=d"(*edx) + : "0"(leaf), "2"(subleaf)); +} + +static int pin_cpu(int cpu) { + cpu_set_t mask; + CPU_ZERO(&mask); + CPU_SET(cpu, &mask); + return pthread_setaffinity_np(pthread_self(), sizeof(mask), &mask); +} + +static bool is_hybrid_cpu(void) { + unsigned eax, ebx, ecx, edx; + cpuid(7, 0, &eax, &ebx, &ecx, &edx); + return !!(edx & (1u << 15)); +} + +static bool is_running_on_efficiency_core(void) { + unsigned eax, ebx, ecx, edx; + cpuid(0x1a, 0, &eax, &ebx, &ecx, &edx); + int intel_atom = 0x20; + int core_type = (eax & 0xff000000u) >> 24; + return core_type == intel_atom; +} + +static int count_math_cpus(int cpu_count) { + int result = 0; + for (int cpu = 0; cpu < cpu_count; ++cpu) { + if (pin_cpu(cpu)) { + return -1; + } + if (is_running_on_efficiency_core()) { + continue; // efficiency cores harm lockstep threading + } + ++cpu; // hyperthreading isn't useful for linear algebra + ++result; + } + return result; +} + +#endif // __x86_64__ && __linux__ + +/** + * Returns number of CPUs on system that are useful for math. 
+ */ +int get_math_cpu_count() { +#if defined(__x86_64__) && defined(__linux__) && !defined(__ANDROID__) + int cpu_count = sysconf(_SC_NPROCESSORS_ONLN); + if (cpu_count < 1) { + return get_num_physical_cores(); + } + if (is_hybrid_cpu()) { + cpu_set_t affinity; + if (!pthread_getaffinity_np(pthread_self(), sizeof(affinity), &affinity)) { + int result = count_math_cpus(cpu_count); + pthread_setaffinity_np(pthread_self(), sizeof(affinity), &affinity); + if (result > 0) { + return result; + } + } + } +#endif + return get_num_physical_cores(); +} + +void process_escapes(std::string & input) { + std::size_t input_len = input.length(); + std::size_t output_idx = 0; + + for (std::size_t input_idx = 0; input_idx < input_len; ++input_idx) { + if (input[input_idx] == '\\' && input_idx + 1 < input_len) { + switch (input[++input_idx]) { + case 'n': input[output_idx++] = '\n'; break; + case 'r': input[output_idx++] = '\r'; break; + case 't': input[output_idx++] = '\t'; break; + case '\'': input[output_idx++] = '\''; break; + case '\"': input[output_idx++] = '\"'; break; + case '\\': input[output_idx++] = '\\'; break; + case 'x': + // Handle \x12, etc + if (input_idx + 2 < input_len) { + const char x[3] = { input[input_idx + 1], input[input_idx + 2], 0 }; + char *err_p = nullptr; + const long val = std::strtol(x, &err_p, 16); + if (err_p == x + 2) { + input_idx += 2; + input[output_idx++] = char(val); + break; + } + } + // fall through + default: input[output_idx++] = '\\'; + input[output_idx++] = input[input_idx]; break; + } + } else { + input[output_idx++] = input[input_idx]; + } + } + + input.resize(output_idx); +} + +bool gpt_params_parse(int argc, char ** argv, gpt_params & params) { + bool result = true; + try { + if (!gpt_params_parse_ex(argc, argv, params)) { + gpt_print_usage(argc, argv, gpt_params()); + exit(0); + } + } + catch (const std::invalid_argument & ex) { + fprintf(stderr, "%s\n", ex.what()); + gpt_print_usage(argc, argv, gpt_params()); + exit(1); + } + return result; +} + +bool parse_kv_override(const char * data, std::vector & overrides) { + const char * sep = strchr(data, '='); + if (sep == nullptr || sep - data >= 128) { + fprintf(stderr, "%s: malformed KV override '%s'\n", __func__, data); + return false; + } + llama_model_kv_override kvo; + std::strncpy(kvo.key, data, sep - data); + kvo.key[sep - data] = 0; + sep++; + if (strncmp(sep, "int:", 4) == 0) { + sep += 4; + kvo.tag = LLAMA_KV_OVERRIDE_TYPE_INT; + kvo.val_i64 = std::atol(sep); + } else if (strncmp(sep, "float:", 6) == 0) { + sep += 6; + kvo.tag = LLAMA_KV_OVERRIDE_TYPE_FLOAT; + kvo.val_f64 = std::atof(sep); + } else if (strncmp(sep, "bool:", 5) == 0) { + sep += 5; + kvo.tag = LLAMA_KV_OVERRIDE_TYPE_BOOL; + if (std::strcmp(sep, "true") == 0) { + kvo.val_bool = true; + } else if (std::strcmp(sep, "false") == 0) { + kvo.val_bool = false; + } else { + fprintf(stderr, "%s: invalid boolean value for KV override '%s'\n", __func__, data); + return false; + } + } else if (strncmp(sep, "str:", 4) == 0) { + sep += 4; + kvo.tag = LLAMA_KV_OVERRIDE_TYPE_STR; + if (strlen(sep) > 127) { + fprintf(stderr, "%s: malformed KV override '%s', value cannot exceed 127 chars\n", __func__, data); + return false; + } + strncpy(kvo.val_str, sep, 127); + kvo.val_str[127] = '\0'; + } else { + fprintf(stderr, "%s: invalid type for KV override '%s'\n", __func__, data); + return false; + } + overrides.emplace_back(std::move(kvo)); + return true; +} + +bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_params & params, int & 
i, bool & invalid_param) { + llama_sampling_params & sparams = params.sparams; + + if (arg == "-s" || arg == "--seed") { + if (++i >= argc) { + invalid_param = true; + return true; + } + // This is temporary, in the future the samplign state will be moved fully to llama_sampling_context. + params.seed = std::stoul(argv[i]); + sparams.seed = std::stoul(argv[i]); + return true; + } + if (arg == "-t" || arg == "--threads") { + if (++i >= argc) { + invalid_param = true; + return true; + } + params.n_threads = std::stoi(argv[i]); + if (params.n_threads <= 0) { + params.n_threads = std::thread::hardware_concurrency(); + } + return true; + } + if (arg == "-tb" || arg == "--threads-batch") { + if (++i >= argc) { + invalid_param = true; + return true; + } + params.n_threads_batch = std::stoi(argv[i]); + if (params.n_threads_batch <= 0) { + params.n_threads_batch = std::thread::hardware_concurrency(); + } + return true; + } + if (arg == "-td" || arg == "--threads-draft") { + if (++i >= argc) { + invalid_param = true; + return true; + } + params.n_threads_draft = std::stoi(argv[i]); + if (params.n_threads_draft <= 0) { + params.n_threads_draft = std::thread::hardware_concurrency(); + } + return true; + } + if (arg == "-tbd" || arg == "--threads-batch-draft") { + if (++i >= argc) { + invalid_param = true; + return true; + } + params.n_threads_batch_draft = std::stoi(argv[i]); + if (params.n_threads_batch_draft <= 0) { + params.n_threads_batch_draft = std::thread::hardware_concurrency(); + } + return true; + } + if (arg == "-p" || arg == "--prompt") { + if (++i >= argc) { + invalid_param = true; + return true; + } + params.prompt = argv[i]; + return true; + } + if (arg == "-e" || arg == "--escape") { + params.escape = true; + return true; + } + if (arg == "--prompt-cache") { + if (++i >= argc) { + invalid_param = true; + return true; + } + params.path_prompt_cache = argv[i]; + return true; + } + if (arg == "--prompt-cache-all") { + params.prompt_cache_all = true; + return true; + } + if (arg == "--prompt-cache-ro") { + params.prompt_cache_ro = true; + return true; + } + if (arg == "-bf" || arg == "--binary-file") { + if (++i >= argc) { + invalid_param = true; + return true; + } + std::ifstream file(argv[i], std::ios::binary); + if (!file) { + fprintf(stderr, "error: failed to open file '%s'\n", argv[i]); + invalid_param = true; + return true; + } + // store the external file name in params + params.prompt_file = argv[i]; + std::ostringstream ss; + ss << file.rdbuf(); + params.prompt = ss.str(); + fprintf(stderr, "Read %zu bytes from binary file %s\n", params.prompt.size(), argv[i]); + return true; + } + if (arg == "-f" || arg == "--file") { + if (++i >= argc) { + invalid_param = true; + return true; + } + std::ifstream file(argv[i]); + if (!file) { + fprintf(stderr, "error: failed to open file '%s'\n", argv[i]); + invalid_param = true; + return true; + } + // store the external file name in params + params.prompt_file = argv[i]; + std::copy(std::istreambuf_iterator(file), std::istreambuf_iterator(), back_inserter(params.prompt)); + if (!params.prompt.empty() && params.prompt.back() == '\n') { + params.prompt.pop_back(); + } + return true; + } + if (arg == "-n" || arg == "--n-predict") { + if (++i >= argc) { + invalid_param = true; + return true; + } + params.n_predict = std::stoi(argv[i]); + return true; + } + if (arg == "--top-k") { + if (++i >= argc) { + invalid_param = true; + return true; + } + sparams.top_k = std::stoi(argv[i]); + return true; + } + if (arg == "-c" || arg == "--ctx-size") { + if 
(++i >= argc) { + invalid_param = true; + return true; + } + params.n_ctx = std::stoi(argv[i]); + return true; + } + if (arg == "--grp-attn-n" || arg == "-gan") { + if (++i >= argc) { + invalid_param = true; + return true; + } + params.grp_attn_n = std::stoi(argv[i]); + return true; + } + if (arg == "--grp-attn-w" || arg == "-gaw") { + if (++i >= argc) { + invalid_param = true; + return true; + } + params.grp_attn_w = std::stoi(argv[i]); + return true; + } + if (arg == "--rope-freq-base") { + if (++i >= argc) { + invalid_param = true; + return true; + } + params.rope_freq_base = std::stof(argv[i]); + return true; + } + if (arg == "--rope-freq-scale") { + if (++i >= argc) { + invalid_param = true; + return true; + } + params.rope_freq_scale = std::stof(argv[i]); + return true; + } + if (arg == "--rope-scaling") { + if (++i >= argc) { + invalid_param = true; + return true; + } + std::string value(argv[i]); + /**/ if (value == "none") { params.rope_scaling_type = LLAMA_ROPE_SCALING_TYPE_NONE; } + else if (value == "linear") { params.rope_scaling_type = LLAMA_ROPE_SCALING_TYPE_LINEAR; } + else if (value == "yarn") { params.rope_scaling_type = LLAMA_ROPE_SCALING_TYPE_YARN; } + else { invalid_param = true; } + return true; + } + if (arg == "--rope-scale") { + if (++i >= argc) { + invalid_param = true; + return true; + } + params.rope_freq_scale = 1.0f / std::stof(argv[i]); + return true; + } + if (arg == "--yarn-orig-ctx") { + if (++i >= argc) { + invalid_param = true; + return true; + } + params.yarn_orig_ctx = std::stoi(argv[i]); + return true; + } + if (arg == "--yarn-ext-factor") { + if (++i >= argc) { + invalid_param = true; + return true; + } + params.yarn_ext_factor = std::stof(argv[i]); + return true; + } + if (arg == "--yarn-attn-factor") { + if (++i >= argc) { + invalid_param = true; + return true; + } + params.yarn_attn_factor = std::stof(argv[i]); + return true; + } + if (arg == "--yarn-beta-fast") { + if (++i >= argc) { + invalid_param = true; + return true; + } + params.yarn_beta_fast = std::stof(argv[i]); + return true; + } + if (arg == "--yarn-beta-slow") { + if (++i >= argc) { + invalid_param = true; + return true; + } + params.yarn_beta_slow = std::stof(argv[i]); + return true; + } + if (arg == "--pooling") { + if (++i >= argc) { + invalid_param = true; + return true; + } + std::string value(argv[i]); + /**/ if (value == "none") { params.pooling_type = LLAMA_POOLING_TYPE_NONE; } + else if (value == "mean") { params.pooling_type = LLAMA_POOLING_TYPE_MEAN; } + else if (value == "cls") { params.pooling_type = LLAMA_POOLING_TYPE_CLS; } + else { invalid_param = true; } + return true; + } + if (arg == "--defrag-thold" || arg == "-dt") { + if (++i >= argc) { + invalid_param = true; + return true; + } + params.defrag_thold = std::stof(argv[i]); + return true; + } + if (arg == "--samplers") { + if (++i >= argc) { + invalid_param = true; + return true; + } + const auto sampler_names = string_split(argv[i], ';'); + sparams.samplers_sequence = sampler_types_from_names(sampler_names, true); + return true; + } + if (arg == "--sampling-seq") { + if (++i >= argc) { + invalid_param = true; + return true; + } + sparams.samplers_sequence = sampler_types_from_chars(argv[i]); + return true; + } + if (arg == "--top-p") { + if (++i >= argc) { + invalid_param = true; + return true; + } + sparams.top_p = std::stof(argv[i]); + return true; + } + if (arg == "--min-p") { + if (++i >= argc) { + invalid_param = true; + return true; + } + sparams.min_p = std::stof(argv[i]); + return true; + } + if (arg == 
"--temp") { + if (++i >= argc) { + invalid_param = true; + return true; + } + sparams.temp = std::stof(argv[i]); + sparams.temp = std::max(sparams.temp, 0.0f); + return true; + } + if (arg == "--tfs") { + if (++i >= argc) { + invalid_param = true; + return true; + } + sparams.tfs_z = std::stof(argv[i]); + return true; + } + if (arg == "--typical") { + if (++i >= argc) { + invalid_param = true; + return true; + } + sparams.typical_p = std::stof(argv[i]); + return true; + } + if (arg == "--repeat-last-n") { + if (++i >= argc) { + invalid_param = true; + return true; + } + sparams.penalty_last_n = std::stoi(argv[i]); + sparams.n_prev = std::max(sparams.n_prev, sparams.penalty_last_n); + return true; + } + if (arg == "--repeat-penalty") { + if (++i >= argc) { + invalid_param = true; + return true; + } + sparams.penalty_repeat = std::stof(argv[i]); + return true; + } + if (arg == "--frequency-penalty") { + if (++i >= argc) { + invalid_param = true; + return true; + } + sparams.penalty_freq = std::stof(argv[i]); + return true; + } + if (arg == "--presence-penalty") { + if (++i >= argc) { + invalid_param = true; + return true; + } + sparams.penalty_present = std::stof(argv[i]); + return true; + } + if (arg == "--dynatemp-range") { + if (++i >= argc) { + invalid_param = true; + return true; + } + sparams.dynatemp_range = std::stof(argv[i]); + return true; + } + if (arg == "--dynatemp-exp") { + if (++i >= argc) { + invalid_param = true; + return true; + } + sparams.dynatemp_exponent = std::stof(argv[i]); + return true; + } + if (arg == "--mirostat") { + if (++i >= argc) { + invalid_param = true; + return true; + } + sparams.mirostat = std::stoi(argv[i]); + return true; + } + if (arg == "--mirostat-lr") { + if (++i >= argc) { + invalid_param = true; + return true; + } + sparams.mirostat_eta = std::stof(argv[i]); + return true; + } + if (arg == "--mirostat-ent") { + if (++i >= argc) { + invalid_param = true; + return true; + } + sparams.mirostat_tau = std::stof(argv[i]); + return true; + } + if (arg == "--cfg-negative-prompt") { + if (++i >= argc) { + invalid_param = true; + return true; + } + sparams.cfg_negative_prompt = argv[i]; + return true; + } + if (arg == "--cfg-negative-prompt-file") { + if (++i >= argc) { + invalid_param = true; + return true; + } + std::ifstream file(argv[i]); + if (!file) { + fprintf(stderr, "error: failed to open file '%s'\n", argv[i]); + invalid_param = true; + return true; + } + std::copy(std::istreambuf_iterator(file), std::istreambuf_iterator(), back_inserter(sparams.cfg_negative_prompt)); + if (!sparams.cfg_negative_prompt.empty() && sparams.cfg_negative_prompt.back() == '\n') { + sparams.cfg_negative_prompt.pop_back(); + } + return true; + } + if (arg == "--cfg-scale") { + if (++i >= argc) { + invalid_param = true; + return true; + } + sparams.cfg_scale = std::stof(argv[i]); + return true; + } + if (arg == "-b" || arg == "--batch-size") { + if (++i >= argc) { + invalid_param = true; + return true; + } + params.n_batch = std::stoi(argv[i]); + return true; + } + if (arg == "-ub" || arg == "--ubatch-size") { + if (++i >= argc) { + invalid_param = true; + return true; + } + params.n_ubatch = std::stoi(argv[i]); + return true; + } + if (arg == "--keep") { + if (++i >= argc) { + invalid_param = true; + return true; + } + params.n_keep = std::stoi(argv[i]); + return true; + } + if (arg == "--draft") { + if (++i >= argc) { + invalid_param = true; + return true; + } + params.n_draft = std::stoi(argv[i]); + return true; + } + if (arg == "--chunks") { + if (++i >= argc) { + 
invalid_param = true; + return true; + } + params.n_chunks = std::stoi(argv[i]); + return true; + } + if (arg == "-np" || arg == "--parallel") { + if (++i >= argc) { + invalid_param = true; + return true; + } + params.n_parallel = std::stoi(argv[i]); + return true; + } + if (arg == "-ns" || arg == "--sequences") { + if (++i >= argc) { + invalid_param = true; + return true; + } + params.n_sequences = std::stoi(argv[i]); + return true; + } + if (arg == "--p-split" || arg == "-ps") { + if (++i >= argc) { + invalid_param = true; + return true; + } + params.p_split = std::stof(argv[i]); + return true; + } + if (arg == "-m" || arg == "--model") { + if (++i >= argc) { + invalid_param = true; + return true; + } + params.model = argv[i]; + return true; + } + if (arg == "-md" || arg == "--model-draft") { + if (++i >= argc) { + invalid_param = true; + return true; + } + params.model_draft = argv[i]; + return true; + } + if (arg == "-a" || arg == "--alias") { + if (++i >= argc) { + invalid_param = true; + return true; + } + params.model_alias = argv[i]; + return true; + } + if (arg == "-mu" || arg == "--model-url") { + if (++i >= argc) { + invalid_param = true; + return true; + } + params.model_url = argv[i]; + return true; + } + if (arg == "-hfr" || arg == "--hf-repo") { + if (++i >= argc) { + invalid_param = true; + return true; + } + params.hf_repo = argv[i]; + return true; + } + if (arg == "-hff" || arg == "--hf-file") { + if (++i >= argc) { + invalid_param = true; + return true; + } + params.hf_file = argv[i]; + return true; + } + if (arg == "--lora") { + if (++i >= argc) { + invalid_param = true; + return true; + } + params.lora_adapter.emplace_back(argv[i], 1.0f); + params.use_mmap = false; + return true; + } + if (arg == "--lora-scaled") { + if (++i >= argc) { + invalid_param = true; + return true; + } + const char* lora_adapter = argv[i]; + if (++i >= argc) { + invalid_param = true; + return true; + } + params.lora_adapter.emplace_back(lora_adapter, std::stof(argv[i])); + params.use_mmap = false; + return true; + } + if (arg == "--lora-base") { + if (++i >= argc) { + invalid_param = true; + return true; + } + params.lora_base = argv[i]; + return true; + } + if (arg == "--control-vector") { + if (++i >= argc) { + invalid_param = true; + return true; + } + params.control_vectors.push_back({ 1.0f, argv[i], }); + return true; + } + if (arg == "--control-vector-scaled") { + if (++i >= argc) { + invalid_param = true; + return true; + } + const char* fname = argv[i]; + if (++i >= argc) { + invalid_param = true; + return true; + } + params.control_vectors.push_back({ std::stof(argv[i]), fname, }); + return true; + } + if (arg == "--control-vector-layer-range") { + if (++i >= argc) { + invalid_param = true; + return true; + } + params.control_vector_layer_start = std::stoi(argv[i]); + if (++i >= argc) { + invalid_param = true; + return true; + } + params.control_vector_layer_end = std::stoi(argv[i]); + return true; + } + if (arg == "--mmproj") { + if (++i >= argc) { + invalid_param = true; + return true; + } + params.mmproj = argv[i]; + return true; + } + if (arg == "--image") { + if (++i >= argc) { + invalid_param = true; + return true; + } + params.image.emplace_back(argv[i]); + return true; + } + if (arg == "-i" || arg == "--interactive") { + params.interactive = true; + return true; + } + if (arg == "--embedding") { + params.embedding = true; + return true; + } + if (arg == "--interactive-first") { + params.interactive_first = true; + return true; + } + if (arg == "-ins" || arg == "--instruct") { 
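+        // instruction mode: intended for Alpaca-style instruction-following models
+        // (see the corresponding entry in the usage text printed by gpt_print_usage)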
+ params.instruct = true; + return true; + } + if (arg == "-cml" || arg == "--chatml") { + params.chatml = true; + return true; + } + if (arg == "--infill") { + params.infill = true; + return true; + } + if (arg == "-dkvc" || arg == "--dump-kv-cache") { + params.dump_kv_cache = true; + return true; + } + if (arg == "-nkvo" || arg == "--no-kv-offload") { + params.no_kv_offload = true; + return true; + } + if (arg == "-ctk" || arg == "--cache-type-k") { + params.cache_type_k = argv[++i]; + return true; + } + if (arg == "-ctv" || arg == "--cache-type-v") { + params.cache_type_v = argv[++i]; + return true; + } + if (arg == "--multiline-input") { + params.multiline_input = true; + return true; + } + if (arg == "--simple-io") { + params.simple_io = true; + return true; + } + if (arg == "-cb" || arg == "--cont-batching") { + params.cont_batching = true; + return true; + } + if (arg == "-fa" || arg == "--flash-attn") { + params.flash_attn = true; + return true; + } + if (arg == "--color") { + params.use_color = true; + return true; + } + if (arg == "--mlock") { + params.use_mlock = true; + return true; + } + if (arg == "--gpu-layers" || arg == "-ngl" || arg == "--n-gpu-layers") { + if (++i >= argc) { + invalid_param = true; + return true; + } + params.n_gpu_layers = std::stoi(argv[i]); + if (!llama_supports_gpu_offload()) { + fprintf(stderr, "warning: not compiled with GPU offload support, --n-gpu-layers option will be ignored\n"); + fprintf(stderr, "warning: see main README.md for information on enabling GPU BLAS support\n"); + } + return true; + } + if (arg == "--gpu-layers-draft" || arg == "-ngld" || arg == "--n-gpu-layers-draft") { + if (++i >= argc) { + invalid_param = true; + return true; + } + params.n_gpu_layers_draft = std::stoi(argv[i]); + if (!llama_supports_gpu_offload()) { + fprintf(stderr, "warning: not compiled with GPU offload support, --n-gpu-layers-draft option will be ignored\n"); + fprintf(stderr, "warning: see main README.md for information on enabling GPU BLAS support\n"); + } + return true; + } + if (arg == "--main-gpu" || arg == "-mg") { + if (++i >= argc) { + invalid_param = true; + return true; + } + params.main_gpu = std::stoi(argv[i]); +#ifndef GGML_USE_CUDA_SYCL + fprintf(stderr, "warning: llama.cpp was compiled without CUDA/SYCL. Setting the main GPU has no effect.\n"); +#endif // GGML_USE_CUDA_SYCL + return true; + } + if (arg == "--split-mode" || arg == "-sm") { + if (++i >= argc) { + invalid_param = true; + return true; + } + std::string arg_next = argv[i]; + if (arg_next == "none") { + params.split_mode = LLAMA_SPLIT_MODE_NONE; + } + else if (arg_next == "layer") { + params.split_mode = LLAMA_SPLIT_MODE_LAYER; + } + else if (arg_next == "row") { +#ifdef GGML_USE_SYCL + fprintf(stderr, "warning: The split mode value:[row] is not supported by llama.cpp with SYCL. It's developing.\nExit!\n"); + exit(1); +#endif // GGML_USE_SYCL + params.split_mode = LLAMA_SPLIT_MODE_ROW; + } + else { + invalid_param = true; + return true; + } +#ifndef GGML_USE_CUDA_SYCL + fprintf(stderr, "warning: llama.cpp was compiled without CUDA/SYCL. 
Setting the split mode has no effect.\n"); +#endif // GGML_USE_CUDA_SYCL + return true; + } + if (arg == "--tensor-split" || arg == "-ts") { + if (++i >= argc) { + invalid_param = true; + return true; + } + std::string arg_next = argv[i]; + + // split string by , and / + const std::regex regex{ R"([,/]+)" }; + std::sregex_token_iterator it{ arg_next.begin(), arg_next.end(), regex, -1 }; + std::vector split_arg{ it, {} }; + if (split_arg.size() >= llama_max_devices()) { + invalid_param = true; + return true; + } + for (size_t i = 0; i < llama_max_devices(); ++i) { + if (i < split_arg.size()) { + params.tensor_split[i] = std::stof(split_arg[i]); + } + else { + params.tensor_split[i] = 0.0f; + } + } +#ifndef GGML_USE_CUDA_SYCL_VULKAN + fprintf(stderr, "warning: llama.cpp was compiled without CUDA/SYCL/Vulkan. Setting a tensor split has no effect.\n"); +#endif // GGML_USE_CUDA_SYCL_VULKAN + return true; + } + if (arg == "--no-mmap") { + params.use_mmap = false; + return true; + } + if (arg == "--numa") { + if (++i >= argc) { + invalid_param = true; + return true; + } + std::string value(argv[i]); + /**/ if (value == "distribute" || value == "") { params.numa = GGML_NUMA_STRATEGY_DISTRIBUTE; } + else if (value == "isolate") { params.numa = GGML_NUMA_STRATEGY_ISOLATE; } + else if (value == "numactl") { params.numa = GGML_NUMA_STRATEGY_NUMACTL; } + else { invalid_param = true; } + return true; + } + if (arg == "--verbose-prompt") { + params.verbose_prompt = true; + return true; + } + if (arg == "--no-display-prompt") { + params.display_prompt = false; + return true; + } + if (arg == "-r" || arg == "--reverse-prompt") { + if (++i >= argc) { + invalid_param = true; + return true; + } + params.antiprompt.emplace_back(argv[i]); + return true; + } + if (arg == "-ld" || arg == "--logdir") { + if (++i >= argc) { + invalid_param = true; + return true; + } + params.logdir = argv[i]; + + if (params.logdir.back() != DIRECTORY_SEPARATOR) { + params.logdir += DIRECTORY_SEPARATOR; + } + return true; + } + if (arg == "-lcs" || arg == "--lookup-cache-static") { + if (++i >= argc) { + invalid_param = true; + return true; + } + params.lookup_cache_static = argv[i]; + return true; + } + if (arg == "-lcd" || arg == "--lookup-cache-dynamic") { + if (++i >= argc) { + invalid_param = true; + return true; + } + params.lookup_cache_dynamic = argv[i]; + return true; + } + if (arg == "--save-all-logits" || arg == "--kl-divergence-base") { + if (++i >= argc) { + invalid_param = true; + return true; + } + params.logits_file = argv[i]; + return true; + } + if (arg == "--perplexity" || arg == "--all-logits") { + params.logits_all = true; + return true; + } + if (arg == "--ppl-stride") { + if (++i >= argc) { + invalid_param = true; + return true; + } + params.ppl_stride = std::stoi(argv[i]); + return true; + } + if (arg == "-ptc" || arg == "--print-token-count") { + if (++i >= argc) { + invalid_param = true; + return true; + } + params.n_print = std::stoi(argv[i]); + return true; + } + if (arg == "--check-tensors") { + params.check_tensors = true; + return true; + } + if (arg == "--ppl-output-type") { + if (++i >= argc) { + invalid_param = true; + return true; + } + params.ppl_output_type = std::stoi(argv[i]); + return true; + } + if (arg == "--hellaswag") { + params.hellaswag = true; + return true; + } + if (arg == "--hellaswag-tasks") { + if (++i >= argc) { + invalid_param = true; + return true; + } + params.hellaswag_tasks = std::stoi(argv[i]); + return true; + } + if (arg == "--winogrande") { + params.winogrande = true; + 
return true; + } + if (arg == "--winogrande-tasks") { + if (++i >= argc) { + invalid_param = true; + return true; + } + params.winogrande_tasks = std::stoi(argv[i]); + return true; + } + if (arg == "--multiple-choice") { + params.multiple_choice = true; + return true; + } + if (arg == "--multiple-choice-tasks") { + if (++i >= argc) { + invalid_param = true; + return true; + } + params.multiple_choice_tasks = std::stoi(argv[i]); + return true; + } + if (arg == "--kl-divergence") { + params.kl_divergence = true; + return true; + } + if (arg == "--ignore-eos") { + params.ignore_eos = true; + return true; + } + if (arg == "--penalize-nl") { + sparams.penalize_nl = true; + return true; + } + if (arg == "-l" || arg == "--logit-bias") { + if (++i >= argc) { + invalid_param = true; + return true; + } + std::stringstream ss(argv[i]); + llama_token key; + char sign; + std::string value_str; + try { + if (ss >> key && ss >> sign && std::getline(ss, value_str) && (sign == '+' || sign == '-')) { + sparams.logit_bias[key] = std::stof(value_str) * ((sign == '-') ? -1.0f : 1.0f); + } + else { + throw std::exception(); + } + } + catch (const std::exception&) { + invalid_param = true; + return true; + } + return true; + } + if (arg == "-h" || arg == "--help") { + gpt_print_usage(argc, argv, gpt_params()); + exit(0); + } + if (arg == "--version") { + fprintf(stderr, "version: %d (%s)\n", LLAMA_BUILD_NUMBER, LLAMA_COMMIT); + fprintf(stderr, "built with %s for %s\n", LLAMA_COMPILER, LLAMA_BUILD_TARGET); + exit(0); + } + if (arg == "--random-prompt") { + params.random_prompt = true; + return true; + } + if (arg == "--in-prefix-bos") { + params.input_prefix_bos = true; + return true; + } + if (arg == "--in-prefix") { + if (++i >= argc) { + invalid_param = true; + return true; + } + params.input_prefix = argv[i]; + return true; + } + if (arg == "--in-suffix") { + if (++i >= argc) { + invalid_param = true; + return true; + } + params.input_suffix = argv[i]; + return true; + } + if (arg == "--grammar") { + if (++i >= argc) { + invalid_param = true; + return true; + } + sparams.grammar = argv[i]; + return true; + } + if (arg == "--grammar-file") { + if (++i >= argc) { + invalid_param = true; + return true; + } + std::ifstream file(argv[i]); + if (!file) { + fprintf(stderr, "error: failed to open file '%s'\n", argv[i]); + invalid_param = true; + return true; + } + std::copy( + std::istreambuf_iterator(file), + std::istreambuf_iterator(), + std::back_inserter(sparams.grammar) + ); + return true; + } + if (arg == "-j" || arg == "--json-schema") { + if (++i >= argc) { + invalid_param = true; + return true; + } + sparams.grammar = json_schema_to_grammar(json::parse(argv[i])); + return true; + } + if (arg == "--override-kv") { + if (++i >= argc) { + invalid_param = true; + return true; + } + if (!parse_kv_override(argv[i], params.kv_overrides)) { + fprintf(stderr, "error: Invalid type for KV override: %s\n", argv[i]); + invalid_param = true; + return true; + } + return true; + } +#ifndef LOG_DISABLE_LOGS + // Parse args for logging parameters + if (log_param_single_parse(argv[i])) { + // Do nothing, log_param_single_parse automatically does it's thing + // and returns if a match was found and parsed. + return true; + } + if (log_param_pair_parse( /*check_but_dont_parse*/ true, argv[i])) { + // We have a matching known parameter requiring an argument, + // now we need to check if there is anything after this argv + // and flag invalid_param or parse it. 
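+        // consume the next argv entry as the value for this logging parameter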
+ if (++i >= argc) { + invalid_param = true; + return true; + } + if (!log_param_pair_parse( /*check_but_dont_parse*/ false, argv[i - 1], argv[i])) { + invalid_param = true; + return true; + } + return true; + } + // End of Parse args for logging parameters +#endif // LOG_DISABLE_LOGS + + return false; +} + +void gpt_params_handle_model_default(gpt_params & params) { + if (!params.hf_repo.empty()) { + // short-hand to avoid specifying --hf-file -> default it to --model + if (params.hf_file.empty()) { + if (params.model.empty()) { + throw std::invalid_argument("error: --hf-repo requires either --hf-file or --model\n"); + } + params.hf_file = params.model; + } else if (params.model.empty()) { + params.model = "models/" + string_split(params.hf_file, '/').back(); + } + } else if (!params.model_url.empty()) { + if (params.model.empty()) { + auto f = string_split(params.model_url, '#').front(); + f = string_split(f, '?').front(); + f = string_split(f, '/').back(); + params.model = "models/" + f; + } + } else if (params.model.empty()) { + params.model = DEFAULT_MODEL_PATH; + } +} + +bool gpt_params_parse_ex(int argc, char ** argv, gpt_params & params) { + bool invalid_param = false; + std::string arg; + const std::string arg_prefix = "--"; + llama_sampling_params & sparams = params.sparams; + + for (int i = 1; i < argc; i++) { + arg = argv[i]; + if (arg.compare(0, arg_prefix.size(), arg_prefix) == 0) { + std::replace(arg.begin(), arg.end(), '_', '-'); + } + + if (!gpt_params_find_arg(argc, argv, arg, params, i, invalid_param)) { + throw std::invalid_argument("error: unknown argument: " + arg); + } + } + + if (invalid_param) { + throw std::invalid_argument("error: invalid parameter for argument: " + arg); + } + + if (params.prompt_cache_all && + (params.interactive || params.interactive_first || + params.instruct)) { + + throw std::invalid_argument("error: --prompt-cache-all not supported in interactive mode yet\n"); + } + + gpt_params_handle_model_default(params); + + if (params.escape) { + process_escapes(params.prompt); + process_escapes(params.input_prefix); + process_escapes(params.input_suffix); + process_escapes(sparams.cfg_negative_prompt); + for (auto & antiprompt : params.antiprompt) { + process_escapes(antiprompt); + } + } + + if (!params.kv_overrides.empty()) { + params.kv_overrides.emplace_back(); + params.kv_overrides.back().key[0] = 0; + } + + return true; +} + +void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) { + const llama_sampling_params & sparams = params.sparams; + + std::string sampler_type_chars; + std::string sampler_type_names; + for (const auto sampler_type : sparams.samplers_sequence) { + sampler_type_chars += static_cast(sampler_type); + sampler_type_names += sampler_type_to_name_string(sampler_type) + ";"; + } + sampler_type_names.pop_back(); + + printf("\n"); + printf("usage: %s [options]\n", argv[0]); + printf("\n"); + printf("options:\n"); + printf(" -h, --help show this help message and exit\n"); + printf(" --version show version and build info\n"); + printf(" -i, --interactive run in interactive mode\n"); + printf(" --interactive-first run in interactive mode and wait for input right away\n"); + printf(" -ins, --instruct run in instruction mode (use with Alpaca models)\n"); + printf(" -cml, --chatml run in chatml mode (use with ChatML-compatible models)\n"); + printf(" --multiline-input allows you to write or paste multiple lines without ending each in '\\'\n"); + printf(" -r PROMPT, --reverse-prompt PROMPT\n"); + printf(" halt 
generation at PROMPT, return control in interactive mode\n"); + printf(" (can be specified more than once for multiple prompts).\n"); + printf(" --color colorise output to distinguish prompt and user input from generations\n"); + printf(" -s SEED, --seed SEED RNG seed (default: -1, use random seed for < 0)\n"); + printf(" -t N, --threads N number of threads to use during generation (default: %d)\n", params.n_threads); + printf(" -tb N, --threads-batch N\n"); + printf(" number of threads to use during batch and prompt processing (default: same as --threads)\n"); + printf(" -td N, --threads-draft N"); + printf(" number of threads to use during generation (default: same as --threads)\n"); + printf(" -tbd N, --threads-batch-draft N\n"); + printf(" number of threads to use during batch and prompt processing (default: same as --threads-draft)\n"); + printf(" -p PROMPT, --prompt PROMPT\n"); + printf(" prompt to start generation with (default: empty)\n"); + printf(" -e, --escape process prompt escapes sequences (\\n, \\r, \\t, \\', \\\", \\\\)\n"); + printf(" --prompt-cache FNAME file to cache prompt state for faster startup (default: none)\n"); + printf(" --prompt-cache-all if specified, saves user input and generations to cache as well.\n"); + printf(" not supported with --interactive or other interactive options\n"); + printf(" --prompt-cache-ro if specified, uses the prompt cache but does not update it.\n"); + printf(" --random-prompt start with a randomized prompt.\n"); + printf(" --in-prefix-bos prefix BOS to user inputs, preceding the `--in-prefix` string\n"); + printf(" --in-prefix STRING string to prefix user inputs with (default: empty)\n"); + printf(" --in-suffix STRING string to suffix after user inputs with (default: empty)\n"); + printf(" -f FNAME, --file FNAME\n"); + printf(" prompt file to start generation.\n"); + printf(" -bf FNAME, --binary-file FNAME\n"); + printf(" binary file containing multiple choice tasks.\n"); + printf(" -n N, --n-predict N number of tokens to predict (default: %d, -1 = infinity, -2 = until context filled)\n", params.n_predict); + printf(" -c N, --ctx-size N size of the prompt context (default: %d, 0 = loaded from model)\n", params.n_ctx); + printf(" -b N, --batch-size N logical maximum batch size (default: %d)\n", params.n_batch); + printf(" -ub N, --ubatch-size N\n"); + printf(" physical maximum batch size (default: %d)\n", params.n_ubatch); + printf(" --samplers samplers that will be used for generation in the order, separated by \';\'\n"); + printf(" (default: %s)\n", sampler_type_names.c_str()); + printf(" --sampling-seq simplified sequence for samplers that will be used (default: %s)\n", sampler_type_chars.c_str()); + printf(" --top-k N top-k sampling (default: %d, 0 = disabled)\n", sparams.top_k); + printf(" --top-p N top-p sampling (default: %.1f, 1.0 = disabled)\n", (double)sparams.top_p); + printf(" --min-p N min-p sampling (default: %.1f, 0.0 = disabled)\n", (double)sparams.min_p); + printf(" --tfs N tail free sampling, parameter z (default: %.1f, 1.0 = disabled)\n", (double)sparams.tfs_z); + printf(" --typical N locally typical sampling, parameter p (default: %.1f, 1.0 = disabled)\n", (double)sparams.typical_p); + printf(" --repeat-last-n N last n tokens to consider for penalize (default: %d, 0 = disabled, -1 = ctx_size)\n", sparams.penalty_last_n); + printf(" --repeat-penalty N penalize repeat sequence of tokens (default: %.1f, 1.0 = disabled)\n", (double)sparams.penalty_repeat); + printf(" --presence-penalty N repeat alpha presence penalty 
(default: %.1f, 0.0 = disabled)\n", (double)sparams.penalty_present); + printf(" --frequency-penalty N repeat alpha frequency penalty (default: %.1f, 0.0 = disabled)\n", (double)sparams.penalty_freq); + printf(" --dynatemp-range N dynamic temperature range (default: %.1f, 0.0 = disabled)\n", (double)sparams.dynatemp_range); + printf(" --dynatemp-exp N dynamic temperature exponent (default: %.1f)\n", (double)sparams.dynatemp_exponent); + printf(" --mirostat N use Mirostat sampling.\n"); + printf(" Top K, Nucleus, Tail Free and Locally Typical samplers are ignored if used.\n"); + printf(" (default: %d, 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0)\n", sparams.mirostat); + printf(" --mirostat-lr N Mirostat learning rate, parameter eta (default: %.1f)\n", (double)sparams.mirostat_eta); + printf(" --mirostat-ent N Mirostat target entropy, parameter tau (default: %.1f)\n", (double)sparams.mirostat_tau); + printf(" -l TOKEN_ID(+/-)BIAS, --logit-bias TOKEN_ID(+/-)BIAS\n"); + printf(" modifies the likelihood of token appearing in the completion,\n"); + printf(" i.e. `--logit-bias 15043+1` to increase likelihood of token ' Hello',\n"); + printf(" or `--logit-bias 15043-1` to decrease likelihood of token ' Hello'\n"); + printf(" --grammar GRAMMAR BNF-like grammar to constrain generations (see samples in grammars/ dir)\n"); + printf(" --grammar-file FNAME file to read grammar from\n"); + printf(" -j SCHEMA, --json-schema SCHEMA\n"); + printf(" JSON schema to constrain generations (https://json-schema.org/), e.g. `{}` for any JSON object.\n"); + printf(" For schemas w/ external $refs, use --grammar + example/json_schema_to_grammar.py instead\n"); + printf(" --cfg-negative-prompt PROMPT\n"); + printf(" negative prompt to use for guidance. (default: empty)\n"); + printf(" --cfg-negative-prompt-file FNAME\n"); + printf(" negative prompt file to use for guidance. 
(default: empty)\n"); + printf(" --cfg-scale N strength of guidance (default: %f, 1.0 = disable)\n", sparams.cfg_scale); + printf(" --rope-scaling {none,linear,yarn}\n"); + printf(" RoPE frequency scaling method, defaults to linear unless specified by the model\n"); + printf(" --rope-scale N RoPE context scaling factor, expands context by a factor of N\n"); + printf(" --rope-freq-base N RoPE base frequency, used by NTK-aware scaling (default: loaded from model)\n"); + printf(" --rope-freq-scale N RoPE frequency scaling factor, expands context by a factor of 1/N\n"); + printf(" --yarn-orig-ctx N YaRN: original context size of model (default: 0 = model training context size)\n"); + printf(" --yarn-ext-factor N YaRN: extrapolation mix factor (default: 1.0, 0.0 = full interpolation)\n"); + printf(" --yarn-attn-factor N YaRN: scale sqrt(t) or attention magnitude (default: 1.0)\n"); + printf(" --yarn-beta-slow N YaRN: high correction dim or alpha (default: %.1f)\n", params.yarn_beta_slow); + printf(" --yarn-beta-fast N YaRN: low correction dim or beta (default: %.1f)\n", params.yarn_beta_fast); + printf(" --pooling {none,mean,cls}\n"); + printf(" pooling type for embeddings, use model default if unspecified\n"); + printf(" -dt N, --defrag-thold N\n"); + printf(" KV cache defragmentation threshold (default: %.1f, < 0 - disabled)\n", params.defrag_thold); + printf(" --ignore-eos ignore end of stream token and continue generating (implies --logit-bias 2-inf)\n"); + printf(" --penalize-nl penalize newline tokens\n"); + printf(" --temp N temperature (default: %.1f)\n", (double)sparams.temp); + printf(" --all-logits return logits for all tokens in the batch (default: disabled)\n"); + printf(" --hellaswag compute HellaSwag score over random tasks from datafile supplied with -f\n"); + printf(" --hellaswag-tasks N number of tasks to use when computing the HellaSwag score (default: %zu)\n", params.hellaswag_tasks); + printf(" --winogrande compute Winogrande score over random tasks from datafile supplied with -f\n"); + printf(" --winogrande-tasks N number of tasks to use when computing the Winogrande score (default: %zu)\n", params.winogrande_tasks); + printf(" --multiple-choice compute multiple choice score over random tasks from datafile supplied with -f\n"); + printf(" --multiple-choice-tasks N number of tasks to use when computing the multiple choice score (default: %zu)\n", params.winogrande_tasks); + printf(" --kl-divergence computes KL-divergence to logits provided via --kl-divergence-base\n"); + printf(" --keep N number of tokens to keep from the initial prompt (default: %d, -1 = all)\n", params.n_keep); + printf(" --draft N number of tokens to draft for speculative decoding (default: %d)\n", params.n_draft); + printf(" --chunks N max number of chunks to process (default: %d, -1 = all)\n", params.n_chunks); + printf(" -np N, --parallel N number of parallel sequences to decode (default: %d)\n", params.n_parallel); + printf(" -ns N, --sequences N number of sequences to decode (default: %d)\n", params.n_sequences); + printf(" -ps N, --p-split N speculative decoding split probability (default: %.1f)\n", (double)params.p_split); + printf(" -cb, --cont-batching enable continuous batching (a.k.a dynamic batching) (default: disabled)\n"); + printf(" -fa, --flash-attn enable Flash Attention (default: %s)\n", params.flash_attn ? "enabled" : "disabled"); + printf(" --mmproj MMPROJ_FILE path to a multimodal projector file for LLaVA. 
see examples/llava/README.md\n"); + printf(" --image IMAGE_FILE path to an image file. use with multimodal models. Specify multiple times for batching\n"); + if (llama_supports_mlock()) { + printf(" --mlock force system to keep model in RAM rather than swapping or compressing\n"); + } + if (llama_supports_mmap()) { + printf(" --no-mmap do not memory-map model (slower load but may reduce pageouts if not using mlock)\n"); + } + printf(" --numa TYPE attempt optimizations that help on some NUMA systems\n"); + printf(" - distribute: spread execution evenly over all nodes\n"); + printf(" - isolate: only spawn threads on CPUs on the node that execution started on\n"); + printf(" - numactl: use the CPU map provided by numactl\n"); + printf(" if run without this previously, it is recommended to drop the system page cache before using this\n"); + printf(" see https://github.com/ggerganov/llama.cpp/issues/1437\n"); + if (llama_supports_gpu_offload()) { + printf(" -ngl N, --n-gpu-layers N\n"); + printf(" number of layers to store in VRAM\n"); + printf(" -ngld N, --n-gpu-layers-draft N\n"); + printf(" number of layers to store in VRAM for the draft model\n"); + printf(" -sm SPLIT_MODE, --split-mode SPLIT_MODE\n"); + printf(" how to split the model across multiple GPUs, one of:\n"); + printf(" - none: use one GPU only\n"); + printf(" - layer (default): split layers and KV across GPUs\n"); + printf(" - row: split rows across GPUs\n"); + printf(" -ts SPLIT, --tensor-split SPLIT\n"); + printf(" fraction of the model to offload to each GPU, comma-separated list of proportions, e.g. 3,1\n"); + printf(" -mg i, --main-gpu i the GPU to use for the model (with split-mode = none),\n"); + printf(" or for intermediate results and KV (with split-mode = row) (default: %d)\n", params.main_gpu); + } + printf(" --verbose-prompt print a verbose prompt before generation (default: %s)\n", params.verbose_prompt ? "true" : "false"); + printf(" --no-display-prompt don't print prompt at generation (default: %s)\n", !params.display_prompt ? 
"true" : "false"); + printf(" -gan N, --grp-attn-n N\n"); + printf(" group-attention factor (default: %d)\n", params.grp_attn_n); + printf(" -gaw N, --grp-attn-w N\n"); + printf(" group-attention width (default: %.1f)\n", (double)params.grp_attn_w); + printf(" -dkvc, --dump-kv-cache\n"); + printf(" verbose print of the KV cache\n"); + printf(" -nkvo, --no-kv-offload\n"); + printf(" disable KV offload\n"); + printf(" -ctk TYPE, --cache-type-k TYPE\n"); + printf(" KV cache data type for K (default: %s)\n", params.cache_type_k.c_str()); + printf(" -ctv TYPE, --cache-type-v TYPE\n"); + printf(" KV cache data type for V (default: %s)\n", params.cache_type_v.c_str()); + printf(" --simple-io use basic IO for better compatibility in subprocesses and limited consoles\n"); + printf(" --lora FNAME apply LoRA adapter (implies --no-mmap)\n"); + printf(" --lora-scaled FNAME S apply LoRA adapter with user defined scaling S (implies --no-mmap)\n"); + printf(" --lora-base FNAME optional model to use as a base for the layers modified by the LoRA adapter\n"); + printf(" --control-vector FNAME\n"); + printf(" add a control vector\n"); + printf(" --control-vector-scaled FNAME S\n"); + printf(" add a control vector with user defined scaling S\n"); + printf(" --control-vector-layer-range START END\n"); + printf(" layer range to apply the control vector(s) to, start and end inclusive\n"); + printf(" -m FNAME, --model FNAME\n"); + printf(" model path (default: models/$filename with filename from --hf-file or --model-url if set, otherwise %s)\n", DEFAULT_MODEL_PATH); + printf(" -md FNAME, --model-draft FNAME\n"); + printf(" draft model for speculative decoding (default: unused)\n"); + printf(" -mu MODEL_URL, --model-url MODEL_URL\n"); + printf(" model download url (default: unused)\n"); + printf(" -hfr REPO, --hf-repo REPO\n"); + printf(" Hugging Face model repository (default: unused)\n"); + printf(" -hff FILE, --hf-file FILE\n"); + printf(" Hugging Face model file (default: unused)\n"); + printf(" -ld LOGDIR, --logdir LOGDIR\n"); + printf(" path under which to save YAML logs (no logging if unset)\n"); + printf(" -lcs FNAME, --lookup-cache-static FNAME\n"); + printf(" path to static lookup cache to use for lookup decoding (not updated by generation)\n"); + printf(" -lcd FNAME, --lookup-cache-dynamic FNAME\n"); + printf(" path to dynamic lookup cache to use for lookup decoding (updated by generation)\n"); + printf(" --override-kv KEY=TYPE:VALUE\n"); + printf(" advanced option to override model metadata by key. may be specified multiple times.\n"); + printf(" types: int, float, bool, str. 
example: --override-kv tokenizer.ggml.add_bos_token=bool:false\n"); + printf(" -ptc N, --print-token-count N\n"); + printf(" print token count every N tokens (default: %d)\n", params.n_print); + printf(" --check-tensors check model tensor data for invalid values\n"); + printf("\n"); +#ifndef LOG_DISABLE_LOGS + log_print_usage(); +#endif // LOG_DISABLE_LOGS +} + +std::string get_system_info(const gpt_params & params) { + std::ostringstream os; + + os << "system_info: n_threads = " << params.n_threads; + if (params.n_threads_batch != -1) { + os << " (n_threads_batch = " << params.n_threads_batch << ")"; + } + os << " / " << std::thread::hardware_concurrency() << " | " << llama_print_system_info(); + + return os.str(); +} + +std::string gpt_random_prompt(std::mt19937 & rng) { + const int r = rng() % 10; + switch (r) { + case 0: return "So"; + case 1: return "Once upon a time"; + case 2: return "When"; + case 3: return "The"; + case 4: return "After"; + case 5: return "If"; + case 6: return "import"; + case 7: return "He"; + case 8: return "She"; + case 9: return "They"; + } + + GGML_UNREACHABLE(); +} + +// Validate if a filename is safe to use +// To validate a full path, split the path by the OS-specific path separator, and validate each part with this function +bool validate_file_name(const std::string & filename) { + if (!filename.length()) { + // Empty filename invalid + return false; + } + if (filename.length() > 255) { + // Limit at common largest possible filename on Linux filesystems + // to avoid unnecessary further validation + // (On systems with smaller limits it will be caught by the OS) + return false; + } + + std::u32string filename_utf32; + try { + std::wstring_convert, char32_t> converter; + filename_utf32 = converter.from_bytes(filename); + + // If the reverse conversion mismatches, it means overlong UTF-8 sequences were used, + // or invalid encodings were encountered. Reject such attempts + std::string filename_reencoded = converter.to_bytes(filename_utf32); + if (filename_reencoded != filename) { + return false; + } + } catch (const std::exception &) { + return false; + } + + // Check for forbidden codepoints: + // - Control characters + // - Unicode equivalents of illegal characters + // - UTF-16 surrogate pairs + // - UTF-8 replacement character + // - Byte order mark (BOM) + // - Illegal characters: / \ : * ? " < > | + for (char32_t c : filename_utf32) { + if (c <= 0x1F // Control characters (C0) + || c == 0x7F // Control characters (DEL) + || (c >= 0x80 && c <= 0x9F) // Control characters (C1) + || c == 0xFF0E // Fullwidth Full Stop (period equivalent) + || c == 0x2215 // Division Slash (forward slash equivalent) + || c == 0x2216 // Set Minus (backslash equivalent) + || (c >= 0xD800 && c <= 0xDFFF) // UTF-16 surrogate pairs + || c == 0xFFFD // Replacement Character (UTF-8) + || c == 0xFEFF // Byte Order Mark (BOM) + || c == '/' || c == '\\' || c == ':' || c == '*' // Illegal characters + || c == '?' || c == '"' || c == '<' || c == '>' || c == '|') { + return false; + } + } + + // Reject any leading or trailing ' ', or any trailing '.', these are stripped on Windows and will cause a different filename + // Unicode and other whitespace is not affected, only 0x20 space + if (filename.front() == ' ' || filename.back() == ' ' || filename.back() == '.') { + return false; + } + + // Reject any ".." (currently stricter than necessary, it should be fine to just check for == ".." instead) + if (filename.find("..") != std::string::npos) { + return false; + } + + // Reject "." 
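+    // (a lone "." names the current directory on both POSIX and Windows, so it can never be a usable filename)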
+    if (filename == ".") {
+        return false;
+    }
+
+    return true;
+}
+
+//
+// String utils
+//
+
+std::vector<std::string> string_split(std::string input, char separator) {
+    std::vector<std::string> parts;
+    size_t separator_pos = input.find(separator);
+    while (separator_pos != std::string::npos) {
+        std::string part = input.substr(0, separator_pos);
+        parts.emplace_back(part);
+        input = input.substr(separator_pos + 1);
+        separator_pos = input.find(separator);
+    }
+    parts.emplace_back(input);
+    return parts;
+}
+
+std::string string_strip(const std::string & str) {
+    size_t start = 0;
+    size_t end = str.size();
+    while (start < end && std::isspace(str[start])) {
+        start++;
+    }
+    while (end > start && std::isspace(str[end - 1])) {
+        end--;
+    }
+    return str.substr(start, end - start);
+}
+
+std::vector<llama_sampler_type> sampler_types_from_names(const std::vector<std::string> & names, bool allow_alt_names) {
+    std::unordered_map<std::string, llama_sampler_type> sampler_canonical_name_map {
+        {"top_k", llama_sampler_type::TOP_K},
+        {"top_p", llama_sampler_type::TOP_P},
+        {"typical_p", llama_sampler_type::TYPICAL_P},
+        {"min_p", llama_sampler_type::MIN_P},
+        {"tfs_z", llama_sampler_type::TFS_Z},
+        {"temperature", llama_sampler_type::TEMPERATURE}
+    };
+
+    // since samplers names are written multiple ways
+    // make it ready for both system names and input names
+    std::unordered_map<std::string, llama_sampler_type> sampler_alt_name_map {
+        {"top-k", llama_sampler_type::TOP_K},
+        {"top-p", llama_sampler_type::TOP_P},
+        {"nucleus", llama_sampler_type::TOP_P},
+        {"typical-p", llama_sampler_type::TYPICAL_P},
+        {"typical", llama_sampler_type::TYPICAL_P},
+        {"min-p", llama_sampler_type::MIN_P},
+        {"tfs-z", llama_sampler_type::TFS_Z},
+        {"tfs", llama_sampler_type::TFS_Z},
+        {"temp", llama_sampler_type::TEMPERATURE}
+    };
+
+    std::vector<llama_sampler_type> sampler_types;
+    sampler_types.reserve(names.size());
+    for (const auto & name : names)
+    {
+        auto sampler_item = sampler_canonical_name_map.find(name);
+        if (sampler_item != sampler_canonical_name_map.end())
+        {
+            sampler_types.push_back(sampler_item->second);
+        }
+        else
+        {
+            if (allow_alt_names)
+            {
+                sampler_item = sampler_alt_name_map.find(name);
+                if (sampler_item != sampler_alt_name_map.end())
+                {
+                    sampler_types.push_back(sampler_item->second);
+                }
+            }
+        }
+    }
+    return sampler_types;
+}
+
+std::vector<llama_sampler_type> sampler_types_from_chars(const std::string & names_string) {
+    std::unordered_map<char, llama_sampler_type> sampler_name_map {
+        {'k', llama_sampler_type::TOP_K},
+        {'p', llama_sampler_type::TOP_P},
+        {'y', llama_sampler_type::TYPICAL_P},
+        {'m', llama_sampler_type::MIN_P},
+        {'f', llama_sampler_type::TFS_Z},
+        {'t', llama_sampler_type::TEMPERATURE}
+    };
+
+    std::vector<llama_sampler_type> sampler_types;
+    sampler_types.reserve(names_string.size());
+    for (const auto & c : names_string) {
+        const auto sampler_item = sampler_name_map.find(c);
+        if (sampler_item != sampler_name_map.end()) {
+            sampler_types.push_back(sampler_item->second);
+        }
+    }
+    return sampler_types;
+}
+
+std::string sampler_type_to_name_string(llama_sampler_type sampler_type) {
+    switch (sampler_type) {
+        case llama_sampler_type::TOP_K: return "top_k";
+        case llama_sampler_type::TFS_Z: return "tfs_z";
+        case llama_sampler_type::TYPICAL_P: return "typical_p";
+        case llama_sampler_type::TOP_P: return "top_p";
+        case llama_sampler_type::MIN_P: return "min_p";
+        case llama_sampler_type::TEMPERATURE: return "temperature";
+        default : return "";
+    }
+}
+
+//
+// Model utils
+//
+
+struct llama_model_params llama_model_params_from_gpt_params(const gpt_params & params) {
+    auto mparams = llama_model_default_params();
+
+    if (params.n_gpu_layers != -1) {
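+        // -1 means "keep the library default"; only forward an explicitly requested -ngl value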
+        mparams.n_gpu_layers = params.n_gpu_layers;
+    }
+    mparams.main_gpu = params.main_gpu;
+    mparams.split_mode = params.split_mode;
+    mparams.tensor_split = params.tensor_split;
+    mparams.use_mmap = params.use_mmap;
+    mparams.use_mlock = params.use_mlock;
+    mparams.check_tensors = params.check_tensors;
+    if (params.kv_overrides.empty()) {
+        mparams.kv_overrides = NULL;
+    } else {
+        GGML_ASSERT(params.kv_overrides.back().key[0] == 0 && "KV overrides not terminated with empty key");
+        mparams.kv_overrides = params.kv_overrides.data();
+    }
+
+    return mparams;
+}
+
+static ggml_type kv_cache_type_from_str(const std::string & s) {
+    if (s == "f32") {
+        return GGML_TYPE_F32;
+    }
+    if (s == "f16") {
+        return GGML_TYPE_F16;
+    }
+    if (s == "q8_0") {
+        return GGML_TYPE_Q8_0;
+    }
+    if (s == "q4_0") {
+        return GGML_TYPE_Q4_0;
+    }
+    if (s == "q4_1") {
+        return GGML_TYPE_Q4_1;
+    }
+    if (s == "iq4_nl") {
+        return GGML_TYPE_IQ4_NL;
+    }
+    if (s == "q5_0") {
+        return GGML_TYPE_Q5_0;
+    }
+    if (s == "q5_1") {
+        return GGML_TYPE_Q5_1;
+    }
+
+    throw std::runtime_error("Invalid cache type: " + s);
+}
+
+struct llama_context_params llama_context_params_from_gpt_params(const gpt_params & params) {
+    auto cparams = llama_context_default_params();
+
+    cparams.n_ctx = params.n_ctx;
+    cparams.n_seq_max = params.n_parallel;
+    cparams.n_batch = params.n_batch;
+    cparams.n_ubatch = params.n_ubatch;
+    cparams.n_threads = params.n_threads;
+    cparams.n_threads_batch = params.n_threads_batch == -1 ? params.n_threads : params.n_threads_batch;
+    cparams.seed = params.seed;
+    cparams.logits_all = params.logits_all;
+    cparams.embeddings = params.embedding;
+    cparams.rope_scaling_type = params.rope_scaling_type;
+    cparams.rope_freq_base = params.rope_freq_base;
+    cparams.rope_freq_scale = params.rope_freq_scale;
+    cparams.yarn_ext_factor = params.yarn_ext_factor;
+    cparams.yarn_attn_factor = params.yarn_attn_factor;
+    cparams.yarn_beta_fast = params.yarn_beta_fast;
+    cparams.yarn_beta_slow = params.yarn_beta_slow;
+    cparams.yarn_orig_ctx = params.yarn_orig_ctx;
+    cparams.pooling_type = params.pooling_type;
+    cparams.defrag_thold = params.defrag_thold;
+    cparams.cb_eval = params.cb_eval;
+    cparams.cb_eval_user_data = params.cb_eval_user_data;
+    cparams.offload_kqv = !params.no_kv_offload;
+    cparams.flash_attn = params.flash_attn;
+
+    cparams.type_k = kv_cache_type_from_str(params.cache_type_k);
+    cparams.type_v = kv_cache_type_from_str(params.cache_type_v);
+
+    return cparams;
+}
+
+void llama_batch_clear(struct llama_batch & batch) {
+    batch.n_tokens = 0;
+}
+
+void llama_batch_add(
+    struct llama_batch & batch,
+    llama_token id,
+    llama_pos pos,
+    const std::vector<llama_seq_id> & seq_ids,
+    bool logits) {
+    batch.token [batch.n_tokens] = id;
+    batch.pos [batch.n_tokens] = pos;
+    batch.n_seq_id[batch.n_tokens] = seq_ids.size();
+    for (size_t i = 0; i < seq_ids.size(); ++i) {
+        batch.seq_id[batch.n_tokens][i] = seq_ids[i];
+    }
+    batch.logits [batch.n_tokens] = logits;
+
+    batch.n_tokens++;
+}
+
+#ifdef LLAMA_USE_CURL
+
+static bool starts_with(const std::string & str, const std::string & prefix) {
+    // While we wait for C++20's std::string::starts_with...
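+    // rfind with pos = 0 can only match at index 0, so this is a cheap prefix test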
+    return str.rfind(prefix, 0) == 0;
+}
+
+static bool llama_download_file(const std::string & url, const std::string & path) {
+
+    // Initialize libcurl
+    std::unique_ptr<CURL, decltype(&curl_easy_cleanup)> curl(curl_easy_init(), &curl_easy_cleanup);
+    if (!curl) {
+        fprintf(stderr, "%s: error initializing libcurl\n", __func__);
+        return false;
+    }
+
+    bool force_download = false;
+
+    // Set the URL, allow to follow http redirection
+    curl_easy_setopt(curl.get(), CURLOPT_URL, url.c_str());
+    curl_easy_setopt(curl.get(), CURLOPT_FOLLOWLOCATION, 1L);
+
+#if defined(_WIN32)
+    // CURLSSLOPT_NATIVE_CA tells libcurl to use standard certificate store of
+    // operating system. Currently implemented under MS-Windows.
+    curl_easy_setopt(curl.get(), CURLOPT_SSL_OPTIONS, CURLSSLOPT_NATIVE_CA);
+#endif
+
+    // Check if the file already exists locally
+    struct stat model_file_info;
+    auto file_exists = (stat(path.c_str(), &model_file_info) == 0);
+
+    // If the file exists, check its JSON metadata companion file.
+    std::string metadata_path = path + ".json";
+    nlohmann::json metadata;
+    std::string etag;
+    std::string last_modified;
+
+    if (file_exists) {
+        // Try and read the JSON metadata file (note: stream autoclosed upon exiting this block).
+        std::ifstream metadata_in(metadata_path);
+        if (metadata_in.good()) {
+            try {
+                metadata_in >> metadata;
+                fprintf(stderr, "%s: previous metadata file found %s: %s\n", __func__, metadata_path.c_str(), metadata.dump().c_str());
+                if (metadata.contains("url") && metadata["url"].is_string()) {
+                    auto previous_url = metadata["url"].get<std::string>();
+                    if (previous_url != url) {
+                        fprintf(stderr, "%s: Model URL mismatch: %s != %s\n", __func__, url.c_str(), previous_url.c_str());
+                        return false;
+                    }
+                }
+                if (metadata.contains("etag") && metadata["etag"].is_string()) {
+                    etag = metadata["etag"];
+                }
+                if (metadata.contains("lastModified") && metadata["lastModified"].is_string()) {
+                    last_modified = metadata["lastModified"];
+                }
+            } catch (const nlohmann::json::exception & e) {
+                fprintf(stderr, "%s: error reading metadata file %s: %s\n", __func__, metadata_path.c_str(), e.what());
+                return false;
+            }
+        }
+    } else {
+        fprintf(stderr, "%s: no previous model file found %s\n", __func__, path.c_str());
+    }
+
+    // Send a HEAD request to retrieve the etag and last-modified headers
+    struct llama_load_model_from_url_headers {
+        std::string etag;
+        std::string last_modified;
+    };
+    llama_load_model_from_url_headers headers;
+    {
+        typedef size_t(*CURLOPT_HEADERFUNCTION_PTR)(char *, size_t, size_t, void *);
+        auto header_callback = [](char * buffer, size_t /*size*/, size_t n_items, void * userdata) -> size_t {
+            llama_load_model_from_url_headers *headers = (llama_load_model_from_url_headers *) userdata;
+
+            static std::regex header_regex("([^:]+): (.*)\r\n");
+            static std::regex etag_regex("ETag", std::regex_constants::icase);
+            static std::regex last_modified_regex("Last-Modified", std::regex_constants::icase);
+
+            std::string header(buffer, n_items);
+            std::smatch match;
+            if (std::regex_match(header, match, header_regex)) {
+                const std::string & key = match[1];
+                const std::string & value = match[2];
+                if (std::regex_match(key, match, etag_regex)) {
+                    headers->etag = value;
+                } else if (std::regex_match(key, match, last_modified_regex)) {
+                    headers->last_modified = value;
+                }
+            }
+            return n_items;
+        };
+
+        curl_easy_setopt(curl.get(), CURLOPT_NOBODY, 1L); // will trigger the HEAD verb
+        curl_easy_setopt(curl.get(), CURLOPT_NOPROGRESS, 1L); // hide head request progress
+        curl_easy_setopt(curl.get(), CURLOPT_HEADERFUNCTION, static_cast<CURLOPT_HEADERFUNCTION_PTR>(header_callback));
+        curl_easy_setopt(curl.get(), CURLOPT_HEADERDATA, &headers);
+
+        CURLcode res = curl_easy_perform(curl.get());
+        if (res != CURLE_OK) {
+            fprintf(stderr, "%s: curl_easy_perform() failed: %s\n", __func__, curl_easy_strerror(res));
+            return false;
+        }
+
+        long http_code = 0;
+        curl_easy_getinfo(curl.get(), CURLINFO_RESPONSE_CODE, &http_code);
+        if (http_code != 200) {
+            // HEAD not supported, we don't know if the file has changed
+            // force trigger downloading
+            force_download = true;
+            fprintf(stderr, "%s: HEAD invalid http status code received: %ld\n", __func__, http_code);
+        }
+    }
+
+    bool should_download = !file_exists || force_download;
+    if (!should_download) {
+        if (!etag.empty() && etag != headers.etag) {
+            fprintf(stderr, "%s: ETag header is different (%s != %s): triggering a new download\n", __func__, etag.c_str(), headers.etag.c_str());
+            should_download = true;
+        } else if (!last_modified.empty() && last_modified != headers.last_modified) {
+            fprintf(stderr, "%s: Last-Modified header is different (%s != %s): triggering a new download\n", __func__, last_modified.c_str(), headers.last_modified.c_str());
+            should_download = true;
+        }
+    }
+    if (should_download) {
+        std::string path_temporary = path + ".downloadInProgress";
+        if (file_exists) {
+            fprintf(stderr, "%s: deleting previous downloaded file: %s\n", __func__, path.c_str());
+            if (remove(path.c_str()) != 0) {
+                fprintf(stderr, "%s: unable to delete file: %s\n", __func__, path.c_str());
+                return false;
+            }
+        }
+
+        // Set the output file
+        std::unique_ptr<FILE, decltype(&fclose)> outfile(fopen(path_temporary.c_str(), "wb"), fclose);
+        if (!outfile) {
+            fprintf(stderr, "%s: error opening local file for writing: %s\n", __func__, path.c_str());
+            return false;
+        }
+
+        typedef size_t(*CURLOPT_WRITEFUNCTION_PTR)(void * data, size_t size, size_t nmemb, void * fd);
+        auto write_callback = [](void * data, size_t size, size_t nmemb, void * fd) -> size_t {
+            return fwrite(data, size, nmemb, (FILE *)fd);
+        };
+        curl_easy_setopt(curl.get(), CURLOPT_NOBODY, 0L);
+        curl_easy_setopt(curl.get(), CURLOPT_WRITEFUNCTION, static_cast<CURLOPT_WRITEFUNCTION_PTR>(write_callback));
+        curl_easy_setopt(curl.get(), CURLOPT_WRITEDATA, outfile.get());
+
+        // display download progress
+        curl_easy_setopt(curl.get(), CURLOPT_NOPROGRESS, 0L);
+
+        // helper function to hide password in URL
+        auto llama_download_hide_password_in_url = [](const std::string & url) -> std::string {
+            std::size_t protocol_pos = url.find("://");
+            if (protocol_pos == std::string::npos) {
+                return url; // Malformed URL
+            }
+
+            std::size_t at_pos = url.find('@', protocol_pos + 3);
+            if (at_pos == std::string::npos) {
+                return url; // No password in URL
+            }
+
+            return url.substr(0, protocol_pos + 3) + "********" + url.substr(at_pos);
+        };
+
+        // start the download
+        fprintf(stderr, "%s: downloading from %s to %s (server_etag:%s, server_last_modified:%s)...\n", __func__,
+            llama_download_hide_password_in_url(url).c_str(), path.c_str(), headers.etag.c_str(), headers.last_modified.c_str());
+        auto res = curl_easy_perform(curl.get());
+        if (res != CURLE_OK) {
+            fprintf(stderr, "%s: curl_easy_perform() failed: %s\n", __func__, curl_easy_strerror(res));
+            return false;
+        }
+
+        long http_code = 0;
+        curl_easy_getinfo (curl.get(), CURLINFO_RESPONSE_CODE, &http_code);
+        if (http_code < 200 || http_code >= 400) {
+            fprintf(stderr, "%s: invalid http status code received: %ld\n", __func__, http_code);
+            return false;
+        }
+
+        // Causes file to be closed explicitly here before we rename it.
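+        // (renaming a file that is still open typically fails on Windows, so release the FILE handle first)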
+ outfile.reset(); + + // Write the updated JSON metadata file. + metadata.update({ + {"url", url}, + {"etag", headers.etag}, + {"lastModified", headers.last_modified} + }); + std::ofstream(metadata_path) << metadata.dump(4); + fprintf(stderr, "%s: file metadata saved: %s\n", __func__, metadata_path.c_str()); + + if (rename(path_temporary.c_str(), path.c_str()) != 0) { + fprintf(stderr, "%s: unable to rename file: %s to %s\n", __func__, path_temporary.c_str(), path.c_str()); + return false; + } + } + + return true; +} + +struct llama_model * llama_load_model_from_url( + const char * model_url, + const char * path_model, + const struct llama_model_params & params) { + // Basic validation of the model_url + if (!model_url || strlen(model_url) == 0) { + fprintf(stderr, "%s: invalid model_url\n", __func__); + return NULL; + } + + if (!llama_download_file(model_url, path_model)) { + return NULL; + } + + // check for additional GGUFs split to download + int n_split = 0; + { + struct gguf_init_params gguf_params = { + /*.no_alloc = */ true, + /*.ctx = */ NULL, + }; + auto * ctx_gguf = gguf_init_from_file(path_model, gguf_params); + if (!ctx_gguf) { + fprintf(stderr, "\n%s: failed to load input GGUF from %s\n", __func__, path_model); + return NULL; + } + + auto key_n_split = gguf_find_key(ctx_gguf, LLM_KV_SPLIT_COUNT); + if (key_n_split >= 0) { + n_split = gguf_get_val_u16(ctx_gguf, key_n_split); + } + + gguf_free(ctx_gguf); + } + + if (n_split > 1) { + char split_prefix[PATH_MAX] = {0}; + char split_url_prefix[LLAMA_CURL_MAX_URL_LENGTH] = {0}; + + // Verify the first split file format + // and extract split URL and PATH prefixes + { + if (!llama_split_prefix(split_prefix, sizeof(split_prefix), path_model, 0, n_split)) { + fprintf(stderr, "\n%s: unexpected model file name: %s" + " n_split=%d\n", __func__, path_model, n_split); + return NULL; + } + + if (!llama_split_prefix(split_url_prefix, sizeof(split_url_prefix), model_url, 0, n_split)) { + fprintf(stderr, "\n%s: unexpected model url: %s" + " n_split=%d\n", __func__, model_url, n_split); + return NULL; + } + } + + // Prepare download in parallel + std::vector> futures_download; + for (int idx = 1; idx < n_split; idx++) { + futures_download.push_back(std::async(std::launch::async, [&split_prefix, &split_url_prefix, &n_split](int download_idx) -> bool { + char split_path[PATH_MAX] = {0}; + llama_split_path(split_path, sizeof(split_path), split_prefix, download_idx, n_split); + + char split_url[LLAMA_CURL_MAX_URL_LENGTH] = {0}; + llama_split_path(split_url, sizeof(split_url), split_url_prefix, download_idx, n_split); + + return llama_download_file(split_url, split_path); + }, idx)); + } + + // Wait for all downloads to complete + for (auto & f : futures_download) { + if (!f.get()) { + return NULL; + } + } + } + + return llama_load_model_from_file(path_model, params); +} + +struct llama_model * llama_load_model_from_hf( + const char * repo, + const char * model, + const char * path_model, + const struct llama_model_params & params) { + // construct hugging face model url: + // + // --repo ggml-org/models --file tinyllama-1.1b/ggml-model-f16.gguf + // https://huggingface.co/ggml-org/models/resolve/main/tinyllama-1.1b/ggml-model-f16.gguf + // + // --repo TheBloke/Mixtral-8x7B-v0.1-GGUF --file mixtral-8x7b-v0.1.Q4_K_M.gguf + // https://huggingface.co/TheBloke/Mixtral-8x7B-v0.1-GGUF/resolve/main/mixtral-8x7b-v0.1.Q4_K_M.gguf + // + + std::string model_url = "https://huggingface.co/"; + model_url += repo; + model_url += "/resolve/main/"; + model_url += 
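+// Editor's note (illustrative sketch only): the split-GGUF branch above downloads the
+// extra shards concurrently with std::async and then joins on the futures. A minimal
+// sketch of that fan-out/join pattern, with a made-up fetch() standing in for
+// llama_download_file(split_url, split_path):
+#if 0
+#include <cstdio>
+#include <future>
+#include <vector>
+
+static bool fetch(int idx) {                    // stand-in for the real download
+    std::printf("downloading shard %d\n", idx);
+    return true;
+}
+
+int main() {
+    const int n_split = 4;
+    std::vector<std::future<bool>> futures;
+    for (int idx = 1; idx < n_split; idx++) {   // shard 0 was already fetched
+        futures.push_back(std::async(std::launch::async, fetch, idx));
+    }
+    bool ok = true;
+    for (auto & f : futures) {
+        ok = f.get() && ok;                     // wait for every shard, keep any failure
+    }
+    return ok ? 0 : 1;
+}
+#endif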
model; + + return llama_load_model_from_url(model_url.c_str(), path_model, params); +} + +#else + +struct llama_model * llama_load_model_from_url( + const char * /*model_url*/, + const char * /*path_model*/, + const struct llama_model_params & /*params*/) { + fprintf(stderr, "%s: llama.cpp built without libcurl, downloading from an url not supported.\n", __func__); + return nullptr; +} + +struct llama_model * llama_load_model_from_hf( + const char * /*repo*/, + const char * /*model*/, + const char * /*path_model*/, + const struct llama_model_params & /*params*/) { + fprintf(stderr, "%s: llama.cpp built without libcurl, downloading from Hugging Face not supported.\n", __func__); + return nullptr; +} + +#endif // LLAMA_USE_CURL + +std::tuple llama_init_from_gpt_params(gpt_params & params) { + auto mparams = llama_model_params_from_gpt_params(params); + + llama_model * model = nullptr; + + if (!params.hf_repo.empty() && !params.hf_file.empty()) { + model = llama_load_model_from_hf(params.hf_repo.c_str(), params.hf_file.c_str(), params.model.c_str(), mparams); + } else if (!params.model_url.empty()) { + model = llama_load_model_from_url(params.model_url.c_str(), params.model.c_str(), mparams); + } else { + model = llama_load_model_from_file(params.model.c_str(), mparams); + } + + if (model == NULL) { + fprintf(stderr, "%s: error: failed to load model '%s'\n", __func__, params.model.c_str()); + return std::make_tuple(nullptr, nullptr); + } + + auto cparams = llama_context_params_from_gpt_params(params); + + llama_context * lctx = llama_new_context_with_model(model, cparams); + if (lctx == NULL) { + fprintf(stderr, "%s: error: failed to create context with model '%s'\n", __func__, params.model.c_str()); + llama_free_model(model); + return std::make_tuple(nullptr, nullptr); + } + + if (!params.control_vectors.empty()) { + if (params.control_vector_layer_start <= 0) params.control_vector_layer_start = 1; + if (params.control_vector_layer_end <= 0) params.control_vector_layer_end = llama_n_layer(model); + + const auto cvec = llama_control_vector_load(params.control_vectors); + if (cvec.n_embd == -1) { + llama_free(lctx); + llama_free_model(model); + return std::make_tuple(nullptr, nullptr); + } + + int err = llama_control_vector_apply(lctx, + cvec.data.data(), + cvec.data.size(), + cvec.n_embd, + params.control_vector_layer_start, + params.control_vector_layer_end); + if (err) { + llama_free(lctx); + llama_free_model(model); + return std::make_tuple(nullptr, nullptr); + } + } + + for (unsigned int i = 0; i < params.lora_adapter.size(); ++i) { + const std::string & lora_adapter = std::get<0>(params.lora_adapter[i]); + float lora_scale = std::get<1>(params.lora_adapter[i]); + int err = llama_model_apply_lora_from_file(model, + lora_adapter.c_str(), + lora_scale, + ((i > 0) || params.lora_base.empty()) + ? 
NULL + : params.lora_base.c_str(), + params.n_threads); + if (err != 0) { + fprintf(stderr, "%s: error: failed to apply lora adapter\n", __func__); + llama_free(lctx); + llama_free_model(model); + return std::make_tuple(nullptr, nullptr); + } + } + + if (params.ignore_eos) { + params.sparams.logit_bias[llama_token_eos(model)] = -INFINITY; + } + + if (params.warmup) { + LOG("warming up the model with an empty run\n"); + + std::vector tmp = { llama_token_bos(model), llama_token_eos(model), }; + llama_decode(lctx, llama_batch_get_one(tmp.data(), std::min(tmp.size(), (size_t) params.n_batch), 0, 0)); + llama_kv_cache_clear(lctx); + llama_synchronize(lctx); + llama_reset_timings(lctx); + } + + return std::make_tuple(model, lctx); +} + +// +// Vocab utils +// + +std::vector llama_tokenize( + const struct llama_context * ctx, + const std::string & text, + bool add_special, + bool parse_special) { + return llama_tokenize(llama_get_model(ctx), text, add_special, parse_special); +} + +std::vector llama_tokenize( + const struct llama_model * model, + const std::string & text, + bool add_special, + bool parse_special) { + // upper limit for the number of tokens + int n_tokens = text.length() + 2 * add_special; + std::vector result(n_tokens); + n_tokens = llama_tokenize(model, text.data(), text.length(), result.data(), result.size(), add_special, parse_special); + if (n_tokens < 0) { + result.resize(-n_tokens); + int check = llama_tokenize(model, text.data(), text.length(), result.data(), result.size(), add_special, parse_special); + GGML_ASSERT(check == -n_tokens); + } else { + result.resize(n_tokens); + } + return result; +} + +std::string llama_token_to_piece(const struct llama_context * ctx, llama_token token, bool special) { + std::vector result(8, 0); + const int n_tokens = llama_token_to_piece(llama_get_model(ctx), token, result.data(), result.size(), special); + if (n_tokens < 0) { + result.resize(-n_tokens); + int check = llama_token_to_piece(llama_get_model(ctx), token, result.data(), result.size(), special); + GGML_ASSERT(check == -n_tokens); + } else { + result.resize(n_tokens); + } + + return std::string(result.data(), result.size()); +} + +std::string llama_detokenize_spm(llama_context * ctx, const std::vector & tokens) { + const llama_token bos_id = llama_token_bos(llama_get_model(ctx)); + + std::string piece; + std::string result; + + for (size_t i = 0; i < tokens.size(); ++i) { + piece = llama_token_to_piece(ctx, tokens[i]); + + // remove the leading space of the first non-BOS token + if (((tokens[0] == bos_id && i == 1) || (tokens[0] != bos_id && i == 0)) && piece[0] == ' ') { + piece = piece.substr(1); + } + + result += piece; + } + + return result; +} + +std::string llama_detokenize_bpe(llama_context * ctx, const std::vector & tokens) { + std::string piece; + std::string result; + + for (size_t i = 0; i < tokens.size(); ++i) { + piece = llama_token_to_piece(ctx, tokens[i]); + + result += piece; + } + + // NOTE: the original tokenizer decodes bytes after collecting the pieces. + return result; +} + +bool llama_should_add_bos_token(const llama_model * model) { + const int add_bos = llama_add_bos_token(model); + + return add_bos != -1 ? 
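+// Editor's note (illustrative sketch only): a usage example for the vocab helpers
+// above -- tokenize a prompt, then render each token back to text. It assumes an
+// already-initialized llama_context * ctx (e.g. from llama_init_from_gpt_params):
+#if 0
+static void print_tokens(llama_context * ctx, const std::string & prompt) {
+    // add_special = true lets the tokenizer add BOS if the model expects it
+    const std::vector<llama_token> toks = llama_tokenize(ctx, prompt, true);
+    for (size_t i = 0; i < toks.size(); ++i) {
+        fprintf(stderr, "%4zu: %6d '%s'\n", i, toks[i], llama_token_to_piece(ctx, toks[i]).c_str());
+    }
+}
+#endif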
bool(add_bos) : (llama_vocab_type(model) == LLAMA_VOCAB_TYPE_SPM); +} + +// +// YAML utils +// + +// returns true if successful, false otherwise +bool create_directory_with_parents(const std::string & path) { +#ifdef _WIN32 + std::wstring_convert> converter; + std::wstring wpath = converter.from_bytes(path); + + // if the path already exists, check whether it's a directory + const DWORD attributes = GetFileAttributesW(wpath.c_str()); + if ((attributes != INVALID_FILE_ATTRIBUTES) && (attributes & FILE_ATTRIBUTE_DIRECTORY)) { + return true; + } + + size_t pos_slash = 0; + + // process path from front to back, procedurally creating directories + while ((pos_slash = path.find('\\', pos_slash)) != std::string::npos) { + const std::wstring subpath = wpath.substr(0, pos_slash); + const wchar_t * test = subpath.c_str(); + + const bool success = CreateDirectoryW(test, NULL); + if (!success) { + const DWORD error = GetLastError(); + + // if the path already exists, ensure that it's a directory + if (error == ERROR_ALREADY_EXISTS) { + const DWORD attributes = GetFileAttributesW(subpath.c_str()); + if (attributes == INVALID_FILE_ATTRIBUTES || !(attributes & FILE_ATTRIBUTE_DIRECTORY)) { + return false; + } + } else { + return false; + } + } + + pos_slash += 1; + } + + return true; +#else + // if the path already exists, check whether it's a directory + struct stat info; + if (stat(path.c_str(), &info) == 0) { + return S_ISDIR(info.st_mode); + } + + size_t pos_slash = 1; // skip leading slashes for directory creation + + // process path from front to back, procedurally creating directories + while ((pos_slash = path.find('/', pos_slash)) != std::string::npos) { + const std::string subpath = path.substr(0, pos_slash); + struct stat info; + + // if the path already exists, ensure that it's a directory + if (stat(subpath.c_str(), &info) == 0) { + if (!S_ISDIR(info.st_mode)) { + return false; + } + } else { + // create parent directories + const int ret = mkdir(subpath.c_str(), 0755); + if (ret != 0) { + return false; + } + } + + pos_slash += 1; + } + + return true; +#endif // _WIN32 +} + +void dump_vector_float_yaml(FILE * stream, const char * prop_name, const std::vector & data) { + if (data.empty()) { + fprintf(stream, "%s:\n", prop_name); + return; + } + + fprintf(stream, "%s: [", prop_name); + for (size_t i = 0; i < data.size() - 1; ++i) { + fprintf(stream, "%e, ", data[i]); + } + fprintf(stream, "%e]\n", data.back()); +} + +void dump_vector_int_yaml(FILE * stream, const char * prop_name, const std::vector & data) { + if (data.empty()) { + fprintf(stream, "%s:\n", prop_name); + return; + } + + fprintf(stream, "%s: [", prop_name); + for (size_t i = 0; i < data.size() - 1; ++i) { + fprintf(stream, "%d, ", data[i]); + } + fprintf(stream, "%d]\n", data.back()); +} + +void dump_string_yaml_multiline(FILE * stream, const char * prop_name, const char * data) { + std::string data_str(data == NULL ? 
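+// Editor's note (comparison only, not a suggested change to the vendored code):
+// create_directory_with_parents() above hand-rolls what C++17's
+// std::filesystem::create_directories() provides; the vendored version avoids the
+// <filesystem> dependency. An equivalent sketch for reference:
+#if 0
+#include <filesystem>
+#include <string>
+#include <system_error>
+
+static bool create_directory_with_parents_fs(const std::string & path) {
+    std::error_code ec;
+    std::filesystem::create_directories(path, ec);          // creates any missing parents
+    return !ec && std::filesystem::is_directory(path, ec);  // true if it now exists as a directory
+}
+#endif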
"" : data); + + if (data_str.empty()) { + fprintf(stream, "%s:\n", prop_name); + return; + } + + size_t pos_start = 0; + size_t pos_found = 0; + + if (!data_str.empty() && (std::isspace(data_str[0]) || std::isspace(data_str.back()))) { + data_str = std::regex_replace(data_str, std::regex("\n"), "\\n"); + data_str = std::regex_replace(data_str, std::regex("\""), "\\\""); + data_str = std::regex_replace(data_str, std::regex(R"(\\[^n"])"), R"(\$&)"); + data_str = "\"" + data_str + "\""; + fprintf(stream, "%s: %s\n", prop_name, data_str.c_str()); + return; + } + + if (data_str.find('\n') == std::string::npos) { + fprintf(stream, "%s: %s\n", prop_name, data_str.c_str()); + return; + } + + fprintf(stream, "%s: |\n", prop_name); + while ((pos_found = data_str.find('\n', pos_start)) != std::string::npos) { + fprintf(stream, " %s\n", data_str.substr(pos_start, pos_found-pos_start).c_str()); + pos_start = pos_found + 1; + } +} + +std::string get_sortable_timestamp() { + using clock = std::chrono::system_clock; + + const clock::time_point current_time = clock::now(); + const time_t as_time_t = clock::to_time_t(current_time); + char timestamp_no_ns[100]; + std::strftime(timestamp_no_ns, 100, "%Y_%m_%d-%H_%M_%S", std::localtime(&as_time_t)); + + const int64_t ns = std::chrono::duration_cast( + current_time.time_since_epoch() % 1000000000).count(); + char timestamp_ns[11]; + snprintf(timestamp_ns, 11, "%09" PRId64, ns); + + return std::string(timestamp_no_ns) + "." + std::string(timestamp_ns); +} + +void dump_non_result_info_yaml(FILE * stream, const gpt_params & params, const llama_context * lctx, + const std::string & timestamp, const std::vector & prompt_tokens, const char * model_desc) { + const llama_sampling_params & sparams = params.sparams; + + fprintf(stream, "build_commit: %s\n", LLAMA_COMMIT); + fprintf(stream, "build_number: %d\n", LLAMA_BUILD_NUMBER); + fprintf(stream, "cpu_has_arm_fma: %s\n", ggml_cpu_has_arm_fma() ? "true" : "false"); + fprintf(stream, "cpu_has_avx: %s\n", ggml_cpu_has_avx() ? "true" : "false"); + fprintf(stream, "cpu_has_avx_vnni: %s\n", ggml_cpu_has_avx_vnni() ? "true" : "false"); + fprintf(stream, "cpu_has_avx2: %s\n", ggml_cpu_has_avx2() ? "true" : "false"); + fprintf(stream, "cpu_has_avx512: %s\n", ggml_cpu_has_avx512() ? "true" : "false"); + fprintf(stream, "cpu_has_avx512_vbmi: %s\n", ggml_cpu_has_avx512_vbmi() ? "true" : "false"); + fprintf(stream, "cpu_has_avx512_vnni: %s\n", ggml_cpu_has_avx512_vnni() ? "true" : "false"); + fprintf(stream, "cpu_has_cuda: %s\n", ggml_cpu_has_cuda() ? "true" : "false"); + fprintf(stream, "cpu_has_vulkan: %s\n", ggml_cpu_has_vulkan() ? "true" : "false"); + fprintf(stream, "cpu_has_clblast: %s\n", ggml_cpu_has_clblast() ? "true" : "false"); + fprintf(stream, "cpu_has_kompute: %s\n", ggml_cpu_has_kompute() ? "true" : "false"); + fprintf(stream, "cpu_has_fma: %s\n", ggml_cpu_has_fma() ? "true" : "false"); + fprintf(stream, "cpu_has_gpublas: %s\n", ggml_cpu_has_gpublas() ? "true" : "false"); + fprintf(stream, "cpu_has_neon: %s\n", ggml_cpu_has_neon() ? "true" : "false"); + fprintf(stream, "cpu_has_f16c: %s\n", ggml_cpu_has_f16c() ? "true" : "false"); + fprintf(stream, "cpu_has_fp16_va: %s\n", ggml_cpu_has_fp16_va() ? "true" : "false"); + fprintf(stream, "cpu_has_wasm_simd: %s\n", ggml_cpu_has_wasm_simd() ? "true" : "false"); + fprintf(stream, "cpu_has_blas: %s\n", ggml_cpu_has_blas() ? "true" : "false"); + fprintf(stream, "cpu_has_sse3: %s\n", ggml_cpu_has_sse3() ? 
"true" : "false"); + fprintf(stream, "cpu_has_vsx: %s\n", ggml_cpu_has_vsx() ? "true" : "false"); + fprintf(stream, "cpu_has_matmul_int8: %s\n", ggml_cpu_has_matmul_int8() ? "true" : "false"); + +#ifdef NDEBUG + fprintf(stream, "debug: false\n"); +#else + fprintf(stream, "debug: true\n"); +#endif // NDEBUG + + fprintf(stream, "model_desc: %s\n", model_desc); + fprintf(stream, "n_vocab: %d # output size of the final layer, 32001 for some models\n", llama_n_vocab(llama_get_model(lctx))); + +#ifdef __OPTIMIZE__ + fprintf(stream, "optimize: true\n"); +#else + fprintf(stream, "optimize: false\n"); +#endif // __OPTIMIZE__ + + fprintf(stream, "time: %s\n", timestamp.c_str()); + + fprintf(stream, "\n"); + fprintf(stream, "###############\n"); + fprintf(stream, "# User Inputs #\n"); + fprintf(stream, "###############\n"); + fprintf(stream, "\n"); + + fprintf(stream, "alias: %s # default: unknown\n", params.model_alias.c_str()); + fprintf(stream, "batch_size: %d # default: 512\n", params.n_batch); + dump_string_yaml_multiline(stream, "cfg_negative_prompt", sparams.cfg_negative_prompt.c_str()); + fprintf(stream, "cfg_scale: %f # default: 1.0\n", sparams.cfg_scale); + fprintf(stream, "chunks: %d # default: -1 (unlimited)\n", params.n_chunks); + fprintf(stream, "color: %s # default: false\n", params.use_color ? "true" : "false"); + fprintf(stream, "ctx_size: %d # default: 512\n", params.n_ctx); + fprintf(stream, "escape: %s # default: false\n", params.escape ? "true" : "false"); + fprintf(stream, "file: # never logged, see prompt instead. Can still be specified for input.\n"); + fprintf(stream, "frequency_penalty: %f # default: 0.0 \n", sparams.penalty_freq); + dump_string_yaml_multiline(stream, "grammar", sparams.grammar.c_str()); + fprintf(stream, "grammar-file: # never logged, see grammar instead. Can still be specified for input.\n"); + fprintf(stream, "hellaswag: %s # default: false\n", params.hellaswag ? "true" : "false"); + fprintf(stream, "hellaswag_tasks: %zu # default: 400\n", params.hellaswag_tasks); + + const auto logit_bias_eos = sparams.logit_bias.find(llama_token_eos(llama_get_model(lctx))); + const bool ignore_eos = logit_bias_eos != sparams.logit_bias.end() && logit_bias_eos->second == -INFINITY; + fprintf(stream, "ignore_eos: %s # default: false\n", ignore_eos ? "true" : "false"); + + dump_string_yaml_multiline(stream, "in_prefix", params.input_prefix.c_str()); + fprintf(stream, "in_prefix_bos: %s # default: false\n", params.input_prefix_bos ? "true" : "false"); + dump_string_yaml_multiline(stream, "in_suffix", params.input_prefix.c_str()); + fprintf(stream, "instruct: %s # default: false\n", params.instruct ? "true" : "false"); + fprintf(stream, "interactive: %s # default: false\n", params.interactive ? "true" : "false"); + fprintf(stream, "interactive_first: %s # default: false\n", params.interactive_first ? 
"true" : "false"); + fprintf(stream, "keep: %d # default: 0\n", params.n_keep); + fprintf(stream, "logdir: %s # default: unset (no logging)\n", params.logdir.c_str()); + + fprintf(stream, "logit_bias:\n"); + for (std::pair lb : sparams.logit_bias) { + if (ignore_eos && lb.first == logit_bias_eos->first) { + continue; + } + fprintf(stream, " %d: %f", lb.first, lb.second); + } + + fprintf(stream, "lora:\n"); + for (std::tuple la : params.lora_adapter) { + if (std::get<1>(la) != 1.0f) { + continue; + } + fprintf(stream, " - %s\n", std::get<0>(la).c_str()); + } + fprintf(stream, "lora_scaled:\n"); + for (std::tuple la : params.lora_adapter) { + if (std::get<1>(la) == 1.0f) { + continue; + } + fprintf(stream, " - %s: %f\n", std::get<0>(la).c_str(), std::get<1>(la)); + } + fprintf(stream, "lora_base: %s\n", params.lora_base.c_str()); + fprintf(stream, "main_gpu: %d # default: 0\n", params.main_gpu); + fprintf(stream, "min_keep: %d # default: 0 (disabled)\n", sparams.min_keep); + fprintf(stream, "mirostat: %d # default: 0 (disabled)\n", sparams.mirostat); + fprintf(stream, "mirostat_ent: %f # default: 5.0\n", sparams.mirostat_tau); + fprintf(stream, "mirostat_lr: %f # default: 0.1\n", sparams.mirostat_eta); + fprintf(stream, "mlock: %s # default: false\n", params.use_mlock ? "true" : "false"); + fprintf(stream, "model: %s # default: %s\n", params.model.c_str(), DEFAULT_MODEL_PATH); + fprintf(stream, "model_draft: %s # default:\n", params.model_draft.c_str()); + fprintf(stream, "multiline_input: %s # default: false\n", params.multiline_input ? "true" : "false"); + fprintf(stream, "n_gpu_layers: %d # default: -1\n", params.n_gpu_layers); + fprintf(stream, "n_predict: %d # default: -1 (unlimited)\n", params.n_predict); + fprintf(stream, "n_probs: %d # only used by server binary, default: 0\n", sparams.n_probs); + fprintf(stream, "no_mmap: %s # default: false\n", !params.use_mmap ? "true" : "false"); + fprintf(stream, "penalize_nl: %s # default: false\n", sparams.penalize_nl ? "true" : "false"); + fprintf(stream, "ppl_output_type: %d # default: 0\n", params.ppl_output_type); + fprintf(stream, "ppl_stride: %d # default: 0\n", params.ppl_stride); + fprintf(stream, "presence_penalty: %f # default: 0.0\n", sparams.penalty_present); + dump_string_yaml_multiline(stream, "prompt", params.prompt.c_str()); + fprintf(stream, "prompt_cache: %s\n", params.path_prompt_cache.c_str()); + fprintf(stream, "prompt_cache_all: %s # default: false\n", params.prompt_cache_all ? "true" : "false"); + fprintf(stream, "prompt_cache_ro: %s # default: false\n", params.prompt_cache_ro ? "true" : "false"); + dump_vector_int_yaml(stream, "prompt_tokens", prompt_tokens); + fprintf(stream, "random_prompt: %s # default: false\n", params.random_prompt ? "true" : "false"); + fprintf(stream, "repeat_penalty: %f # default: 1.1\n", sparams.penalty_repeat); + + fprintf(stream, "reverse_prompt:\n"); + for (std::string ap : params.antiprompt) { + size_t pos = 0; + while ((pos = ap.find('\n', pos)) != std::string::npos) { + ap.replace(pos, 1, "\\n"); + pos += 1; + } + + fprintf(stream, " - %s\n", ap.c_str()); + } + + fprintf(stream, "rope_freq_base: %f # default: 10000.0\n", params.rope_freq_base); + fprintf(stream, "rope_freq_scale: %f # default: 1.0\n", params.rope_freq_scale); + fprintf(stream, "seed: %u # default: -1 (random seed)\n", params.seed); + fprintf(stream, "simple_io: %s # default: false\n", params.simple_io ? "true" : "false"); + fprintf(stream, "cont_batching: %s # default: false\n", params.cont_batching ? 
"true" : "false"); + fprintf(stream, "flash_attn: %s # default: false\n", params.flash_attn ? "true" : "false"); + fprintf(stream, "temp: %f # default: 0.8\n", sparams.temp); + + const std::vector tensor_split_vector(params.tensor_split, params.tensor_split + llama_max_devices()); + dump_vector_float_yaml(stream, "tensor_split", tensor_split_vector); + + fprintf(stream, "tfs: %f # default: 1.0\n", sparams.tfs_z); + fprintf(stream, "threads: %d # default: %u\n", params.n_threads, std::thread::hardware_concurrency()); + fprintf(stream, "top_k: %d # default: 40\n", sparams.top_k); + fprintf(stream, "top_p: %f # default: 0.95\n", sparams.top_p); + fprintf(stream, "min_p: %f # default: 0.0\n", sparams.min_p); + fprintf(stream, "typical_p: %f # default: 1.0\n", sparams.typical_p); + fprintf(stream, "verbose_prompt: %s # default: false\n", params.verbose_prompt ? "true" : "false"); + fprintf(stream, "display_prompt: %s # default: true\n", params.display_prompt ? "true" : "false"); +} + +// +// KV cache utils +// + +void dump_kv_cache_view(const llama_kv_cache_view & view, int row_size) { + static const char slot_chars[] = ".123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz+"; + + printf("=== Dumping KV cache. total cells %d, max sequences per cell %d, populated cells %d, total tokens in cache %d, largest empty slot=%d @ %d", + view.n_cells, view.n_seq_max, view.used_cells, view.token_count, view.max_contiguous, view.max_contiguous_idx); + + llama_kv_cache_view_cell * c_curr = view.cells; + llama_seq_id * cs_curr = view.cells_sequences; + + for (int i = 0; i < view.n_cells; i++, c_curr++, cs_curr += view.n_seq_max) { + if (i % row_size == 0) { + printf("\n%5d: ", i); + } + int seq_count = 0; + for (int j = 0; j < view.n_seq_max; j++) { + if (cs_curr[j] >= 0) { seq_count++; } + } + putchar(slot_chars[std::min(sizeof(slot_chars) - 2, size_t(seq_count))]); + } + + printf("\n=== Done dumping\n"); +} + +void dump_kv_cache_view_seqs(const llama_kv_cache_view & view, int row_size) { + static const char slot_chars[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"; + + printf("=== Dumping KV cache. total cells %d, max sequences per cell %d, populated cells %d, total tokens in cache %d, largest empty slot=%d @ %d\n", + view.n_cells, view.n_seq_max, view.used_cells, view.token_count, view.max_contiguous, view.max_contiguous_idx); + + std::unordered_map seqs; + llama_kv_cache_view_cell * c_curr = view.cells; + llama_seq_id * cs_curr = view.cells_sequences; + + for (int i = 0; i < view.n_cells; i++, c_curr++, cs_curr += view.n_seq_max) { + for (int j = 0; j < view.n_seq_max; j++) { + if (cs_curr[j] < 0) { continue; } + if (seqs.find(cs_curr[j]) == seqs.end()) { + if (seqs.size() + 1 >= sizeof(slot_chars)) { break; } + const size_t sz = seqs.size(); + seqs[cs_curr[j]] = sz; + } + } + if (seqs.size() + 1 >= sizeof(slot_chars)) { break; } + } + + printf("=== Sequence legend: "); + for (const auto & it : seqs) { + printf("%zu=%d, ", it.second, it.first); + } + printf("'+'=other sequence ids"); + + c_curr = view.cells; + cs_curr = view.cells_sequences; + for (int i = 0; i < view.n_cells; i++, c_curr++, cs_curr += view.n_seq_max) { + if (i % row_size == 0) { + printf("\n%5d: ", i); + } + for (int j = 0; j < view.n_seq_max; j++) { + if (cs_curr[j] >= 0) { + const auto & it = seqs.find(cs_curr[j]); + putchar(it != seqs.end() ? 
int(slot_chars[it->second]) : '+'); + } else { + putchar('.'); + } + } + putchar(' '); + } + + printf("\n=== Done dumping\n"); +} + +void llama_embd_normalize(const float * inp, float * out, int n) { + double sum = 0.0; + for (int i = 0; i < n; i++) { + sum += inp[i] * inp[i]; + } + sum = sqrt(sum); + + const float norm = sum > 0.0 ? 1.0f / sum : 0.0f; + + for (int i = 0; i < n; i++) { + out[i] = inp[i] * norm; + } +} + +float llama_embd_similarity_cos(const float * embd1, const float * embd2, int n){ + double sum = 0.0; + double sum1 = 0.0; + double sum2 = 0.0; + + for (int i = 0; i < n; i++) { + sum += embd1[i] * embd2[i]; + sum1 += embd1[i] * embd1[i]; + sum2 += embd2[i] * embd2[i]; + } + + return sum / (sqrt(sum1) * sqrt(sum2)); +} + +// +// Control vector utils +// + +static llama_control_vector_data llama_control_vector_load_one(const llama_control_vector_load_info & load_info) { + int32_t n_tensors; + + size_t n_bytes = 0; + + uint32_t max_direction_layer = 0; + + llama_control_vector_data result = { -1, {} }; + + // calculate size of ctx needed for tensors, ensure tensors are f32, and find max layer + { + struct ggml_init_params meta_params = { + /* .mem_size = */ ggml_tensor_overhead() * 128 + ggml_graph_overhead(), + /* .mem_buffer = */ nullptr, + /* .no_alloc = */ true, + }; + ggml_context * meta_ctx = ggml_init(meta_params); + struct gguf_init_params meta_gguf_params = { + /* .no_alloc = */ true, + /* .ctx = */ &meta_ctx, + }; + struct gguf_context * meta_ctx_gguf = gguf_init_from_file(load_info.fname.c_str(), meta_gguf_params); + if (!meta_ctx_gguf) { + fprintf(stderr, "%s: failed to load control vector from %s\n", __func__, load_info.fname.c_str()); + ggml_free(meta_ctx); + return result; + } + + n_tensors = gguf_get_n_tensors(meta_ctx_gguf); + for (int i = 0; i < n_tensors; i++) { + std::string name = gguf_get_tensor_name(meta_ctx_gguf, i); + + // split on '.' + size_t dotpos = name.find('.'); + if (dotpos != std::string::npos && name.substr(0, dotpos) == "direction") { + try { + uint32_t layer = std::stoi(name.substr(dotpos + 1)); + if (layer == 0) { + fprintf(stderr, "%s: direction tensor invalid in %s\n", __func__, load_info.fname.c_str()); + ggml_free(meta_ctx); + gguf_free(meta_ctx_gguf); + return result; + } + if (layer > max_direction_layer) { + max_direction_layer = layer; + } + } catch (...) 
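+// Editor's note (illustrative sketch only): a tiny numeric check for
+// llama_embd_normalize() and llama_embd_similarity_cos() above -- cosine similarity
+// is scale-invariant, so normalizing first does not change the result, and
+// cos(a, a) is 1:
+#if 0
+#include <cstdio>
+
+int main() {
+    const float a[4] = {1.0f, 2.0f, 2.0f, 0.0f};       // |a| = 3
+    float a_norm[4];
+    llama_embd_normalize(a, a_norm, 4);                 // a_norm = {1/3, 2/3, 2/3, 0}
+    std::printf("cos(a, a)      = %f\n", llama_embd_similarity_cos(a, a, 4));      // 1.000000
+    std::printf("cos(a_norm, a) = %f\n", llama_embd_similarity_cos(a_norm, a, 4)); // 1.000000
+    return 0;
+}
+#endif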
{ + fprintf(stderr, "%s: direction tensor invalid in %s\n", __func__, load_info.fname.c_str()); + ggml_free(meta_ctx); + gguf_free(meta_ctx_gguf); + return result; + } + } + + struct ggml_tensor * tensor_meta = ggml_get_tensor(meta_ctx, name.c_str()); + if (tensor_meta->type != GGML_TYPE_F32 || ggml_n_dims(tensor_meta) != 1) { + fprintf(stderr, "%s: direction tensor invalid in %s\n", __func__, load_info.fname.c_str()); + ggml_free(meta_ctx); + gguf_free(meta_ctx_gguf); + return result; + } + if (result.n_embd == -1) { + result.n_embd = ggml_nelements(tensor_meta); + } else if (ggml_nelements(tensor_meta) != result.n_embd) { + fprintf(stderr, "%s: direction tensor sizes mismatched in %s\n", __func__, load_info.fname.c_str()); + ggml_free(meta_ctx); + gguf_free(meta_ctx_gguf); + return result; + } + n_bytes += ggml_nbytes(tensor_meta); + } + ggml_free(meta_ctx); + gguf_free(meta_ctx_gguf); + } + + if (n_tensors == 0) { + fprintf(stderr, "%s: no direction tensors found in %s\n", __func__, load_info.fname.c_str()); + return result; + } + + // load and scale tensors into final control vector context + struct ggml_init_params ggml_params = { + /* .mem_size = */ ggml_tensor_overhead() * n_tensors + n_bytes, + /* .mem_buffer = */ nullptr, + /* .no_alloc = */ false, + }; + struct ggml_context * ctx = ggml_init(ggml_params); + + struct gguf_init_params params = { + /*.no_alloc = */ false, + /*.ctx = */ &ctx, + }; + struct gguf_context * ctx_gguf = gguf_init_from_file(load_info.fname.c_str(), params); + if (!ctx_gguf) { + fprintf(stderr, "%s: failed to load control vector from %s\n", __func__, load_info.fname.c_str()); + ggml_free(ctx); + return result; + } + + // do not store data for layer 0 (it's not used) + result.data.resize(result.n_embd * max_direction_layer); + + for (uint32_t il = 1; il <= max_direction_layer; il++) { + const std::string name = "direction." 
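+        // Editor's note (illustrative sketch only): the surrounding loop fills
+        // result.data so that layer il (1-based; layer 0 is skipped) occupies n_embd
+        // consecutive floats starting at (il - 1) * n_embd, already scaled by
+        // load_info.strength. A hypothetical accessor showing that layout:
+#if 0
+        // entry j of the direction vector for layer il, for il >= 1 and j < n_embd
+        static float control_vector_entry(const llama_control_vector_data & cv, int il, int j) {
+            return cv.data[(size_t) (il - 1) * cv.n_embd + j];
+        }
+#endif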
+ std::to_string(il); + const ggml_tensor * tensor = ggml_get_tensor(ctx, name.c_str()); + + float * dst = result.data.data() + result.n_embd * (il - 1); + + if (tensor) { + const float * src = (const float *) tensor->data; + for (int j = 0; j < result.n_embd; j++) { + dst[j] = src[j] * load_info.strength; + } + } else { + for (int j = 0; j < result.n_embd; j++) { + dst[j] = 0.0f; + } + } + } + + return result; +} + +llama_control_vector_data llama_control_vector_load(const std::vector & load_infos) { + llama_control_vector_data result = { -1, {} }; + + for (const auto & info : load_infos) { + auto cur = llama_control_vector_load_one(info); + + if (cur.n_embd == -1) { + return result; + } + if (result.n_embd != -1 && (result.n_embd != cur.n_embd || result.data.size() != cur.data.size())) { + fprintf(stderr, "%s: control vector in %s does not match previous vector dimensions\n", __func__, info.fname.c_str()); + return result; + } + + if (result.n_embd == -1) { + result = std::move(cur); + } else { + for (size_t i = 0; i < cur.data.size(); i++) { + result.data[i] += cur.data[i]; + } + } + } + + if (result.n_embd == -1) { + fprintf(stderr, "%s: no vectors passed\n", __func__); + } + + return result; +} diff --git a/llama-cpp-python/vendor/llama.cpp/common/common.h b/llama-cpp-python/vendor/llama.cpp/common/common.h new file mode 100644 index 0000000000000000000000000000000000000000..9252a4b63889b395d6fa0fe14a358da6045af07e --- /dev/null +++ b/llama-cpp-python/vendor/llama.cpp/common/common.h @@ -0,0 +1,334 @@ +// Various helper functions and utilities + +#pragma once + +#include "llama.h" + +#include "sampling.h" + +#define LOG_NO_FILE_LINE_FUNCTION +#include "log.h" + +#include +#include +#include +#include +#include +#include +#include + +#ifdef _WIN32 +#define DIRECTORY_SEPARATOR '\\' +#else +#define DIRECTORY_SEPARATOR '/' +#endif // _WIN32 + +#define die(msg) do { fputs("error: " msg "\n", stderr); exit(1); } while (0) +#define die_fmt(fmt, ...) 
do { fprintf(stderr, "error: " fmt "\n", __VA_ARGS__); exit(1); } while (0) + +#define print_build_info() do { \ + fprintf(stderr, "%s: build = %d (%s)\n", __func__, LLAMA_BUILD_NUMBER, LLAMA_COMMIT); \ + fprintf(stderr, "%s: built with %s for %s\n", __func__, LLAMA_COMPILER, LLAMA_BUILD_TARGET); \ +} while(0) + +#define DEFAULT_MODEL_PATH "models/7B/ggml-model-f16.gguf" + +// build info +extern int LLAMA_BUILD_NUMBER; +extern char const *LLAMA_COMMIT; +extern char const *LLAMA_COMPILER; +extern char const *LLAMA_BUILD_TARGET; + +struct llama_control_vector_load_info; + +int get_math_cpu_count(); +int32_t get_num_physical_cores(); + +// +// CLI argument parsing +// + +struct gpt_params { + uint32_t seed = LLAMA_DEFAULT_SEED; // RNG seed + + int32_t n_threads = get_math_cpu_count(); + int32_t n_threads_draft = -1; + int32_t n_threads_batch = -1; // number of threads to use for batch processing (-1 = use n_threads) + int32_t n_threads_batch_draft = -1; + int32_t n_predict = -1; // new tokens to predict + int32_t n_ctx = 512; // context size + int32_t n_batch = 2048; // logical batch size for prompt processing (must be >=32 to use BLAS) + int32_t n_ubatch = 512; // physical batch size for prompt processing (must be >=32 to use BLAS) + int32_t n_keep = 0; // number of tokens to keep from initial prompt + int32_t n_draft = 5; // number of tokens to draft during speculative decoding + int32_t n_chunks = -1; // max number of chunks to process (-1 = unlimited) + int32_t n_parallel = 1; // number of parallel sequences to decode + int32_t n_sequences = 1; // number of sequences to decode + float p_split = 0.1f; // speculative decoding split probability + int32_t n_gpu_layers = -1; // number of layers to store in VRAM (-1 - use default) + int32_t n_gpu_layers_draft = -1; // number of layers to store in VRAM for the draft model (-1 - use default) + llama_split_mode split_mode = LLAMA_SPLIT_MODE_LAYER; // how to split the model across GPUs + int32_t main_gpu = 0; // the GPU that is used for scratch and small tensors + float tensor_split[128] = {0}; // how split tensors should be distributed across GPUs + int32_t n_beams = 0; // if non-zero then use beam search of given width. 
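+    // Editor's note (illustrative sketch only): a minimal usage example for this
+    // struct, relying on the defaults above and on llama_init_from_gpt_params()
+    // declared further down in this header; the model path is a placeholder:
+#if 0
+    gpt_params params;
+    params.model        = "models/7B/ggml-model-f16.gguf";  // or set model_url / hf_repo + hf_file
+    params.n_ctx        = 2048;
+    params.n_gpu_layers = 35;
+
+    llama_model   * model = nullptr;
+    llama_context * ctx   = nullptr;
+    std::tie(model, ctx)  = llama_init_from_gpt_params(params);   // needs <tuple>
+#endif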
+ int32_t grp_attn_n = 1; // group-attention factor + int32_t grp_attn_w = 512; // group-attention width + int32_t n_print = -1; // print token count every n tokens (-1 = disabled) + float rope_freq_base = 0.0f; // RoPE base frequency + float rope_freq_scale = 0.0f; // RoPE frequency scaling factor + float yarn_ext_factor = -1.0f; // YaRN extrapolation mix factor + float yarn_attn_factor = 1.0f; // YaRN magnitude scaling factor + float yarn_beta_fast = 32.0f; // YaRN low correction dim + float yarn_beta_slow = 1.0f; // YaRN high correction dim + int32_t yarn_orig_ctx = 0; // YaRN original context length + float defrag_thold = -1.0f; // KV cache defragmentation threshold + + ggml_backend_sched_eval_callback cb_eval = nullptr; + void * cb_eval_user_data = nullptr; + + ggml_numa_strategy numa = GGML_NUMA_STRATEGY_DISABLED; + + enum llama_rope_scaling_type rope_scaling_type = LLAMA_ROPE_SCALING_TYPE_UNSPECIFIED; + enum llama_pooling_type pooling_type = LLAMA_POOLING_TYPE_UNSPECIFIED; // pooling type for embeddings + + // // sampling parameters + struct llama_sampling_params sparams; + + std::string model = ""; // model path + std::string model_draft = ""; // draft model for speculative decoding + std::string model_alias = "unknown"; // model alias + std::string model_url = ""; // model url to download + std::string hf_repo = ""; // HF repo + std::string hf_file = ""; // HF file + std::string prompt = ""; + std::string prompt_file = ""; // store the external prompt file name + std::string path_prompt_cache = ""; // path to file for saving/loading prompt eval state + std::string input_prefix = ""; // string to prefix user inputs with + std::string input_suffix = ""; // string to suffix user inputs with + std::vector antiprompt; // string upon seeing which more user input is prompted + std::string logdir = ""; // directory in which to save YAML log files + std::string lookup_cache_static = ""; // path of static ngram cache file for lookup decoding + std::string lookup_cache_dynamic = ""; // path of dynamic ngram cache file for lookup decoding + std::string logits_file = ""; // file for saving *all* logits + + std::vector kv_overrides; + + // TODO: avoid tuple, use struct + std::vector> lora_adapter; // lora adapter path with user defined scale + std::string lora_base = ""; // base model path for the lora adapter + + std::vector control_vectors; // control vector with user defined scale + + int32_t control_vector_layer_start = -1; // layer range for control vector + int32_t control_vector_layer_end = -1; // layer range for control vector + + int ppl_stride = 0; // stride for perplexity calculations. If left at 0, the pre-existing approach will be used. + int ppl_output_type = 0; // = 0 -> ppl output is as usual, = 1 -> ppl output is num_tokens, ppl, one per line + // (which is more convenient to use for plotting) + // + bool hellaswag = false; // compute HellaSwag score over random tasks from datafile supplied in prompt + size_t hellaswag_tasks = 400; // number of tasks to use when computing the HellaSwag score + + bool winogrande = false; // compute Winogrande score over random tasks from datafile supplied in prompt + size_t winogrande_tasks= 0; // number of tasks to use when computing the Winogrande score. If 0, all tasks will be computed + + bool multiple_choice = false; // compute TruthfulQA score over random tasks from datafile supplied in prompt + size_t multiple_choice_tasks = 0; // number of tasks to use when computing the TruthfulQA score. 
If 0, all tasks will be computed + + bool kl_divergence = false; // compute KL divergence + + bool random_prompt = false; // do not randomize prompt if none provided + bool use_color = false; // use color to distinguish generations and inputs + bool interactive = false; // interactive mode + bool chatml = false; // chatml mode (used for models trained on chatml syntax) + bool prompt_cache_all = false; // save user input and generations to prompt cache + bool prompt_cache_ro = false; // open the prompt cache read-only and do not update it + + bool embedding = false; // get only sentence embedding + bool escape = false; // escape "\n", "\r", "\t", "\'", "\"", and "\\" + bool interactive_first = false; // wait for user input immediately + bool multiline_input = false; // reverse the usage of `\` + bool simple_io = false; // improves compatibility with subprocesses and limited consoles + bool cont_batching = true; // insert new sequences for decoding on-the-fly + bool flash_attn = false; // flash attention + + bool input_prefix_bos = false; // prefix BOS to user inputs, preceding input_prefix + bool ignore_eos = false; // ignore generated EOS tokens + bool instruct = false; // instruction mode (used for Alpaca models) + bool logits_all = false; // return logits for all tokens in the batch + bool use_mmap = true; // use mmap for faster loads + bool use_mlock = false; // use mlock to keep model in memory + bool verbose_prompt = false; // print prompt tokens before generation + bool display_prompt = true; // print prompt before generation + bool infill = false; // use infill mode + bool dump_kv_cache = false; // dump the KV cache contents for debugging purposes + bool no_kv_offload = false; // disable KV offloading + bool warmup = true; // warmup run + bool check_tensors = false; // validate tensor data + + std::string cache_type_k = "f16"; // KV cache data type for the K + std::string cache_type_v = "f16"; // KV cache data type for the V + + // multimodal models (see examples/llava) + std::string mmproj = ""; // path to multimodal projector + std::vector image; // path to image file(s) +}; + +void gpt_params_handle_model_default(gpt_params & params); + +bool parse_kv_override(const char * data, std::vector & overrides); + +bool gpt_params_parse_ex(int argc, char ** argv, gpt_params & params); + +bool gpt_params_parse(int argc, char ** argv, gpt_params & params); + +void gpt_print_usage(int argc, char ** argv, const gpt_params & params); + +bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_params & params, int & i, bool & invalid_param); + +std::string get_system_info(const gpt_params & params); + +std::string gpt_random_prompt(std::mt19937 & rng); + +void process_escapes(std::string& input); + +bool validate_file_name(const std::string & filename); + +// +// String utils +// + +std::vector sampler_types_from_names(const std::vector & names, bool allow_alt_names); +std::vector sampler_types_from_chars(const std::string & names_string); +std::vector string_split(std::string input, char separator); +std::string string_strip(const std::string & str); +std::string sampler_type_to_name_string(llama_sampler_type sampler_type); + +// +// Model utils +// + +// TODO: avoid tuplue, use struct +std::tuple llama_init_from_gpt_params(gpt_params & params); + +struct llama_model_params llama_model_params_from_gpt_params (const gpt_params & params); +struct llama_context_params llama_context_params_from_gpt_params(const gpt_params & params); + +struct llama_model * 
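+// Editor's note (illustrative sketch only): the typical entry-point flow for the
+// declarations above, as used by the llama.cpp examples -- parse the command line
+// into gpt_params, initialize, run, then free. Backend init/teardown and error
+// handling are reduced to the bare minimum here:
+#if 0
+int main(int argc, char ** argv) {
+    gpt_params params;
+    if (!gpt_params_parse(argc, argv, params)) {
+        return 1;                       // invalid arguments
+    }
+
+    llama_model   * model = nullptr;
+    llama_context * ctx   = nullptr;
+    std::tie(model, ctx) = llama_init_from_gpt_params(params);
+    if (model == nullptr || ctx == nullptr) {
+        return 1;
+    }
+
+    // ... tokenize, decode, sample ...
+
+    llama_free(ctx);
+    llama_free_model(model);
+    return 0;
+}
+#endif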
llama_load_model_from_url(const char * model_url, const char * path_model, const struct llama_model_params & params); +struct llama_model * llama_load_model_from_hf(const char * repo, const char * file, const char * path_model, const struct llama_model_params & params); + +// Batch utils + +void llama_batch_clear(struct llama_batch & batch); + +void llama_batch_add( + struct llama_batch & batch, + llama_token id, + llama_pos pos, + const std::vector & seq_ids, + bool logits); + +// +// Vocab utils +// + +// tokenizes a string into a vector of tokens +// should work similar to Python's `tokenizer.encode` +std::vector llama_tokenize( + const struct llama_context * ctx, + const std::string & text, + bool add_special, + bool parse_special = false); + +std::vector llama_tokenize( + const struct llama_model * model, + const std::string & text, + bool add_special, + bool parse_special = false); + +// tokenizes a token into a piece, optionally renders special/control tokens +// should work similar to Python's `tokenizer.id_to_piece` +std::string llama_token_to_piece( + const struct llama_context * ctx, + llama_token token, + bool special = true); + +// TODO: these should be moved in llama.h C-style API under single `llama_detokenize` function +// that takes into account the tokenizer type and decides how to handle the leading space +// +// detokenizes a vector of tokens into a string +// should work similar to Python's `tokenizer.decode` +// removes the leading space from the first non-BOS token +std::string llama_detokenize_spm( + llama_context * ctx, + const std::vector & tokens); + +// detokenizes a vector of tokens into a string +// should work similar to Python's `tokenizer.decode` +std::string llama_detokenize_bpe( + llama_context * ctx, + const std::vector & tokens); + +// Uses the value from the model metadata if possible, otherwise +// defaults to true when model type is SPM, otherwise false. +bool llama_should_add_bos_token(const llama_model * model); + +// +// YAML utils +// + +bool create_directory_with_parents(const std::string & path); +void dump_vector_float_yaml(FILE * stream, const char * prop_name, const std::vector & data); +void dump_vector_int_yaml(FILE * stream, const char * prop_name, const std::vector & data); +void dump_string_yaml_multiline(FILE * stream, const char * prop_name, const char * data); +std::string get_sortable_timestamp(); + +void dump_non_result_info_yaml( + FILE * stream, const gpt_params & params, const llama_context * lctx, + const std::string & timestamp, const std::vector & prompt_tokens, const char * model_desc); + +// +// KV cache utils +// + +// Dump the KV cache view with the number of sequences per cell. +void dump_kv_cache_view(const llama_kv_cache_view & view, int row_size = 80); + +// Dump the KV cache view showing individual sequences in each cell (long output). +void dump_kv_cache_view_seqs(const llama_kv_cache_view & view, int row_size = 40); + +// +// Embedding utils +// + +void llama_embd_normalize(const float * inp, float * out, int n); + +float llama_embd_similarity_cos(const float * embd1, const float * embd2, int n); + +// +// Control vector utils +// + +struct llama_control_vector_data { + int n_embd; + + // stores data for layers [1, n_layer] where n_layer = data.size() / n_embd + std::vector data; +}; + +struct llama_control_vector_load_info { + float strength; + + std::string fname; +}; + +// Load control vectors, scale each by strength, and add them together. 
+// On error, returns {-1, empty} +llama_control_vector_data llama_control_vector_load(const std::vector & load_infos); + +// +// Split utils +// +static const char * const LLM_KV_SPLIT_NO = "split.no"; +static const char * const LLM_KV_SPLIT_COUNT = "split.count"; +static const char * const LLM_KV_SPLIT_TENSORS_COUNT = "split.tensors.count"; diff --git a/llama-cpp-python/vendor/llama.cpp/common/console.cpp b/llama-cpp-python/vendor/llama.cpp/common/console.cpp new file mode 100644 index 0000000000000000000000000000000000000000..f65cbc6eda0b1d1e4f45ab976fb8868be33b6c79 --- /dev/null +++ b/llama-cpp-python/vendor/llama.cpp/common/console.cpp @@ -0,0 +1,501 @@ +#include "console.h" +#include +#include + +#if defined(_WIN32) +#define WIN32_LEAN_AND_MEAN +#ifndef NOMINMAX +#define NOMINMAX +#endif +#include +#include +#include +#ifndef ENABLE_VIRTUAL_TERMINAL_PROCESSING +#define ENABLE_VIRTUAL_TERMINAL_PROCESSING 0x0004 +#endif +#else +#include +#include +#include +#include +#include +#include +#include +#include +#endif + +#define ANSI_COLOR_RED "\x1b[31m" +#define ANSI_COLOR_GREEN "\x1b[32m" +#define ANSI_COLOR_YELLOW "\x1b[33m" +#define ANSI_COLOR_BLUE "\x1b[34m" +#define ANSI_COLOR_MAGENTA "\x1b[35m" +#define ANSI_COLOR_CYAN "\x1b[36m" +#define ANSI_COLOR_RESET "\x1b[0m" +#define ANSI_BOLD "\x1b[1m" + +namespace console { + + // + // Console state + // + + static bool advanced_display = false; + static bool simple_io = true; + static display_t current_display = reset; + + static FILE* out = stdout; + +#if defined (_WIN32) + static void* hConsole; +#else + static FILE* tty = nullptr; + static termios initial_state; +#endif + + // + // Init and cleanup + // + + void init(bool use_simple_io, bool use_advanced_display) { + advanced_display = use_advanced_display; + simple_io = use_simple_io; +#if defined(_WIN32) + // Windows-specific console initialization + DWORD dwMode = 0; + hConsole = GetStdHandle(STD_OUTPUT_HANDLE); + if (hConsole == INVALID_HANDLE_VALUE || !GetConsoleMode(hConsole, &dwMode)) { + hConsole = GetStdHandle(STD_ERROR_HANDLE); + if (hConsole != INVALID_HANDLE_VALUE && (!GetConsoleMode(hConsole, &dwMode))) { + hConsole = nullptr; + simple_io = true; + } + } + if (hConsole) { + // Check conditions combined to reduce nesting + if (advanced_display && !(dwMode & ENABLE_VIRTUAL_TERMINAL_PROCESSING) && + !SetConsoleMode(hConsole, dwMode | ENABLE_VIRTUAL_TERMINAL_PROCESSING)) { + advanced_display = false; + } + // Set console output codepage to UTF8 + SetConsoleOutputCP(CP_UTF8); + } + HANDLE hConIn = GetStdHandle(STD_INPUT_HANDLE); + if (hConIn != INVALID_HANDLE_VALUE && GetConsoleMode(hConIn, &dwMode)) { + // Set console input codepage to UTF16 + _setmode(_fileno(stdin), _O_WTEXT); + + // Set ICANON (ENABLE_LINE_INPUT) and ECHO (ENABLE_ECHO_INPUT) + if (simple_io) { + dwMode |= ENABLE_LINE_INPUT | ENABLE_ECHO_INPUT; + } else { + dwMode &= ~(ENABLE_LINE_INPUT | ENABLE_ECHO_INPUT); + } + if (!SetConsoleMode(hConIn, dwMode)) { + simple_io = true; + } + } +#else + // POSIX-specific console initialization + if (!simple_io) { + struct termios new_termios; + tcgetattr(STDIN_FILENO, &initial_state); + new_termios = initial_state; + new_termios.c_lflag &= ~(ICANON | ECHO); + new_termios.c_cc[VMIN] = 1; + new_termios.c_cc[VTIME] = 0; + tcsetattr(STDIN_FILENO, TCSANOW, &new_termios); + + tty = fopen("/dev/tty", "w+"); + if (tty != nullptr) { + out = tty; + } + } + + setlocale(LC_ALL, ""); +#endif + } + + void cleanup() { + // Reset console display + set_display(reset); + +#if !defined(_WIN32) 
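+    // Editor's note (illustrative sketch only): the POSIX branch of init()/cleanup()
+    // in this file is the classic termios "raw mode" toggle. A condensed standalone
+    // version using the same calls (without the /dev/tty output handling):
+#if 0
+    // #include <termios.h>
+    // #include <unistd.h>
+    static termios saved_state;
+
+    static void raw_mode_on() {
+        tcgetattr(STDIN_FILENO, &saved_state);
+        termios raw = saved_state;
+        raw.c_lflag &= ~(ICANON | ECHO);   // no line buffering, no local echo
+        raw.c_cc[VMIN]  = 1;               // read() returns after a single byte
+        raw.c_cc[VTIME] = 0;               // no inter-byte timeout
+        tcsetattr(STDIN_FILENO, TCSANOW, &raw);
+    }
+
+    static void raw_mode_off() {
+        tcsetattr(STDIN_FILENO, TCSANOW, &saved_state);   // restore the original settings
+    }
+#endif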
+ // Restore settings on POSIX systems + if (!simple_io) { + if (tty != nullptr) { + out = stdout; + fclose(tty); + tty = nullptr; + } + tcsetattr(STDIN_FILENO, TCSANOW, &initial_state); + } +#endif + } + + // + // Display and IO + // + + // Keep track of current display and only emit ANSI code if it changes + void set_display(display_t display) { + if (advanced_display && current_display != display) { + fflush(stdout); + switch(display) { + case reset: + fprintf(out, ANSI_COLOR_RESET); + break; + case prompt: + fprintf(out, ANSI_COLOR_YELLOW); + break; + case user_input: + fprintf(out, ANSI_BOLD ANSI_COLOR_GREEN); + break; + case error: + fprintf(out, ANSI_BOLD ANSI_COLOR_RED); + } + current_display = display; + fflush(out); + } + } + + static char32_t getchar32() { +#if defined(_WIN32) + HANDLE hConsole = GetStdHandle(STD_INPUT_HANDLE); + wchar_t high_surrogate = 0; + + while (true) { + INPUT_RECORD record; + DWORD count; + if (!ReadConsoleInputW(hConsole, &record, 1, &count) || count == 0) { + return WEOF; + } + + if (record.EventType == KEY_EVENT && record.Event.KeyEvent.bKeyDown) { + wchar_t wc = record.Event.KeyEvent.uChar.UnicodeChar; + if (wc == 0) { + continue; + } + + if ((wc >= 0xD800) && (wc <= 0xDBFF)) { // Check if wc is a high surrogate + high_surrogate = wc; + continue; + } + if ((wc >= 0xDC00) && (wc <= 0xDFFF)) { // Check if wc is a low surrogate + if (high_surrogate != 0) { // Check if we have a high surrogate + return ((high_surrogate - 0xD800) << 10) + (wc - 0xDC00) + 0x10000; + } + } + + high_surrogate = 0; // Reset the high surrogate + return static_cast(wc); + } + } +#else + wchar_t wc = getwchar(); + if (static_cast(wc) == WEOF) { + return WEOF; + } + +#if WCHAR_MAX == 0xFFFF + if ((wc >= 0xD800) && (wc <= 0xDBFF)) { // Check if wc is a high surrogate + wchar_t low_surrogate = getwchar(); + if ((low_surrogate >= 0xDC00) && (low_surrogate <= 0xDFFF)) { // Check if the next wchar is a low surrogate + return (static_cast(wc & 0x03FF) << 10) + (low_surrogate & 0x03FF) + 0x10000; + } + } + if ((wc >= 0xD800) && (wc <= 0xDFFF)) { // Invalid surrogate pair + return 0xFFFD; // Return the replacement character U+FFFD + } +#endif + + return static_cast(wc); +#endif + } + + static void pop_cursor() { +#if defined(_WIN32) + if (hConsole != NULL) { + CONSOLE_SCREEN_BUFFER_INFO bufferInfo; + GetConsoleScreenBufferInfo(hConsole, &bufferInfo); + + COORD newCursorPosition = bufferInfo.dwCursorPosition; + if (newCursorPosition.X == 0) { + newCursorPosition.X = bufferInfo.dwSize.X - 1; + newCursorPosition.Y -= 1; + } else { + newCursorPosition.X -= 1; + } + + SetConsoleCursorPosition(hConsole, newCursorPosition); + return; + } +#endif + putc('\b', out); + } + + static int estimateWidth(char32_t codepoint) { +#if defined(_WIN32) + (void)codepoint; + return 1; +#else + return wcwidth(codepoint); +#endif + } + + static int put_codepoint(const char* utf8_codepoint, size_t length, int expectedWidth) { +#if defined(_WIN32) + CONSOLE_SCREEN_BUFFER_INFO bufferInfo; + if (!GetConsoleScreenBufferInfo(hConsole, &bufferInfo)) { + // go with the default + return expectedWidth; + } + COORD initialPosition = bufferInfo.dwCursorPosition; + DWORD nNumberOfChars = length; + WriteConsole(hConsole, utf8_codepoint, nNumberOfChars, &nNumberOfChars, NULL); + + CONSOLE_SCREEN_BUFFER_INFO newBufferInfo; + GetConsoleScreenBufferInfo(hConsole, &newBufferInfo); + + // Figure out our real position if we're in the last column + if (utf8_codepoint[0] != 0x09 && initialPosition.X == newBufferInfo.dwSize.X - 1) { + 
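+        // Editor's note (illustrative sketch only): getchar32() above folds a UTF-16
+        // surrogate pair back into a single code point: subtract the 0xD800/0xDC00
+        // bases, shift the high half by 10 bits, and add the 0x10000 offset.
+#if 0
+        // assumes 0xD800 <= high <= 0xDBFF and 0xDC00 <= low <= 0xDFFF
+        static char32_t combine_surrogates(char16_t high, char16_t low) {
+            return (char32_t(high - 0xD800) << 10) + (low - 0xDC00) + 0x10000;
+        }
+        // combine_surrogates(0xD83D, 0xDE00) == 0x1F600 (U+1F600, "grinning face")
+#endif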
DWORD nNumberOfChars; + WriteConsole(hConsole, &" \b", 2, &nNumberOfChars, NULL); + GetConsoleScreenBufferInfo(hConsole, &newBufferInfo); + } + + int width = newBufferInfo.dwCursorPosition.X - initialPosition.X; + if (width < 0) { + width += newBufferInfo.dwSize.X; + } + return width; +#else + // We can trust expectedWidth if we've got one + if (expectedWidth >= 0 || tty == nullptr) { + fwrite(utf8_codepoint, length, 1, out); + return expectedWidth; + } + + fputs("\033[6n", tty); // Query cursor position + int x1; + int y1; + int x2; + int y2; + int results = 0; + results = fscanf(tty, "\033[%d;%dR", &y1, &x1); + + fwrite(utf8_codepoint, length, 1, tty); + + fputs("\033[6n", tty); // Query cursor position + results += fscanf(tty, "\033[%d;%dR", &y2, &x2); + + if (results != 4) { + return expectedWidth; + } + + int width = x2 - x1; + if (width < 0) { + // Calculate the width considering text wrapping + struct winsize w; + ioctl(STDOUT_FILENO, TIOCGWINSZ, &w); + width += w.ws_col; + } + return width; +#endif + } + + static void replace_last(char ch) { +#if defined(_WIN32) + pop_cursor(); + put_codepoint(&ch, 1, 1); +#else + fprintf(out, "\b%c", ch); +#endif + } + + static void append_utf8(char32_t ch, std::string & out) { + if (ch <= 0x7F) { + out.push_back(static_cast(ch)); + } else if (ch <= 0x7FF) { + out.push_back(static_cast(0xC0 | ((ch >> 6) & 0x1F))); + out.push_back(static_cast(0x80 | (ch & 0x3F))); + } else if (ch <= 0xFFFF) { + out.push_back(static_cast(0xE0 | ((ch >> 12) & 0x0F))); + out.push_back(static_cast(0x80 | ((ch >> 6) & 0x3F))); + out.push_back(static_cast(0x80 | (ch & 0x3F))); + } else if (ch <= 0x10FFFF) { + out.push_back(static_cast(0xF0 | ((ch >> 18) & 0x07))); + out.push_back(static_cast(0x80 | ((ch >> 12) & 0x3F))); + out.push_back(static_cast(0x80 | ((ch >> 6) & 0x3F))); + out.push_back(static_cast(0x80 | (ch & 0x3F))); + } else { + // Invalid Unicode code point + } + } + + // Helper function to remove the last UTF-8 character from a string + static void pop_back_utf8_char(std::string & line) { + if (line.empty()) { + return; + } + + size_t pos = line.length() - 1; + + // Find the start of the last UTF-8 character (checking up to 4 bytes back) + for (size_t i = 0; i < 3 && pos > 0; ++i, --pos) { + if ((line[pos] & 0xC0) != 0x80) { + break; // Found the start of the character + } + } + line.erase(pos); + } + + static bool readline_advanced(std::string & line, bool multiline_input) { + if (out != stdout) { + fflush(stdout); + } + + line.clear(); + std::vector widths; + bool is_special_char = false; + bool end_of_stream = false; + + char32_t input_char; + while (true) { + fflush(out); // Ensure all output is displayed before waiting for input + input_char = getchar32(); + + if (input_char == '\r' || input_char == '\n') { + break; + } + + if (input_char == (char32_t) WEOF || input_char == 0x04 /* Ctrl+D*/) { + end_of_stream = true; + break; + } + + if (is_special_char) { + set_display(user_input); + replace_last(line.back()); + is_special_char = false; + } + + if (input_char == '\033') { // Escape sequence + char32_t code = getchar32(); + if (code == '[' || code == 0x1B) { + // Discard the rest of the escape sequence + while ((code = getchar32()) != (char32_t) WEOF) { + if ((code >= 'A' && code <= 'Z') || (code >= 'a' && code <= 'z') || code == '~') { + break; + } + } + } + } else if (input_char == 0x08 || input_char == 0x7F) { // Backspace + if (!widths.empty()) { + int count; + do { + count = widths.back(); + widths.pop_back(); + // Move cursor back, print space, and 
move cursor back again + for (int i = 0; i < count; i++) { + replace_last(' '); + pop_cursor(); + } + pop_back_utf8_char(line); + } while (count == 0 && !widths.empty()); + } + } else { + int offset = line.length(); + append_utf8(input_char, line); + int width = put_codepoint(line.c_str() + offset, line.length() - offset, estimateWidth(input_char)); + if (width < 0) { + width = 0; + } + widths.push_back(width); + } + + if (!line.empty() && (line.back() == '\\' || line.back() == '/')) { + set_display(prompt); + replace_last(line.back()); + is_special_char = true; + } + } + + bool has_more = multiline_input; + if (is_special_char) { + replace_last(' '); + pop_cursor(); + + char last = line.back(); + line.pop_back(); + if (last == '\\') { + line += '\n'; + fputc('\n', out); + has_more = !has_more; + } else { + // llama will just eat the single space, it won't act as a space + if (line.length() == 1 && line.back() == ' ') { + line.clear(); + pop_cursor(); + } + has_more = false; + } + } else { + if (end_of_stream) { + has_more = false; + } else { + line += '\n'; + fputc('\n', out); + } + } + + fflush(out); + return has_more; + } + + static bool readline_simple(std::string & line, bool multiline_input) { +#if defined(_WIN32) + std::wstring wline; + if (!std::getline(std::wcin, wline)) { + // Input stream is bad or EOF received + line.clear(); + GenerateConsoleCtrlEvent(CTRL_C_EVENT, 0); + return false; + } + + int size_needed = WideCharToMultiByte(CP_UTF8, 0, &wline[0], (int)wline.size(), NULL, 0, NULL, NULL); + line.resize(size_needed); + WideCharToMultiByte(CP_UTF8, 0, &wline[0], (int)wline.size(), &line[0], size_needed, NULL, NULL); +#else + if (!std::getline(std::cin, line)) { + // Input stream is bad or EOF received + line.clear(); + return false; + } +#endif + if (!line.empty()) { + char last = line.back(); + if (last == '/') { // Always return control on '/' symbol + line.pop_back(); + return false; + } + if (last == '\\') { // '\\' changes the default action + line.pop_back(); + multiline_input = !multiline_input; + } + } + line += '\n'; + + // By default, continue input if multiline_input is set + return multiline_input; + } + + bool readline(std::string & line, bool multiline_input) { + set_display(user_input); + + if (simple_io) { + return readline_simple(line, multiline_input); + } + return readline_advanced(line, multiline_input); + } + +} diff --git a/llama-cpp-python/vendor/llama.cpp/common/console.h b/llama-cpp-python/vendor/llama.cpp/common/console.h new file mode 100644 index 0000000000000000000000000000000000000000..ec175269b9d8af48803d0b6e618d008a9ab99b4d --- /dev/null +++ b/llama-cpp-python/vendor/llama.cpp/common/console.h @@ -0,0 +1,19 @@ +// Console functions + +#pragma once + +#include + +namespace console { + enum display_t { + reset = 0, + prompt, + user_input, + error + }; + + void init(bool use_simple_io, bool use_advanced_display); + void cleanup(); + void set_display(display_t display); + bool readline(std::string & line, bool multiline_input); +} diff --git a/llama-cpp-python/vendor/llama.cpp/common/grammar-parser.cpp b/llama-cpp-python/vendor/llama.cpp/common/grammar-parser.cpp new file mode 100644 index 0000000000000000000000000000000000000000..2a1301569793ad8e902b969f8c15aff0cc96f214 --- /dev/null +++ b/llama-cpp-python/vendor/llama.cpp/common/grammar-parser.cpp @@ -0,0 +1,440 @@ +#include "grammar-parser.h" +#include +#include +#include +#include +#include +#include + +namespace grammar_parser { + // NOTE: assumes valid utf8 (but checks for overrun) + // 
copied from llama.cpp + static std::pair decode_utf8(const char * src) { + static const int lookup[] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 3, 4 }; + uint8_t first_byte = static_cast(*src); + uint8_t highbits = first_byte >> 4; + int len = lookup[highbits]; + uint8_t mask = (1 << (8 - len)) - 1; + uint32_t value = first_byte & mask; + const char * end = src + len; // may overrun! + const char * pos = src + 1; + for ( ; pos < end && *pos; pos++) { + value = (value << 6) + (static_cast(*pos) & 0x3F); + } + return std::make_pair(value, pos); + } + + static uint32_t get_symbol_id(parse_state & state, const char * src, size_t len) { + uint32_t next_id = static_cast(state.symbol_ids.size()); + auto result = state.symbol_ids.insert(std::make_pair(std::string(src, len), next_id)); + return result.first->second; + } + + static uint32_t generate_symbol_id(parse_state & state, const std::string & base_name) { + uint32_t next_id = static_cast(state.symbol_ids.size()); + state.symbol_ids[base_name + '_' + std::to_string(next_id)] = next_id; + return next_id; + } + + static void add_rule( + parse_state & state, + uint32_t rule_id, + const std::vector & rule) { + if (state.rules.size() <= rule_id) { + state.rules.resize(rule_id + 1); + } + state.rules[rule_id] = rule; + } + + static bool is_word_char(char c) { + return ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z') || c == '-' || ('0' <= c && c <= '9'); + } + + static std::pair parse_hex(const char * src, int size) { + const char * pos = src; + const char * end = src + size; + uint32_t value = 0; + for ( ; pos < end && *pos; pos++) { + value <<= 4; + char c = *pos; + if ('a' <= c && c <= 'f') { + value += c - 'a' + 10; + } else if ('A' <= c && c <= 'F') { + value += c - 'A' + 10; + } else if ('0' <= c && c <= '9') { + value += c - '0'; + } else { + break; + } + } + if (pos != end) { + throw std::runtime_error("expecting " + std::to_string(size) + " hex chars at " + src); + } + return std::make_pair(value, pos); + } + + static const char * parse_space(const char * src, bool newline_ok) { + const char * pos = src; + while (*pos == ' ' || *pos == '\t' || *pos == '#' || + (newline_ok && (*pos == '\r' || *pos == '\n'))) { + if (*pos == '#') { + while (*pos && *pos != '\r' && *pos != '\n') { + pos++; + } + } else { + pos++; + } + } + return pos; + } + + static const char * parse_name(const char * src) { + const char * pos = src; + while (is_word_char(*pos)) { + pos++; + } + if (pos == src) { + throw std::runtime_error(std::string("expecting name at ") + src); + } + return pos; + } + + static std::pair parse_char(const char * src) { + if (*src == '\\') { + switch (src[1]) { + case 'x': return parse_hex(src + 2, 2); + case 'u': return parse_hex(src + 2, 4); + case 'U': return parse_hex(src + 2, 8); + case 't': return std::make_pair('\t', src + 2); + case 'r': return std::make_pair('\r', src + 2); + case 'n': return std::make_pair('\n', src + 2); + case '\\': + case '"': + case '[': + case ']': + return std::make_pair(src[1], src + 2); + default: + throw std::runtime_error(std::string("unknown escape at ") + src); + } + } else if (*src) { + return decode_utf8(src); + } + throw std::runtime_error("unexpected end of input"); + } + + const char * parse_alternates( + parse_state & state, + const char * src, + const std::string & rule_name, + uint32_t rule_id, + bool is_nested); + + static const char * parse_sequence( + parse_state & state, + const char * src, + const std::string & rule_name, + std::vector & out_elements, + bool is_nested) { + size_t 
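+    // (editorial note, not part of the original llama.cpp source) last_sym_start records the
+    // index in out_elements where the most recently parsed symbol begins, so that a following
+    // repetition operator (*, + or ?) knows which elements it has to rewrite into a sub-rule.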
last_sym_start = out_elements.size(); + const char * pos = src; + while (*pos) { + if (*pos == '"') { // literal string + pos++; + last_sym_start = out_elements.size(); + while (*pos != '"') { + auto char_pair = parse_char(pos); + pos = char_pair.second; + out_elements.push_back({LLAMA_GRETYPE_CHAR, char_pair.first}); + } + pos = parse_space(pos + 1, is_nested); + } else if (*pos == '[') { // char range(s) + pos++; + enum llama_gretype start_type = LLAMA_GRETYPE_CHAR; + if (*pos == '^') { + pos++; + start_type = LLAMA_GRETYPE_CHAR_NOT; + } + last_sym_start = out_elements.size(); + while (*pos != ']') { + auto char_pair = parse_char(pos); + pos = char_pair.second; + enum llama_gretype type = last_sym_start < out_elements.size() + ? LLAMA_GRETYPE_CHAR_ALT + : start_type; + + out_elements.push_back({type, char_pair.first}); + if (pos[0] == '-' && pos[1] != ']') { + auto endchar_pair = parse_char(pos + 1); + pos = endchar_pair.second; + out_elements.push_back({LLAMA_GRETYPE_CHAR_RNG_UPPER, endchar_pair.first}); + } + } + pos = parse_space(pos + 1, is_nested); + } else if (is_word_char(*pos)) { // rule reference + const char * name_end = parse_name(pos); + uint32_t ref_rule_id = get_symbol_id(state, pos, name_end - pos); + pos = parse_space(name_end, is_nested); + last_sym_start = out_elements.size(); + out_elements.push_back({LLAMA_GRETYPE_RULE_REF, ref_rule_id}); + } else if (*pos == '(') { // grouping + // parse nested alternates into synthesized rule + pos = parse_space(pos + 1, true); + uint32_t sub_rule_id = generate_symbol_id(state, rule_name); + pos = parse_alternates(state, pos, rule_name, sub_rule_id, true); + last_sym_start = out_elements.size(); + // output reference to synthesized rule + out_elements.push_back({LLAMA_GRETYPE_RULE_REF, sub_rule_id}); + if (*pos != ')') { + throw std::runtime_error(std::string("expecting ')' at ") + pos); + } + pos = parse_space(pos + 1, is_nested); + } else if (*pos == '*' || *pos == '+' || *pos == '?') { // repetition operator + if (last_sym_start == out_elements.size()) { + throw std::runtime_error(std::string("expecting preceding item to */+/? at ") + pos); + } + + // apply transformation to previous symbol (last_sym_start to end) according to + // rewrite rules: + // S* --> S' ::= S S' | + // S+ --> S' ::= S S' | S + // S? 
--> S' ::= S | + uint32_t sub_rule_id = generate_symbol_id(state, rule_name); + std::vector sub_rule; + // add preceding symbol to generated rule + sub_rule.insert( + sub_rule.end(), out_elements.begin() + last_sym_start, out_elements.end()); + if (*pos == '*' || *pos == '+') { + // cause generated rule to recurse + sub_rule.push_back({LLAMA_GRETYPE_RULE_REF, sub_rule_id}); + } + // mark start of alternate def + sub_rule.push_back({LLAMA_GRETYPE_ALT, 0}); + if (*pos == '+') { + // add preceding symbol as alternate only for '+' (otherwise empty) + sub_rule.insert( + sub_rule.end(), out_elements.begin() + last_sym_start, out_elements.end()); + } + sub_rule.push_back({LLAMA_GRETYPE_END, 0}); + add_rule(state, sub_rule_id, sub_rule); + + // in original rule, replace previous symbol with reference to generated rule + out_elements.resize(last_sym_start); + out_elements.push_back({LLAMA_GRETYPE_RULE_REF, sub_rule_id}); + + pos = parse_space(pos + 1, is_nested); + } else { + break; + } + } + return pos; + } + + const char * parse_alternates( + parse_state & state, + const char * src, + const std::string & rule_name, + uint32_t rule_id, + bool is_nested) { + std::vector rule; + const char * pos = parse_sequence(state, src, rule_name, rule, is_nested); + while (*pos == '|') { + rule.push_back({LLAMA_GRETYPE_ALT, 0}); + pos = parse_space(pos + 1, true); + pos = parse_sequence(state, pos, rule_name, rule, is_nested); + } + rule.push_back({LLAMA_GRETYPE_END, 0}); + add_rule(state, rule_id, rule); + return pos; + } + + static const char * parse_rule(parse_state & state, const char * src) { + const char * name_end = parse_name(src); + const char * pos = parse_space(name_end, false); + size_t name_len = name_end - src; + uint32_t rule_id = get_symbol_id(state, src, name_len); + const std::string name(src, name_len); + + if (!(pos[0] == ':' && pos[1] == ':' && pos[2] == '=')) { + throw std::runtime_error(std::string("expecting ::= at ") + pos); + } + pos = parse_space(pos + 3, true); + + pos = parse_alternates(state, pos, name, rule_id, false); + + if (*pos == '\r') { + pos += pos[1] == '\n' ? 
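+    // (editorial note, not part of the original llama.cpp source) a rule may end in "\r\n",
+    // a lone "\r" or a lone "\n"; consume two characters for the CRLF pair, one otherwise: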
2 : 1; + } else if (*pos == '\n') { + pos++; + } else if (*pos) { + throw std::runtime_error(std::string("expecting newline or end at ") + pos); + } + return parse_space(pos, true); + } + + parse_state parse(const char * src) { + try { + parse_state state; + const char * pos = parse_space(src, true); + while (*pos) { + pos = parse_rule(state, pos); + } + // Validate the state to ensure that all rules are defined + for (const auto & rule : state.rules) { + for (const auto & elem : rule) { + if (elem.type == LLAMA_GRETYPE_RULE_REF) { + // Ensure that the rule at that location exists + if (elem.value >= state.rules.size() || state.rules[elem.value].empty()) { + // Get the name of the rule that is missing + for (const auto & kv : state.symbol_ids) { + if (kv.second == elem.value) { + throw std::runtime_error("Undefined rule identifier '" + kv.first + "'"); + } + } + } + } + } + } + return state; + } catch (const std::exception & err) { + fprintf(stderr, "%s: error parsing grammar: %s\n", __func__, err.what()); + return parse_state(); + } + } + + static void print_grammar_char(FILE * file, uint32_t c) { + if (0x20 <= c && c <= 0x7f) { + fprintf(file, "%c", static_cast(c)); + } else { + // cop out of encoding UTF-8 + fprintf(file, "", c); + } + } + + static bool is_char_element(llama_grammar_element elem) { + switch (elem.type) { + case LLAMA_GRETYPE_CHAR: return true; + case LLAMA_GRETYPE_CHAR_NOT: return true; + case LLAMA_GRETYPE_CHAR_ALT: return true; + case LLAMA_GRETYPE_CHAR_RNG_UPPER: return true; + default: return false; + } + } + + static void print_rule_binary(FILE * file, const std::vector & rule) { + for (auto elem : rule) { + switch (elem.type) { + case LLAMA_GRETYPE_END: fprintf(file, "END"); break; + case LLAMA_GRETYPE_ALT: fprintf(file, "ALT"); break; + case LLAMA_GRETYPE_RULE_REF: fprintf(file, "RULE_REF"); break; + case LLAMA_GRETYPE_CHAR: fprintf(file, "CHAR"); break; + case LLAMA_GRETYPE_CHAR_NOT: fprintf(file, "CHAR_NOT"); break; + case LLAMA_GRETYPE_CHAR_RNG_UPPER: fprintf(file, "CHAR_RNG_UPPER"); break; + case LLAMA_GRETYPE_CHAR_ALT: fprintf(file, "CHAR_ALT"); break; + } + switch (elem.type) { + case LLAMA_GRETYPE_END: + case LLAMA_GRETYPE_ALT: + case LLAMA_GRETYPE_RULE_REF: + fprintf(file, "(%u) ", elem.value); + break; + case LLAMA_GRETYPE_CHAR: + case LLAMA_GRETYPE_CHAR_NOT: + case LLAMA_GRETYPE_CHAR_RNG_UPPER: + case LLAMA_GRETYPE_CHAR_ALT: + fprintf(file, "(\""); + print_grammar_char(file, elem.value); + fprintf(file, "\") "); + break; + } + } + fprintf(file, "\n"); + } + + static void print_rule( + FILE * file, + uint32_t rule_id, + const std::vector & rule, + const std::map & symbol_id_names) { + if (rule.empty() || rule.back().type != LLAMA_GRETYPE_END) { + throw std::runtime_error( + "malformed rule, does not end with LLAMA_GRETYPE_END: " + std::to_string(rule_id)); + } + fprintf(file, "%s ::= ", symbol_id_names.at(rule_id).c_str()); + for (size_t i = 0, end = rule.size() - 1; i < end; i++) { + llama_grammar_element elem = rule[i]; + switch (elem.type) { + case LLAMA_GRETYPE_END: + throw std::runtime_error( + "unexpected end of rule: " + std::to_string(rule_id) + "," + + std::to_string(i)); + case LLAMA_GRETYPE_ALT: + fprintf(file, "| "); + break; + case LLAMA_GRETYPE_RULE_REF: + fprintf(file, "%s ", symbol_id_names.at(elem.value).c_str()); + break; + case LLAMA_GRETYPE_CHAR: + fprintf(file, "["); + print_grammar_char(file, elem.value); + break; + case LLAMA_GRETYPE_CHAR_NOT: + fprintf(file, "[^"); + print_grammar_char(file, elem.value); + break; + case 
LLAMA_GRETYPE_CHAR_RNG_UPPER: + if (i == 0 || !is_char_element(rule[i - 1])) { + throw std::runtime_error( + "LLAMA_GRETYPE_CHAR_RNG_UPPER without preceding char: " + + std::to_string(rule_id) + "," + std::to_string(i)); + } + fprintf(file, "-"); + print_grammar_char(file, elem.value); + break; + case LLAMA_GRETYPE_CHAR_ALT: + if (i == 0 || !is_char_element(rule[i - 1])) { + throw std::runtime_error( + "LLAMA_GRETYPE_CHAR_ALT without preceding char: " + + std::to_string(rule_id) + "," + std::to_string(i)); + } + print_grammar_char(file, elem.value); + break; + } + if (is_char_element(elem)) { + switch (rule[i + 1].type) { + case LLAMA_GRETYPE_CHAR_ALT: + case LLAMA_GRETYPE_CHAR_RNG_UPPER: + break; + default: + fprintf(file, "] "); + } + } + } + fprintf(file, "\n"); + } + + void print_grammar(FILE * file, const parse_state & state) { + try { + std::map symbol_id_names; + for (const auto & kv : state.symbol_ids) { + symbol_id_names[kv.second] = kv.first; + } + for (size_t i = 0, end = state.rules.size(); i < end; i++) { + // fprintf(file, "%zu: ", i); + // print_rule_binary(file, state.rules[i]); + print_rule(file, uint32_t(i), state.rules[i], symbol_id_names); + // fprintf(file, "\n"); + } + } catch (const std::exception & err) { + fprintf(stderr, "\n%s: error printing grammar: %s\n", __func__, err.what()); + } + } + + std::vector parse_state::c_rules() { + std::vector ret; + ret.reserve(rules.size()); + for (const auto & rule : rules) { + ret.push_back(rule.data()); + } + return ret; + } +} diff --git a/llama-cpp-python/vendor/llama.cpp/common/grammar-parser.h b/llama-cpp-python/vendor/llama.cpp/common/grammar-parser.h new file mode 100644 index 0000000000000000000000000000000000000000..9037d72728a42ed772f384f3d7ddcef01d0d15f5 --- /dev/null +++ b/llama-cpp-python/vendor/llama.cpp/common/grammar-parser.h @@ -0,0 +1,29 @@ +// Implements a parser for an extended Backus-Naur form (BNF), producing the +// binary context-free grammar format specified by llama.h. Supports character +// ranges, grouping, and repetition operators. 
As an example, a grammar for +// arithmetic might look like: +// +// root ::= expr +// expr ::= term ([-+*/] term)* +// term ::= num | "(" space expr ")" space +// num ::= [0-9]+ space +// space ::= [ \t\n]* + +#pragma once +#include "llama.h" +#include +#include +#include +#include + +namespace grammar_parser { + struct parse_state { + std::map symbol_ids; + std::vector> rules; + + std::vector c_rules(); + }; + + parse_state parse(const char * src); + void print_grammar(FILE * file, const parse_state & state); +} diff --git a/llama-cpp-python/vendor/llama.cpp/common/json-schema-to-grammar.cpp b/llama-cpp-python/vendor/llama.cpp/common/json-schema-to-grammar.cpp new file mode 100644 index 0000000000000000000000000000000000000000..0f8f1b1d41bdc88cbdc3e4b2be3369f29fbeb53d --- /dev/null +++ b/llama-cpp-python/vendor/llama.cpp/common/json-schema-to-grammar.cpp @@ -0,0 +1,764 @@ +#include "json-schema-to-grammar.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using json = nlohmann::ordered_json; + +template +static std::string join(Iterator begin, Iterator end, const std::string & separator); + +static std::string repeat(const std::string & str, size_t n); + +static std::string build_repetition(const std::string & item_rule, int min_items, int max_items, const std::string & separator_rule = "", bool item_rule_is_literal = false) { + if (separator_rule.empty()) { + if (min_items == 0 && max_items == 1) { + return item_rule + "?"; + } else if (min_items == 1 && max_items == std::numeric_limits::max()) { + return item_rule + "+"; + } + } + + std::string result; + if (min_items > 0) { + if (item_rule_is_literal && separator_rule.empty()) { + result = "\"" + repeat(std::string(item_rule.begin() + 1, item_rule.end() - 1), min_items) + "\""; + } else { + std::vector items(min_items, item_rule); + result = join(items.begin(), items.end(), separator_rule.empty() ? " " : " " + separator_rule + " "); + } + } + + std::function opt_repetitions = [&](int up_to_n, bool prefix_with_sep) -> std::string { + auto content = prefix_with_sep && !separator_rule.empty() ? separator_rule + " " + item_rule : item_rule; + + if (up_to_n == 0) { + return ""; + } else if (up_to_n == 1) { + return "(" + content + ")?"; + } else if (!separator_rule.empty() && !prefix_with_sep) { + return "(" + content + " " + opt_repetitions(up_to_n - 1, true) + ")?"; + } else { + std::string res = repeat("(" + content + " ", up_to_n); + // strip trailing space + res = res.substr(0, res.length() - 1); + res += repeat(")?", up_to_n); + return res; + } + }; + + if (min_items > 0 && max_items != min_items) { + result += " "; + } + + if (max_items != std::numeric_limits::max()) { + result += opt_repetitions(max_items - min_items, min_items > 0); + } else { + std::string item_operator = "(" + (separator_rule.empty() ? "" : separator_rule + " ") + item_rule + ")"; + if (min_items == 0 && !separator_rule.empty()) { + result = "(" + item_rule + " " + item_operator + "*)?"; + } else { + result += item_operator + "*"; + } + } + + return result; +} + +const std::string SPACE_RULE = "\" \"?"; + +struct BuiltinRule { + std::string content; + std::vector deps; +}; + +const std::string _up_to_15_digits = build_repetition("[0-9]", 0, 15); + +std::unordered_map PRIMITIVE_RULES = { + {"boolean", {"(\"true\" | \"false\") space", {}}}, + {"decimal-part", {"[0-9] " + _up_to_15_digits, {}}}, + {"integral-part", {"[0-9] | [1-9] " + _up_to_15_digits, {}}}, + {"number", {"(\"-\"? integral-part) (\".\" decimal-part)? 
([eE] [-+]? integral-part)? space", {"integral-part", "decimal-part"}}}, + {"integer", {"(\"-\"? integral-part) space", {"integral-part"}}}, + {"value", {"object | array | string | number | boolean | null", {"object", "array", "string", "number", "boolean", "null"}}}, + {"object", {"\"{\" space ( string \":\" space value (\",\" space string \":\" space value)* )? \"}\" space", {"string", "value"}}}, + {"array", {"\"[\" space ( value (\",\" space value)* )? \"]\" space", {"value"}}}, + {"uuid", {"\"\\\"\" [0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F] " + "\"-\" [0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F] " + "\"-\" [0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F] " + "\"-\" [0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F] " + "\"-\" [0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F] \"\\\"\" space", {}}}, + {"char", {"[^\"\\\\] | \"\\\\\" ([\"\\\\/bfnrt] | \"u\" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])", {}}}, + {"string", {"\"\\\"\" char* \"\\\"\" space", {"char"}}}, + {"null", {"\"null\" space", {}}}, +}; + +std::unordered_map STRING_FORMAT_RULES = { + {"date", {"[0-9] [0-9] [0-9] [0-9] \"-\" ( \"0\" [1-9] | \"1\" [0-2] ) \"-\" ( \"0\" [1-9] | [1-2] [0-9] | \"3\" [0-1] )", {}}}, + {"time", {"([01] [0-9] | \"2\" [0-3]) \":\" [0-5] [0-9] \":\" [0-5] [0-9] ( \".\" [0-9] [0-9] [0-9] )? ( \"Z\" | ( \"+\" | \"-\" ) ( [01] [0-9] | \"2\" [0-3] ) \":\" [0-5] [0-9] )", {}}}, + {"date-time", {"date \"T\" time", {"date", "time"}}}, + {"date-string", {"\"\\\"\" date \"\\\"\" space", {"date"}}}, + {"time-string", {"\"\\\"\" time \"\\\"\" space", {"time"}}}, + {"date-time-string", {"\"\\\"\" date-time \"\\\"\" space", {"date-time"}}} +}; + +static bool is_reserved_name(const std::string & name) { + static std::unordered_set RESERVED_NAMES; + if (RESERVED_NAMES.empty()) { + RESERVED_NAMES.insert("root"); + for (const auto &p : PRIMITIVE_RULES) RESERVED_NAMES.insert(p.first); + for (const auto &p : STRING_FORMAT_RULES) RESERVED_NAMES.insert(p.first); + } + return RESERVED_NAMES.find(name) != RESERVED_NAMES.end(); +} + +std::regex INVALID_RULE_CHARS_RE("[^a-zA-Z0-9-]+"); +std::regex GRAMMAR_LITERAL_ESCAPE_RE("[\r\n\"]"); +std::regex GRAMMAR_RANGE_LITERAL_ESCAPE_RE("[\r\n\"\\]\\-\\\\]"); +std::unordered_map GRAMMAR_LITERAL_ESCAPES = { + {'\r', "\\r"}, {'\n', "\\n"}, {'"', "\\\""}, {'-', "\\-"}, {']', "\\]"} +}; + +std::unordered_set NON_LITERAL_SET = {'|', '.', '(', ')', '[', ']', '{', '}', '*', '+', '?'}; +std::unordered_set ESCAPED_IN_REGEXPS_BUT_NOT_IN_LITERALS = {'[', ']', '(', ')', '|', '{', '}', '*', '+', '?'}; + +template +std::string join(Iterator begin, Iterator end, const std::string & separator) { + std::ostringstream result; + if (begin != end) { + result << *begin; + for (Iterator it = begin + 1; it != end; ++it) { + result << separator << *it; + } + } + return result.str(); +} + +static std::vector split(const std::string & str, const std::string & delimiter) { + std::vector tokens; + size_t start = 0; + size_t end = str.find(delimiter); + + while (end != std::string::npos) { + tokens.push_back(str.substr(start, end - start)); + start = end + delimiter.length(); + end = str.find(delimiter, start); + } + + tokens.push_back(str.substr(start)); + + return tokens; +} + +static std::string repeat(const std::string & str, size_t n) { + if (n == 0) { + return ""; + } + + std::string result; + result.reserve(str.length() * n); + + for (size_t i = 0; i < n; ++i) { + result += 
str; + } + + return result; +} + +static std::string replacePattern(const std::string & input, const std::regex & regex, const std::function & replacement) { + std::smatch match; + std::string result; + + std::string::const_iterator searchStart(input.cbegin()); + std::string::const_iterator searchEnd(input.cend()); + + while (std::regex_search(searchStart, searchEnd, match, regex)) { + result.append(searchStart, searchStart + match.position()); + result.append(replacement(match)); + searchStart = match.suffix().first; + } + + result.append(searchStart, searchEnd); + + return result; +} + +static std::string format_literal(const std::string & literal) { + std::string escaped = replacePattern(literal, GRAMMAR_LITERAL_ESCAPE_RE, [&](const std::smatch & match) { + char c = match.str()[0]; + return GRAMMAR_LITERAL_ESCAPES.at(c); + }); + return "\"" + escaped + "\""; +} + + +class SchemaConverter { +private: + std::function _fetch_json; + bool _dotall; + std::map _rules; + std::unordered_map _refs; + std::unordered_set _refs_being_resolved; + std::vector _errors; + std::vector _warnings; + + std::string _add_rule(const std::string & name, const std::string & rule) { + std::string esc_name = regex_replace(name, INVALID_RULE_CHARS_RE, "-"); + if (_rules.find(esc_name) == _rules.end() || _rules[esc_name] == rule) { + _rules[esc_name] = rule; + return esc_name; + } else { + int i = 0; + while (_rules.find(esc_name + std::to_string(i)) != _rules.end() && _rules[esc_name + std::to_string(i)] != rule) { + i++; + } + std::string key = esc_name + std::to_string(i); + _rules[key] = rule; + return key; + } + } + + std::string _generate_union_rule(const std::string & name, const std::vector & alt_schemas) { + std::vector rules; + for (size_t i = 0; i < alt_schemas.size(); i++) { + rules.push_back(visit(alt_schemas[i], name + (name.empty() ? "alternative-" : "-") + std::to_string(i))); + } + return join(rules.begin(), rules.end(), " | "); + } + + std::string _visit_pattern(const std::string & pattern, const std::string & name) { + if (!(pattern.front() == '^' && pattern.back() == '$')) { + _errors.push_back("Pattern must start with '^' and end with '$'"); + return ""; + } + std::string sub_pattern = pattern.substr(1, pattern.length() - 2); + std::unordered_map sub_rule_ids; + + size_t i = 0; + size_t length = sub_pattern.length(); + + using literal_or_rule = std::pair; + auto to_rule = [&](const literal_or_rule & ls) { + auto is_literal = ls.second; + auto s = ls.first; + return is_literal ? "\"" + s + "\"" : s; + }; + std::function transform = [&]() -> literal_or_rule { + size_t start = i; + std::vector seq; + + auto get_dot = [&]() { + std::string rule; + if (_dotall) { + rule = "[\\U00000000-\\U0010FFFF]"; + } else { + rule = "[^\\x0A\\x0D]"; + } + return _add_rule("dot", rule); + }; + + // Joins the sequence, merging consecutive literals together. 
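+            // (illustrative sketch added here, not part of the original llama.cpp source)
+            // e.g. the sequence {"foo", literal}, {"bar", literal}, {dot, rule-ref}
+            // is emitted as the two GBNF items: "foobar" dot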
+ auto join_seq = [&]() { + std::vector ret; + + std::string literal; + auto flush_literal = [&]() { + if (literal.empty()) { + return false; + } + ret.push_back(std::make_pair(literal, true)); + literal.clear(); + return true; + }; + + for (const auto & item : seq) { + auto is_literal = item.second; + if (is_literal) { + literal += item.first; + } else { + flush_literal(); + ret.push_back(item); + } + } + flush_literal(); + + std::vector results; + for (const auto & item : ret) { + results.push_back(to_rule(item)); + } + return std::make_pair(join(results.begin(), results.end(), " "), false); + }; + + while (i < length) { + char c = sub_pattern[i]; + if (c == '.') { + seq.push_back(std::make_pair(get_dot(), false)); + i++; + } else if (c == '(') { + i++; + if (i < length) { + if (sub_pattern[i] == '?') { + _warnings.push_back("Unsupported pattern syntax"); + } + } + seq.push_back(std::make_pair("(" + to_rule(transform()) + ")", false)); + } else if (c == ')') { + i++; + if (start > 0 && sub_pattern[start - 1] != '(') { + _errors.push_back("Unbalanced parentheses"); + } + return join_seq(); + } else if (c == '[') { + std::string square_brackets = std::string(1, c); + i++; + while (i < length && sub_pattern[i] != ']') { + if (sub_pattern[i] == '\\') { + square_brackets += sub_pattern.substr(i, 2); + i += 2; + } else { + square_brackets += sub_pattern[i]; + i++; + } + } + if (i >= length) { + _errors.push_back("Unbalanced square brackets"); + } + square_brackets += ']'; + i++; + seq.push_back(std::make_pair(square_brackets, false)); + } else if (c == '|') { + seq.push_back(std::make_pair("|", false)); + i++; + } else if (c == '*' || c == '+' || c == '?') { + seq.back() = std::make_pair(to_rule(seq.back()) + c, false); + i++; + } else if (c == '{') { + std::string curly_brackets = std::string(1, c); + i++; + while (i < length && sub_pattern[i] != '}') { + curly_brackets += sub_pattern[i]; + i++; + } + if (i >= length) { + _errors.push_back("Unbalanced curly brackets"); + } + curly_brackets += '}'; + i++; + auto nums = split(curly_brackets.substr(1, curly_brackets.length() - 2), ","); + int min_times = 0; + int max_times = std::numeric_limits::max(); + try { + if (nums.size() == 1) { + min_times = max_times = std::stoi(nums[0]); + } else if (nums.size() != 2) { + _errors.push_back("Wrong number of values in curly brackets"); + } else { + if (!nums[0].empty()) { + min_times = std::stoi(nums[0]); + } + if (!nums[1].empty()) { + max_times = std::stoi(nums[1]); + } + } + } catch (const std::invalid_argument & e) { + _errors.push_back("Invalid number in curly brackets"); + return std::make_pair("", false); + } + auto &last = seq.back(); + auto &sub = last.first; + auto sub_is_literal = last.second; + + if (!sub_is_literal) { + std::string & sub_id = sub_rule_ids[sub]; + if (sub_id.empty()) { + sub_id = _add_rule(name + "-" + std::to_string(sub_rule_ids.size()), sub); + } + sub = sub_id; + } + seq.back().first = build_repetition( + sub_is_literal ? 
"\"" + sub + "\"" : sub, + min_times, + max_times, + "", + sub_is_literal + ); + seq.back().second = false; + } else { + std::string literal; + auto is_non_literal = [&](char c) { + return NON_LITERAL_SET.find(c) != NON_LITERAL_SET.end(); + }; + while (i < length) { + if (sub_pattern[i] == '\\' && i < length - 1) { + char next = sub_pattern[i + 1]; + if (ESCAPED_IN_REGEXPS_BUT_NOT_IN_LITERALS.find(next) != ESCAPED_IN_REGEXPS_BUT_NOT_IN_LITERALS.end()) { + i++; + literal += sub_pattern[i]; + i++; + } else { + literal += sub_pattern.substr(i, 2); + i += 2; + } + } else if (sub_pattern[i] == '"') { + literal += "\\\""; + i++; + } else if (!is_non_literal(sub_pattern[i]) && + (i == length - 1 || literal.empty() || sub_pattern[i + 1] == '.' || !is_non_literal(sub_pattern[i + 1]))) { + literal += sub_pattern[i]; + i++; + } else { + break; + } + } + if (!literal.empty()) { + seq.push_back(std::make_pair(literal, true)); + } + } + } + return join_seq(); + }; + return _add_rule(name, "\"\\\"\" " + to_rule(transform()) + " \"\\\"\" space"); + } + + std::string _resolve_ref(const std::string & ref) { + std::string ref_name = ref.substr(ref.find_last_of('/') + 1); + if (_rules.find(ref_name) == _rules.end() && _refs_being_resolved.find(ref) == _refs_being_resolved.end()) { + _refs_being_resolved.insert(ref); + json resolved = _refs[ref]; + ref_name = visit(resolved, ref_name); + _refs_being_resolved.erase(ref); + } + return ref_name; + } + + std::string _build_object_rule( + const std::vector> & properties, + const std::unordered_set & required, + const std::string & name, + const json & additional_properties) + { + std::vector required_props; + std::vector optional_props; + std::unordered_map prop_kv_rule_names; + for (const auto & kv : properties) { + const auto &prop_name = kv.first; + const auto &prop_schema = kv.second; + + std::string prop_rule_name = visit(prop_schema, name + (name.empty() ? "" : "-") + prop_name); + prop_kv_rule_names[prop_name] = _add_rule( + name + (name.empty() ? "" : "-") + prop_name + "-kv", + format_literal(json(prop_name).dump()) + " space \":\" space " + prop_rule_name + ); + if (required.find(prop_name) != required.end()) { + required_props.push_back(prop_name); + } else { + optional_props.push_back(prop_name); + } + } + if (additional_properties.is_object() || (additional_properties.is_boolean() && additional_properties.get())) { + std::string sub_name = name + (name.empty() ? "" : "-") + "additional"; + std::string value_rule = visit(additional_properties.is_object() ? additional_properties : json::object(), sub_name + "-value"); + std::string kv_rule = _add_rule(sub_name + "-kv", _add_primitive("string", PRIMITIVE_RULES.at("string")) + " \":\" space " + value_rule); + prop_kv_rule_names["*"] = kv_rule; + optional_props.push_back("*"); + } + + std::string rule = "\"{\" space "; + for (size_t i = 0; i < required_props.size(); i++) { + if (i > 0) { + rule += " \",\" space "; + } + rule += prop_kv_rule_names[required_props[i]]; + } + + if (!optional_props.empty()) { + rule += " ("; + if (!required_props.empty()) { + rule += " \",\" space ( "; + } + + std::function &, bool)> get_recursive_refs = [&](const std::vector & ks, bool first_is_optional) { + std::string res; + if (ks.empty()) { + return res; + } + std::string k = ks[0]; + std::string kv_rule_name = prop_kv_rule_names[k]; + if (k == "*") { + res = _add_rule( + name + (name.empty() ? 
"" : "-") + "additional-kvs", + kv_rule_name + " ( \",\" space " + kv_rule_name + " )*" + ); + } else if (first_is_optional) { + res = "( \",\" space " + kv_rule_name + " )?"; + } else { + res = kv_rule_name; + } + if (ks.size() > 1) { + res += " " + _add_rule( + name + (name.empty() ? "" : "-") + k + "-rest", + get_recursive_refs(std::vector(ks.begin() + 1, ks.end()), true) + ); + } + return res; + }; + + for (size_t i = 0; i < optional_props.size(); i++) { + if (i > 0) { + rule += " | "; + } + rule += get_recursive_refs(std::vector(optional_props.begin() + i, optional_props.end()), false); + } + if (!required_props.empty()) { + rule += " )"; + } + rule += " )?"; + } + + rule += " \"}\" space"; + + return rule; + } + + std::string _add_primitive(const std::string & name, const BuiltinRule & rule) { + auto n = _add_rule(name, rule.content); + for (const auto & dep : rule.deps) { + BuiltinRule dep_rule; + auto it = PRIMITIVE_RULES.find(dep); + if (it == PRIMITIVE_RULES.end()) { + it = STRING_FORMAT_RULES.find(dep); + if (it == STRING_FORMAT_RULES.end()) { + _errors.push_back("Rule " + dep + " not known"); + continue; + } + } + if (_rules.find(dep) == _rules.end()) { + _add_primitive(dep, it->second); + } + } + return n; + } + +public: + SchemaConverter( + const std::function & fetch_json, + bool dotall) + : _fetch_json(fetch_json), _dotall(dotall) + { + _rules["space"] = SPACE_RULE; + } + + void resolve_refs(json & schema, const std::string & url) { + /* + * Resolves all $ref fields in the given schema, fetching any remote schemas, + * replacing each $ref with absolute reference URL and populates _refs with the + * respective referenced (sub)schema dictionaries. + */ + std::function visit_refs = [&](json & n) { + if (n.is_array()) { + for (auto & x : n) { + visit_refs(x); + } + } else if (n.is_object()) { + if (n.contains("$ref")) { + std::string ref = n["$ref"]; + if (_refs.find(ref) == _refs.end()) { + json target; + if (ref.find("https://") == 0) { + std::string base_url = ref.substr(0, ref.find('#')); + auto it = _refs.find(base_url); + if (it != _refs.end()) { + target = it->second; + } else { + // Fetch the referenced schema and resolve its refs + auto referenced = _fetch_json(ref); + resolve_refs(referenced, base_url); + _refs[base_url] = referenced; + } + if (ref.find('#') == std::string::npos || ref.substr(ref.find('#') + 1).empty()) { + return; + } + } else if (ref.find("#/") == 0) { + target = schema; + n["$ref"] = url + ref; + ref = url + ref; + } else { + _errors.push_back("Unsupported ref: " + ref); + return; + } + std::string pointer = ref.substr(ref.find('#') + 1); + std::vector tokens = split(pointer, "/"); + for (size_t i = 1; i < tokens.size(); ++i) { + std::string sel = tokens[i]; + if (target.is_null() || !target.contains(sel)) { + _errors.push_back("Error resolving ref " + ref + ": " + sel + " not in " + target.dump()); + return; + } + target = target[sel]; + } + _refs[ref] = target; + } + } else { + for (auto & kv : n.items()) { + visit_refs(kv.value()); + } + } + } + }; + + visit_refs(schema); + } + + std::string _generate_constant_rule(const json & value) { + return format_literal(value.dump()); + } + + std::string visit(const json & schema, const std::string & name) { + json schema_type = schema.contains("type") ? schema["type"] : json(); + std::string schema_format = schema.contains("format") ? schema["format"].get() : ""; + std::string rule_name = is_reserved_name(name) ? name + "-" : name.empty() ? 
"root" : name; + + if (schema.contains("$ref")) { + return _add_rule(rule_name, _resolve_ref(schema["$ref"])); + } else if (schema.contains("oneOf") || schema.contains("anyOf")) { + std::vector alt_schemas = schema.contains("oneOf") ? schema["oneOf"].get>() : schema["anyOf"].get>(); + return _add_rule(rule_name, _generate_union_rule(name, alt_schemas)); + } else if (schema_type.is_array()) { + std::vector schema_types; + for (const auto & t : schema_type) { + schema_types.push_back({{"type", t}}); + } + return _add_rule(rule_name, _generate_union_rule(name, schema_types)); + } else if (schema.contains("const")) { + return _add_rule(rule_name, _generate_constant_rule(schema["const"])); + } else if (schema.contains("enum")) { + std::vector enum_values; + for (const auto & v : schema["enum"]) { + enum_values.push_back(_generate_constant_rule(v)); + } + return _add_rule(rule_name, join(enum_values.begin(), enum_values.end(), " | ")); + } else if ((schema_type.is_null() || schema_type == "object") + && (schema.contains("properties") || + (schema.contains("additionalProperties") && schema["additionalProperties"] != true))) { + std::unordered_set required; + if (schema.contains("required") && schema["required"].is_array()) { + for (const auto & item : schema["required"]) { + if (item.is_string()) { + required.insert(item.get()); + } + } + } + std::vector> properties; + if (schema.contains("properties")) { + for (const auto & prop : schema["properties"].items()) { + properties.emplace_back(prop.key(), prop.value()); + } + } + return _add_rule(rule_name, + _build_object_rule( + properties, required, name, + schema.contains("additionalProperties") ? schema["additionalProperties"] : json())); + } else if ((schema_type.is_null() || schema_type == "object") && schema.contains("allOf")) { + std::unordered_set required; + std::vector> properties; + std::string hybrid_name = name; + std::function add_component = [&](const json & comp_schema, bool is_required) { + if (comp_schema.contains("$ref")) { + add_component(_refs[comp_schema["$ref"]], is_required); + } else if (comp_schema.contains("properties")) { + for (const auto & prop : comp_schema["properties"].items()) { + properties.emplace_back(prop.key(), prop.value()); + if (is_required) { + required.insert(prop.key()); + } + } + } else { + // todo warning + } + }; + for (auto & t : schema["allOf"]) { + if (t.contains("anyOf")) { + for (auto & tt : t["anyOf"]) { + add_component(tt, false); + } + } else { + add_component(t, true); + } + } + return _add_rule(rule_name, _build_object_rule(properties, required, hybrid_name, json())); + } else if ((schema_type.is_null() || schema_type == "array") && (schema.contains("items") || schema.contains("prefixItems"))) { + json items = schema.contains("items") ? schema["items"] : schema["prefixItems"]; + if (items.is_array()) { + std::string rule = "\"[\" space "; + for (size_t i = 0; i < items.size(); i++) { + if (i > 0) { + rule += " \",\" space "; + } + rule += visit(items[i], name + (name.empty() ? "" : "-") + "tuple-" + std::to_string(i)); + } + rule += " \"]\" space"; + return _add_rule(rule_name, rule); + } else { + std::string item_rule_name = visit(items, name + (name.empty() ? "" : "-") + "item"); + int min_items = schema.contains("minItems") ? schema["minItems"].get() : 0; + json max_items_json = schema.contains("maxItems") ? schema["maxItems"] : json(); + int max_items = max_items_json.is_number_integer() ? 
max_items_json.get() : std::numeric_limits::max(); + + return _add_rule(rule_name, "\"[\" space " + build_repetition(item_rule_name, min_items, max_items, "\",\" space") + " \"]\" space"); + } + } else if ((schema_type.is_null() || schema_type == "string") && schema.contains("pattern")) { + return _visit_pattern(schema["pattern"], rule_name); + } else if ((schema_type.is_null() || schema_type == "string") && std::regex_match(schema_format, std::regex("^uuid[1-5]?$"))) { + return _add_primitive(rule_name == "root" ? "root" : schema_format, PRIMITIVE_RULES.at("uuid")); + } else if ((schema_type.is_null() || schema_type == "string") && STRING_FORMAT_RULES.find(schema_format + "-string") != STRING_FORMAT_RULES.end()) { + auto prim_name = schema_format + "-string"; + return _add_rule(rule_name, _add_primitive(prim_name, STRING_FORMAT_RULES.at(prim_name))); + } else if (schema_type == "string" && (schema.contains("minLength") || schema.contains("maxLength"))) { + std::string char_rule = _add_primitive("char", PRIMITIVE_RULES.at("char")); + int min_len = schema.contains("minLength") ? schema["minLength"].get() : 0; + int max_len = schema.contains("maxLength") ? schema["maxLength"].get() : std::numeric_limits::max(); + return _add_rule(rule_name, "\"\\\"\" " + build_repetition(char_rule, min_len, max_len) + " \"\\\"\" space"); + } else if (schema.empty() || schema_type == "object") { + return _add_rule(rule_name, _add_primitive("object", PRIMITIVE_RULES.at("object"))); + } else { + if (!schema_type.is_string() || PRIMITIVE_RULES.find(schema_type.get()) == PRIMITIVE_RULES.end()) { + _errors.push_back("Unrecognized schema: " + schema.dump()); + return ""; + } + // TODO: support minimum, maximum, exclusiveMinimum, exclusiveMaximum at least for zero + return _add_primitive(rule_name == "root" ? 
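+            // (editorial note, not part of the original llama.cpp source) any remaining simple type
+            // ("boolean", "number", "integer", "string", "null", ...) falls through to the
+            // corresponding entry in PRIMITIVE_RULES: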
"root" : schema_type.get(), PRIMITIVE_RULES.at(schema_type.get())); + } + } + + void check_errors() { + if (!_errors.empty()) { + throw std::runtime_error("JSON schema conversion failed:\n" + join(_errors.begin(), _errors.end(), "\n")); + } + if (!_warnings.empty()) { + fprintf(stderr, "WARNING: JSON schema conversion was incomplete: %s\n", join(_warnings.begin(), _warnings.end(), "; ").c_str()); + } + } + + std::string format_grammar() { + std::stringstream ss; + for (const auto & kv : _rules) { + ss << kv.first << " ::= " << kv.second << std::endl; + } + return ss.str(); + } +}; + +std::string json_schema_to_grammar(const json & schema) { + SchemaConverter converter([](const std::string &) { return json::object(); }, /* dotall= */ false); + auto copy = schema; + converter.resolve_refs(copy, "input"); + converter.visit(copy, ""); + converter.check_errors(); + return converter.format_grammar(); +} diff --git a/llama-cpp-python/vendor/llama.cpp/common/json-schema-to-grammar.h b/llama-cpp-python/vendor/llama.cpp/common/json-schema-to-grammar.h new file mode 100644 index 0000000000000000000000000000000000000000..e1abed30375826bbf7f201a0a17ff56c6c810630 --- /dev/null +++ b/llama-cpp-python/vendor/llama.cpp/common/json-schema-to-grammar.h @@ -0,0 +1,4 @@ +#pragma once +#include "json.hpp" + +std::string json_schema_to_grammar(const nlohmann::ordered_json& schema); diff --git a/llama-cpp-python/vendor/llama.cpp/common/json.hpp b/llama-cpp-python/vendor/llama.cpp/common/json.hpp new file mode 100644 index 0000000000000000000000000000000000000000..a858728c4ceb8a7048da422a96d38d61d3062d62 --- /dev/null +++ b/llama-cpp-python/vendor/llama.cpp/common/json.hpp @@ -0,0 +1,24766 @@ +// __ _____ _____ _____ +// __| | __| | | | JSON for Modern C++ +// | | |__ | | | | | | version 3.11.3 +// |_____|_____|_____|_|___| https://github.com/nlohmann/json +// +// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann +// SPDX-License-Identifier: MIT + +/****************************************************************************\ + * Note on documentation: The source files contain links to the online * + * documentation of the public API at https://json.nlohmann.me. This URL * + * contains the most recent documentation and should also be applicable to * + * previous versions; documentation for deprecated functions is not * + * removed, but marked deprecated. See "Generate documentation" section in * + * file docs/README.md. 
* +\****************************************************************************/ + +#ifndef INCLUDE_NLOHMANN_JSON_HPP_ +#define INCLUDE_NLOHMANN_JSON_HPP_ + +#include // all_of, find, for_each +#include // nullptr_t, ptrdiff_t, size_t +#include // hash, less +#include // initializer_list +#ifndef JSON_NO_IO + #include // istream, ostream +#endif // JSON_NO_IO +#include // random_access_iterator_tag +#include // unique_ptr +#include // string, stoi, to_string +#include // declval, forward, move, pair, swap +#include // vector + +// #include +// __ _____ _____ _____ +// __| | __| | | | JSON for Modern C++ +// | | |__ | | | | | | version 3.11.3 +// |_____|_____|_____|_|___| https://github.com/nlohmann/json +// +// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann +// SPDX-License-Identifier: MIT + + + +#include + +// #include +// __ _____ _____ _____ +// __| | __| | | | JSON for Modern C++ +// | | |__ | | | | | | version 3.11.3 +// |_____|_____|_____|_|___| https://github.com/nlohmann/json +// +// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann +// SPDX-License-Identifier: MIT + + + +// This file contains all macro definitions affecting or depending on the ABI + +#ifndef JSON_SKIP_LIBRARY_VERSION_CHECK + #if defined(NLOHMANN_JSON_VERSION_MAJOR) && defined(NLOHMANN_JSON_VERSION_MINOR) && defined(NLOHMANN_JSON_VERSION_PATCH) + #if NLOHMANN_JSON_VERSION_MAJOR != 3 || NLOHMANN_JSON_VERSION_MINOR != 11 || NLOHMANN_JSON_VERSION_PATCH != 3 + #warning "Already included a different version of the library!" + #endif + #endif +#endif + +#define NLOHMANN_JSON_VERSION_MAJOR 3 // NOLINT(modernize-macro-to-enum) +#define NLOHMANN_JSON_VERSION_MINOR 11 // NOLINT(modernize-macro-to-enum) +#define NLOHMANN_JSON_VERSION_PATCH 3 // NOLINT(modernize-macro-to-enum) + +#ifndef JSON_DIAGNOSTICS + #define JSON_DIAGNOSTICS 0 +#endif + +#ifndef JSON_USE_LEGACY_DISCARDED_VALUE_COMPARISON + #define JSON_USE_LEGACY_DISCARDED_VALUE_COMPARISON 0 +#endif + +#if JSON_DIAGNOSTICS + #define NLOHMANN_JSON_ABI_TAG_DIAGNOSTICS _diag +#else + #define NLOHMANN_JSON_ABI_TAG_DIAGNOSTICS +#endif + +#if JSON_USE_LEGACY_DISCARDED_VALUE_COMPARISON + #define NLOHMANN_JSON_ABI_TAG_LEGACY_DISCARDED_VALUE_COMPARISON _ldvcmp +#else + #define NLOHMANN_JSON_ABI_TAG_LEGACY_DISCARDED_VALUE_COMPARISON +#endif + +#ifndef NLOHMANN_JSON_NAMESPACE_NO_VERSION + #define NLOHMANN_JSON_NAMESPACE_NO_VERSION 0 +#endif + +// Construct the namespace ABI tags component +#define NLOHMANN_JSON_ABI_TAGS_CONCAT_EX(a, b) json_abi ## a ## b +#define NLOHMANN_JSON_ABI_TAGS_CONCAT(a, b) \ + NLOHMANN_JSON_ABI_TAGS_CONCAT_EX(a, b) + +#define NLOHMANN_JSON_ABI_TAGS \ + NLOHMANN_JSON_ABI_TAGS_CONCAT( \ + NLOHMANN_JSON_ABI_TAG_DIAGNOSTICS, \ + NLOHMANN_JSON_ABI_TAG_LEGACY_DISCARDED_VALUE_COMPARISON) + +// Construct the namespace version component +#define NLOHMANN_JSON_NAMESPACE_VERSION_CONCAT_EX(major, minor, patch) \ + _v ## major ## _ ## minor ## _ ## patch +#define NLOHMANN_JSON_NAMESPACE_VERSION_CONCAT(major, minor, patch) \ + NLOHMANN_JSON_NAMESPACE_VERSION_CONCAT_EX(major, minor, patch) + +#if NLOHMANN_JSON_NAMESPACE_NO_VERSION +#define NLOHMANN_JSON_NAMESPACE_VERSION +#else +#define NLOHMANN_JSON_NAMESPACE_VERSION \ + NLOHMANN_JSON_NAMESPACE_VERSION_CONCAT(NLOHMANN_JSON_VERSION_MAJOR, \ + NLOHMANN_JSON_VERSION_MINOR, \ + NLOHMANN_JSON_VERSION_PATCH) +#endif + +// Combine namespace components +#define NLOHMANN_JSON_NAMESPACE_CONCAT_EX(a, b) a ## b +#define NLOHMANN_JSON_NAMESPACE_CONCAT(a, b) \ + NLOHMANN_JSON_NAMESPACE_CONCAT_EX(a, b) + +#ifndef 
NLOHMANN_JSON_NAMESPACE +#define NLOHMANN_JSON_NAMESPACE \ + nlohmann::NLOHMANN_JSON_NAMESPACE_CONCAT( \ + NLOHMANN_JSON_ABI_TAGS, \ + NLOHMANN_JSON_NAMESPACE_VERSION) +#endif + +#ifndef NLOHMANN_JSON_NAMESPACE_BEGIN +#define NLOHMANN_JSON_NAMESPACE_BEGIN \ + namespace nlohmann \ + { \ + inline namespace NLOHMANN_JSON_NAMESPACE_CONCAT( \ + NLOHMANN_JSON_ABI_TAGS, \ + NLOHMANN_JSON_NAMESPACE_VERSION) \ + { +#endif + +#ifndef NLOHMANN_JSON_NAMESPACE_END +#define NLOHMANN_JSON_NAMESPACE_END \ + } /* namespace (inline namespace) NOLINT(readability/namespace) */ \ + } // namespace nlohmann +#endif + +// #include +// __ _____ _____ _____ +// __| | __| | | | JSON for Modern C++ +// | | |__ | | | | | | version 3.11.3 +// |_____|_____|_____|_|___| https://github.com/nlohmann/json +// +// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann +// SPDX-License-Identifier: MIT + + + +#include // transform +#include // array +#include // forward_list +#include // inserter, front_inserter, end +#include // map +#include // string +#include // tuple, make_tuple +#include // is_arithmetic, is_same, is_enum, underlying_type, is_convertible +#include // unordered_map +#include // pair, declval +#include // valarray + +// #include +// __ _____ _____ _____ +// __| | __| | | | JSON for Modern C++ +// | | |__ | | | | | | version 3.11.3 +// |_____|_____|_____|_|___| https://github.com/nlohmann/json +// +// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann +// SPDX-License-Identifier: MIT + + + +#include // nullptr_t +#include // exception +#if JSON_DIAGNOSTICS + #include // accumulate +#endif +#include // runtime_error +#include // to_string +#include // vector + +// #include +// __ _____ _____ _____ +// __| | __| | | | JSON for Modern C++ +// | | |__ | | | | | | version 3.11.3 +// |_____|_____|_____|_|___| https://github.com/nlohmann/json +// +// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann +// SPDX-License-Identifier: MIT + + + +#include // array +#include // size_t +#include // uint8_t +#include // string + +// #include +// __ _____ _____ _____ +// __| | __| | | | JSON for Modern C++ +// | | |__ | | | | | | version 3.11.3 +// |_____|_____|_____|_|___| https://github.com/nlohmann/json +// +// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann +// SPDX-License-Identifier: MIT + + + +#include // declval, pair +// #include +// __ _____ _____ _____ +// __| | __| | | | JSON for Modern C++ +// | | |__ | | | | | | version 3.11.3 +// |_____|_____|_____|_|___| https://github.com/nlohmann/json +// +// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann +// SPDX-License-Identifier: MIT + + + +#include + +// #include +// __ _____ _____ _____ +// __| | __| | | | JSON for Modern C++ +// | | |__ | | | | | | version 3.11.3 +// |_____|_____|_____|_|___| https://github.com/nlohmann/json +// +// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann +// SPDX-License-Identifier: MIT + + + +// #include + + +NLOHMANN_JSON_NAMESPACE_BEGIN +namespace detail +{ + +template struct make_void +{ + using type = void; +}; +template using void_t = typename make_void::type; + +} // namespace detail +NLOHMANN_JSON_NAMESPACE_END + + +NLOHMANN_JSON_NAMESPACE_BEGIN +namespace detail +{ + +// https://en.cppreference.com/w/cpp/experimental/is_detected +struct nonesuch +{ + nonesuch() = delete; + ~nonesuch() = delete; + nonesuch(nonesuch const&) = delete; + nonesuch(nonesuch const&&) = delete; + void operator=(nonesuch const&) = delete; + void operator=(nonesuch&&) = delete; +}; + +template class Op, + class... 
Args> +struct detector +{ + using value_t = std::false_type; + using type = Default; +}; + +template class Op, class... Args> +struct detector>, Op, Args...> +{ + using value_t = std::true_type; + using type = Op; +}; + +template class Op, class... Args> +using is_detected = typename detector::value_t; + +template class Op, class... Args> +struct is_detected_lazy : is_detected { }; + +template class Op, class... Args> +using detected_t = typename detector::type; + +template class Op, class... Args> +using detected_or = detector; + +template class Op, class... Args> +using detected_or_t = typename detected_or::type; + +template class Op, class... Args> +using is_detected_exact = std::is_same>; + +template class Op, class... Args> +using is_detected_convertible = + std::is_convertible, To>; + +} // namespace detail +NLOHMANN_JSON_NAMESPACE_END + +// #include + + +// __ _____ _____ _____ +// __| | __| | | | JSON for Modern C++ +// | | |__ | | | | | | version 3.11.3 +// |_____|_____|_____|_|___| https://github.com/nlohmann/json +// +// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann +// SPDX-FileCopyrightText: 2016-2021 Evan Nemerson +// SPDX-License-Identifier: MIT + +/* Hedley - https://nemequ.github.io/hedley + * Created by Evan Nemerson + */ + +#if !defined(JSON_HEDLEY_VERSION) || (JSON_HEDLEY_VERSION < 15) +#if defined(JSON_HEDLEY_VERSION) + #undef JSON_HEDLEY_VERSION +#endif +#define JSON_HEDLEY_VERSION 15 + +#if defined(JSON_HEDLEY_STRINGIFY_EX) + #undef JSON_HEDLEY_STRINGIFY_EX +#endif +#define JSON_HEDLEY_STRINGIFY_EX(x) #x + +#if defined(JSON_HEDLEY_STRINGIFY) + #undef JSON_HEDLEY_STRINGIFY +#endif +#define JSON_HEDLEY_STRINGIFY(x) JSON_HEDLEY_STRINGIFY_EX(x) + +#if defined(JSON_HEDLEY_CONCAT_EX) + #undef JSON_HEDLEY_CONCAT_EX +#endif +#define JSON_HEDLEY_CONCAT_EX(a,b) a##b + +#if defined(JSON_HEDLEY_CONCAT) + #undef JSON_HEDLEY_CONCAT +#endif +#define JSON_HEDLEY_CONCAT(a,b) JSON_HEDLEY_CONCAT_EX(a,b) + +#if defined(JSON_HEDLEY_CONCAT3_EX) + #undef JSON_HEDLEY_CONCAT3_EX +#endif +#define JSON_HEDLEY_CONCAT3_EX(a,b,c) a##b##c + +#if defined(JSON_HEDLEY_CONCAT3) + #undef JSON_HEDLEY_CONCAT3 +#endif +#define JSON_HEDLEY_CONCAT3(a,b,c) JSON_HEDLEY_CONCAT3_EX(a,b,c) + +#if defined(JSON_HEDLEY_VERSION_ENCODE) + #undef JSON_HEDLEY_VERSION_ENCODE +#endif +#define JSON_HEDLEY_VERSION_ENCODE(major,minor,revision) (((major) * 1000000) + ((minor) * 1000) + (revision)) + +#if defined(JSON_HEDLEY_VERSION_DECODE_MAJOR) + #undef JSON_HEDLEY_VERSION_DECODE_MAJOR +#endif +#define JSON_HEDLEY_VERSION_DECODE_MAJOR(version) ((version) / 1000000) + +#if defined(JSON_HEDLEY_VERSION_DECODE_MINOR) + #undef JSON_HEDLEY_VERSION_DECODE_MINOR +#endif +#define JSON_HEDLEY_VERSION_DECODE_MINOR(version) (((version) % 1000000) / 1000) + +#if defined(JSON_HEDLEY_VERSION_DECODE_REVISION) + #undef JSON_HEDLEY_VERSION_DECODE_REVISION +#endif +#define JSON_HEDLEY_VERSION_DECODE_REVISION(version) ((version) % 1000) + +#if defined(JSON_HEDLEY_GNUC_VERSION) + #undef JSON_HEDLEY_GNUC_VERSION +#endif +#if defined(__GNUC__) && defined(__GNUC_PATCHLEVEL__) + #define JSON_HEDLEY_GNUC_VERSION JSON_HEDLEY_VERSION_ENCODE(__GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__) +#elif defined(__GNUC__) + #define JSON_HEDLEY_GNUC_VERSION JSON_HEDLEY_VERSION_ENCODE(__GNUC__, __GNUC_MINOR__, 0) +#endif + +#if defined(JSON_HEDLEY_GNUC_VERSION_CHECK) + #undef JSON_HEDLEY_GNUC_VERSION_CHECK +#endif +#if defined(JSON_HEDLEY_GNUC_VERSION) + #define JSON_HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_GNUC_VERSION >= 
JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else + #define JSON_HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(JSON_HEDLEY_MSVC_VERSION) + #undef JSON_HEDLEY_MSVC_VERSION +#endif +#if defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 140000000) && !defined(__ICL) + #define JSON_HEDLEY_MSVC_VERSION JSON_HEDLEY_VERSION_ENCODE(_MSC_FULL_VER / 10000000, (_MSC_FULL_VER % 10000000) / 100000, (_MSC_FULL_VER % 100000) / 100) +#elif defined(_MSC_FULL_VER) && !defined(__ICL) + #define JSON_HEDLEY_MSVC_VERSION JSON_HEDLEY_VERSION_ENCODE(_MSC_FULL_VER / 1000000, (_MSC_FULL_VER % 1000000) / 10000, (_MSC_FULL_VER % 10000) / 10) +#elif defined(_MSC_VER) && !defined(__ICL) + #define JSON_HEDLEY_MSVC_VERSION JSON_HEDLEY_VERSION_ENCODE(_MSC_VER / 100, _MSC_VER % 100, 0) +#endif + +#if defined(JSON_HEDLEY_MSVC_VERSION_CHECK) + #undef JSON_HEDLEY_MSVC_VERSION_CHECK +#endif +#if !defined(JSON_HEDLEY_MSVC_VERSION) + #define JSON_HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (0) +#elif defined(_MSC_VER) && (_MSC_VER >= 1400) + #define JSON_HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (_MSC_FULL_VER >= ((major * 10000000) + (minor * 100000) + (patch))) +#elif defined(_MSC_VER) && (_MSC_VER >= 1200) + #define JSON_HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (_MSC_FULL_VER >= ((major * 1000000) + (minor * 10000) + (patch))) +#else + #define JSON_HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (_MSC_VER >= ((major * 100) + (minor))) +#endif + +#if defined(JSON_HEDLEY_INTEL_VERSION) + #undef JSON_HEDLEY_INTEL_VERSION +#endif +#if defined(__INTEL_COMPILER) && defined(__INTEL_COMPILER_UPDATE) && !defined(__ICL) + #define JSON_HEDLEY_INTEL_VERSION JSON_HEDLEY_VERSION_ENCODE(__INTEL_COMPILER / 100, __INTEL_COMPILER % 100, __INTEL_COMPILER_UPDATE) +#elif defined(__INTEL_COMPILER) && !defined(__ICL) + #define JSON_HEDLEY_INTEL_VERSION JSON_HEDLEY_VERSION_ENCODE(__INTEL_COMPILER / 100, __INTEL_COMPILER % 100, 0) +#endif + +#if defined(JSON_HEDLEY_INTEL_VERSION_CHECK) + #undef JSON_HEDLEY_INTEL_VERSION_CHECK +#endif +#if defined(JSON_HEDLEY_INTEL_VERSION) + #define JSON_HEDLEY_INTEL_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_INTEL_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else + #define JSON_HEDLEY_INTEL_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(JSON_HEDLEY_INTEL_CL_VERSION) + #undef JSON_HEDLEY_INTEL_CL_VERSION +#endif +#if defined(__INTEL_COMPILER) && defined(__INTEL_COMPILER_UPDATE) && defined(__ICL) + #define JSON_HEDLEY_INTEL_CL_VERSION JSON_HEDLEY_VERSION_ENCODE(__INTEL_COMPILER, __INTEL_COMPILER_UPDATE, 0) +#endif + +#if defined(JSON_HEDLEY_INTEL_CL_VERSION_CHECK) + #undef JSON_HEDLEY_INTEL_CL_VERSION_CHECK +#endif +#if defined(JSON_HEDLEY_INTEL_CL_VERSION) + #define JSON_HEDLEY_INTEL_CL_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_INTEL_CL_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else + #define JSON_HEDLEY_INTEL_CL_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(JSON_HEDLEY_PGI_VERSION) + #undef JSON_HEDLEY_PGI_VERSION +#endif +#if defined(__PGI) && defined(__PGIC__) && defined(__PGIC_MINOR__) && defined(__PGIC_PATCHLEVEL__) + #define JSON_HEDLEY_PGI_VERSION JSON_HEDLEY_VERSION_ENCODE(__PGIC__, __PGIC_MINOR__, __PGIC_PATCHLEVEL__) +#endif + +#if defined(JSON_HEDLEY_PGI_VERSION_CHECK) + #undef JSON_HEDLEY_PGI_VERSION_CHECK +#endif +#if defined(JSON_HEDLEY_PGI_VERSION) + #define JSON_HEDLEY_PGI_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_PGI_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else + #define 
JSON_HEDLEY_PGI_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(JSON_HEDLEY_SUNPRO_VERSION) + #undef JSON_HEDLEY_SUNPRO_VERSION +#endif +#if defined(__SUNPRO_C) && (__SUNPRO_C > 0x1000) + #define JSON_HEDLEY_SUNPRO_VERSION JSON_HEDLEY_VERSION_ENCODE((((__SUNPRO_C >> 16) & 0xf) * 10) + ((__SUNPRO_C >> 12) & 0xf), (((__SUNPRO_C >> 8) & 0xf) * 10) + ((__SUNPRO_C >> 4) & 0xf), (__SUNPRO_C & 0xf) * 10) +#elif defined(__SUNPRO_C) + #define JSON_HEDLEY_SUNPRO_VERSION JSON_HEDLEY_VERSION_ENCODE((__SUNPRO_C >> 8) & 0xf, (__SUNPRO_C >> 4) & 0xf, (__SUNPRO_C) & 0xf) +#elif defined(__SUNPRO_CC) && (__SUNPRO_CC > 0x1000) + #define JSON_HEDLEY_SUNPRO_VERSION JSON_HEDLEY_VERSION_ENCODE((((__SUNPRO_CC >> 16) & 0xf) * 10) + ((__SUNPRO_CC >> 12) & 0xf), (((__SUNPRO_CC >> 8) & 0xf) * 10) + ((__SUNPRO_CC >> 4) & 0xf), (__SUNPRO_CC & 0xf) * 10) +#elif defined(__SUNPRO_CC) + #define JSON_HEDLEY_SUNPRO_VERSION JSON_HEDLEY_VERSION_ENCODE((__SUNPRO_CC >> 8) & 0xf, (__SUNPRO_CC >> 4) & 0xf, (__SUNPRO_CC) & 0xf) +#endif + +#if defined(JSON_HEDLEY_SUNPRO_VERSION_CHECK) + #undef JSON_HEDLEY_SUNPRO_VERSION_CHECK +#endif +#if defined(JSON_HEDLEY_SUNPRO_VERSION) + #define JSON_HEDLEY_SUNPRO_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_SUNPRO_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else + #define JSON_HEDLEY_SUNPRO_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(JSON_HEDLEY_EMSCRIPTEN_VERSION) + #undef JSON_HEDLEY_EMSCRIPTEN_VERSION +#endif +#if defined(__EMSCRIPTEN__) + #define JSON_HEDLEY_EMSCRIPTEN_VERSION JSON_HEDLEY_VERSION_ENCODE(__EMSCRIPTEN_major__, __EMSCRIPTEN_minor__, __EMSCRIPTEN_tiny__) +#endif + +#if defined(JSON_HEDLEY_EMSCRIPTEN_VERSION_CHECK) + #undef JSON_HEDLEY_EMSCRIPTEN_VERSION_CHECK +#endif +#if defined(JSON_HEDLEY_EMSCRIPTEN_VERSION) + #define JSON_HEDLEY_EMSCRIPTEN_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_EMSCRIPTEN_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else + #define JSON_HEDLEY_EMSCRIPTEN_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(JSON_HEDLEY_ARM_VERSION) + #undef JSON_HEDLEY_ARM_VERSION +#endif +#if defined(__CC_ARM) && defined(__ARMCOMPILER_VERSION) + #define JSON_HEDLEY_ARM_VERSION JSON_HEDLEY_VERSION_ENCODE(__ARMCOMPILER_VERSION / 1000000, (__ARMCOMPILER_VERSION % 1000000) / 10000, (__ARMCOMPILER_VERSION % 10000) / 100) +#elif defined(__CC_ARM) && defined(__ARMCC_VERSION) + #define JSON_HEDLEY_ARM_VERSION JSON_HEDLEY_VERSION_ENCODE(__ARMCC_VERSION / 1000000, (__ARMCC_VERSION % 1000000) / 10000, (__ARMCC_VERSION % 10000) / 100) +#endif + +#if defined(JSON_HEDLEY_ARM_VERSION_CHECK) + #undef JSON_HEDLEY_ARM_VERSION_CHECK +#endif +#if defined(JSON_HEDLEY_ARM_VERSION) + #define JSON_HEDLEY_ARM_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_ARM_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else + #define JSON_HEDLEY_ARM_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(JSON_HEDLEY_IBM_VERSION) + #undef JSON_HEDLEY_IBM_VERSION +#endif +#if defined(__ibmxl__) + #define JSON_HEDLEY_IBM_VERSION JSON_HEDLEY_VERSION_ENCODE(__ibmxl_version__, __ibmxl_release__, __ibmxl_modification__) +#elif defined(__xlC__) && defined(__xlC_ver__) + #define JSON_HEDLEY_IBM_VERSION JSON_HEDLEY_VERSION_ENCODE(__xlC__ >> 8, __xlC__ & 0xff, (__xlC_ver__ >> 8) & 0xff) +#elif defined(__xlC__) + #define JSON_HEDLEY_IBM_VERSION JSON_HEDLEY_VERSION_ENCODE(__xlC__ >> 8, __xlC__ & 0xff, 0) +#endif + +#if defined(JSON_HEDLEY_IBM_VERSION_CHECK) + #undef JSON_HEDLEY_IBM_VERSION_CHECK +#endif +#if 
defined(JSON_HEDLEY_IBM_VERSION) + #define JSON_HEDLEY_IBM_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_IBM_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else + #define JSON_HEDLEY_IBM_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(JSON_HEDLEY_TI_VERSION) + #undef JSON_HEDLEY_TI_VERSION +#endif +#if \ + defined(__TI_COMPILER_VERSION__) && \ + ( \ + defined(__TMS470__) || defined(__TI_ARM__) || \ + defined(__MSP430__) || \ + defined(__TMS320C2000__) \ + ) +#if (__TI_COMPILER_VERSION__ >= 16000000) + #define JSON_HEDLEY_TI_VERSION JSON_HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif +#endif + +#if defined(JSON_HEDLEY_TI_VERSION_CHECK) + #undef JSON_HEDLEY_TI_VERSION_CHECK +#endif +#if defined(JSON_HEDLEY_TI_VERSION) + #define JSON_HEDLEY_TI_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_TI_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else + #define JSON_HEDLEY_TI_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(JSON_HEDLEY_TI_CL2000_VERSION) + #undef JSON_HEDLEY_TI_CL2000_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && defined(__TMS320C2000__) + #define JSON_HEDLEY_TI_CL2000_VERSION JSON_HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(JSON_HEDLEY_TI_CL2000_VERSION_CHECK) + #undef JSON_HEDLEY_TI_CL2000_VERSION_CHECK +#endif +#if defined(JSON_HEDLEY_TI_CL2000_VERSION) + #define JSON_HEDLEY_TI_CL2000_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_TI_CL2000_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else + #define JSON_HEDLEY_TI_CL2000_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(JSON_HEDLEY_TI_CL430_VERSION) + #undef JSON_HEDLEY_TI_CL430_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && defined(__MSP430__) + #define JSON_HEDLEY_TI_CL430_VERSION JSON_HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(JSON_HEDLEY_TI_CL430_VERSION_CHECK) + #undef JSON_HEDLEY_TI_CL430_VERSION_CHECK +#endif +#if defined(JSON_HEDLEY_TI_CL430_VERSION) + #define JSON_HEDLEY_TI_CL430_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_TI_CL430_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else + #define JSON_HEDLEY_TI_CL430_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(JSON_HEDLEY_TI_ARMCL_VERSION) + #undef JSON_HEDLEY_TI_ARMCL_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && (defined(__TMS470__) || defined(__TI_ARM__)) + #define JSON_HEDLEY_TI_ARMCL_VERSION JSON_HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(JSON_HEDLEY_TI_ARMCL_VERSION_CHECK) + #undef JSON_HEDLEY_TI_ARMCL_VERSION_CHECK +#endif +#if defined(JSON_HEDLEY_TI_ARMCL_VERSION) + #define JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_TI_ARMCL_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else + #define JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(JSON_HEDLEY_TI_CL6X_VERSION) + #undef JSON_HEDLEY_TI_CL6X_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && defined(__TMS320C6X__) + #define JSON_HEDLEY_TI_CL6X_VERSION JSON_HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, 
(__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(JSON_HEDLEY_TI_CL6X_VERSION_CHECK) + #undef JSON_HEDLEY_TI_CL6X_VERSION_CHECK +#endif +#if defined(JSON_HEDLEY_TI_CL6X_VERSION) + #define JSON_HEDLEY_TI_CL6X_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_TI_CL6X_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else + #define JSON_HEDLEY_TI_CL6X_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(JSON_HEDLEY_TI_CL7X_VERSION) + #undef JSON_HEDLEY_TI_CL7X_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && defined(__C7000__) + #define JSON_HEDLEY_TI_CL7X_VERSION JSON_HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(JSON_HEDLEY_TI_CL7X_VERSION_CHECK) + #undef JSON_HEDLEY_TI_CL7X_VERSION_CHECK +#endif +#if defined(JSON_HEDLEY_TI_CL7X_VERSION) + #define JSON_HEDLEY_TI_CL7X_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_TI_CL7X_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else + #define JSON_HEDLEY_TI_CL7X_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(JSON_HEDLEY_TI_CLPRU_VERSION) + #undef JSON_HEDLEY_TI_CLPRU_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && defined(__PRU__) + #define JSON_HEDLEY_TI_CLPRU_VERSION JSON_HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(JSON_HEDLEY_TI_CLPRU_VERSION_CHECK) + #undef JSON_HEDLEY_TI_CLPRU_VERSION_CHECK +#endif +#if defined(JSON_HEDLEY_TI_CLPRU_VERSION) + #define JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_TI_CLPRU_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else + #define JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(JSON_HEDLEY_CRAY_VERSION) + #undef JSON_HEDLEY_CRAY_VERSION +#endif +#if defined(_CRAYC) + #if defined(_RELEASE_PATCHLEVEL) + #define JSON_HEDLEY_CRAY_VERSION JSON_HEDLEY_VERSION_ENCODE(_RELEASE_MAJOR, _RELEASE_MINOR, _RELEASE_PATCHLEVEL) + #else + #define JSON_HEDLEY_CRAY_VERSION JSON_HEDLEY_VERSION_ENCODE(_RELEASE_MAJOR, _RELEASE_MINOR, 0) + #endif +#endif + +#if defined(JSON_HEDLEY_CRAY_VERSION_CHECK) + #undef JSON_HEDLEY_CRAY_VERSION_CHECK +#endif +#if defined(JSON_HEDLEY_CRAY_VERSION) + #define JSON_HEDLEY_CRAY_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_CRAY_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else + #define JSON_HEDLEY_CRAY_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(JSON_HEDLEY_IAR_VERSION) + #undef JSON_HEDLEY_IAR_VERSION +#endif +#if defined(__IAR_SYSTEMS_ICC__) + #if __VER__ > 1000 + #define JSON_HEDLEY_IAR_VERSION JSON_HEDLEY_VERSION_ENCODE((__VER__ / 1000000), ((__VER__ / 1000) % 1000), (__VER__ % 1000)) + #else + #define JSON_HEDLEY_IAR_VERSION JSON_HEDLEY_VERSION_ENCODE(__VER__ / 100, __VER__ % 100, 0) + #endif +#endif + +#if defined(JSON_HEDLEY_IAR_VERSION_CHECK) + #undef JSON_HEDLEY_IAR_VERSION_CHECK +#endif +#if defined(JSON_HEDLEY_IAR_VERSION) + #define JSON_HEDLEY_IAR_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_IAR_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else + #define JSON_HEDLEY_IAR_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(JSON_HEDLEY_TINYC_VERSION) + #undef JSON_HEDLEY_TINYC_VERSION +#endif +#if defined(__TINYC__) + #define JSON_HEDLEY_TINYC_VERSION JSON_HEDLEY_VERSION_ENCODE(__TINYC__ / 1000, (__TINYC__ / 100) % 10, __TINYC__ % 100) +#endif + +#if 
defined(JSON_HEDLEY_TINYC_VERSION_CHECK) + #undef JSON_HEDLEY_TINYC_VERSION_CHECK +#endif +#if defined(JSON_HEDLEY_TINYC_VERSION) + #define JSON_HEDLEY_TINYC_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_TINYC_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else + #define JSON_HEDLEY_TINYC_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(JSON_HEDLEY_DMC_VERSION) + #undef JSON_HEDLEY_DMC_VERSION +#endif +#if defined(__DMC__) + #define JSON_HEDLEY_DMC_VERSION JSON_HEDLEY_VERSION_ENCODE(__DMC__ >> 8, (__DMC__ >> 4) & 0xf, __DMC__ & 0xf) +#endif + +#if defined(JSON_HEDLEY_DMC_VERSION_CHECK) + #undef JSON_HEDLEY_DMC_VERSION_CHECK +#endif +#if defined(JSON_HEDLEY_DMC_VERSION) + #define JSON_HEDLEY_DMC_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_DMC_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else + #define JSON_HEDLEY_DMC_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(JSON_HEDLEY_COMPCERT_VERSION) + #undef JSON_HEDLEY_COMPCERT_VERSION +#endif +#if defined(__COMPCERT_VERSION__) + #define JSON_HEDLEY_COMPCERT_VERSION JSON_HEDLEY_VERSION_ENCODE(__COMPCERT_VERSION__ / 10000, (__COMPCERT_VERSION__ / 100) % 100, __COMPCERT_VERSION__ % 100) +#endif + +#if defined(JSON_HEDLEY_COMPCERT_VERSION_CHECK) + #undef JSON_HEDLEY_COMPCERT_VERSION_CHECK +#endif +#if defined(JSON_HEDLEY_COMPCERT_VERSION) + #define JSON_HEDLEY_COMPCERT_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_COMPCERT_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else + #define JSON_HEDLEY_COMPCERT_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(JSON_HEDLEY_PELLES_VERSION) + #undef JSON_HEDLEY_PELLES_VERSION +#endif +#if defined(__POCC__) + #define JSON_HEDLEY_PELLES_VERSION JSON_HEDLEY_VERSION_ENCODE(__POCC__ / 100, __POCC__ % 100, 0) +#endif + +#if defined(JSON_HEDLEY_PELLES_VERSION_CHECK) + #undef JSON_HEDLEY_PELLES_VERSION_CHECK +#endif +#if defined(JSON_HEDLEY_PELLES_VERSION) + #define JSON_HEDLEY_PELLES_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_PELLES_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else + #define JSON_HEDLEY_PELLES_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(JSON_HEDLEY_MCST_LCC_VERSION) + #undef JSON_HEDLEY_MCST_LCC_VERSION +#endif +#if defined(__LCC__) && defined(__LCC_MINOR__) + #define JSON_HEDLEY_MCST_LCC_VERSION JSON_HEDLEY_VERSION_ENCODE(__LCC__ / 100, __LCC__ % 100, __LCC_MINOR__) +#endif + +#if defined(JSON_HEDLEY_MCST_LCC_VERSION_CHECK) + #undef JSON_HEDLEY_MCST_LCC_VERSION_CHECK +#endif +#if defined(JSON_HEDLEY_MCST_LCC_VERSION) + #define JSON_HEDLEY_MCST_LCC_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_MCST_LCC_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else + #define JSON_HEDLEY_MCST_LCC_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(JSON_HEDLEY_GCC_VERSION) + #undef JSON_HEDLEY_GCC_VERSION +#endif +#if \ + defined(JSON_HEDLEY_GNUC_VERSION) && \ + !defined(__clang__) && \ + !defined(JSON_HEDLEY_INTEL_VERSION) && \ + !defined(JSON_HEDLEY_PGI_VERSION) && \ + !defined(JSON_HEDLEY_ARM_VERSION) && \ + !defined(JSON_HEDLEY_CRAY_VERSION) && \ + !defined(JSON_HEDLEY_TI_VERSION) && \ + !defined(JSON_HEDLEY_TI_ARMCL_VERSION) && \ + !defined(JSON_HEDLEY_TI_CL430_VERSION) && \ + !defined(JSON_HEDLEY_TI_CL2000_VERSION) && \ + !defined(JSON_HEDLEY_TI_CL6X_VERSION) && \ + !defined(JSON_HEDLEY_TI_CL7X_VERSION) && \ + !defined(JSON_HEDLEY_TI_CLPRU_VERSION) && \ + !defined(__COMPCERT__) && \ + !defined(JSON_HEDLEY_MCST_LCC_VERSION) + #define JSON_HEDLEY_GCC_VERSION 
JSON_HEDLEY_GNUC_VERSION +#endif + +#if defined(JSON_HEDLEY_GCC_VERSION_CHECK) + #undef JSON_HEDLEY_GCC_VERSION_CHECK +#endif +#if defined(JSON_HEDLEY_GCC_VERSION) + #define JSON_HEDLEY_GCC_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_GCC_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else + #define JSON_HEDLEY_GCC_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(JSON_HEDLEY_HAS_ATTRIBUTE) + #undef JSON_HEDLEY_HAS_ATTRIBUTE +#endif +#if \ + defined(__has_attribute) && \ + ( \ + (!defined(JSON_HEDLEY_IAR_VERSION) || JSON_HEDLEY_IAR_VERSION_CHECK(8,5,9)) \ + ) +# define JSON_HEDLEY_HAS_ATTRIBUTE(attribute) __has_attribute(attribute) +#else +# define JSON_HEDLEY_HAS_ATTRIBUTE(attribute) (0) +#endif + +#if defined(JSON_HEDLEY_GNUC_HAS_ATTRIBUTE) + #undef JSON_HEDLEY_GNUC_HAS_ATTRIBUTE +#endif +#if defined(__has_attribute) + #define JSON_HEDLEY_GNUC_HAS_ATTRIBUTE(attribute,major,minor,patch) JSON_HEDLEY_HAS_ATTRIBUTE(attribute) +#else + #define JSON_HEDLEY_GNUC_HAS_ATTRIBUTE(attribute,major,minor,patch) JSON_HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(JSON_HEDLEY_GCC_HAS_ATTRIBUTE) + #undef JSON_HEDLEY_GCC_HAS_ATTRIBUTE +#endif +#if defined(__has_attribute) + #define JSON_HEDLEY_GCC_HAS_ATTRIBUTE(attribute,major,minor,patch) JSON_HEDLEY_HAS_ATTRIBUTE(attribute) +#else + #define JSON_HEDLEY_GCC_HAS_ATTRIBUTE(attribute,major,minor,patch) JSON_HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(JSON_HEDLEY_HAS_CPP_ATTRIBUTE) + #undef JSON_HEDLEY_HAS_CPP_ATTRIBUTE +#endif +#if \ + defined(__has_cpp_attribute) && \ + defined(__cplusplus) && \ + (!defined(JSON_HEDLEY_SUNPRO_VERSION) || JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,15,0)) + #define JSON_HEDLEY_HAS_CPP_ATTRIBUTE(attribute) __has_cpp_attribute(attribute) +#else + #define JSON_HEDLEY_HAS_CPP_ATTRIBUTE(attribute) (0) +#endif + +#if defined(JSON_HEDLEY_HAS_CPP_ATTRIBUTE_NS) + #undef JSON_HEDLEY_HAS_CPP_ATTRIBUTE_NS +#endif +#if !defined(__cplusplus) || !defined(__has_cpp_attribute) + #define JSON_HEDLEY_HAS_CPP_ATTRIBUTE_NS(ns,attribute) (0) +#elif \ + !defined(JSON_HEDLEY_PGI_VERSION) && \ + !defined(JSON_HEDLEY_IAR_VERSION) && \ + (!defined(JSON_HEDLEY_SUNPRO_VERSION) || JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,15,0)) && \ + (!defined(JSON_HEDLEY_MSVC_VERSION) || JSON_HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + #define JSON_HEDLEY_HAS_CPP_ATTRIBUTE_NS(ns,attribute) JSON_HEDLEY_HAS_CPP_ATTRIBUTE(ns::attribute) +#else + #define JSON_HEDLEY_HAS_CPP_ATTRIBUTE_NS(ns,attribute) (0) +#endif + +#if defined(JSON_HEDLEY_GNUC_HAS_CPP_ATTRIBUTE) + #undef JSON_HEDLEY_GNUC_HAS_CPP_ATTRIBUTE +#endif +#if defined(__has_cpp_attribute) && defined(__cplusplus) + #define JSON_HEDLEY_GNUC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) __has_cpp_attribute(attribute) +#else + #define JSON_HEDLEY_GNUC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) JSON_HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(JSON_HEDLEY_GCC_HAS_CPP_ATTRIBUTE) + #undef JSON_HEDLEY_GCC_HAS_CPP_ATTRIBUTE +#endif +#if defined(__has_cpp_attribute) && defined(__cplusplus) + #define JSON_HEDLEY_GCC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) __has_cpp_attribute(attribute) +#else + #define JSON_HEDLEY_GCC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) JSON_HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(JSON_HEDLEY_HAS_BUILTIN) + #undef JSON_HEDLEY_HAS_BUILTIN +#endif +#if defined(__has_builtin) + #define JSON_HEDLEY_HAS_BUILTIN(builtin) __has_builtin(builtin) +#else + #define JSON_HEDLEY_HAS_BUILTIN(builtin) 
(0) +#endif + +#if defined(JSON_HEDLEY_GNUC_HAS_BUILTIN) + #undef JSON_HEDLEY_GNUC_HAS_BUILTIN +#endif +#if defined(__has_builtin) + #define JSON_HEDLEY_GNUC_HAS_BUILTIN(builtin,major,minor,patch) __has_builtin(builtin) +#else + #define JSON_HEDLEY_GNUC_HAS_BUILTIN(builtin,major,minor,patch) JSON_HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(JSON_HEDLEY_GCC_HAS_BUILTIN) + #undef JSON_HEDLEY_GCC_HAS_BUILTIN +#endif +#if defined(__has_builtin) + #define JSON_HEDLEY_GCC_HAS_BUILTIN(builtin,major,minor,patch) __has_builtin(builtin) +#else + #define JSON_HEDLEY_GCC_HAS_BUILTIN(builtin,major,minor,patch) JSON_HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(JSON_HEDLEY_HAS_FEATURE) + #undef JSON_HEDLEY_HAS_FEATURE +#endif +#if defined(__has_feature) + #define JSON_HEDLEY_HAS_FEATURE(feature) __has_feature(feature) +#else + #define JSON_HEDLEY_HAS_FEATURE(feature) (0) +#endif + +#if defined(JSON_HEDLEY_GNUC_HAS_FEATURE) + #undef JSON_HEDLEY_GNUC_HAS_FEATURE +#endif +#if defined(__has_feature) + #define JSON_HEDLEY_GNUC_HAS_FEATURE(feature,major,minor,patch) __has_feature(feature) +#else + #define JSON_HEDLEY_GNUC_HAS_FEATURE(feature,major,minor,patch) JSON_HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(JSON_HEDLEY_GCC_HAS_FEATURE) + #undef JSON_HEDLEY_GCC_HAS_FEATURE +#endif +#if defined(__has_feature) + #define JSON_HEDLEY_GCC_HAS_FEATURE(feature,major,minor,patch) __has_feature(feature) +#else + #define JSON_HEDLEY_GCC_HAS_FEATURE(feature,major,minor,patch) JSON_HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(JSON_HEDLEY_HAS_EXTENSION) + #undef JSON_HEDLEY_HAS_EXTENSION +#endif +#if defined(__has_extension) + #define JSON_HEDLEY_HAS_EXTENSION(extension) __has_extension(extension) +#else + #define JSON_HEDLEY_HAS_EXTENSION(extension) (0) +#endif + +#if defined(JSON_HEDLEY_GNUC_HAS_EXTENSION) + #undef JSON_HEDLEY_GNUC_HAS_EXTENSION +#endif +#if defined(__has_extension) + #define JSON_HEDLEY_GNUC_HAS_EXTENSION(extension,major,minor,patch) __has_extension(extension) +#else + #define JSON_HEDLEY_GNUC_HAS_EXTENSION(extension,major,minor,patch) JSON_HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(JSON_HEDLEY_GCC_HAS_EXTENSION) + #undef JSON_HEDLEY_GCC_HAS_EXTENSION +#endif +#if defined(__has_extension) + #define JSON_HEDLEY_GCC_HAS_EXTENSION(extension,major,minor,patch) __has_extension(extension) +#else + #define JSON_HEDLEY_GCC_HAS_EXTENSION(extension,major,minor,patch) JSON_HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(JSON_HEDLEY_HAS_DECLSPEC_ATTRIBUTE) + #undef JSON_HEDLEY_HAS_DECLSPEC_ATTRIBUTE +#endif +#if defined(__has_declspec_attribute) + #define JSON_HEDLEY_HAS_DECLSPEC_ATTRIBUTE(attribute) __has_declspec_attribute(attribute) +#else + #define JSON_HEDLEY_HAS_DECLSPEC_ATTRIBUTE(attribute) (0) +#endif + +#if defined(JSON_HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE) + #undef JSON_HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE +#endif +#if defined(__has_declspec_attribute) + #define JSON_HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) __has_declspec_attribute(attribute) +#else + #define JSON_HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) JSON_HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(JSON_HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE) + #undef JSON_HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE +#endif +#if defined(__has_declspec_attribute) + #define JSON_HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) __has_declspec_attribute(attribute) +#else + 
#define JSON_HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) JSON_HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(JSON_HEDLEY_HAS_WARNING) + #undef JSON_HEDLEY_HAS_WARNING +#endif +#if defined(__has_warning) + #define JSON_HEDLEY_HAS_WARNING(warning) __has_warning(warning) +#else + #define JSON_HEDLEY_HAS_WARNING(warning) (0) +#endif + +#if defined(JSON_HEDLEY_GNUC_HAS_WARNING) + #undef JSON_HEDLEY_GNUC_HAS_WARNING +#endif +#if defined(__has_warning) + #define JSON_HEDLEY_GNUC_HAS_WARNING(warning,major,minor,patch) __has_warning(warning) +#else + #define JSON_HEDLEY_GNUC_HAS_WARNING(warning,major,minor,patch) JSON_HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(JSON_HEDLEY_GCC_HAS_WARNING) + #undef JSON_HEDLEY_GCC_HAS_WARNING +#endif +#if defined(__has_warning) + #define JSON_HEDLEY_GCC_HAS_WARNING(warning,major,minor,patch) __has_warning(warning) +#else + #define JSON_HEDLEY_GCC_HAS_WARNING(warning,major,minor,patch) JSON_HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if \ + (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) || \ + defined(__clang__) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(3,0,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + JSON_HEDLEY_IAR_VERSION_CHECK(8,0,0) || \ + JSON_HEDLEY_PGI_VERSION_CHECK(18,4,0) || \ + JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + JSON_HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(4,7,0) || \ + JSON_HEDLEY_TI_CL430_VERSION_CHECK(2,0,1) || \ + JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,1,0) || \ + JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,0,0) || \ + JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + JSON_HEDLEY_CRAY_VERSION_CHECK(5,0,0) || \ + JSON_HEDLEY_TINYC_VERSION_CHECK(0,9,17) || \ + JSON_HEDLEY_SUNPRO_VERSION_CHECK(8,0,0) || \ + (JSON_HEDLEY_IBM_VERSION_CHECK(10,1,0) && defined(__C99_PRAGMA_OPERATOR)) + #define JSON_HEDLEY_PRAGMA(value) _Pragma(#value) +#elif JSON_HEDLEY_MSVC_VERSION_CHECK(15,0,0) + #define JSON_HEDLEY_PRAGMA(value) __pragma(value) +#else + #define JSON_HEDLEY_PRAGMA(value) +#endif + +#if defined(JSON_HEDLEY_DIAGNOSTIC_PUSH) + #undef JSON_HEDLEY_DIAGNOSTIC_PUSH +#endif +#if defined(JSON_HEDLEY_DIAGNOSTIC_POP) + #undef JSON_HEDLEY_DIAGNOSTIC_POP +#endif +#if defined(__clang__) + #define JSON_HEDLEY_DIAGNOSTIC_PUSH _Pragma("clang diagnostic push") + #define JSON_HEDLEY_DIAGNOSTIC_POP _Pragma("clang diagnostic pop") +#elif JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) + #define JSON_HEDLEY_DIAGNOSTIC_PUSH _Pragma("warning(push)") + #define JSON_HEDLEY_DIAGNOSTIC_POP _Pragma("warning(pop)") +#elif JSON_HEDLEY_GCC_VERSION_CHECK(4,6,0) + #define JSON_HEDLEY_DIAGNOSTIC_PUSH _Pragma("GCC diagnostic push") + #define JSON_HEDLEY_DIAGNOSTIC_POP _Pragma("GCC diagnostic pop") +#elif \ + JSON_HEDLEY_MSVC_VERSION_CHECK(15,0,0) || \ + JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) + #define JSON_HEDLEY_DIAGNOSTIC_PUSH __pragma(warning(push)) + #define JSON_HEDLEY_DIAGNOSTIC_POP __pragma(warning(pop)) +#elif JSON_HEDLEY_ARM_VERSION_CHECK(5,6,0) + #define JSON_HEDLEY_DIAGNOSTIC_PUSH _Pragma("push") + #define JSON_HEDLEY_DIAGNOSTIC_POP _Pragma("pop") +#elif \ + JSON_HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,4,0) || \ + JSON_HEDLEY_TI_CL6X_VERSION_CHECK(8,1,0) || \ + JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) + #define JSON_HEDLEY_DIAGNOSTIC_PUSH _Pragma("diag_push") + #define 
JSON_HEDLEY_DIAGNOSTIC_POP _Pragma("diag_pop") +#elif JSON_HEDLEY_PELLES_VERSION_CHECK(2,90,0) + #define JSON_HEDLEY_DIAGNOSTIC_PUSH _Pragma("warning(push)") + #define JSON_HEDLEY_DIAGNOSTIC_POP _Pragma("warning(pop)") +#else + #define JSON_HEDLEY_DIAGNOSTIC_PUSH + #define JSON_HEDLEY_DIAGNOSTIC_POP +#endif + +/* JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_ is for + HEDLEY INTERNAL USE ONLY. API subject to change without notice. */ +#if defined(JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_) + #undef JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_ +#endif +#if defined(__cplusplus) +# if JSON_HEDLEY_HAS_WARNING("-Wc++98-compat") +# if JSON_HEDLEY_HAS_WARNING("-Wc++17-extensions") +# if JSON_HEDLEY_HAS_WARNING("-Wc++1z-extensions") +# define JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(xpr) \ + JSON_HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wc++98-compat\"") \ + _Pragma("clang diagnostic ignored \"-Wc++17-extensions\"") \ + _Pragma("clang diagnostic ignored \"-Wc++1z-extensions\"") \ + xpr \ + JSON_HEDLEY_DIAGNOSTIC_POP +# else +# define JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(xpr) \ + JSON_HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wc++98-compat\"") \ + _Pragma("clang diagnostic ignored \"-Wc++17-extensions\"") \ + xpr \ + JSON_HEDLEY_DIAGNOSTIC_POP +# endif +# else +# define JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(xpr) \ + JSON_HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wc++98-compat\"") \ + xpr \ + JSON_HEDLEY_DIAGNOSTIC_POP +# endif +# endif +#endif +#if !defined(JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(x) x +#endif + +#if defined(JSON_HEDLEY_CONST_CAST) + #undef JSON_HEDLEY_CONST_CAST +#endif +#if defined(__cplusplus) +# define JSON_HEDLEY_CONST_CAST(T, expr) (const_cast(expr)) +#elif \ + JSON_HEDLEY_HAS_WARNING("-Wcast-qual") || \ + JSON_HEDLEY_GCC_VERSION_CHECK(4,6,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define JSON_HEDLEY_CONST_CAST(T, expr) (__extension__ ({ \ + JSON_HEDLEY_DIAGNOSTIC_PUSH \ + JSON_HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL \ + ((T) (expr)); \ + JSON_HEDLEY_DIAGNOSTIC_POP \ + })) +#else +# define JSON_HEDLEY_CONST_CAST(T, expr) ((T) (expr)) +#endif + +#if defined(JSON_HEDLEY_REINTERPRET_CAST) + #undef JSON_HEDLEY_REINTERPRET_CAST +#endif +#if defined(__cplusplus) + #define JSON_HEDLEY_REINTERPRET_CAST(T, expr) (reinterpret_cast(expr)) +#else + #define JSON_HEDLEY_REINTERPRET_CAST(T, expr) ((T) (expr)) +#endif + +#if defined(JSON_HEDLEY_STATIC_CAST) + #undef JSON_HEDLEY_STATIC_CAST +#endif +#if defined(__cplusplus) + #define JSON_HEDLEY_STATIC_CAST(T, expr) (static_cast(expr)) +#else + #define JSON_HEDLEY_STATIC_CAST(T, expr) ((T) (expr)) +#endif + +#if defined(JSON_HEDLEY_CPP_CAST) + #undef JSON_HEDLEY_CPP_CAST +#endif +#if defined(__cplusplus) +# if JSON_HEDLEY_HAS_WARNING("-Wold-style-cast") +# define JSON_HEDLEY_CPP_CAST(T, expr) \ + JSON_HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wold-style-cast\"") \ + ((T) (expr)) \ + JSON_HEDLEY_DIAGNOSTIC_POP +# elif JSON_HEDLEY_IAR_VERSION_CHECK(8,3,0) +# define JSON_HEDLEY_CPP_CAST(T, expr) \ + JSON_HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("diag_suppress=Pe137") \ + JSON_HEDLEY_DIAGNOSTIC_POP +# else +# define JSON_HEDLEY_CPP_CAST(T, expr) ((T) (expr)) +# endif +#else +# define JSON_HEDLEY_CPP_CAST(T, expr) (expr) +#endif + +#if defined(JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED) + #undef JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED +#endif 
+#if JSON_HEDLEY_HAS_WARNING("-Wdeprecated-declarations") + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("clang diagnostic ignored \"-Wdeprecated-declarations\"") +#elif JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("warning(disable:1478 1786)") +#elif JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED __pragma(warning(disable:1478 1786)) +#elif JSON_HEDLEY_PGI_VERSION_CHECK(20,7,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1215,1216,1444,1445") +#elif JSON_HEDLEY_PGI_VERSION_CHECK(17,10,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1215,1444") +#elif JSON_HEDLEY_GCC_VERSION_CHECK(4,3,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("GCC diagnostic ignored \"-Wdeprecated-declarations\"") +#elif JSON_HEDLEY_MSVC_VERSION_CHECK(15,0,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED __pragma(warning(disable:4996)) +#elif JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1215,1444") +#elif \ + JSON_HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1291,1718") +#elif JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) && !defined(__cplusplus) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("error_messages(off,E_DEPRECATED_ATT,E_DEPRECATED_ATT_MESS)") +#elif JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) && defined(__cplusplus) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("error_messages(off,symdeprecated,symdeprecated2)") +#elif JSON_HEDLEY_IAR_VERSION_CHECK(8,0,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress=Pe1444,Pe1215") +#elif JSON_HEDLEY_PELLES_VERSION_CHECK(2,90,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("warn(disable:2241)") +#else + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED +#endif + +#if defined(JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS) + #undef JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS +#endif +#if JSON_HEDLEY_HAS_WARNING("-Wunknown-pragmas") + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("clang diagnostic ignored \"-Wunknown-pragmas\"") +#elif JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("warning(disable:161)") +#elif JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS __pragma(warning(disable:161)) +#elif JSON_HEDLEY_PGI_VERSION_CHECK(17,10,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 1675") +#elif JSON_HEDLEY_GCC_VERSION_CHECK(4,3,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("GCC diagnostic ignored \"-Wunknown-pragmas\"") +#elif 
JSON_HEDLEY_MSVC_VERSION_CHECK(15,0,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS __pragma(warning(disable:4068)) +#elif \ + JSON_HEDLEY_TI_VERSION_CHECK(16,9,0) || \ + JSON_HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ + JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 163") +#elif JSON_HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 163") +#elif JSON_HEDLEY_IAR_VERSION_CHECK(8,0,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress=Pe161") +#elif JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 161") +#else + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS +#endif + +#if defined(JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES) + #undef JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES +#endif +#if JSON_HEDLEY_HAS_WARNING("-Wunknown-attributes") + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("clang diagnostic ignored \"-Wunknown-attributes\"") +#elif JSON_HEDLEY_GCC_VERSION_CHECK(4,6,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("GCC diagnostic ignored \"-Wdeprecated-declarations\"") +#elif JSON_HEDLEY_INTEL_VERSION_CHECK(17,0,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("warning(disable:1292)") +#elif JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES __pragma(warning(disable:1292)) +#elif JSON_HEDLEY_MSVC_VERSION_CHECK(19,0,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES __pragma(warning(disable:5030)) +#elif JSON_HEDLEY_PGI_VERSION_CHECK(20,7,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1097,1098") +#elif JSON_HEDLEY_PGI_VERSION_CHECK(17,10,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1097") +#elif JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) && defined(__cplusplus) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("error_messages(off,attrskipunsup)") +#elif \ + JSON_HEDLEY_TI_VERSION_CHECK(18,1,0) || \ + JSON_HEDLEY_TI_CL6X_VERSION_CHECK(8,3,0) || \ + JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1173") +#elif JSON_HEDLEY_IAR_VERSION_CHECK(8,0,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress=Pe1097") +#elif JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1097") +#else + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES +#endif + +#if defined(JSON_HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL) + #undef JSON_HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL +#endif +#if JSON_HEDLEY_HAS_WARNING("-Wcast-qual") + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL _Pragma("clang diagnostic ignored \"-Wcast-qual\"") +#elif JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL _Pragma("warning(disable:2203 2331)") +#elif JSON_HEDLEY_GCC_VERSION_CHECK(3,0,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL _Pragma("GCC diagnostic ignored \"-Wcast-qual\"") +#else + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL +#endif + +#if defined(JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION) + #undef 
JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION +#endif +#if JSON_HEDLEY_HAS_WARNING("-Wunused-function") + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION _Pragma("clang diagnostic ignored \"-Wunused-function\"") +#elif JSON_HEDLEY_GCC_VERSION_CHECK(3,4,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION _Pragma("GCC diagnostic ignored \"-Wunused-function\"") +#elif JSON_HEDLEY_MSVC_VERSION_CHECK(1,0,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION __pragma(warning(disable:4505)) +#elif JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION _Pragma("diag_suppress 3142") +#else + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION +#endif + +#if defined(JSON_HEDLEY_DEPRECATED) + #undef JSON_HEDLEY_DEPRECATED +#endif +#if defined(JSON_HEDLEY_DEPRECATED_FOR) + #undef JSON_HEDLEY_DEPRECATED_FOR +#endif +#if \ + JSON_HEDLEY_MSVC_VERSION_CHECK(14,0,0) || \ + JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) + #define JSON_HEDLEY_DEPRECATED(since) __declspec(deprecated("Since " # since)) + #define JSON_HEDLEY_DEPRECATED_FOR(since, replacement) __declspec(deprecated("Since " #since "; use " #replacement)) +#elif \ + (JSON_HEDLEY_HAS_EXTENSION(attribute_deprecated_with_message) && !defined(JSON_HEDLEY_IAR_VERSION)) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(4,5,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + JSON_HEDLEY_ARM_VERSION_CHECK(5,6,0) || \ + JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) || \ + JSON_HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ + JSON_HEDLEY_TI_VERSION_CHECK(18,1,0) || \ + JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(18,1,0) || \ + JSON_HEDLEY_TI_CL6X_VERSION_CHECK(8,3,0) || \ + JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,0) || \ + JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) + #define JSON_HEDLEY_DEPRECATED(since) __attribute__((__deprecated__("Since " #since))) + #define JSON_HEDLEY_DEPRECATED_FOR(since, replacement) __attribute__((__deprecated__("Since " #since "; use " #replacement))) +#elif defined(__cplusplus) && (__cplusplus >= 201402L) + #define JSON_HEDLEY_DEPRECATED(since) JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[deprecated("Since " #since)]]) + #define JSON_HEDLEY_DEPRECATED_FOR(since, replacement) JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[deprecated("Since " #since "; use " #replacement)]]) +#elif \ + JSON_HEDLEY_HAS_ATTRIBUTE(deprecated) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ + JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + JSON_HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \ + JSON_HEDLEY_IAR_VERSION_CHECK(8,10,0) + #define JSON_HEDLEY_DEPRECATED(since) __attribute__((__deprecated__)) + #define JSON_HEDLEY_DEPRECATED_FOR(since, replacement) __attribute__((__deprecated__)) +#elif \ + JSON_HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ + 
JSON_HEDLEY_PELLES_VERSION_CHECK(6,50,0) || \ + JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) + #define JSON_HEDLEY_DEPRECATED(since) __declspec(deprecated) + #define JSON_HEDLEY_DEPRECATED_FOR(since, replacement) __declspec(deprecated) +#elif JSON_HEDLEY_IAR_VERSION_CHECK(8,0,0) + #define JSON_HEDLEY_DEPRECATED(since) _Pragma("deprecated") + #define JSON_HEDLEY_DEPRECATED_FOR(since, replacement) _Pragma("deprecated") +#else + #define JSON_HEDLEY_DEPRECATED(since) + #define JSON_HEDLEY_DEPRECATED_FOR(since, replacement) +#endif + +#if defined(JSON_HEDLEY_UNAVAILABLE) + #undef JSON_HEDLEY_UNAVAILABLE +#endif +#if \ + JSON_HEDLEY_HAS_ATTRIBUTE(warning) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(4,3,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) + #define JSON_HEDLEY_UNAVAILABLE(available_since) __attribute__((__warning__("Not available until " #available_since))) +#else + #define JSON_HEDLEY_UNAVAILABLE(available_since) +#endif + +#if defined(JSON_HEDLEY_WARN_UNUSED_RESULT) + #undef JSON_HEDLEY_WARN_UNUSED_RESULT +#endif +#if defined(JSON_HEDLEY_WARN_UNUSED_RESULT_MSG) + #undef JSON_HEDLEY_WARN_UNUSED_RESULT_MSG +#endif +#if \ + JSON_HEDLEY_HAS_ATTRIBUTE(warn_unused_result) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + JSON_HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + (JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,15,0) && defined(__cplusplus)) || \ + JSON_HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ + JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) + #define JSON_HEDLEY_WARN_UNUSED_RESULT __attribute__((__warn_unused_result__)) + #define JSON_HEDLEY_WARN_UNUSED_RESULT_MSG(msg) __attribute__((__warn_unused_result__)) +#elif (JSON_HEDLEY_HAS_CPP_ATTRIBUTE(nodiscard) >= 201907L) + #define JSON_HEDLEY_WARN_UNUSED_RESULT JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard]]) + #define JSON_HEDLEY_WARN_UNUSED_RESULT_MSG(msg) JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard(msg)]]) +#elif JSON_HEDLEY_HAS_CPP_ATTRIBUTE(nodiscard) + #define JSON_HEDLEY_WARN_UNUSED_RESULT JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard]]) + #define JSON_HEDLEY_WARN_UNUSED_RESULT_MSG(msg) JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard]]) +#elif defined(_Check_return_) /* SAL */ + #define JSON_HEDLEY_WARN_UNUSED_RESULT _Check_return_ + #define JSON_HEDLEY_WARN_UNUSED_RESULT_MSG(msg) _Check_return_ +#else + #define JSON_HEDLEY_WARN_UNUSED_RESULT + #define JSON_HEDLEY_WARN_UNUSED_RESULT_MSG(msg) +#endif + +#if defined(JSON_HEDLEY_SENTINEL) + #undef JSON_HEDLEY_SENTINEL +#endif +#if \ + JSON_HEDLEY_HAS_ATTRIBUTE(sentinel) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(4,0,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + JSON_HEDLEY_ARM_VERSION_CHECK(5,4,0) || \ + JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) + #define JSON_HEDLEY_SENTINEL(position) 
__attribute__((__sentinel__(position))) +#else + #define JSON_HEDLEY_SENTINEL(position) +#endif + +#if defined(JSON_HEDLEY_NO_RETURN) + #undef JSON_HEDLEY_NO_RETURN +#endif +#if JSON_HEDLEY_IAR_VERSION_CHECK(8,0,0) + #define JSON_HEDLEY_NO_RETURN __noreturn +#elif \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) + #define JSON_HEDLEY_NO_RETURN __attribute__((__noreturn__)) +#elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L + #define JSON_HEDLEY_NO_RETURN _Noreturn +#elif defined(__cplusplus) && (__cplusplus >= 201103L) + #define JSON_HEDLEY_NO_RETURN JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[noreturn]]) +#elif \ + JSON_HEDLEY_HAS_ATTRIBUTE(noreturn) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(3,2,0) || \ + JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + JSON_HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + JSON_HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + JSON_HEDLEY_IAR_VERSION_CHECK(8,10,0) + #define JSON_HEDLEY_NO_RETURN __attribute__((__noreturn__)) +#elif JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) + #define JSON_HEDLEY_NO_RETURN _Pragma("does_not_return") +#elif \ + JSON_HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ + JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) + #define JSON_HEDLEY_NO_RETURN __declspec(noreturn) +#elif JSON_HEDLEY_TI_CL6X_VERSION_CHECK(6,0,0) && defined(__cplusplus) + #define JSON_HEDLEY_NO_RETURN _Pragma("FUNC_NEVER_RETURNS;") +#elif JSON_HEDLEY_COMPCERT_VERSION_CHECK(3,2,0) + #define JSON_HEDLEY_NO_RETURN __attribute((noreturn)) +#elif JSON_HEDLEY_PELLES_VERSION_CHECK(9,0,0) + #define JSON_HEDLEY_NO_RETURN __declspec(noreturn) +#else + #define JSON_HEDLEY_NO_RETURN +#endif + +#if defined(JSON_HEDLEY_NO_ESCAPE) + #undef JSON_HEDLEY_NO_ESCAPE +#endif +#if JSON_HEDLEY_HAS_ATTRIBUTE(noescape) + #define JSON_HEDLEY_NO_ESCAPE __attribute__((__noescape__)) +#else + #define JSON_HEDLEY_NO_ESCAPE +#endif + +#if defined(JSON_HEDLEY_UNREACHABLE) + #undef JSON_HEDLEY_UNREACHABLE +#endif +#if defined(JSON_HEDLEY_UNREACHABLE_RETURN) + #undef JSON_HEDLEY_UNREACHABLE_RETURN +#endif +#if defined(JSON_HEDLEY_ASSUME) + #undef JSON_HEDLEY_ASSUME +#endif +#if \ + JSON_HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) + #define JSON_HEDLEY_ASSUME(expr) __assume(expr) +#elif JSON_HEDLEY_HAS_BUILTIN(__builtin_assume) + #define JSON_HEDLEY_ASSUME(expr) __builtin_assume(expr) +#elif \ + JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,2,0) || \ + JSON_HEDLEY_TI_CL6X_VERSION_CHECK(4,0,0) + #if defined(__cplusplus) + #define JSON_HEDLEY_ASSUME(expr) std::_nassert(expr) + #else + #define JSON_HEDLEY_ASSUME(expr) _nassert(expr) + #endif +#endif +#if \ + (JSON_HEDLEY_HAS_BUILTIN(__builtin_unreachable) && (!defined(JSON_HEDLEY_ARM_VERSION))) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(4,5,0) || \ + 
JSON_HEDLEY_PGI_VERSION_CHECK(18,10,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + JSON_HEDLEY_IBM_VERSION_CHECK(13,1,5) || \ + JSON_HEDLEY_CRAY_VERSION_CHECK(10,0,0) || \ + JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) + #define JSON_HEDLEY_UNREACHABLE() __builtin_unreachable() +#elif defined(JSON_HEDLEY_ASSUME) + #define JSON_HEDLEY_UNREACHABLE() JSON_HEDLEY_ASSUME(0) +#endif +#if !defined(JSON_HEDLEY_ASSUME) + #if defined(JSON_HEDLEY_UNREACHABLE) + #define JSON_HEDLEY_ASSUME(expr) JSON_HEDLEY_STATIC_CAST(void, ((expr) ? 1 : (JSON_HEDLEY_UNREACHABLE(), 1))) + #else + #define JSON_HEDLEY_ASSUME(expr) JSON_HEDLEY_STATIC_CAST(void, expr) + #endif +#endif +#if defined(JSON_HEDLEY_UNREACHABLE) + #if \ + JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,2,0) || \ + JSON_HEDLEY_TI_CL6X_VERSION_CHECK(4,0,0) + #define JSON_HEDLEY_UNREACHABLE_RETURN(value) return (JSON_HEDLEY_STATIC_CAST(void, JSON_HEDLEY_ASSUME(0)), (value)) + #else + #define JSON_HEDLEY_UNREACHABLE_RETURN(value) JSON_HEDLEY_UNREACHABLE() + #endif +#else + #define JSON_HEDLEY_UNREACHABLE_RETURN(value) return (value) +#endif +#if !defined(JSON_HEDLEY_UNREACHABLE) + #define JSON_HEDLEY_UNREACHABLE() JSON_HEDLEY_ASSUME(0) +#endif + +JSON_HEDLEY_DIAGNOSTIC_PUSH +#if JSON_HEDLEY_HAS_WARNING("-Wpedantic") + #pragma clang diagnostic ignored "-Wpedantic" +#endif +#if JSON_HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic") && defined(__cplusplus) + #pragma clang diagnostic ignored "-Wc++98-compat-pedantic" +#endif +#if JSON_HEDLEY_GCC_HAS_WARNING("-Wvariadic-macros",4,0,0) + #if defined(__clang__) + #pragma clang diagnostic ignored "-Wvariadic-macros" + #elif defined(JSON_HEDLEY_GCC_VERSION) + #pragma GCC diagnostic ignored "-Wvariadic-macros" + #endif +#endif +#if defined(JSON_HEDLEY_NON_NULL) + #undef JSON_HEDLEY_NON_NULL +#endif +#if \ + JSON_HEDLEY_HAS_ATTRIBUTE(nonnull) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) + #define JSON_HEDLEY_NON_NULL(...) __attribute__((__nonnull__(__VA_ARGS__))) +#else + #define JSON_HEDLEY_NON_NULL(...) 
+#endif +JSON_HEDLEY_DIAGNOSTIC_POP + +#if defined(JSON_HEDLEY_PRINTF_FORMAT) + #undef JSON_HEDLEY_PRINTF_FORMAT +#endif +#if defined(__MINGW32__) && JSON_HEDLEY_GCC_HAS_ATTRIBUTE(format,4,4,0) && !defined(__USE_MINGW_ANSI_STDIO) + #define JSON_HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __attribute__((__format__(ms_printf, string_idx, first_to_check))) +#elif defined(__MINGW32__) && JSON_HEDLEY_GCC_HAS_ATTRIBUTE(format,4,4,0) && defined(__USE_MINGW_ANSI_STDIO) + #define JSON_HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __attribute__((__format__(gnu_printf, string_idx, first_to_check))) +#elif \ + JSON_HEDLEY_HAS_ATTRIBUTE(format) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + JSON_HEDLEY_ARM_VERSION_CHECK(5,6,0) || \ + JSON_HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + JSON_HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) + #define JSON_HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __attribute__((__format__(__printf__, string_idx, first_to_check))) +#elif JSON_HEDLEY_PELLES_VERSION_CHECK(6,0,0) + #define JSON_HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __declspec(vaformat(printf,string_idx,first_to_check)) +#else + #define JSON_HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) +#endif + +#if defined(JSON_HEDLEY_CONSTEXPR) + #undef JSON_HEDLEY_CONSTEXPR +#endif +#if defined(__cplusplus) + #if __cplusplus >= 201103L + #define JSON_HEDLEY_CONSTEXPR JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(constexpr) + #endif +#endif +#if !defined(JSON_HEDLEY_CONSTEXPR) + #define JSON_HEDLEY_CONSTEXPR +#endif + +#if defined(JSON_HEDLEY_PREDICT) + #undef JSON_HEDLEY_PREDICT +#endif +#if defined(JSON_HEDLEY_LIKELY) + #undef JSON_HEDLEY_LIKELY +#endif +#if defined(JSON_HEDLEY_UNLIKELY) + #undef JSON_HEDLEY_UNLIKELY +#endif +#if defined(JSON_HEDLEY_UNPREDICTABLE) + #undef JSON_HEDLEY_UNPREDICTABLE +#endif +#if JSON_HEDLEY_HAS_BUILTIN(__builtin_unpredictable) + #define JSON_HEDLEY_UNPREDICTABLE(expr) __builtin_unpredictable((expr)) +#endif +#if \ + (JSON_HEDLEY_HAS_BUILTIN(__builtin_expect_with_probability) && !defined(JSON_HEDLEY_PGI_VERSION)) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(9,0,0) || \ + JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define JSON_HEDLEY_PREDICT(expr, value, probability) __builtin_expect_with_probability( (expr), (value), (probability)) +# define JSON_HEDLEY_PREDICT_TRUE(expr, probability) __builtin_expect_with_probability(!!(expr), 1 , (probability)) +# define JSON_HEDLEY_PREDICT_FALSE(expr, probability) __builtin_expect_with_probability(!!(expr), 0 , (probability)) +# define JSON_HEDLEY_LIKELY(expr) __builtin_expect (!!(expr), 1 ) +# define JSON_HEDLEY_UNLIKELY(expr) __builtin_expect (!!(expr), 0 ) +#elif \ + (JSON_HEDLEY_HAS_BUILTIN(__builtin_expect) && !defined(JSON_HEDLEY_INTEL_CL_VERSION)) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(3,0,0) || \ + 
JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + (JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,15,0) && defined(__cplusplus)) || \ + JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + JSON_HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + JSON_HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(4,7,0) || \ + JSON_HEDLEY_TI_CL430_VERSION_CHECK(3,1,0) || \ + JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,1,0) || \ + JSON_HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ + JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + JSON_HEDLEY_TINYC_VERSION_CHECK(0,9,27) || \ + JSON_HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ + JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define JSON_HEDLEY_PREDICT(expr, expected, probability) \ + (((probability) >= 0.9) ? __builtin_expect((expr), (expected)) : (JSON_HEDLEY_STATIC_CAST(void, expected), (expr))) +# define JSON_HEDLEY_PREDICT_TRUE(expr, probability) \ + (__extension__ ({ \ + double hedley_probability_ = (probability); \ + ((hedley_probability_ >= 0.9) ? __builtin_expect(!!(expr), 1) : ((hedley_probability_ <= 0.1) ? __builtin_expect(!!(expr), 0) : !!(expr))); \ + })) +# define JSON_HEDLEY_PREDICT_FALSE(expr, probability) \ + (__extension__ ({ \ + double hedley_probability_ = (probability); \ + ((hedley_probability_ >= 0.9) ? __builtin_expect(!!(expr), 0) : ((hedley_probability_ <= 0.1) ? __builtin_expect(!!(expr), 1) : !!(expr))); \ + })) +# define JSON_HEDLEY_LIKELY(expr) __builtin_expect(!!(expr), 1) +# define JSON_HEDLEY_UNLIKELY(expr) __builtin_expect(!!(expr), 0) +#else +# define JSON_HEDLEY_PREDICT(expr, expected, probability) (JSON_HEDLEY_STATIC_CAST(void, expected), (expr)) +# define JSON_HEDLEY_PREDICT_TRUE(expr, probability) (!!(expr)) +# define JSON_HEDLEY_PREDICT_FALSE(expr, probability) (!!(expr)) +# define JSON_HEDLEY_LIKELY(expr) (!!(expr)) +# define JSON_HEDLEY_UNLIKELY(expr) (!!(expr)) +#endif +#if !defined(JSON_HEDLEY_UNPREDICTABLE) + #define JSON_HEDLEY_UNPREDICTABLE(expr) JSON_HEDLEY_PREDICT(expr, 1, 0.5) +#endif + +#if defined(JSON_HEDLEY_MALLOC) + #undef JSON_HEDLEY_MALLOC +#endif +#if \ + JSON_HEDLEY_HAS_ATTRIBUTE(malloc) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + JSON_HEDLEY_IBM_VERSION_CHECK(12,1,0) || \ + JSON_HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) + #define JSON_HEDLEY_MALLOC __attribute__((__malloc__)) +#elif JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) + #define JSON_HEDLEY_MALLOC _Pragma("returns_new_memory") +#elif \ + JSON_HEDLEY_MSVC_VERSION_CHECK(14,0,0) || \ + JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) + #define JSON_HEDLEY_MALLOC __declspec(restrict) +#else + #define JSON_HEDLEY_MALLOC +#endif + +#if defined(JSON_HEDLEY_PURE) + #undef JSON_HEDLEY_PURE +#endif +#if \ 
+ JSON_HEDLEY_HAS_ATTRIBUTE(pure) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(2,96,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + JSON_HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + JSON_HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + JSON_HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ + JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define JSON_HEDLEY_PURE __attribute__((__pure__)) +#elif JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) +# define JSON_HEDLEY_PURE _Pragma("does_not_write_global_data") +#elif defined(__cplusplus) && \ + ( \ + JSON_HEDLEY_TI_CL430_VERSION_CHECK(2,0,1) || \ + JSON_HEDLEY_TI_CL6X_VERSION_CHECK(4,0,0) || \ + JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) \ + ) +# define JSON_HEDLEY_PURE _Pragma("FUNC_IS_PURE;") +#else +# define JSON_HEDLEY_PURE +#endif + +#if defined(JSON_HEDLEY_CONST) + #undef JSON_HEDLEY_CONST +#endif +#if \ + JSON_HEDLEY_HAS_ATTRIBUTE(const) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(2,5,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + JSON_HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + JSON_HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + JSON_HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ + JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) + #define JSON_HEDLEY_CONST __attribute__((__const__)) +#elif \ + JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) + #define JSON_HEDLEY_CONST _Pragma("no_side_effect") +#else + #define JSON_HEDLEY_CONST JSON_HEDLEY_PURE +#endif + +#if defined(JSON_HEDLEY_RESTRICT) + #undef JSON_HEDLEY_RESTRICT +#endif +#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && !defined(__cplusplus) + #define JSON_HEDLEY_RESTRICT restrict +#elif \ + JSON_HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ + JSON_HEDLEY_MSVC_VERSION_CHECK(14,0,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) || \ + JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + JSON_HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + JSON_HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ + JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,2,4) || \ + JSON_HEDLEY_TI_CL6X_VERSION_CHECK(8,1,0) 
|| \ + JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + (JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) && defined(__cplusplus)) || \ + JSON_HEDLEY_IAR_VERSION_CHECK(8,0,0) || \ + defined(__clang__) || \ + JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) + #define JSON_HEDLEY_RESTRICT __restrict +#elif JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,3,0) && !defined(__cplusplus) + #define JSON_HEDLEY_RESTRICT _Restrict +#else + #define JSON_HEDLEY_RESTRICT +#endif + +#if defined(JSON_HEDLEY_INLINE) + #undef JSON_HEDLEY_INLINE +#endif +#if \ + (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) || \ + (defined(__cplusplus) && (__cplusplus >= 199711L)) + #define JSON_HEDLEY_INLINE inline +#elif \ + defined(JSON_HEDLEY_GCC_VERSION) || \ + JSON_HEDLEY_ARM_VERSION_CHECK(6,2,0) + #define JSON_HEDLEY_INLINE __inline__ +#elif \ + JSON_HEDLEY_MSVC_VERSION_CHECK(12,0,0) || \ + JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) || \ + JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(5,1,0) || \ + JSON_HEDLEY_TI_CL430_VERSION_CHECK(3,1,0) || \ + JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,2,0) || \ + JSON_HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ + JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) + #define JSON_HEDLEY_INLINE __inline +#else + #define JSON_HEDLEY_INLINE +#endif + +#if defined(JSON_HEDLEY_ALWAYS_INLINE) + #undef JSON_HEDLEY_ALWAYS_INLINE +#endif +#if \ + JSON_HEDLEY_HAS_ATTRIBUTE(always_inline) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(4,0,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + JSON_HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + JSON_HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \ + JSON_HEDLEY_IAR_VERSION_CHECK(8,10,0) +# define JSON_HEDLEY_ALWAYS_INLINE __attribute__((__always_inline__)) JSON_HEDLEY_INLINE +#elif \ + JSON_HEDLEY_MSVC_VERSION_CHECK(12,0,0) || \ + JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define JSON_HEDLEY_ALWAYS_INLINE __forceinline +#elif defined(__cplusplus) && \ + ( \ + JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + JSON_HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ + JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) \ + ) +# define JSON_HEDLEY_ALWAYS_INLINE _Pragma("FUNC_ALWAYS_INLINE;") +#elif JSON_HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define JSON_HEDLEY_ALWAYS_INLINE _Pragma("inline=forced") +#else +# define JSON_HEDLEY_ALWAYS_INLINE JSON_HEDLEY_INLINE +#endif + +#if defined(JSON_HEDLEY_NEVER_INLINE) + #undef JSON_HEDLEY_NEVER_INLINE +#endif +#if \ + JSON_HEDLEY_HAS_ATTRIBUTE(noinline) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(4,0,0) || \ + 
JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + JSON_HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + JSON_HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \ + JSON_HEDLEY_IAR_VERSION_CHECK(8,10,0) + #define JSON_HEDLEY_NEVER_INLINE __attribute__((__noinline__)) +#elif \ + JSON_HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ + JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) + #define JSON_HEDLEY_NEVER_INLINE __declspec(noinline) +#elif JSON_HEDLEY_PGI_VERSION_CHECK(10,2,0) + #define JSON_HEDLEY_NEVER_INLINE _Pragma("noinline") +#elif JSON_HEDLEY_TI_CL6X_VERSION_CHECK(6,0,0) && defined(__cplusplus) + #define JSON_HEDLEY_NEVER_INLINE _Pragma("FUNC_CANNOT_INLINE;") +#elif JSON_HEDLEY_IAR_VERSION_CHECK(8,0,0) + #define JSON_HEDLEY_NEVER_INLINE _Pragma("inline=never") +#elif JSON_HEDLEY_COMPCERT_VERSION_CHECK(3,2,0) + #define JSON_HEDLEY_NEVER_INLINE __attribute((noinline)) +#elif JSON_HEDLEY_PELLES_VERSION_CHECK(9,0,0) + #define JSON_HEDLEY_NEVER_INLINE __declspec(noinline) +#else + #define JSON_HEDLEY_NEVER_INLINE +#endif + +#if defined(JSON_HEDLEY_PRIVATE) + #undef JSON_HEDLEY_PRIVATE +#endif +#if defined(JSON_HEDLEY_PUBLIC) + #undef JSON_HEDLEY_PUBLIC +#endif +#if defined(JSON_HEDLEY_IMPORT) + #undef JSON_HEDLEY_IMPORT +#endif +#if defined(_WIN32) || defined(__CYGWIN__) +# define JSON_HEDLEY_PRIVATE +# define JSON_HEDLEY_PUBLIC __declspec(dllexport) +# define JSON_HEDLEY_IMPORT __declspec(dllimport) +#else +# if \ + JSON_HEDLEY_HAS_ATTRIBUTE(visibility) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + JSON_HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ + ( \ + defined(__TI_EABI__) && \ + ( \ + (JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) \ + ) \ + ) || \ + JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define JSON_HEDLEY_PRIVATE __attribute__((__visibility__("hidden"))) +# define JSON_HEDLEY_PUBLIC __attribute__((__visibility__("default"))) +# else +# define JSON_HEDLEY_PRIVATE +# define JSON_HEDLEY_PUBLIC +# endif +# define JSON_HEDLEY_IMPORT extern +#endif + +#if defined(JSON_HEDLEY_NO_THROW) + #undef JSON_HEDLEY_NO_THROW +#endif +#if \ + JSON_HEDLEY_HAS_ATTRIBUTE(nothrow) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) + #define JSON_HEDLEY_NO_THROW __attribute__((__nothrow__)) +#elif \ + JSON_HEDLEY_MSVC_VERSION_CHECK(13,1,0) || \ + JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) || \ + JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) + #define JSON_HEDLEY_NO_THROW __declspec(nothrow) +#else + #define JSON_HEDLEY_NO_THROW +#endif + +#if 
defined(JSON_HEDLEY_FALL_THROUGH) + #undef JSON_HEDLEY_FALL_THROUGH +#endif +#if \ + JSON_HEDLEY_HAS_ATTRIBUTE(fallthrough) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(7,0,0) || \ + JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) + #define JSON_HEDLEY_FALL_THROUGH __attribute__((__fallthrough__)) +#elif JSON_HEDLEY_HAS_CPP_ATTRIBUTE_NS(clang,fallthrough) + #define JSON_HEDLEY_FALL_THROUGH JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[clang::fallthrough]]) +#elif JSON_HEDLEY_HAS_CPP_ATTRIBUTE(fallthrough) + #define JSON_HEDLEY_FALL_THROUGH JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[fallthrough]]) +#elif defined(__fallthrough) /* SAL */ + #define JSON_HEDLEY_FALL_THROUGH __fallthrough +#else + #define JSON_HEDLEY_FALL_THROUGH +#endif + +#if defined(JSON_HEDLEY_RETURNS_NON_NULL) + #undef JSON_HEDLEY_RETURNS_NON_NULL +#endif +#if \ + JSON_HEDLEY_HAS_ATTRIBUTE(returns_nonnull) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(4,9,0) || \ + JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) + #define JSON_HEDLEY_RETURNS_NON_NULL __attribute__((__returns_nonnull__)) +#elif defined(_Ret_notnull_) /* SAL */ + #define JSON_HEDLEY_RETURNS_NON_NULL _Ret_notnull_ +#else + #define JSON_HEDLEY_RETURNS_NON_NULL +#endif + +#if defined(JSON_HEDLEY_ARRAY_PARAM) + #undef JSON_HEDLEY_ARRAY_PARAM +#endif +#if \ + defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && \ + !defined(__STDC_NO_VLA__) && \ + !defined(__cplusplus) && \ + !defined(JSON_HEDLEY_PGI_VERSION) && \ + !defined(JSON_HEDLEY_TINYC_VERSION) + #define JSON_HEDLEY_ARRAY_PARAM(name) (name) +#else + #define JSON_HEDLEY_ARRAY_PARAM(name) +#endif + +#if defined(JSON_HEDLEY_IS_CONSTANT) + #undef JSON_HEDLEY_IS_CONSTANT +#endif +#if defined(JSON_HEDLEY_REQUIRE_CONSTEXPR) + #undef JSON_HEDLEY_REQUIRE_CONSTEXPR +#endif +/* JSON_HEDLEY_IS_CONSTEXPR_ is for + HEDLEY INTERNAL USE ONLY. API subject to change without notice. */ +#if defined(JSON_HEDLEY_IS_CONSTEXPR_) + #undef JSON_HEDLEY_IS_CONSTEXPR_ +#endif +#if \ + JSON_HEDLEY_HAS_BUILTIN(__builtin_constant_p) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + JSON_HEDLEY_TINYC_VERSION_CHECK(0,9,19) || \ + JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + JSON_HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ + JSON_HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ + (JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) && !defined(__cplusplus)) || \ + JSON_HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ + JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) + #define JSON_HEDLEY_IS_CONSTANT(expr) __builtin_constant_p(expr) +#endif +#if !defined(__cplusplus) +# if \ + JSON_HEDLEY_HAS_BUILTIN(__builtin_types_compatible_p) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + JSON_HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ + JSON_HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ + JSON_HEDLEY_ARM_VERSION_CHECK(5,4,0) || \ + JSON_HEDLEY_TINYC_VERSION_CHECK(0,9,24) +#if defined(__INTPTR_TYPE__) + #define JSON_HEDLEY_IS_CONSTEXPR_(expr) __builtin_types_compatible_p(__typeof__((1 ? (void*) ((__INTPTR_TYPE__) ((expr) * 0)) : (int*) 0)), int*) +#else + #include + #define JSON_HEDLEY_IS_CONSTEXPR_(expr) __builtin_types_compatible_p(__typeof__((1 ? 
(void*) ((intptr_t) ((expr) * 0)) : (int*) 0)), int*) +#endif +# elif \ + ( \ + defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) && \ + !defined(JSON_HEDLEY_SUNPRO_VERSION) && \ + !defined(JSON_HEDLEY_PGI_VERSION) && \ + !defined(JSON_HEDLEY_IAR_VERSION)) || \ + (JSON_HEDLEY_HAS_EXTENSION(c_generic_selections) && !defined(JSON_HEDLEY_IAR_VERSION)) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(4,9,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(17,0,0) || \ + JSON_HEDLEY_IBM_VERSION_CHECK(12,1,0) || \ + JSON_HEDLEY_ARM_VERSION_CHECK(5,3,0) +#if defined(__INTPTR_TYPE__) + #define JSON_HEDLEY_IS_CONSTEXPR_(expr) _Generic((1 ? (void*) ((__INTPTR_TYPE__) ((expr) * 0)) : (int*) 0), int*: 1, void*: 0) +#else + #include + #define JSON_HEDLEY_IS_CONSTEXPR_(expr) _Generic((1 ? (void*) ((intptr_t) * 0) : (int*) 0), int*: 1, void*: 0) +#endif +# elif \ + defined(JSON_HEDLEY_GCC_VERSION) || \ + defined(JSON_HEDLEY_INTEL_VERSION) || \ + defined(JSON_HEDLEY_TINYC_VERSION) || \ + defined(JSON_HEDLEY_TI_ARMCL_VERSION) || \ + JSON_HEDLEY_TI_CL430_VERSION_CHECK(18,12,0) || \ + defined(JSON_HEDLEY_TI_CL2000_VERSION) || \ + defined(JSON_HEDLEY_TI_CL6X_VERSION) || \ + defined(JSON_HEDLEY_TI_CL7X_VERSION) || \ + defined(JSON_HEDLEY_TI_CLPRU_VERSION) || \ + defined(__clang__) +# define JSON_HEDLEY_IS_CONSTEXPR_(expr) ( \ + sizeof(void) != \ + sizeof(*( \ + 1 ? \ + ((void*) ((expr) * 0L) ) : \ +((struct { char v[sizeof(void) * 2]; } *) 1) \ + ) \ + ) \ + ) +# endif +#endif +#if defined(JSON_HEDLEY_IS_CONSTEXPR_) + #if !defined(JSON_HEDLEY_IS_CONSTANT) + #define JSON_HEDLEY_IS_CONSTANT(expr) JSON_HEDLEY_IS_CONSTEXPR_(expr) + #endif + #define JSON_HEDLEY_REQUIRE_CONSTEXPR(expr) (JSON_HEDLEY_IS_CONSTEXPR_(expr) ? (expr) : (-1)) +#else + #if !defined(JSON_HEDLEY_IS_CONSTANT) + #define JSON_HEDLEY_IS_CONSTANT(expr) (0) + #endif + #define JSON_HEDLEY_REQUIRE_CONSTEXPR(expr) (expr) +#endif + +#if defined(JSON_HEDLEY_BEGIN_C_DECLS) + #undef JSON_HEDLEY_BEGIN_C_DECLS +#endif +#if defined(JSON_HEDLEY_END_C_DECLS) + #undef JSON_HEDLEY_END_C_DECLS +#endif +#if defined(JSON_HEDLEY_C_DECL) + #undef JSON_HEDLEY_C_DECL +#endif +#if defined(__cplusplus) + #define JSON_HEDLEY_BEGIN_C_DECLS extern "C" { + #define JSON_HEDLEY_END_C_DECLS } + #define JSON_HEDLEY_C_DECL extern "C" +#else + #define JSON_HEDLEY_BEGIN_C_DECLS + #define JSON_HEDLEY_END_C_DECLS + #define JSON_HEDLEY_C_DECL +#endif + +#if defined(JSON_HEDLEY_STATIC_ASSERT) + #undef JSON_HEDLEY_STATIC_ASSERT +#endif +#if \ + !defined(__cplusplus) && ( \ + (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) || \ + (JSON_HEDLEY_HAS_FEATURE(c_static_assert) && !defined(JSON_HEDLEY_INTEL_CL_VERSION)) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(6,0,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + defined(_Static_assert) \ + ) +# define JSON_HEDLEY_STATIC_ASSERT(expr, message) _Static_assert(expr, message) +#elif \ + (defined(__cplusplus) && (__cplusplus >= 201103L)) || \ + JSON_HEDLEY_MSVC_VERSION_CHECK(16,0,0) || \ + JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define JSON_HEDLEY_STATIC_ASSERT(expr, message) JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(static_assert(expr, message)) +#else +# define JSON_HEDLEY_STATIC_ASSERT(expr, message) +#endif + +#if defined(JSON_HEDLEY_NULL) + #undef JSON_HEDLEY_NULL +#endif +#if defined(__cplusplus) + #if __cplusplus >= 201103L + #define JSON_HEDLEY_NULL JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(nullptr) + #elif defined(NULL) + #define JSON_HEDLEY_NULL NULL + #else + #define JSON_HEDLEY_NULL 
JSON_HEDLEY_STATIC_CAST(void*, 0) + #endif +#elif defined(NULL) + #define JSON_HEDLEY_NULL NULL +#else + #define JSON_HEDLEY_NULL ((void*) 0) +#endif + +#if defined(JSON_HEDLEY_MESSAGE) + #undef JSON_HEDLEY_MESSAGE +#endif +#if JSON_HEDLEY_HAS_WARNING("-Wunknown-pragmas") +# define JSON_HEDLEY_MESSAGE(msg) \ + JSON_HEDLEY_DIAGNOSTIC_PUSH \ + JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS \ + JSON_HEDLEY_PRAGMA(message msg) \ + JSON_HEDLEY_DIAGNOSTIC_POP +#elif \ + JSON_HEDLEY_GCC_VERSION_CHECK(4,4,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define JSON_HEDLEY_MESSAGE(msg) JSON_HEDLEY_PRAGMA(message msg) +#elif JSON_HEDLEY_CRAY_VERSION_CHECK(5,0,0) +# define JSON_HEDLEY_MESSAGE(msg) JSON_HEDLEY_PRAGMA(_CRI message msg) +#elif JSON_HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define JSON_HEDLEY_MESSAGE(msg) JSON_HEDLEY_PRAGMA(message(msg)) +#elif JSON_HEDLEY_PELLES_VERSION_CHECK(2,0,0) +# define JSON_HEDLEY_MESSAGE(msg) JSON_HEDLEY_PRAGMA(message(msg)) +#else +# define JSON_HEDLEY_MESSAGE(msg) +#endif + +#if defined(JSON_HEDLEY_WARNING) + #undef JSON_HEDLEY_WARNING +#endif +#if JSON_HEDLEY_HAS_WARNING("-Wunknown-pragmas") +# define JSON_HEDLEY_WARNING(msg) \ + JSON_HEDLEY_DIAGNOSTIC_PUSH \ + JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS \ + JSON_HEDLEY_PRAGMA(clang warning msg) \ + JSON_HEDLEY_DIAGNOSTIC_POP +#elif \ + JSON_HEDLEY_GCC_VERSION_CHECK(4,8,0) || \ + JSON_HEDLEY_PGI_VERSION_CHECK(18,4,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define JSON_HEDLEY_WARNING(msg) JSON_HEDLEY_PRAGMA(GCC warning msg) +#elif \ + JSON_HEDLEY_MSVC_VERSION_CHECK(15,0,0) || \ + JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define JSON_HEDLEY_WARNING(msg) JSON_HEDLEY_PRAGMA(message(msg)) +#else +# define JSON_HEDLEY_WARNING(msg) JSON_HEDLEY_MESSAGE(msg) +#endif + +#if defined(JSON_HEDLEY_REQUIRE) + #undef JSON_HEDLEY_REQUIRE +#endif +#if defined(JSON_HEDLEY_REQUIRE_MSG) + #undef JSON_HEDLEY_REQUIRE_MSG +#endif +#if JSON_HEDLEY_HAS_ATTRIBUTE(diagnose_if) +# if JSON_HEDLEY_HAS_WARNING("-Wgcc-compat") +# define JSON_HEDLEY_REQUIRE(expr) \ + JSON_HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wgcc-compat\"") \ + __attribute__((diagnose_if(!(expr), #expr, "error"))) \ + JSON_HEDLEY_DIAGNOSTIC_POP +# define JSON_HEDLEY_REQUIRE_MSG(expr,msg) \ + JSON_HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wgcc-compat\"") \ + __attribute__((diagnose_if(!(expr), msg, "error"))) \ + JSON_HEDLEY_DIAGNOSTIC_POP +# else +# define JSON_HEDLEY_REQUIRE(expr) __attribute__((diagnose_if(!(expr), #expr, "error"))) +# define JSON_HEDLEY_REQUIRE_MSG(expr,msg) __attribute__((diagnose_if(!(expr), msg, "error"))) +# endif +#else +# define JSON_HEDLEY_REQUIRE(expr) +# define JSON_HEDLEY_REQUIRE_MSG(expr,msg) +#endif + +#if defined(JSON_HEDLEY_FLAGS) + #undef JSON_HEDLEY_FLAGS +#endif +#if JSON_HEDLEY_HAS_ATTRIBUTE(flag_enum) && (!defined(__cplusplus) || JSON_HEDLEY_HAS_WARNING("-Wbitfield-enum-conversion")) + #define JSON_HEDLEY_FLAGS __attribute__((__flag_enum__)) +#else + #define JSON_HEDLEY_FLAGS +#endif + +#if defined(JSON_HEDLEY_FLAGS_CAST) + #undef JSON_HEDLEY_FLAGS_CAST +#endif +#if JSON_HEDLEY_INTEL_VERSION_CHECK(19,0,0) +# define JSON_HEDLEY_FLAGS_CAST(T, expr) (__extension__ ({ \ + JSON_HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("warning(disable:188)") \ + ((T) (expr)); \ + JSON_HEDLEY_DIAGNOSTIC_POP \ + })) +#else +# define JSON_HEDLEY_FLAGS_CAST(T, expr) JSON_HEDLEY_STATIC_CAST(T, expr) +#endif + +#if defined(JSON_HEDLEY_EMPTY_BASES) + #undef JSON_HEDLEY_EMPTY_BASES +#endif +#if \ + 
(JSON_HEDLEY_MSVC_VERSION_CHECK(19,0,23918) && !JSON_HEDLEY_MSVC_VERSION_CHECK(20,0,0)) || \ + JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) + #define JSON_HEDLEY_EMPTY_BASES __declspec(empty_bases) +#else + #define JSON_HEDLEY_EMPTY_BASES +#endif + +/* Remaining macros are deprecated. */ + +#if defined(JSON_HEDLEY_GCC_NOT_CLANG_VERSION_CHECK) + #undef JSON_HEDLEY_GCC_NOT_CLANG_VERSION_CHECK +#endif +#if defined(__clang__) + #define JSON_HEDLEY_GCC_NOT_CLANG_VERSION_CHECK(major,minor,patch) (0) +#else + #define JSON_HEDLEY_GCC_NOT_CLANG_VERSION_CHECK(major,minor,patch) JSON_HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(JSON_HEDLEY_CLANG_HAS_ATTRIBUTE) + #undef JSON_HEDLEY_CLANG_HAS_ATTRIBUTE +#endif +#define JSON_HEDLEY_CLANG_HAS_ATTRIBUTE(attribute) JSON_HEDLEY_HAS_ATTRIBUTE(attribute) + +#if defined(JSON_HEDLEY_CLANG_HAS_CPP_ATTRIBUTE) + #undef JSON_HEDLEY_CLANG_HAS_CPP_ATTRIBUTE +#endif +#define JSON_HEDLEY_CLANG_HAS_CPP_ATTRIBUTE(attribute) JSON_HEDLEY_HAS_CPP_ATTRIBUTE(attribute) + +#if defined(JSON_HEDLEY_CLANG_HAS_BUILTIN) + #undef JSON_HEDLEY_CLANG_HAS_BUILTIN +#endif +#define JSON_HEDLEY_CLANG_HAS_BUILTIN(builtin) JSON_HEDLEY_HAS_BUILTIN(builtin) + +#if defined(JSON_HEDLEY_CLANG_HAS_FEATURE) + #undef JSON_HEDLEY_CLANG_HAS_FEATURE +#endif +#define JSON_HEDLEY_CLANG_HAS_FEATURE(feature) JSON_HEDLEY_HAS_FEATURE(feature) + +#if defined(JSON_HEDLEY_CLANG_HAS_EXTENSION) + #undef JSON_HEDLEY_CLANG_HAS_EXTENSION +#endif +#define JSON_HEDLEY_CLANG_HAS_EXTENSION(extension) JSON_HEDLEY_HAS_EXTENSION(extension) + +#if defined(JSON_HEDLEY_CLANG_HAS_DECLSPEC_DECLSPEC_ATTRIBUTE) + #undef JSON_HEDLEY_CLANG_HAS_DECLSPEC_DECLSPEC_ATTRIBUTE +#endif +#define JSON_HEDLEY_CLANG_HAS_DECLSPEC_ATTRIBUTE(attribute) JSON_HEDLEY_HAS_DECLSPEC_ATTRIBUTE(attribute) + +#if defined(JSON_HEDLEY_CLANG_HAS_WARNING) + #undef JSON_HEDLEY_CLANG_HAS_WARNING +#endif +#define JSON_HEDLEY_CLANG_HAS_WARNING(warning) JSON_HEDLEY_HAS_WARNING(warning) + +#endif /* !defined(JSON_HEDLEY_VERSION) || (JSON_HEDLEY_VERSION < X) */ + + +// This file contains all internal macro definitions (except those affecting ABI) +// You MUST include macro_unscope.hpp at the end of json.hpp to undef all of them + +// #include + + +// exclude unsupported compilers +#if !defined(JSON_SKIP_UNSUPPORTED_COMPILER_CHECK) + #if defined(__clang__) + #if (__clang_major__ * 10000 + __clang_minor__ * 100 + __clang_patchlevel__) < 30400 + #error "unsupported Clang version - see https://github.com/nlohmann/json#supported-compilers" + #endif + #elif defined(__GNUC__) && !(defined(__ICC) || defined(__INTEL_COMPILER)) + #if (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) < 40800 + #error "unsupported GCC version - see https://github.com/nlohmann/json#supported-compilers" + #endif + #endif +#endif + +// C++ language standard detection +// if the user manually specified the used c++ version this is skipped +#if !defined(JSON_HAS_CPP_20) && !defined(JSON_HAS_CPP_17) && !defined(JSON_HAS_CPP_14) && !defined(JSON_HAS_CPP_11) + #if (defined(__cplusplus) && __cplusplus >= 202002L) || (defined(_MSVC_LANG) && _MSVC_LANG >= 202002L) + #define JSON_HAS_CPP_20 + #define JSON_HAS_CPP_17 + #define JSON_HAS_CPP_14 + #elif (defined(__cplusplus) && __cplusplus >= 201703L) || (defined(_HAS_CXX17) && _HAS_CXX17 == 1) // fix for issue #464 + #define JSON_HAS_CPP_17 + #define JSON_HAS_CPP_14 + #elif (defined(__cplusplus) && __cplusplus >= 201402L) || (defined(_HAS_CXX14) && _HAS_CXX14 == 1) + #define JSON_HAS_CPP_14 + #endif + // the cpp 11 flag is 
always specified because it is the minimal required version + #define JSON_HAS_CPP_11 +#endif + +#ifdef __has_include + #if __has_include() + #include + #endif +#endif + +#if !defined(JSON_HAS_FILESYSTEM) && !defined(JSON_HAS_EXPERIMENTAL_FILESYSTEM) + #ifdef JSON_HAS_CPP_17 + #if defined(__cpp_lib_filesystem) + #define JSON_HAS_FILESYSTEM 1 + #elif defined(__cpp_lib_experimental_filesystem) + #define JSON_HAS_EXPERIMENTAL_FILESYSTEM 1 + #elif !defined(__has_include) + #define JSON_HAS_EXPERIMENTAL_FILESYSTEM 1 + #elif __has_include() + #define JSON_HAS_FILESYSTEM 1 + #elif __has_include() + #define JSON_HAS_EXPERIMENTAL_FILESYSTEM 1 + #endif + + // std::filesystem does not work on MinGW GCC 8: https://sourceforge.net/p/mingw-w64/bugs/737/ + #if defined(__MINGW32__) && defined(__GNUC__) && __GNUC__ == 8 + #undef JSON_HAS_FILESYSTEM + #undef JSON_HAS_EXPERIMENTAL_FILESYSTEM + #endif + + // no filesystem support before GCC 8: https://en.cppreference.com/w/cpp/compiler_support + #if defined(__GNUC__) && !defined(__clang__) && __GNUC__ < 8 + #undef JSON_HAS_FILESYSTEM + #undef JSON_HAS_EXPERIMENTAL_FILESYSTEM + #endif + + // no filesystem support before Clang 7: https://en.cppreference.com/w/cpp/compiler_support + #if defined(__clang_major__) && __clang_major__ < 7 + #undef JSON_HAS_FILESYSTEM + #undef JSON_HAS_EXPERIMENTAL_FILESYSTEM + #endif + + // no filesystem support before MSVC 19.14: https://en.cppreference.com/w/cpp/compiler_support + #if defined(_MSC_VER) && _MSC_VER < 1914 + #undef JSON_HAS_FILESYSTEM + #undef JSON_HAS_EXPERIMENTAL_FILESYSTEM + #endif + + // no filesystem support before iOS 13 + #if defined(__IPHONE_OS_VERSION_MIN_REQUIRED) && __IPHONE_OS_VERSION_MIN_REQUIRED < 130000 + #undef JSON_HAS_FILESYSTEM + #undef JSON_HAS_EXPERIMENTAL_FILESYSTEM + #endif + + // no filesystem support before macOS Catalina + #if defined(__MAC_OS_X_VERSION_MIN_REQUIRED) && __MAC_OS_X_VERSION_MIN_REQUIRED < 101500 + #undef JSON_HAS_FILESYSTEM + #undef JSON_HAS_EXPERIMENTAL_FILESYSTEM + #endif + #endif +#endif + +#ifndef JSON_HAS_EXPERIMENTAL_FILESYSTEM + #define JSON_HAS_EXPERIMENTAL_FILESYSTEM 0 +#endif + +#ifndef JSON_HAS_FILESYSTEM + #define JSON_HAS_FILESYSTEM 0 +#endif + +#ifndef JSON_HAS_THREE_WAY_COMPARISON + #if defined(__cpp_impl_three_way_comparison) && __cpp_impl_three_way_comparison >= 201907L \ + && defined(__cpp_lib_three_way_comparison) && __cpp_lib_three_way_comparison >= 201907L + #define JSON_HAS_THREE_WAY_COMPARISON 1 + #else + #define JSON_HAS_THREE_WAY_COMPARISON 0 + #endif +#endif + +#ifndef JSON_HAS_RANGES + // ranges header shipping in GCC 11.1.0 (released 2021-04-27) has syntax error + #if defined(__GLIBCXX__) && __GLIBCXX__ == 20210427 + #define JSON_HAS_RANGES 0 + #elif defined(__cpp_lib_ranges) + #define JSON_HAS_RANGES 1 + #else + #define JSON_HAS_RANGES 0 + #endif +#endif + +#ifndef JSON_HAS_STATIC_RTTI + #if !defined(_HAS_STATIC_RTTI) || _HAS_STATIC_RTTI != 0 + #define JSON_HAS_STATIC_RTTI 1 + #else + #define JSON_HAS_STATIC_RTTI 0 + #endif +#endif + +#ifdef JSON_HAS_CPP_17 + #define JSON_INLINE_VARIABLE inline +#else + #define JSON_INLINE_VARIABLE +#endif + +#if JSON_HEDLEY_HAS_ATTRIBUTE(no_unique_address) + #define JSON_NO_UNIQUE_ADDRESS [[no_unique_address]] +#else + #define JSON_NO_UNIQUE_ADDRESS +#endif + +// disable documentation warnings on clang +#if defined(__clang__) + #pragma clang diagnostic push + #pragma clang diagnostic ignored "-Wdocumentation" + #pragma clang diagnostic ignored "-Wdocumentation-unknown-command" +#endif + +// allow disabling 
exceptions +#if (defined(__cpp_exceptions) || defined(__EXCEPTIONS) || defined(_CPPUNWIND)) && !defined(JSON_NOEXCEPTION) + #define JSON_THROW(exception) throw exception + #define JSON_TRY try + #define JSON_CATCH(exception) catch(exception) + #define JSON_INTERNAL_CATCH(exception) catch(exception) +#else + #include + #define JSON_THROW(exception) std::abort() + #define JSON_TRY if(true) + #define JSON_CATCH(exception) if(false) + #define JSON_INTERNAL_CATCH(exception) if(false) +#endif + +// override exception macros +#if defined(JSON_THROW_USER) + #undef JSON_THROW + #define JSON_THROW JSON_THROW_USER +#endif +#if defined(JSON_TRY_USER) + #undef JSON_TRY + #define JSON_TRY JSON_TRY_USER +#endif +#if defined(JSON_CATCH_USER) + #undef JSON_CATCH + #define JSON_CATCH JSON_CATCH_USER + #undef JSON_INTERNAL_CATCH + #define JSON_INTERNAL_CATCH JSON_CATCH_USER +#endif +#if defined(JSON_INTERNAL_CATCH_USER) + #undef JSON_INTERNAL_CATCH + #define JSON_INTERNAL_CATCH JSON_INTERNAL_CATCH_USER +#endif + +// allow overriding assert +#if !defined(JSON_ASSERT) + #include // assert + #define JSON_ASSERT(x) assert(x) +#endif + +// allow to access some private functions (needed by the test suite) +#if defined(JSON_TESTS_PRIVATE) + #define JSON_PRIVATE_UNLESS_TESTED public +#else + #define JSON_PRIVATE_UNLESS_TESTED private +#endif + +/*! +@brief macro to briefly define a mapping between an enum and JSON +@def NLOHMANN_JSON_SERIALIZE_ENUM +@since version 3.4.0 +*/ +#define NLOHMANN_JSON_SERIALIZE_ENUM(ENUM_TYPE, ...) \ + template \ + inline void to_json(BasicJsonType& j, const ENUM_TYPE& e) \ + { \ + static_assert(std::is_enum::value, #ENUM_TYPE " must be an enum!"); \ + static const std::pair m[] = __VA_ARGS__; \ + auto it = std::find_if(std::begin(m), std::end(m), \ + [e](const std::pair& ej_pair) -> bool \ + { \ + return ej_pair.first == e; \ + }); \ + j = ((it != std::end(m)) ? it : std::begin(m))->second; \ + } \ + template \ + inline void from_json(const BasicJsonType& j, ENUM_TYPE& e) \ + { \ + static_assert(std::is_enum::value, #ENUM_TYPE " must be an enum!"); \ + static const std::pair m[] = __VA_ARGS__; \ + auto it = std::find_if(std::begin(m), std::end(m), \ + [&j](const std::pair& ej_pair) -> bool \ + { \ + return ej_pair.second == j; \ + }); \ + e = ((it != std::end(m)) ? it : std::begin(m))->first; \ + } + +// Ugly macros to avoid uglier copy-paste when specializing basic_json. They +// may be removed in the future once the class is split. + +#define NLOHMANN_BASIC_JSON_TPL_DECLARATION \ + template class ObjectType, \ + template class ArrayType, \ + class StringType, class BooleanType, class NumberIntegerType, \ + class NumberUnsignedType, class NumberFloatType, \ + template class AllocatorType, \ + template class JSONSerializer, \ + class BinaryType, \ + class CustomBaseClass> + +#define NLOHMANN_BASIC_JSON_TPL \ + basic_json + +// Macros to simplify conversion from/to types + +#define NLOHMANN_JSON_EXPAND( x ) x +#define NLOHMANN_JSON_GET_MACRO(_1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, _14, _15, _16, _17, _18, _19, _20, _21, _22, _23, _24, _25, _26, _27, _28, _29, _30, _31, _32, _33, _34, _35, _36, _37, _38, _39, _40, _41, _42, _43, _44, _45, _46, _47, _48, _49, _50, _51, _52, _53, _54, _55, _56, _57, _58, _59, _60, _61, _62, _63, _64, NAME,...) NAME +#define NLOHMANN_JSON_PASTE(...) 
NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_GET_MACRO(__VA_ARGS__, \ + NLOHMANN_JSON_PASTE64, \ + NLOHMANN_JSON_PASTE63, \ + NLOHMANN_JSON_PASTE62, \ + NLOHMANN_JSON_PASTE61, \ + NLOHMANN_JSON_PASTE60, \ + NLOHMANN_JSON_PASTE59, \ + NLOHMANN_JSON_PASTE58, \ + NLOHMANN_JSON_PASTE57, \ + NLOHMANN_JSON_PASTE56, \ + NLOHMANN_JSON_PASTE55, \ + NLOHMANN_JSON_PASTE54, \ + NLOHMANN_JSON_PASTE53, \ + NLOHMANN_JSON_PASTE52, \ + NLOHMANN_JSON_PASTE51, \ + NLOHMANN_JSON_PASTE50, \ + NLOHMANN_JSON_PASTE49, \ + NLOHMANN_JSON_PASTE48, \ + NLOHMANN_JSON_PASTE47, \ + NLOHMANN_JSON_PASTE46, \ + NLOHMANN_JSON_PASTE45, \ + NLOHMANN_JSON_PASTE44, \ + NLOHMANN_JSON_PASTE43, \ + NLOHMANN_JSON_PASTE42, \ + NLOHMANN_JSON_PASTE41, \ + NLOHMANN_JSON_PASTE40, \ + NLOHMANN_JSON_PASTE39, \ + NLOHMANN_JSON_PASTE38, \ + NLOHMANN_JSON_PASTE37, \ + NLOHMANN_JSON_PASTE36, \ + NLOHMANN_JSON_PASTE35, \ + NLOHMANN_JSON_PASTE34, \ + NLOHMANN_JSON_PASTE33, \ + NLOHMANN_JSON_PASTE32, \ + NLOHMANN_JSON_PASTE31, \ + NLOHMANN_JSON_PASTE30, \ + NLOHMANN_JSON_PASTE29, \ + NLOHMANN_JSON_PASTE28, \ + NLOHMANN_JSON_PASTE27, \ + NLOHMANN_JSON_PASTE26, \ + NLOHMANN_JSON_PASTE25, \ + NLOHMANN_JSON_PASTE24, \ + NLOHMANN_JSON_PASTE23, \ + NLOHMANN_JSON_PASTE22, \ + NLOHMANN_JSON_PASTE21, \ + NLOHMANN_JSON_PASTE20, \ + NLOHMANN_JSON_PASTE19, \ + NLOHMANN_JSON_PASTE18, \ + NLOHMANN_JSON_PASTE17, \ + NLOHMANN_JSON_PASTE16, \ + NLOHMANN_JSON_PASTE15, \ + NLOHMANN_JSON_PASTE14, \ + NLOHMANN_JSON_PASTE13, \ + NLOHMANN_JSON_PASTE12, \ + NLOHMANN_JSON_PASTE11, \ + NLOHMANN_JSON_PASTE10, \ + NLOHMANN_JSON_PASTE9, \ + NLOHMANN_JSON_PASTE8, \ + NLOHMANN_JSON_PASTE7, \ + NLOHMANN_JSON_PASTE6, \ + NLOHMANN_JSON_PASTE5, \ + NLOHMANN_JSON_PASTE4, \ + NLOHMANN_JSON_PASTE3, \ + NLOHMANN_JSON_PASTE2, \ + NLOHMANN_JSON_PASTE1)(__VA_ARGS__)) +#define NLOHMANN_JSON_PASTE2(func, v1) func(v1) +#define NLOHMANN_JSON_PASTE3(func, v1, v2) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE2(func, v2) +#define NLOHMANN_JSON_PASTE4(func, v1, v2, v3) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE3(func, v2, v3) +#define NLOHMANN_JSON_PASTE5(func, v1, v2, v3, v4) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE4(func, v2, v3, v4) +#define NLOHMANN_JSON_PASTE6(func, v1, v2, v3, v4, v5) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE5(func, v2, v3, v4, v5) +#define NLOHMANN_JSON_PASTE7(func, v1, v2, v3, v4, v5, v6) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE6(func, v2, v3, v4, v5, v6) +#define NLOHMANN_JSON_PASTE8(func, v1, v2, v3, v4, v5, v6, v7) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE7(func, v2, v3, v4, v5, v6, v7) +#define NLOHMANN_JSON_PASTE9(func, v1, v2, v3, v4, v5, v6, v7, v8) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE8(func, v2, v3, v4, v5, v6, v7, v8) +#define NLOHMANN_JSON_PASTE10(func, v1, v2, v3, v4, v5, v6, v7, v8, v9) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE9(func, v2, v3, v4, v5, v6, v7, v8, v9) +#define NLOHMANN_JSON_PASTE11(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE10(func, v2, v3, v4, v5, v6, v7, v8, v9, v10) +#define NLOHMANN_JSON_PASTE12(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE11(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11) +#define NLOHMANN_JSON_PASTE13(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE12(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12) +#define NLOHMANN_JSON_PASTE14(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, 
v13) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE13(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13) +#define NLOHMANN_JSON_PASTE15(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE14(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14) +#define NLOHMANN_JSON_PASTE16(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE15(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15) +#define NLOHMANN_JSON_PASTE17(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE16(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16) +#define NLOHMANN_JSON_PASTE18(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE17(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17) +#define NLOHMANN_JSON_PASTE19(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE18(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18) +#define NLOHMANN_JSON_PASTE20(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE19(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19) +#define NLOHMANN_JSON_PASTE21(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE20(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20) +#define NLOHMANN_JSON_PASTE22(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE21(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21) +#define NLOHMANN_JSON_PASTE23(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE22(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22) +#define NLOHMANN_JSON_PASTE24(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE23(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23) +#define NLOHMANN_JSON_PASTE25(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE24(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24) +#define NLOHMANN_JSON_PASTE26(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE25(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25) +#define NLOHMANN_JSON_PASTE27(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26) NLOHMANN_JSON_PASTE2(func, v1) 
NLOHMANN_JSON_PASTE26(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26) +#define NLOHMANN_JSON_PASTE28(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE27(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27) +#define NLOHMANN_JSON_PASTE29(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE28(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28) +#define NLOHMANN_JSON_PASTE30(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE29(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29) +#define NLOHMANN_JSON_PASTE31(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE30(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30) +#define NLOHMANN_JSON_PASTE32(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE31(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31) +#define NLOHMANN_JSON_PASTE33(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE32(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32) +#define NLOHMANN_JSON_PASTE34(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE33(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33) +#define NLOHMANN_JSON_PASTE35(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE34(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34) +#define NLOHMANN_JSON_PASTE36(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE35(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, 
v26, v27, v28, v29, v30, v31, v32, v33, v34, v35) +#define NLOHMANN_JSON_PASTE37(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE36(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36) +#define NLOHMANN_JSON_PASTE38(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE37(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37) +#define NLOHMANN_JSON_PASTE39(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE38(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38) +#define NLOHMANN_JSON_PASTE40(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE39(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39) +#define NLOHMANN_JSON_PASTE41(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE40(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40) +#define NLOHMANN_JSON_PASTE42(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE41(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41) +#define NLOHMANN_JSON_PASTE43(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE42(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42) +#define NLOHMANN_JSON_PASTE44(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43) 
NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE43(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43) +#define NLOHMANN_JSON_PASTE45(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE44(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44) +#define NLOHMANN_JSON_PASTE46(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE45(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45) +#define NLOHMANN_JSON_PASTE47(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE46(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46) +#define NLOHMANN_JSON_PASTE48(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE47(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47) +#define NLOHMANN_JSON_PASTE49(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE48(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48) +#define NLOHMANN_JSON_PASTE50(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE49(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49) +#define NLOHMANN_JSON_PASTE51(func, v1, v2, v3, v4, v5, 
v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE50(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50) +#define NLOHMANN_JSON_PASTE52(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE51(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51) +#define NLOHMANN_JSON_PASTE53(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE52(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52) +#define NLOHMANN_JSON_PASTE54(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE53(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53) +#define NLOHMANN_JSON_PASTE55(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE54(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54) +#define NLOHMANN_JSON_PASTE56(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE55(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55) +#define 
NLOHMANN_JSON_PASTE57(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55, v56) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE56(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55, v56) +#define NLOHMANN_JSON_PASTE58(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55, v56, v57) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE57(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55, v56, v57) +#define NLOHMANN_JSON_PASTE59(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55, v56, v57, v58) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE58(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55, v56, v57, v58) +#define NLOHMANN_JSON_PASTE60(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55, v56, v57, v58, v59) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE59(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55, v56, v57, v58, v59) +#define NLOHMANN_JSON_PASTE61(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55, v56, v57, v58, v59, v60) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE60(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55, v56, v57, v58, v59, v60) +#define NLOHMANN_JSON_PASTE62(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, 
v54, v55, v56, v57, v58, v59, v60, v61) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE61(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55, v56, v57, v58, v59, v60, v61) +#define NLOHMANN_JSON_PASTE63(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55, v56, v57, v58, v59, v60, v61, v62) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE62(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55, v56, v57, v58, v59, v60, v61, v62) +#define NLOHMANN_JSON_PASTE64(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55, v56, v57, v58, v59, v60, v61, v62, v63) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE63(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55, v56, v57, v58, v59, v60, v61, v62, v63) + +#define NLOHMANN_JSON_TO(v1) nlohmann_json_j[#v1] = nlohmann_json_t.v1; +#define NLOHMANN_JSON_FROM(v1) nlohmann_json_j.at(#v1).get_to(nlohmann_json_t.v1); +#define NLOHMANN_JSON_FROM_WITH_DEFAULT(v1) nlohmann_json_t.v1 = nlohmann_json_j.value(#v1, nlohmann_json_default_obj.v1); + +/*! +@brief macro +@def NLOHMANN_DEFINE_TYPE_INTRUSIVE +@since version 3.9.0 +*/ +#define NLOHMANN_DEFINE_TYPE_INTRUSIVE(Type, ...) \ + friend void to_json(nlohmann::json& nlohmann_json_j, const Type& nlohmann_json_t) { NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_TO, __VA_ARGS__)) } \ + friend void from_json(const nlohmann::json& nlohmann_json_j, Type& nlohmann_json_t) { NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_FROM, __VA_ARGS__)) } + +#define NLOHMANN_DEFINE_TYPE_INTRUSIVE_WITH_DEFAULT(Type, ...) \ + friend void to_json(nlohmann::json& nlohmann_json_j, const Type& nlohmann_json_t) { NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_TO, __VA_ARGS__)) } \ + friend void from_json(const nlohmann::json& nlohmann_json_j, Type& nlohmann_json_t) { const Type nlohmann_json_default_obj{}; NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_FROM_WITH_DEFAULT, __VA_ARGS__)) } + +#define NLOHMANN_DEFINE_TYPE_INTRUSIVE_ONLY_SERIALIZE(Type, ...) \ + friend void to_json(nlohmann::json& nlohmann_json_j, const Type& nlohmann_json_t) { NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_TO, __VA_ARGS__)) } + +/*! +@brief macro +@def NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE +@since version 3.9.0 +*/ +#define NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE(Type, ...) 
\ + inline void to_json(nlohmann::json& nlohmann_json_j, const Type& nlohmann_json_t) { NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_TO, __VA_ARGS__)) } \ + inline void from_json(const nlohmann::json& nlohmann_json_j, Type& nlohmann_json_t) { NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_FROM, __VA_ARGS__)) } + +#define NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE_ONLY_SERIALIZE(Type, ...) \ + inline void to_json(nlohmann::json& nlohmann_json_j, const Type& nlohmann_json_t) { NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_TO, __VA_ARGS__)) } + +#define NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE_WITH_DEFAULT(Type, ...) \ + inline void to_json(nlohmann::json& nlohmann_json_j, const Type& nlohmann_json_t) { NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_TO, __VA_ARGS__)) } \ + inline void from_json(const nlohmann::json& nlohmann_json_j, Type& nlohmann_json_t) { const Type nlohmann_json_default_obj{}; NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_FROM_WITH_DEFAULT, __VA_ARGS__)) } + +// inspired from https://stackoverflow.com/a/26745591 +// allows to call any std function as if (e.g. with begin): +// using std::begin; begin(x); +// +// it allows using the detected idiom to retrieve the return type +// of such an expression +#define NLOHMANN_CAN_CALL_STD_FUNC_IMPL(std_name) \ + namespace detail { \ + using std::std_name; \ + \ + template \ + using result_of_##std_name = decltype(std_name(std::declval()...)); \ + } \ + \ + namespace detail2 { \ + struct std_name##_tag \ + { \ + }; \ + \ + template \ + std_name##_tag std_name(T&&...); \ + \ + template \ + using result_of_##std_name = decltype(std_name(std::declval()...)); \ + \ + template \ + struct would_call_std_##std_name \ + { \ + static constexpr auto const value = ::nlohmann::detail:: \ + is_detected_exact::value; \ + }; \ + } /* namespace detail2 */ \ + \ + template \ + struct would_call_std_##std_name : detail2::would_call_std_##std_name \ + { \ + } + +#ifndef JSON_USE_IMPLICIT_CONVERSIONS + #define JSON_USE_IMPLICIT_CONVERSIONS 1 +#endif + +#if JSON_USE_IMPLICIT_CONVERSIONS + #define JSON_EXPLICIT +#else + #define JSON_EXPLICIT explicit +#endif + +#ifndef JSON_DISABLE_ENUM_SERIALIZATION + #define JSON_DISABLE_ENUM_SERIALIZATION 0 +#endif + +#ifndef JSON_USE_GLOBAL_UDLS + #define JSON_USE_GLOBAL_UDLS 1 +#endif + +#if JSON_HAS_THREE_WAY_COMPARISON + #include // partial_ordering +#endif + +NLOHMANN_JSON_NAMESPACE_BEGIN +namespace detail +{ + +/////////////////////////// +// JSON type enumeration // +/////////////////////////// + +/*! +@brief the JSON type enumeration + +This enumeration collects the different JSON types. It is internally used to +distinguish the stored values, and the functions @ref basic_json::is_null(), +@ref basic_json::is_object(), @ref basic_json::is_array(), +@ref basic_json::is_string(), @ref basic_json::is_boolean(), +@ref basic_json::is_number() (with @ref basic_json::is_number_integer(), +@ref basic_json::is_number_unsigned(), and @ref basic_json::is_number_float()), +@ref basic_json::is_discarded(), @ref basic_json::is_primitive(), and +@ref basic_json::is_structured() rely on it. 
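+
+A minimal sketch of how this enumeration surfaces through the public API
+(assuming the default nlohmann::json specialization and its @ref
+basic_json::type() accessor):
+
+@code{.cpp}
+#include <cassert>
+#include <nlohmann/json.hpp>
+
+int main()
+{
+    // a braced-init-list of key/value pairs yields value_t::object;
+    // each member reports its own stored type via basic_json::type()
+    nlohmann::json j = {{"pi", 3.141}, {"valid", true}};
+    assert(j.type() == nlohmann::json::value_t::object);
+    assert(j["pi"].type() == nlohmann::json::value_t::number_float);
+    assert(j["valid"].type() == nlohmann::json::value_t::boolean);
+}
+@endcode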
+
+@note There are three enumeration entries (number_integer, number_unsigned, and
+number_float), because the library distinguishes these three types for numbers:
+@ref basic_json::number_unsigned_t is used for unsigned integers,
+@ref basic_json::number_integer_t is used for signed integers, and
+@ref basic_json::number_float_t is used for floating-point numbers or to
+approximate integers which do not fit in the limits of their respective type.
+
+@sa see @ref basic_json::basic_json(const value_t value_type) -- create a JSON
+value with the default value for a given type
+
+@since version 1.0.0
+*/
+enum class value_t : std::uint8_t
+{
+    null,             ///< null value
+    object,           ///< object (unordered set of name/value pairs)
+    array,            ///< array (ordered collection of values)
+    string,           ///< string value
+    boolean,          ///< boolean value
+    number_integer,   ///< number value (signed integer)
+    number_unsigned,  ///< number value (unsigned integer)
+    number_float,     ///< number value (floating-point)
+    binary,           ///< binary array (ordered collection of bytes)
+    discarded         ///< discarded by the parser callback function
+};
+
+/*!
+@brief comparison operator for JSON types
+
+Returns an ordering that is similar to Python:
+- order: null < boolean < number < object < array < string < binary
+- furthermore, each type is not smaller than itself
+- discarded values are not comparable
+- binary is represented as a b"" string in python and directly comparable to a
+  string; however, making a binary array directly comparable with a string would
+  be surprising behavior in a JSON file.
+
+@since version 1.0.0
+*/
+#if JSON_HAS_THREE_WAY_COMPARISON
+    inline std::partial_ordering operator<=>(const value_t lhs, const value_t rhs) noexcept // *NOPAD*
+#else
+    inline bool operator<(const value_t lhs, const value_t rhs) noexcept
+#endif
+{
+    static constexpr std::array<std::uint8_t, 9> order = {{
+            0 /* null */, 3 /* object */, 4 /* array */, 5 /* string */,
+            1 /* boolean */, 2 /* integer */, 2 /* unsigned */, 2 /* float */,
+            6 /* binary */
+        }
+    };
+
+    const auto l_index = static_cast<std::size_t>(lhs);
+    const auto r_index = static_cast<std::size_t>(rhs);
+#if JSON_HAS_THREE_WAY_COMPARISON
+    if (l_index < order.size() && r_index < order.size())
+    {
+        return order[l_index] <=> order[r_index]; // *NOPAD*
+    }
+    return std::partial_ordering::unordered;
+#else
+    return l_index < order.size() && r_index < order.size() && order[l_index] < order[r_index];
+#endif
+}
+
+// GCC selects the built-in operator< over an operator rewritten from
+// a user-defined spaceship operator
+// Clang, MSVC, and ICC select the rewritten candidate
+// (see GCC bug https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105200)
+#if JSON_HAS_THREE_WAY_COMPARISON && defined(__GNUC__)
+inline bool operator<(const value_t lhs, const value_t rhs) noexcept
+{
+    return std::is_lt(lhs <=> rhs); // *NOPAD*
+}
+#endif
+
+} // namespace detail
+NLOHMANN_JSON_NAMESPACE_END
+
+// #include
+// __ _____ _____ _____
+// __| | __| | | | JSON for Modern C++
+// | | |__ | | | | | | version 3.11.3
+// |_____|_____|_____|_|___| https://github.com/nlohmann/json
+//
+// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann
+// SPDX-License-Identifier: MIT
+
+
+
+// #include
+
+
+NLOHMANN_JSON_NAMESPACE_BEGIN
+namespace detail
+{
+
+/*!
+@brief replace all occurrences of a substring by another string + +@param[in,out] s the string to manipulate; changed so that all + occurrences of @a f are replaced with @a t +@param[in] f the substring to replace with @a t +@param[in] t the string to replace @a f + +@pre The search string @a f must not be empty. **This precondition is +enforced with an assertion.** + +@since version 2.0.0 +*/ +template +inline void replace_substring(StringType& s, const StringType& f, + const StringType& t) +{ + JSON_ASSERT(!f.empty()); + for (auto pos = s.find(f); // find first occurrence of f + pos != StringType::npos; // make sure f was found + s.replace(pos, f.size(), t), // replace with t, and + pos = s.find(f, pos + t.size())) // find next occurrence of f + {} +} + +/*! + * @brief string escaping as described in RFC 6901 (Sect. 4) + * @param[in] s string to escape + * @return escaped string + * + * Note the order of escaping "~" to "~0" and "/" to "~1" is important. + */ +template +inline StringType escape(StringType s) +{ + replace_substring(s, StringType{"~"}, StringType{"~0"}); + replace_substring(s, StringType{"/"}, StringType{"~1"}); + return s; +} + +/*! + * @brief string unescaping as described in RFC 6901 (Sect. 4) + * @param[in] s string to unescape + * @return unescaped string + * + * Note the order of escaping "~1" to "/" and "~0" to "~" is important. + */ +template +static void unescape(StringType& s) +{ + replace_substring(s, StringType{"~1"}, StringType{"/"}); + replace_substring(s, StringType{"~0"}, StringType{"~"}); +} + +} // namespace detail +NLOHMANN_JSON_NAMESPACE_END + +// #include +// __ _____ _____ _____ +// __| | __| | | | JSON for Modern C++ +// | | |__ | | | | | | version 3.11.3 +// |_____|_____|_____|_|___| https://github.com/nlohmann/json +// +// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann +// SPDX-License-Identifier: MIT + + + +#include // size_t + +// #include + + +NLOHMANN_JSON_NAMESPACE_BEGIN +namespace detail +{ + +/// struct to capture the start position of the current token +struct position_t +{ + /// the total number of characters read + std::size_t chars_read_total = 0; + /// the number of characters read in the current line + std::size_t chars_read_current_line = 0; + /// the number of lines read + std::size_t lines_read = 0; + + /// conversion to size_t to preserve SAX interface + constexpr operator size_t() const + { + return chars_read_total; + } +}; + +} // namespace detail +NLOHMANN_JSON_NAMESPACE_END + +// #include + +// #include +// __ _____ _____ _____ +// __| | __| | | | JSON for Modern C++ +// | | |__ | | | | | | version 3.11.3 +// |_____|_____|_____|_|___| https://github.com/nlohmann/json +// +// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann +// SPDX-FileCopyrightText: 2018 The Abseil Authors +// SPDX-License-Identifier: MIT + + + +#include // array +#include // size_t +#include // conditional, enable_if, false_type, integral_constant, is_constructible, is_integral, is_same, remove_cv, remove_reference, true_type +#include // index_sequence, make_index_sequence, index_sequence_for + +// #include + + +NLOHMANN_JSON_NAMESPACE_BEGIN +namespace detail +{ + +template +using uncvref_t = typename std::remove_cv::type>::type; + +#ifdef JSON_HAS_CPP_14 + +// the following utilities are natively available in C++14 +using std::enable_if_t; +using std::index_sequence; +using std::make_index_sequence; +using std::index_sequence_for; + +#else + +// alias templates to reduce boilerplate +template +using enable_if_t = typename std::enable_if::type; + +// The 
following code is taken from https://github.com/abseil/abseil-cpp/blob/10cb35e459f5ecca5b2ff107635da0bfa41011b4/absl/utility/utility.h +// which is part of Google Abseil (https://github.com/abseil/abseil-cpp), licensed under the Apache License 2.0. + +//// START OF CODE FROM GOOGLE ABSEIL + +// integer_sequence +// +// Class template representing a compile-time integer sequence. An instantiation +// of `integer_sequence` has a sequence of integers encoded in its +// type through its template arguments (which is a common need when +// working with C++11 variadic templates). `absl::integer_sequence` is designed +// to be a drop-in replacement for C++14's `std::integer_sequence`. +// +// Example: +// +// template< class T, T... Ints > +// void user_function(integer_sequence); +// +// int main() +// { +// // user_function's `T` will be deduced to `int` and `Ints...` +// // will be deduced to `0, 1, 2, 3, 4`. +// user_function(make_integer_sequence()); +// } +template +struct integer_sequence +{ + using value_type = T; + static constexpr std::size_t size() noexcept + { + return sizeof...(Ints); + } +}; + +// index_sequence +// +// A helper template for an `integer_sequence` of `size_t`, +// `absl::index_sequence` is designed to be a drop-in replacement for C++14's +// `std::index_sequence`. +template +using index_sequence = integer_sequence; + +namespace utility_internal +{ + +template +struct Extend; + +// Note that SeqSize == sizeof...(Ints). It's passed explicitly for efficiency. +template +struct Extend, SeqSize, 0> +{ + using type = integer_sequence < T, Ints..., (Ints + SeqSize)... >; +}; + +template +struct Extend, SeqSize, 1> +{ + using type = integer_sequence < T, Ints..., (Ints + SeqSize)..., 2 * SeqSize >; +}; + +// Recursion helper for 'make_integer_sequence'. +// 'Gen::type' is an alias for 'integer_sequence'. +template +struct Gen +{ + using type = + typename Extend < typename Gen < T, N / 2 >::type, N / 2, N % 2 >::type; +}; + +template +struct Gen +{ + using type = integer_sequence; +}; + +} // namespace utility_internal + +// Compile-time sequences of integers + +// make_integer_sequence +// +// This template alias is equivalent to +// `integer_sequence`, and is designed to be a drop-in +// replacement for C++14's `std::make_integer_sequence`. +template +using make_integer_sequence = typename utility_internal::Gen::type; + +// make_index_sequence +// +// This template alias is equivalent to `index_sequence<0, 1, ..., N-1>`, +// and is designed to be a drop-in replacement for C++14's +// `std::make_index_sequence`. +template +using make_index_sequence = make_integer_sequence; + +// index_sequence_for +// +// Converts a typename pack into an index sequence of the same length, and +// is designed to be a drop-in replacement for C++14's +// `std::index_sequence_for()` +template +using index_sequence_for = make_index_sequence; + +//// END OF CODE FROM GOOGLE ABSEIL + +#endif + +// dispatch utility (taken from ranges-v3) +template struct priority_tag : priority_tag < N - 1 > {}; +template<> struct priority_tag<0> {}; + +// taken from ranges-v3 +template +struct static_const +{ + static JSON_INLINE_VARIABLE constexpr T value{}; +}; + +#ifndef JSON_HAS_CPP_17 + template + constexpr T static_const::value; +#endif + +template +inline constexpr std::array make_array(Args&& ... 
args) +{ + return std::array {{static_cast(std::forward(args))...}}; +} + +} // namespace detail +NLOHMANN_JSON_NAMESPACE_END + +// #include +// __ _____ _____ _____ +// __| | __| | | | JSON for Modern C++ +// | | |__ | | | | | | version 3.11.3 +// |_____|_____|_____|_|___| https://github.com/nlohmann/json +// +// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann +// SPDX-License-Identifier: MIT + + + +#include // numeric_limits +#include // false_type, is_constructible, is_integral, is_same, true_type +#include // declval +#include // tuple +#include // char_traits + +// #include +// __ _____ _____ _____ +// __| | __| | | | JSON for Modern C++ +// | | |__ | | | | | | version 3.11.3 +// |_____|_____|_____|_|___| https://github.com/nlohmann/json +// +// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann +// SPDX-License-Identifier: MIT + + + +#include // random_access_iterator_tag + +// #include + +// #include + +// #include + + +NLOHMANN_JSON_NAMESPACE_BEGIN +namespace detail +{ + +template +struct iterator_types {}; + +template +struct iterator_types < + It, + void_t> +{ + using difference_type = typename It::difference_type; + using value_type = typename It::value_type; + using pointer = typename It::pointer; + using reference = typename It::reference; + using iterator_category = typename It::iterator_category; +}; + +// This is required as some compilers implement std::iterator_traits in a way that +// doesn't work with SFINAE. See https://github.com/nlohmann/json/issues/1341. +template +struct iterator_traits +{ +}; + +template +struct iterator_traits < T, enable_if_t < !std::is_pointer::value >> + : iterator_types +{ +}; + +template +struct iterator_traits::value>> +{ + using iterator_category = std::random_access_iterator_tag; + using value_type = T; + using difference_type = ptrdiff_t; + using pointer = T*; + using reference = T&; +}; + +} // namespace detail +NLOHMANN_JSON_NAMESPACE_END + +// #include + +// #include +// __ _____ _____ _____ +// __| | __| | | | JSON for Modern C++ +// | | |__ | | | | | | version 3.11.3 +// |_____|_____|_____|_|___| https://github.com/nlohmann/json +// +// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann +// SPDX-License-Identifier: MIT + + + +// #include + + +NLOHMANN_JSON_NAMESPACE_BEGIN + +NLOHMANN_CAN_CALL_STD_FUNC_IMPL(begin); + +NLOHMANN_JSON_NAMESPACE_END + +// #include +// __ _____ _____ _____ +// __| | __| | | | JSON for Modern C++ +// | | |__ | | | | | | version 3.11.3 +// |_____|_____|_____|_|___| https://github.com/nlohmann/json +// +// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann +// SPDX-License-Identifier: MIT + + + +// #include + + +NLOHMANN_JSON_NAMESPACE_BEGIN + +NLOHMANN_CAN_CALL_STD_FUNC_IMPL(end); + +NLOHMANN_JSON_NAMESPACE_END + +// #include + +// #include + +// #include +// __ _____ _____ _____ +// __| | __| | | | JSON for Modern C++ +// | | |__ | | | | | | version 3.11.3 +// |_____|_____|_____|_|___| https://github.com/nlohmann/json +// +// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann +// SPDX-License-Identifier: MIT + +#ifndef INCLUDE_NLOHMANN_JSON_FWD_HPP_ + #define INCLUDE_NLOHMANN_JSON_FWD_HPP_ + + #include // int64_t, uint64_t + #include // map + #include // allocator + #include // string + #include // vector + + // #include + + + /*! + @brief namespace for Niels Lohmann + @see https://github.com/nlohmann + @since version 1.0.0 + */ + NLOHMANN_JSON_NAMESPACE_BEGIN + + /*! 
+ @brief default JSONSerializer template argument + + This serializer ignores the template arguments and uses ADL + ([argument-dependent lookup](https://en.cppreference.com/w/cpp/language/adl)) + for serialization. + */ + template + struct adl_serializer; + + /// a class to store JSON values + /// @sa https://json.nlohmann.me/api/basic_json/ + template class ObjectType = + std::map, + template class ArrayType = std::vector, + class StringType = std::string, class BooleanType = bool, + class NumberIntegerType = std::int64_t, + class NumberUnsignedType = std::uint64_t, + class NumberFloatType = double, + template class AllocatorType = std::allocator, + template class JSONSerializer = + adl_serializer, + class BinaryType = std::vector, // cppcheck-suppress syntaxError + class CustomBaseClass = void> + class basic_json; + + /// @brief JSON Pointer defines a string syntax for identifying a specific value within a JSON document + /// @sa https://json.nlohmann.me/api/json_pointer/ + template + class json_pointer; + + /*! + @brief default specialization + @sa https://json.nlohmann.me/api/json/ + */ + using json = basic_json<>; + + /// @brief a minimal map-like container that preserves insertion order + /// @sa https://json.nlohmann.me/api/ordered_map/ + template + struct ordered_map; + + /// @brief specialization that maintains the insertion order of object keys + /// @sa https://json.nlohmann.me/api/ordered_json/ + using ordered_json = basic_json; + + NLOHMANN_JSON_NAMESPACE_END + +#endif // INCLUDE_NLOHMANN_JSON_FWD_HPP_ + + +NLOHMANN_JSON_NAMESPACE_BEGIN +/*! +@brief detail namespace with internal helper functions + +This namespace collects functions that should not be exposed, +implementations of some @ref basic_json methods, and meta-programming helpers. + +@since version 2.1.0 +*/ +namespace detail +{ + +///////////// +// helpers // +///////////// + +// Note to maintainers: +// +// Every trait in this file expects a non CV-qualified type. +// The only exceptions are in the 'aliases for detected' section +// (i.e. those of the form: decltype(T::member_function(std::declval()))) +// +// In this case, T has to be properly CV-qualified to constraint the function arguments +// (e.g. 
to_json(BasicJsonType&, const T&)) + +template struct is_basic_json : std::false_type {}; + +NLOHMANN_BASIC_JSON_TPL_DECLARATION +struct is_basic_json : std::true_type {}; + +// used by exceptions create() member functions +// true_type for pointer to possibly cv-qualified basic_json or std::nullptr_t +// false_type otherwise +template +struct is_basic_json_context : + std::integral_constant < bool, + is_basic_json::type>::type>::value + || std::is_same::value > +{}; + +////////////////////// +// json_ref helpers // +////////////////////// + +template +class json_ref; + +template +struct is_json_ref : std::false_type {}; + +template +struct is_json_ref> : std::true_type {}; + +////////////////////////// +// aliases for detected // +////////////////////////// + +template +using mapped_type_t = typename T::mapped_type; + +template +using key_type_t = typename T::key_type; + +template +using value_type_t = typename T::value_type; + +template +using difference_type_t = typename T::difference_type; + +template +using pointer_t = typename T::pointer; + +template +using reference_t = typename T::reference; + +template +using iterator_category_t = typename T::iterator_category; + +template +using to_json_function = decltype(T::to_json(std::declval()...)); + +template +using from_json_function = decltype(T::from_json(std::declval()...)); + +template +using get_template_function = decltype(std::declval().template get()); + +// trait checking if JSONSerializer::from_json(json const&, udt&) exists +template +struct has_from_json : std::false_type {}; + +// trait checking if j.get is valid +// use this trait instead of std::is_constructible or std::is_convertible, +// both rely on, or make use of implicit conversions, and thus fail when T +// has several constructors/operator= (see https://github.com/nlohmann/json/issues/958) +template +struct is_getable +{ + static constexpr bool value = is_detected::value; +}; + +template +struct has_from_json < BasicJsonType, T, enable_if_t < !is_basic_json::value >> +{ + using serializer = typename BasicJsonType::template json_serializer; + + static constexpr bool value = + is_detected_exact::value; +}; + +// This trait checks if JSONSerializer::from_json(json const&) exists +// this overload is used for non-default-constructible user-defined-types +template +struct has_non_default_from_json : std::false_type {}; + +template +struct has_non_default_from_json < BasicJsonType, T, enable_if_t < !is_basic_json::value >> +{ + using serializer = typename BasicJsonType::template json_serializer; + + static constexpr bool value = + is_detected_exact::value; +}; + +// This trait checks if BasicJsonType::json_serializer::to_json exists +// Do not evaluate the trait when T is a basic_json type, to avoid template instantiation infinite recursion. 
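// ---------------------------------------------------------------------------
// Editorial sketch (not upstream code): the traits above (has_from_json,
// is_getable, and friends) are built on the detection idiom. The snippet
// re-creates that idiom in a local namespace so it compiles on its own as
// C++17; `detect_demo`, `nonesuch`, and `has_mapped_type` are names invented
// for this example, while the library keeps its own versions elsewhere in
// this header.
// ---------------------------------------------------------------------------
#include <iostream>
#include <map>
#include <string>
#include <type_traits>
#include <vector>

namespace detect_demo
{
struct nonesuch {};  // placeholder "type" returned when detection fails

// Primary template: used when Op<Args...> is ill-formed.
template<class AlwaysVoid, template<class...> class Op, class... Args>
struct detector
{
    using value_t = std::false_type;
    using type    = nonesuch;
};

// Specialization: selected when Op<Args...> is a valid type.
template<template<class...> class Op, class... Args>
struct detector<std::void_t<Op<Args...>>, Op, Args...>
{
    using value_t = std::true_type;
    using type    = Op<Args...>;
};

template<template<class...> class Op, class... Args>
using is_detected = typename detector<void, Op, Args...>::value_t;

// "alias for detected": the expression whose validity we want to probe.
template<class T>
using mapped_type_t = typename T::mapped_type;

template<class T>
using has_mapped_type = is_detected<mapped_type_t, T>;
} // namespace detect_demo

int main()
{
    using detect_demo::has_mapped_type;
    std::cout << has_mapped_type<std::map<std::string, int>>::value << ' '  // 1
              << has_mapped_type<std::vector<int>>::value << '\n';          // 0
    return 0;
}
// ---------------------------------------------------------------------------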
+template +struct has_to_json : std::false_type {}; + +template +struct has_to_json < BasicJsonType, T, enable_if_t < !is_basic_json::value >> +{ + using serializer = typename BasicJsonType::template json_serializer; + + static constexpr bool value = + is_detected_exact::value; +}; + +template +using detect_key_compare = typename T::key_compare; + +template +struct has_key_compare : std::integral_constant::value> {}; + +// obtains the actual object key comparator +template +struct actual_object_comparator +{ + using object_t = typename BasicJsonType::object_t; + using object_comparator_t = typename BasicJsonType::default_object_comparator_t; + using type = typename std::conditional < has_key_compare::value, + typename object_t::key_compare, object_comparator_t>::type; +}; + +template +using actual_object_comparator_t = typename actual_object_comparator::type; + +///////////////// +// char_traits // +///////////////// + +// Primary template of char_traits calls std char_traits +template +struct char_traits : std::char_traits +{}; + +// Explicitly define char traits for unsigned char since it is not standard +template<> +struct char_traits : std::char_traits +{ + using char_type = unsigned char; + using int_type = uint64_t; + + // Redefine to_int_type function + static int_type to_int_type(char_type c) noexcept + { + return static_cast(c); + } + + static char_type to_char_type(int_type i) noexcept + { + return static_cast(i); + } + + static constexpr int_type eof() noexcept + { + return static_cast(EOF); + } +}; + +// Explicitly define char traits for signed char since it is not standard +template<> +struct char_traits : std::char_traits +{ + using char_type = signed char; + using int_type = uint64_t; + + // Redefine to_int_type function + static int_type to_int_type(char_type c) noexcept + { + return static_cast(c); + } + + static char_type to_char_type(int_type i) noexcept + { + return static_cast(i); + } + + static constexpr int_type eof() noexcept + { + return static_cast(EOF); + } +}; + +/////////////////// +// is_ functions // +/////////////////// + +// https://en.cppreference.com/w/cpp/types/conjunction +template struct conjunction : std::true_type { }; +template struct conjunction : B { }; +template +struct conjunction +: std::conditional(B::value), conjunction, B>::type {}; + +// https://en.cppreference.com/w/cpp/types/negation +template struct negation : std::integral_constant < bool, !B::value > { }; + +// Reimplementation of is_constructible and is_default_constructible, due to them being broken for +// std::pair and std::tuple until LWG 2367 fix (see https://cplusplus.github.io/LWG/lwg-defects.html#2367). +// This causes compile errors in e.g. clang 3.5 or gcc 4.9. 
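// ---------------------------------------------------------------------------
// Editorial sketch (not upstream code): the pattern the following definitions
// use, shown in isolation. A C++11-style conjunction is combined with an
// element-wise is_default_constructible so that a std::tuple counts as
// default constructible only if every element does, mirroring the LWG 2367
// workaround described above. The `demo` namespace and the type `no_default`
// are names invented for this example.
// ---------------------------------------------------------------------------
#include <iostream>
#include <tuple>
#include <type_traits>

namespace demo
{
// Short-circuiting conjunction: stops instantiating at the first false member.
template<class...> struct conjunction : std::true_type {};
template<class B1> struct conjunction<B1> : B1 {};
template<class B1, class... Bn>
struct conjunction<B1, Bn...>
    : std::conditional<static_cast<bool>(B1::value), conjunction<Bn...>, B1>::type {};

template<class T>
struct is_default_constructible : std::is_default_constructible<T> {};

// Treat a tuple as default constructible only if every element is.
template<class... Ts>
struct is_default_constructible<std::tuple<Ts...>>
    : conjunction<is_default_constructible<Ts>...> {};
} // namespace demo

struct no_default { no_default(int) {} };  // example type without a default ctor

int main()
{
    std::cout << demo::is_default_constructible<std::tuple<int, double>>::value   // 1
              << ' '
              << demo::is_default_constructible<std::tuple<int, no_default>>::value  // 0
              << '\n';
    return 0;
}
// ---------------------------------------------------------------------------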
+template +struct is_default_constructible : std::is_default_constructible {}; + +template +struct is_default_constructible> + : conjunction, is_default_constructible> {}; + +template +struct is_default_constructible> + : conjunction, is_default_constructible> {}; + +template +struct is_default_constructible> + : conjunction...> {}; + +template +struct is_default_constructible> + : conjunction...> {}; + +template +struct is_constructible : std::is_constructible {}; + +template +struct is_constructible> : is_default_constructible> {}; + +template +struct is_constructible> : is_default_constructible> {}; + +template +struct is_constructible> : is_default_constructible> {}; + +template +struct is_constructible> : is_default_constructible> {}; + +template +struct is_iterator_traits : std::false_type {}; + +template +struct is_iterator_traits> +{ + private: + using traits = iterator_traits; + + public: + static constexpr auto value = + is_detected::value && + is_detected::value && + is_detected::value && + is_detected::value && + is_detected::value; +}; + +template +struct is_range +{ + private: + using t_ref = typename std::add_lvalue_reference::type; + + using iterator = detected_t; + using sentinel = detected_t; + + // to be 100% correct, it should use https://en.cppreference.com/w/cpp/iterator/input_or_output_iterator + // and https://en.cppreference.com/w/cpp/iterator/sentinel_for + // but reimplementing these would be too much work, as a lot of other concepts are used underneath + static constexpr auto is_iterator_begin = + is_iterator_traits>::value; + + public: + static constexpr bool value = !std::is_same::value && !std::is_same::value && is_iterator_begin; +}; + +template +using iterator_t = enable_if_t::value, result_of_begin())>>; + +template +using range_value_t = value_type_t>>; + +// The following implementation of is_complete_type is taken from +// https://blogs.msdn.microsoft.com/vcblog/2015/12/02/partial-support-for-expression-sfinae-in-vs-2015-update-1/ +// and is written by Xiang Fan who agreed to using it in this library. + +template +struct is_complete_type : std::false_type {}; + +template +struct is_complete_type : std::true_type {}; + +template +struct is_compatible_object_type_impl : std::false_type {}; + +template +struct is_compatible_object_type_impl < + BasicJsonType, CompatibleObjectType, + enable_if_t < is_detected::value&& + is_detected::value >> +{ + using object_t = typename BasicJsonType::object_t; + + // macOS's is_constructible does not play well with nonesuch... 
+ static constexpr bool value = + is_constructible::value && + is_constructible::value; +}; + +template +struct is_compatible_object_type + : is_compatible_object_type_impl {}; + +template +struct is_constructible_object_type_impl : std::false_type {}; + +template +struct is_constructible_object_type_impl < + BasicJsonType, ConstructibleObjectType, + enable_if_t < is_detected::value&& + is_detected::value >> +{ + using object_t = typename BasicJsonType::object_t; + + static constexpr bool value = + (is_default_constructible::value && + (std::is_move_assignable::value || + std::is_copy_assignable::value) && + (is_constructible::value && + std::is_same < + typename object_t::mapped_type, + typename ConstructibleObjectType::mapped_type >::value)) || + (has_from_json::value || + has_non_default_from_json < + BasicJsonType, + typename ConstructibleObjectType::mapped_type >::value); +}; + +template +struct is_constructible_object_type + : is_constructible_object_type_impl {}; + +template +struct is_compatible_string_type +{ + static constexpr auto value = + is_constructible::value; +}; + +template +struct is_constructible_string_type +{ + // launder type through decltype() to fix compilation failure on ICPC +#ifdef __INTEL_COMPILER + using laundered_type = decltype(std::declval()); +#else + using laundered_type = ConstructibleStringType; +#endif + + static constexpr auto value = + conjunction < + is_constructible, + is_detected_exact>::value; +}; + +template +struct is_compatible_array_type_impl : std::false_type {}; + +template +struct is_compatible_array_type_impl < + BasicJsonType, CompatibleArrayType, + enable_if_t < + is_detected::value&& + is_iterator_traits>>::value&& +// special case for types like std::filesystem::path whose iterator's value_type are themselves +// c.f. https://github.com/nlohmann/json/pull/3073 + !std::is_same>::value >> +{ + static constexpr bool value = + is_constructible>::value; +}; + +template +struct is_compatible_array_type + : is_compatible_array_type_impl {}; + +template +struct is_constructible_array_type_impl : std::false_type {}; + +template +struct is_constructible_array_type_impl < + BasicJsonType, ConstructibleArrayType, + enable_if_t::value >> + : std::true_type {}; + +template +struct is_constructible_array_type_impl < + BasicJsonType, ConstructibleArrayType, + enable_if_t < !std::is_same::value&& + !is_compatible_string_type::value&& + is_default_constructible::value&& +(std::is_move_assignable::value || + std::is_copy_assignable::value)&& +is_detected::value&& +is_iterator_traits>>::value&& +is_detected::value&& +// special case for types like std::filesystem::path whose iterator's value_type are themselves +// c.f. https://github.com/nlohmann/json/pull/3073 +!std::is_same>::value&& + is_complete_type < + detected_t>::value >> +{ + using value_type = range_value_t; + + static constexpr bool value = + std::is_same::value || + has_from_json::value || + has_non_default_from_json < + BasicJsonType, + value_type >::value; +}; + +template +struct is_constructible_array_type + : is_constructible_array_type_impl {}; + +template +struct is_compatible_integer_type_impl : std::false_type {}; + +template +struct is_compatible_integer_type_impl < + RealIntegerType, CompatibleNumberIntegerType, + enable_if_t < std::is_integral::value&& + std::is_integral::value&& + !std::is_same::value >> +{ + // is there an assert somewhere on overflows? 
+    using RealLimits = std::numeric_limits<RealIntegerType>;
+    using CompatibleLimits = std::numeric_limits<CompatibleNumberIntegerType>;
+
+    static constexpr auto value =
+        is_constructible<RealIntegerType, CompatibleNumberIntegerType>::value &&
+        CompatibleLimits::is_integer &&
+        RealLimits::is_signed == CompatibleLimits::is_signed;
+};
+
+template<typename RealIntegerType, typename CompatibleNumberIntegerType>
+struct is_compatible_integer_type
+    : is_compatible_integer_type_impl<RealIntegerType, CompatibleNumberIntegerType> {};
+
+template<typename BasicJsonType, typename CompatibleType, typename = void>
+struct is_compatible_type_impl: std::false_type {};
+
+template<typename BasicJsonType, typename CompatibleType>
+struct is_compatible_type_impl <
+    BasicJsonType, CompatibleType,
+    enable_if_t<is_complete_type<CompatibleType>::value >>
+{
+    static constexpr bool value =
+        has_to_json<BasicJsonType, CompatibleType>::value;
+};
+
+template<typename BasicJsonType, typename CompatibleType>
+struct is_compatible_type
+    : is_compatible_type_impl<BasicJsonType, CompatibleType> {};
+
+template<typename T1, typename T2>
+struct is_constructible_tuple : std::false_type {};
+
+template<typename T1, typename... Args>
+struct is_constructible_tuple<T1, std::tuple<Args...>> : conjunction<is_constructible<T1, Args>...> {};
+
+template<typename BasicJsonType, typename T>
+struct is_json_iterator_of : std::false_type {};
+
+template<typename BasicJsonType>
+struct is_json_iterator_of<BasicJsonType, typename BasicJsonType::iterator> : std::true_type {};
+
+template<typename BasicJsonType>
+struct is_json_iterator_of<BasicJsonType, typename BasicJsonType::const_iterator> : std::true_type
+{};
+
+// checks if a given type T is a template specialization of Primary
+template<template <typename...> class Primary, typename T>