pek111
/

Meta-Llama-3.1-8B-Instruct-GGUF

Text Generation

Inference Endpoints

Model card Files Files and versions Community

pek111 committed on Jul 24

Commit

0d05ea4

•

1 Parent(s): d991fb7

Update README.md

Files changed (1) hide show

README.md +4 -5

README.md CHANGED Viewed

@@ -216,15 +216,14 @@ python -m pip install llama_cpp_python>=0.2.26 --verbose --force-reinstall --no-
 import llama_cpp
 llm_cpp = llama_cpp.Llama(
-    model_path="models/llama-3-typhoon-v1.5-8b-instruct.Q6_K.gguf",  # Path to the model
     n_threads=10,  # CPU cores
     n_batch=512,  # Should be between 1 and n_ctx, consider the amount of VRAM in your GPU.
     n_gpu_layers=33,  # Change this value based on your model and your GPU VRAM pool.
     n_ctx=2048,  # Max context length
 )
-prompt = """
-<|begin_of_text|><|start_header_id|>system<|end_header_id|>
 You are a helpful assistant.<|eot_id|><|start_header_id|>user<|end_header_id|>
@@ -251,10 +250,10 @@ print(response)
     "id": "cmpl-b0971ce1-1607-42b3-b6dd-8bf8e324307a",
     "object": "text_completion",
     "created": 1721478196,
-    "model": "models/llama-3-typhoon-v1.5-8b-instruct.Q6_K.gguf",
     "choices": [
         {
-            "text": "\n<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\nYou are a helpful assistant who're always speak Thai.<|eot_id|><|start_header_id|>user<|end_header_id|>\n\n1+1 เท่ากับเท่าไหร่<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n2",
             "index": 0,
             "logprobs": None,
             "finish_reason": "stop",

 import llama_cpp
 llm_cpp = llama_cpp.Llama(
+    model_path="models/Meta-Llama-3.1-8B-Instruct-GGUF.Q6_K.gguf",  # Path to the model
     n_threads=10,  # CPU cores
     n_batch=512,  # Should be between 1 and n_ctx, consider the amount of VRAM in your GPU.
     n_gpu_layers=33,  # Change this value based on your model and your GPU VRAM pool.
     n_ctx=2048,  # Max context length
 )
+prompt = """<|begin_of_text|><|start_header_id|>system<|end_header_id|>
 You are a helpful assistant.<|eot_id|><|start_header_id|>user<|end_header_id|>
     "id": "cmpl-b0971ce1-1607-42b3-b6dd-8bf8e324307a",
     "object": "text_completion",
     "created": 1721478196,
+    "model": "models/Meta-Llama-3.1-8B-Instruct-GGUF.Q6_K.gguf",
     "choices": [
         {
+            "text": "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\nYou are a helpful assistant.<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nHow fast can cheetah run?<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\nThey can reach top speed of about 75mph (120 kmh)<|eot_id|>",
             "index": 0,
             "logprobs": None,
             "finish_reason": "stop",