Update README.md
Browse files
README.md
CHANGED
@@ -216,15 +216,14 @@ python -m pip install llama_cpp_python>=0.2.26 --verbose --force-reinstall --no-
|
|
216 |
import llama_cpp
|
217 |
|
218 |
llm_cpp = llama_cpp.Llama(
|
219 |
-
model_path="models/
|
220 |
n_threads=10, # CPU cores
|
221 |
n_batch=512, # Should be between 1 and n_ctx; consider the amount of VRAM in your GPU.
|
222 |
n_gpu_layers=33, # Change this value based on your model and your GPU VRAM pool.
|
223 |
n_ctx=2048, # Max context length
|
224 |
)
|
225 |
|
226 |
-
prompt = """
|
227 |
-
<|begin_of_text|><|start_header_id|>system<|end_header_id|>
|
228 |
|
229 |
You are a helpful assistant.<|eot_id|><|start_header_id|>user<|end_header_id|>
|
230 |
|
@@ -251,10 +250,10 @@ print(response)
|
|
251 |
"id": "cmpl-b0971ce1-1607-42b3-b6dd-8bf8e324307a",
|
252 |
"object": "text_completion",
|
253 |
"created": 1721478196,
|
254 |
-
"model": "models/
|
255 |
"choices": [
|
256 |
{
|
257 |
-
"text": "
|
258 |
"index": 0,
|
259 |
"logprobs": None,
|
260 |
"finish_reason": "stop",
|
|
|
216 |
import llama_cpp
|
217 |
|
218 |
llm_cpp = llama_cpp.Llama(
|
219 |
+
model_path="models/Meta-Llama-3.1-8B-Instruct-GGUF.Q6_K.gguf", # Path to the model
|
220 |
n_threads=10, # CPU cores
|
221 |
n_batch=512, # Should be between 1 and n_ctx; consider the amount of VRAM in your GPU.
|
222 |
n_gpu_layers=33, # Change this value based on your model and your GPU VRAM pool.
|
223 |
n_ctx=2048, # Max context length
|
224 |
)
|
225 |
|
226 |
+
prompt = """<|begin_of_text|><|start_header_id|>system<|end_header_id|>
|
|
|
227 |
|
228 |
You are a helpful assistant.<|eot_id|><|start_header_id|>user<|end_header_id|>
|
229 |
|
|
|
250 |
"id": "cmpl-b0971ce1-1607-42b3-b6dd-8bf8e324307a",
|
251 |
"object": "text_completion",
|
252 |
"created": 1721478196,
|
253 |
+
"model": "models/Meta-Llama-3.1-8B-Instruct-GGUF.Q6_K.gguf",
|
254 |
"choices": [
|
255 |
{
|
256 |
+
"text": "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\nYou are a helpful assistant.<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nHow fast can cheetah run?<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\nThey can reach top speed of about 75mph (120 kmh)<|eot_id|>",
|
257 |
"index": 0,
|
258 |
"logprobs": None,
|
259 |
"finish_reason": "stop",
|