desert
committed on
Commit
•
932195b
1
Parent(s):
ded9e09
init inference
Browse files
app.py
CHANGED
@@ -3,15 +3,27 @@ from llama_cpp import Llama
|
|
3 |
from huggingface_hub import hf_hub_download
|
4 |
|
5 |
# Model identifier from Hugging Face
|
6 |
-
|
7 |
|
8 |
# Download the GGUF file from Hugging Face
|
9 |
-
|
10 |
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
15 |
|
16 |
# Chat function
|
17 |
def chat_with_model(user_input, chat_history):
|
|
|
3 |
from huggingface_hub import hf_hub_download

# Model identifier from Hugging Face
adapter_repo = "Mat17892/lora_llama_gguf_g14"  # Hugging Face model ID

# Download the GGUF file from Hugging Face
lora_adapter_path = hf_hub_download(repo_id=adapter_repo, filename="llama_lora_adapter.gguf")

# Download the base model GGUF file
base_model_repo = "unsloth/Llama-3.2-3B-Instruct-GGUF"
base_model_path = hf_hub_download(repo_id=base_model_repo, filename="Llama-3.2-3B-Instruct-Q8_0.gguf")

# Load the base model and apply the LoRA adapter in one step.
# NOTE: llama_cpp.Llama has no public `load_adapter` method — calling
# `llm.load_adapter(...)` raises AttributeError. The adapter GGUF must be
# supplied via the `lora_path` constructor argument instead.
print("Loading base model...")
llm = Llama(
    model_path=base_model_path,
    lora_path=lora_adapter_path,  # apply the LoRA adapter at load time
    n_ctx=2048,                   # context window size in tokens
    n_threads=8,                  # CPU threads used for inference
)

print("Model ready with LoRA adapter!")
|
28 |
# Chat function
|
29 |
def chat_with_model(user_input, chat_history):
|