desert committed on
Commit
932195b
1 Parent(s): ded9e09

init inference

Browse files
Files changed (1) hide show
  1. app.py +18 -6
app.py CHANGED
@@ -3,15 +3,27 @@ from llama_cpp import Llama
3
  from huggingface_hub import hf_hub_download
4
 
5
  # Model identifier from Hugging Face
6
- model_repo = "Mat17892/lora_llama_gguf_g14" # Hugging Face model ID
7
 
8
  # Download the GGUF file from Hugging Face
9
- model_path = hf_hub_download(repo_id=model_repo, filename="llama_lora_model.gguf")
10
 
11
- # Load the GGUF model using llama-cpp-python
12
- print("Loading model...")
13
- llm = Llama(model_path=model_path, n_ctx=2048, n_threads=8) # Adjust threads as needed
14
- print("Model loaded!")
 
 
 
 
 
 
 
 
 
 
 
 
15
 
16
  # Chat function
17
  def chat_with_model(user_input, chat_history):
 
3
  from huggingface_hub import hf_hub_download
4
 
5
  # Model identifier from Hugging Face
6
+ adapter_repo = "Mat17892/lora_llama_gguf_g14" # Hugging Face model ID
7
 
8
  # Download the GGUF file from Hugging Face
9
+ lora_adapter_path = hf_hub_download(repo_id=adapter_repo, filename="llama_lora_adapter.gguf")
10
 
11
+ from huggingface_hub import hf_hub_download
12
+
13
+ # Download the base model GGUF file
14
+ base_model_repo = "unsloth/Llama-3.2-3B-Instruct-GGUF"
15
+ base_model_path = hf_hub_download(repo_id=base_model_repo, filename="Llama-3.2-3B-Instruct-Q8_0.gguf")
16
+
17
+
18
+ # Load the base model
19
+ print("Loading base model...")
20
+ llm = Llama(model_path=base_model_path, n_ctx=2048, n_threads=8)
21
+
22
+ # Apply the LoRA adapter
23
+ print("Applying LoRA adapter...")
24
+ llm.load_adapter(adapter_path=lora_adapter_path)
25
+
26
+ print("Model ready with LoRA adapter!")
27
 
28
  # Chat function
29
  def chat_with_model(user_input, chat_history):