desert committed on
Commit
932195b
1 Parent(s): ded9e09

init inference

Browse files
Files changed (1) hide show
  1. app.py +18 -6
app.py CHANGED
@@ -3,15 +3,27 @@ from llama_cpp import Llama
3
  from huggingface_hub import hf_hub_download
4
 
5
  # Model identifier from Hugging Face
6
- model_repo = "Mat17892/lora_llama_gguf_g14" # Hugging Face model ID
7
 
8
  # Download the GGUF file from Hugging Face
9
- model_path = hf_hub_download(repo_id=model_repo, filename="llama_lora_model.gguf")
10
 
11
- # Load the GGUF model using llama-cpp-python
12
- print("Loading model...")
13
- llm = Llama(model_path=model_path, n_ctx=2048, n_threads=8) # Adjust threads as needed
14
- print("Model loaded!")
 
 
 
 
 
 
 
 
 
 
 
 
15
 
16
  # Chat function
17
  def chat_with_model(user_input, chat_history):
 
3
  from huggingface_hub import hf_hub_download
4
 
5
  # Model identifier from Hugging Face
6
+ adapter_repo = "Mat17892/lora_llama_gguf_g14" # Hugging Face model ID
7
 
8
  # Download the GGUF file from Hugging Face
9
+ lora_adapter_path = hf_hub_download(repo_id=adapter_repo, filename="llama_lora_adapter.gguf")
10
 
11
+ from huggingface_hub import hf_hub_download
12
+
13
+ # Download the base model GGUF file
14
+ base_model_repo = "unsloth/Llama-3.2-3B-Instruct-GGUF"
15
+ base_model_path = hf_hub_download(repo_id=base_model_repo, filename="Llama-3.2-3B-Instruct-Q8_0.gguf")
16
+
17
+
18
+ # Load the base model
19
+ print("Loading base model...")
20
+ llm = Llama(model_path=base_model_path, n_ctx=2048, n_threads=8)
21
+
22
+ # Apply the LoRA adapter
23
+ print("Applying LoRA adapter...")
24
+ llm.load_adapter(adapter_path=lora_adapter_path)
25
+
26
+ print("Model ready with LoRA adapter!")
27
 
28
  # Chat function
29
  def chat_with_model(user_input, chat_history):