import os

import gradio as gr
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

# Hugging Face repo hosting both the quantized base model and the LoRA adapter (GGUF)
MODEL_REPO = "Serveurperso/gemma-2-2b-it-LoRA"
BASE_MODEL_FILE = "gemma-2-2b-it-Q8_0.gguf"
LORA_ADAPTER_FILE = "gemma-2-2B-it-F16-LoRA.gguf"
print("🚀 Téléchargement du modèle GGUF...") |
|
base_model_path = hf_hub_download(repo_id=MODEL_REPO, filename=BASE_MODEL_FILE) |
|
lora_adapter_path = hf_hub_download(repo_id=MODEL_REPO, filename=LORA_ADAPTER_FILE) |
|
print(f"✅ Modèle de base GGUF téléchargé : {base_model_path}") |
|
print(f"✅ Adaptateur LoRA GGUF téléchargé : {lora_adapter_path}") |
|
|
|
|
|
print("🚀 Chargement du modèle GGUF en mémoire...") |
|
llm = Llama(model_path=base_model_path, n_ctx=2048, n_threads=os.cpu_count(), lora_adapter=lora_adapter_path) |
|
print("✅ Modèle GGUF chargé avec succès avec LoRA !") |
|
|
|
|
|

def chat(message):
    # Plain text completion on the raw user message; returns the generated continuation.
    output = llm(message, max_tokens=128)
    return output["choices"][0]["text"]
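
# Optional sketch, not wired into the UI below: gemma-2-2b-it is an instruction-tuned
# model, so sending the message through its chat template usually gives better replies
# than a raw completion. This assumes the GGUF metadata ships a chat template that
# create_chat_completion() can pick up; the helper name is illustrative only.
def chat_with_template(message):
    result = llm.create_chat_completion(
        messages=[{"role": "user", "content": message}],
        max_tokens=128,
    )
    return result["choices"][0]["message"]["content"]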

iface = gr.Interface(fn=chat, inputs="text", outputs="text", title="Mémé Ginette GGUF + LoRA")
print("🚀 Interface Gradio lancée sur port 7860") |
|
iface.launch(share=True) |
|
|