Fix: auto-download model from HF model repo at startup
app.py
CHANGED
|
@@ -1,40 +1,69 @@
|
|
| 1 |
import gradio as gr
|
| 2 |
from llama_cpp import Llama
|
|
|
|
| 3 |
import os
|
| 4 |
|
| 5 |
-
|
|
|
|
|
|
|
| 6 |
|
| 7 |
-
# Load
|
| 8 |
-
print("Loading Llama-2 model...")
|
| 9 |
llm = None
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
else:
|
| 14 |
-
print(
|
| 15 |
|
|
|
|
| 16 |
def chat(message, history):
|
| 17 |
if llm is None:
|
| 18 |
-
return
|
| 19 |
-
|
| 20 |
-
|
|
|
|
|
|
|
|
|
|
| 21 |
context = ""
|
| 22 |
-
for user_msg, bot_msg in history[-5:]:
|
| 23 |
context += f"[INST] {user_msg} [/INST] {bot_msg} </s>"
|
| 24 |
-
|
| 25 |
-
prompt =
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 32 |
demo = gr.ChatInterface(
|
| 33 |
fn=chat,
|
| 34 |
title="Llama-2-7B Chatbot",
|
| 35 |
description=(
|
| 36 |
-
"
|
| 37 |
-
"
|
|
|
|
| 38 |
),
|
| 39 |
theme=gr.themes.Soft(
|
| 40 |
primary_hue="blue",
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
from llama_cpp import Llama
|
| 3 |
+
from huggingface_hub import hf_hub_download
|
| 4 |
import os
|
| 5 |
|
| 6 |
+
MODEL_REPO = "d-e-e-k-11/llama-2-7b-chat-ggml"
|
| 7 |
+
MODEL_FILE = "llama-2-7b-chat.ggmlv3.q2_K.bin"
|
| 8 |
+
LOCAL_PATH = "/tmp/llama-model.bin"
|
| 9 |
|
| 10 |
+
# βββ Load Model ββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
|
|
|
| 11 |
llm = None
|
| 12 |
+
print("Checking for model...")
|
| 13 |
+
|
| 14 |
+
if not os.path.exists(LOCAL_PATH):
|
| 15 |
+
print(f"Downloading model from {MODEL_REPO} ...")
|
| 16 |
+
try:
|
| 17 |
+
cached = hf_hub_download(repo_id=MODEL_REPO, filename=MODEL_FILE)
|
| 18 |
+
os.symlink(cached, LOCAL_PATH)
|
| 19 |
+
print("Model downloaded via hf_hub_download.")
|
| 20 |
+
except Exception as e:
|
| 21 |
+
print(f"Download failed: {e}")
|
| 22 |
+
|
| 23 |
+
if os.path.exists(LOCAL_PATH):
|
| 24 |
+
print("Loading Llama-2 model into memory...")
|
| 25 |
+
try:
|
| 26 |
+
llm = Llama(model_path=LOCAL_PATH, n_ctx=2048, n_threads=4, verbose=False)
|
| 27 |
+
print("Model ready!")
|
| 28 |
+
except Exception as e:
|
| 29 |
+
print(f"Failed to load model: {e}")
|
| 30 |
else:
|
| 31 |
+
print("Model file not found. Chatbot will return placeholder responses.")
|
| 32 |
|
| 33 |
+
# βββ Chat Function βββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 34 |
def chat(message, history):
|
| 35 |
if llm is None:
|
| 36 |
+
return (
|
| 37 |
+
"Model is still loading or unavailable. "
|
| 38 |
+
"Please wait a moment and try again, or check the Space logs."
|
| 39 |
+
)
|
| 40 |
+
|
| 41 |
+
# Build context from last 5 turns
|
| 42 |
context = ""
|
| 43 |
+
for user_msg, bot_msg in history[-5:]:
|
| 44 |
context += f"[INST] {user_msg} [/INST] {bot_msg} </s>"
|
| 45 |
+
|
| 46 |
+
prompt = (
|
| 47 |
+
f"[INST] <<SYS>>\nYou are a helpful, respectful AI assistant.\n<</SYS>>\n\n"
|
| 48 |
+
f"{context}[INST] {message} [/INST]"
|
| 49 |
+
)
|
| 50 |
+
|
| 51 |
+
output = llm(
|
| 52 |
+
prompt,
|
| 53 |
+
max_tokens=512,
|
| 54 |
+
stop=["[/INST]", "</s>", "User:"],
|
| 55 |
+
echo=False,
|
| 56 |
+
)
|
| 57 |
+
return output["choices"][0]["text"].strip()
|
| 58 |
+
|
| 59 |
+
# βββ Gradio UI βββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 60 |
demo = gr.ChatInterface(
|
| 61 |
fn=chat,
|
| 62 |
title="Llama-2-7B Chatbot",
|
| 63 |
description=(
|
| 64 |
+
"**Offline AI chatbot** powered by Llama-2-7B (GGMLv3 Q2_K quantized).\n\n"
|
| 65 |
+
"Model is downloaded automatically from Hugging Face on startup (~2.7 GB). "
|
| 66 |
+
"First load may take a few minutes."
|
| 67 |
),
|
| 68 |
theme=gr.themes.Soft(
|
| 69 |
primary_hue="blue",
|