d-e-e-k-11 committed on
Commit
c55e854
·
verified ·
1 Parent(s): 13470f7

Fix: auto-download model from HF model repo at startup

Browse files
Files changed (1) hide show
  1. app.py +50 -21
app.py CHANGED
@@ -1,40 +1,69 @@
1
  import gradio as gr
2
  from llama_cpp import Llama
 
3
  import os
4
 
5
- MODEL_PATH = "llama-2-7b-chat.ggmlv3.q2_K.bin"
 
 
6
 
7
- # Load model at startup
8
- print("Loading Llama-2 model...")
9
  llm = None
10
- if os.path.exists(MODEL_PATH):
11
- llm = Llama(model_path=MODEL_PATH, n_ctx=2048, n_threads=4, verbose=False)
12
- print("Model loaded!")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  else:
14
- print(f"Model not found at {MODEL_PATH}. Upload the model file to the Space.")
15
 
 
16
  def chat(message, history):
17
  if llm is None:
18
- return "Model not loaded. Please upload 'llama-2-7b-chat.ggmlv3.q2_K.bin' to this Space."
19
-
20
- # Build conversation context from history
 
 
 
21
  context = ""
22
- for user_msg, bot_msg in history[-5:]: # use last 5 turns
23
  context += f"[INST] {user_msg} [/INST] {bot_msg} </s>"
24
-
25
- prompt = f"[INST] <<SYS>>\nYou are a helpful AI assistant.\n<</SYS>>\n\n{context}[INST] {message} [/INST]"
26
-
27
- output = llm(prompt, max_tokens=512, stop=["[/INST]", "</s>"], echo=False)
28
- response = output["choices"][0]["text"].strip()
29
- return response
30
-
31
- # Gradio chat interface
 
 
 
 
 
 
 
32
  demo = gr.ChatInterface(
33
  fn=chat,
34
  title="Llama-2-7B Chatbot",
35
  description=(
36
- "An offline AI chatbot powered by **Llama-2-7B** (GGMLv3 Q2_K quantized).\n\n"
37
- "> Note: The model file `llama-2-7b-chat.ggmlv3.q2_K.bin` must be uploaded to the Space files."
 
38
  ),
39
  theme=gr.themes.Soft(
40
  primary_hue="blue",
 
1
  import gradio as gr
2
  from llama_cpp import Llama
3
+ from huggingface_hub import hf_hub_download
4
  import os
5
 
6
MODEL_REPO = "d-e-e-k-11/llama-2-7b-chat-ggml"
MODEL_FILE = "llama-2-7b-chat.ggmlv3.q2_K.bin"
LOCAL_PATH = "/tmp/llama-model.bin"

# ─── Load Model ──────────────────────────────────────────────────────
# Download the GGML weights from the HF model repo (if not already
# present) and load them with llama-cpp. `llm` stays None on any
# failure; chat() returns a placeholder in that case.
llm = None
print("Checking for model...")

if not os.path.exists(LOCAL_PATH):
    print(f"Downloading model from {MODEL_REPO} ...")
    try:
        cached = hf_hub_download(repo_id=MODEL_REPO, filename=MODEL_FILE)
        # BUG FIX: os.path.exists() follows symlinks, so a dangling link
        # left behind after the HF cache is evicted passes the guard
        # above, and os.symlink() would then raise FileExistsError and
        # wedge startup. lexists() sees the link itself; clear it first.
        if os.path.lexists(LOCAL_PATH):
            os.remove(LOCAL_PATH)
        os.symlink(cached, LOCAL_PATH)
        print("Model downloaded via hf_hub_download.")
    except Exception as e:
        # Best-effort: a failed download falls through to the
        # placeholder branch below instead of crashing the Space.
        print(f"Download failed: {e}")

if os.path.exists(LOCAL_PATH):
    print("Loading Llama-2 model into memory...")
    try:
        llm = Llama(model_path=LOCAL_PATH, n_ctx=2048, n_threads=4, verbose=False)
        print("Model ready!")
    except Exception as e:
        print(f"Failed to load model: {e}")
else:
    print("Model file not found. Chatbot will return placeholder responses.")
33
# ─── Chat Function ───────────────────────────────────────────────────
def chat(message, history):
    """Produce one assistant reply for the Gradio ChatInterface.

    Parameters
    ----------
    message : str
        The user's newest message.
    history : list[tuple[str, str]]
        Prior (user, assistant) exchanges; only the last five are kept
        to stay inside the 2048-token context window.

    Returns a plain string: either the model's completion or a
    placeholder while the model is unavailable.
    """
    # Guard: the model may still be downloading/loading at startup.
    if llm is None:
        return (
            "Model is still loading or unavailable. "
            "Please wait a moment and try again, or check the Space logs."
        )

    # Fold the most recent exchanges into Llama-2 [INST] turn markup.
    past_turns = "".join(
        f"[INST] {user_msg} [/INST] {bot_msg} </s>"
        for user_msg, bot_msg in history[-5:]
    )

    prompt = (
        f"[INST] <<SYS>>\nYou are a helpful, respectful AI assistant.\n<</SYS>>\n\n"
        f"{past_turns}[INST] {message} [/INST]"
    )

    completion = llm(
        prompt,
        max_tokens=512,
        stop=["[/INST]", "</s>", "User:"],
        echo=False,
    )
    return completion["choices"][0]["text"].strip()
58
+
59
+ # ─── Gradio UI ───────────────────────────────────────────────────────
60
  demo = gr.ChatInterface(
61
  fn=chat,
62
  title="Llama-2-7B Chatbot",
63
  description=(
64
+ "**Offline AI chatbot** powered by Llama-2-7B (GGMLv3 Q2_K quantized).\n\n"
65
+ "Model is downloaded automatically from Hugging Face on startup (~2.7 GB). "
66
+ "First load may take a few minutes."
67
  ),
68
  theme=gr.themes.Soft(
69
  primary_hue="blue",