Znilsson committed on
Commit 44522b6 · verified · 1 Parent(s): 07229b0

Initial SurvivalAI Pro deploy

Files changed (1)
  1. app.py +128 -55
app.py CHANGED
@@ -1,69 +1,142 @@
  import gradio as gr
- from huggingface_hub import InferenceClient
-
-
- def respond(
-     message,
-     history: list[dict[str, str]],
-     system_message,
-     max_tokens,
-     temperature,
-     top_p,
-     hf_token: gr.OAuthToken,
- ):
-     """
-     For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
-     """
-     client = InferenceClient(token=hf_token.token, model="openai/gpt-oss-20b")
-
-     messages = [{"role": "system", "content": system_message}]
-
-     messages.extend(history)
-
-     messages.append({"role": "user", "content": message})
-
-     response = ""
-
-     for message in client.chat_completion(
-         messages,
-         max_tokens=max_tokens,
-         stream=True,
-         temperature=temperature,
-         top_p=top_p,
-     ):
-         choices = message.choices
-         token = ""
-         if len(choices) and choices[0].delta.content:
-             token = choices[0].delta.content
-
-         response += token
-         yield response
-
-
  """
- For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
- """
- chatbot = gr.ChatInterface(
-     respond,
-     additional_inputs=[
-         gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
-         gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
-         gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-         gr.Slider(
-             minimum=0.1,
-             maximum=1.0,
-             value=0.95,
-             step=0.05,
-             label="Top-p (nucleus sampling)",
-         ),
-     ],
- )
-
- with gr.Blocks() as demo:
-     with gr.Sidebar():
-         gr.LoginButton()
-     chatbot.render()
-
-
  if __name__ == "__main__":
-     demo.launch()

+ """
+ SurvivalAI Pro — HF Space chat interface.
+
+ Runs the V1 LoRA-finetuned Phi-3 GGUF (Q4_K_M) locally inside the Space via
+ llama-cpp-python. The Space is CPU-only; on paid CPU Upgrade hardware we get
+ ~6-10 tokens/sec. The GGUF is fetched from a separate HF model repo at cold
+ start because the 2.4 GB file exceeds Space repo limits.
+ """
+
+ import os
+ from pathlib import Path
+
  import gradio as gr
+ from huggingface_hub import hf_hub_download
+ from llama_cpp import Llama
+
+
+ # ── Config ───────────────────────────────────────────────────────────────────
+ MODEL_REPO = "Znilsson/survivalai-phi3-gguf"  # private model repo
+ MODEL_FILENAME = "survivalai-phi3-Q4_K_M.gguf"
+ N_CTX = 4096
+ N_THREADS = int(os.environ.get("N_THREADS", "4"))
+ N_BATCH = 256
+ MAX_TOKENS = 400
+ TEMPERATURE = 0.7
+ TOP_P = 0.9
+
+ SYSTEM_MSG = (
+     "You are SurvivalAI, an expert survival and civilizational knowledge "
+     "assistant. You provide accurate, practical, and potentially life-saving "
+     "information about wilderness survival, emergency preparedness, first aid, "
+     "food procurement, water purification, shelter construction, navigation, "
+     "and rebuilding civilization. Your responses are clear, actionable, and "
+     "thorough. The user is in an off-grid context — assume no doctor, no "
+     "Poison Control, no internet, no professional help is available. Give "
+     "the best answer you can with the knowledge you have."
+ )
+
+ # Phi-3 chat template
+ PHI3_TMPL = "<|user|>\n{user}<|end|>\n<|assistant|>\n"
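+ # Halt generation at turn markers so the model cannot run on into a fabricated next user turn.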
+ STOP_TOKENS = ["<|end|>", "<|user|>", "<|endoftext|>"]
+
+
+ # ── Model download + load (cold start) ───────────────────────────────────────
+ print(f"Fetching {MODEL_FILENAME} from {MODEL_REPO}...")
+ model_path = hf_hub_download(
+     repo_id=MODEL_REPO,
+     filename=MODEL_FILENAME,
+     token=os.environ.get("HF_TOKEN"),  # required if repo is private
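+     # "/data" is the Space's persistent-storage mount (when enabled); caching
+     # there avoids re-downloading the 2.4 GB GGUF on every cold start.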
+     cache_dir="/data" if Path("/data").exists() else None,
+ )
+ print(f"Model file: {model_path}")
+
+ print(f"Loading Llama (n_ctx={N_CTX}, n_threads={N_THREADS})...")
+ llm = Llama(
+     model_path=model_path,
+     n_ctx=N_CTX,
+     n_threads=N_THREADS,
+     n_batch=N_BATCH,
+     verbose=False,
+ )
+ print("Model loaded. Ready.")
+
+
+ # ── Chat function ────────────────────────────────────────────────────────────
+ def build_prompt(history, user_msg):
+     """Build a Phi-3 prompt incorporating the system message + chat history.
+
+     Phi-3's chat template uses <|system|>, <|user|>, <|assistant|>, and <|end|>.
+     We collapse the system message into the first user turn for simplicity
+     (the same approach used during training/eval).
+     """
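+     # Resulting prompt shape after one prior exchange (illustrative):
+     #   <|user|>\n{SYSTEM_MSG}\n\nQuestion: <q1><|end|>\n<|assistant|>\n<a1><|end|>
+     #   <|user|>\n<q2><|end|>\n<|assistant|>\n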
+     parts = []
+     # Embed system msg as a preamble inside the first user turn so behavior
+     # matches what the eval rubric saw during training.
+     if not history:
+         first_user = f"{SYSTEM_MSG}\n\nQuestion: {user_msg}"
+         parts.append(PHI3_TMPL.format(user=first_user).rstrip("\n"))
+     else:
+         # Replay history
+         for i, (u, a) in enumerate(history):
+             if i == 0:
+                 u = f"{SYSTEM_MSG}\n\nQuestion: {u}"
+             parts.append(f"<|user|>\n{u}<|end|>\n<|assistant|>\n{a}<|end|>")
+         # Add current turn
+         parts.append(f"<|user|>\n{user_msg}<|end|>\n<|assistant|>\n")
+     return "\n".join(parts)
+
+
+ def chat_fn(message, history):
+     """Generator: yields incremental partial responses for streaming UI."""
+     prompt = build_prompt(history, message)
+     accum = ""
+     try:
+         for chunk in llm(
+             prompt,
+             max_tokens=MAX_TOKENS,
+             temperature=TEMPERATURE,
+             top_p=TOP_P,
+             stop=STOP_TOKENS,
+             stream=True,
+         ):
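+             # llama-cpp-python streams OpenAI-completions-style chunks; the
+             # newly generated text arrives in chunk["choices"][0]["text"].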
+             tok = chunk["choices"][0]["text"]
+             accum += tok
+             yield accum
+     except Exception as e:
+         yield f"[ERROR: {e}]"
+
+
+ # ── UI ───────────────────────────────────────────────────────────────────────
+ EXAMPLES = [
+     "I cut my leg badly with an axe in the woods. Walk me through what to do.",
+     "How do I find drinkable water if I'm stuck in a forest with no supplies?",
+     "It's getting dark and dropping below freezing. How do I build a shelter from what's around?",
+     "What edible plants are common in temperate North American forests?",
+     "I need to navigate without a compass. How do I find north?",
+ ]
+
+ DESCRIPTION = """
+ **SurvivalAI Pro** — fine-tuned off-grid survival assistant, running fully on CPU inside this Space.
+
+ Built on Phi-3-mini-4k-instruct and fine-tuned on ~150,000 survival-knowledge Q/A pairs covering medical
+ first aid, water, food, shelter, fire, navigation, signaling, foraging, hunting, and tools.
+
+ ⚠️ **Prototype — not for clinical or life-critical use.** This model can produce confident-sounding
+ but incorrect specifics in trap categories such as exact drug dosages or precise frequencies. For
+ survival-skill guidance it scores well; for precise numerical specifics, double-check with an
+ authoritative reference.
  """
+
+ demo = gr.ChatInterface(
+     fn=chat_fn,
+     title="🏕️ SurvivalAI Pro",
+     description=DESCRIPTION,
+     examples=EXAMPLES,
+     cache_examples=False,
+     theme=gr.themes.Soft(),
+ )
+
+
  if __name__ == "__main__":
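+     # queue() serialises concurrent chats (the single Llama instance is not
+     # thread-safe); Spaces expect the server on port 7860.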
+     demo.queue().launch(server_name="0.0.0.0", server_port=7860)