Update app.py
app.py (CHANGED)
@@ -1255,16 +1255,10 @@ class PromptCompiler:
         return f"{head} {insight}\n\nUser: {final_instruction}\nAssistant:"

 class Hive:
-    def __init__(self, model_id: Optional[str]=None, device: Optional[str]=None, caps: Optional[Dict]=None
+    def __init__(self, model_id: Optional[str]=None, device: Optional[str]=None, caps: Optional[Dict]=None): # type: ignore
         self.caps = caps or probe_caps()
-        self.
-
-        if not self.lite_mode:
-            self.store=CurveStore(CFG["CURVE_DIR"]); self.librarian=LibrarianCurve(self.store)
-            self.engine=EngineCurve()
-            self.overlay=RuntimeOverlay()
-            self.changes=ChangeManager(Hive)
-            self.compiler=PromptCompiler()
+        self.store=CurveStore(CFG["CURVE_DIR"]); self.librarian=LibrarianCurve(self.store)
+        self.compiler=PromptCompiler(); self.engine=EngineCurve()
         if not model_id:
             model_id, info = pick_model(self.caps)
             device = info.get("device","cpu")
@@ -1289,10 +1283,11 @@ class Hive:
         self.model = AutoModelForCausalLM.from_pretrained(self.model_id, trust_remote_code=trust, **kwargs)
         self.pipe = pipeline("text-generation", model=self.model, tokenizer=self.tok, device=0 if (torch and torch.cuda.is_available() and device=="cuda") else -1, return_full_text=False)

-
-
-
-
+        self.overlay=RuntimeOverlay()
+        self.retrieval_k=6; self.decoding_temperature=0.7; self.web_threshold=0.40
+        self.overlay.apply_to(self)
+        self.changes=ChangeManager(Hive)
+        self.selfopt=SelfOptimizer(self); self.selfopt.start() # type: ignore

     def summarize_for_memory(self, text:str, max_new_tokens:int=160)->str:
         prompt=("Condense the following content into 4–6 bullet points with names, dates, numbers, and a one-line takeaway. Keep it factual.\n\n"
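Worth noting in the hunk above: the constructor assigns the hard-coded defaults (`retrieval_k`, `decoding_temperature`, `web_threshold`) before calling `overlay.apply_to(self)`, so any persisted overrides win over the built-ins. The diff does not show `RuntimeOverlay` itself; this is a minimal sketch of that defaults-then-overlay pattern, assuming a simple JSON-backed attribute store (all names here are illustrative):

```python
import json, os

class OverlaySketch:
    """Hypothetical stand-in for RuntimeOverlay: persists attribute
    overrides and re-applies them on top of hard-coded defaults."""
    def __init__(self, path: str = "overlay.json"):
        self.path = path
        self.values = {}
        if os.path.exists(path):
            with open(path) as f:
                self.values = json.load(f)

    def apply_to(self, obj) -> None:
        # Called after defaults are assigned, so stored values win.
        for name, value in self.values.items():
            setattr(obj, name, value)

    def set(self, obj, name: str, value) -> None:
        setattr(obj, name, value)
        self.values[name] = value
        with open(self.path, "w") as f:
            json.dump(self.values, f)
```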
@@ -1301,11 +1296,9 @@
         return out[0]["generated_text"].split("Summary:",1)[-1].strip()

     def add_curve(self, text:str, meta:Dict, scope:str="general"):
-        if self.lite_mode: return
         self.librarian.ingest_pairs([text],[meta],scope)

     def online_update(self, query_hint: Optional[str]=None)->Dict:
-        if self.lite_mode: return {"ok":False, "reason":"lite mode"}
         if not CFG["ONLINE_ENABLE"]: return {"ok":False,"reason":"online disabled"}
         if not online_available(int(CFG["ONLINE_TIMEOUT"])): return {"ok":False,"reason":"offline"}
         seen=_load_json(ONLINE_DB, {})
@@ -1322,7 +1315,6 @@
         _save_json(ONLINE_DB, seen); return {"ok":True,"added":added}

     def web_update_and_store(self, query:str, max_docs:int, timeout:int)->int:
-        if self.lite_mode: return 0
         if not (CFG["ONLINE_ENABLE"] and online_available(timeout)): return 0
         hits=web_search_snippets(query, max_results=max_docs, timeout=timeout); added=0
         for h in hits:
@@ -1335,23 +1327,16 @@

     def chat(self, message:str, effective_role:str, caller_id: Optional[str],
              k:int=None, max_new_tokens:int=256, temperature:float=None, prompt_override: Optional[str] = None) -> str: # type: ignore
-
-        if self.lite_mode:
-            # In lite mode, we bypass all complex logic and just chat.
-            prompt = f"User: {message}\nAssistant:"
-            temp = temperature if temperature is not None else 0.7
-            out = self.pipe(prompt, max_new_tokens=max_new_tokens, do_sample=True, temperature=temp)
-            return out[0]["generated_text"].strip()
-
-        online_now = NET.online_quick()
+        online_now=NET.online_quick()
         if not online_now: NET.kick_async()
         kk = k if k is not None else self.retrieval_k
-        temp = temperature if temperature is not None else self.decoding_temperature
+        temp = temperature if temperature is not None else self.decoding_temperature # type: ignore

         user_obj, _ = _find_user(_load_users(), caller_id)
         user_prefs = (user_obj.get("prefs", {}) or {}) if user_obj else {}
         user_lang = user_prefs.get("language", "en")
         phonics_on = user_prefs.get("phonics_on", False)
+
         intent = self.engine.choose_route(message)
         final_message = message

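In the rewritten `chat()`, connectivity is probed without blocking the request path: `NET.online_quick()` reads a cached answer, and `NET.kick_async()` refreshes it on a background thread when the node looks offline. The `NET` helper itself is not part of this diff; the following is a sketch of that apparent contract under those assumptions:

```python
import socket, threading, time

class NetSketch:
    """Hypothetical sketch of the NET helper's contract as used by chat():
    online_quick() never blocks; kick_async() re-probes in the background."""
    def __init__(self, ttl: float = 30.0):
        self._online = False
        self._checked_at = 0.0
        self._ttl = ttl

    def online_quick(self) -> bool:
        # Cached, non-blocking answer; may be stale by up to ttl seconds.
        return self._online and (time.time() - self._checked_at) < self._ttl

    def kick_async(self) -> None:
        threading.Thread(target=self._probe, daemon=True).start()

    def _probe(self) -> None:
        try:
            socket.create_connection(("1.1.1.1", 53), timeout=2).close()
            self._online = True
        except OSError:
            self._online = False
        self._checked_at = time.time()
```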
@@ -1363,7 +1348,7 @@
             final_message = f"Explain how to pronounce the word '{word_to_process}'. Use this phonics hint in your explanation: {phonics_hint}"
         elif prompt_override:
             final_message = f"{prompt_override}\n\nHere is the text to work on:\n{message}"
-            if "review" in prompt_override.lower() or "essay" in prompt_override.lower(): intent = "essay_review"
+            if "review" in prompt_override.lower() or "essay" in prompt_override.lower(): intent = "essay_review"

         snippets, scores = self.librarian.retrieve_scoped_with_scores(message, effective_role, caller_id, k=kk)
         cov=coverage_score_from_snippets(snippets, scores)
@@ -1375,7 +1360,7 @@
         except Exception:
             pass
         prompt=self.compiler.compile(final_message, snippets, token_budget=int(CFG["CTX_TOKENS"]), intent=intent, user_lang=user_lang)
-        _=self.engine.run(message, snippets)
+        _=self.engine.run(message, snippets)
         out=self.pipe(prompt, max_new_tokens=max_new_tokens, do_sample=True, temperature=temp)
         reply=out[0]["generated_text"].strip()
         if CFG["NO_PROFANITY"]:
@@ -1405,12 +1390,20 @@ def launch_ui(bootstrap_instance: "Bootstrap"):
     # Lazily initialize a global Hive instance to be shared across UI callbacks
     HIVE_INSTANCE: Optional[Hive] = None
     def get_hive_instance():
+        """
+        Returns the appropriate Hive instance.
+        If the full instance is ready, returns it.
+        Otherwise, returns the 'lite' instance for immediate chat.
+        """
         nonlocal HIVE_INSTANCE
-        if
-
-        bootstrap_instance.
-
-
+        # Check if the full instance is ready without blocking
+        if bootstrap_instance.hive_ready.is_set():
+            if HIVE_INSTANCE is None or HIVE_INSTANCE == bootstrap_instance.hive_lite_instance:
+                HIVE_INSTANCE = bootstrap_instance.hive_instance
+                print("[UI] Full Hive instance attached.")
+        elif HIVE_INSTANCE is None:
+            HIVE_INSTANCE = bootstrap_instance.hive_lite_instance
+            print("[UI] Lite Hive instance attached.")
         return HIVE_INSTANCE

     with gr.Blocks(title="Hive 🐝 Full Merged Optimized") as demo:
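The new `get_hive_instance()` deliberately checks `Event.is_set()` rather than calling `Event.wait()`, so a UI callback never stalls behind the slow full build: it serves the lite instance until the event fires, then swaps the cached reference once. (Comparing instances with `==` works here, though `is` would state the identity check more directly.) A self-contained sketch of the pattern, with illustrative names only:

```python
import threading

class InstanceSwitch:
    """Hypothetical sketch of the lite-to-full handoff in get_hive_instance():
    serve the cheap instance immediately, upgrade once `ready` is set."""
    def __init__(self, lite, ready: threading.Event):
        self.lite = lite
        self.full = None            # assigned later by the bootstrap code
        self.ready = ready
        self.current = None

    def get(self):
        if self.ready.is_set():     # non-blocking, unlike ready.wait()
            if self.current is not self.full:
                self.current = self.full
        elif self.current is None:
            self.current = self.lite
        return self.current
```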
@@ -1439,12 +1432,12 @@ def launch_ui(bootstrap_instance: "Bootstrap"):
         mode_picker.change(set_mode, [role_state, mode_picker], [mode_state])

         with gr.Tab("Hive"):
-            core_status = gr.Markdown("⏳ **Initializing Hive Core...**
+            core_status = gr.Markdown("⏳ **Initializing Full Hive Core...** You can chat with the Lite model now. Advanced features will be enabled shortly.")
             chat=gr.Chatbot(height=420)
-            msg=gr.Textbox(placeholder="
+            msg=gr.Textbox(placeholder=f"Talk to {CFG['AGENT_NAME']} (Lite Mode)", interactive=True)

             def talk(m, uid, role, mode, hist):
-                hive_instance = get_hive_instance()
+                hive_instance = get_hive_instance()
                 eff = role if mode=="admin" else "user"

                 # --- Tutor Intent Routing ---
@@ -1460,21 +1453,26 @@ def launch_ui(bootstrap_instance: "Bootstrap"):

                 reply=hive_instance.chat(m or "", effective_role=eff, caller_id=uid, prompt_override=prompt_override, max_new_tokens=max_tokens)

-                # privacy routing
-
-
-
-
-
+                # In full mode, perform privacy routing and save to memory
+                if not hive_instance.lite_mode:
+                    personal = False
+                    if re.search(r"\b(my|mine|me|I|our|we)\b", (m or ""), re.I) and re.search(r"\b(password|address|email|phone|ssn|school|kid|medical|bank|card|passport)\b", (m or ""), re.I):
+                        personal = True
+                    scope = f"user:{uid}" if (uid and personal) else "general"
+                    if hive_instance.librarian: hive_instance.librarian.ingest_pairs([m or ""],[{"dataset":"chat"}], scope=scope)
                 return hist+[[m, reply]], ""
             msg.submit(talk,[msg,uid_state,role_state,mode_state,chat],[chat,msg])

             with gr.Accordion("Tools & Settings", open=False):
                 # This function will run on UI load, wait for the core, and then update the UI.
                 def wait_for_hive_core():
-
+                    # This function now just updates the UI when the full core is ready.
+                    bootstrap_instance.hive_ready.wait()
+                    # Re-fetch instance to ensure it's the full one.
+                    get_hive_instance()
                     ready_placeholder = f"Talk to {CFG['AGENT_NAME']}"
-
+                    # The textbox is already interactive, we just update the status and placeholder
+                    return "✅ **Full Hive Core is Ready.**", gr.Textbox(placeholder=ready_placeholder)
                 demo.load(wait_for_hive_core, [], [core_status, msg])

         with gr.Row():
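The privacy routing added to `talk()` is a two-part regex heuristic: a message is written to the caller's private memory scope only when it contains both a first-person marker and a sensitive keyword; everything else lands in the shared `general` scope. The same logic extracted into a runnable form (the function name is illustrative, not the app's API):

```python
import re
from typing import Optional

FIRST_PERSON = re.compile(r"\b(my|mine|me|I|our|we)\b", re.I)
SENSITIVE = re.compile(
    r"\b(password|address|email|phone|ssn|school|kid|medical|bank|card|passport)\b", re.I)

def memory_scope(message: str, uid: Optional[str]) -> str:
    # Both patterns must match before a message is treated as personal.
    personal = bool(FIRST_PERSON.search(message) and SENSITIVE.search(message))
    return f"user:{uid}" if (uid and personal) else "general"

assert memory_scope("what is a passport?", "u1") == "general"      # no first person
assert memory_scope("my bank card was stolen", "u1") == "user:u1"  # personal
assert memory_scope("my bank card was stolen", None) == "general"  # anonymous caller
```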
@@ -1756,6 +1754,7 @@ class Bootstrap:
         self.config = config
         self.caps: Optional[Dict] = None
         self.hive_instance: Optional[Hive] = None
+        self.hive_lite_instance: Optional[Hive] = None
         self.hive_ready = threading.Event()

     def run(self):
@@ -1764,12 +1763,19 @@
         self.caps = probe_caps()
         print(f"[Bootstrap] System capabilities: {self.caps}")

+        # Create a 'lite' instance immediately for basic chat
+        print("[Bootstrap] Initializing Lite Hive core...")
+        self.hive_lite_instance = Hive(lite=True)
+        print("[Bootstrap] Lite Hive core is ready.")
+
         # Launch UI immediately, it will wait for the hive_ready event
         ui_thread = threading.Thread(target=self.launch, daemon=True)
         ui_thread.start()

         print("[Bootstrap] Initializing Hive core in background...")
-
+        # Now initialize the full instance. This is the slow part.
+        self.hive_instance = Hive(lite=False)
+
         self.hive_ready.set() # Signal that the Hive instance is ready
         print("[Bootstrap] Hive core is ready.")

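The hunk above is the producer side of the `hive_ready` event: the lite instance is built synchronously, the UI thread starts right away, the slow full build runs on the main thread, and `set()` finally releases every waiter (`wait_for_hive_core()` in the UI and `run_cli_loop()` below). A compressed, runnable sketch of that ordering, with strings standing in for the two `Hive` instances:

```python
import threading, time

ready = threading.Event()
state = {"lite": "lite-hive", "full": None}  # stand-ins for Hive(lite=True/False)

def launch_ui():
    # Started first, as in Bootstrap.run(); callers could serve the lite
    # instance here while the full build is still in progress.
    ready.wait()  # same role as wait_for_hive_core() / run_cli_loop()
    print("UI upgraded to:", state["full"])

threading.Thread(target=launch_ui, daemon=True).start()

time.sleep(0.2)            # stands in for the slow full-model load
state["full"] = "full-hive"
ready.set()                # releases every waiter at once
time.sleep(0.1)            # give the daemon thread time to print
```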
@@ -1805,7 +1811,8 @@
             self.run_cli_loop()

     def run_cli_loop(self):
-        """Runs a command-line interface loop for Hive."""
+        """Runs a command-line interface loop for Hive. Waits for full init."""
+        self.hive_ready.wait()
         print("Hive is ready. Type a message and press Enter (Ctrl+C to exit).")
         try:
             while True: