cwadayi committed on
Commit
dfdd885
·
verified ·
1 Parent(s): 6212ae6

Update ai_service.py

Files changed (1)
  1. ai_service.py +77 -82
ai_service.py CHANGED
@@ -1,82 +1,77 @@
- # ai_service.py
- import os
- from config import (
-     LLM_DEVICE, LLM_THREADS, LLM_MODEL, TRANSFORMERS_CACHE,
-     LLM_MAX_NEW_TOKENS, LLM_TOP_K, LLM_TEMPERATURE
- )
-
- # Dictionary for lazily loading the language model
- _LLM = {"loaded": False, "ok": False, "err": None, "model": None, "tokenizer": None, "device": "cpu"}
-
- def _ensure_llm():
-     """Load the AI model and tokenizer on first use."""
-     if _LLM["loaded"]:
-         return _LLM["ok"], _LLM["err"]
-
-     _LLM["loaded"] = True
-     try:
-         import torch
-         from transformers import AutoTokenizer, AutoModelForCausalLM
-
-         device = LLM_DEVICE
-         if device not in ("cuda", "cpu"):
-             device = "cuda" if torch.cuda.is_available() else "cpu"
-         torch.set_num_threads(max(1, int(LLM_THREADS)))
-
-         tok = AutoTokenizer.from_pretrained(LLM_MODEL, cache_dir=TRANSFORMERS_CACHE)
-         mdl = AutoModelForCausalLM.from_pretrained(LLM_MODEL, cache_dir=TRANSFORMERS_CACHE)
-
-         try:
-             mdl = mdl.to(device)
-         except Exception:
-             device = "cpu"
-             mdl = mdl.to(device)
-
-         _LLM.update({"ok": True, "model": mdl, "tokenizer": tok, "device": device})
-         return True, None
-     except Exception as e:
-         _LLM["err"] = f"{e}"
-         _LLM["ok"] = False
-         return False, _LLM["err"]
-
- def generate_ai_text(user_prompt: str) -> str:
-     """Generate a text reply with the loaded AI model."""
-     ok, err = _ensure_llm()
-     if not ok:
-         return (
-             "🤖 AI 尚未啟用:缺少依賴或模型未下載。\n"
-             "請在 requirements.txt 加入 transformers、torch、accelerate、safetensors 等。\n"
-             f"詳細錯誤:{err}"
-         )
-
-     import torch
-     tok = _LLM["tokenizer"]
-     mdl = _LLM["model"]
-     device = _LLM["device"]
-
-     sys_prefix = (
-         "你是一個地震資訊與一般問答的 LINE 助理。回答要精簡、清楚;"
-         "若與地震相關可加入注意事項;若無關則一般回覆。\n\n使用者:"
-     )
-     prompt = sys_prefix + user_prompt
-
-     try:
-         inputs = tok(prompt, return_tensors="pt").to(device)
-         with torch.no_grad():
-             output = mdl.generate(
-                 input_ids=inputs["input_ids"],
-                 attention_mask=inputs.get("attention_mask"),
-                 max_new_tokens=LLM_MAX_NEW_TOKENS,
-                 do_sample=True,
-                 top_k=LLM_TOP_K,
-                 temperature=LLM_TEMPERATURE,
-                 pad_token_id=tok.eos_token_id,
-             )
-         text = tok.decode(output[0], skip_special_tokens=True)
-         if sys_prefix in text:
-             text = text.split(sys_prefix, 1)[-1]
-         if user_prompt in text:
-             text = text.split(user_prompt, 1)[-1].strip()
-         return (text or "(沒有產生內容)")[:1200]
-     except Exception as e:
-         return f"AI 產生發生錯誤:{e}"
 
+ # ai_service.py
+ from config import (
+     HUGGING_FACE_TOKEN, LLM_MODEL, LLM_MAX_NEW_TOKENS,
+     LLM_TOP_K, LLM_TEMPERATURE
+ )
+
+ # Dictionary for lazily loading the language model; it now stores only the pipeline object
+ _LLM = {"loaded": False, "ok": False, "err": None, "model": None}
+
+ def _ensure_llm():
+     """Load the AI model on first use."""
+     if _LLM["loaded"]:
+         return _LLM["ok"], _LLM["err"]
+     _LLM["loaded"] = True
+
+     # Check that the HF token is present
+     if not HUGGING_FACE_TOKEN:
+         _LLM["err"] = "HUGGING_FACE_TOKEN secret not set in deployment environment."
+         _LLM["ok"] = False
+         return False, _LLM["err"]
+
+     try:
+         import torch
+         from transformers import pipeline
+         from huggingface_hub import login
+
+         # Log in with the token
+         login(token=HUGGING_FACE_TOKEN)
+
+         # Build a text-generation pipeline
+         # device_map="auto" automatically uses the GPU when one is available
+         pipe = pipeline(
+             "text-generation",
+             model=LLM_MODEL,
+             torch_dtype=torch.bfloat16,
+             device_map="auto",
+         )
+         _LLM.update({"ok": True, "model": pipe})
+         return True, None
+     except Exception as e:
+         # An invalid token, or a model whose license terms have not been accepted, raises here
+         _LLM["err"] = f"{e}"
+         _LLM["ok"] = False
+         return False, _LLM["err"]
+
+ def generate_ai_text(user_prompt: str) -> str:
+     """Generate a text reply with the loaded Gemma model."""
+     ok, err = _ensure_llm()
+     if not ok:
+         return (
+             "🤖 AI 模型無法使用。\n"
+             "可能原因:\n"
+             "1. 未在 Hugging Face Spaces 設定名為 HUGGING_FACE_TOKEN 的 Secret。\n"
+             "2. 尚未在 Hugging Face 網站上同意 gemma-2b-it 模型的使用條款。\n"
+             f"\n詳細錯誤:{err}"
+         )
+
+     pipe = _LLM["model"]
+
+     # Build a prompt in the chat format expected by Gemma instruction-tuned models
+     prompt = f"<start_of_turn>user\n你是一個多功能的台灣在地LINE助理,請用繁體中文簡潔有力地回答問題。{user_prompt}<end_of_turn>\n<start_of_turn>model\n"
+
+     try:
+         outputs = pipe(
+             prompt,
+             max_new_tokens=LLM_MAX_NEW_TOKENS,
+             do_sample=True,
+             temperature=LLM_TEMPERATURE,
+             top_k=LLM_TOP_K,
+             top_p=0.95,
+         )
+         # Extract the model-generated portion from the pipeline output
+         full_text = outputs[0]["generated_text"]
+         response = full_text.split("<start_of_turn>model\n")[-1]
+         return response.strip() or "(AI 沒有產生任何內容)"
+     except Exception as e:
+         return f"AI 產生內容時發生錯誤:{e}"