cwadayi committed on
Commit
dfdd885
·
verified ·
1 Parent(s): 6212ae6

Update ai_service.py

Files changed (1)
  1. ai_service.py +77 -82
ai_service.py CHANGED
@@ -1,82 +1,77 @@
- # ai_service.py
- import os
- from config import (
-     LLM_DEVICE, LLM_THREADS, LLM_MODEL, TRANSFORMERS_CACHE,
-     LLM_MAX_NEW_TOKENS, LLM_TOP_K, LLM_TEMPERATURE
- )
-
- # Dictionary for lazily loading the language model
- _LLM = {"loaded": False, "ok": False, "err": None, "model": None, "tokenizer": None, "device": "cpu"}
-
- def _ensure_llm():
-     """Load the AI model and tokenizer on first use."""
-     if _LLM["loaded"]:
-         return _LLM["ok"], _LLM["err"]
-
-     _LLM["loaded"] = True
-     try:
-         import torch
-         from transformers import AutoTokenizer, AutoModelForCausalLM
-
-         device = LLM_DEVICE
-         if device not in ("cuda", "cpu"):
-             device = "cuda" if torch.cuda.is_available() else "cpu"
-         torch.set_num_threads(max(1, int(LLM_THREADS)))
-
-         tok = AutoTokenizer.from_pretrained(LLM_MODEL, cache_dir=TRANSFORMERS_CACHE)
-         mdl = AutoModelForCausalLM.from_pretrained(LLM_MODEL, cache_dir=TRANSFORMERS_CACHE)
-
-         try:
-             mdl = mdl.to(device)
-         except Exception:
-             device = "cpu"
-             mdl = mdl.to(device)
-
-         _LLM.update({"ok": True, "model": mdl, "tokenizer": tok, "device": device})
-         return True, None
-     except Exception as e:
-         _LLM["err"] = f"{e}"
-         _LLM["ok"] = False
-         return False, _LLM["err"]
-
- def generate_ai_text(user_prompt: str) -> str:
-     """Generate a text reply with the loaded AI model."""
-     ok, err = _ensure_llm()
-     if not ok:
-         return (
-             "🤖 AI 尚未啟用:缺少依賴或模型未下載。\n"
-             "請在 requirements.txt 加入 transformers、torch、accelerate、safetensors 等。\n"
-             f"詳細錯誤:{err}"
-         )
-
-     import torch
-     tok = _LLM["tokenizer"]
-     mdl = _LLM["model"]
-     device = _LLM["device"]
-
-     sys_prefix = (
-         "你是一個地震資訊與一般問答的 LINE 助理。回答要精簡、清楚;"
-         "若與地震相關可加入注意事項;若無關則一般回覆。\n\n使用者:"
-     )
-     prompt = sys_prefix + user_prompt
-
-     try:
-         inputs = tok(prompt, return_tensors="pt").to(device)
-         with torch.no_grad():
-             output = mdl.generate(
-                 input_ids=inputs["input_ids"],
-                 attention_mask=inputs.get("attention_mask"),
-                 max_new_tokens=LLM_MAX_NEW_TOKENS,
-                 do_sample=True,
-                 top_k=LLM_TOP_K,
-                 temperature=LLM_TEMPERATURE,
-                 pad_token_id=tok.eos_token_id,
-             )
-         text = tok.decode(output[0], skip_special_tokens=True)
-         if sys_prefix in text:
-             text = text.split(sys_prefix, 1)[-1]
-         if user_prompt in text:
-             text = text.split(user_prompt, 1)[-1].strip()
-         return (text or "(沒有產生內容)")[:1200]
-     except Exception as e:
-         return f"AI 產生發生錯誤:{e}"
 
+ # ai_service.py
+ from config import (
+     HUGGING_FACE_TOKEN, LLM_MODEL, LLM_MAX_NEW_TOKENS,
+     LLM_TOP_K, LLM_TEMPERATURE
+ )
+
+ # Dictionary for lazily loading the language model; it now stores only the pipeline object
+ _LLM = {"loaded": False, "ok": False, "err": None, "model": None}
+
+ def _ensure_llm():
+     """Load the AI model on first use."""
+     if _LLM["loaded"]:
+         return _LLM["ok"], _LLM["err"]
+     _LLM["loaded"] = True
+
+     # Check that the HF token is present
+     if not HUGGING_FACE_TOKEN:
+         _LLM["err"] = "HUGGING_FACE_TOKEN secret not set in deployment environment."
+         _LLM["ok"] = False
+         return False, _LLM["err"]
+
+     try:
+         import torch
+         from transformers import pipeline
+         from huggingface_hub import login
+
+         # Log in with the token
+         login(token=HUGGING_FACE_TOKEN)
+
+         # Build a text-generation pipeline
+         # device_map="auto" automatically uses the GPU when one is available
+         pipe = pipeline(
+             "text-generation",
+             model=LLM_MODEL,
+             torch_dtype=torch.bfloat16,
+             device_map="auto",
+         )
+         _LLM.update({"ok": True, "model": pipe})
+         return True, None
+     except Exception as e:
+         # An invalid token, or a model whose license terms have not been accepted, raises here
+         _LLM["err"] = f"{e}"
+         _LLM["ok"] = False
+         return False, _LLM["err"]
+
+ def generate_ai_text(user_prompt: str) -> str:
+     """Generate a text reply with the loaded Gemma model."""
+     ok, err = _ensure_llm()
+     if not ok:
+         return (
+             "🤖 AI 模型無法使用。\n"
+             "可能原因:\n"
+             "1. 未在 Hugging Face Spaces 設定名為 HUGGING_FACE_TOKEN 的 Secret。\n"
+             "2. 尚未在 Hugging Face 網站上同意 gemma-2b-it 模型的使用條款。\n"
+             f"\n詳細錯誤:{err}"
+         )
+
+     pipe = _LLM["model"]
+
+     # Build a prompt in the chat format expected by Gemma instruction-tuned models
+     prompt = f"<start_of_turn>user\n你是一個多功能的台灣在地LINE助理,請用繁體中文簡潔有力地回答問題。{user_prompt}<end_of_turn>\n<start_of_turn>model\n"
+
+     try:
+         outputs = pipe(
+             prompt,
+             max_new_tokens=LLM_MAX_NEW_TOKENS,
+             do_sample=True,
+             temperature=LLM_TEMPERATURE,
+             top_k=LLM_TOP_K,
+             top_p=0.95,
+         )
+         # Extract the model-generated portion from the pipeline output
+         full_text = outputs[0]["generated_text"]
+         response = full_text.split("<start_of_turn>model\n")[-1]
+         return response.strip() or "(AI 沒有產生任何內容)"
+     except Exception as e:
+         return f"AI 產生內容時發生錯誤:{e}"