# faq-bot / app.py
# (Hugging Face Space page residue — uploader: 2045max;
#  commit 2d57fba, verified: "fix: drop provider='hf-inference' (auto-route works for 72B)")
import json
import os
import gradio as gr
from sentence_transformers import SentenceTransformer, util
from huggingface_hub import InferenceClient
# --- Retrieval setup (runs once at import time) ---
print("Loading embedding model...")
# Chinese-optimized small embedding model used for semantic FAQ matching.
model = SentenceTransformer("BAAI/bge-small-zh-v1.5")
print("Loading FAQs...")
# faqs.json: a list of entries with at least "q", "a" and "category" keys
# (see their usage in chat() below).
with open("faqs.json", "r", encoding="utf-8") as f:
    faqs = json.load(f)
print(f"Encoding {len(faqs)} FAQ questions...")
questions = [item["q"] for item in faqs]
# Normalized embeddings so cosine similarity is a plain dot product.
faq_embeddings = model.encode(questions, normalize_embeddings=True)
print("Ready!")
# Minimum cosine similarity for a retrieval hit; below this the bot refuses.
THRESHOLD = 0.55
# --- LLM ---
# System prompt (Chinese). Rules it encodes: answer strictly from the
# provided reference material, say so when the material lacks the answer,
# use natural conversational Chinese, keep replies to <= 3 sentences.
SYSTEM_PROMPT = """你是一个友好、简洁的 AI 学习答疑助手。
规则:
1. 严格基于"参考资料"回答,不要编造
2. 资料里没有的内容,直接说"我暂时没这方面的资料"
3. 用自然、口语化的中文,避免生硬复读资料原文
4. 控制在 3 句话以内"""
# User-message template: retrieved reference material, then the question.
USER_PROMPT_TEMPLATE = """【参考资料】
{context}
【用户问题】
{question}
请基于资料用自然语言回答。"""
# Hosted inference client. HF_TOKEN may be unset; llm_answer() checks for
# that before calling the API and degrades gracefully.
client = InferenceClient(
    model="Qwen/Qwen2.5-72B-Instruct",
    token=os.environ.get("HF_TOKEN"),
    timeout=20,
)
def llm_answer(question, top_faqs):
    """Generate a conversational answer with the LLM, grounded in retrieved FAQs.

    Falls back to the top FAQ's stored answer when no HF_TOKEN is configured
    or when the inference call raises.
    """
    fallback = top_faqs[0]["a"]
    if not os.environ.get("HF_TOKEN"):
        # No API token available: return the raw FAQ answer plus a setup hint.
        return fallback + "\n\n_(需要在 Space Secrets 设置 HF_TOKEN 以启用 LLM)_"
    snippets = []
    for item in top_faqs:
        snippets.append(f"Q: {item['q']}\nA: {item['a']}")
    prompt = USER_PROMPT_TEMPLATE.format(
        context="\n\n".join(snippets), question=question
    )
    try:
        response = client.chat_completion(
            messages=[
                {"role": "system", "content": SYSTEM_PROMPT},
                {"role": "user", "content": prompt},
            ],
            max_tokens=200,
            temperature=0.3,
        )
        return response.choices[0].message.content
    except Exception as err:
        # Best-effort degradation: surface the stored answer plus the error.
        return fallback + f"\n\n_(LLM 暂不可用:{err})_"
# --- Gradio ---
def chat(query):
    """Answer one user question: embed it, retrieve top-3 FAQs, then reply.

    Returns a (reply, retrieval-info, related-questions) markdown triple.
    """
    if not query or not query.strip():
        return "请输入您的问题", "", ""
    query_vec = model.encode(query, normalize_embeddings=True)
    sims = util.cos_sim(query_vec, faq_embeddings)[0]
    best3 = sims.argsort(descending=True)[:3].tolist()
    best = best3[0]
    best_score = float(sims[best])
    if best_score >= THRESHOLD:
        reply = llm_answer(query, [faqs[i] for i in best3])
    else:
        # Below the confidence cutoff: refuse rather than risk a wrong answer.
        reply = "抱歉,我暂时无法理解您的问题。建议换个说法,或查看下方相关问题。"
    info = f"**类别**: {faqs[best]['category']} | **匹配度**: {best_score:.2f} | **匹配的问题**: {faqs[best]['q']}"
    related_lines = ["### 您可能也想问:"]
    for i in best3[1:]:
        related_lines.append(f"- {faqs[i]['q']} _(相似度 {float(sims[i]):.2f})_")
    related = "\n".join(related_lines) + "\n"
    return reply, info, related
# Clickable example questions shown under the input box. The last one
# (weather) falls outside the FAQ corpus — presumably included to
# demonstrate the below-threshold refusal path.
examples = [
    "embedding 是什么意思?",
    "中文应该用哪个向量模型?",
    "BERT 和 GPT 有什么不一样?",
    "pipeline 是干什么用的?",
    "AI 怎么知道两句话意思一样?",
    "怎么把模型跑到 GPU 上?",
    "为什么 LLM 会胡说八道?",
    "今天天气怎么样?",
]
# Wire chat() into a simple three-output Gradio interface.
iface = gr.Interface(
    fn=chat,
    inputs=gr.Textbox(label="您的问题", placeholder="例如:embedding 是什么?", lines=2),
    outputs=[
        gr.Markdown(label="答案"),
        gr.Markdown(label="检索详情"),
        gr.Markdown(label="相关问题"),
    ],
    title="🤖 AI 学习 FAQ 机器人(RAG)",
    # Fixed: the description previously advertised "Qwen2.5-7B-Instruct",
    # but the InferenceClient above actually uses Qwen/Qwen2.5-72B-Instruct.
    description="基于 BAAI/bge-small-zh-v1.5 检索 + Qwen2.5-72B-Instruct 生成 · 30 条 AI 学习 FAQ",
    examples=examples,
    flagging_mode="never",
    theme="soft",
)
# Start the web UI when the module is executed as a script.
if __name__ == "__main__":
    iface.launch()