Spaces:

Nguyen5
/

chatbot1

Sleeping

App Files Files Community

chatbot1 / app.py

Nguyen5

commit

533ef4b 3 months ago

raw

history blame contribute delete

6.04 kB

	# app.py – Prüfungsrechts-Chatbot (RAG + Sprachmodus)
	# Version 26.11 – ohne Modi, stabil für Text + Voice

	import gradio as gr
	from gradio_pdf import PDF
	from huggingface_hub import hf_hub_download

	from load_documents import load_documents, DATASET, PDF_FILE, HTML_FILE
	from split_documents import split_documents
	from vectorstore import build_vectorstore
	from retriever import get_retriever
	from llm import load_llm
	from rag_pipeline import answer, PDF_BASE_URL, LAW_URL

	from speech_io import transcribe_audio, synthesize_speech

	# =====================================================
	# INITIALISIERUNG (global)
	# =====================================================

	# Build the whole RAG pipeline once, at import time. Each step consumes the
	# result of the previous one; the chat handlers later read the module-level
	# _retriever and _llm objects created here.
	print("🔹 Lade Dokumente ...")
	_docs = load_documents()

	# Split the loaded documents into chunks for indexing.
	print("🔹 Splitte Dokumente ...")
	_chunks = split_documents(_docs)

	# Build the vector store from the chunks (FAISS, according to the log message).
	print("🔹 Baue VectorStore (FAISS) ...")
	_vs = build_vectorstore(_chunks)

	# Wrap the vector store in a retriever used for every question.
	print("🔹 Erzeuge Retriever ...")
	_retriever = get_retriever(_vs)

	# Load the chat model that generates the answers.
	print("🔹 Lade LLM ...")
	_llm = load_llm()

	# Fetch the viewer files from the Hugging Face dataset repository.
	# _pdf_path is shown in the PDF viewer of the UI; _html_path is not
	# referenced anywhere else in the visible part of this file.
	print("🔹 Lade Dateien für Viewer …")
	_pdf_path = hf_hub_download(DATASET, PDF_FILE, repo_type="dataset")
	_html_path = hf_hub_download(DATASET, HTML_FILE, repo_type="dataset")

	# =====================================================
	# Quellen formatieren – Markdown für Chat
	# =====================================================

	def format_sources_markdown(sources):
	    """Render the retrieved sources as a Markdown block for the chat answer.

	    Args:
	        sources: Sequence of dicts. Each dict must provide "id" and
	            "source"; "page", "url" and "snippet" are optional and are
	            treated as absent when missing or falsy. May be empty or None.

	    Returns:
	        "" when there are no sources. Otherwise a string that starts with a
	        newline, followed by a heading line and one bullet per source. A
	        bullet is a Markdown link when the source has a URL, carries the
	        page number for sources whose name contains "Prüfungsordnung", and
	        is followed by a quoted snippet line when a snippet is present.
	    """
	    if not sources:
	        return ""

	    # The leading empty element makes the joined text start with "\n", so the
	    # block begins on a new line when appended directly to the answer text.
	    lines = ["", "📚 Quellen (genutzte Dokumentstellen):"]
	    for entry in sources:
	        sid = entry["id"]
	        src = entry["source"]
	        # Optional fields: a source without them must not crash the whole
	        # answer with a KeyError.
	        url = entry.get("url")
	        page = entry.get("page")
	        snippet = entry.get("snippet")

	        title = f"Quelle {sid} – {src}"
	        bullet = f"- [{title}]({url})" if url else f"- {title}"

	        # NOTE(review): a page value of 0 is treated as "no page" here;
	        # confirm that page numbers arrive 1-based.
	        if page and "Prüfungsordnung" in src:
	            bullet += f", Seite {page}"
	        lines.append(bullet)

	        if snippet:
	            lines.append(f" > {snippet}")

	    return "\n".join(lines)

	# =====================================================
	# TEXT CHATBOT
	# =====================================================

	def chatbot_text(user_message, history):
	    """Answer a typed question and append the exchange to the chat history.

	    Args:
	        user_message: Text from the input box. Empty, None or
	            whitespace-only input is ignored.
	        history: Chat history as a list of {"role", "content"} dicts; None
	            is treated as an empty history when a new exchange is appended.

	    Returns:
	        Tuple (history, "") matching the outputs wired to the text events:
	        the updated chat history and an empty string that clears the
	        input box.
	    """
	    # Blank input (including whitespace only) must not trigger a retrieval
	    # and model call; just clear the box and leave the chat as it is.
	    if not user_message or not user_message.strip():
	        return history, ""

	    answer_text, sources = answer(
	        question=user_message,
	        retriever=_retriever,
	        chat_model=_llm,
	    )
	    sources_block = format_sources_markdown(sources)

	    # Build a new list instead of mutating the one passed in.
	    updated = (history or []) + [
	        {"role": "user", "content": user_message},
	        {"role": "assistant", "content": answer_text + sources_block},
	    ]
	    return updated, ""

	# =====================================================
	# VOICE CHATBOT
	# =====================================================

	def chatbot_voice(audio_path, history):
	    """Answer a spoken question: transcribe it, run RAG, and voice the reply.

	    Args:
	        audio_path: File path of the recording from the microphone input,
	            or None/empty when nothing was recorded.
	        history: Chat history as a list of {"role", "content"} dicts; None
	            is treated as an empty history when a new exchange is appended.

	    Returns:
	        Tuple (history, audio, "") matching the outputs wired to the voice
	        button: the updated chat history, the synthesized audio (None when
	        there was nothing to answer), and an empty string that clears the
	        text box.
	    """
	    # No recording available (button clicked without speaking): do not hand
	    # an empty path to the transcription backend.
	    if not audio_path:
	        return history, None, ""

	    # 1. Speech -> text
	    text = transcribe_audio(audio_path)
	    if not text:
	        return history, None, ""

	    # 2. RAG answer
	    answer_text, sources = answer(
	        question=text,
	        retriever=_retriever,
	        chat_model=_llm,
	    )
	    bot_msg = answer_text + format_sources_markdown(sources)

	    # Store both turns in the chat history (new list, input not mutated).
	    updated = (history or []) + [
	        {"role": "user", "content": text},
	        {"role": "assistant", "content": bot_msg},
	    ]

	    # 3. Text -> speech (the full bot message, including the sources block)
	    audio = synthesize_speech(bot_msg)

	    return updated, audio, ""

	# =====================================================
	# LAST ANSWER → TTS
	# =====================================================

	def read_last_answer(history):
	    """Synthesize speech for the most recent assistant message.

	    Args:
	        history: Chat history as a list of {"role", "content"} dicts.

	    Returns:
	        The result of synthesize_speech for the latest assistant message,
	        or None when the history is empty or holds no assistant message.
	    """
	    # Walk the history backwards and take the first assistant entry found.
	    latest = next(
	        (entry for entry in reversed(history or []) if entry["role"] == "assistant"),
	        None,
	    )
	    if latest is None:
	        return None
	    return synthesize_speech(latest["content"])

	# =====================================================
	# UI – GRADIO
	# =====================================================

	# Assemble the Gradio UI: chat with text and voice controls, plus viewers
	# for the two source documents. The app object is bound to `demo`.
	# NOTE(review): the handlers return the chat history as {"role", "content"}
	# dicts; confirm that the installed Gradio version's Chatbot accepts this
	# format without an explicit format setting.
	with gr.Blocks(title="Prüfungsrechts-Chatbot (RAG + Sprache)") as demo:
	gr.Markdown("# 🧑‍⚖️ Prüfungsrechts-Chatbot")
	gr.Markdown(
	"Dieser Chatbot beantwortet Fragen ausschließlich aus der "
	"Prüfungsordnung (PDF) und dem Hochschulgesetz NRW (Website). "
	"Du kannst Text eingeben oder direkt ins Mikrofon sprechen."
	)

	with gr.Row():
	with gr.Column(scale=2):
	chatbot = gr.Chatbot(label="Chat", height=500)

	msg = gr.Textbox(
	label="Frage eingeben",
	placeholder="Stelle deine Frage zum Prüfungsrecht …",
	)

	# Send text: submitting the text box and clicking the button both call
	# chatbot_text, which updates the chat and clears the text box.
	msg.submit(
	chatbot_text,
	[msg, chatbot],
	[chatbot, msg]
	)

	send_btn = gr.Button("Senden (Text)")
	send_btn.click(
	chatbot_text,
	[msg, chatbot],
	[chatbot, msg]
	)

	# Voice input: the microphone recording is passed to the handler as a file
	# path; voice_out plays the spoken answer.
	gr.Markdown("### 🎙️ Spracheingabe")
	voice_in = gr.Audio(sources=["microphone"], type="filepath")
	voice_out = gr.Audio(label="Vorgelesene Antwort", type="numpy")

	# chatbot_voice returns (history, audio, ""), mapped onto chat, player and
	# text box in that order.
	voice_btn = gr.Button("Sprechen & senden")
	voice_btn.click(
	chatbot_voice,
	[voice_in, chatbot],
	[chatbot, voice_out, msg]
	)

	# Read the latest assistant message aloud again.
	read_btn = gr.Button("🔁 Antwort erneut vorlesen")
	read_btn.click(
	read_last_answer,
	[chatbot],
	[voice_out]
	)

	# Reset the chat by replacing the history with an empty list.
	clear_btn = gr.Button("Chat zurücksetzen")
	clear_btn.click(lambda: [], None, chatbot)

	# =====================
	# Right column: viewer
	# =====================

	with gr.Column(scale=1):
	gr.Markdown("### 📄 Prüfungsordnung (PDF)")
	PDF(_pdf_path, height=350)

	# The law text is embedded as an iframe pointing at LAW_URL.
	gr.Markdown("### 📘 Hochschulgesetz NRW (Website)")
	gr.HTML(
	f'<iframe src="{LAW_URL}" style="width:100%;height:350px;border:none;"></iframe>'
	)

	# Launch the Gradio app only when this file is executed as a script.
	if __name__ == "__main__":
	demo.launch()