|
|
|
|
|
|
|
|
import gradio as gr |
|
|
from gradio_pdf import PDF |
|
|
|
|
|
from load_documents import load_all_documents |
|
|
from split_documents import split_documents |
|
|
from vectorstore import build_vectorstore |
|
|
from retriever import get_retriever |
|
|
from llm import load_llm |
|
|
from rag_pipeline import answer |
|
|
from speech_io import transcribe_audio, synthesize_speech |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# ---------------------------------------------------------------------------
# One-time start-up: build the retrieval pipeline before the UI is created.
# Each stage consumes the result of the previous one; the progress messages
# are printed so the stages can be followed on the console.
# ---------------------------------------------------------------------------

# Stage 1: load the source documents.
print("📚 Lade Dokumente…")
docs = load_all_documents()

# Stage 2: split the documents into chunks.
print("🔪 Splitte Dokumente…")
chunks = split_documents(docs)

# Stage 3: build the vector store from the chunks.
print("🔍 Erstelle VectorStore…")
vs = build_vectorstore(chunks)

# Stage 4: derive a retriever from the vector store.
print("🔎 Erzeuge Retriever…")
retriever = get_retriever(vs)

# Stage 5: load the language model used to answer questions.
print("🤖 Lade LLM…")
llm = load_llm()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def format_sources(src):
    """Render the retrieved sources as a Markdown "Quellen" section.

    Args:
        src: Sequence of source mappings. Each entry is read for the keys
            ``"source"`` (link label), ``"url"`` (link target) and the
            optional ``"page"``.

    Returns:
        An empty string when ``src`` is empty or ``None``. Otherwise a
        Markdown fragment that begins with a newline (so it can be appended
        directly to an answer), followed by a heading and one bullet per
        source.
    """
    if not src:
        return ""

    # The leading empty element yields the newline that separates the
    # heading from whatever text the fragment is appended to.
    lines = ["", "## 📚 Quellen"]

    for entry in src:
        url = entry.get("url")
        # Fall back to the URL (or a generic label) so a missing "source"
        # key never aborts the whole reply with a KeyError.
        label = entry.get("source") or url or "Quelle"
        # Without a target, emit plain text instead of a broken "(None)" link.
        bullet = f"- [{label}]({url})" if url else f"- {label}"

        page = entry.get("page")
        if page is not None:  # page 0 is a valid value, so test against None
            bullet += f" (Seite {page})"

        lines.append(bullet)

    return "\n".join(lines)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def chat_fn(message, history):
    """Answer one chat turn given typed text and/or a recorded audio file.

    Args:
        message: Value of the ``gr.MultimodalTextbox``, a dict of the form
            ``{"text": str, "files": [...]}``. Any other value is converted
            to a string and treated as plain text without files.
        history: List of OpenAI-style messages (``role`` / ``content``
            dicts). ``None`` is treated as an empty conversation.

    Returns:
        A ``(history, tts_audio, cleared_input)`` tuple: the conversation
        extended by the user and assistant messages, the result of
        ``synthesize_speech`` for the reply (``None`` when there was nothing
        to answer), and an empty textbox value that resets the input.
    """
    # Guard against a missing history so the list concatenation below
    # cannot fail on None.
    history = history or []
    text, files = _split_message(message)

    audio_path = _first_file_path(files)
    if audio_path:
        # A failed/empty transcription may come back as None; treat it as
        # "nothing spoken" instead of raising on string concatenation.
        spoken = transcribe_audio(audio_path) or ""
        # Typed text comes first, the transcript is appended; strip() also
        # reduces a whitespace-only transcript to an empty question.
        text = f"{text} {spoken}".strip()

    cleared_input = {"text": "", "files": []}

    if not text:
        # Nothing to ask: leave the chat untouched and only reset the input.
        return history, None, cleared_input

    ans, sources = answer(text, retriever, llm)
    bot_msg = ans + format_sources(sources)

    history = history + [
        {"role": "user", "content": text},
        {"role": "assistant", "content": bot_msg},
    ]

    # The spoken text is the full stored message, i.e. the same content
    # read_last_answer replays from the history.
    tts_audio = synthesize_speech(bot_msg)

    return history, tts_audio, cleared_input


def _split_message(message):
    """Return ``(text, files)`` extracted from a chat input value."""
    if isinstance(message, dict):
        return (message.get("text") or "").strip(), message.get("files") or []
    return str(message or "").strip(), []


def _first_file_path(files):
    """Return the first non-empty string path among *files*, else ``None``."""
    for item in files:
        # Entries are either dicts carrying a "path" key or the path itself.
        path = item.get("path") if isinstance(item, dict) else item
        if isinstance(path, str) and path:
            return path
    return None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def read_last_answer(history):
    """Synthesize speech for the most recent assistant message.

    Args:
        history: List of chat messages (``role`` / ``content`` dicts), or
            ``None``.

    Returns:
        The result of ``synthesize_speech`` for the latest assistant
        message, or ``None`` when the history is empty, contains no
        assistant message, or that message has no speakable text.
    """
    if not history:
        return None

    for msg in reversed(history):
        # Skip entries that are not message dicts instead of failing on .get.
        if not isinstance(msg, dict) or msg.get("role") != "assistant":
            continue
        spoken_text = _message_text(msg.get("content"))
        # Only the latest assistant message counts; if it has nothing to
        # read, return None rather than sending empty text to the TTS.
        return synthesize_speech(spoken_text) if spoken_text.strip() else None

    return None


def _message_text(content):
    """Flatten a message ``content`` value into plain text ("" if none)."""
    if isinstance(content, str):
        return content
    if isinstance(content, (list, tuple)):
        # NOTE(review): some Gradio versions appear to deliver content as a
        # list of blocks such as {"type": "text", "text": "..."} - confirm
        # against the installed version.
        parts = []
        for block in content:
            if isinstance(block, str):
                parts.append(block)
            elif isinstance(block, dict) and isinstance(block.get("text"), str):
                parts.append(block["text"])
        return "\n".join(parts)
    return ""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# ---------------------------------------------------------------------------
# Gradio UI: chat column (left, wider) and document viewers (right).
# Components are created in the same order as before, since creation order
# determines the layout.
# ---------------------------------------------------------------------------
with gr.Blocks(title="Prüfungsrechts-Chatbot (RAG + Sprache)") as demo:
    gr.Markdown("# 🧑⚖️ Prüfungsrechts-Chatbot")

    intro_text = (
        "Dieser Chatbot beantwortet Fragen **ausschließlich** aus der "
        "Prüfungsordnung (PDF) und dem Hochschulgesetz NRW. "
        "Du kannst Text eingeben oder direkt ins Mikrofon sprechen."
    )
    gr.Markdown(intro_text)

    with gr.Row():
        # ---- Left column: conversation, audio playback, input, buttons ----
        with gr.Column(scale=2):
            chatbot = gr.Chatbot(label="Chat", height=500)

            # Playback of the synthesized reply.
            voice_out = gr.Audio(label="Vorgelesene Antwort", type="numpy")

            # Combined text / microphone input.
            chat_input = gr.MultimodalTextbox(
                label=None,
                placeholder="Stelle deine Frage zum Prüfungsrecht … oder sprich ins Mikrofon",
                show_label=False,
                sources=["microphone"],
                file_types=["audio"],
                max_lines=6,
            )

            # Submitting the textbox and pressing "Senden" run the same handler.
            chat_input.submit(
                fn=chat_fn,
                inputs=[chat_input, chatbot],
                outputs=[chatbot, voice_out, chat_input],
            )

            send_btn = gr.Button("Senden")
            send_btn.click(
                fn=chat_fn,
                inputs=[chat_input, chatbot],
                outputs=[chatbot, voice_out, chat_input],
            )

            # Replay the latest assistant message as audio.
            read_btn = gr.Button("🔁 Antwort erneut vorlesen")
            read_btn.click(
                fn=read_last_answer,
                inputs=[chatbot],
                outputs=[voice_out],
            )

            # Reset conversation, audio output and input field in one go.
            clear_btn = gr.Button("Chat zurücksetzen")
            clear_btn.click(
                fn=lambda: ([], None, {"text": "", "files": []}),
                inputs=None,
                outputs=[chatbot, voice_out, chat_input],
            )

        # ---- Right column: the two source documents ----
        with gr.Column(scale=1):
            # Metadata of the first loaded document tagged as "pdf".
            pdf_meta = next(d.metadata for d in docs if d.metadata["type"] == "pdf")
            gr.Markdown("### 📄 Prüfungsordnung (PDF)")
            PDF(pdf_meta["pdf_url"], height=350)

            # Metadata of the first loaded document tagged as "hg"; the URL
            # fragment is dropped so the viewer opens without an anchor.
            hg_meta = next(d.metadata for d in docs if d.metadata["type"] == "hg")
            hg_url = hg_meta["viewer_url"].split("#")[0]

            gr.Markdown("### 📘 Hochschulgesetz NRW (Viewer)")
            hg_iframe = (
                f'<iframe src="{hg_url}" '
                'style="width:100%;height:350px;border:none;"></iframe>'
            )
            gr.HTML(hg_iframe)
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: enable the request queue, then start the app with
    # ssr_mode off and show_error on.
    queued_app = demo.queue()
    queued_app.launch(show_error=True, ssr_mode=False)
|
|
|