| | |
| | |
| |
|
| | import gradio as gr |
| | from gradio_pdf import PDF |
| | from huggingface_hub import hf_hub_download |
| |
|
| | from load_documents import load_documents, DATASET, PDF_FILE, HTML_FILE |
| | from split_documents import split_documents |
| | from vectorstore import build_vectorstore |
| | from retriever import get_retriever |
| | from llm import load_llm |
| | from rag_pipeline import answer, PDF_BASE_URL, LAW_URL |
| |
|
| | from speech_io import transcribe_audio, synthesize_speech |
| |
|
| | |
| | |
| | |
| |
|
# ---------------------------------------------------------------------------
# One-time pipeline setup, executed at import time.
# Each step feeds the next: documents -> chunks -> vector store -> retriever.
# The resulting module-level objects (_retriever, _llm) are shared by all
# request handlers defined below.
# ---------------------------------------------------------------------------
print("🔹 Lade Dokumente ...")
_docs = load_documents()

print("🔹 Splitte Dokumente ...")
_chunks = split_documents(_docs)

print("🔹 Baue VectorStore (FAISS) ...")
_vs = build_vectorstore(_chunks)

print("🔹 Erzeuge Retriever ...")
_retriever = get_retriever(_vs)

print("🔹 Lade LLM ...")
_llm = load_llm()

# Fetch the two reference files from the Hugging Face dataset repository.
# _pdf_path is handed to the PDF viewer component further down.
# NOTE(review): _html_path is not referenced anywhere in the visible code
# (the law text is embedded via LAW_URL instead) — confirm whether this
# download is still needed.
print("🔹 Lade Dateien für Viewer …")
_pdf_path = hf_hub_download(DATASET, PDF_FILE, repo_type="dataset")
_html_path = hf_hub_download(DATASET, HTML_FILE, repo_type="dataset")
| |
|
| | |
| | |
| | |
| |
|
def format_sources_markdown(sources):
    """Render the sources used for an answer as a Markdown block.

    Args:
        sources: Iterable of dicts, each providing the keys ``"id"``,
            ``"source"``, ``"page"``, ``"url"`` and ``"snippet"``.
            A falsy value (``None``, empty list) produces no output.

    Returns:
        A Markdown string that starts with a newline followed by a bold
        heading and one bullet per source, or ``""`` when there are no
        sources. Bullets are hyperlinked when a URL is present, carry a
        page suffix for sources whose name contains "Prüfungsordnung", and
        are followed by a quoted snippet line when a snippet is present.

    Raises:
        KeyError: If a source dict lacks one of the expected keys.
    """
    if not sources:
        return ""

    # The leading empty entry yields a newline between answer and heading.
    out = ["", "**📚 Quellen (genutzte Dokumentstellen):**"]

    for entry in sources:
        ident, origin, page_no, link, excerpt = (
            entry[key] for key in ("id", "source", "page", "url", "snippet")
        )

        label = f"Quelle {ident} – {origin}"
        bullet = f"- [{label}]({link})" if link else f"- {label}"

        # Page numbers are only shown for the examination regulations.
        if page_no and "Prüfungsordnung" in origin:
            bullet = f"{bullet}, Seite {page_no}"
        out.append(bullet)

        if excerpt:
            out.append(f" > {excerpt}")

    return "\n".join(out)
| |
|
| | |
| | |
| | |
| |
|
def chatbot_text(user_message, history):
    """Answer a typed question and append the exchange to the chat history.

    Args:
        user_message: Text entered by the user. Empty, ``None`` or
            whitespace-only input is ignored without querying the model.
        history: Chat history as a list of ``{"role", "content"}`` dicts.
            ``None`` is treated as an empty history.

    Returns:
        A ``(history, "")`` tuple: the (possibly extended) history and an
        empty string, which the UI wiring uses to clear the text box.
    """
    # Tolerate a missing history, matching read_last_answer's handling.
    history = history or []

    # Blank input must not trigger a retrieval + LLM round trip.
    if not user_message or not user_message.strip():
        return history, ""

    answer_text, sources = answer(
        question=user_message,
        retriever=_retriever,
        chat_model=_llm,
    )

    quellen_block = format_sources_markdown(sources)

    # Build a new list instead of mutating the caller's history in place.
    history = history + [
        {"role": "user", "content": user_message},
        {"role": "assistant", "content": answer_text + quellen_block},
    ]

    return history, ""
| |
|
| | |
| | |
| | |
| |
|
def chatbot_voice(audio_path, history):
    """Answer a spoken question and return the reply as text and speech.

    Args:
        audio_path: File path of the recorded audio, or a falsy value
            (``None``/``""``) when nothing has been recorded.
        history: Chat history as a list of ``{"role", "content"}`` dicts.
            ``None`` is treated as an empty history.

    Returns:
        A ``(history, audio, "")`` tuple: the (possibly extended) history,
        the result of ``synthesize_speech`` for the reply (``None`` when
        there was nothing to answer), and an empty string for the text box.
    """
    # Tolerate a missing history, matching read_last_answer's handling.
    history = history or []

    # The button can be pressed without a recording; do not hand an empty
    # path to the transcriber.
    if not audio_path:
        return history, None, ""

    text = transcribe_audio(audio_path)
    if not text:
        return history, None, ""

    answer_text, sources = answer(
        question=text,
        retriever=_retriever,
        chat_model=_llm,
    )
    bot_msg = answer_text + format_sources_markdown(sources)

    # Build a new list instead of mutating the caller's history in place.
    history = history + [
        {"role": "user", "content": text},
        {"role": "assistant", "content": bot_msg},
    ]

    # The spoken reply uses the same text that is shown in the chat.
    audio = synthesize_speech(bot_msg)

    return history, audio, ""
| |
|
| | |
| | |
| | |
| |
|
def read_last_answer(history):
    """Synthesize speech for the most recent assistant message.

    Args:
        history: Chat history as a list of ``{"role", "content"}`` dicts;
            may be empty or ``None``.

    Returns:
        The result of ``synthesize_speech`` for the latest message whose
        role is ``"assistant"``, or ``None`` when the history is empty or
        contains no assistant message.
    """
    # Walk the history backwards and stop at the first assistant entry.
    latest = next(
        (entry for entry in reversed(history or []) if entry["role"] == "assistant"),
        None,
    )
    if latest is None:
        return None
    return synthesize_speech(latest["content"])
| |
|
| | |
| | |
| | |
| |
|
# ---------------------------------------------------------------------------
# UI definition: a two-column layout with the chat (text + voice controls)
# on the left and the reference documents on the right.
# ---------------------------------------------------------------------------
with gr.Blocks(title="Prüfungsrechts-Chatbot (RAG + Sprache)") as demo:
    gr.Markdown("# 🧑⚖️ Prüfungsrechts-Chatbot")
    gr.Markdown(
        "Dieser Chatbot beantwortet Fragen **ausschließlich** aus der "
        "Prüfungsordnung (PDF) und dem Hochschulgesetz NRW (Website). "
        "Du kannst Text eingeben oder direkt ins Mikrofon sprechen."
    )

    with gr.Row():
        with gr.Column(scale=2):
            # NOTE(review): the handlers return history entries as
            # {"role", "content"} dicts — confirm that the installed Gradio
            # version's Chatbot accepts this message format by default.
            chatbot = gr.Chatbot(label="Chat", height=500)

            msg = gr.Textbox(
                label="Frage eingeben",
                placeholder="Stelle deine Frage zum Prüfungsrecht …",
            )

            # Pressing Enter in the text box and clicking the send button
            # both run chatbot_text; its second return value resets `msg`.
            msg.submit(
                chatbot_text,
                [msg, chatbot],
                [chatbot, msg]
            )

            send_btn = gr.Button("Senden (Text)")
            send_btn.click(
                chatbot_text,
                [msg, chatbot],
                [chatbot, msg]
            )

            # Voice input: microphone recording is passed to the handler as
            # a file path; the spoken reply is played back via voice_out.
            gr.Markdown("### 🎙️ Spracheingabe")
            voice_in = gr.Audio(sources=["microphone"], type="filepath")
            voice_out = gr.Audio(label="Vorgelesene Antwort", type="numpy")

            voice_btn = gr.Button("Sprechen & senden")
            voice_btn.click(
                chatbot_voice,
                [voice_in, chatbot],
                [chatbot, voice_out, msg]
            )

            # Re-synthesize the latest assistant message from the history.
            read_btn = gr.Button("🔁 Antwort erneut vorlesen")
            read_btn.click(
                read_last_answer,
                [chatbot],
                [voice_out]
            )

            # Reset the chat by replacing the history with an empty list.
            clear_btn = gr.Button("Chat zurücksetzen")
            clear_btn.click(lambda: [], None, chatbot)

        # Right column: the two source documents for reference.
        with gr.Column(scale=1):
            gr.Markdown("### 📄 Prüfungsordnung (PDF)")
            PDF(_pdf_path, height=350)

            gr.Markdown("### 📘 Hochschulgesetz NRW (Website)")
            # The law text is embedded directly from LAW_URL in an iframe.
            gr.HTML(
                f'<iframe src="{LAW_URL}" style="width:100%;height:350px;border:none;"></iframe>'
            )

# Start the web server only when the file is executed as a script.
if __name__ == "__main__":
    demo.launch()
| |
|
| |
|