import gradio as gr
import spaces  # Hugging Face Spaces helper; not referenced directly in this file
from rag_system import RAGSystem
from i18n import get_text  # not referenced directly in this file

# Initialize RAG system
rag = RAGSystem()
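
# The RAGSystem interface assumed by this app (inferred from the calls below;
# rag_system.py itself is not shown here, so the return shapes are assumptions):
#   set_embedding_model(name), set_llm_model(name)
#   load_default_corpus(chunk_size, chunk_overlap) -> (status, chunks_md, corpus_md)
#   process_document(path, chunk_size, chunk_overlap) -> (status, chunks_md, corpus_md)
#   generate_example_questions(num_questions) -> list of question strings
#   is_ready() -> bool
#   retrieve(query, top_k, similarity_threshold) -> list of (chunk, score) pairs
#   generate(query, results, temperature, max_tokens) -> (answer, prompt)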

def process_pdf(pdf_file, embedding_model, chunk_size, chunk_overlap):
    """Process the uploaded PDF (or the default corpus) and create embeddings."""
    try:
        # Set embedding model BEFORE processing
        rag.set_embedding_model(embedding_model)
        if pdf_file is None:
            # Load default corpus
            status, chunks_display, corpus_text = rag.load_default_corpus(chunk_size, chunk_overlap)
        else:
            status, chunks_display, corpus_text = rag.process_document(pdf_file.name, chunk_size, chunk_overlap)
        # Generate example questions based on the corpus
        example_questions = rag.generate_example_questions(num_questions=5)
        return status, chunks_display, corpus_text, example_questions
    except Exception as e:
        return f"Error: {str(e)}", "", "", []

def perform_query(
    query,
    top_k,
    similarity_threshold,
    llm_model,
    temperature,
    max_tokens
):
    """Perform a RAG query and return the retrieved chunks, prompt, and answer."""
    if not rag.is_ready():
        return "", "⚠️ Please process a corpus first in the Corpus tab.", "", gr.update(value="", visible=False)
    try:
        # Set LLM model
        rag.set_llm_model(llm_model)
        # Retrieve relevant chunks
        results = rag.retrieve(query, top_k, similarity_threshold)
        # Format retrieved chunks display
        chunks_display = format_chunks(results)
        # Generate answer
        answer, prompt = rag.generate(
            query,
            results,
            temperature,
            max_tokens
        )
        return chunks_display, prompt, answer, gr.update(value="", visible=False)
    except Exception as e:
        import traceback
        error_details = traceback.format_exc()
        # Reveal the (normally hidden) error box so the failure is actually visible
        return "", "", "", gr.update(
            value=f"❌ Error: {str(e)}\n\nFull traceback:\n{error_details}",
            visible=True
        )

def format_chunks(results):
    """Format retrieved chunks with scores for display."""
    if not results:
        return "No relevant chunks found."
    output = "### 📄 Retrieved Chunks\n\n"
    for i, (chunk, score) in enumerate(results, 1):
        output += f"**Chunk {i}** - Similarity Score: `{score:.4f}`\n"
        output += f"```\n{chunk}\n```\n\n"
    return output
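
# Example (hypothetical input, for illustration only):
#   format_chunks([("Paris is the capital of France.", 0.8731)])
# returns Markdown with one "Chunk 1" heading, the score `0.8731`, and the
# chunk text wrapped in a fenced code block.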

def create_interface():
    with gr.Blocks(title="RAG Pedagogical Demo", theme=gr.themes.Soft()) as demo:
        # Header - bilingual
        gr.Markdown("# 🎓 RAG Pedagogical Demo / Démo Pédagogique RAG")
        gr.Markdown("*A pedagogical tool to understand Retrieval Augmented Generation / Un outil pédagogique pour comprendre la génération augmentée par récupération*")

        with gr.Tabs() as tabs:
            # Tab 1: Corpus management
            with gr.Tab(label="📚 Corpus"):
                gr.Markdown("## Corpus Management / Gestion du Corpus")
                gr.Markdown("""
                **EN - Default corpus:** Multiple PDF documents from the `documents/` folder. Or upload your own PDF.

                **FR - Corpus par défaut :** Plusieurs documents PDF du dossier `documents/`. Ou téléchargez votre propre PDF.

                1. Select your embedding model / Sélectionnez votre modèle d'embedding
                2. Adjust chunking parameters if needed / Ajustez les paramètres de découpage si nécessaire
                3. Click "Process Corpus" / Cliquez sur "Process Corpus"
                """)

                # Embedding model selection FIRST
                embedding_model = gr.Dropdown(
                    choices=[
                        "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",
                        "intfloat/multilingual-e5-base",
                        "ibm-granite/granite-embedding-107m-multilingual",
                    ],
                    # The default value must be one of the choices above
                    value="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",
                    label="🔤 Embedding Model / Modèle d'Embedding (select before processing / sélectionnez avant traitement)"
                )
                pdf_upload = gr.File(
                    label="📄 Upload PDF / Télécharger PDF (optional / optionnel)",
                    file_types=[".pdf"]
                )
                with gr.Row():
                    chunk_size = gr.Slider(
                        minimum=100,
                        maximum=1000,
                        value=500,
                        step=50,
                        label="Chunk Size / Taille des Chunks (characters / caractères)"
                    )
                    chunk_overlap = gr.Slider(
                        minimum=0,
                        maximum=200,
                        value=50,
                        step=10,
                        label="Chunk Overlap / Chevauchement (characters / caractères)"
                    )
                process_btn = gr.Button("🚀 Process Corpus / Traiter le Corpus", variant="primary", size="lg")
                corpus_status = gr.Textbox(label="Status / Statut", interactive=False)
                # Display default corpus info
                with gr.Accordion("📖 Corpus Information / Informations sur le Corpus", open=False):
                    default_corpus_display = gr.Markdown()
                # Display processed chunks
                with gr.Accordion("📑 Processed Chunks / Chunks Traités", open=False):
                    processed_chunks_display = gr.Markdown()
                # State to hold example questions
                example_questions_state = gr.State([])

                process_btn.click(
                    fn=process_pdf,
                    inputs=[pdf_upload, embedding_model, chunk_size, chunk_overlap],
                    outputs=[corpus_status, processed_chunks_display, default_corpus_display, example_questions_state]
                )

            # Tab 2: Retrieval configuration
            with gr.Tab(label="🔍 Retrieval / Récupération"):
                gr.Markdown("## Retrieval Configuration / Configuration de la Récupération")
                gr.Markdown("""
                **EN:** Configure how relevant chunks are retrieved from the corpus.

                **FR:** Configurez comment les chunks pertinents sont récupérés du corpus.
                """)
                gr.Markdown("**Current Embedding Model / Modèle d'Embedding Actuel:** The model selected in the Corpus tab / Le modèle sélectionné dans l'onglet Corpus")
                with gr.Row():
                    top_k = gr.Slider(
                        minimum=1,
                        maximum=10,
                        value=3,
                        step=1,
                        label="Top K (number of chunks to retrieve / nombre de chunks à récupérer)"
                    )
                    similarity_threshold = gr.Slider(
                        minimum=0.0,
                        maximum=1.0,
                        value=0.5,
                        step=0.05,
                        label="Similarity Threshold / Seuil de Similarité (minimum score / score minimum)"
                    )

            # Tab 3: Generation configuration
            with gr.Tab(label="🤖 Generation / Génération"):
                gr.Markdown("## Generation Configuration / Configuration de la Génération")
                gr.Markdown("""
                **EN:** Select the language model and configure generation parameters.

                **FR:** Sélectionnez le modèle de langage et configurez les paramètres de génération.
                """)
                llm_model = gr.Dropdown(
                    choices=[
                        "meta-llama/Llama-3.2-1B-Instruct",
                        "Qwen/Qwen3-1.7B",
                        "google/gemma-2-2b-it",
                    ],
                    value="meta-llama/Llama-3.2-1B-Instruct",
                    label="Language Model / Modèle de Langage"
                )
                with gr.Row():
                    temperature = gr.Slider(
                        minimum=0.0,
                        maximum=2.0,
                        value=0.7,
                        step=0.1,
                        label="Temperature / Température (creativity / créativité)"
                    )
                    max_tokens = gr.Slider(
                        minimum=100,
                        maximum=2048,
                        value=800,
                        step=50,
                        label="Max Tokens (response length / longueur de réponse - higher for reasoning / plus élevé pour le raisonnement)"
                    )

            # Tab 4: Query & results
            with gr.Tab(label="💬 Query / Requête"):
                gr.Markdown("## Ask a Question / Posez une Question")
                query_input = gr.Textbox(
                    label="Your Question / Votre Question",
                    placeholder="Enter your question here / Entrez votre question ici...",
                    lines=3
                )
                with gr.Accordion("💡 Example Questions / Questions d'Exemple (click to expand / cliquez pour développer)", open=True):
                    gr.Markdown("*Questions generated based on your corpus content / Questions générées à partir de votre corpus*")
                    examples_markdown = gr.Markdown(visible=False)

                # Re-render the example questions whenever the corpus is (re)processed
                def format_questions_markdown(questions):
                    if not questions:
                        return gr.update(value="", visible=False)
                    md = ""
                    for i, q in enumerate(questions, 1):
                        md += f"{i}. {q}\n\n"
                    return gr.update(value=md, visible=True)

                example_questions_state.change(
                    fn=format_questions_markdown,
                    inputs=[example_questions_state],
                    outputs=[examples_markdown]
                )

                query_btn = gr.Button("🔍 Submit Query / Soumettre la Requête", variant="primary", size="lg")

                # Results in order: chunks → prompt → answer
                gr.Markdown("---")
                gr.Markdown("### 📊 Results / Résultats")
                with gr.Accordion("1️⃣ Retrieved Chunks / Chunks Récupérés", open=True):
                    chunks_output = gr.Markdown()
                with gr.Accordion("2️⃣ Prompt Sent to LLM / Prompt Envoyé au LLM", open=True):
                    prompt_output = gr.Textbox(lines=10, max_lines=20, show_copy_button=True)
                with gr.Accordion("3️⃣ Generated Answer / Réponse Générée", open=True):
                    answer_output = gr.Markdown()
| error_output = gr.Textbox(label="Errors", visible=False) | |

                query_btn.click(
                    fn=perform_query,
                    inputs=[
                        query_input,
                        top_k,
                        similarity_threshold,
                        llm_model,
                        temperature,
                        max_tokens
                    ],
                    outputs=[chunks_output, prompt_output, answer_output, error_output]
                )

        # Footer
        gr.Markdown("""
        ---
        **Note**: This is a pedagogical demonstration of RAG systems.
        Models run on Hugging Face infrastructure.

        **Note** : Ceci est une démonstration pédagogique des systèmes RAG.
        Les modèles tournent sur l'infrastructure Hugging Face.
        """)

    return demo

if __name__ == "__main__":
    demo = create_interface()
    demo.launch()
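
# To run locally (a minimal sketch; the exact dependencies of rag_system.py and
# i18n.py are not shown here, so this list is an assumption):
#   pip install gradio spaces
#   python app.py
# Gradio serves the interface at http://127.0.0.1:7860 by default.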