import gradio as gr import spaces from rag_system import RAGSystem from i18n import get_text # Initialize RAG system rag = RAGSystem() @spaces.GPU def process_pdf(pdf_file, embedding_model, chunk_size, chunk_overlap): """Process uploaded PDF and create embeddings""" try: # Set embedding model BEFORE processing rag.set_embedding_model(embedding_model) if pdf_file is None: # Load default corpus status, chunks_display, corpus_text = rag.load_default_corpus(chunk_size, chunk_overlap) else: status, chunks_display, corpus_text = rag.process_document(pdf_file.name, chunk_size, chunk_overlap) # Generate example questions based on the corpus example_questions = rag.generate_example_questions(num_questions=5) return status, chunks_display, corpus_text, example_questions except Exception as e: return f"Error: {str(e)}", "", "", [] @spaces.GPU def perform_query( query, top_k, similarity_threshold, llm_model, temperature, max_tokens ): """Perform RAG query and return results""" if not rag.is_ready(): return "", "⚠️ Please process a corpus first in the Corpus tab.", "", "" try: # Set LLM model rag.set_llm_model(llm_model) # Retrieve relevant chunks results = rag.retrieve(query, top_k, similarity_threshold) # Format retrieved chunks display chunks_display = format_chunks(results) # Generate answer answer, prompt = rag.generate( query, results, temperature, max_tokens ) return chunks_display, prompt, answer, "" except Exception as e: import traceback error_details = traceback.format_exc() return "", "", "", f"❌ Error: {str(e)}\n\nFull traceback:\n{error_details}" def format_chunks(results): """Format retrieved chunks with scores for display""" if not results: return "No relevant chunks found." output = "### 📄 Retrieved Chunks\n\n" for i, (chunk, score) in enumerate(results, 1): output += f"**Chunk {i}** - Similarity Score: `{score:.4f}`\n" output += f"```\n{chunk}\n```\n\n" return output def create_interface(): with gr.Blocks(title="RAG Pedagogical Demo", theme=gr.themes.Soft()) as demo: # Header - Bilingual gr.Markdown("# 🎓 RAG Pedagogical Demo / Démo Pédagogique RAG") gr.Markdown("*A pedagogical tool to understand Retrieval Augmented Generation / Un outil pédagogique pour comprendre la génération augmentée par récupération*") with gr.Tabs() as tabs: # Tab 1: Corpus Management with gr.Tab(label="📚 Corpus"): gr.Markdown("## Corpus Management / Gestion du Corpus") gr.Markdown(""" **EN - Default corpus:** Multiple PDF documents from the `documents/` folder. Or upload your own PDF. **FR - Corpus par défaut :** Plusieurs documents PDF du dossier `documents/`. Ou téléchargez votre propre PDF. 1. Select your embedding model / Sélectionnez votre modèle d'embedding 2. Adjust chunking parameters if needed / Ajustez les paramètres de découpage si nécessaire 3. Click "Process Corpus" / Cliquez sur "Process Corpus" """) # Embedding model selection FIRST embedding_model = gr.Dropdown( choices=[ "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2", "intfloat/multilingual-e5-base", "ibm-granite/granite-embedding-107m-multilingual", ], value="sentence-transformers/all-MiniLM-L6-v2", label="🔤 Embedding Model / Modèle d'Embedding (select before processing / sélectionnez avant traitement)" ) pdf_upload = gr.File( label="📄 Upload PDF / Télécharger PDF (optional / optionnel)", file_types=[".pdf"] ) with gr.Row(): chunk_size = gr.Slider( minimum=100, maximum=1000, value=500, step=50, label="Chunk Size / Taille des Chunks (characters / caractères)" ) chunk_overlap = gr.Slider( minimum=0, maximum=200, value=50, step=10, label="Chunk Overlap / Chevauchement (characters / caractères)" ) process_btn = gr.Button("🚀 Process Corpus / Traiter le Corpus", variant="primary", size="lg") corpus_status = gr.Textbox(label="Status / Statut", interactive=False) # Display default corpus info with gr.Accordion("📖 Corpus Information / Informations sur le Corpus", open=False): default_corpus_display = gr.Markdown() # Display processed chunks with gr.Accordion("📑 Processed Chunks / Chunks Traités", open=False): processed_chunks_display = gr.Markdown() # State to hold example questions example_questions_state = gr.State([]) process_btn.click( fn=process_pdf, inputs=[pdf_upload, embedding_model, chunk_size, chunk_overlap], outputs=[corpus_status, processed_chunks_display, default_corpus_display, example_questions_state] ) # Tab 2: Retrieval Configuration with gr.Tab(label="🔍 Retrieval / Récupération"): gr.Markdown("## Retrieval Configuration / Configuration de la Récupération") gr.Markdown(""" **EN:** Configure how relevant chunks are retrieved from the corpus. **FR:** Configurez comment les chunks pertinents sont récupérés du corpus. """) gr.Markdown("**Current Embedding Model / Modèle d'Embedding Actuel:** The model selected in the Corpus tab / Le modèle sélectionné dans l'onglet Corpus") with gr.Row(): top_k = gr.Slider( minimum=1, maximum=10, value=3, step=1, label="Top K (number of chunks / nombre de chunks à récupérer)" ) similarity_threshold = gr.Slider( minimum=0.0, maximum=1.0, value=0.5, step=0.05, label="Similarity Threshold / Seuil de Similarité (minimum score / score minimum)" ) # Tab 3: Generation Configuration with gr.Tab(label="🤖 Generation / Génération"): gr.Markdown("## Generation Configuration / Configuration de la Génération") gr.Markdown(""" **EN:** Select the language model and configure generation parameters. **FR:** Sélectionnez le modèle de langage et configurez les paramètres de génération. """) llm_model = gr.Dropdown( choices=[ "meta-llama/Llama-3.2-1B-Instruct", "Qwen/Qwen3-1.7B", "google/gemma-2-2b-it", ], value="meta-llama/Llama-3.2-1B-Instruct", label="Language Model / Modèle de Langage" ) with gr.Row(): temperature = gr.Slider( minimum=0.0, maximum=2.0, value=0.7, step=0.1, label="Temperature / Température (creativity / créativité)" ) max_tokens = gr.Slider( minimum=100, maximum=2048, value=800, step=50, label="Max Tokens (response length / longueur réponse - higher for reasoning / plus pour raisonnement)" ) # Tab 4: Query & Results with gr.Tab(label="💬 Query / Requête"): gr.Markdown("## Ask a Question / Posez une Question") query_input = gr.Textbox( label="Your Question / Votre Question", placeholder="Enter your question here / Entrez votre question ici...", lines=3 ) with gr.Accordion("💡 Example Questions / Questions d'Exemple (click to expand / cliquez pour développer)", open=True): gr.Markdown("*Questions generated based on your corpus content / Questions générées à partir de votre corpus*") examples_markdown = gr.Markdown(visible=False) # Connect processing to update examples def format_questions_markdown(questions): if not questions or len(questions) == 0: return gr.update(value="", visible=False) md = "" for i, q in enumerate(questions, 1): md += f"{i}. {q}\n\n" return gr.update(value=md, visible=True) example_questions_state.change( fn=format_questions_markdown, inputs=[example_questions_state], outputs=[examples_markdown] ) query_btn = gr.Button("🔍 Submit Query", variant="primary", size="lg") # Results in order: chunks → prompt → answer gr.Markdown("---") gr.Markdown("### 📊 Results") with gr.Accordion("1️⃣ Retrieved Chunks", open=True): chunks_output = gr.Markdown() with gr.Accordion("2️⃣ Prompt Sent to LLM", open=True): prompt_output = gr.Textbox(lines=10, max_lines=20, show_copy_button=True) with gr.Accordion("3️⃣ Generated Answer", open=True): answer_output = gr.Markdown() error_output = gr.Textbox(label="Errors", visible=False) query_btn.click( fn=perform_query, inputs=[ query_input, top_k, similarity_threshold, llm_model, temperature, max_tokens ], outputs=[chunks_output, prompt_output, answer_output, error_output] ) # Footer gr.Markdown(""" --- **Note**: This is a pedagogical demonstration of RAG systems. Models run on HuggingFace infrastructure. **Note** : Ceci est une démonstration pédagogique des systèmes RAG. Les modèles tournent sur l'infrastructure HuggingFace. """) return demo if __name__ == "__main__": demo = create_interface() demo.launch()