"""Elixir - Document Intelligence: Gradio demo that extracts structured data from documents.

A PDF (up to ``MAX_PDF_PAGES`` pages) or an image is rendered page by page, each
page is sent to a Gemini model together with ``GEMINI_PROMPT``, the per-page
JSON answers are merged and displayed in a tabbed interface.

NOTE(review): the HTML fragments in this file were reconstructed from the CSS
class names defined in ``CSS`` because the markup was not visible in the
reviewed copy of the source — confirm against the deployed version.
"""

import base64
import html
import io
import json
import os
import shutil
import tempfile
import time

import fitz  # PyMuPDF (import name that works on Hugging Face)
import google.generativeai as genai
import gradio as gr
from PIL import Image

# Configuration
# The API key must never be committed to source control: it is read from the
# environment (e.g. a Hugging Face Space secret). Any key that was previously
# hard-coded in this file has to be considered leaked and must be revoked.
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")
if not GOOGLE_API_KEY:
    print("GOOGLE_API_KEY is not set; Gemini requests will fail until it is configured.")
genai.configure(api_key=GOOGLE_API_KEY)
model = genai.GenerativeModel('gemini-1.5-flash')

# Processing limits and rendering parameters
MAX_PDF_PAGES = 10
PREVIEW_PAGES = 3
RENDER_ZOOM = 2.0  # 2x zoom gives the model a sharper page image
IMAGE_EXTENSIONS = ('.png', '.jpg', '.jpeg')
LIST_CATEGORIES = ("entities", "values", "dates", "tables", "key_points", "references")
OUTPUT_COUNT = 8  # number of HTML components updated by process_and_display

# A single page with several tables easily exceeds 2048 output tokens, which
# truncates the JSON and makes it unparsable; 8192 is the model's output limit.
GENERATION_CONFIG = {
    "temperature": 0.1,
    "top_p": 0.8,
    "top_k": 40,
    "max_output_tokens": 8192,
}

# Interface text (English only)
TEXT = {
    "title": "Elixir - Document Intelligence",
    "description": "This demo showcases the capabilities of a generative AI model to interpret, understand, and classify any type of document WITHOUT CUSTOMIZATION. For developing a complete, precise, and defined pipeline, please contact martial@lexiapro.fr.",
    "instructions": [
        "1. Upload a PDF document (1-10 pages) such as an invoice, regulatory document, report...",
        "2. Processing by Elixir",
        "3. Transcription of identified sections and elements (without customization)"
    ],
    "upload": "📂 Upload your document",
    "analyze": "🔍 Analyze document",
    "preview": "📄 Preview",
    "tabs": {
        "overview": "📋 Overview",
        "entities": "👥 Entities",
        "values": "💰 Values",
        "dates": "📅 Dates",
        "tables": "📊 Tables",
        "keypoints": "🔑 Key Points",
        "references": "🔗 References",
        "json": "📄 Complete JSON"
    },
    "no_data": "No information found",
    "processing": "Processing...",
    "error": {
        "file_not_found": "File not found",
        "pdf_conversion": "Unable to convert PDF to image",
        "no_info": "No information extracted from PDF pages",
        "too_many_pages": "The PDF has more than 10 pages. Please upload a document with 10 pages or less.",
        "unsupported_format": "Unsupported file format. Please upload a PDF, PNG or JPEG file."
    }
}

# Modern CSS - improved style
CSS = """
@import url('https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700&display=swap');

:root {
    --primary: #4f46e5;
    --primary-light: #818cf8;
    --primary-dark: #3730a3;
    --secondary: #10b981;
    --accent: #f59e0b;
    --dark: #111827;
    --light: #f9fafb;
    --gray-50: #f8fafc;
    --gray-100: #f1f5f9;
    --gray-200: #e2e8f0;
    --gray-300: #cbd5e1;
    --gray-400: #94a3b8;
    --gray-500: #64748b;
    --text-primary: #1e293b;
    --text-secondary: #475569;
    --shadow-sm: 0 1px 2px 0 rgba(0, 0, 0, 0.05);
    --shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.1), 0 2px 4px -1px rgba(0, 0, 0, 0.06);
    --shadow-md: 0 10px 15px -3px rgba(0, 0, 0, 0.1), 0 4px 6px -2px rgba(0, 0, 0, 0.05);
    --radius-sm: 0.25rem;
    --radius: 0.5rem;
    --radius-md: 0.75rem;
    --radius-lg: 1rem;
}

body, .gradio-container {
    font-family: 'Inter', -apple-system, BlinkMacSystemFont, sans-serif !important;
    color: var(--text-primary);
    background-color: var(--light);
    line-height: 1.6;
}

/* Layout principal */
.container {
    max-width: 1300px;
    margin: 0 auto;
    padding: 0 1rem;
}

.main-content {
    display: flex;
    gap: 2rem;
    align-items: flex-start;
}

.left-panel {
    flex: 1;
}

.right-panel {
    flex: 2;
}

/* En-tête */
.header {
    margin-bottom: 2rem;
    padding: 0.75rem 1.25rem;
    background: linear-gradient(135deg, var(--primary-light), var(--primary-dark));
    border-radius: var(--radius-lg);
    box-shadow: var(--shadow-md);
    position: relative;
    overflow: hidden;
    color: white;
    height: 60px;
    display: flex;
    align-items: center;
    justify-content: center;
}

.header::before {
    content: '';
    position: absolute;
    top: -50%;
    left: -50%;
    width: 200%;
    height: 200%;
    background: radial-gradient(circle, rgba(255,255,255,0.1) 0%, rgba(255,255,255,0) 60%);
    animation: pulse 15s ease-in-out infinite;
    z-index: 1;
}

@keyframes pulse {
    0% { transform: scale(1); opacity: 0.5; }
    50% { transform: scale(1.05); opacity: 0.8; }
    100% { transform: scale(1); opacity: 0.5; }
}

.header img {
    max-height: 40px !important;
    object-fit: contain;
    position: relative;
    z-index: 2;
}

/* Intro card */
.intro-card {
    background: white;
    border-radius: var(--radius);
    box-shadow: var(--shadow);
    border: 1px solid var(--gray-200);
    overflow: hidden;
    margin-bottom: 1.5rem;
    transition: transform 0.3s ease, box-shadow 0.3s ease;
}

.intro-card:hover {
    transform: translateY(-3px);
    box-shadow: var(--shadow-md);
}

.intro-header {
    padding: 1.25rem;
    border-bottom: 1px solid var(--gray-200);
    background: linear-gradient(135deg, var(--primary-light), var(--primary-dark));
    color: white;
    font-weight: 600;
    display: flex;
    align-items: center;
    gap: 0.5rem;
}

.intro-header h3 {
    margin: 0;
    font-size: 1.25rem;
    font-weight: 600;
    text-shadow: 0 1px 2px rgba(0,0,0,0.1);
}

.intro-body {
    padding: 1.5rem;
}

.intro-description {
    color: var(--text-primary);
    line-height: 1.7;
    font-size: 1.05rem;
    margin-bottom: 1.5rem;
}

.contact-links {
    display: flex;
    flex-wrap: wrap;
    gap: 1rem;
    margin-top: 1.5rem;
    background: linear-gradient(to right, rgba(79, 70, 229, 0.05), rgba(79, 70, 229, 0.1));
    padding: 1.25rem;
    border-radius: var(--radius);
    border: 1px solid var(--gray-200);
}

.contact-link {
    display: flex;
    align-items: center;
    gap: 0.5rem;
    padding: 0.75rem 1rem;
    background: white;
    border-radius: var(--radius);
    color: var(--primary);
    text-decoration: none;
    font-weight: 500;
    transition: all 0.2s ease;
    box-shadow: var(--shadow-sm);
    border: 1px solid var(--gray-200);
}

.contact-link:hover {
    transform: translateY(-2px);
    box-shadow: var(--shadow);
    color: var(--primary-dark);
    border-color: var(--primary-light);
}

/* Accordéon pour workflow */
.accordion {
    border-radius: var(--radius);
    overflow: hidden;
    margin-bottom: 1.5rem;
}

.accordion-header {
    background: var(--gray-50);
    padding: 1.25rem;
    cursor: pointer;
    display: flex;
    align-items: center;
    justify-content: space-between;
    font-weight: 600;
    color: var(--primary);
    border: 1px solid var(--gray-200);
    border-radius: var(--radius);
    transition: all 0.3s ease;
}

.accordion-header:hover {
    background: var(--gray-100);
}

.accordion-header::after {
    content: "↓";
    transition: transform 0.3s ease;
}

.accordion.active .accordion-header::after {
    transform: rotate(180deg);
}

.accordion-content {
    max-height: 0;
    overflow: hidden;
    transition: max-height 0.3s ease;
    background: white;
    border: 1px solid var(--gray-200);
    border-top: 0;
    border-radius: 0 0 var(--radius) var(--radius);
    padding: 0 1.25rem;
}

.accordion.active .accordion-content {
    max-height: 1000px;
    padding: 1.25rem;
}

.workflow-container {
    text-align: center;
}

.workflow-container img {
    max-width: 100%;
    border-radius: var(--radius);
    box-shadow: var(--shadow);
    margin-top: 1rem;
}

/* Instructions */
.instructions {
    background: white;
    padding: 1.5rem;
    border-radius: var(--radius);
    border: 1px solid var(--gray-200);
    box-shadow: var(--shadow);
    margin-bottom: 2rem;
}

.instructions h3 {
    color: var(--primary);
    margin-top: 0;
    margin-bottom: 1rem;
    font-weight: 600;
    font-size: 1.25rem;
    display: flex;
    align-items: center;
    gap: 0.5rem;
}

.instructions h3::before {
    content: '📋';
}

.instructions ol {
    margin: 0;
    padding-left: 1.5rem;
}

.instructions li {
    margin-bottom: 0.75rem;
    position: relative;
    padding-left: 0.5rem;
}

.instructions li:last-child {
    margin-bottom: 0;
}

/* Upload section */
.upload-section {
    background: white;
    border-radius: var(--radius);
    box-shadow: var(--shadow);
    border: 1px solid var(--gray-200);
    padding: 1.5rem;
}

/* File input styling */
.file-container {
    border: 2px dashed var(--primary-light) !important;
    border-radius: var(--radius) !important;
    padding: 2rem !important;
    text-align: center !important;
    transition: all 0.3s ease !important;
    background-color: rgba(79, 70, 229, 0.05) !important;
    cursor: pointer !important;
    position: relative;
}

.file-container:hover {
    background-color: rgba(79, 70, 229, 0.1) !important;
}

.file-container::before {
    content: "📄";
    font-size: 2rem;
    display: block;
    margin-bottom: 0.5rem;
}

button.primary {
    background: linear-gradient(135deg, var(--primary), var(--primary-dark)) !important;
    color: white !important;
    border: none !important;
    padding: 0.75rem 1.5rem !important;
    font-weight: 600 !important;
    border-radius: var(--radius) !important;
    transition: all 0.3s ease !important;
    box-shadow: 0 4px 6px rgba(79, 70, 229, 0.25) !important;
    width: 100% !important;
    margin-top: 1rem !important;
}

button.primary:hover {
    transform: translateY(-2px) !important;
    box-shadow: 0 7px 14px rgba(79, 70, 229, 0.3) !important;
}

/* Results tabs */
.tabs .tab-nav {
    background-color: var(--gray-50) !important;
    padding: 0.5rem !important;
    border-radius: var(--radius) var(--radius) 0 0 !important;
    border: 1px solid var(--gray-200) !important;
    border-bottom: none !important;
}

.tabs .tab-nav button {
    margin: 0 !important;
    padding: 0.75rem 1rem !important;
    font-weight: 500 !important;
    color: var(--text-secondary) !important;
    position: relative !important;
    transition: all 0.3s ease !important;
}

.tabs .tab-nav button.selected {
    color: var(--primary) !important;
    font-weight: 600 !important;
}

.tabs .tab-nav button.selected::after {
    content: '';
    position: absolute;
    bottom: -0.5rem;
    left: 0;
    width: 100%;
    height: 3px;
    background: var(--primary);
    border-radius: 3px 3px 0 0;
}

.tabs .tabitem {
    background: white !important;
    padding: 1.5rem !important;
    border-radius: 0 0 var(--radius) var(--radius) !important;
    border: 1px solid var(--gray-200) !important;
    box-shadow: var(--shadow) !important;
}

/* Card components */
.info-card {
    background: white;
    padding: 0;
    border-radius: var(--radius);
    margin-bottom: 1.5rem;
    border: 1px solid var(--gray-200);
    box-shadow: var(--shadow);
    overflow: hidden;
    transition: transform 0.2s ease, box-shadow 0.2s ease;
}

.info-card:hover {
    transform: translateY(-2px);
    box-shadow: var(--shadow-md);
}

.info-card h3 {
    margin: 0;
    color: white;
    font-size: 1.1rem;
    font-weight: 600;
    padding: 1rem 1.5rem;
    background: linear-gradient(135deg, var(--primary-light), var(--primary-dark));
    position: relative;
}

.info-card .content {
    padding: 1.25rem;
}

/* Formatage des listes dans les cartes */
.list-container {
    display: flex;
    flex-direction: column;
    gap: 1rem;
}

.list-item {
    padding: 1rem;
    background: var(--gray-50);
    border-radius: var(--radius);
    border: 1px solid var(--gray-200);
    transition: all 0.2s ease;
}

.list-item:hover {
    background: white;
    border-color: var(--primary-light);
    box-shadow: var(--shadow-sm);
}

.list-item-header {
    font-weight: 600;
    color: var(--primary);
    margin-bottom: 0.5rem;
    display: flex;
    align-items: center;
    gap: 0.5rem;
}

.list-item-header::before {
    content: '•';
    color: var(--primary);
    font-size: 1.5rem;
    line-height: 1;
}

.list-item-content {
    color: var(--text-secondary);
    font-size: 0.95rem;
}

/* Améliorations tables */
.tables-container {
    display: flex;
    flex-direction: column;
    gap: 2rem;
}

.table-wrapper {
    overflow: hidden;
    border-radius: var(--radius);
    box-shadow: var(--shadow);
    background: white;
}

.table-wrapper h4 {
    padding: 1rem;
    margin: 0;
    background: linear-gradient(to right, var(--primary-light), var(--primary));
    color: white;
    font-weight: 600;
}

.table-description {
    margin: 0;
    padding: 0.75rem 1rem;
    background: var(--gray-50);
    color: var(--text-secondary);
    border-bottom: 1px solid var(--gray-200);
    font-size: 0.9rem;
    font-style: italic;
}

.data-table {
    width: 100%;
    border-collapse: collapse;
    font-size: 0.95rem;
}

.data-table th {
    background: var(--gray-100);
    padding: 0.75rem 1rem;
    text-align: left;
    font-weight: 600;
    color: var(--primary-dark);
    border-bottom: 2px solid var(--primary-light);
}

.data-table td {
    padding: 0.75rem 1rem;
    border-bottom: 1px solid var(--gray-200);
    color: var(--text-secondary);
}

.data-table tr:last-child td {
    border-bottom: none;
}

.data-table tr:nth-child(even) {
    background-color: var(--gray-50);
}

.data-table tr:hover {
    background-color: rgba(79, 70, 229, 0.05);
}

/* Metadata grid */
.metadata-grid {
    display: grid;
    grid-template-columns: repeat(auto-fill, minmax(200px, 1fr));
    gap: 1rem;
}

.metadata-item {
    background: var(--gray-50);
    padding: 1rem;
    border-radius: var(--radius);
    border: 1px solid var(--gray-200);
    transition: all 0.2s ease;
}

.metadata-item:hover {
    background: white;
    border-color: var(--primary-light);
    box-shadow: var(--shadow-sm);
}

.metadata-item h4 {
    margin: 0 0 0.5rem 0;
    color: var(--primary);
    font-weight: 600;
    font-size: 0.9rem;
    text-transform: uppercase;
    letter-spacing: 0.5px;
}

.metadata-item p {
    margin: 0;
    color: var(--text-primary);
    font-weight: 500;
}

/* JSON viewer */
.json-viewer {
    background: var(--dark);
    color: #e2e8f0;
    padding: 1.25rem;
    border-radius: var(--radius);
    overflow: auto;
    font-family: 'Fira Code', 'Courier New', monospace;
    font-size: 0.9rem;
    line-height: 1.5;
    max-height: 400px;
    white-space: pre-wrap;
}

/* Loading animation */
.loading-spinner {
    display: inline-block;
    width: 50px;
    height: 50px;
    border: 3px solid rgba(79, 70, 229, 0.3);
    border-radius: 50%;
    border-top-color: var(--primary);
    animation: spin 1s ease-in-out infinite;
}

@keyframes spin {
    to { transform: rotate(360deg); }
}

/* Error message */
.error {
    padding: 1rem;
    background-color: #fee2e2;
    border: 1px solid #fecaca;
    border-radius: var(--radius);
    color: #b91c1c;
    font-weight: 500;
}

/* Responsive design */
@media (max-width: 1024px) {
    .main-content {
        flex-direction: column;
    }

    .left-panel, .right-panel {
        flex: none;
        width: 100%;
    }
}
"""

# Prompt for Gemini, with reinforced instructions for table extraction
GEMINI_PROMPT = """
Analyze this document and extract relevant information in JSON format.
Adapt the extraction based on the document type (invoice, contract, report, KID, etc.).

Expected response structure:
{
    "metadata": {
        "title": "Document title",
        "date": "Document date",
        "type": "Document type",
        "author": "Document author or issuer"
    },
    "entities": [
        {
            "name": "Entity name",
            "type": "Entity type (person, organization, etc.)",
            "role": "Role in the document"
        }
    ],
    "values": [
        {
            "description": "Value description",
            "value": "Exact value",
            "unit": "Unit if applicable"
        }
    ],
    "dates": [
        {
            "description": "Date description",
            "date": "Exact date",
            "importance": "Importance (high, medium, low)"
        }
    ],
    "tables": [
        {
            "title": "Table title",
            "description": "Table description",
            "data": [
                {
                    "column1": "Value in row 1, column 1",
                    "column2": "Value in row 1, column 2",
                    "column3": "Value in row 1, column 3"
                },
                {
                    "column1": "Value in row 2, column 1",
                    "column2": "Value in row 2, column 2",
                    "column3": "Value in row 2, column 3"
                }
            ]
        }
    ],
    "key_points": [
        {
            "category": "Key point category",
            "description": "Detailed description",
            "importance": "Importance (high, medium, low)"
        }
    ],
    "references": [
        {
            "type": "Reference type",
            "value": "Reference value"
        }
    ]
}

Important instructions:
1. First identify the document type and adapt the extraction accordingly
2. For tables (this is EXTREMELY important):
   - Pay special attention to detect and extract ALL tables in the document
   - Carefully identify tables even if they don't have visible borders or lines
   - Identify column headers correctly (first row or separate header row)
   - Extract all rows and all columns with exact cell values
   - Maintain the same number of columns for each row
   - Preserve the exact structure of each table
   - For each table, provide a descriptive title based on content
   - For each table, include a brief description explaining what the table contains
   - If a table spans multiple pages, try to reconstruct it as one table
   - Include ALL data from the table, don't omit any rows or columns
3. For values:
   - Extract amounts, percentages, numbers
   - Include units when present
4. For dates:
   - Extract all important dates
   - Include the context of each date
5. For entities:
   - Identify people, organizations, locations
   - Include their role in the document
6. For references:
   - Extract reference numbers, codes, identifiers
7. For key points:
   - Identify important information based on document type
   - Categorize them appropriately

General rules:
- Respond only with JSON, without any additional text
- Extract only factual and verifiable information
- Be precise with values and dates
- If a category is not relevant for the document, leave an empty array
- Adapt categories based on document type
- Do not make assumptions about missing data
"""


def _escape(value):
    """Return *value* as HTML-safe text (``None`` becomes an empty string).

    Everything extracted by the model originates from an uploaded document and
    is therefore untrusted; it must be escaped before being placed in markup.
    """
    return html.escape("" if value is None else str(value))


def _file_path(file):
    """Return the filesystem path of a Gradio upload.

    Accepts both a plain path string and an object exposing the path as
    ``.name``, so the handlers work whichever form ``gr.File`` delivers.
    """
    return file if isinstance(file, str) else file.name


def _strip_code_fence(text):
    """Remove one wrapping Markdown code fence (``` or ```json) from *text*.

    Only the leading and trailing fence are removed, so backticks that are
    part of the JSON payload itself are left untouched.
    """
    text = text.strip()
    if text.startswith("```"):
        text = text[3:]
        if text[:4].lower() == "json":
            text = text[4:]
        text = text.strip()
        if text.endswith("```"):
            text = text[:-3]
    return text.strip()


def _render_page(page, zoom=RENDER_ZOOM):
    """Render a PyMuPDF page to a fully loaded PIL image at the given zoom."""
    pix = page.get_pixmap(matrix=fitz.Matrix(zoom, zoom), alpha=False)
    image = Image.open(io.BytesIO(pix.tobytes("jpeg")))
    image.load()  # decode now so the image does not depend on lazy reads later
    return image


def create_info_card(title, content):
    """Wrap *content* (already-formatted HTML) in a titled information card.

    Args:
        title: Card heading; expected to be trusted interface text.
        content: HTML body of the card. When empty, the "no data" message is
            shown instead.

    Returns:
        The card markup as a string.
    """
    if not content:
        no_data = TEXT["no_data"]
        return f"""
        <div class="info-card">
            <h3>{title}</h3>
            <div class="content">
                <p>{no_data}</p>
            </div>
        </div>
        """
    return f"""
    <div class="info-card">
        <h3>{title}</h3>
        <div class="content">
            {content}
        </div>
    </div>
    """


def format_list(items, key1, key2):
    """Format a list of dicts as HTML, showing two of their keys per entry.

    Args:
        items: Iterable of dicts as returned by the model.
        key1: Key displayed as the entry header.
        key2: Key displayed as the entry body.

    Returns:
        HTML for the list, or the "no data" message when nothing can be shown.
        Missing keys render as empty text instead of raising ``KeyError``.
    """
    if not items:
        return TEXT["no_data"]

    entries = [
        "<div class='list-item'>"
        f"<div class='list-item-header'>{_escape(item.get(key1))}</div>"
        f"<div class='list-item-content'>{_escape(item.get(key2))}</div>"
        "</div>"
        for item in items
        if isinstance(item, dict)
    ]
    if not entries:
        return TEXT["no_data"]
    return "<div class='list-container'>" + "".join(entries) + "</div>"


def _format_single_table(table):
    """Render one table dict (with a non-empty ``data`` list) as HTML."""
    rows = table["data"]
    title = _escape(table.get("title", "Tableau sans titre"))
    description = _escape(table.get("description", ""))
    parts = [
        "<div class='table-wrapper'>"
        f"<h4>{title}</h4>"
        f"<p class='table-description'>{description}</p>"
        "<table class='data-table'>"
    ]

    first_row = rows[0]
    if isinstance(first_row, dict):
        # The keys of the first row define the column headers and their order.
        headers = list(first_row)
        parts.append("<thead><tr>")
        parts.extend(f"<th>{_escape(header)}</th>" for header in headers)
        parts.append("</tr></thead><tbody>")
        for row in rows:
            if not isinstance(row, dict):
                continue
            parts.append("<tr>")
            parts.extend(f"<td>{_escape(row.get(header, ''))}</td>" for header in headers)
            parts.append("</tr>")
        parts.append("</tbody>")
    elif isinstance(first_row, list):
        # List-shaped rows carry no header information; every row is data.
        parts.append("<tbody>")
        for row in rows:
            if not isinstance(row, list):
                continue
            parts.append("<tr>")
            parts.extend(f"<td>{_escape(cell)}</td>" for cell in row)
            parts.append("</tr>")
        parts.append("</tbody>")

    parts.append("</table></div>")
    return "".join(parts)


def format_table(table_data):
    """Format the extracted tables as HTML.

    Args:
        table_data: List of table dicts with ``title``, ``description`` and
            ``data`` (list of dict rows or list of list rows).

    Returns:
        HTML for all non-empty tables, or the "no data" message when there is
        none. A malformed table is replaced by an error notice without
        preventing the remaining tables from being displayed.
    """
    if not table_data:
        return TEXT["no_data"]

    rendered = []
    for table in table_data:
        if not isinstance(table, dict):
            continue
        rows = table.get('data')
        # Skip tables without usable rows
        if not isinstance(rows, list) or not rows:
            continue
        try:
            rendered.append(_format_single_table(table))
        except (AttributeError, KeyError, TypeError) as e:
            print(f"Erreur lors du formatage des tableaux: {str(e)}")
            rendered.append(
                "<div class='error'>Erreur lors de l'affichage des tableaux. "
                "Veuillez vérifier le format JSON.</div>"
            )

    if not rendered:
        return TEXT["no_data"]
    return "<div class='tables-container'>" + "".join(rendered) + "</div>"


def process_single_image(image):
    """Send one page image to Gemini and return the parsed extraction result.

    Args:
        image: PIL image of a document page.

    Returns:
        The decoded JSON object (a dict following the structure requested in
        ``GEMINI_PROMPT``, with table entries normalised), or
        ``{"error": message}`` when the request or the decoding fails.
    """
    try:
        print("Envoi de l'image à Gemini pour analyse...")
        response = model.generate_content(
            [GEMINI_PROMPT, image],
            generation_config=GENERATION_CONFIG,
        )
    except Exception as e:
        print(f"Erreur lors de l'appel à Gemini: {str(e)}")
        return {"error": str(e)}

    response_text = ""
    try:
        response_text = response.text.strip()
        print(f"Réponse reçue de Gemini, longueur: {len(response_text)} caractères")
        # The model sometimes wraps its JSON in a Markdown code fence
        response_text = _strip_code_fence(response_text)
        json_data = json.loads(response_text)
    except json.JSONDecodeError as e:
        print(f"Erreur de décodage JSON: {str(e)}")
        print(f"Contenu problématique: {response_text[:500]}...")
        return {"error": "Erreur de format JSON dans la réponse"}
    except Exception as e:
        # response.text raises when the answer was blocked or is empty
        print(f"Erreur lors du traitement de la réponse Gemini: {str(e)}")
        return {"error": str(e)}

    if not isinstance(json_data, dict):
        # Valid JSON but not the requested object (e.g. a bare list)
        print(f"Contenu problématique: {response_text[:500]}...")
        return {"error": "Erreur de format JSON dans la réponse"}

    # Make sure every table has data, a title and a description
    tables = json_data.get("tables")
    if isinstance(tables, list):
        normalised = []
        for i, table in enumerate(tables):
            if not isinstance(table, dict):
                continue
            if not table.get("data"):
                table["data"] = []
            if not table.get("title"):
                table["title"] = f"Tableau {i+1}"
            table.setdefault("description", "")
            normalised.append(table)
        json_data["tables"] = normalised

    return json_data


def merge_results(results):
    """Merge the per-page extraction results into a single result.

    Args:
        results: List of result dicts, one per processed page.

    Returns:
        ``None`` when *results* is empty; otherwise a dict whose ``metadata``
        comes from the first page that provided some and whose list
        categories are the concatenation of all pages, in page order.
    """
    if not results:
        return None

    merged = {"metadata": {}}
    merged.update({category: [] for category in LIST_CATEGORIES})

    # Metadata: take it from the first result that has any
    merged["metadata"] = next(
        (result["metadata"] for result in results if result.get("metadata")),
        {},
    )

    # Lists: concatenate; ignore values that are not lists so a malformed
    # page cannot corrupt the merged output
    for result in results:
        for category in LIST_CATEGORIES:
            values = result.get(category)
            if isinstance(values, list):
                merged[category].extend(values)

    return merged


def _process_pdf(path, progress):
    """Extract information from every page of the PDF at *path*."""
    results = []
    try:
        # Work on a private copy in case the upload is moved or modified
        # during processing; both the copy and the document are released
        # on every exit path.
        with tempfile.TemporaryDirectory() as temp_dir:
            temp_pdf = os.path.join(temp_dir, "temp.pdf")
            shutil.copy2(path, temp_pdf)

            with fitz.open(temp_pdf) as doc:
                page_count = doc.page_count
                if page_count > MAX_PDF_PAGES:
                    return {"error": TEXT["error"]["too_many_pages"]}

                print(f"Traitement d'un PDF de {page_count} pages")
                for i in range(page_count):
                    progress((i + 1) / page_count,
                             desc=f"{TEXT['processing']} page {i+1}/{page_count}")
                    try:
                        result = process_single_image(_render_page(doc[i]))
                    except Exception as e:
                        # One unreadable page must not abort the whole document
                        print(f"Erreur lors du traitement de la page {i+1}: {str(e)}")
                        continue

                    if result and "error" not in result:
                        results.append(result)
                        print(f"Page {i+1} traitée avec succès")
                    else:
                        print(f"Pas d'informations extraites de la page {i+1}")
    except Exception as e:
        print(f"Erreur lors du traitement du PDF: {str(e)}")
        return {"error": str(e)}

    if results:
        return merge_results(results)
    return {"error": TEXT["error"]["no_info"]}


def process_document(file, progress=gr.Progress()):
    """Process an uploaded document and extract information from it.

    Args:
        file: Gradio upload (path string or object with ``.name``); PDF, PNG
            or JPEG.
        progress: Gradio progress tracker. The ``gr.Progress()`` default is
            the form Gradio expects for progress reporting.

    Returns:
        The extraction result dict, or ``{"error": message}``.
    """
    if not file:
        return {"error": TEXT["error"]["file_not_found"]}

    try:
        path = _file_path(file)
        lowered = path.lower()

        if lowered.endswith('.pdf'):
            return _process_pdf(path, progress)

        if lowered.endswith(IMAGE_EXTENSIONS):
            try:
                with Image.open(path) as image:
                    return process_single_image(image)
            except Exception as e:
                print(f"Erreur lors du traitement de l'image: {str(e)}")
                return {"error": str(e)}

        return {"error": TEXT["error"]["unsupported_format"]}
    except Exception as e:
        print(f"Erreur inattendue dans process_document: {str(e)}")
        return {"error": str(e)}


def _preview_pdf(path):
    """Render the first ``PREVIEW_PAGES`` pages of the PDF at *path* as images."""
    images = []
    try:
        with tempfile.TemporaryDirectory() as temp_dir:
            temp_pdf = os.path.join(temp_dir, "temp.pdf")
            shutil.copy2(path, temp_pdf)

            with fitz.open(temp_pdf) as doc:
                # Only the first pages are previewed
                max_pages = min(PREVIEW_PAGES, doc.page_count)
                print(f"PDF a {doc.page_count} pages, prévisualisant {max_pages} pages")
                for i in range(max_pages):
                    try:
                        images.append(_render_page(doc[i]))
                    except Exception as e:
                        print(f"Erreur lors du traitement de la page {i+1}: {str(e)}")
    except Exception as e:
        print(f"Erreur lors de la conversion PDF: {str(e)}")
        return []

    print(f"Prévisualisation créée avec succès: {len(images)} images")
    return images


def update_preview(file):
    """Build the gallery preview for the uploaded file.

    Args:
        file: Gradio upload (path string or object with ``.name``).

    Returns:
        A list for ``gr.Gallery``: in-memory page images for a PDF, the file
        path for an image upload, or an empty list when nothing can be shown.
        Pages are returned in memory rather than written to fixed file names
        in the working directory, so concurrent sessions cannot overwrite
        each other's previews and no files are left behind.
    """
    if not file:
        return []

    try:
        path = _file_path(file)
        lowered = path.lower()

        if lowered.endswith('.pdf'):
            return _preview_pdf(path)
        if lowered.endswith(IMAGE_EXTENSIONS):
            return [path]

        print(f"Format de fichier non pris en charge: {path}")
        return []
    except Exception as e:
        print(f"Erreur inattendue dans update_preview: {str(e)}")
        return []


def process_and_display(file, progress=gr.Progress()):
    """Process the document and build the HTML for every result view.

    Args:
        file: Gradio upload to analyse.
        progress: Gradio progress tracker; declared here so that the page
            progress reported by ``process_document`` is attached to the
            click event.

    Returns:
        A list of ``OUTPUT_COUNT`` HTML strings, in the order: overview,
        entities, values, dates, tables, key points, references, full JSON.
        On failure every entry contains the error message.
    """
    if not file:
        return [f"<div class='error'>{TEXT['error']['file_not_found']}</div>"] * OUTPUT_COUNT

    result = process_document(file, progress)

    if "error" in result:
        error_msg = result["error"]
        # Allow an error to be given as a key of TEXT["error"]
        if error_msg in TEXT["error"]:
            error_msg = TEXT["error"][error_msg]
        return [f"<div class='error'>{_escape(error_msg)}</div>"] * OUTPUT_COUNT

    # Format metadata as HTML
    metadata = result.get("metadata")
    if isinstance(metadata, dict) and metadata:
        metadata_items = "".join(
            "<div class='metadata-item'>"
            f"<h4>{_escape(key)}</h4>"
            f"<p>{_escape(value)}</p>"
            "</div>"
            for key, value in metadata.items()
        )
    else:
        metadata_items = f"<div class='metadata-item'><p>{TEXT['no_data']}</p></div>"
    metadata_html = f"<div class='metadata-grid'>{metadata_items}</div>"

    # Format JSON data (escaped: the JSON text contains document content)
    json_text = json.dumps(result, indent=2, ensure_ascii=False)
    json_html = f"<pre class='json-viewer'>{_escape(json_text)}</pre>"

    tab_titles = TEXT["tabs"]
    return [
        metadata_html,
        create_info_card(tab_titles["entities"], format_list(result.get("entities", []), "name", "role")),
        create_info_card(tab_titles["values"], format_list(result.get("values", []), "description", "value")),
        create_info_card(tab_titles["dates"], format_list(result.get("dates", []), "description", "date")),
        create_info_card(tab_titles["tables"], format_table(result.get("tables", []))),
        create_info_card(tab_titles["keypoints"], format_list(result.get("key_points", []), "category", "description")),
        create_info_card(tab_titles["references"], format_list(result.get("references", []), "type", "value")),
        json_html,
    ]


# Helper to encode images in base64
def get_image_base64(file_path):
    """Return the content of *file_path* as a base64 string ("" on failure)."""
    try:
        with open(file_path, "rb") as image_file:
            return base64.b64encode(image_file.read()).decode('utf-8')
    except Exception as e:
        print(f"Erreur lors de l'encodage de l'image {file_path}: {str(e)}")
        return ""


# Paths to the images
_STATIC_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "static")
logo_path = os.path.join(_STATIC_DIR, "elixir-logo-typo.png")
workflow_path = os.path.join(_STATIC_DIR, "Editor _ Mermaid Chart-2025-04-15-142548.png")

# Encode the images in base64
logo_base64 = get_image_base64(logo_path)
workflow_base64 = get_image_base64(workflow_path)

# Logo and workflow HTML
# NOTE(review): markup reconstructed from the `.header img` and
# `.workflow-container img` CSS rules — confirm.
logo_html = f"""
<div class="header">
    <img src="data:image/png;base64,{logo_base64}" alt="Elixir Logo">
</div>
"""

workflow_html = f"""
<div class="workflow-container">
    <img src="data:image/png;base64,{workflow_base64}" alt="Elixir Workflow">
</div>
"""

# JavaScript for the accordion and other interactivity
# NOTE(review): the script content was not visible in the reviewed source and
# is kept exactly as it appeared there — restore it from the original file.
js_code = """ """

# Gradio interface
# NOTE(review): the nesting of the layout below is inferred from the comments
# and component order — confirm against the original file.
with gr.Blocks(css=CSS, theme=gr.themes.Soft()) as demo:
    gr.HTML(js_code)  # Add the JavaScript

    # Header with logo
    header = gr.HTML(logo_html)

    # First row: Document Intelligence + How Elixir Works
    with gr.Row(equal_height=True):
        # Document Intelligence on the left
        with gr.Column(scale=1):
            gr.HTML(f"""
            <div class="intro-card">
                <div class="intro-header">
                    <h3>📄 Document Intelligence</h3>
                </div>
                <div class="intro-body">
                    <p class="intro-description">{TEXT["description"]}</p>
                </div>
            </div>
            """)

        # How Elixir Works on the right
        with gr.Column(scale=1):
            gr.HTML(f"""
            <div class="intro-card">
                <div class="intro-header">
                    <h3>🔄 How Elixir Works</h3>
                </div>
                <div class="intro-body">
                    {workflow_html}
                </div>
            </div>
            """)

    # Second row: input on the left, output on the right
    with gr.Row():
        # Left column: instructions and upload
        with gr.Column(scale=1):
            # Instructions
            gr.HTML("""
            <div class="instructions">
                <h3>How to use Elixir</h3>
                <ol>
                    <li>Upload a PDF document (1-10 pages) such as an invoice, regulatory document, report...</li>
                    <li>Processing by Elixir</li>
                    <li>Transcription of identified sections and elements (without customization)</li>
                </ol>
            </div>
            """)

            # Upload section
            with gr.Group(elem_classes=["upload-section"]):
                file_input = gr.File(
                    label=TEXT["upload"],
                    file_types=[".pdf", ".png", ".jpg", ".jpeg"],
                    elem_classes=["file-container"],
                )
                submit_btn = gr.Button(TEXT["analyze"], variant="primary", elem_classes=["primary"])

            preview = gr.Gallery(label=TEXT["preview"], show_label=True, elem_id="preview-gallery")

        # Right column: results and JSON
        with gr.Column(scale=1):
            # Result tabs
            with gr.Tabs(elem_classes=["tabs"]) as tabs:
                with gr.TabItem(TEXT["tabs"]["overview"]):
                    metadata_view = gr.HTML()
                with gr.TabItem(TEXT["tabs"]["entities"]):
                    entities_view = gr.HTML()
                with gr.TabItem(TEXT["tabs"]["values"]):
                    values_view = gr.HTML()
                with gr.TabItem(TEXT["tabs"]["dates"]):
                    dates_view = gr.HTML()
                with gr.TabItem(TEXT["tabs"]["tables"]):
                    tables_view = gr.HTML()
                with gr.TabItem(TEXT["tabs"]["keypoints"]):
                    keypoints_view = gr.HTML()
                with gr.TabItem(TEXT["tabs"]["references"]):
                    references_view = gr.HTML()

            # Complete JSON below the tabs. The heading is a self-contained
            # fragment: each gr.HTML component is rendered on its own, so a
            # tag opened in one component cannot be closed from another.
            gr.HTML(f"""
            <div class="info-card">
                <h3>{TEXT["tabs"]["json"]}</h3>
            </div>
            """)
            json_view = gr.HTML()

    # Loading animation
    # NOTE(review): content was not visible in the reviewed source; the
    # component is not wired to any event.
    loading_indicator = gr.HTML(""" """)

    file_input.change(
        fn=update_preview,
        inputs=file_input,
        outputs=preview
    )

    submit_btn.click(
        fn=process_and_display,
        inputs=file_input,
        outputs=[
            metadata_view,
            entities_view,
            values_view,
            dates_view,
            tables_view,
            keypoints_view,
            references_view,
            json_view,
        ]
    )

if __name__ == "__main__":
    demo.launch(share=True, server_name="0.0.0.0", server_port=7860)