Spaces:

Marsouuu
/

general-elixir-demo

Running

Assistant AI

Fix PyMuPDF import

851b6b6 2 months ago

42.6 kB

	import base64
	import html
	import io
	import json
	import os
	import shutil
	import tempfile
	import time

	import fitz # Correct import for Hugging Face
	import google.generativeai as genai
	import gradio as gr
	from PIL import Image

	# Configuration
	# The Gemini API key is read from the environment (on Hugging Face Spaces,
	# declare it as a Space secret) so that no credential lives in the source.
	# Any key that was ever committed to the repository must be revoked.
	GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")
	if not GOOGLE_API_KEY:
	    print("WARNING: GOOGLE_API_KEY is not set; calls to Gemini will fail.")
	genai.configure(api_key=GOOGLE_API_KEY)
	model = genai.GenerativeModel('gemini-1.5-flash')

	# Interface text (English only)
	# Captions of the result tabs.
	_TAB_LABELS = {
	    "overview": "📋 Overview",
	    "entities": "👥 Entities",
	    "values": "💰 Values",
	    "dates": "📅 Dates",
	    "tables": "📊 Tables",
	    "keypoints": "🔑 Key Points",
	    "references": "🔗 References",
	    "json": "📄 Complete JSON",
	}

	# User-facing error messages, keyed by a short error identifier.
	_ERROR_MESSAGES = {
	    "file_not_found": "File not found",
	    "pdf_conversion": "Unable to convert PDF to image",
	    "no_info": "No information extracted from PDF pages",
	    "too_many_pages": "The PDF has more than 10 pages. Please upload a document with 10 pages or less.",
	}

	# Every string shown by the interface, gathered in one mapping.
	TEXT = {
	    "title": "Elixir - Document Intelligence",
	    "description": "This demo showcases the capabilities of a generative AI model to interpret, understand, and classify any type of document WITHOUT CUSTOMIZATION. For developing a complete, precise, and defined pipeline, please contact martial@lexiapro.fr.",
	    "instructions": [
	        "1. Upload a PDF document (1-10 pages) such as an invoice, regulatory document, report...",
	        "2. Processing by Elixir",
	        "3. Transcription of identified sections and elements (without customization)",
	    ],
	    "upload": "📂 Upload your document",
	    "analyze": "🔍 Analyze document",
	    "preview": "📄 Preview",
	    "tabs": _TAB_LABELS,
	    "no_data": "No information found",
	    "processing": "Processing...",
	    "error": _ERROR_MESSAGES,
	}

	# Modern CSS - improved styling.
	# Stylesheet handed to gr.Blocks(css=CSS): :root design variables, page
	# layout, header, intro cards, accordion, instructions, upload area, result
	# tabs, info cards, lists, tables, metadata grid, JSON viewer, loading
	# spinner, error box, and a responsive rule for widths up to 1024px.
	CSS = """
	@import url('https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700&display=swap');

	:root {
	--primary: #4f46e5;
	--primary-light: #818cf8;
	--primary-dark: #3730a3;
	--secondary: #10b981;
	--accent: #f59e0b;
	--dark: #111827;
	--light: #f9fafb;
	--gray-50: #f8fafc;
	--gray-100: #f1f5f9;
	--gray-200: #e2e8f0;
	--gray-300: #cbd5e1;
	--gray-400: #94a3b8;
	--gray-500: #64748b;
	--text-primary: #1e293b;
	--text-secondary: #475569;
	--shadow-sm: 0 1px 2px 0 rgba(0, 0, 0, 0.05);
	--shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.1), 0 2px 4px -1px rgba(0, 0, 0, 0.06);
	--shadow-md: 0 10px 15px -3px rgba(0, 0, 0, 0.1), 0 4px 6px -2px rgba(0, 0, 0, 0.05);
	--radius-sm: 0.25rem;
	--radius: 0.5rem;
	--radius-md: 0.75rem;
	--radius-lg: 1rem;
	}

	body, .gradio-container {
	font-family: 'Inter', -apple-system, BlinkMacSystemFont, sans-serif !important;
	color: var(--text-primary);
	background-color: var(--light);
	line-height: 1.6;
	}

	/* Layout principal */
	.container {
	max-width: 1300px;
	margin: 0 auto;
	padding: 0 1rem;
	}

	.main-content {
	display: flex;
	gap: 2rem;
	align-items: flex-start;
	}

	.left-panel {
	flex: 1;
	}

	.right-panel {
	flex: 2;
	}

	/* En-tête */
	.header {
	margin-bottom: 2rem;
	padding: 0.75rem 1.25rem;
	background: linear-gradient(135deg, var(--primary-light), var(--primary-dark));
	border-radius: var(--radius-lg);
	box-shadow: var(--shadow-md);
	position: relative;
	overflow: hidden;
	color: white;
	height: 60px;
	display: flex;
	align-items: center;
	justify-content: center;
	}

	.header::before {
	content: '';
	position: absolute;
	top: -50%;
	left: -50%;
	width: 200%;
	height: 200%;
	background: radial-gradient(circle, rgba(255,255,255,0.1) 0%, rgba(255,255,255,0) 60%);
	animation: pulse 15s ease-in-out infinite;
	z-index: 1;
	}

	@keyframes pulse {
	0% { transform: scale(1); opacity: 0.5; }
	50% { transform: scale(1.05); opacity: 0.8; }
	100% { transform: scale(1); opacity: 0.5; }
	}

	.header img {
	max-height: 40px !important;
	object-fit: contain;
	position: relative;
	z-index: 2;
	}

	/* Intro card */
	.intro-card {
	background: white;
	border-radius: var(--radius);
	box-shadow: var(--shadow);
	border: 1px solid var(--gray-200);
	overflow: hidden;
	margin-bottom: 1.5rem;
	transition: transform 0.3s ease, box-shadow 0.3s ease;
	}

	.intro-card:hover {
	transform: translateY(-3px);
	box-shadow: var(--shadow-md);
	}

	.intro-header {
	padding: 1.25rem;
	border-bottom: 1px solid var(--gray-200);
	background: linear-gradient(135deg, var(--primary-light), var(--primary-dark));
	color: white;
	font-weight: 600;
	display: flex;
	align-items: center;
	gap: 0.5rem;
	}

	.intro-header h3 {
	margin: 0;
	font-size: 1.25rem;
	font-weight: 600;
	text-shadow: 0 1px 2px rgba(0,0,0,0.1);
	}

	.intro-body {
	padding: 1.5rem;
	}

	.intro-description {
	color: var(--text-primary);
	line-height: 1.7;
	font-size: 1.05rem;
	margin-bottom: 1.5rem;
	}

	.contact-links {
	display: flex;
	flex-wrap: wrap;
	gap: 1rem;
	margin-top: 1.5rem;
	background: linear-gradient(to right, rgba(79, 70, 229, 0.05), rgba(79, 70, 229, 0.1));
	padding: 1.25rem;
	border-radius: var(--radius);
	border: 1px solid var(--gray-200);
	}

	.contact-link {
	display: flex;
	align-items: center;
	gap: 0.5rem;
	padding: 0.75rem 1rem;
	background: white;
	border-radius: var(--radius);
	color: var(--primary);
	text-decoration: none;
	font-weight: 500;
	transition: all 0.2s ease;
	box-shadow: var(--shadow-sm);
	border: 1px solid var(--gray-200);
	}

	.contact-link:hover {
	transform: translateY(-2px);
	box-shadow: var(--shadow);
	color: var(--primary-dark);
	border-color: var(--primary-light);
	}

	/* Accordéon pour workflow */
	.accordion {
	border-radius: var(--radius);
	overflow: hidden;
	margin-bottom: 1.5rem;
	}

	.accordion-header {
	background: var(--gray-50);
	padding: 1.25rem;
	cursor: pointer;
	display: flex;
	align-items: center;
	justify-content: space-between;
	font-weight: 600;
	color: var(--primary);
	border: 1px solid var(--gray-200);
	border-radius: var(--radius);
	transition: all 0.3s ease;
	}

	.accordion-header:hover {
	background: var(--gray-100);
	}

	.accordion-header::after {
	content: "↓";
	transition: transform 0.3s ease;
	}

	.accordion.active .accordion-header::after {
	transform: rotate(180deg);
	}

	.accordion-content {
	max-height: 0;
	overflow: hidden;
	transition: max-height 0.3s ease;
	background: white;
	border: 1px solid var(--gray-200);
	border-top: 0;
	border-radius: 0 0 var(--radius) var(--radius);
	padding: 0 1.25rem;
	}

	.accordion.active .accordion-content {
	max-height: 1000px;
	padding: 1.25rem;
	}

	.workflow-container {
	text-align: center;
	}

	.workflow-container img {
	max-width: 100%;
	border-radius: var(--radius);
	box-shadow: var(--shadow);
	margin-top: 1rem;
	}

	/* Instructions */
	.instructions {
	background: white;
	padding: 1.5rem;
	border-radius: var(--radius);
	border: 1px solid var(--gray-200);
	box-shadow: var(--shadow);
	margin-bottom: 2rem;
	}

	.instructions h3 {
	color: var(--primary);
	margin-top: 0;
	margin-bottom: 1rem;
	font-weight: 600;
	font-size: 1.25rem;
	display: flex;
	align-items: center;
	gap: 0.5rem;
	}

	.instructions h3::before {
	content: '📋';
	}

	.instructions ol {
	margin: 0;
	padding-left: 1.5rem;
	}

	.instructions li {
	margin-bottom: 0.75rem;
	position: relative;
	padding-left: 0.5rem;
	}

	.instructions li:last-child {
	margin-bottom: 0;
	}

	/* Upload section */
	.upload-section {
	background: white;
	border-radius: var(--radius);
	box-shadow: var(--shadow);
	border: 1px solid var(--gray-200);
	padding: 1.5rem;
	}

	/* File input styling */
	.file-container {
	border: 2px dashed var(--primary-light) !important;
	border-radius: var(--radius) !important;
	padding: 2rem !important;
	text-align: center !important;
	transition: all 0.3s ease !important;
	background-color: rgba(79, 70, 229, 0.05) !important;
	cursor: pointer !important;
	position: relative;
	}

	.file-container:hover {
	background-color: rgba(79, 70, 229, 0.1) !important;
	}

	.file-container::before {
	content: "📄";
	font-size: 2rem;
	display: block;
	margin-bottom: 0.5rem;
	}

	button.primary {
	background: linear-gradient(135deg, var(--primary), var(--primary-dark)) !important;
	color: white !important;
	border: none !important;
	padding: 0.75rem 1.5rem !important;
	font-weight: 600 !important;
	border-radius: var(--radius) !important;
	transition: all 0.3s ease !important;
	box-shadow: 0 4px 6px rgba(79, 70, 229, 0.25) !important;
	width: 100% !important;
	margin-top: 1rem !important;
	}

	button.primary:hover {
	transform: translateY(-2px) !important;
	box-shadow: 0 7px 14px rgba(79, 70, 229, 0.3) !important;
	}

	/* Results tabs */
	.tabs .tab-nav {
	background-color: var(--gray-50) !important;
	padding: 0.5rem !important;
	border-radius: var(--radius) var(--radius) 0 0 !important;
	border: 1px solid var(--gray-200) !important;
	border-bottom: none !important;
	}

	.tabs .tab-nav button {
	margin: 0 !important;
	padding: 0.75rem 1rem !important;
	font-weight: 500 !important;
	color: var(--text-secondary) !important;
	position: relative !important;
	transition: all 0.3s ease !important;
	}

	.tabs .tab-nav button.selected {
	color: var(--primary) !important;
	font-weight: 600 !important;
	}

	.tabs .tab-nav button.selected::after {
	content: '';
	position: absolute;
	bottom: -0.5rem;
	left: 0;
	width: 100%;
	height: 3px;
	background: var(--primary);
	border-radius: 3px 3px 0 0;
	}

	.tabs .tabitem {
	background: white !important;
	padding: 1.5rem !important;
	border-radius: 0 0 var(--radius) var(--radius) !important;
	border: 1px solid var(--gray-200) !important;
	box-shadow: var(--shadow) !important;
	}

	/* Card components */
	.info-card {
	background: white;
	padding: 0;
	border-radius: var(--radius);
	margin-bottom: 1.5rem;
	border: 1px solid var(--gray-200);
	box-shadow: var(--shadow);
	overflow: hidden;
	transition: transform 0.2s ease, box-shadow 0.2s ease;
	}

	.info-card:hover {
	transform: translateY(-2px);
	box-shadow: var(--shadow-md);
	}

	.info-card h3 {
	margin: 0;
	color: white;
	font-size: 1.1rem;
	font-weight: 600;
	padding: 1rem 1.5rem;
	background: linear-gradient(135deg, var(--primary-light), var(--primary-dark));
	position: relative;
	}

	.info-card .content {
	padding: 1.25rem;
	}

	/* Formatage des listes dans les cartes */
	.list-container {
	display: flex;
	flex-direction: column;
	gap: 1rem;
	}

	.list-item {
	padding: 1rem;
	background: var(--gray-50);
	border-radius: var(--radius);
	border: 1px solid var(--gray-200);
	transition: all 0.2s ease;
	}

	.list-item:hover {
	background: white;
	border-color: var(--primary-light);
	box-shadow: var(--shadow-sm);
	}

	.list-item-header {
	font-weight: 600;
	color: var(--primary);
	margin-bottom: 0.5rem;
	display: flex;
	align-items: center;
	gap: 0.5rem;
	}

	.list-item-header::before {
	content: '•';
	color: var(--primary);
	font-size: 1.5rem;
	line-height: 1;
	}

	.list-item-content {
	color: var(--text-secondary);
	font-size: 0.95rem;
	}

	/* Améliorations tables */
	.tables-container {
	display: flex;
	flex-direction: column;
	gap: 2rem;
	}

	.table-wrapper {
	overflow: hidden;
	border-radius: var(--radius);
	box-shadow: var(--shadow);
	background: white;
	}

	.table-wrapper h4 {
	padding: 1rem;
	margin: 0;
	background: linear-gradient(to right, var(--primary-light), var(--primary));
	color: white;
	font-weight: 600;
	}

	.table-description {
	margin: 0;
	padding: 0.75rem 1rem;
	background: var(--gray-50);
	color: var(--text-secondary);
	border-bottom: 1px solid var(--gray-200);
	font-size: 0.9rem;
	font-style: italic;
	}

	.data-table {
	width: 100%;
	border-collapse: collapse;
	font-size: 0.95rem;
	}

	.data-table th {
	background: var(--gray-100);
	padding: 0.75rem 1rem;
	text-align: left;
	font-weight: 600;
	color: var(--primary-dark);
	border-bottom: 2px solid var(--primary-light);
	}

	.data-table td {
	padding: 0.75rem 1rem;
	border-bottom: 1px solid var(--gray-200);
	color: var(--text-secondary);
	}

	.data-table tr:last-child td {
	border-bottom: none;
	}

	.data-table tr:nth-child(even) {
	background-color: var(--gray-50);
	}

	.data-table tr:hover {
	background-color: rgba(79, 70, 229, 0.05);
	}

	/* Metadata grid */
	.metadata-grid {
	display: grid;
	grid-template-columns: repeat(auto-fill, minmax(200px, 1fr));
	gap: 1rem;
	}

	.metadata-item {
	background: var(--gray-50);
	padding: 1rem;
	border-radius: var(--radius);
	border: 1px solid var(--gray-200);
	transition: all 0.2s ease;
	}

	.metadata-item:hover {
	background: white;
	border-color: var(--primary-light);
	box-shadow: var(--shadow-sm);
	}

	.metadata-item h4 {
	margin: 0 0 0.5rem 0;
	color: var(--primary);
	font-weight: 600;
	font-size: 0.9rem;
	text-transform: uppercase;
	letter-spacing: 0.5px;
	}

	.metadata-item p {
	margin: 0;
	color: var(--text-primary);
	font-weight: 500;
	}

	/* JSON viewer */
	.json-viewer {
	background: var(--dark);
	color: #e2e8f0;
	padding: 1.25rem;
	border-radius: var(--radius);
	overflow: auto;
	font-family: 'Fira Code', 'Courier New', monospace;
	font-size: 0.9rem;
	line-height: 1.5;
	max-height: 400px;
	white-space: pre-wrap;
	}

	/* Loading animation */
	.loading-spinner {
	display: inline-block;
	width: 50px;
	height: 50px;
	border: 3px solid rgba(79, 70, 229, 0.3);
	border-radius: 50%;
	border-top-color: var(--primary);
	animation: spin 1s ease-in-out infinite;
	}

	@keyframes spin {
	to { transform: rotate(360deg); }
	}

	/* Error message */
	.error {
	padding: 1rem;
	background-color: #fee2e2;
	border: 1px solid #fecaca;
	border-radius: var(--radius);
	color: #b91c1c;
	font-weight: 500;
	}

	/* Responsive design */
	@media (max-width: 1024px) {
	.main-content {
	flex-direction: column;
	}

	.left-panel, .right-panel {
	flex: none;
	width: 100%;
	}
	}
	"""

	# Prompt for Gemini, with improved instructions for tables.
	# It is sent together with each page image by process_single_image and asks
	# for a JSON-only answer with the keys metadata, entities, values, dates,
	# tables, key_points and references.
	GEMINI_PROMPT = """
	Analyze this document and extract relevant information in JSON format. Adapt the extraction based on the document type (invoice, contract, report, KID, etc.).

	Expected response structure:
	{
	"metadata": {
	"title": "Document title",
	"date": "Document date",
	"type": "Document type",
	"author": "Document author or issuer"
	},
	"entities": [
	{
	"name": "Entity name",
	"type": "Entity type (person, organization, etc.)",
	"role": "Role in the document"
	}
	],
	"values": [
	{
	"description": "Value description",
	"value": "Exact value",
	"unit": "Unit if applicable"
	}
	],
	"dates": [
	{
	"description": "Date description",
	"date": "Exact date",
	"importance": "Importance (high, medium, low)"
	}
	],
	"tables": [
	{
	"title": "Table title",
	"description": "Table description",
	"data": [
	{
	"column1": "Value in row 1, column 1",
	"column2": "Value in row 1, column 2",
	"column3": "Value in row 1, column 3"
	},
	{
	"column1": "Value in row 2, column 1",
	"column2": "Value in row 2, column 2",
	"column3": "Value in row 2, column 3"
	}
	]
	}
	],
	"key_points": [
	{
	"category": "Key point category",
	"description": "Detailed description",
	"importance": "Importance (high, medium, low)"
	}
	],
	"references": [
	{
	"type": "Reference type",
	"value": "Reference value"
	}
	]
	}

	Important instructions:
	1. First identify the document type and adapt the extraction accordingly
	2. For tables (this is EXTREMELY important):
	- Pay special attention to detect and extract ALL tables in the document
	- Carefully identify tables even if they don't have visible borders or lines
	- Identify column headers correctly (first row or separate header row)
	- Extract all rows and all columns with exact cell values
	- Maintain the same number of columns for each row
	- Preserve the exact structure of each table
	- For each table, provide a descriptive title based on content
	- For each table, include a brief description explaining what the table contains
	- If a table spans multiple pages, try to reconstruct it as one table
	- Include ALL data from the table, don't omit any rows or columns
	3. For values:
	- Extract amounts, percentages, numbers
	- Include units when present
	4. For dates:
	- Extract all important dates
	- Include the context of each date
	5. For entities:
	- Identify people, organizations, locations
	- Include their role in the document
	6. For references:
	- Extract reference numbers, codes, identifiers
	7. For key points:
	- Identify important information based on document type
	- Categorize them appropriately

	General rules:
	- Respond only with JSON, without any additional text
	- Extract only factual and verifiable information
	- Be precise with values and dates
	- If a category is not relevant for the document, leave an empty array
	- Adapt categories based on document type
	- Do not make assumptions about missing data
	"""

	def create_info_card(title, content):
	    """Wrap *content* in an ``info-card`` block headed by *title*.

	    *content* is inserted as ready-made HTML. When it is falsy, a paragraph
	    holding ``TEXT["no_data"]`` is shown in its place.
	    """
	    inner = content if content else f"""<p>{TEXT["no_data"]}</p>"""
	    return f"""
	<div class="info-card">
	<h3>{title}</h3>
	<div class="content">
	{inner}
	</div>
	</div>
	"""

	def format_list(items, key1, key2):
	    """Render *items* as a stack of header/body HTML entries.

	    Args:
	        items: Sequence of mappings extracted by the model (may be empty or None).
	        key1: Key whose value becomes the entry header.
	        key2: Key whose value becomes the entry body.

	    Returns:
	        An HTML string, or ``TEXT["no_data"]`` when *items* is empty.

	    The values originate from the model's reading of an uploaded document,
	    so they are HTML-escaped. A missing key or a ``None`` value renders as
	    an empty string instead of raising ``KeyError``.
	    """
	    if not items:
	        return TEXT["no_data"]

	    def cell(item, key):
	        # An entry that is not a mapping (e.g. a bare string) is shown as
	        # the header, with an empty body.
	        if isinstance(item, dict):
	            value = item.get(key)
	        else:
	            value = item if key == key1 else None
	        return "" if value is None else html.escape(str(value))

	    entries = [
	        f"""
	<div class='list-item'>
	<div class='list-item-header'>{cell(item, key1)}</div>
	<div class='list-item-content'>{cell(item, key2)}</div>
	</div>
	"""
	        for item in items
	    ]
	    return "<div class='list-container'>" + "".join(entries) + "</div>"

	def _render_table(table):
	    """Return the complete HTML for one table, or "" when it has no rows."""
	    rows = table.get('data')
	    if not rows:
	        return ""

	    def cell(value):
	        # Cell text comes from the analysed document: escape it, and show
	        # missing values as empty cells.
	        return "" if value is None else html.escape(str(value))

	    title = cell(table.get('title') or 'Tableau sans titre')
	    description = cell(table.get('description'))
	    parts = [f"""
	<div class='table-wrapper'>
	<h4>{title}</h4>
	<p class='table-description'>{description}</p>
	<table class='data-table'>
	"""]

	    first_row = rows[0]
	    if isinstance(first_row, dict):
	        # Column headers are the union of the keys of every row, in order
	        # of first appearance, so columns missing from row 1 are kept.
	        headers = list(dict.fromkeys(
	            key for row in rows if isinstance(row, dict) for key in row
	        ))
	        parts.append("<tr>" + "".join(f"<th>{cell(h)}</th>" for h in headers) + "</tr>")
	        for row in rows:
	            if not isinstance(row, dict):
	                continue
	            parts.append(
	                "<tr>" + "".join(f"<td>{cell(row.get(h))}</td>" for h in headers) + "</tr>"
	            )
	    elif isinstance(first_row, list):
	        # Rows given as plain lists are rendered as-is, without a header row.
	        for row in rows:
	            parts.append("<tr>" + "".join(f"<td>{cell(c)}</td>" for c in row) + "</tr>")

	    parts.append("</table></div>")
	    return "".join(parts)


	def format_table(table_data):
	    """Format the extracted tables as HTML.

	    Args:
	        table_data: List of table mappings with optional ``title`` and
	            ``description`` and a ``data`` list of rows (dicts or lists).

	    Returns:
	        An HTML string, or ``TEXT["no_data"]`` when there is nothing to show.

	    Each table is rendered on its own, so a malformed table yields an error
	    box for that table only and never leaves unclosed tags in the output.
	    """
	    if not table_data:
	        return TEXT["no_data"]

	    error_box = """
	<div class='error'>
	Erreur lors de l'affichage des tableaux. Veuillez vérifier le format JSON.
	</div>
	"""
	    if not isinstance(table_data, (list, tuple)):
	        print(f"Erreur lors du formatage des tableaux: {type(table_data).__name__}")
	        return "<div class='tables-container'>" + error_box + "</div>"

	    rendered = []
	    for table in table_data:
	        try:
	            rendered.append(_render_table(table))
	        except Exception as e:
	            print(f"Erreur lors du formatage des tableaux: {str(e)}")
	            rendered.append(error_box)

	    body = "".join(rendered)
	    if not body:
	        return TEXT["no_data"]
	    return "<div class='tables-container'>" + body + "</div>"

	def _strip_code_fence(text):
	    """Remove one surrounding Markdown code fence (``` or ```json) from *text*."""
	    text = text.strip()
	    if not text.startswith("```"):
	        return text
	    text = text[3:]
	    if text[:4].lower() == "json":
	        text = text[4:]
	    text = text.rstrip()
	    # Only the closing fence is removed; backticks inside the JSON stay.
	    if text.endswith("```"):
	        text = text[:-3]
	    return text.strip()


	def _normalize_tables(json_data):
	    """Give every table of *json_data* a ``data`` list, a title and a description."""
	    tables = json_data.get("tables")
	    if not tables:
	        return
	    if not isinstance(tables, list):
	        json_data["tables"] = []
	        return
	    # Entries that are not objects cannot be displayed; drop them rather
	    # than rejecting the whole page.
	    tables = [table for table in tables if isinstance(table, dict)]
	    for i, table in enumerate(tables):
	        if not table.get("data"):
	            table["data"] = []
	        if not table.get("title"):
	            table["title"] = f"Tableau {i+1}"
	        if "description" not in table:
	            table["description"] = ""
	    json_data["tables"] = tables


	def process_single_image(image):
	    """Send one page image to Gemini and return the extracted information.

	    Args:
	        image: The image passed to ``model.generate_content`` with the prompt.

	    Returns:
	        The decoded JSON object as a dict, or ``{"error": message}`` when the
	        call fails or the answer is not a JSON object.
	    """
	    try:
	        print("Envoi de l'image à Gemini pour analyse...")
	        response = model.generate_content(
	            [GEMINI_PROMPT, image],
	            generation_config={
	                "temperature": 0.1,
	                "top_p": 0.8,
	                "top_k": 40,
	                "max_output_tokens": 2048,
	            }
	        )
	    except Exception as e:
	        print(f"Erreur lors de l'appel à Gemini: {str(e)}")
	        return {"error": str(e)}

	    response_text = ""
	    try:
	        response_text = response.text.strip()
	        print(f"Réponse reçue de Gemini, longueur: {len(response_text)} caractères")

	        # The model often wraps its JSON in a Markdown code fence.
	        response_text = _strip_code_fence(response_text)
	        json_data = json.loads(response_text)

	        # Callers use dict methods on the result, so anything other than a
	        # JSON object is reported as a format error.
	        if not isinstance(json_data, dict):
	            print(f"Contenu problématique: {response_text[:500]}...")
	            return {"error": "Erreur de format JSON dans la réponse"}

	        _normalize_tables(json_data)
	        return json_data
	    except json.JSONDecodeError as e:
	        print(f"Erreur de décodage JSON: {str(e)}")
	        print(f"Contenu problématique: {response_text[:500]}...")
	        return {"error": "Erreur de format JSON dans la réponse"}
	    except Exception as e:
	        print(f"Erreur lors du traitement de la réponse Gemini: {str(e)}")
	        return {"error": str(e)}

	def merge_results(results):
	    """Combine per-page extraction dicts into one document-level dict.

	    The metadata of the first page that has any is kept. Every list
	    category is the concatenation of that category over all pages, in page
	    order. Returns ``None`` when *results* is empty.
	    """
	    if not results:
	        return None

	    list_keys = ("entities", "values", "dates", "tables", "key_points", "references")

	    first_metadata = next(
	        (page["metadata"] for page in results if "metadata" in page and page["metadata"]),
	        {},
	    )

	    combined = {"metadata": first_metadata}
	    for key in list_keys:
	        combined[key] = [
	            entry
	            for page in results
	            if key in page and page[key]
	            for entry in page[key]
	        ]
	    return combined

	def _process_pdf(pdf_path, progress):
	    """Analyse every page of the PDF at *pdf_path* and merge the page results."""
	    # Work on a private copy in case the upload is moved or modified while
	    # it is being processed.
	    temp_dir = tempfile.mkdtemp()
	    try:
	        temp_pdf = os.path.join(temp_dir, "temp.pdf")
	        shutil.copy2(pdf_path, temp_pdf)
	        try:
	            doc = fitz.open(temp_pdf)
	            try:
	                page_count = doc.page_count
	                if page_count > 10:
	                    return {"error": TEXT["error"]["too_many_pages"]}

	                print(f"Traitement d'un PDF de {page_count} pages")
	                results = []

	                for i in range(page_count):
	                    progress((i+1) / page_count, desc=f"{TEXT['processing']} page {i+1}/{page_count}")
	                    try:
	                        page = doc[i]
	                        # Render at 2x zoom for better image quality.
	                        zoom = 2.0
	                        mat = fitz.Matrix(zoom, zoom)
	                        pix = page.get_pixmap(matrix=mat, alpha=False)

	                        # Convert the pixmap to a PIL image.
	                        img_data = pix.tobytes("jpeg")
	                        pil_img = Image.open(io.BytesIO(img_data))

	                        result = process_single_image(pil_img)
	                        if result and "error" not in result:
	                            results.append(result)
	                            print(f"Page {i+1} traitée avec succès")
	                        else:
	                            print(f"Pas d'informations extraites de la page {i+1}")
	                    except Exception as e:
	                        # A failing page is skipped; the other pages still count.
	                        print(f"Erreur lors du traitement de la page {i+1}: {str(e)}")

	                if results:
	                    return merge_results(results)
	                return {"error": TEXT["error"]["no_info"]}
	            finally:
	                # Close the document on every path, before its file is deleted.
	                doc.close()
	        except Exception as e:
	            print(f"Erreur lors du traitement du PDF: {str(e)}")
	            return {"error": str(e)}
	    finally:
	        shutil.rmtree(temp_dir, ignore_errors=True)


	def process_document(file, progress=gr.Progress()):
	    """Process an uploaded document and extract information from it.

	    Args:
	        file: The uploaded file; either an object whose ``name`` attribute
	            is the path on disk, or the path itself.
	        progress: Callable used to report per-page progress for PDFs.

	    Returns:
	        The extraction dict (merged over all pages for a PDF), or
	        ``{"error": message}`` on failure. PDFs of more than 10 pages are
	        rejected.
	    """
	    if not file:
	        return {"error": TEXT["error"]["file_not_found"]}

	    try:
	        path = getattr(file, "name", file)
	        lowered = path.lower()

	        if lowered.endswith('.pdf'):
	            return _process_pdf(path, progress)

	        if lowered.endswith(('.png', '.jpg', '.jpeg')):
	            try:
	                with Image.open(path) as image:
	                    return process_single_image(image)
	            except Exception as e:
	                print(f"Erreur lors du traitement de l'image: {str(e)}")
	                return {"error": str(e)}

	        # NOTE(review): unsupported extensions reuse the "file not found"
	        # message because TEXT has no dedicated one.
	        return {"error": TEXT["error"]["file_not_found"]}

	    except Exception as e:
	        print(f"Erreur inattendue dans process_document: {str(e)}")
	        return {"error": str(e)}

	def _render_pdf_preview(pdf_path, max_preview_pages=3):
	    """Render the first pages of a PDF to JPEG files and return their paths."""
	    # Work on a private copy in case the upload is moved or modified while
	    # it is being processed.
	    work_dir = tempfile.mkdtemp()
	    # Each call writes into its own directory so that simultaneous users
	    # never overwrite (or see) each other's preview images.
	    preview_dir = tempfile.mkdtemp(prefix="preview_")
	    image_paths = []
	    try:
	        temp_pdf = os.path.join(work_dir, "temp.pdf")
	        shutil.copy2(pdf_path, temp_pdf)

	        doc = fitz.open(temp_pdf)
	        try:
	            max_pages = min(max_preview_pages, doc.page_count)
	            print(f"PDF a {doc.page_count} pages, prévisualisant {max_pages} pages")

	            for i in range(max_pages):
	                try:
	                    page = doc[i]
	                    # Render at 2x zoom for better image quality.
	                    zoom = 2.0
	                    mat = fitz.Matrix(zoom, zoom)
	                    pix = page.get_pixmap(matrix=mat, alpha=False)

	                    temp_filename = os.path.join(preview_dir, f"temp_preview_{i}.jpg")
	                    pix.save(temp_filename, "jpeg")
	                    image_paths.append(temp_filename)
	                    print(f"Page {i+1} convertie et sauvegardée dans {temp_filename}")
	                except Exception as e:
	                    print(f"Erreur lors du traitement de la page {i+1}: {str(e)}")
	        finally:
	            # Close the document on every path, before its file is deleted.
	            doc.close()

	        print(f"Prévisualisation créée avec succès: {len(image_paths)} images")
	    except Exception as e:
	        print(f"Erreur lors de la conversion PDF: {str(e)}")
	        image_paths = []
	    finally:
	        shutil.rmtree(work_dir, ignore_errors=True)
	        if not image_paths:
	            shutil.rmtree(preview_dir, ignore_errors=True)
	    return image_paths


	def update_preview(file):
	    """Return the image paths to show in the preview gallery for *file*.

	    Args:
	        file: The uploaded file; either an object whose ``name`` attribute
	            is the path on disk, or the path itself.

	    Returns:
	        For a PDF, the paths of JPEG renderings of at most its first three
	        pages; for a PNG/JPEG upload, a one-element list with its path; an
	        empty list when there is no file, the format is unsupported, or an
	        error occurs.
	    """
	    if not file:
	        return []

	    try:
	        path = getattr(file, "name", file)
	        lowered = path.lower()

	        if lowered.endswith('.pdf'):
	            return _render_pdf_preview(path)
	        if lowered.endswith(('.png', '.jpg', '.jpeg')):
	            return [path]

	        print(f"Format de fichier non pris en charge: {path}")
	        return []
	    except Exception as e:
	        print(f"Erreur inattendue dans update_preview: {str(e)}")
	        return []

	def process_and_display(file):
	    """Process the document and build the HTML for the eight result panels.

	    Args:
	        file: The uploaded file forwarded to ``process_document``.

	    Returns:
	        A list of eight HTML strings in the order overview, entities,
	        values, dates, tables, key points, references, complete JSON. On
	        error all eight hold the same error box.

	    Text extracted from the document is HTML-escaped before display, and a
	    panel that cannot be rendered shows an error box without affecting the
	    other panels.
	    """
	    def error_panels(message):
	        return [f"<div class='error'>{html.escape(str(message))}</div>"] * 8

	    if not file:
	        return error_panels(TEXT['error']['file_not_found'])

	    result = process_document(file)

	    # The model may answer with something other than a JSON object.
	    if not isinstance(result, dict):
	        return error_panels(TEXT["error"]["no_info"])

	    if "error" in result:
	        error_msg = str(result["error"])
	        # Accept either a key of TEXT["error"] or an already readable message.
	        return error_panels(TEXT["error"].get(error_msg, error_msg))

	    # Format metadata as HTML
	    metadata = result.get("metadata")
	    if isinstance(metadata, dict) and metadata:
	        metadata_body = "".join(
	            f"""
	<div class='metadata-item'>
	<h4>{html.escape(str(key))}</h4>
	<p>{"" if value is None else html.escape(str(value))}</p>
	</div>
	"""
	            for key, value in metadata.items()
	        )
	    else:
	        metadata_body = f"<p>{TEXT['no_data']}</p>"
	    metadata_html = f"<div class='metadata-grid'>{metadata_body}</div>"

	    # Format JSON data (escaped so that markup inside values stays text)
	    json_text = json.dumps(result, indent=2, ensure_ascii=False)
	    json_html = f"<pre class='json-viewer'>{html.escape(json_text, quote=False)}</pre>"

	    def card(tab, render, *args):
	        # Render one tab; a failure is confined to that tab.
	        try:
	            body = render(*args)
	        except Exception as e:
	            print(f"Error while rendering the '{tab}' tab: {str(e)}")
	            body = f"<div class='error'>{html.escape(str(e))}</div>"
	        return create_info_card(TEXT["tabs"][tab], body)

	    return [
	        metadata_html,
	        card("entities", format_list, result.get("entities", []), "name", "role"),
	        card("values", format_list, result.get("values", []), "description", "value"),
	        card("dates", format_list, result.get("dates", []), "description", "date"),
	        card("tables", format_table, result.get("tables", [])),
	        card("keypoints", format_list, result.get("key_points", []), "category", "description"),
	        card("references", format_list, result.get("references", []), "type", "value"),
	        json_html,
	    ]

	# Helper that encodes an image file as base64
	def get_image_base64(file_path):
	    """Return the content of *file_path* as base64 text, or "" on failure.

	    Any error (for example a missing file) is logged and turned into an
	    empty string.
	    """
	    try:
	        with open(file_path, "rb") as source:
	            raw_bytes = source.read()
	        return base64.b64encode(raw_bytes).decode('utf-8')
	    except Exception as err:
	        print(f"Erreur lors de l'encodage de l'image {file_path}: {str(err)}")
	        return ""

	# Image paths, resolved inside the "static" folder next to this file
	_static_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "static")
	logo_path = os.path.join(_static_dir, "elixir-logo-typo.png")
	workflow_path = os.path.join(_static_dir, "Editor _ Mermaid Chart-2025-04-15-142548.png")

	# Both images are embedded in the page as base64 data URIs
	logo_base64, workflow_base64 = (
	    get_image_base64(logo_path),
	    get_image_base64(workflow_path),
	)

	# HTML for the logo banner
	logo_html = f"""<div class="header">
	<img src="data:image/png;base64,{logo_base64}" alt="Elixir Logo" style="max-height: 40px; position: relative; z-index: 2;">
	</div>"""

	# HTML for the workflow diagram
	workflow_html = f"""<div class="workflow-container">
	<img src="data:image/png;base64,{workflow_base64}" alt="Elixir Workflow" style="max-width: 100%; border-radius: 0.5rem;">
	</div>"""

	# JavaScript for the accordion and other interactivity.
	# The snippet is inserted into the page through gr.HTML(js_code). Once the
	# DOM is loaded it toggles the "active" class on the parent of every
	# .accordion-header when clicked, and sets hover transforms/shadows on
	# every .card element.
	# NOTE(review): no element with the class "card" is produced by the HTML
	# visible in this file (only "intro-card" / "info-card"), so the hover
	# handlers may never attach - confirm.
	# NOTE(review): verify that a <script> placed inside a gr.HTML component is
	# actually executed by the Gradio version in use.
	js_code = """
	<script>
	document.addEventListener('DOMContentLoaded', function() {
	// Accordéon
	const accordions = document.querySelectorAll('.accordion-header');
	accordions.forEach(accordion => {
	accordion.addEventListener('click', function() {
	this.parentElement.classList.toggle('active');
	});
	});

	// Animation des cartes au survol
	const cards = document.querySelectorAll('.card');
	cards.forEach(card => {
	card.addEventListener('mouseenter', function() {
	this.style.transform = 'translateY(-5px)';
	this.style.boxShadow = '0 10px 15px -3px rgba(0, 0, 0, 0.1), 0 4px 6px -2px rgba(0, 0, 0, 0.05)';
	});
	card.addEventListener('mouseleave', function() {
	this.style.transform = 'translateY(0)';
	this.style.boxShadow = '0 4px 6px -1px rgba(0, 0, 0, 0.1), 0 2px 4px -1px rgba(0, 0, 0, 0.06)';
	});
	});
	});
	</script>
	"""

	# Improved Gradio interface.
	# Builds the page (header, two intro cards, instructions, upload area,
	# preview gallery, result tabs, JSON panel, loading indicator), wires the
	# two events and launches the app when the file is run as a script.
	# NOTE(review): the indentation of this block is missing in this copy of
	# the file; the nesting of the `with` contexts has to be restored before
	# the code can run, and cannot be fully determined from this text alone.
	with gr.Blocks(css=CSS, theme=gr.themes.Soft()) as demo:
	gr.HTML(js_code) # Add the JavaScript

	# Header with logo
	header = gr.HTML(logo_html)

	# First row: Document Intelligence + How Elixir Works
	with gr.Row(equal_height=True):
	# Document Intelligence on the left
	with gr.Column(scale=1):
	gr.HTML(f"""
	<div class="intro-card">
	<div class="intro-header">
	<h3>📄 Document Intelligence</h3>
	</div>
	<div class="intro-body">
	<div class="intro-description">
	{TEXT["description"]}
	</div>

	<div class="contact-links">
	<a href="https://lexiapro.fr/" target="_blank" class="contact-link">
	🌐 Visit lexiapro.fr
	</a>
	<a href="mailto:martial@lexiapro.fr" class="contact-link">
	✉️ Contact us
	</a>
	</div>
	</div>
	</div>
	""")

	# How Elixir Works on the right
	with gr.Column(scale=1):
	gr.HTML(f"""
	<div class="intro-card">
	<div class="intro-header">
	<h3>🔄 How Elixir Works</h3>
	</div>
	<div class="intro-body">
	{workflow_html}
	</div>
	</div>
	""")

	# Second row: usage interface with input on the left and output on the right
	with gr.Row():
	# Left column: instructions and upload
	with gr.Column(scale=1):
	# Instructions
	# NOTE(review): these steps repeat TEXT["instructions"] as hard-coded HTML.
	gr.HTML("""
	<div class="instructions">
	<h3>How to use Elixir</h3>
	<ol>
	<li>Upload a PDF document (1-10 pages) such as an invoice, regulatory document, report...</li>
	<li>Processing by Elixir</li>
	<li>Transcription of identified sections and elements (without customization)</li>
	</ol>
	</div>
	""")

	# Upload section
	with gr.Group(elem_classes=["upload-section"]):
	file_input = gr.File(label=TEXT["upload"], file_types=[".pdf", ".png", ".jpg", ".jpeg"], elem_classes=["file-container"])
	submit_btn = gr.Button(TEXT["analyze"], variant="primary", elem_classes=["primary"])
	preview = gr.Gallery(label=TEXT["preview"], show_label=True, elem_id="preview-gallery")

	# Right column: results and JSON
	with gr.Column(scale=1):
	# Result tabs
	with gr.Tabs(elem_classes=["tabs"]) as tabs:
	with gr.TabItem(TEXT["tabs"]["overview"]):
	metadata_view = gr.HTML()
	with gr.TabItem(TEXT["tabs"]["entities"]):
	entities_view = gr.HTML()
	with gr.TabItem(TEXT["tabs"]["values"]):
	values_view = gr.HTML()
	with gr.TabItem(TEXT["tabs"]["dates"]):
	dates_view = gr.HTML()
	with gr.TabItem(TEXT["tabs"]["tables"]):
	tables_view = gr.HTML()
	with gr.TabItem(TEXT["tabs"]["keypoints"]):
	keypoints_view = gr.HTML()
	with gr.TabItem(TEXT["tabs"]["references"]):
	references_view = gr.HTML()

	# Complete JSON below the tabs
	# NOTE(review): the card markup is opened in one gr.HTML and closed in
	# another, with json_view between them; presumably each gr.HTML is rendered
	# as its own element, so the card may not actually wrap json_view - confirm.
	gr.HTML("""
	<div class="intro-card" style="margin-top: 1.5rem;">
	<div class="intro-header">
	<h3>📄 Complete JSON</h3>
	</div>
	<div class="intro-body" style="padding: 0.75rem;">
	""")
	json_view = gr.HTML()
	gr.HTML("</div></div>")

	# Loading animation
	# The script shows #loading when the primary button is clicked and hides it
	# again as soon as nodes are added anywhere under the ".tabs" element.
	loading_indicator = gr.HTML(f"""
	<div id="loading" style="display:none; text-align:center; padding: 2rem;">
	<div class="loading-spinner"></div>
	<p style="margin-top: 1rem; color: var(--primary);">{TEXT['processing']}</p>
	</div>
	<script>
	document.addEventListener('DOMContentLoaded', function() {{
	const btn = document.querySelector("button.primary");
	const loading = document.getElementById("loading");
	if (btn && loading) {{
	btn.addEventListener("click", function() {{
	loading.style.display = "block";
	const observer = new MutationObserver(function(mutations) {{
	mutations.forEach(function(mutation) {{
	if (mutation.addedNodes.length) {{
	loading.style.display = "none";
	observer.disconnect();
	}}
	}});
	}});

	const resultsContainer = document.querySelector(".tabs");
	if (resultsContainer) {{
	observer.observe(resultsContainer, {{ childList: true, subtree: true }});
	}}
	}});
	}}
	}});
	</script>
	""")

	# Refresh the preview gallery whenever the selected file changes
	file_input.change(
	fn=update_preview,
	inputs=file_input,
	outputs=preview
	)

	# Run the analysis and fill the seven tabs plus the JSON panel
	submit_btn.click(
	fn=process_and_display,
	inputs=file_input,
	outputs=[metadata_view, entities_view, values_view, dates_view, tables_view, keypoints_view, references_view, json_view]
	)

	# Script entry point: listen on all interfaces, port 7860, with share=True
	if __name__ == "__main__":
	demo.launch(share=True, server_name="0.0.0.0", server_port=7860)