Spaces:
Running
Running
Update backend/main.py
Browse files- backend/main.py +104 -141
backend/main.py
CHANGED
@@ -3,7 +3,7 @@ from fastapi.responses import HTMLResponse, Response
|
|
3 |
from fastapi.staticfiles import StaticFiles
|
4 |
from pathlib import Path
|
5 |
import os
|
6 |
-
from backend.utils import extract_text_from_file, translate_text
|
7 |
import io
|
8 |
from docx import Document
|
9 |
import asyncio
|
@@ -13,21 +13,22 @@ from datetime import datetime
|
|
13 |
|
14 |
app = FastAPI(
|
15 |
title="DocTranslator Pro",
|
16 |
-
description="Service avancé de traduction de documents
|
17 |
-
version="2.
|
18 |
)
|
19 |
|
20 |
-
# Configuration
|
21 |
-
MAX_FILE_SIZE = 15 * 1024 * 1024 # 15 Mo
|
22 |
-
MAX_TEXT_LENGTH = 75000 # 75k caractères
|
23 |
-
PROCESSING_TIMEOUT = 600 # 10 minutes
|
24 |
-
SUPPORTED_FORMATS = ('.pdf', '.docx', '.xlsx', '.pptx')
|
25 |
LOG_FILE = "translation_logs.log"
|
|
|
26 |
|
27 |
# Configuration des fichiers statiques
|
28 |
app.mount("/static", StaticFiles(directory="frontend"), name="static")
|
29 |
|
30 |
-
# Configuration du logging
|
31 |
logging.basicConfig(
|
32 |
level=logging.INFO,
|
33 |
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
|
@@ -40,203 +41,165 @@ logger = logging.getLogger(__name__)
|
|
40 |
|
41 |
class TranslationLogger:
    """Append-only audit trail of translation requests, one record per line.

    Each record is pipe-delimited: ``timestamp|filename|char_count|status``.
    """

    # Characters that would break the one-record-per-line, pipe-delimited
    # layout if they appeared inside the (client-supplied) filename.
    _UNSAFE = str.maketrans({"\r": " ", "\n": " ", "|": "_"})

    @staticmethod
    def log_transaction(filename: str, char_count: int, status: str,
                        log_path: "str | None" = None):
        """Append one transaction record to the audit log.

        Args:
            filename: Name of the processed file; newlines and ``|`` are
                neutralised so the record stays on one line with a fixed
                number of fields.
            char_count: Number of characters that were processed.
            status: Outcome label written verbatim (e.g. "success").
            log_path: Optional log file to write to; defaults to LOG_FILE.

        Raises:
            OSError: If the log file cannot be opened or written.
        """
        safe_name = str(filename).translate(TranslationLogger._UNSAFE)
        record = f"{datetime.now().isoformat()}|{safe_name}|{char_count}|{status}\n"
        destination = LOG_FILE if log_path is None else log_path
        # Explicit UTF-8 so accented filenames do not depend on the platform's
        # default encoding.
        with open(destination, "a", encoding="utf-8") as f:
            f.write(record)
|
46 |
|
47 |
@app.on_event("startup")
|
48 |
async def startup_event():
|
49 |
-
"""Vérification
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
if translator is None:
|
54 |
-
logger.error("❌ ERREUR CRITIQUE: Le modèle de traduction n'a pas pu être chargé!")
|
55 |
-
raise RuntimeError("Échec du chargement du modèle")
|
56 |
else:
|
57 |
-
logger.info("✅
|
58 |
-
logger.info(f"⚙️
|
59 |
-
logger.info(f"⚙️ Configuration - Texte max: {MAX_TEXT_LENGTH} caractères")
|
60 |
|
61 |
@app.get("/", response_class=HTMLResponse)
|
62 |
async def serve_frontend():
|
63 |
-
"""Endpoint
|
64 |
try:
|
65 |
with open(os.path.join("frontend", "index.html"), "r", encoding="utf-8") as f:
|
66 |
return HTMLResponse(content=f.read(), status_code=200)
|
67 |
except Exception as e:
|
68 |
logger.error(f"Erreur de chargement du frontend: {str(e)}")
|
69 |
-
raise HTTPException(
|
70 |
-
status_code=500,
|
71 |
-
detail="Erreur de chargement de l'interface. Contactez l'administrateur."
|
72 |
-
)
|
73 |
|
74 |
async def process_file(file: UploadFile, target_lang: str) -> dict:
|
75 |
-
"""
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
if file.size > MAX_FILE_SIZE:
|
80 |
-
error_msg = f"Fichier trop volumineux (> {MAX_FILE_SIZE/1024/1024:.1f} Mo)"
|
81 |
-
logger.warning(error_msg)
|
82 |
-
raise HTTPException(status_code=413, detail=error_msg)
|
83 |
|
84 |
-
# Vérification du
|
|
|
|
|
|
|
85 |
if not file.filename.lower().endswith(SUPPORTED_FORMATS):
|
86 |
-
|
87 |
-
logger.warning(error_msg)
|
88 |
-
raise HTTPException(status_code=415, detail=error_msg)
|
89 |
|
90 |
-
# Lecture du contenu avec gestion d'erreur
|
91 |
try:
|
92 |
contents = await file.read()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
93 |
except Exception as e:
|
94 |
-
|
95 |
-
|
96 |
-
raise HTTPException(status_code=422, detail=error_msg)
|
97 |
-
|
98 |
-
# Extraction du texte
|
99 |
-
extracted_text = extract_text_from_file(contents, file.filename)
|
100 |
-
|
101 |
-
if extracted_text.startswith("Erreur:"):
|
102 |
-
error_msg = extracted_text.replace("Erreur:", "").strip()
|
103 |
-
logger.error(f"Erreur extraction: {error_msg}")
|
104 |
-
raise HTTPException(status_code=422, detail=error_msg)
|
105 |
-
|
106 |
-
if len(extracted_text) > MAX_TEXT_LENGTH:
|
107 |
-
error_msg = f"Texte trop long ({len(extracted_text)} > {MAX_TEXT_LENGTH} caractères)"
|
108 |
-
logger.warning(error_msg)
|
109 |
-
raise HTTPException(status_code=413, detail=error_msg)
|
110 |
-
|
111 |
-
# Traduction avec suivi
|
112 |
-
logger.info(f"Début traduction - {len(extracted_text)} caractères")
|
113 |
-
translated_text = translate_text(extracted_text, target_lang)
|
114 |
-
|
115 |
-
if translated_text.startswith("Erreur:"):
|
116 |
-
error_detail = translated_text.replace("Erreur:", "").strip()
|
117 |
-
TranslationLogger.log_transaction(file.filename, len(extracted_text), "failed")
|
118 |
-
logger.error(f"Échec traduction: {error_detail}")
|
119 |
-
raise HTTPException(status_code=500, detail=error_detail)
|
120 |
-
|
121 |
-
# Succès
|
122 |
-
TranslationLogger.log_transaction(file.filename, len(extracted_text), "success")
|
123 |
-
logger.info(f"Traduction réussie - {len(extracted_text)}→{len(translated_text)} caractères")
|
124 |
-
|
125 |
-
return {
|
126 |
-
"filename": file.filename,
|
127 |
-
"original_text": extracted_text,
|
128 |
-
"translated_text": translated_text,
|
129 |
-
"char_count": len(extracted_text),
|
130 |
-
"translation_ratio": f"{len(translated_text)/len(extracted_text):.2f}" if extracted_text else "0"
|
131 |
-
}
|
132 |
|
133 |
@app.post("/api/translate")
|
134 |
async def translate_endpoint(
|
135 |
file: UploadFile = File(...),
|
136 |
target_lang: str = "en"
|
137 |
):
|
138 |
-
"""
|
139 |
-
Endpoint amélioré avec gestion complète des erreurs
|
140 |
-
"""
|
141 |
try:
|
142 |
-
# Traitement avec timeout configurable
|
143 |
result = await asyncio.wait_for(
|
144 |
process_file(file, target_lang),
|
145 |
timeout=PROCESSING_TIMEOUT
|
146 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
147 |
return {
|
148 |
"status": "success",
|
149 |
-
"
|
150 |
-
"processing_time": f"{PROCESSING_TIMEOUT}s max",
|
151 |
**result
|
152 |
}
|
153 |
|
154 |
except asyncio.TimeoutError:
|
155 |
-
|
156 |
-
logger.error(error_msg)
|
157 |
-
raise HTTPException(status_code=408, detail=error_msg)
|
158 |
except HTTPException:
|
159 |
-
raise
|
160 |
except Exception as e:
|
161 |
-
|
162 |
-
|
163 |
-
raise HTTPException(status_code=500, detail=error_msg)
|
164 |
|
165 |
@app.post("/api/download_translated")
|
166 |
async def download_translated(
|
167 |
file: UploadFile = File(...),
|
168 |
target_lang: str = "en"
|
169 |
):
|
170 |
-
"""
|
171 |
-
Endpoint de téléchargement amélioré avec gestion d'erreur
|
172 |
-
"""
|
173 |
try:
|
174 |
-
# Traitement avec timeout
|
175 |
result = await asyncio.wait_for(
|
176 |
process_file(file, target_lang),
|
177 |
timeout=PROCESSING_TIMEOUT
|
178 |
)
|
179 |
|
180 |
-
|
181 |
-
|
182 |
-
|
183 |
-
|
184 |
-
|
185 |
-
|
186 |
-
|
187 |
-
|
188 |
-
|
189 |
-
|
190 |
-
|
191 |
-
|
192 |
-
|
193 |
-
|
194 |
-
|
195 |
-
|
196 |
-
"X-Translation-Ratio": result["translation_ratio"]
|
197 |
-
}
|
198 |
-
)
|
199 |
-
except Exception as e:
|
200 |
-
error_msg = f"Erreur de génération DOCX: {str(e)}"
|
201 |
-
logger.error(error_msg)
|
202 |
-
raise HTTPException(status_code=500, detail=error_msg)
|
203 |
-
|
204 |
except asyncio.TimeoutError:
|
205 |
-
|
206 |
-
logger.error(error_msg)
|
207 |
-
raise HTTPException(status_code=408, detail=error_msg)
|
208 |
-
except HTTPException:
|
209 |
-
raise
|
210 |
except Exception as e:
|
211 |
-
|
212 |
-
|
213 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
214 |
|
215 |
@app.get("/api/health")
|
216 |
async def health_check():
|
217 |
-
"""Endpoint de vérification de santé
|
218 |
-
from backend.utils import init_translator
|
219 |
-
translator = init_translator('en') # Vérifie le modèle anglais
|
220 |
-
|
221 |
return {
|
222 |
-
"status": "OK" if
|
223 |
-
"
|
224 |
-
"
|
225 |
-
"max_text_length": f"{MAX_TEXT_LENGTH} caractères",
|
226 |
-
"supported_formats": SUPPORTED_FORMATS,
|
227 |
-
"log_file": LOG_FILE,
|
228 |
-
"supported_languages": ["en", "es", "ar", "de"] # Liste des langues supportées
|
229 |
}
|
230 |
|
231 |
@app.exception_handler(404)
|
232 |
async def not_found_handler(request, exc):
|
233 |
-
"""
|
234 |
-
logger.warning(f"404 - {request.url}")
|
235 |
return HTMLResponse(
|
236 |
content="""
|
237 |
<div style='text-align:center; padding:2rem'>
|
238 |
-
<h1>404 -
|
239 |
-
<p>La
|
240 |
<a href="/">Retour à l'accueil</a>
|
241 |
</div>
|
242 |
""",
|
|
|
3 |
from fastapi.staticfiles import StaticFiles
|
4 |
from pathlib import Path
|
5 |
import os
|
6 |
+
from backend.utils import extract_text_from_file, translate_text, default_translator
|
7 |
import io
|
8 |
from docx import Document
|
9 |
import asyncio
|
|
|
13 |
|
14 |
app = FastAPI(
    title="DocTranslator Pro",
    description="Service avancé de traduction multilingue de documents",
    version="2.1.0",
)

# Upload and processing limits
MAX_FILE_SIZE = 15 * 1024 ** 2  # 15 MiB, compared against the upload size in bytes
MAX_TEXT_LENGTH = 75_000  # maximum number of extracted characters
PROCESSING_TIMEOUT = 10 * 60  # seconds, passed to asyncio.wait_for
SUPPORTED_FORMATS = ('.pdf', '.docx', '.xlsx', '.pptx')
LOG_FILE = "translation_logs.log"
SUPPORTED_LANGUAGES = ['en', 'es', 'ar', 'de']  # accepted target-language codes

# Static files: expose the "frontend" directory under /static
app.mount("/static", StaticFiles(directory="frontend"), name="static")
|
30 |
|
31 |
+
# Configuration du logging
|
32 |
logging.basicConfig(
|
33 |
level=logging.INFO,
|
34 |
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
|
|
|
41 |
|
42 |
class TranslationLogger:
    """Append-only audit trail of translation requests, one record per line.

    Each record is pipe-delimited:
    ``timestamp|filename|char_count|status|target_lang``.
    """

    # Characters that would break the one-record-per-line, pipe-delimited
    # layout if they appeared inside the (client-supplied) filename.
    _UNSAFE = str.maketrans({"\r": " ", "\n": " ", "|": "_"})

    @staticmethod
    def log_transaction(filename: str, char_count: int, status: str, target_lang: str,
                        log_path: "str | None" = None):
        """Append one transaction record to the audit log.

        Args:
            filename: Name of the processed file; newlines and ``|`` are
                neutralised so the record stays on one line with a fixed
                number of fields.
            char_count: Number of characters that were processed.
            status: Outcome label written verbatim (e.g. "success").
            target_lang: Target language code of the translation.
            log_path: Optional log file to write to; defaults to LOG_FILE.

        Raises:
            OSError: If the log file cannot be opened or written.
        """
        safe_name = str(filename).translate(TranslationLogger._UNSAFE)
        record = (
            f"{datetime.now().isoformat()}|{safe_name}|{char_count}|{status}|{target_lang}\n"
        )
        destination = LOG_FILE if log_path is None else log_path
        # Explicit UTF-8 so accented filenames do not depend on the platform's
        # default encoding.
        with open(destination, "a", encoding="utf-8") as f:
            f.write(record)
|
47 |
|
48 |
@app.on_event("startup")
async def startup_event():
    """Abort application startup when the default translator is not loaded."""
    # Guard clause: without the default translator there is nothing to serve.
    if default_translator is None:
        logger.error("❌ ERREUR: Modèle par défaut (en) non chargé")
        raise RuntimeError("Échec du chargement du modèle de traduction")

    logger.info("✅ Modèles prêts")
    languages_banner = f"⚙️ Langues supportées: {SUPPORTED_LANGUAGES}"
    logger.info(languages_banner)
|
|
|
57 |
|
58 |
@app.get("/", response_class=HTMLResponse)
async def serve_frontend():
    """Return the frontend's index.html as an HTML response.

    Raises:
        HTTPException: 500 when the page cannot be read or served.
    """
    index_path = os.path.join("frontend", "index.html")
    try:
        with open(index_path, "r", encoding="utf-8") as page:
            markup = page.read()
        return HTMLResponse(content=markup, status_code=200)
    except Exception as e:
        # The cause is logged server-side; the client only sees a generic message.
        logger.error("Erreur de chargement du frontend: " + str(e))
        raise HTTPException(status_code=500, detail="Erreur de chargement de l'interface")
|
|
|
|
|
|
|
67 |
|
68 |
async def process_file(file: UploadFile, target_lang: str) -> dict:
    """Validate an uploaded document, extract its text and translate it.

    Args:
        file: The uploaded document. Its name must end with one of
            SUPPORTED_FORMATS and its content must not exceed MAX_FILE_SIZE.
        target_lang: Target language code; must be in SUPPORTED_LANGUAGES.

    Returns:
        A dict with the keys ``filename``, ``original_text``,
        ``translated_text``, ``char_count`` and ``target_lang``.

    Raises:
        HTTPException: 400 for an unsupported language, 413 when the file or
            the extracted text is too large, 415 for a missing or unsupported
            file name, 422 when extraction reports an error, 500 when
            translation reports an error or anything unexpected fails.
    """
    # Language validation
    if target_lang not in SUPPORTED_LANGUAGES:
        raise HTTPException(status_code=400, detail=f"Langue non supportée. Options: {SUPPORTED_LANGUAGES}")

    # File validation
    size_error = f"Fichier trop volumineux (> {MAX_FILE_SIZE/1024/1024:.1f} Mo)"
    # The size attribute may be None or absent depending on the framework
    # version, so only use it as an early rejection when it is available.
    declared_size = getattr(file, "size", None)
    if declared_size is not None and declared_size > MAX_FILE_SIZE:
        raise HTTPException(status_code=413, detail=size_error)

    # A missing file name is handled like an unsupported format.
    filename = file.filename or ""
    if not filename.lower().endswith(SUPPORTED_FORMATS):
        raise HTTPException(status_code=415, detail=f"Formats supportés: {SUPPORTED_FORMATS}")

    try:
        contents = await file.read()
        # Authoritative size check on the bytes actually received.
        if len(contents) > MAX_FILE_SIZE:
            raise HTTPException(status_code=413, detail=size_error)

        # NOTE(review): the two helper calls below are not awaited, so a
        # timeout applied by a caller via asyncio.wait_for cannot interrupt
        # them while they run. Offloading them to a worker thread would fix
        # that — TODO confirm backend.utils is safe to call concurrently.
        extracted_text = extract_text_from_file(contents, filename)

        # The helpers report failures as strings prefixed with "Erreur:".
        if extracted_text.startswith("Erreur:"):
            raise HTTPException(status_code=422, detail=extracted_text.replace("Erreur:", "").strip())

        if len(extracted_text) > MAX_TEXT_LENGTH:
            raise HTTPException(status_code=413, detail=f"Texte trop long (> {MAX_TEXT_LENGTH} caractères)")

        # Translation
        translated_text = translate_text(extracted_text, target_lang)

        if translated_text.startswith("Erreur:"):
            raise HTTPException(status_code=500, detail=translated_text.replace("Erreur:", "").strip())

        return {
            "filename": filename,
            "original_text": extracted_text,
            "translated_text": translated_text,
            "char_count": len(extracted_text),
            "target_lang": target_lang
        }

    except HTTPException:
        # Deliberate HTTP errors raised above must reach the client unchanged.
        raise
    except Exception as e:
        # logger.exception keeps the traceback for diagnosis.
        logger.exception(f"Erreur traitement fichier: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Erreur interne: {str(e)}") from e
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
110 |
|
111 |
@app.post("/api/translate")
async def translate_endpoint(
    file: UploadFile = File(...),
    target_lang: str = "en"
):
    """Translate an uploaded document and return the result as JSON.

    Args:
        file: The uploaded document.
        target_lang: Target language code (defaults to "en").

    Returns:
        A dict with ``status``, ``target_language`` and every key returned by
        ``process_file``.

    Raises:
        HTTPException: 408 when processing exceeds PROCESSING_TIMEOUT, 500 on
            an unexpected failure; HTTP errors raised by ``process_file`` are
            propagated unchanged.
    """
    try:
        result = await asyncio.wait_for(
            process_file(file, target_lang),
            timeout=PROCESSING_TIMEOUT
        )
    except asyncio.TimeoutError:
        raise HTTPException(status_code=408, detail="Temps de traitement dépassé")
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Erreur inattendue: {str(e)}")
        raise HTTPException(status_code=500, detail="Erreur interne du serveur")

    # The audit log is best-effort: a failure to write it must not turn an
    # already successful translation into an error for the client.
    try:
        TranslationLogger.log_transaction(
            result["filename"],
            result["char_count"],
            "success",
            target_lang
        )
    except OSError as log_error:
        logger.warning(f"Journalisation de la transaction impossible: {log_error}")

    return {
        "status": "success",
        "target_language": target_lang,
        **result
    }
|
|
|
143 |
|
144 |
def _docx_content_disposition(original_name: str, target_lang: str) -> str:
    """Build a Content-Disposition value for the translated DOCX download."""
    from urllib.parse import quote

    # The payload is always a DOCX file, so the source extension is replaced
    # rather than kept; taking the stem also drops any directory component.
    stem = Path(original_name or "document").stem or "document"
    download_name = f"TRAD_{target_lang}_{stem}.docx"
    # Quoted ASCII fallback for older clients, plus the full UTF-8 name in the
    # extended filename* parameter.
    fallback = "".join(
        ch if ch.isascii() and (ch.isalnum() or ch in "._-") else "_"
        for ch in download_name
    )
    return f"attachment; filename=\"{fallback}\"; filename*=UTF-8''{quote(download_name, safe='')}"


@app.post("/api/download_translated")
async def download_translated(
    file: UploadFile = File(...),
    target_lang: str = "en"
):
    """Translate an uploaded document and return the translation as a DOCX file.

    Args:
        file: The uploaded document.
        target_lang: Target language code (defaults to "en").

    Returns:
        A Response whose body is a DOCX document containing the translated
        text, sent as an attachment.

    Raises:
        HTTPException: 408 when processing exceeds PROCESSING_TIMEOUT, 500
            when the document cannot be generated; HTTP errors raised by
            ``process_file`` are propagated unchanged.
    """
    try:
        result = await asyncio.wait_for(
            process_file(file, target_lang),
            timeout=PROCESSING_TIMEOUT
        )

        doc = Document()
        doc.add_paragraph(result["translated_text"])

        file_stream = io.BytesIO()
        doc.save(file_stream)

        return Response(
            content=file_stream.getvalue(),
            media_type="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
            headers={
                "Content-Disposition": _docx_content_disposition(result["filename"], target_lang),
                "X-Target-Language": target_lang
            }
        )

    except asyncio.TimeoutError:
        raise HTTPException(status_code=408, detail="Temps de génération dépassé")
    except HTTPException:
        # Without this clause the generic handler below would rewrite the
        # 400/413/415/422 errors from process_file into a 500.
        raise
    except Exception as e:
        logger.error(f"Erreur génération DOCX: {str(e)}")
        raise HTTPException(status_code=500, detail="Erreur de génération du document")
|
177 |
+
|
178 |
+
@app.get("/api/languages")
async def get_supported_languages():
    """List the accepted target languages and the default one."""
    payload = {}
    payload["supported_languages"] = SUPPORTED_LANGUAGES
    payload["default_language"] = "en"
    return payload
|
185 |
|
186 |
@app.get("/api/health")
async def health_check():
    """Report service health based on whether the default translator is loaded."""
    model_ready = bool(default_translator)
    if model_ready:
        status_label = "OK"
    else:
        status_label = "ERROR"
    return {
        "status": status_label,
        "supported_languages": SUPPORTED_LANGUAGES,
        "model_loaded": model_ready
    }
|
194 |
|
195 |
@app.exception_handler(404)
|
196 |
async def not_found_handler(request, exc):
|
197 |
+
"""Gestion des erreurs 404"""
|
|
|
198 |
return HTMLResponse(
|
199 |
content="""
|
200 |
<div style='text-align:center; padding:2rem'>
|
201 |
+
<h1>404 - Non trouvé</h1>
|
202 |
+
<p>La ressource demandée n'existe pas</p>
|
203 |
<a href="/">Retour à l'accueil</a>
|
204 |
</div>
|
205 |
""",
|