Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -45,8 +45,8 @@ os.makedirs(CACHE_DIR, exist_ok=True)
|
|
| 45 |
# Maximum dimensions for images
|
| 46 |
MAX_IMAGE_DIMENSION = 500
|
| 47 |
|
| 48 |
-
# Timeout for OCR operation
|
| 49 |
-
OCR_TIMEOUT = 20
|
| 50 |
|
| 51 |
# Crawling settings
|
| 52 |
CRAWL_DEPTH = 1
|
|
@@ -73,7 +73,7 @@ def preprocess_image(image):
|
|
| 73 |
image = image.resize((new_width, new_height), Image.Resampling.LANCZOS)
|
| 74 |
logger.info(f"Resized image to: {new_width}x{new_height}")
|
| 75 |
else:
|
| 76 |
-
target_dpi = 50
|
| 77 |
image = image.resize((int(width * target_dpi / 72), int(height * target_dpi / 72)), Image.Resampling.LANCZOS)
|
| 78 |
image = image.convert("L")
|
| 79 |
return image
|
|
@@ -271,34 +271,43 @@ except Exception as e:
|
|
| 271 |
async def home():
|
| 272 |
return render_template("index.html")
|
| 273 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 274 |
@app.route("/web_translate", methods=["POST"])
|
| 275 |
async def web_translate():
|
|
|
|
| 276 |
try:
|
| 277 |
-
|
| 278 |
-
start_time = time.time()
|
| 279 |
-
text = request.form.get("text")
|
| 280 |
-
file = request.files.get("file")
|
| 281 |
-
|
| 282 |
-
if not text and not file:
|
| 283 |
-
return render_template("index.html", error="Please enter text or upload a file.")
|
| 284 |
-
|
| 285 |
-
if file and allowed_file(file.filename):
|
| 286 |
-
extracted_text = extract_text(file)
|
| 287 |
-
if extracted_text.startswith("Error") or extracted_text.startswith("Please"):
|
| 288 |
-
return render_template("index.html", error=extracted_text, text=text)
|
| 289 |
-
text_to_translate = extracted_text
|
| 290 |
-
else:
|
| 291 |
-
text_to_translate = text
|
| 292 |
-
|
| 293 |
-
if not text_to_translate:
|
| 294 |
-
return render_template("index.html", error="No valid text to translate.", text=text)
|
| 295 |
-
|
| 296 |
-
translated = await translate_text(text_to_translate, model, tokenizer)
|
| 297 |
-
if translated.startswith("Error"):
|
| 298 |
-
return render_template("index.html", error=translated, text=text, extracted_text=text_to_translate)
|
| 299 |
-
|
| 300 |
-
logger.info(f"Total request took {time.time() - start_time:.2f} seconds")
|
| 301 |
-
return render_template("index.html", extracted_text=text_to_translate, translated_text=translated, text=text)
|
| 302 |
except asyncio.TimeoutError:
|
| 303 |
logger.error("Request timed out after 60 seconds")
|
| 304 |
return render_template("index.html", error="Request timed out. Please try again with a smaller input.", text=text)
|
|
@@ -306,6 +315,42 @@ async def web_translate():
|
|
| 306 |
logger.error(f"Error in web_translate: {str(e)}")
|
| 307 |
return render_template("index.html", error=f"Error processing request: {str(e)}", text=text)
|
| 308 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 309 |
@app.route("/translate", methods=["POST"])
|
| 310 |
async def translate():
|
| 311 |
try:
|
|
@@ -322,38 +367,5 @@ async def translate():
|
|
| 322 |
logger.error(f"Error in translate: {str(e)}")
|
| 323 |
return jsonify({"error": str(e)}), 500
|
| 324 |
|
| 325 |
-
@app.route("/crawl_and_translate", methods=["POST"])
async def crawl_and_translate():
    """Crawl the submitted URL, translate the extracted text and render the page.

    Reads ``url`` from the POSTed form, checks that it has both a scheme and a
    network location, hands it to ``crawl_website`` and sends the returned text
    through ``translate_text``.

    Returns:
        The rendered ``index.html`` template: with ``error`` set when the URL
        is missing or malformed, when nothing usable was crawled, when the
        translation result starts with "Error", on timeout, or on any other
        exception; otherwise with ``extracted_text`` and ``translated_text``.
    """
    # Bind ``url`` before the ``try`` so both ``except`` handlers can always
    # reference it. Without this, any failure ahead of the assignment below
    # (for example ``asyncio.timeout`` not existing on Python < 3.11) made the
    # handlers themselves raise UnboundLocalError and hide the real error.
    url = None
    try:
        async with asyncio.timeout(60):  # Timeout for the entire route
            start_time = time.time()
            url = request.form.get("url")

            if not url:
                return render_template("index.html", error="Please enter a website URL.")

            parsed_url = urlparse(url)
            if not parsed_url.scheme or not parsed_url.netloc:
                return render_template("index.html", error="Invalid URL format.", url=url)

            logger.info(f"Starting crawl for URL: {url}")
            # NOTE(review): crawl_website is called without ``await``; if it
            # blocks, the timeout cannot fire until the next await point.
            extracted_text = crawl_website(url)

            # Either nothing came back or the crawler returned its own
            # "No Bangla text..." message; show whichever applies as the error.
            if not extracted_text or extracted_text.startswith("No Bangla text"):
                return render_template("index.html", error=extracted_text or "No text found to translate.", url=url)

            translated = await translate_text(extracted_text, model, tokenizer)
            # translate_text reports failures as a string beginning with "Error"
            if translated.startswith("Error"):
                return render_template("index.html", error=translated, url=url, extracted_text=extracted_text)

            logger.info(f"Total crawl and translate request took {time.time() - start_time:.2f} seconds")
            return render_template("index.html", extracted_text=extracted_text, translated_text=translated, url=url)
    except asyncio.TimeoutError:
        logger.error("Crawl and translate request timed out after 60 seconds")
        return render_template("index.html", error="Request timed out. Please try again with a smaller website.", url=url)
    except Exception as e:
        logger.error(f"Error in crawl_and_translate: {str(e)}")
        return render_template("index.html", error=f"Error processing request: {str(e)}", url=url)
|
| 357 |
-
|
| 358 |
# Start the app's built-in server only when this file is executed directly.
if __name__ == "__main__":
    app.run(
        debug=False,
        port=7860,
        host="0.0.0.0",
    )
|
|
|
|
| 45 |
# Maximum dimensions for images
|
| 46 |
MAX_IMAGE_DIMENSION = 500
|
| 47 |
|
| 48 |
+
# Timeout for OCR operation
|
| 49 |
+
OCR_TIMEOUT = 20
|
| 50 |
|
| 51 |
# Crawling settings
|
| 52 |
CRAWL_DEPTH = 1
|
|
|
|
| 73 |
image = image.resize((new_width, new_height), Image.Resampling.LANCZOS)
|
| 74 |
logger.info(f"Resized image to: {new_width}x{new_height}")
|
| 75 |
else:
|
| 76 |
+
target_dpi = 50
|
| 77 |
image = image.resize((int(width * target_dpi / 72), int(height * target_dpi / 72)), Image.Resampling.LANCZOS)
|
| 78 |
image = image.convert("L")
|
| 79 |
return image
|
|
|
|
| 271 |
async def home():
|
| 272 |
return render_template("index.html")
|
| 273 |
|
| 274 |
+
async def process_web_translate():
    """Translate the text or uploaded file from the current form submission.

    Reads ``text`` and ``file`` from the request. When a file with an allowed
    name is present its text is obtained through ``extract_text`` and used in
    place of the typed text; otherwise the typed text is translated.

    Returns:
        The rendered ``index.html`` template, carrying ``error`` on any
        failure path or ``extracted_text`` / ``translated_text`` on success.
    """
    began = time.time()
    text = request.form.get("text")
    upload = request.files.get("file")

    upload_name = upload.filename if upload else None
    logger.info(f"Received text: {text}, file: {upload_name}")

    # Nothing was submitted at all.
    if not (text or upload):
        return render_template("index.html", error="Please enter text or upload a file.")

    # Default to the typed text; an acceptable upload overrides it.
    source_text = text
    if upload and allowed_file(upload.filename):
        logger.info("Starting OCR extraction for uploaded file")
        # NOTE(review): extract_text is called without ``await``; if it blocks,
        # a surrounding asyncio timeout cannot interrupt it - confirm.
        source_text = extract_text(upload)
        logger.info(f"OCR result: {source_text}")
        # Results beginning with "Error" or "Please" are shown as the error.
        if source_text.startswith(("Error", "Please")):
            return render_template("index.html", error=source_text, text=text)

    if not source_text:
        return render_template("index.html", error="No valid text to translate.", text=text)

    logger.info("Starting translation")
    translation = await translate_text(source_text, model, tokenizer)
    logger.info(f"Translation result: {translation}")
    if translation.startswith("Error"):
        return render_template("index.html", error=translation, text=text, extracted_text=source_text)

    elapsed = time.time() - began
    logger.info(f"Total request took {elapsed:.2f} seconds")
    return render_template("index.html", extracted_text=source_text, translated_text=translation, text=text)
|
| 305 |
+
|
| 306 |
@app.route("/web_translate", methods=["POST"])
|
| 307 |
async def web_translate():
|
| 308 |
+
text = None # Initialize text to avoid UnboundLocalError
|
| 309 |
try:
|
| 310 |
+
return await asyncio.wait_for(process_web_translate(), timeout=60) # Timeout after 60 seconds
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 311 |
except asyncio.TimeoutError:
|
| 312 |
logger.error("Request timed out after 60 seconds")
|
| 313 |
return render_template("index.html", error="Request timed out. Please try again with a smaller input.", text=text)
|
|
|
|
| 315 |
logger.error(f"Error in web_translate: {str(e)}")
|
| 316 |
return render_template("index.html", error=f"Error processing request: {str(e)}", text=text)
|
| 317 |
|
| 318 |
+
async def process_crawl_and_translate():
    """Crawl the URL from the current form submission and translate the result.

    Reads ``url`` from the request, requires it to have both a scheme and a
    network location, passes it to ``crawl_website`` and sends the returned
    text through ``translate_text``.

    Returns:
        The rendered ``index.html`` template, carrying ``error`` on any
        failure path or ``extracted_text`` / ``translated_text`` on success.
    """
    began = time.time()
    url = request.form.get("url")

    if not url:
        return render_template("index.html", error="Please enter a website URL.")

    # A usable address needs both a scheme and a network location.
    url_parts = urlparse(url)
    if not (url_parts.scheme and url_parts.netloc):
        return render_template("index.html", error="Invalid URL format.", url=url)

    logger.info(f"Starting crawl for URL: {url}")
    # NOTE(review): crawl_website is called without ``await``; if it blocks,
    # a surrounding asyncio timeout cannot interrupt it - confirm.
    crawled = crawl_website(url)

    # An empty result, or the crawler's own "No Bangla text..." message,
    # is reported back to the user as the error.
    if not crawled or crawled.startswith("No Bangla text"):
        message = crawled or "No text found to translate."
        return render_template("index.html", error=message, url=url)

    translation = await translate_text(crawled, model, tokenizer)
    if translation.startswith("Error"):
        return render_template("index.html", error=translation, url=url, extracted_text=crawled)

    elapsed = time.time() - began
    logger.info(f"Total crawl and translate request took {elapsed:.2f} seconds")
    return render_template("index.html", extracted_text=crawled, translated_text=translation, url=url)
|
| 341 |
+
|
| 342 |
+
@app.route("/crawl_and_translate", methods=["POST"])
async def crawl_and_translate():
    """Run ``process_crawl_and_translate`` under a 60-second limit.

    Returns:
        Whatever ``process_crawl_and_translate`` renders, or ``index.html``
        with an ``error`` message when the call times out or raises. The
        error pages are given the submitted ``url`` so the form can show it
        again.
    """
    url = None  # Fallback so the except handlers can always reference it
    try:
        # Capture the submitted URL here as well: the helper reads it into its
        # own local, so without this both error pages always got url=None and
        # the user's input was dropped.
        url = request.form.get("url")
        return await asyncio.wait_for(process_crawl_and_translate(), timeout=60)  # Timeout after 60 seconds
    except asyncio.TimeoutError:
        logger.error("Crawl and translate request timed out after 60 seconds")
        return render_template("index.html", error="Request timed out. Please try again with a smaller website.", url=url)
    except Exception as e:
        logger.error(f"Error in crawl_and_translate: {str(e)}")
        return render_template("index.html", error=f"Error processing request: {str(e)}", url=url)
|
| 353 |
+
|
| 354 |
@app.route("/translate", methods=["POST"])
|
| 355 |
async def translate():
|
| 356 |
try:
|
|
|
|
| 367 |
logger.error(f"Error in translate: {str(e)}")
|
| 368 |
return jsonify({"error": str(e)}), 500
|
| 369 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 370 |
# Start the app's built-in server only when this file is executed directly.
if __name__ == "__main__":
    app.run(
        debug=False,
        port=7860,
        host="0.0.0.0",
    )
|