Chaitanya895 committed on
Commit
625a396
·
verified ·
1 Parent(s): 7ec03e4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +73 -61
app.py CHANGED
@@ -45,8 +45,8 @@ os.makedirs(CACHE_DIR, exist_ok=True)
45
  # Maximum dimensions for images
46
  MAX_IMAGE_DIMENSION = 500
47
 
48
- # Timeout for OCR operation (increased)
49
- OCR_TIMEOUT = 20 # Increased from 10 to 20 seconds
50
 
51
  # Crawling settings
52
  CRAWL_DEPTH = 1
@@ -73,7 +73,7 @@ def preprocess_image(image):
73
  image = image.resize((new_width, new_height), Image.Resampling.LANCZOS)
74
  logger.info(f"Resized image to: {new_width}x{new_height}")
75
  else:
76
- target_dpi = 50 # Reduced from 72 for faster OCR
77
  image = image.resize((int(width * target_dpi / 72), int(height * target_dpi / 72)), Image.Resampling.LANCZOS)
78
  image = image.convert("L")
79
  return image
@@ -271,34 +271,43 @@ except Exception as e:
271
  async def home():
272
  return render_template("index.html")
273
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
274
  @app.route("/web_translate", methods=["POST"])
275
  async def web_translate():
 
276
  try:
277
- async with asyncio.timeout(60): # Timeout for the entire route
278
- start_time = time.time()
279
- text = request.form.get("text")
280
- file = request.files.get("file")
281
-
282
- if not text and not file:
283
- return render_template("index.html", error="Please enter text or upload a file.")
284
-
285
- if file and allowed_file(file.filename):
286
- extracted_text = extract_text(file)
287
- if extracted_text.startswith("Error") or extracted_text.startswith("Please"):
288
- return render_template("index.html", error=extracted_text, text=text)
289
- text_to_translate = extracted_text
290
- else:
291
- text_to_translate = text
292
-
293
- if not text_to_translate:
294
- return render_template("index.html", error="No valid text to translate.", text=text)
295
-
296
- translated = await translate_text(text_to_translate, model, tokenizer)
297
- if translated.startswith("Error"):
298
- return render_template("index.html", error=translated, text=text, extracted_text=text_to_translate)
299
-
300
- logger.info(f"Total request took {time.time() - start_time:.2f} seconds")
301
- return render_template("index.html", extracted_text=text_to_translate, translated_text=translated, text=text)
302
  except asyncio.TimeoutError:
303
  logger.error("Request timed out after 60 seconds")
304
  return render_template("index.html", error="Request timed out. Please try again with a smaller input.", text=text)
@@ -306,6 +315,42 @@ async def web_translate():
306
  logger.error(f"Error in web_translate: {str(e)}")
307
  return render_template("index.html", error=f"Error processing request: {str(e)}", text=text)
308
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
309
  @app.route("/translate", methods=["POST"])
310
  async def translate():
311
  try:
@@ -322,38 +367,5 @@ async def translate():
322
  logger.error(f"Error in translate: {str(e)}")
323
  return jsonify({"error": str(e)}), 500
324
 
325
- @app.route("/crawl_and_translate", methods=["POST"])
326
- async def crawl_and_translate():
327
- try:
328
- async with asyncio.timeout(60): # Timeout for the entire route
329
- start_time = time.time()
330
- url = request.form.get("url")
331
-
332
- if not url:
333
- return render_template("index.html", error="Please enter a website URL.")
334
-
335
- parsed_url = urlparse(url)
336
- if not parsed_url.scheme or not parsed_url.netloc:
337
- return render_template("index.html", error="Invalid URL format.", url=url)
338
-
339
- logger.info(f"Starting crawl for URL: {url}")
340
- extracted_text = crawl_website(url)
341
-
342
- if not extracted_text or extracted_text.startswith("No Bangla text"):
343
- return render_template("index.html", error=extracted_text or "No text found to translate.", url=url)
344
-
345
- translated = await translate_text(extracted_text, model, tokenizer)
346
- if translated.startswith("Error"):
347
- return render_template("index.html", error=translated, url=url, extracted_text=extracted_text)
348
-
349
- logger.info(f"Total crawl and translate request took {time.time() - start_time:.2f} seconds")
350
- return render_template("index.html", extracted_text=extracted_text, translated_text=translated, url=url)
351
- except asyncio.TimeoutError:
352
- logger.error("Crawl and translate request timed out after 60 seconds")
353
- return render_template("index.html", error="Request timed out. Please try again with a smaller website.", url=url)
354
- except Exception as e:
355
- logger.error(f"Error in crawl_and_translate: {str(e)}")
356
- return render_template("index.html", error=f"Error processing request: {str(e)}", url=url)
357
-
358
  if __name__ == "__main__":
359
  app.run(host="0.0.0.0", port=7860, debug=False)
 
45
  # Maximum dimensions for images
46
  MAX_IMAGE_DIMENSION = 500
47
 
48
+ # Timeout for OCR operation
49
+ OCR_TIMEOUT = 20
50
 
51
  # Crawling settings
52
  CRAWL_DEPTH = 1
 
73
  image = image.resize((new_width, new_height), Image.Resampling.LANCZOS)
74
  logger.info(f"Resized image to: {new_width}x{new_height}")
75
  else:
76
+ target_dpi = 50
77
  image = image.resize((int(width * target_dpi / 72), int(height * target_dpi / 72)), Image.Resampling.LANCZOS)
78
  image = image.convert("L")
79
  return image
 
271
  async def home():
272
  return render_template("index.html")
273
 
274
+ async def process_web_translate():
275
+ start_time = time.time()
276
+ text = request.form.get("text")
277
+ file = request.files.get("file")
278
+
279
+ logger.info(f"Received text: {text}, file: {file.filename if file else None}")
280
+
281
+ if not text and not file:
282
+ return render_template("index.html", error="Please enter text or upload a file.")
283
+
284
+ if file and allowed_file(file.filename):
285
+ logger.info("Starting OCR extraction for uploaded file")
286
+ extracted_text = extract_text(file)
287
+ logger.info(f"OCR result: {extracted_text}")
288
+ if extracted_text.startswith("Error") or extracted_text.startswith("Please"):
289
+ return render_template("index.html", error=extracted_text, text=text)
290
+ text_to_translate = extracted_text
291
+ else:
292
+ text_to_translate = text
293
+
294
+ if not text_to_translate:
295
+ return render_template("index.html", error="No valid text to translate.", text=text)
296
+
297
+ logger.info("Starting translation")
298
+ translated = await translate_text(text_to_translate, model, tokenizer)
299
+ logger.info(f"Translation result: {translated}")
300
+ if translated.startswith("Error"):
301
+ return render_template("index.html", error=translated, text=text, extracted_text=text_to_translate)
302
+
303
+ logger.info(f"Total request took {time.time() - start_time:.2f} seconds")
304
+ return render_template("index.html", extracted_text=text_to_translate, translated_text=translated, text=text)
305
+
306
  @app.route("/web_translate", methods=["POST"])
307
  async def web_translate():
308
+ text = None # Initialize text to avoid UnboundLocalError
309
  try:
310
+ return await asyncio.wait_for(process_web_translate(), timeout=60) # Timeout after 60 seconds
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
311
  except asyncio.TimeoutError:
312
  logger.error("Request timed out after 60 seconds")
313
  return render_template("index.html", error="Request timed out. Please try again with a smaller input.", text=text)
 
315
  logger.error(f"Error in web_translate: {str(e)}")
316
  return render_template("index.html", error=f"Error processing request: {str(e)}", text=text)
317
 
318
+ async def process_crawl_and_translate():
319
+ start_time = time.time()
320
+ url = request.form.get("url")
321
+
322
+ if not url:
323
+ return render_template("index.html", error="Please enter a website URL.")
324
+
325
+ parsed_url = urlparse(url)
326
+ if not parsed_url.scheme or not parsed_url.netloc:
327
+ return render_template("index.html", error="Invalid URL format.", url=url)
328
+
329
+ logger.info(f"Starting crawl for URL: {url}")
330
+ extracted_text = crawl_website(url)
331
+
332
+ if not extracted_text or extracted_text.startswith("No Bangla text"):
333
+ return render_template("index.html", error=extracted_text or "No text found to translate.", url=url)
334
+
335
+ translated = await translate_text(extracted_text, model, tokenizer)
336
+ if translated.startswith("Error"):
337
+ return render_template("index.html", error=translated, url=url, extracted_text=extracted_text)
338
+
339
+ logger.info(f"Total crawl and translate request took {time.time() - start_time:.2f} seconds")
340
+ return render_template("index.html", extracted_text=extracted_text, translated_text=translated, url=url)
341
+
342
+ @app.route("/crawl_and_translate", methods=["POST"])
343
+ async def crawl_and_translate():
344
+ url = None # Initialize url to avoid UnboundLocalError
345
+ try:
346
+ return await asyncio.wait_for(process_crawl_and_translate(), timeout=60) # Timeout after 60 seconds
347
+ except asyncio.TimeoutError:
348
+ logger.error("Crawl and translate request timed out after 60 seconds")
349
+ return render_template("index.html", error="Request timed out. Please try again with a smaller website.", url=url)
350
+ except Exception as e:
351
+ logger.error(f"Error in crawl_and_translate: {str(e)}")
352
+ return render_template("index.html", error=f"Error processing request: {str(e)}", url=url)
353
+
354
  @app.route("/translate", methods=["POST"])
355
  async def translate():
356
  try:
 
367
  logger.error(f"Error in translate: {str(e)}")
368
  return jsonify({"error": str(e)}), 500
369
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
370
  if __name__ == "__main__":
371
  app.run(host="0.0.0.0", port=7860, debug=False)