tonic committed on
Commit
bc83a37
·
1 Parent(s): 0b2fb36

adding interface logic and audio returns

Browse files
Files changed (1) hide show
  1. app.py +0 -5
app.py CHANGED
@@ -183,7 +183,6 @@ def process_input(image=None, file=None, audio=None, text="", translateto = "Eng
183
  final_text = text
184
  if image is not None:
185
  ocr_prediction = ocr_processor.process_image(image)
186
- # gettig text from ocr object
187
  for idx in range(len((list(ocr_prediction)[0][1]))):
188
  final_text += " "
189
  final_text += list((list(ocr_prediction)[0][1])[idx])[1][1]
@@ -191,13 +190,11 @@ def process_input(image=None, file=None, audio=None, text="", translateto = "Eng
191
  if file.name.lower().endswith(('.png', '.jpg', '.jpeg')):
192
  pil_image = Image.open(file)
193
  ocr_prediction = ocr_processor.process_image(pil_image)
194
- # gettig text from ocr object
195
  for idx in range(len((list(ocr_prediction)[0][1]))):
196
  final_text += " "
197
  final_text += list((list(ocr_prediction)[0][1])[idx])[1][1]
198
  elif file.name.lower().endswith('.pdf'):
199
  ocr_prediction = ocr_processor.process_pdf(file.name)
200
- # gettig text from ocr object
201
  for idx in range(len((list(ocr_prediction)[0][1]))):
202
  final_text += " "
203
  final_text += list((list(ocr_prediction)[0][1])[idx])[1][1]
@@ -290,12 +287,10 @@ def main():
290
  translatefrom=input_language, translateto=target_language
291
  )
292
 
293
- # Prepare outputs for Gradio
294
  processed_text_output = final_text
295
  audio_output_native_phrases = [native for _, native in audio_outputs]
296
  audio_output_target_phrases = [target for target, _ in audio_outputs]
297
 
298
- # Assuming there are exactly 3 top phrases for simplicity
299
  longest_phrases_outputs = top_phrases[:3]
300
  translated_phrases_outputs = translations[:3]
301
  audio_outputs_native = audio_output_native_phrases[:3]
 
183
  final_text = text
184
  if image is not None:
185
  ocr_prediction = ocr_processor.process_image(image)
 
186
  for idx in range(len((list(ocr_prediction)[0][1]))):
187
  final_text += " "
188
  final_text += list((list(ocr_prediction)[0][1])[idx])[1][1]
 
190
  if file.name.lower().endswith(('.png', '.jpg', '.jpeg')):
191
  pil_image = Image.open(file)
192
  ocr_prediction = ocr_processor.process_image(pil_image)
 
193
  for idx in range(len((list(ocr_prediction)[0][1]))):
194
  final_text += " "
195
  final_text += list((list(ocr_prediction)[0][1])[idx])[1][1]
196
  elif file.name.lower().endswith('.pdf'):
197
  ocr_prediction = ocr_processor.process_pdf(file.name)
 
198
  for idx in range(len((list(ocr_prediction)[0][1]))):
199
  final_text += " "
200
  final_text += list((list(ocr_prediction)[0][1])[idx])[1][1]
 
287
  translatefrom=input_language, translateto=target_language
288
  )
289
 
 
290
  processed_text_output = final_text
291
  audio_output_native_phrases = [native for _, native in audio_outputs]
292
  audio_output_target_phrases = [target for target, _ in audio_outputs]
293
 
 
294
  longest_phrases_outputs = top_phrases[:3]
295
  translated_phrases_outputs = translations[:3]
296
  audio_outputs_native = audio_output_native_phrases[:3]