Spaces:
Runtime error
Runtime error
tonic
committed on
Commit
·
bc83a37
1
Parent(s):
0b2fb36
adding interface logic and audio returns
Browse files
app.py
CHANGED
@@ -183,7 +183,6 @@ def process_input(image=None, file=None, audio=None, text="", translateto = "Eng
|
|
183 |
final_text = text
|
184 |
if image is not None:
|
185 |
ocr_prediction = ocr_processor.process_image(image)
|
186 |
-
# gettig text from ocr object
|
187 |
for idx in range(len((list(ocr_prediction)[0][1]))):
|
188 |
final_text += " "
|
189 |
final_text += list((list(ocr_prediction)[0][1])[idx])[1][1]
|
@@ -191,13 +190,11 @@ def process_input(image=None, file=None, audio=None, text="", translateto = "Eng
|
|
191 |
if file.name.lower().endswith(('.png', '.jpg', '.jpeg')):
|
192 |
pil_image = Image.open(file)
|
193 |
ocr_prediction = ocr_processor.process_image(pil_image)
|
194 |
-
# gettig text from ocr object
|
195 |
for idx in range(len((list(ocr_prediction)[0][1]))):
|
196 |
final_text += " "
|
197 |
final_text += list((list(ocr_prediction)[0][1])[idx])[1][1]
|
198 |
elif file.name.lower().endswith('.pdf'):
|
199 |
ocr_prediction = ocr_processor.process_pdf(file.name)
|
200 |
-
# gettig text from ocr object
|
201 |
for idx in range(len((list(ocr_prediction)[0][1]))):
|
202 |
final_text += " "
|
203 |
final_text += list((list(ocr_prediction)[0][1])[idx])[1][1]
|
@@ -290,12 +287,10 @@ def main():
|
|
290 |
translatefrom=input_language, translateto=target_language
|
291 |
)
|
292 |
|
293 |
-
# Prepare outputs for Gradio
|
294 |
processed_text_output = final_text
|
295 |
audio_output_native_phrases = [native for _, native in audio_outputs]
|
296 |
audio_output_target_phrases = [target for target, _ in audio_outputs]
|
297 |
|
298 |
-
# Assuming there are exactly 3 top phrases for simplicity
|
299 |
longest_phrases_outputs = top_phrases[:3]
|
300 |
translated_phrases_outputs = translations[:3]
|
301 |
audio_outputs_native = audio_output_native_phrases[:3]
|
|
|
183 |
final_text = text
|
184 |
if image is not None:
|
185 |
ocr_prediction = ocr_processor.process_image(image)
|
|
|
186 |
for idx in range(len((list(ocr_prediction)[0][1]))):
|
187 |
final_text += " "
|
188 |
final_text += list((list(ocr_prediction)[0][1])[idx])[1][1]
|
|
|
190 |
if file.name.lower().endswith(('.png', '.jpg', '.jpeg')):
|
191 |
pil_image = Image.open(file)
|
192 |
ocr_prediction = ocr_processor.process_image(pil_image)
|
|
|
193 |
for idx in range(len((list(ocr_prediction)[0][1]))):
|
194 |
final_text += " "
|
195 |
final_text += list((list(ocr_prediction)[0][1])[idx])[1][1]
|
196 |
elif file.name.lower().endswith('.pdf'):
|
197 |
ocr_prediction = ocr_processor.process_pdf(file.name)
|
|
|
198 |
for idx in range(len((list(ocr_prediction)[0][1]))):
|
199 |
final_text += " "
|
200 |
final_text += list((list(ocr_prediction)[0][1])[idx])[1][1]
|
|
|
287 |
translatefrom=input_language, translateto=target_language
|
288 |
)
|
289 |
|
|
|
290 |
processed_text_output = final_text
|
291 |
audio_output_native_phrases = [native for _, native in audio_outputs]
|
292 |
audio_output_target_phrases = [target for target, _ in audio_outputs]
|
293 |
|
|
|
294 |
longest_phrases_outputs = top_phrases[:3]
|
295 |
translated_phrases_outputs = translations[:3]
|
296 |
audio_outputs_native = audio_output_native_phrases[:3]
|