Spaces:
Runtime error
Runtime error
File size: 1,728 Bytes
dea518b 8ba043d dea518b 8ba043d dea518b 8ba043d 0fa8cc2 8ba043d dea518b 8ba043d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 |
import os
import shutil
import easyocr
import gradio as gr
import py3langid as langid
from PIL import Image
from transformers import M2M100ForConditionalGeneration, M2M100Tokenizer
# Languages shared by the OCR reader and the language identifier.
languages = ["en", "fr", "es"]

# OCR detections must score strictly above this confidence to be kept.
threshold = 0.2

# Restrict py3langid's candidate languages to the same list used for OCR.
langid.set_languages(languages)
# Lazily-created easyocr.Reader, shared by every call so the OCR models are
# loaded once instead of once per request.
_ocr_reader = None


def _get_ocr_reader():
    """Return the shared ``easyocr.Reader``, creating it on first use."""
    global _ocr_reader
    if _ocr_reader is None:
        _ocr_reader = easyocr.Reader(languages)
    return _ocr_reader


def read_text_from_image(img):
    """Run OCR on *img* and return the detected text as one lowercase string.

    Detections whose confidence does not exceed the module-level
    ``threshold`` are discarded; the remaining fragments are joined with
    single spaces in the order easyocr returned them.

    Args:
        img: Image handed straight to ``easyocr.Reader.readtext``.

    Returns:
        The concatenated, lower-cased text.

    Raises:
        ValueError: If no detection passes the confidence threshold.
    """
    detections = _get_ocr_reader().readtext(img)
    texts = [text for _bbox, text, prob in detections if prob > threshold]
    if not texts:
        raise ValueError("No text detected")
    return " ".join(texts).lower()
def detect_language(text):
    """Return the language py3langid assigns to *text*.

    ``langid.classify`` yields a ``(language, score)`` pair; the score is
    ignored and only the language is returned.
    """
    classification = langid.classify(text)
    return classification[0]
_M2M100_CHECKPOINT = "facebook/m2m100_1.2B"

# The checkpoint is large, so the model is loaded once and reused. Tokenizers
# are cached per source language so that no shared tokenizer has its
# ``src_lang`` mutated between requests.
_m2m100_model = None
_m2m100_tokenizers = {}


def _get_m2m100_model():
    """Return the shared M2M100 model, loading it on first use."""
    global _m2m100_model
    if _m2m100_model is None:
        _m2m100_model = M2M100ForConditionalGeneration.from_pretrained(
            _M2M100_CHECKPOINT
        )
    return _m2m100_model


def _get_m2m100_tokenizer(lang):
    """Return a cached M2M100 tokenizer configured with ``src_lang=lang``."""
    tokenizer = _m2m100_tokenizers.get(lang)
    if tokenizer is None:
        tokenizer = M2M100Tokenizer.from_pretrained(
            _M2M100_CHECKPOINT, src_lang=lang
        )
        _m2m100_tokenizers[lang] = tokenizer
    return tokenizer


def translate_to_id(text, lang):
    """Translate *text* from language *lang* into Indonesian with M2M100.

    Args:
        text: Source text to translate.
        lang: Source language code, used as the tokenizer's ``src_lang``.

    Returns:
        The decoded translation with special tokens stripped.
    """
    tokenizer = _get_m2m100_tokenizer(lang)
    model = _get_m2m100_model()
    # M2M100 is not instruction-tuned: the language pair is selected through
    # ``src_lang`` and ``forced_bos_token_id``. A natural-language prefix such
    # as "translate from ... to ..." would be translated along with the text,
    # so only the raw text is encoded.
    encoded = tokenizer(text, return_tensors="pt")
    # Pass the full encoding so the attention mask reaches ``generate``.
    outputs = model.generate(
        **encoded,
        forced_bos_token_id=tokenizer.get_lang_id("id"),
    )
    return tokenizer.decode(outputs[0], skip_special_tokens=True)
def predict(img):
    """Read the text in *img*, detect its language and translate it.

    Chains ``read_text_from_image``, ``detect_language`` and
    ``translate_to_id``.

    Args:
        img: Image forwarded to ``read_text_from_image``.

    Returns:
        The translated text, or the error message as a string when any step
        raises ``ValueError`` (for example when no text is detected).
    """
    try:
        text = read_text_from_image(img)
        lang = detect_language(text)
        return translate_to_id(text, lang)
    except ValueError as e:
        print(e)
        # The interface output is plain text, so hand back the message rather
        # than the exception object itself.
        return str(e)
# Gradio UI: a single image input wired to ``predict``, with a text output.
app = gr.Interface(
    fn=predict,
    inputs=gr.Image(label="Input Image"),
    outputs="text",
)
# Launch at import time, as the original script does.
app.launch()