File size: 3,459 Bytes
7ebefe0
 
 
 
 
b6d591d
018f958
b6d591d
37f64c3
7ebefe0
37f64c3
7ebefe0
 
37f64c3
7ebefe0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
018f958
 
b6d591d
7ebefe0
 
 
 
018f958
7ebefe0
018f958
 
b6d591d
018f958
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
import os
from matplotlib.pyplot import switch_backend, text
# Runtime dependency install (Hugging Face Spaces-style): paddle must be
# installed before `paddleocr` can be imported below.
os.system('pip install paddlepaddle')
os.system('pip install paddleocr')
from paddleocr import PaddleOCR, draw_ocr
from PIL import Image
import gradio as gr
import torch

from transformers import M2M100ForConditionalGeneration, M2M100Tokenizer

# Load the multilingual translation model and tokenizer once at startup so
# every inference call reuses them.
model = M2M100ForConditionalGeneration.from_pretrained("facebook/m2m100_1.2B")
tokenizer = M2M100Tokenizer.from_pretrained("facebook/m2m100_1.2B")

# UI copy for the Gradio demo. The description names the checkpoint actually
# loaded above (m2m100_1.2B) — the previous text incorrectly said 418M.
title = 'OCR Translator'
description = 'This is a gradio demo for OCR and translating using the PaddleOCR and m2m100_1.2B model. It takes in an input of an image, the language to be read using OCR, and the language the result will be translated to. The PaddleOCR implementation is limited to English, Chinese, Japanese, German, and French, while the results can be translated to 100 languages.'
article = '<p>This is only a demo. The official repository can be found <a href="https://github.com/PaddlePaddle/PaddleOCR">here</a></p>'
examples = [['japan_to_en.png', 'Japanese', 'English'], ['en_to_fr.png', 'English', 'French'], ['german_to_en.jpg', 'German', 'English']]

def inference(img, src_lang, tgt_lang):
    """OCR an image and translate the recognized text.

    Args:
        img: Uploaded image file object (Gradio 'file' type; `.name` is its path).
        src_lang: UI language name of the text in the image
            ('Chinese', 'English', 'French', 'Japanese', or 'German').
        tgt_lang: UI language name to translate the OCR result into.

    Returns:
        A 3-element list: [path to annotated image, raw OCR text, translation].
    """
    # UI language name -> PaddleOCR model code.
    ocr_codes = {'Chinese': 'ch', 'English': 'en', 'French': 'fr',
                 'Japanese': 'japan', 'German': 'german'}
    # UI language name -> m2m100 ISO language code.
    m2m_codes = {'Chinese': 'zh', 'English': 'en', 'French': 'fr',
                 'Japanese': 'ja', 'German': 'de'}

    img_src_lang = ocr_codes[src_lang]
    tokenizer.src_lang = m2m_codes[src_lang]
    # BUG FIX: the original if/elif ladder tested `src_lang` (not `tgt_lang`)
    # for the Japanese and German targets, so those targets were never mapped
    # to 'ja'/'de' and the raw UI string reached tokenizer.get_lang_id().
    tgt_lang = m2m_codes[tgt_lang]

    # Run OCR on CPU and render the detected text boxes onto the image.
    ocr = PaddleOCR(use_angle_cls=True, lang=img_src_lang, use_gpu=False)
    img_path = img.name
    result = ocr.ocr(img_path, cls=True)
    image = Image.open(img_path).convert('RGB')
    boxes = [line[0] for line in result]
    txts = [line[1][0] for line in result]
    im_show = draw_ocr(image, boxes, font_path='chinese.simfang.ttf')
    im_show = Image.fromarray(im_show)
    im_show.save('result.jpg')

    # Join the recognized fragments into a single source string.
    input_text = ' '.join(txts)

    # Translate to the target language; forced_bos_token_id selects the
    # output language for the multilingual m2m100 model.
    encoded_src = tokenizer(input_text, return_tensors="pt")
    generated_tokens = model.generate(
        **encoded_src,
        forced_bos_token_id=tokenizer.get_lang_id(tgt_lang),
        use_cache=True,
    )
    translated = tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)[0]
    return ['result.jpg', input_text, translated]

# Build and launch the Gradio UI: an image input plus two language dropdowns,
# producing the annotated OCR image, the raw OCR text, and its translation.
# BUG FIX: both dropdowns previously used default='en', which is not one of
# the listed choices (they are full language names), so the default selection
# matched no branch in inference(). Default is now 'English'.
gr.Interface(
    inference,
    [gr.inputs.Image(type='file', label='Input'),
    gr.inputs.Dropdown(choices=['Chinese', 'English', 'French', 'German', 'Japanese'], type="value", default='English', label='Source Language'),
    gr.inputs.Dropdown(choices=['Chinese', 'English', 'French', 'German', 'Japanese'], type="value", default='English', label='Translate to')],
    [gr.outputs.Image(type='file', label='Output'), gr.outputs.Textbox(label = 'Output Text'), gr.outputs.Textbox(label = 'Translated Text')],
    title=title,
    examples=examples,
    description=description,
    article=article,
    enable_queue=True
    ).launch(debug=True)