File size: 4,159 Bytes
59db409
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ac6d958
4d4c9b0
ac6d958
4d4c9b0
 
59db409
 
 
ac6d958
 
59db409
 
 
 
 
 
 
 
 
 
ac6d958
 
 
 
59db409
 
 
 
ac6d958
 
 
 
 
59db409
 
 
 
 
 
 
 
 
ac6d958
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
# import os
# import gradio as gr
# from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline

# en2ko = "KoJLabs/nllb-finetuned-en2ko"
# ko2en = "KoJLabs/nllb-finetuned-ko2en"
# style = "KoJLabs/bart-speech-style-converter"

# en2ko_model = AutoModelForSeq2SeqLM.from_pretrained(en2ko)
# ko2en_model = AutoModelForSeq2SeqLM.from_pretrained(ko2en)
# style_model = AutoModelForSeq2SeqLM.from_pretrained(style)

# en2ko_tokenizer = AutoTokenizer.from_pretrained(en2ko)
# ko2en_tokenizer = AutoTokenizer.from_pretrained(ko2en)
# style_tokenizer = AutoTokenizer.from_pretrained(style)

# def translate(source, target, text):
#     formats = {"English":"eng_Latn", "Korean":"kor_Hang"}
#     src = formats[source]
#     tgt = formats[target]

#     if src == "eng_Latn":
#         translator = pipeline(
#             'translation',
#             model=en2ko_model,
#             tokenizer=ko2en_tokenizer,
#             src_lang=src, 
#             tgt_lang=tgt, 
#         )

#     if src == "kor_Hang":
#         translator = pipeline(
#             'translation',
#             model=ko2en_model,
#             tokenizer=en2ko_tokenizer,
#             src_lang=src, 
#             tgt_lang=tgt
#         )

#     output = translator(text)
#     translated_text = output[0]['translation_text']

#     return translated_text

# title = 'KoTAN Translator & Speech-style converter'
# lang = ['English','Korean']

# translator_app = gr.Interface(
#     fn=translate,
#     inputs=[gr.inputs.Dropdown(choices=lang, label='Source Language'), gr.inputs.Dropdown(choices=lang, label='Target Language'), gr.inputs.Textbox(lines=5, label='Text to Translate')],
#     outputs=[gr.outputs.Textbox(label='Translated Text')],
#     title=title,
#     description = 'KoTAN: Korean Translation and Augmentation with fine-tuned NLLB. If you want to download as pip package, please visit our github. (https://github.com/KoJLabs/KoTAN)',
#     article='Jisu, Kim. Juhwan, Lee',
#     enable_queue=True,
# )

# translator_app.launch()

import os
import gradio as gr
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline


# Checkpoint identifier shared by the model and its tokenizer so the two always
# come from the same pretrained release.
checkpoint = 'facebook/nllb-200-distilled-1.3B'
# Both objects are created once, at import time, and kept at module level;
# translate() passes these same instances to the pipeline on every request.
model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)
tokenizer = AutoTokenizer.from_pretrained(checkpoint)

# UI language name -> language code handed to the pipeline as src_lang/tgt_lang.
_LANGUAGE_CODES = {
    'English': 'eng_Latn',
    'Asante': 'aka_Latn',
    'Akuapem': 'twi_Latn',
    'Ewe': 'ewe_Latn',
    'Hausa': 'hau_Latn',
}


def _language_code(name):
    """Return the pipeline language code for the UI language *name*.

    Raises:
        KeyError: if *name* is not a supported language (this includes ``None``,
            which is what arrives when no dropdown value was selected).
    """
    try:
        return _LANGUAGE_CODES[name]
    except KeyError:
        supported = ', '.join(sorted(_LANGUAGE_CODES))
        # Same exception type as a plain dict lookup, but with a message that
        # tells the user what went wrong and what the valid choices are.
        raise KeyError(
            f'Unsupported language {name!r}; choose one of: {supported}'
        ) from None


def translate(source: str, target: str, text: str) -> str:
    """Translate *text* from the *source* language to the *target* language.

    Args:
        source: UI name of the language *text* is written in
            ('English', 'Asante', 'Akuapem', 'Ewe' or 'Hausa').
        target: UI name of the language to translate into (same choices).
        text: The text to translate.

    Returns:
        The translated text. An empty string is returned for empty,
        whitespace-only or missing input, without invoking the model.

    Raises:
        KeyError: if *source* or *target* is not a supported language name.
    """
    # Resolve both languages first so an invalid selection is always reported,
    # even when there is nothing to translate.
    source_fmt = _language_code(source)
    target_fmt = _language_code(target)

    # Nothing to translate: skip the (expensive) model call entirely.
    if not text or not text.strip():
        return ''

    translator = pipeline(
        'translation',
        model=model,
        tokenizer=tokenizer,
        src_lang=source_fmt,
        tgt_lang=target_fmt,
        max_length=400,
    )

    # The pipeline returns one result dict per input; a single string was
    # passed, so the translation is in the first (only) entry.
    output = translator(text)
    return output[0]['translation_text']

# Crowdsourced-feedback wiring (HF dataset saver + manual flagging) is currently
# disabled; the commented lines below and inside gr.Interface are kept so it
# can be switched back on together.
# hf_token = os.getenv('HF_TOKEN')
title = 'Ananse AI | Ghanaian Language Translator'
# hf_writer = gr.HuggingFaceDatasetSaver(hf_token, 'crowdsourced-GLT')

# Choices offered by both language dropdowns; every entry must be a language
# name that translate() recognises.
lang = ['Akuapem', 'Asante', 'English', 'Ewe', 'Hausa']

# NOTE(review): gr.inputs.* / gr.outputs.* and enable_queue look like legacy
# Gradio APIs — confirm the pinned Gradio version before upgrading the package.
translator_app = gr.Interface(
    fn=translate,
    # Order matches translate(source, target, text).
    inputs=[
        gr.inputs.Dropdown(choices=lang, label='Source Language'),
        gr.inputs.Dropdown(choices=lang, label='Target Language'),
        gr.inputs.Textbox(lines=5, label='Text to Translate'),
    ],
    outputs=[gr.outputs.Textbox(label='Translated Text')],
    title=title,
    # The thumbs-up / thumbs-down emoji were mis-encoded (UTF-8 bytes decoded
    # with a single-byte codec); they are restored here.
    description=(
        'The Ghanaian Languages considered now are the Asante, Akuapem, Ewe, and Hausa. '
        'Kindly use the Flag button to rate (👍🏼, 👎🏼) your translation to help us improve. Thanks'
    ),
    article='Ananse AI | hnmensah',
    # Each example row is [source language, target language, text].
    examples=[
        ['English', 'Asante', 'Kwame went to Kaneshie to buy tomates.'],
        ['English', 'Ewe', 'The event should be hosted at the Accra Mall.'],
        ['English', 'Akuapem', 'The trader is suffering from Malaria so she did not go to work.'],
        ['English', 'Hausa', 'The last person to get to the class will be sacked.'],
    ],
    # allow_flagging='manual',
    # flagging_options=['👍🏼', '👎🏼'],
    # flagging_callback=hf_writer,
    enable_queue=True,
)

translator_app.launch()