# import os | |
# import gradio as gr | |
# from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline | |
# en2ko = "KoJLabs/nllb-finetuned-en2ko" | |
# ko2en = "KoJLabs/nllb-finetuned-ko2en" | |
# style = "KoJLabs/bart-speech-style-converter" | |
# en2ko_model = AutoModelForSeq2SeqLM.from_pretrained(en2ko) | |
# ko2en_model = AutoModelForSeq2SeqLM.from_pretrained(ko2en) | |
# style_model = AutoModelForSeq2SeqLM.from_pretrained(style) | |
# en2ko_tokenizer = AutoTokenizer.from_pretrained(en2ko) | |
# ko2en_tokenizer = AutoTokenizer.from_pretrained(ko2en) | |
# style_tokenizer = AutoTokenizer.from_pretrained(style) | |
# def translate(source, target, text): | |
# formats = {"English":"eng_Latn", "Korean":"kor_Hang"} | |
# src = formats[source] | |
# tgt = formats[target] | |
# if src == "eng_Latn": | |
# translator = pipeline( | |
# 'translation', | |
# model=en2ko_model, | |
# tokenizer=ko2en_tokenizer, | |
# src_lang=src, | |
# tgt_lang=tgt, | |
# ) | |
# if src == "kor_Hang": | |
# translator = pipeline( | |
# 'translation', | |
# model=ko2en_model, | |
# tokenizer=en2ko_tokenizer, | |
# src_lang=src, | |
# tgt_lang=tgt | |
# ) | |
# output = translator(text) | |
# translated_text = output[0]['translation_text'] | |
# return translated_text | |
# title = 'KoTAN Translator & Speech-style converter' | |
# lang = ['English','Korean'] | |
# translator_app = gr.Interface( | |
# fn=translate, | |
# inputs=[gr.inputs.Dropdown(choices=lang, label='Source Language'), gr.inputs.Dropdown(choices=lang, label='Target Language'), gr.inputs.Textbox(lines=5, label='Text to Translate')], | |
# outputs=[gr.outputs.Textbox(label='Translated Text')], | |
# title=title, | |
# description = 'KoTAN: Korean Translation and Augmentation with fine-tuned NLLB. If you want to download as pip package, please visit our github. (https://github.com/KoJLabs/KoTAN)', | |
# article='Jisu, Kim. Juhwan, Lee', | |
# enable_queue=True, | |
# ) | |
# translator_app.launch() | |
import os

import gradio as gr
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
# Hugging Face Hub id of the NLLB-200 checkpoint; the same model and tokenizer
# serve every language pair handled by translate().
checkpoint = 'facebook/nllb-200-distilled-1.3B'

# Loaded once at import time so individual requests reuse the same objects
# instead of reloading the weights.
model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
def translate(source, target, text):
    """Translate ``text`` from the ``source`` language to the ``target`` language.

    Args:
        source: Display name of the input language; must be a key of the
            language table below (e.g. ``'English'``).
        target: Display name of the output language; same constraint.
        text: The text to translate.

    Returns:
        The translated string, or ``''`` when ``text`` is empty, whitespace
        only, or ``None`` (the model is not invoked in that case).

    Raises:
        KeyError: If ``source`` or ``target`` is not a supported language
            name (this includes ``None`` from an unselected dropdown).
    """
    # Display name -> language code handed to the pipeline as src_lang/tgt_lang.
    formats = {
        'English': 'eng_Latn',
        'Asante': 'aka_Latn',
        'Akuapem': 'twi_Latn',
        'Ewe': 'ewe_Latn',
        'Hausa': 'hau_Latn',
    }
    try:
        source_fmt = formats[source]
        target_fmt = formats[target]
    except KeyError as err:
        # Same exception type as a bare lookup, but with a message that tells
        # the user what went wrong and what the valid choices are.
        supported = ', '.join(sorted(formats))
        raise KeyError(
            f'Unsupported language {err.args[0]!r}; choose one of: {supported}'
        ) from err

    # Nothing to translate: skip the model call for blank input.
    if not text or not text.strip():
        return ''

    # The pipeline only wraps the module-level model and tokenizer; the
    # language pair is what varies between calls.
    translator = pipeline(
        'translation',
        model=model,
        tokenizer=tokenizer,
        src_lang=source_fmt,
        tgt_lang=target_fmt,
        max_length=400,
    )
    output = translator(text)
    return output[0]['translation_text']
# Crowd-sourced feedback saving is currently disabled; the lines below (and the
# commented flagging arguments further down) are the pieces needed to turn it
# back on.
# hf_token = os.getenv('HF_TOKEN')
# hf_writer = gr.HuggingFaceDatasetSaver(hf_token, 'crowdsourced-GLT')

title = 'Ananse AI | Ghanaian Language Translator'

# Choices offered in both language dropdowns; each must be a language name
# that translate() knows how to map to a model language code.
lang = ['Akuapem', 'Asante', 'English', 'Ewe', 'Hausa']

translator_app = gr.Interface(
    fn=translate,
    # Components are taken from the top-level gradio namespace; the
    # gr.inputs / gr.outputs namespaces are deprecated.
    inputs=[
        gr.Dropdown(choices=lang, label='Source Language'),
        gr.Dropdown(choices=lang, label='Target Language'),
        gr.Textbox(lines=5, label='Text to Translate'),
    ],
    outputs=[gr.Textbox(label='Translated Text')],
    title=title,
    description='The Ghanaian Languages considered now are the Asante, Akuapem, Ewe, and Hausa. Kindly use the Flag button to rate (👍🏼, 👎🏼) your translation to help us improve. Thanks',
    article='Ananse AI | hnmensah',
    # Each example row is [source language, target language, text].
    examples=[
        ['English', 'Asante', 'Kwame went to Kaneshie to buy tomatoes.'],
        ['English', 'Ewe', 'The event should be hosted at the Accra Mall.'],
        ['English', 'Akuapem', 'The trader is suffering from Malaria so she did not go to work.'],
        ['English', 'Hausa', 'The last person to get to the class will be sacked.'],
    ],
    # allow_flagging='manual',
    # flagging_options=['👍🏼', '👎🏼'],
    # flagging_callback=hf_writer,
)

# Request queuing is enabled through .queue(); the enable_queue constructor
# argument used previously is deprecated.
translator_app.queue()
translator_app.launch()