# KoTAN / app.py
# Previous KoTAN translator app, kept commented out for reference.
# import os
# import gradio as gr
# from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
#
# en2ko = "KoJLabs/nllb-finetuned-en2ko"
# ko2en = "KoJLabs/nllb-finetuned-ko2en"
# style = "KoJLabs/bart-speech-style-converter"
#
# en2ko_model = AutoModelForSeq2SeqLM.from_pretrained(en2ko)
# ko2en_model = AutoModelForSeq2SeqLM.from_pretrained(ko2en)
# style_model = AutoModelForSeq2SeqLM.from_pretrained(style)
#
# en2ko_tokenizer = AutoTokenizer.from_pretrained(en2ko)
# ko2en_tokenizer = AutoTokenizer.from_pretrained(ko2en)
# style_tokenizer = AutoTokenizer.from_pretrained(style)
#
# def translate(source, target, text):
#     formats = {"English": "eng_Latn", "Korean": "kor_Hang"}
#     src = formats[source]
#     tgt = formats[target]
#
#     if src == "eng_Latn":
#         translator = pipeline(
#             'translation',
#             model=en2ko_model,
#             tokenizer=en2ko_tokenizer,  # pair the en2ko model with its own tokenizer
#             src_lang=src,
#             tgt_lang=tgt,
#         )
#     if src == "kor_Hang":
#         translator = pipeline(
#             'translation',
#             model=ko2en_model,
#             tokenizer=ko2en_tokenizer,  # pair the ko2en model with its own tokenizer
#             src_lang=src,
#             tgt_lang=tgt,
#         )
#
#     output = translator(text)
#     translated_text = output[0]['translation_text']
#     return translated_text
#
# title = 'KoTAN Translator & Speech-style converter'
# lang = ['English', 'Korean']
#
# translator_app = gr.Interface(
#     fn=translate,
#     inputs=[gr.inputs.Dropdown(choices=lang, label='Source Language'),
#             gr.inputs.Dropdown(choices=lang, label='Target Language'),
#             gr.inputs.Textbox(lines=5, label='Text to Translate')],
#     outputs=[gr.outputs.Textbox(label='Translated Text')],
#     title=title,
#     description='KoTAN: Korean Translation and Augmentation with fine-tuned NLLB. If you want to download as pip package, please visit our github. (https://github.com/KoJLabs/KoTAN)',
#     article='Jisu, Kim. Juhwan, Lee',
#     enable_queue=True,
# )
#
# translator_app.launch()
import os
import gradio as gr
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
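
# NLLB-200 (distilled, 1.3B parameters): a single multilingual checkpoint covering
# 200 languages; the translation pipeline's src_lang/tgt_lang arguments take
# FLORES-200 codes such as 'eng_Latn' or 'twi_Latn'.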
checkpoint = 'facebook/nllb-200-distilled-1.3B'
model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
def translate(source, target, text):
    # Map the UI language names to the FLORES-200 codes understood by NLLB.
    formats = {'English': 'eng_Latn', 'Asante': 'aka_Latn', 'Akuapem': 'twi_Latn',
               'Ewe': 'ewe_Latn', 'Hausa': 'hau_Latn'}
    source_fmt = formats[source]
    target_fmt = formats[target]

    translator = pipeline('translation',
                          model=model,
                          tokenizer=tokenizer,
                          src_lang=source_fmt,
                          tgt_lang=target_fmt,
                          max_length=400)

    output = translator(text)
    translated_text = output[0]['translation_text']
    return translated_text
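
# Quick local sanity check (illustrative only, kept commented out so the Space just
# serves the Gradio app; the sample sentence is an assumption, not a test case):
# print(translate('English', 'Ewe', 'Good morning'))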
# hf_token = os.getenv('HF_TOKEN')
title = 'Ananse AI | Ghanaian Language Translator'
# hf_writer = gr.HuggingFaceDatasetSaver(hf_token, 'crowdsourced-GLT')
lang = ['Akuapem','Asante', 'English', 'Ewe','Hausa']
translator_app = gr.Interface(
    fn=translate,
    inputs=[gr.inputs.Dropdown(choices=lang, label='Source Language'),
            gr.inputs.Dropdown(choices=lang, label='Target Language'),
            gr.inputs.Textbox(lines=5, label='Text to Translate')],
    outputs=[gr.outputs.Textbox(label='Translated Text')],
    title=title,
    description='The Ghanaian languages currently supported are Asante, Akuapem, Ewe, and Hausa. Kindly use the Flag button to rate your translation (👍🏼 / 👎🏼) and help us improve. Thanks!',
    article='Ananse AI | hnmensah',
    examples=[['English', 'Asante', 'Kwame went to Kaneshie to buy tomatoes.'],
              ['English', 'Ewe', 'The event should be hosted at the Accra Mall.'],
              ['English', 'Akuapem', 'The trader is suffering from malaria, so she did not go to work.'],
              ['English', 'Hausa', 'The last person to get to the class will be sacked.']],
    # allow_flagging='manual',
    # flagging_options=['👍🏼', '👎🏼'],
    # flagging_callback=hf_writer,
    enable_queue=True,
)
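
# To collect the 👍🏼 / 👎🏼 flags in a Hugging Face dataset, add an HF_TOKEN secret to the
# Space, then uncomment the hf_token and hf_writer lines above together with the
# allow_flagging / flagging_options / flagging_callback arguments in gr.Interface.
# (Sketch of the older Gradio flagging API already used in this file; verify it against
# the Gradio version installed in the Space.)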
translator_app.launch()