# zuhair96's picture
# Update app.py
# 3d23b84 verified
"""
Huggingface Models: https://huggingface.co/models
Transformer dependency: https://pypi.org/project/transformers/
"""
import os
# NOTE(review): an empty CURL_CA_BUNDLE is a commonly used workaround that
# makes `requests`-based downloads skip TLS certificate verification --
# confirm this is intentional, since it weakens the security of downloads.
os.environ['CURL_CA_BUNDLE'] = ''
# Disabled toggle: un-commenting the next line would set CUDA_VISIBLE_DEVICES
# to an empty string (presumably to hide GPUs and force CPU -- TODO confirm).
# os.environ['CUDA_VISIBLE_DEVICES'] = ''
# Presumably read by TensorFlow to allocate GPU memory on demand; it is set
# before the ML imports below, so keep this ordering -- TODO confirm.
os.environ['TF_FORCE_GPU_ALLOW_GROWTH'] = 'true'
# NOTE(review): `malaya` is not referenced anywhere else in the visible code;
# it may be imported only for its import-time side effects -- verify.
import malaya
import ctranslate2
from transformers import AutoTokenizer
import gradio as gr
import logging
# NOTE(review): redundant -- `os` is already imported at the top of this block.
import os
# Emit INFO-level (and above) log records from the root logger.
logging.basicConfig(level=logging.INFO)
# Human-readable language name (shown in the UI dropdown) -> language code.
TO_LANG = dict(Malay='ms', English='en')

# Dropdown choices, in the same order as the mapping above.
TO_LANG_KEYS = [*TO_LANG]

# Language code -> Malay name of that language, interpolated into the
# 'terjemah ke ...' prompt prefix built by translate().
map_lang = dict((
    ('en', 'Inggeris'),
    ('ms', 'Melayu'),
    ('pasar ms', 'pasar Melayu'),
    ('manglish', 'Manglish'),
))

# Module-level placeholder that translate() declares global and checks
# against None before creating a translator.
model = None
# Hugging Face checkpoint used for both the CTranslate2 conversion and the
# tokenizer, kept in one place so the two cannot drift apart.
_HF_MODEL_ID = 'mesolitica/translation-t5-small-standard-bahasa-cased-v2'
# Directory that receives the converted (int8-quantized) CTranslate2 model.
_CT2_MODEL_DIR = 't5-small-ct2'

converter = ctranslate2.converters.TransformersConverter(_HF_MODEL_ID)

if os.path.exists(_CT2_MODEL_DIR):
    # A previous run already produced the output directory. convert() does
    # not overwrite an existing directory unless asked to force it, so reuse
    # the earlier conversion instead of provoking (and hiding) that error.
    logging.info(
        'CTranslate2 model directory %r already exists; skipping conversion',
        _CT2_MODEL_DIR,
    )
else:
    try:
        converter.convert(_CT2_MODEL_DIR, quantization='int8')
    except Exception:
        # Conversion stays best-effort (startup continues), but the failure
        # is now recorded with its traceback instead of being dropped.
        # Catching Exception rather than BaseException lets KeyboardInterrupt
        # and SystemExit propagate normally.
        logging.exception(
            'Failed to convert %s into CTranslate2 format at %r',
            _HF_MODEL_ID,
            _CT2_MODEL_DIR,
        )

# Slow (non-"fast") tokenizer for the same checkpoint.
tokenizer = AutoTokenizer.from_pretrained(_HF_MODEL_ID, use_fast=False)
def translate(text, to_lang):
    """Translate ``text`` into the language selected by ``to_lang``.

    Args:
        text: The text to translate.
        to_lang: A key of ``TO_LANG`` (e.g. ``'Malay'`` or ``'English'``).

    Returns:
        The decoded translation as a string.

    Raises:
        KeyError: If ``to_lang`` is not a key of ``TO_LANG``.
    """
    global model
    target_code = TO_LANG[to_lang]

    if model is None:
        # Load the converted model once and keep it in the module-level
        # `model`. Previously the translator was bound to a local name, so
        # `model` stayed None and the model was reloaded on every call.
        model = ctranslate2.Translator('t5-small-ct2')

    # The prompt tells the model the target language, using its Malay name.
    prefix = f'terjemah ke {map_lang[target_code]}: {text}'
    # translate_batch is given token strings, so encode to ids and map the
    # ids back to their token strings.
    input_tokens = tokenizer.convert_ids_to_tokens(tokenizer.encode(prefix))
    outputs = model.translate_batch(
        [input_tokens],
        max_input_length=6144,
        max_decoding_length=6144,
        disable_unk=True,
    )

    # Keep the first hypothesis of each result and turn its tokens into ids.
    hypothesis_ids = [
        tokenizer.convert_tokens_to_ids(output.hypotheses[0])
        for output in outputs
    ]
    # Ids 0, 1 and 2 are dropped before decoding (presumably the tokenizer's
    # pad/eos/unk special tokens -- TODO confirm against the vocabulary).
    skipped_ids = (0, 1, 2)
    decoded = tokenizer.batch_decode(
        [
            [token_id for token_id in ids if token_id not in skipped_ids]
            for ids in hypothesis_ids
        ],
        spaces_between_special_tokens=False,
    )
    return decoded[0]
# Build the UI components first (input text, target language, output text),
# then wire them to translate() in a single Gradio interface.
_input_text_box = gr.components.Textbox(label='Input Text')
_target_language_dropdown = gr.components.Dropdown(
    label='Output language',
    choices=TO_LANG_KEYS,
    value='Malay',
)
_output_text_box = gr.components.Textbox(label='Output Text')

demo = gr.Interface(
    fn=translate,
    inputs=[_input_text_box, _target_language_dropdown],
    outputs=[_output_text_box],
    cache_examples=False,
    title='bentobytes AI Translator',
    # NOTE(review): the description has a word-order slip ("is model able");
    # it is reproduced verbatim here because it is user-facing runtime text.
    description=(
        'This translation is model able to translate malay, english, manglish '
        'to a target language. It is also able to maintain the text structure '
        'as it is and only translate necessary texts, eg, programming code.'
    ),
)

# Listen on all network interfaces rather than only on localhost.
demo.launch(server_name='0.0.0.0')