Spaces:
Runtime error
Runtime error
"""
Huggingface Models: https://huggingface.co/models
Transformer dependency: https://pypi.org/project/transformers/
"""
import os
# The environment variables below are assigned before the third-party
# libraries are imported, so those libraries see them at import time.
# NOTE(review): an empty CURL_CA_BUNDLE presumably disables CA-bundle/SSL
# verification for the HTTP client used for model downloads -- confirm that
# this is intentional for the deployment.
os.environ['CURL_CA_BUNDLE'] = ''
# os.environ['CUDA_VISIBLE_DEVICES'] = ''
# NOTE(review): presumably a TensorFlow setting for incremental GPU memory
# allocation; nothing in the visible code uses TensorFlow directly -- verify
# it is still needed.
os.environ['TF_FORCE_GPU_ALLOW_GROWTH'] = 'true'
# `malaya` is imported but not referenced anywhere in the visible code.
import malaya
import ctranslate2
from transformers import AutoTokenizer
import gradio as gr
import logging
# Repeats the `import os` at the top of this section; harmless.
import os
# Emit INFO-level (and above) records from the root logger.
logging.basicConfig(level=logging.INFO)
# Output-language label shown in the UI dropdown -> internal language code.
TO_LANG = dict(Malay='ms', English='en')

# Dropdown choices, in the insertion order of TO_LANG.
TO_LANG_KEYS = [*TO_LANG]

# Module-level slot that translate() checks before it builds a translator.
model = None

# Internal language code -> language name interpolated into the
# 'terjemah ke ...' prompt prefix built by translate().
map_lang = dict([
    ('en', 'Inggeris'),
    ('ms', 'Melayu'),
    ('pasar ms', 'pasar Melayu'),
    ('manglish', 'Manglish'),
])
# Hugging Face model id shared by the CTranslate2 conversion and the tokenizer,
# kept in one place so the two cannot drift apart.
_HF_MODEL_ID = 'mesolitica/translation-t5-small-standard-bahasa-cased-v2'

# Convert the Transformers checkpoint into an int8 CTranslate2 model stored in
# the local 't5-small-ct2' directory, which translate() loads later.
converter = ctranslate2.converters.TransformersConverter(_HF_MODEL_ID)
try:
    converter.convert('t5-small-ct2', quantization='int8')
except Exception as exc:
    # Conversion stays best-effort (start-up continues), but the failure is
    # logged instead of silently discarded, and KeyboardInterrupt/SystemExit
    # are no longer swallowed.
    # NOTE(review): the expected failure is presumably "output directory
    # already exists" on a restart -- confirm against the CTranslate2 docs.
    logging.warning(
        "Conversion to 't5-small-ct2' was skipped or failed: %s", exc
    )

# Slow (non-fast) tokenizer for the same checkpoint; translate() uses it to
# turn text into tokens and generated tokens back into text.
tokenizer = AutoTokenizer.from_pretrained(_HF_MODEL_ID, use_fast=False)
def translate(text, to_lang):
    """Translate *text* into the language selected by *to_lang*.

    Args:
        text: Input text to translate.
        to_lang: A key of ``TO_LANG`` (the label chosen in the UI dropdown).

    Returns:
        The decoded translation of the single input as a string.

    Raises:
        KeyError: If *to_lang* is not a key of ``TO_LANG``.
    """
    global model
    target = TO_LANG[to_lang]

    # Build the translator once and keep it in the module-level `model`.
    # Previously the instance was bound to a local name, so `model` stayed
    # None and the model was reloaded from disk on every call.
    if model is None:
        model = ctranslate2.Translator('t5-small-ct2')

    # Prompt in the form the model is fed: 'terjemah ke <language>: <text>'.
    prefix = f'terjemah ke {map_lang[target]}: {text}'
    input_tokens = tokenizer.convert_ids_to_tokens(tokenizer.encode(prefix))
    outputs = model.translate_batch(
        [input_tokens],
        max_input_length=6144,
        max_decoding_length=6144,
        disable_unk=True,
    )

    # Ids 0, 1 and 2 are dropped before decoding.
    # NOTE(review): presumably the tokenizer's pad/eos/unk ids -- confirm.
    skipped_ids = {0, 1, 2}
    id_batches = [
        [
            token_id
            for token_id in tokenizer.convert_tokens_to_ids(out.hypotheses[0])
            if token_id not in skipped_ids
        ]
        for out in outputs
    ]
    results = tokenizer.batch_decode(
        id_batches,
        spaces_between_special_tokens=False,
    )
    # Exactly one input was submitted, so only the first result is relevant.
    return results[0]
# Input widgets: free text plus the target-language selector (defaults to
# 'Malay'); their values are passed to translate() in this order.
input_text_box = gr.components.Textbox(label='Input Text')
language_dropdown = gr.components.Dropdown(
    label='Output language',
    choices=TO_LANG_KEYS,
    value='Malay',
)
# Output widget that displays the string returned by translate().
output_text_box = gr.components.Textbox(label='Output Text')

demo = gr.Interface(
    fn=translate,
    inputs=[input_text_box, language_dropdown],
    outputs=[output_text_box],
    cache_examples=False,
    title='bentobytes AI Translator',
    description='This translation is model able to translate malay, english, manglish to a target language. It is also able to maintain the text structure as it is and only translate necessary texts, eg, programming code.',
)

# Serve the app, binding to all network interfaces.
demo.launch(server_name='0.0.0.0')