"""Gradio demo for a Malay/English translation model served with CTranslate2.

Huggingface Models: https://huggingface.co/models
Transformer dependency: https://pypi.org/project/transformers/

On import this module converts the Hugging Face checkpoint to a CTranslate2
model directory (unless that directory already exists), loads the tokenizer,
builds the Gradio interface and launches it.
"""
import os

# NOTE(review): an empty CURL_CA_BUNDLE presumably disables TLS certificate
# verification for HTTP downloads -- confirm this is intended before deploying.
os.environ['CURL_CA_BUNDLE'] = ''
# os.environ['CUDA_VISIBLE_DEVICES'] = ''
# Set before the ML libraries below are imported so they can pick it up.
os.environ['TF_FORCE_GPU_ALLOW_GROWTH'] = 'true'

import malaya
import ctranslate2
from transformers import AutoTokenizer
import gradio as gr
import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Hugging Face checkpoint and the local directory holding its CTranslate2 form.
MODEL_NAME = 'mesolitica/translation-t5-small-standard-bahasa-cased-v2'
CT2_MODEL_DIR = 't5-small-ct2'

# Token ids removed from the model output before decoding.
# NOTE(review): presumably the pad/eos/unk ids of this vocabulary -- confirm.
SPECIAL_TOKEN_IDS = frozenset({0, 1, 2})

# UI label -> language code.
TO_LANG = {
    'Malay': 'ms',
    'English': 'en',
}
TO_LANG_KEYS = list(TO_LANG.keys())

# Lazily created ctranslate2.Translator, shared by every call to translate().
model = None

# Language code -> language name used inside the instruction prefix.
map_lang = {
    'en': 'Inggeris',
    'ms': 'Melayu',
    'pasar ms': 'pasar Melayu',
    'manglish': 'Manglish',
}

converter = ctranslate2.converters.TransformersConverter(MODEL_NAME)

if os.path.isdir(CT2_MODEL_DIR):
    # A previous run already produced the converted model; reuse it.
    logger.info('Reusing existing CTranslate2 model in %r', CT2_MODEL_DIR)
else:
    try:
        converter.convert(CT2_MODEL_DIR, quantization='int8')
    except Exception:
        # Stay best-effort at startup, but leave a trace: if the conversion
        # really failed, loading the translator in translate() fails later.
        logger.exception('Could not convert %s to CTranslate2', MODEL_NAME)

tokenizer = AutoTokenizer.from_pretrained(
    MODEL_NAME,
    use_fast=False,
)


def translate(text: str, to_lang: str) -> str:
    """Translate *text* into the language selected in the UI.

    Args:
        text: Source text to translate.
        to_lang: A key of ``TO_LANG`` (for example ``'Malay'``).

    Returns:
        The decoded translation, or ``''`` when *text* is empty or blank.

    Raises:
        KeyError: If *to_lang* is not a key of ``TO_LANG``.
    """
    global model

    target_code = TO_LANG[to_lang]

    # Nothing to translate; skip running the model on the bare prefix.
    if not text or not text.strip():
        return ''

    # Load the converted model once and keep it for later requests instead
    # of re-reading it from disk on every call.
    if model is None:
        model = ctranslate2.Translator(CT2_MODEL_DIR)

    prefix = f'terjemah ke {map_lang[target_code]}: {text}'
    input_tokens = tokenizer.convert_ids_to_tokens(tokenizer.encode(prefix))

    outputs = model.translate_batch(
        [input_tokens],
        max_input_length=6144,
        max_decoding_length=6144,
        disable_unk=True,
    )

    # Keep the first hypothesis of each result, mapped back to token ids.
    id_batches = [
        tokenizer.convert_tokens_to_ids(output.hypotheses[0])
        for output in outputs
    ]
    results = tokenizer.batch_decode(
        [
            [token_id for token_id in ids if token_id not in SPECIAL_TOKEN_IDS]
            for ids in id_batches
        ],
        spaces_between_special_tokens=False,
    )
    return results[0]


demo = gr.Interface(
    fn=translate,
    inputs=[
        gr.components.Textbox(label='Input Text'),
        gr.components.Dropdown(
            label='Output language',
            choices=TO_LANG_KEYS,
            value='Malay',
        ),
    ],
    outputs=[
        gr.components.Textbox(label='Output Text'),
    ],
    cache_examples=False,
    title='bentobytes AI Translator',
    # NOTE(review): the user-facing wording below ("is model able") looks like
    # a typo; it is kept unchanged here and left for the owner to reword.
    description=(
        'This translation is model able to translate malay, english, manglish to a target language. '
        'It is also able to maintain the text structure as it is and only translate necessary texts, eg, programming code.'
    ),
)

# NOTE(review): binds to all interfaces and requests a public share link.
demo.launch(server_name='0.0.0.0', share=True)