abinashbordoloi's picture
Update app.py
dfacc2a
# import gradio as gr
# import time
# from transformers import NllbTokenizer, AutoModelForSeq2SeqLM, pipeline
# from supported_languages import LANGS
# def load_model():
# # model_name = 'nllb-moe-54b'
# model_name = 'nllb-200-distilled-600M'
# print('\tLoading model: %s' % model_name)
# model = AutoModelForSeq2SeqLM.from_pretrained(f'facebook/{model_name}')
# tokenizer = NllbTokenizer.from_pretrained(f'facebook/{model_name}')
# return model, tokenizer
# model, tokenizer = load_model()
# def translation(source, target, text):
# start_time = time.time()
# source_code = LANGS[source]
# target_code = LANGS[target]
# source_language = source
# target_language = target
# translator = pipeline('translation', model=model, tokenizer=tokenizer, src_lang=source, tgt_lang=target)
# output = translator(text, max_length=400)
# input_text = text
# end_time = time.time()
# full_output = output
# output = output[0]['translation_text']
# result = {
# 'inference_time': end_time - start_time,
# 'source': source_language,
# 'target': target_language,
# 'input_text': input_text,
# 'result': output,
# 'full_output': full_output
# }
# return result
# if __name__ == '__main__':
# # Define gradio demo
# lang_codes = list(LANGS.keys())
# inputs = [
# gr.Dropdown(lang_codes, label='Source'),
# gr.Dropdown(lang_codes, label='Target'),
# gr.Textbox(lines=5, label="Input text"),
# ]
# outputs = gr.JSON()
# title = "NLLB distilled 1.3B distilled【多语言翻译器】"
# demo_status = "Demo is running on CPU"
# description = f"Details: https://github.com/facebookresearch/fairseq/tree/nllb. {demo_status}"
# gr.Interface(
# translation,
# inputs,
# outputs,
# title=title,
# description=description,
# examples_per_page=50,
# ).launch()
import os
import torch
import gradio as gr
import time
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
from flores_200_codes import flores_codes
def load_models():
    """Load every configured checkpoint and return them in one flat dict.

    For each ``call_name -> hub id`` entry below, the seq2seq model and its
    tokenizer are fetched with ``from_pretrained`` and stored under the keys
    ``'<call_name>_model'`` and ``'<call_name>_tokenizer'``.

    Returns:
        dict: the loaded models and tokenizers, keyed as described above.
    """
    # Short name used for dict keys -> identifier handed to from_pretrained.
    checkpoints = {
        'nllb-200-distilled-600M': 'facebook/nllb-200-distilled-600M',
    }

    loaded = {}
    for alias, hub_id in checkpoints.items():
        print('\tLoading model: %s' % alias)
        loaded[alias + '_model'] = AutoModelForSeq2SeqLM.from_pretrained(hub_id)
        loaded[alias + '_tokenizer'] = AutoTokenizer.from_pretrained(hub_id)
    return loaded
def translation(source, target, text):
    """Translate ``text`` and report the result together with timing info.

    Args:
        source: key into ``flores_codes`` selecting the source language.
        target: key into ``flores_codes`` selecting the target language.
        text: the input handed to the translation pipeline.

    Returns:
        A dict with the keys ``inference_time`` (seconds, measured with
        ``time.time()``), ``source`` and ``target`` (the values looked up in
        ``flores_codes``), ``result`` (the ``translation_text`` of the first
        pipeline output) and ``full_output`` (the raw pipeline output).
        ``None`` is returned, after printing a message, when the model is
        not present in the global ``model_dict``.

    Raises:
        KeyError: if ``source`` or ``target`` is not a key of
            ``flores_codes``, or if the tokenizer entry is missing from
            ``model_dict``.
    """
    model_name = 'nllb-200-distilled-600M'
    model_key = model_name + '_model'

    # Guard clause: without a loaded model there is nothing to run.
    if model_key not in model_dict:
        print(f"Model '{model_name}' not found in model_dict.")
        return None

    # The timed window covers code lookup, pipeline construction and inference.
    started_at = time.time()
    src_code = flores_codes[source]
    tgt_code = flores_codes[target]

    translator = pipeline(
        'translation',
        model=model_dict[model_key],
        tokenizer=model_dict[model_name + '_tokenizer'],
        src_lang=src_code,
        tgt_lang=tgt_code,
    )
    full_output = translator(text, max_length=400)
    finished_at = time.time()

    return {
        'inference_time': finished_at - started_at,
        'source': src_code,
        'target': tgt_code,
        'result': full_output[0]['translation_text'],
        'full_output': full_output,
    }
if __name__ == '__main__':
    # Script entry point: load the models once, then build and launch the
    # Gradio demo.
    print('\tinit models')
    # Assigned at module level on purpose: translation() reads `model_dict`
    # as a global. (A `global` statement is a no-op at module scope, so none
    # is needed here.)
    model_dict = load_models()

    # define gradio demo
    # Both dropdowns offer the keys of flores_codes, which translation()
    # uses for its lookups.
    lang_codes = list(flores_codes)
    inputs = [
        gr.Dropdown(lang_codes, label='Source'),
        gr.Dropdown(lang_codes, label='Target'),
        gr.Textbox(lines=5, label="Input text"),
    ]
    outputs = gr.JSON()

    # The title names the checkpoint that load_models() actually loads
    # (nllb-200-distilled-600M); it previously claimed 1.3B and misspelled
    # "translation".
    title = "NLLB distilled 600M model for translation"

    demo_status = "Demo is running on CPU"
    description = f"Details: https://github.com/facebookresearch/fairseq/tree/nllb. {demo_status}"
    # Each example row is [source, target, input text], matching `inputs`.
    examples = [['Chinese (Simplified)', 'English', '你吃饭了吗?']]

    gr.Interface(
        translation,
        inputs,
        outputs,
        title=title,
        description=description,
        examples=examples,
        examples_per_page=50,
    ).launch()