Spaces:
Sleeping
Sleeping
File size: 2,974 Bytes
6b03889 964a180 6b03889 964a180 6b03889 58aaf00 63bb501 3d903f8 4b0094d 6b03889 7431216 6b03889 b56b020 e64b40a 4b0094d b56b020 6b03889 7431216 6b03889 7431216 6b03889 7431216 6b03889 7431216 6b03889 964a180 cc67ce1 e64b40a cc67ce1 6b03889 f366264 6b03889 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 |
import os
import torch
import gradio as gr
import time
from transformers import AutoTokenizer, pipeline
# Meta's NLLB model, able to translate between more than 200 languages.
# (Alternative quantized checkpoint: 'JustFrederik/nllb-200-distilled-600M-ct2-int8';
# it ships without a tokenizer, which is why the tokenizer is named separately.)
model = 'facebook/nllb-200-distilled-600M'
tokenizer = 'facebook/nllb-200-distilled-600M'

# NLLB translates poorly into Asturian, so a fine-tuned model is used for that
# target. It comes from the AINA project: https://huggingface.co/projecte-aina
model_ast = "projecte-aina/aina-translator-es-ast"

# Display name -> FLORES-200 language code. Insertion order is the order in
# which the languages are offered in the UI dropdowns.
flores_codes = {
    "Asturianu": "ast_Latn",
    "Castellano": "spa_Latn",
    "Català": "cat_Latn",
    "English": "eng_Latn",
    "Euskera": "eus_Latn",
    "Galego": "glg_Latn",
}
def translation(source, target, text):
    """Translate *text* between two of the languages listed in ``flores_codes``.

    Translations into Asturian are produced by ``model_ast``, which is invoked
    as a Spanish -> Asturian translator. When the source language is not
    Spanish, the text is first translated into Spanish with the general
    ``model`` and that intermediate result is what gets passed to
    ``model_ast``. Every other target is handled directly by ``model``.

    Args:
        source: Display name of the input language (a key of ``flores_codes``).
        target: Display name of the output language (a key of ``flores_codes``).
        text: Text to translate.

    Returns:
        The translated text, or an empty string when *text* is empty or
        contains only whitespace.

    Raises:
        KeyError: If ``source`` or ``target`` is not a key of ``flores_codes``.
    """
    source = flores_codes[source]
    target = flores_codes[target]
    spanish = flores_codes["Castellano"]
    asturian = flores_codes["Asturianu"]

    # Nothing to translate: skip loading any model.
    if not text or not text.strip():
        return ""

    # NOTE: pipelines are rebuilt on every call, as in the original code.
    if target == asturian:
        # Start from the input text; it is already Spanish unless pivoted below.
        texto_castellano = text
        if source != spanish:
            translator = pipeline('translation', model=model, tokenizer=tokenizer,
                                  src_lang=source, tgt_lang=spanish)
            # The pipeline returns a list of dicts; keep only the translated string.
            texto_castellano = translator(text, max_length=400)[0]['translation_text']
        translator_ast = pipeline('translation', model=model_ast, tokenizer=tokenizer,
                                  src_lang=spanish, tgt_lang=asturian)
        # Feed the Spanish text (not the original input) to the es -> ast model.
        output = translator_ast(texto_castellano, max_length=400)
    else:
        translator = pipeline('translation', model=model, tokenizer=tokenizer,
                              src_lang=source, tgt_lang=target)
        output = translator(text, max_length=400)

    return output[0]['translation_text']
if __name__ == '__main__':
    print('\tIniciando...')

    # Build the Gradio demo: two language selectors plus the text to translate.
    language_names = list(flores_codes)
    source_dropdown = gr.Dropdown(language_names, value='Castellano', label='Idioma original')
    target_dropdown = gr.Dropdown(language_names, value='Asturianu', label='Traducir al...')
    input_box = gr.Textbox(label="Texto a traducir")
    result_box = gr.Textbox(label="Texto traducido")

    title = "Traductor Multilingüe"
    # User-facing description shown under the title (kept verbatim).
    description = """
Este traductor utiliza el siguiente modelo de lenguaje de Meta: https://github.com/facebookresearch/fairseq/tree/nllb\n
Excepto para traducir al asturiano que usa el modelo del proyecto AINA: https://huggingface.co/projecte-aina/aina-translator-es-ast\n
Adaptado de: https://huggingface.co/spaces/Azwaw/Text_Translation_Multi-languages
"""

    # Wire the translation function to the components and start the app.
    demo = gr.Interface(
        fn=translation,
        inputs=[source_dropdown, target_dropdown, input_box],
        outputs=[result_box],
        title=title,
        description=description,
        submit_btn="Traducir",
    )
    demo.launch()
|