Spaces:

meinvirgos
/

Traductor

Sleeping

File size: 2,974 Bytes

import os
import torch
import gradio as gr
import time
from transformers import AutoTokenizer, pipeline

# Model from Meta able to translate into more than 200 languages.
model = 'facebook/nllb-200-distilled-600M'
# Quantized alternative (currently disabled):
#model = 'JustFrederik/nllb-200-distilled-600M-ct2-int8'
# The tokenizer is named separately because the quantized model has no tokenizer.
tokenizer = 'facebook/nllb-200-distilled-600M'
# The model above translates poorly into Asturian, so this fine-tuned one is added.
# It comes from the AINA project: https://huggingface.co/projecte-aina
model_ast = "projecte-aina/aina-translator-es-ast"

# Language name shown in the UI -> language code handed to the translation pipelines.
# Insertion order is the order in which the names are listed in the dropdowns.
flores_codes = {
    "Asturianu": "ast_Latn",
    "Castellano": "spa_Latn",
    "Català": "cat_Latn",
    "English": "eng_Latn",
    "Euskera": "eus_Latn",
    "Galego": "glg_Latn",
}

def _translate_once(model_name: str, src_lang: str, tgt_lang: str, text: str) -> str:
    """Run one translation step with *model_name* and return the translated string."""
    # NOTE(review): the pipeline is built on every call, exactly as before;
    # keeping it loaded between requests would be a separate change.
    translator = pipeline('translation', model=model_name, tokenizer=tokenizer,
                          src_lang=src_lang, tgt_lang=tgt_lang)
    # The pipeline result is indexed as a list of dicts; only the text is kept.
    return translator(text, max_length=400)[0]['translation_text']


def translation(source: str, target: str, text: str) -> str:
    """Translate *text* from the *source* language to the *target* language.

    Args:
        source: Language name of the input; must be a key of ``flores_codes``.
        target: Language name of the output; must be a key of ``flores_codes``.
        text: The text to translate.

    Returns:
        The translated text, or an empty string when *text* is blank.

    Raises:
        KeyError: If *source* or *target* is not a key of ``flores_codes``.
    """
    src_code = flores_codes[source]
    tgt_code = flores_codes[target]
    spanish = flores_codes["Castellano"]
    asturian = flores_codes["Asturianu"]

    # Nothing to translate: skip loading any model.
    if not text or not text.strip():
        return ""

    if tgt_code != asturian:
        return _translate_once(model, src_code, tgt_code, text)

    # Asturian output is produced by the dedicated model, which is driven from
    # Spanish here. Any other source language is first translated to Spanish
    # with the general model, and that intermediate text (not the original
    # input) is what gets translated to Asturian.
    spanish_text = text
    if src_code != spanish:
        spanish_text = _translate_once(model, src_code, spanish, text)
    return _translate_once(model_ast, spanish, asturian, spanish_text)

if __name__ == '__main__':
    # Startup message printed before the interface is built.
    print('\tIniciando...')

    # Dropdown choices: the language names, in the order they appear in flores_codes.
    lang_codes = list(flores_codes)

    title = "Traductor Multilingüe"

    description = """
        Este traductor utiliza el siguiente modelo de lenguaje de Meta: https://github.com/facebookresearch/fairseq/tree/nllb\n
        Excepto para traducir al asturiano que usa el modelo del  proyecto AINA: https://huggingface.co/projecte-aina/aina-translator-es-ast\n
        Adaptado de: https://huggingface.co/spaces/Azwaw/Text_Translation_Multi-languages
        """

    # The three inputs are passed to translation() as (source, target, text).
    inputs = [
        gr.Dropdown(lang_codes, value='Castellano', label='Idioma original'),
        gr.Dropdown(lang_codes, value='Asturianu', label='Traducir al...'),
        gr.Textbox(label="Texto a traducir"),
    ]

    # Single output box that receives the string returned by translation().
    outputs = [gr.Textbox(label="Texto traducido")]

    # Build the Gradio interface, then start serving it.
    demo = gr.Interface(
        fn=translation,
        inputs=inputs,
        outputs=outputs,
        title=title,
        description=description,
        submit_btn="Traducir",
    )
    demo.launch()