madlad400-3b-ct2-int8

Runtime error

File size: 2,358 Bytes

d58ca71
 
7af1f35
 
 
 
 
5c57059
 
 
2df1a94
5c57059
 
7af1f35
d58ca71
 
 
 
5c57059
9f187e7
7af1f35
5c57059
 
88fecde
7af1f35
5c57059
 
 
7af1f35
 
 
 
 
 
 
 
 
 
 
 
 
 
5c57059
7af1f35
 
 
 
5c57059
 
 
 
 
d2cf737
7af1f35
 
5c57059

import os

import ctranslate2
import gradio as gr
from huggingface_hub import snapshot_download
from sentencepiece import SentencePieceProcessor

# Heading passed to the Gradio interface.
title = "MADLAD-400 Translation Demo"

# HTML blurb passed to the Gradio interface as its description. It is
# assembled from adjacent string literals; the resulting text (including the
# leading/trailing newlines and the mixed quote styles inside the anchor tags)
# is exactly the markup the page displays.
description = (
    "\n<p>\n"
    "Translator using "
    "<a href='https://arxiv.org/abs/2309.04662' target='_blank'>MADLAD-400</a>, "
    "a multilingual machine translation model on 250 billion tokens "
    "covering over 450 languages using publicly available data. "
    "This demo application uses  "
    '<a href="https://huggingface.co/Heng666/madlad400-3b-mt-ct2-int8">'
    "Heng666/madlad400-3b-mt-ct2-int8</a> model, "
    "which is a ctranslate2 optimized model of "
    '<a href="https://huggingface.co/google/madlad400-3b-mt">'
    "google/madlad400-3b-mt</a>"
    "\n</p>\n"
)

# Performance tuning, following https://opennmt.net/CTranslate2/performance.html
# CTranslate2 is compiled with Intel MKL by default, and enabling the
# experimental packed GEMM flag was observed to give a significant speed-up.
os.environ["CT2_USE_EXPERIMENTAL_PACKED_GEMM"] = "1"

# Resolve the Hub repository to a local directory; both the translator and the
# SentencePiece vocabulary are loaded from that directory.
model_name = "Heng666/madlad400-3b-mt-ct2-int8"
model_path = snapshot_download(model_name)

tokenizer = SentencePieceProcessor()
tokenizer.load(f"{model_path}/spiece.model")
translator = ctranslate2.Translator(model_path)

# Decode the first 460 vocabulary ids one by one and keep those whose text
# starts with "<2"; stripping the leading "<2" and the final character leaves
# the bare language code that is offered in the UI.
# NOTE(review): 460 is a hard-coded bound — presumably chosen to cover every
# language tag in this vocabulary; confirm against the model's spiece.model.
tokens = list(map(tokenizer.decode, range(460)))
lang_codes = [piece[2:-1] for piece in tokens if piece.startswith("<2")]


def translate(input_text, target_language):
    """Translate ``input_text`` into ``target_language``.

    The text is prefixed with a ``<2{target_language}>`` tag, split into
    string pieces by the module-level ``tokenizer``, and sent to the
    module-level ``translator`` as a single-item batch. The first hypothesis
    of the first result is decoded back to text.

    Args:
        input_text: Source text to translate.
        target_language: Language code inserted into the ``<2...>`` tag.

    Returns:
        The decoded text of the first hypothesis.
    """
    prompt = f"<2{target_language}> {input_text}"
    source_pieces = tokenizer.encode(prompt, out_type=str)

    # Decoding options; max_batch_size and repetition_penalty are not passed,
    # so the library defaults apply to them.
    # NOTE(review): no_repeat_ngram_size=1 presumably forbids any token from
    # occurring twice in the output — confirm this is intended.
    decoding_options = {
        "batch_type": "tokens",
        "beam_size": 1,
        "no_repeat_ngram_size": 1,
    }
    batch_output = translator.translate_batch([source_pieces], **decoding_options)

    # One input was submitted, so only the first result is relevant.
    best_hypothesis = batch_output[0].hypotheses[0]
    return tokenizer.decode(best_hypothesis)


def translate_interface(input_text, target_language):
    """Gradio callback: forward both inputs to ``translate`` and return its result."""
    return translate(input_text, target_language)


# Source-text box, pre-filled with a sample sentence.
input_text = gr.Textbox(
    value="Imagine a world in which every single person on the planet is given free access to the sum of all human knowledge.",
    label="Input Text",
)

# Target-language picker: the choices are the codes collected in lang_codes,
# with "ml" as the initial selection.
target_language = gr.Dropdown(
    choices=lang_codes,
    label="Target Language",
    value="ml",
)

# Box that shows the string returned by translate_interface.
output_text = gr.Textbox(label="Translated Text")

# Wire the widgets to the callback and start the app.
demo = gr.Interface(
    fn=translate_interface,
    inputs=[input_text, target_language],
    outputs=output_text,
    title=title,
    description=description,
)
demo.launch()