from transformers import T5ForConditionalGeneration, T5Tokenizer, GenerationConfig
import gradio as gr

MODEL_NAME = "jbochi/madlad400-3b-mt"
default_max_length = 200

print("Using `{}`.".format(MODEL_NAME))

tokenizer = T5Tokenizer.from_pretrained(MODEL_NAME)
print("T5Tokenizer loaded from pretrained.")

model = T5ForConditionalGeneration.from_pretrained(MODEL_NAME, device_map="auto")
print("T5ForConditionalGeneration loaded from pretrained.")


def inference(max_length, input_text, history=None):
    # Avoid the mutable-default-argument pitfall; Gradio passes the state list explicitly.
    history = history or []
    # Keep inputs on the same device that device_map="auto" placed the model on.
    input_ids = tokenizer(input_text, return_tensors="pt").input_ids.to(model.device)
    outputs = model.generate(
        input_ids=input_ids,
        generation_config=GenerationConfig(
            decoder_start_token_id=2,
            max_length=int(max_length),  # honor the user-configurable response limit
        ),
    )
    result = tokenizer.decode(outputs[0], skip_special_tokens=True)
    history.append((input_text, result))
    return history, history


with gr.Blocks() as demo:
    with gr.Row():
        gr.Markdown(
            """
            # Demo of {}

            See more at Hugging Face: [{}](https://huggingface.co/{}).
            """.format(MODEL_NAME, MODEL_NAME, MODEL_NAME)
        )
    max_length = gr.Number(
        value=default_max_length, label="maximum length of response"
    )
    chatbot = gr.Chatbot(label=MODEL_NAME)
    state = gr.State([])
    with gr.Row():
        txt = gr.Textbox(
            show_label=False, placeholder="<2es> text to translate"
        )
    txt.submit(fn=inference, inputs=[max_length, txt, state], outputs=[chatbot, state])

demo.launch()