GPT-2_Instruct / app.py
NPG's picture
Update app.py
ebd7f42
raw
history blame
2.1 kB
"""#Imports"""
#!pip install transformers gradio accelerate bitsandbytes sentencepiece
#import multiprocessing
import torch
from transformers import T5Tokenizer, T5ForConditionalGeneration
import gradio as gr
"""#Code"""
#torch.set_default_dtype(torch.float16)
# Name of the preferred torch device: first CUDA device when one is
# available, CPU otherwise.
if torch.cuda.is_available():
    device = "cuda:0"
else:
    device = "cpu"
"""##FP 16"""
# Tokenizer and weights come from the same checkpoint. The weights are
# requested in half precision; device placement is left to
# device_map="auto".
tokenizer = T5Tokenizer.from_pretrained("google/flan-t5-xl")
model = T5ForConditionalGeneration.from_pretrained(
    "google/flan-t5-xl",
    device_map="auto",
    torch_dtype=torch.float16,
)
"""###Interface"""
def generate(input_text, minimum_length, maximum_length, temperature, repetition_penalty):
    """Generate text for ``input_text`` with the module-level tokenizer/model.

    Args:
        input_text: Prompt string to tokenize and feed to the model.
        minimum_length: Forwarded to ``model.generate`` as ``min_length``.
        maximum_length: Forwarded to ``model.generate`` as ``max_new_tokens``.
        temperature: Forwarded to ``model.generate`` as ``temperature``.
        repetition_penalty: Forwarded to ``model.generate`` as
            ``repetition_penalty``.

    Returns:
        The first returned sequence, decoded with special tokens skipped and
        passed through ``str.capitalize()`` (first character upper-cased,
        the rest lower-cased).
    """
    # Move the prompt to wherever the model actually lives instead of a
    # hard-coded "cuda", so the call does not fail on hosts without a GPU.
    input_ids = tokenizer(input_text, return_tensors="pt").input_ids.to(model.device)
    # NOTE(review): do_sample is not set, so with num_beams=6 this looks like
    # plain beam search; temperature/top_k/top_p are presumably ignored by
    # the library in that mode -- confirm whether sampling was intended.
    outputs = model.generate(
        input_ids,
        min_length=minimum_length,
        max_new_tokens=maximum_length,
        length_penalty=1.4,
        num_beams=6,
        no_repeat_ngram_size=3,
        temperature=temperature,
        top_k=100,
        top_p=0.9,
        repetition_penalty=repetition_penalty,
    )
    return tokenizer.decode(outputs[0], skip_special_tokens=True).capitalize()
# Heading text handed to gr.Interface through its ``title`` argument.
title = "Flan-T5-XL GRADIO GUI"
def inference(input_text, minimum_length, maximum_length, temperature, repetition_penalty):
    """Callback wired into the Gradio interface.

    Forwards all five UI values, unchanged and in order, to ``generate`` and
    returns whatever ``generate`` returns.
    """
    generated_text = generate(
        input_text,
        minimum_length,
        maximum_length,
        temperature,
        repetition_penalty,
    )
    return generated_text
# Input widgets, listed in the positional order of ``inference``'s parameters.
input_components = [
    gr.Textbox(lines=4, label="Input"),
    gr.Slider(0, 300, value=20, step=10, label="Minimum length"),
    gr.Slider(100, 2000, value=1000, step=100, label="Maximum length"),
    gr.Slider(0, 1, value=0.75, step=0.05, label="Temperature"),
    gr.Slider(1, 3, value=2.1, step=0.1, label="Repetition penalty"),
]

# Single text box that shows the string returned by ``inference``.
output_components = [
    gr.Textbox(lines=2, label="Output"),
]

# Custom stylesheet passed verbatim to the interface.
custom_css = """
body {background-color: lightgreen}
.input_text input {
background-color: lightblue !important;
}
"""

# Build the UI and start serving it.
demo = gr.Interface(
    fn=inference,
    inputs=input_components,
    outputs=output_components,
    title=title,
    css=custom_css,
)
demo.launch()