import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# checkpoint = "bigscience/bloomz"          # full model (English)
checkpoint = "bigscience/bloomz-560m"       # small English model
# checkpoint = "bigscience/bloomz-7b1-mt"   # multilingual (non-English)

device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# tokenizer = AutoTokenizer.from_pretrained(checkpoint)
# model = AutoModelForCausalLM.from_pretrained(checkpoint, device_map="auto", load_in_8bit=False).to(device)
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
# Move the model to the same device as the inputs; otherwise generation
# fails on GPU because the inputs below are sent to `device`.
model = AutoModelForCausalLM.from_pretrained(checkpoint).to(device)

def get_result(prompt):
    # prompt = f"'''{str(prompt)}'''"
    inputs = tokenizer.encode(prompt, return_tensors="pt").to(device)
    # Allow up to 1000 generated tokens beyond the prompt. Using
    # max_new_tokens avoids the original bug of adding the prompt's
    # *character* length to a *token* budget via max_length.
    outputs = model.generate(inputs, max_new_tokens=1000)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

title = "Bloomz (English, small)"
description = "Write an instruction and get the Bloomz result."
examples = [["Translate to English: Je t'aime."]]

demo = gr.Interface(
    fn=get_result,
    inputs="text",
    outputs="text",
    title=title,
    description=description,
    examples=examples,
    allow_flagging="never",
)

demo.launch()