"""Gradio text demo that completes a prompt with the Falcon-7B model."""

from transformers import AutoTokenizer, AutoModelForCausalLM
import transformers
import torch
import gradio as gr

# Hugging Face Hub identifier used for both the tokenizer and the pipeline.
model = "tiiuae/falcon-7b"

tokenizer = AutoTokenizer.from_pretrained(model)

# Built once at import time so every request reuses the same loaded pipeline.
pipeline = transformers.pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,
    device_map="auto",
)


def get_sequence(prompt: str) -> str:
    """Generate a single sampled continuation for *prompt*.

    Args:
        prompt: Text typed into the Gradio input box.

    Returns:
        The ``generated_text`` field of the one sequence produced by the
        text-generation pipeline.
    """
    sequences = pipeline(
        prompt,
        max_length=200,
        do_sample=True,
        top_k=10,
        num_return_sequences=1,
        eos_token_id=tokenizer.eos_token_id,
    )
    # The pipeline yields a list of dicts; with num_return_sequences=1 there
    # is exactly one entry. Returning its text is what lets the Gradio
    # 'text' output display something (previously the result was discarded
    # and the function returned None).
    return sequences[0]["generated_text"]


demo = gr.Interface(inputs='text', outputs='text', fn=get_sequence)
demo.launch()