"""Minimal Gradio front end for text generation with the Falcon-40B model."""

import functools

import gradio as gr
import torch
import transformers
from transformers import AutoTokenizer, AutoModelForCausalLM

# Hugging Face Hub identifier of the checkpoint served by this demo.
MODEL_NAME = "tiiuae/falcon-40b"


@functools.lru_cache(maxsize=1)
def _get_pipeline():
    """Build the text-generation pipeline once and reuse it on later calls.

    Constructing the tokenizer and pipeline is by far the most expensive step
    in this script, so the result is memoized instead of being rebuilt for
    every request.
    """
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    return transformers.pipeline(
        "text-generation",
        model=MODEL_NAME,
        tokenizer=tokenizer,
        torch_dtype=torch.bfloat16,
        trust_remote_code=True,
        device_map="auto",
    )


def falcon(input_text: str) -> str:
    """Generate text for *input_text* and return the first generated sequence.

    Args:
        input_text: Prompt passed to the text-generation pipeline,
            e.g. "What is the tallest building in the world?".

    Returns:
        The ``generated_text`` field of the first sequence returned by the
        pipeline.
    """
    generator = _get_pipeline()
    sequences = generator(
        input_text,
        max_length=200,
        do_sample=True,
        top_k=10,
        num_return_sequences=1,
        eos_token_id=generator.tokenizer.eos_token_id,
    )
    # Echo every returned sequence to stdout for debugging.
    for seq in sequences:
        print(f"Result: {seq['generated_text']}")
    return sequences[0]['generated_text']


iface = gr.Interface(fn=falcon, inputs="text", outputs="text")

if __name__ == "__main__":
    # Start the web server only when run as a script, so that importing this
    # module does not block on a running server.
    iface.launch()  # To create a public link, set `share=True`