import gradio as gr
import torch
from transformers import (
    AutoModelForCausalLM,
    AutoConfig,
    AutoTokenizer,
    BitsAndBytesConfig,
)
model_name = "tiiuae/falcon-40b"
config = AutoConfig.from_pretrained(model_name, trust_remote_code=True)

# Load Falcon-40B quantized to 4-bit via bitsandbytes; device_map="auto"
# shards the weights across the available GPUs automatically.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    config=config,
    quantization_config=BitsAndBytesConfig(load_in_4bit=True),
    trust_remote_code=True,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)
tokenizer = AutoTokenizer.from_pretrained(model_name)
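
# Optional (not in the original app): BitsAndBytesConfig exposes further
# 4-bit knobs. A sketch of a common setup, assuming a recent transformers
# release with 4-bit support:
#
#   BitsAndBytesConfig(
#       load_in_4bit=True,
#       bnb_4bit_quant_type="nf4",
#       bnb_4bit_compute_dtype=torch.bfloat16,
#   )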

def falcon(input_text):
    # Tokenize the prompt and move it onto the GPU before generating.
    input_ids = tokenizer(input_text, return_tensors="pt").input_ids.to("cuda")
    # Sample up to 100 tokens (prompt included), drawing from the top-10 candidates.
    outputs = model.generate(input_ids, max_length=100, do_sample=True, top_k=10)
    return tokenizer.decode(outputs[0])

iface = gr.Interface(fn=falcon, inputs="text", outputs="text")
iface.launch() # To create a public link, set `share=True`
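
# A minimal sketch (not part of the original Space) of querying the running
# interface programmatically with gradio_client; the URL assumes a default
# local launch:
#
#   from gradio_client import Client
#   client = Client("http://127.0.0.1:7860")
#   print(client.predict("Once upon a time", api_name="/predict"))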