|
import gradio as gr |
|
|
|
import torch |
|
from transformers import AutoModelForCausalLM, AutoTokenizer |
|
|
|
# Hub checkpoint id, shared by the model and its tokenizer so the two
# cannot drift apart.
MODEL_NAME = "cyberagent/open-calm-7b"

# Load the causal LM with 8-bit weights (`load_in_8bit=True`) and let
# `device_map="auto"` decide device placement.
# `torch_dtype` must be a floating-point dtype: the int8 quantization is
# requested by `load_in_8bit`, not by `torch_dtype`, and passing
# `torch.int8` here is rejected when the model is instantiated.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    device_map="auto",
    torch_dtype=torch.float16,
    load_in_8bit=True,
)

# Tokenizer matching the checkpoint loaded above.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
|
|
|
def proc(inputs):
    """Generate text from already-tokenized inputs and decode it.

    Args:
        inputs: Mapping of keyword arguments that is unpacked into
            ``model.generate`` (the tokenizer output built by the caller).

    Returns:
        The first generated sequence decoded by the module-level
        ``tokenizer`` with special tokens skipped.
    """
    # Sampling settings forwarded to `model.generate`.
    generation_options = {
        "max_new_tokens": 64,
        "do_sample": True,
        "temperature": 0.7,
        "pad_token_id": tokenizer.pad_token_id,
    }

    # Gradients are not needed for generation.
    with torch.no_grad():
        generated = model.generate(**inputs, **generation_options)

    # Only the first sequence of the returned batch is decoded.
    first_sequence = generated[0]
    return tokenizer.decode(first_sequence, skip_special_tokens=True)
|
|
|
def greet(name: str) -> str:
    """Gradio callback: generate text for the prompt typed by the user.

    Args:
        name: Prompt text from the UI text box.

    Returns:
        The decoded text returned by ``proc``, or an empty string when
        the prompt is empty.
    """
    # An empty prompt gives the model nothing to condition on, so skip the
    # tokenizer/model round trip instead of passing an empty tensor to
    # `generate`.
    if not name:
        return ""

    # Tokenize to PyTorch tensors and move them to the model's device.
    inputs = tokenizer(name, return_tensors="pt").to(model.device)
    return proc(inputs)
|
|
|
# Wrap `greet` in a text-in / text-out Gradio interface and start serving it.
iface = gr.Interface(
    fn=greet,
    inputs="text",
    outputs="text",
)
iface.launch()
|
|