# test / app.py
# Last commit: jhl001, "Update app.py" (158aee9)
# File size: 1.11 kB
import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
# Checkpoint shared by the model and its tokenizer.
MODEL_NAME = "cyberagent/open-calm-7b"

# load_in_8bit already requests 8-bit quantized weights. torch_dtype must still
# be a floating-point type, so float16 is used here; torch.int8 is rejected by
# from_pretrained because a model cannot be instantiated under an integer dtype.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    device_map="auto",
    torch_dtype=torch.float16,
    load_in_8bit=True,
)
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
def proc(inputs):
    """Sample a continuation for tokenized input and return it as text.

    ``inputs`` is unpacked as keyword arguments into ``model.generate``; the
    caller in this file passes the tokenizer output moved to the model's
    device. The first returned sequence is decoded with special tokens
    skipped.
    """
    sampling_options = dict(
        max_new_tokens=64,  # length to generate; 128 or so is fine too
        do_sample=True,
        # Randomness of generation: higher values give more varied words
        # but lower relevance.
        temperature=0.7,
        pad_token_id=tokenizer.pad_token_id,
    )
    # Generation only, so gradient tracking is switched off.
    with torch.no_grad():
        sequences = model.generate(**inputs, **sampling_options)
    first_sequence = sequences[0]
    return tokenizer.decode(first_sequence, skip_special_tokens=True)
def greet(name: str) -> str:
    """Generate text for the prompt typed into the UI.

    Args:
        name: Prompt text received from the Gradio text input.

    Returns:
        The string produced by ``proc`` for the tokenized prompt, or an
        empty string when the prompt is empty.
    """
    # Nothing to continue from: skip tokenization and generation for an empty
    # prompt rather than handing generate() a zero-length input.
    if not name:
        return ""
    # Tokenize to PyTorch tensors and move them to the device the model is on.
    inputs = tokenizer(name, return_tensors="pt").to(model.device)
    return proc(inputs)
# Minimal web UI: one text input, one text output, both wired to greet().
iface = gr.Interface(
    fn=greet,
    inputs="text",
    outputs="text",
)
# Start serving the interface.
iface.launch()