Spaces:
Runtime error
Runtime error
import gradio as gr | |
from transformers import AutoTokenizer | |
from auto_gptq import AutoGPTQForCausalLM | |
# Hugging Face Hub repositories: the GPTQ-quantized weights and the tokenizer
# are loaded from two different repos.
model_path = "vita-group/vicuna-7b-v1.3_gptq"
tokenizer_path = "lmsys/vicuna-7b-v1.3"

# Load the quantized checkpoint once at start-up so every request reuses it.
# The checkpoint variant is selected through the '2bit_128g' repo revision.
model = AutoGPTQForCausalLM.from_quantized(
    model_path,
    revision="2bit_128g",
    device_map="auto",
    disable_exllama=True,
)

# NOTE(review): trust_remote_code=True allows code shipped in the tokenizer
# repo to be executed locally -- confirm it is actually required here.
tokenizer = AutoTokenizer.from_pretrained(
    tokenizer_path,
    trust_remote_code=True,
)
def generate_response(prompt):
    """Run the quantized model on ``prompt`` and return the decoded text.

    Args:
        prompt: Text submitted through the Gradio text input.

    Returns:
        The first sequence returned by ``model.generate``, decoded to a
        string with special tokens stripped.
    """
    encoded = tokenizer(prompt, return_tensors='pt')
    # The model is placed with device_map='auto', so move the inputs to
    # wherever it actually landed instead of assuming a CUDA device exists.
    input_ids = encoded.input_ids.to(model.device)
    # max_new_tokens bounds only the generated part; max_length also counted
    # the prompt tokens, leaving long prompts with no room to generate.
    outputs = model.generate(input_ids=input_ids, max_new_tokens=128)
    # Strip special tokens so markers such as BOS/EOS do not reach the UI.
    return tokenizer.decode(outputs[0], skip_special_tokens=True)
# Minimal UI: one text box in, one text box out, backed by generate_response.
iface = gr.Interface(fn=generate_response, inputs="text", outputs="text")

# Start the web server only when executed as a script, so importing this
# module (e.g. from tooling or tests) does not launch it as a side effect.
if __name__ == "__main__":
    iface.launch()