# Hugging Face Space; status shown on the page when this file was captured: Runtime error
import os
import subprocess
import sys

import gradio as gr
from transformers import AutoTokenizer

# Petals is installed at startup, before it is imported below.
# pip is invoked through the interpreter that is running this script
# (``sys.executable -m pip``) with an argument list, so the packages land in
# the environment this process imports from and no shell is involved.
# A failed install is deliberately not fatal here (same as the previous
# os.system calls); a missing package surfaces as ImportError just below.
subprocess.run(
    [sys.executable, "-m", "pip", "install", "--upgrade", "pip"],
    check=False,
)
subprocess.run(
    [
        sys.executable,
        "-m",
        "pip",
        "install",
        "git+https://github.com/bigscience-workshop/petals",
    ],
    check=False,
)

from petals import AutoDistributedModelForCausalLM  # noqa: E402 - needs the install above

import npc_data  # noqa: E402
# Choose any model available at https://health.petals.dev
# Other model ids noted by the author:
#   daekeun-ml/Llama-2-ko-instruct-13B
#   quantumaikr/llama-2-70b-fb16-korean
model_name = "daekeun-ml/Llama-2-ko-instruct-13B"

# Both objects are created once at import time and reused by chat2().
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoDistributedModelForCausalLM.from_pretrained(model_name)
# Run the model as if it were on your computer
def chat2(id, npc, text):
    """Generate up to 100 new tokens with the distributed model, then echo ``text``.

    The prompt is currently an empty string, so none of the arguments
    influence generation; the decoded output is only printed, not returned.

    Args:
        id: Caller-supplied identifier (unused here).
        npc: NPC name (unused here).
        text: User message; returned unchanged.

    Returns:
        ``text`` exactly as it was passed in.
    """
    # NOTE(review): the prompt is empty - presumably a placeholder until a
    # real prompt is built from ``npc`` / ``text``; confirm intent.
    prom = ""
    inputs = tokenizer(prom, return_tensors="pt")["input_ids"]
    outputs = model.generate(inputs, max_new_tokens=100)
    print(tokenizer.decode(outputs[0]))
    return text
def chat(id, npc, text):
    """Return a canned reply string for ``text`` on behalf of ``npc``.

    No model is called; the reply is a fixed Korean template meaning
    "response of {npc} to {text}".

    Args:
        id: Caller-supplied identifier (unused here).
        npc: NPC name interpolated into the reply.
        text: User message interpolated into the reply.

    Returns:
        The formatted reply string.
    """
    # The literal was mojibake in the captured file (UTF-8 Korean rendered as
    # Greek letters); restored to the text those bytes encode.
    return f"{text}에 대한 {npc}의 응답"
# Build the UI: a single three-text-input Interface bound to chat(),
# rendered inside a Blocks container.
with gr.Blocks() as demo:
    count = 0  # not referenced anywhere else in the visible source
    aa = gr.Interface(
        fn=chat,
        inputs=["text", "text", "text"],
        outputs="text",
        # Korean: "chat, returns the AI response. Creates a transaction
        # internally." (restored from mojibake in the captured file)
        description="chat, ai 응답을 반환합니다. 내부적으로 트랜잭션 생성. \n /run/predict",
    )

# queue() already enables request queuing. The old ``enable_queue`` keyword of
# launch() was deprecated in Gradio 3.x and removed in 4.x, where passing it
# raises TypeError, so it is no longer passed.
demo.queue(max_size=32).launch()