# api_for_chat / app.py
from threading import Thread
import gradio as gr
import inspect
from gradio import routes
from typing import List, Type
from petals import AutoDistributedModelForCausalLM
from transformers import AutoTokenizer
import requests, os, re, asyncio, json
loop = asyncio.get_event_loop()
# init code
def get_types(cls_set: List[Type], component: str):
    # Pull the human-readable type and description out of each component's
    # docstring (inputs document them on the second line, outputs on the last).
    docset = []
    types = []
    if component == "input":
        for cls in cls_set:
            doc = inspect.getdoc(cls)
            doc_lines = doc.split("\n")
            docset.append(doc_lines[1].split(":")[-1])
            types.append(doc_lines[1].split(")")[0].split("(")[-1])
    else:
        for cls in cls_set:
            doc = inspect.getdoc(cls)
            doc_lines = doc.split("\n")
            docset.append(doc_lines[-1].split(":")[-1])
            types.append(doc_lines[-1].split(")")[0].split("(")[-1])
    return docset, types

routes.get_types = get_types  # patch Gradio's API-page type introspection
# App code
model_name = "petals-team/StableBeluga2"
# alternatives:
# daekeun-ml/Llama-2-ko-instruct-13B
# quantumaikr/llama-2-70b-fb16-korean
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = None  # loaded lazily by init() once the Petals swarm reports healthy

# history[npc][id] accumulates each player's "###"-delimited dialogue
history = {
    "": {
    }
}
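
# NOTE: npc_story is referenced throughout chat() but never defined in this
# file; the real character sheets presumably live elsewhere. A minimal
# placeholder sketch so the module runs standalone -- the key and text below
# are hypothetical, not the original data:
npc_story = {
    "villager": "A friendly islander who greets newcomers.",  # hypothetical entry
}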
def check(model_name):
    # True only if the public Petals swarm reports this model as healthy.
    data = requests.get("https://health.petals.dev/api/v1/state").json()
    for d in data['model_reports']:
        if d['name'] == model_name and d['state'] == "healthy":
            return True
    return False
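
# For reference, check() assumes the health endpoint returns JSON shaped
# roughly like this (a sketch inferred from the fields accessed above):
#   {"model_reports": [{"name": "petals-team/StableBeluga2", "state": "healthy", ...}, ...]}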
def init():
    global model
    if check(model_name):
        model = AutoDistributedModelForCausalLM.from_pretrained(model_name)
def chat(id, npc, text):
    if model is None:
        init()
        return "no model"

    # get_coin endpoint: refuse inference for players with no coins left
    response = requests.post("https://ldhldh-api-for-unity.hf.space/run/predict_6", json={
        "data": [
            id,
        ]}).json()
    coin = response["data"][0]
    if int(coin) == 0:
        return "no coin"

    # model inference
    if check(model_name):
        global history
        if npc not in npc_story:
            return "no npc"
        if npc not in history:
            history[npc] = {}
        if id not in history[npc]:
            history[npc][id] = ""
        # keep only the most recent "###"-delimited turns so the prompt stays bounded
        if len(history[npc][id].split("###")) > 10:
            history[npc][id] = "###" + history[npc][id].split("###", 3)[3]
        npc_list = str([k for k in npc_story.keys()]).replace('\'', '')
        town_story = f"""[{id}'s village]
Several residents live together on a small, remote island.
Currently {npc_list} live there."""
        system_message = f"""1. You are fluent in Korean.
2. You are role-playing right now. Imagine {npc}'s reactions and portray them vividly and engagingly.
3. You are {npc}. Think and speak from {npc}'s point of view.
4. Based on the information given, complete a plausible, lifelike line for {npc}.
5. Read the given information about {npc} carefully, and play the character plainly, without overacting.
6. Never take over the User's role. Do not repeat yourself.
7. Keep to {npc}'s manner of speaking."""
        prom = f"""<<SYS>>
{system_message}<</SYS>>
{town_story}
### Character info: {npc_story[npc]}
### Instruction:
Referring to {npc}'s information, write what {npc} would say, naturally and appropriately for the situation.
{history[npc][id]}
### User:
{text}
### {npc}:
"""
        inputs = tokenizer(prom, return_tensors="pt")["input_ids"]
        outputs = model.generate(inputs, do_sample=True, temperature=0.6, top_p=0.75, max_new_tokens=100)
        # strip the prompt, cut at the first stop marker, and break sentences onto new lines
        output = tokenizer.decode(outputs[0])[len(prom)+3:-1].split("<")[0].split("###")[0].replace(". ", ".\n")
        print(outputs)
        print(output)
    else:
        return "no model"

    # add_transaction endpoint: log this inference against the player's account
    response = requests.post("https://ldhldh-api-for-unity.hf.space/run/predict_5", json={
        "data": [
            id,
            "inference",
            "### input:\n" + prom + "\n\n### output:\n" + output
        ]}).json()
    d = response["data"][0]
    return output
with gr.Blocks() as demo:
    count = 0
    aa = gr.Interface(
        fn=chat,
        inputs=["text", "text", "text"],
        outputs="text",
        description="Chat: returns the AI response; internally creates a transaction. \n /run/predict",
    )

demo.queue(max_size=32).launch(enable_queue=True)
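
# A minimal client-side sketch of calling this Space, assuming it is hosted at
# ldhldh-api-for-chat.hf.space (the URL is an assumption) and exposes the
# /run/predict route mentioned in the description above:
#
#   import requests
#   r = requests.post(
#       "https://ldhldh-api-for-chat.hf.space/run/predict",
#       json={"data": ["player_id", "npc_name", "hello"]},  # id, npc, text
#   )
#   print(r.json()["data"][0])  # NPC reply, or "no coin" / "no model" / "no npc"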