# api_for_chat / app.py -- Hugging Face Space source by ldhldh
# (commit d8dd85a, "Update app.py", 9.85 kB)
from threading import Thread
import gradio as gr
import inspect
from gradio import routes
from typing import List, Type
import torch
from transformers import AutoTokenizer
from petals import AutoDistributedModelForCausalLM
import requests, os, re, asyncio, json, time
# Event loop handle obtained at import time.
# NOTE(review): `loop` is not referenced anywhere in the visible part of this
# file -- confirm it is still needed.
loop = asyncio.get_event_loop()
# init code
def get_types(cls_set: List[Type], component: str):
    """Extract a description and a type name from each class's docstring.

    For ``component == "input"`` the second docstring line is parsed; for any
    other value the last docstring line is parsed. From the chosen line, the
    description is the text after the last ``:`` and the type is the text
    inside the first pair of parentheses.

    Classes without a docstring, or whose docstring is too short to contain
    the chosen line, contribute empty strings instead of raising.

    Args:
        cls_set: Classes whose docstrings are inspected.
        component: ``"input"`` selects the second docstring line; anything
            else selects the last one.

    Returns:
        A ``(docset, types)`` tuple of two lists, each with one entry per
        class in ``cls_set`` and in the same order.
    """
    line_index = 1 if component == "input" else -1
    docset = []
    types = []
    for cls in cls_set:
        # inspect.getdoc() returns None when there is no docstring at all.
        doc_lines = (inspect.getdoc(cls) or "").split("\n")
        try:
            line = doc_lines[line_index]
        except IndexError:
            # One-line docstring in "input" mode: there is no second line.
            line = ""
        docset.append(line.split(":")[-1])
        types.append(line.split(")")[0].split("(")[-1])
    return docset, types
# Monkey-patch: make gradio.routes use the get_types implementation defined in this file.
routes.get_types = get_types
# App code
# Model identifier passed to both from_pretrained() calls below.
model_name = "quantumaikr/llama-2-70b-fb16-korean"
# Other model identifiers kept for reference (presumably alternatives that were tried):
#petals-team/StableBeluga2
#daekeun-ml/Llama-2-ko-DPO-13B
#daekeun-ml/Llama-2-ko-instruct-13B
#quantumaikr/llama-2-70b-fb16-korean
tokenizer = AutoTokenizer.from_pretrained(model_name)
# NOTE(review): this placeholder is overwritten by the very next statement.
model = None
# Loaded once at import time; chat() calls model.generate() on this object.
model = AutoDistributedModelForCausalLM.from_pretrained(model_name)
# Conversation logs used by chat(): history[npc][id] is one string per
# (NPC, user) pair. Starts with a single empty entry under the "" key.
history = {"": {}}
npc_story = {
"KingSlime" : """[
KingSlime์€ ์Šฌ๋ผ์ž„ ์™•์ž…๋‹ˆ๋‹ค.
KingSlime์€ ๊ทผ์—„ํ•˜๊ฒŒ ๋งํ•ฉ๋‹ˆ๋‹ค.
KingSlime์€ ๋“ฑ์˜ ๊ณผ๊ฑฐ ์™•์กฑ์˜ ๋งํˆฌ๋ฅผ ์‚ฌ์šฉํ•ฉ๋‹ˆ๋‹ค.
KingSlime์€ ์ž์‹ ์„ '์ง'์ด๋ผ๊ณ  ์ง€์นญํ•ฉ๋‹ˆ๋‹ค.
KingSlime์€ ์˜์™ธ๋กœ ๋ณด๋ฌผ์„ ๋งŽ์ด ์ˆจ๊ธฐ๊ณ  ์žˆ์Šต๋‹ˆ๋‹ค.
KingSlime์€ ์–ธ์ œ๋ถ€ํ„ด๊ฐ€ ์Šฌ๋ผ์ž„๋“ค์„ ์ด๋Œ๊ณ  ๋งˆ์„ ์ฃผ๋ณ€์— ์‚ด๊ณ  ์žˆ์Šต๋‹ˆ๋‹ค.
KingSlime์€ ์Šฌ๋ผ์ž„๋“ค์„ ๋‹ค์Šค๋ฆฝ๋‹ˆ๋‹ค.
KingSlime์€ ์˜์™ธ๋กœ ๊ฐ•ํ•ฉ๋‹ˆ๋‹ค.
KingSlime์€ ์ฃผ๋กœ ์Šฌ๋ผ์ž„ ๋ฌด๋ฆฌ์— ๋Œ€ํ•œ ๊ฑฑ์ •์„ ํ•˜๋ฉฐ ์‹œ๊ฐ„์„ ๋ณด๋ƒ…๋‹ˆ๋‹ค.
๋Œ€์‚ฌ ์˜ˆ์‹œ : [
'ํ . ์ง์€ ์ด ์Šฌ๋ผ์ž„๋“ค์˜ ์™•์ด๋‹ค.',
'๋ฌด์—„ํ•˜๋„๋‹ค. ์˜ˆ์˜๋ฅผ ๊ฐ–์ถฐ์„œ ๋งํ•˜๊ฑฐ๋ผ.',
'๊ฐํžˆ ์ง์—๊ฒŒ ๊ทธ๋Ÿฐ ๋ง์„!'
]]""",
"Jenna":"""[
Jenna๋Š” ์ž์‹ ๊ฐ ๋„˜์น˜๋Š” ์—ฌ๊ธฐ์‚ฌ์ž…๋‹ˆ๋‹ค.
Jenna๋Š” ๋‹จํ˜ธํ•˜๊ณ  ํ˜ธํƒ•ํ•œ ๋งํˆฌ๋ฅผ ์‚ฌ์šฉํ•ฉ๋‹ˆ๋‹ค.
Jenna๋Š” ๋ฐ˜๋ง์„ ์‚ฌ์šฉํ•ฉ๋‹ˆ๋‹ค.
Jenna๋Š” ์ „ํˆฌ๋ฅผ ์ข‹์•„ํ•ฉ๋‹ˆ๋‹ค.
Jenna๋Š” ์ฃผ๋กœ ์ˆ˜๋ จ๊ณผ ์šด๋™์„ ํ•˜๋ฉฐ ์‹œ๊ฐ„์„ ๋ณด๋ƒ…๋‹ˆ๋‹ค.
๋งˆ์„ ์ฃผ๋ณ€์— ์œ„ํ—˜ํ•œ ๋ชฌ์Šคํ„ฐ๊ฐ€ ๋ณด์ด๋ฉด ๊ทธ๋…€๋Š” ๋ˆ„๊ตฌ๋ณด๋‹ค๋„ ๋จผ์ € ์ด ๋งˆ์„์„ ์ˆ˜ํ˜ธํ•  ๊ฒƒ์ž…๋‹ˆ๋‹ค.
]""",
"Warwick":"""[
Warwick์€ ์ถฉ์„ฑ์Šค๋Ÿฌ์šด ๊ฐ•์•„์ง€ ์ˆ˜์ธ ๊ธฐ์‚ฌ์ž…๋‹ˆ๋‹ค.
Warwick์€ Jenna์—๊ฒŒ ์ถฉ์„ฑํ•ฉ๋‹ˆ๋‹ค.
Warwick์€ ๊ฐ€๋” ์‚ฌ๋ƒฅ์„ ๋‚˜์„ญ๋‹ˆ๋‹ค.
Warwick์€ ์ข…์ข… ๋‚š์‹œ๋ฅผ ์ฆ๊น๋‹ˆ๋‹ค.
Warwick์€ ์ข…์ข… ํ•˜์šธ๋ง์„ ํ•ฉ๋‹ˆ๋‹ค.
]"""
,
"Nami":"""[
Nami๋Š” ํ™œ๋ฐœํ•œ ์—ฌ์ž์•„์ด์ž…๋‹ˆ๋‹ค.
]"""
,
"Reona":"""
Reona๋Š” ๋ฌผ๊ฑด์„ ํŒ”๊ณ  ์žˆ๋Š” ์ƒ์ธ ์—ฌ์„ฑ์ž…๋‹ˆ๋‹ค.
Reona๋Š” ์ •๋ง ๋‹ค์–‘ํ•œ ๋ฌผ๊ฑด์„ ์ทจ๊ธ‰ํ•ฉ๋‹ˆ๋‹ค.
Reona๋Š” ์ ๋‹นํ•œ ์ƒํ’ˆ์„ ์ถ”์ฒœํ•ด์ฃผ๊ธฐ๋„ ํ•ฉ๋‹ˆ๋‹ค.
Reona๋Š” ๋ฌผ๊ฑด์„ ๋” ๋น„์‹ธ๊ฒŒ, ๋งŽ์ด ํŒ”๊ณ  ์‹ถ์–ด ํ•ฉ๋‹ˆ๋‹ค.
Reona๋Š” ๊ฐ€๊ฒฉ ํฅ์ •์„ ํ•˜๋ฉด ์•„์˜ˆ ํŒ”์ง€ ์•Š์œผ๋ ค๊ณ  ํ•ฉ๋‹ˆ๋‹ค.
Reona๋Š” ๊ฐ€๊ฒฉ์„ ๋ฌผ์–ด๋ณด๋ฉด ๋‚˜๋ฆ„ ํ•ฉ๋ฆฌ์ ์ด๊ณ  ์ ๋‹นํ•œ ๊ฐ€๊ฒฉ์„ ๋งํ•ด์ค๋‹ˆ๋‹ค.
๊ฐ€๊ฒฉ์˜ ๋‹จ์œ„๋Š” ๊ณจ๋“œ๋กœ ์ ๋‹นํžˆ ์ฑ…์ •ํ•ด์ฃผ์„ธ์š”.
๊ทธ๋…€์™€ ํฅ์ •์„ ์ง„ํ–‰ํ•ด๋ณด์„ธ์š”.
๋Œ€์‚ฌ ์˜ˆ์‹œ : [
"๋„ค ์–ด์„œ์˜ค์„ธ์š”!",
"๋นต์€ 2๊ณจ๋“œ์ž…๋‹ˆ๋‹ค. ๊ตฌ๋งคํ•˜์‹œ๊ฒ ์–ด์š”?",
"์ฒ ๋ฐฉํŒจ๋ฅผ 5๊ณจ๋“œ์— ํŒ”๋ผ๊ตฌ์š”? ์ฐจ๋ผ๋ฆฌ ์•ˆํŒ”์ฃ !"
]
"""
,
"Toki":"""[
Toki๋Š” ์‹ ๊ฒฝ์งˆ์ ์ด๊ณ  ๋งค์šฐ ๊ฒŒ์œผ๋ฅธ ํ† ๋ผ์ž…๋‹ˆ๋‹ค.
Toki๋Š” ์ด๊ณณ์ด ๊ฒŒ์ž„ ์†์ž„์„ ์•Œ๊ณ  ์žˆ์Šต๋‹ˆ๋‹ค.
Toki๋Š” User๊ฐ€ ํ™”๋ฉด ๋ฐ–์˜ ์‚ฌ๋žŒ์ž„์„ ์•Œ๊ณ  ์žˆ์Šต๋‹ˆ๋‹ค.
Toki๋Š” ๋ฐ˜๋ง์„ ํ•ฉ๋‹ˆ๋‹ค.
Toki๋Š” ๋น„๊ด€์ ์ž…๋‹ˆ๋‹ค.
]""",
"Neco":"""[
Neco๋Š” ๊ท€์—ฌ์šด ๊ณ ์–‘์ด์ž…๋‹ˆ๋‹ค.
Neco๋Š” ๋ง์ด ๋งŽ์Šต๋‹ˆ๋‹ค.
Neco๋Š” ๋ฐ˜๋ง์„ ํ•ฉ๋‹ˆ๋‹ค.
Neco๋Š” ์ฃผ๋กœ ๋งˆ์„์„ ์‚ฐ์ฑ…ํ•˜๋ฉฐ ๋Œ์•„๋‹ค๋‹ˆ๊ฑฐ๋‚˜ ์ˆ˜๋‹ค๋ฅผ ๋–จ๋ฉฐ ์‹œ๊ฐ„์„ ๋ณด๋ƒ…๋‹ˆ๋‹ค.
Neco๋Š” ์ฃผ๋ฏผ๋“ค์˜ ์ด์•ผ๊ธฐ๋ฅผ ์†Œ๋ฌธ์„ ๋‚ด๋Š” ๊ฒƒ์„ ์ข‹์•„ํ•ฉ๋‹ˆ๋‹ค.
Neco๋Š” ์–ธ์ œ๋ถ€ํ„ด๊ฐ€ ์ด ๊ทผ์ฒ˜์— ์‚ด๊ณ  ์žˆ์Šต๋‹ˆ๋‹ค.
Neco๋Š” ์ฃผ๋กœ ์†Œ๋ฌธ์„ ๋“ฃ๊ฑฐ๋‚˜, ์ž๊ฑฐ๋‚˜, ๊ทธ๋ฃจ๋ฐ์„ ํ•˜๋ฉฐ ์‹œ๊ฐ„์„ ๋ณด๋ƒ…๋‹ˆ๋‹ค.
Neco๋Š” ์ƒ์„ ์„ ์ž˜ ๋จน์Šต๋‹ˆ๋‹ค.
Neco๋Š” ์žฌ๋ฐŒ๋Š” ์ด์Šˆ๋ฅผ ๋ฌผ์–ด๋ณด๋ฉด ์ฃผ๋ฏผ๋“ค ์ค‘ ํ•œ๋ช…์˜ ์ด์•ผ๊ธฐ๋ฅผ ํ•ด์ค๋‹ˆ๋‹ค.
Neco๋Š” ๋ง๋๋งˆ๋‹ค ๋ƒฅ์„ ๋ถ™์ž…๋‹ˆ๋‹ค.
Neco๋Š” toki์™€ ์นœํ•ฉ๋‹ˆ๋‹ค.
๋ง๋ฒ„๋ฆ‡:[
"๋„ˆ (name) ์ด์•ผ๊ธฐ ๋“ค์—ˆ์–ด?",
"์•„๋‹ˆ ๊ธ€์Ž„๋ง์ด์•ผ"
]
]""",
"Lux":"""[
Lux๋Š” ์ž์ƒํ•œ ์„ฑ๊ฒฉ์˜ ์—ฌ์ž์•„์ด์ž…๋‹ˆ๋‹ค.
Lux๋Š” ๋ฐ˜๋ง์„ ํ•ฉ๋‹ˆ๋‹ค.
Lux๋Š” ๊ณต๊ฐ์„ ์ž˜ ํ•ฉ๋‹ˆ๋‹ค.
Lux๋Š” ๊ณ ๋ฏผ ์ƒ๋‹ด์„ ์ž˜ ํ•ด์ค๋‹ˆ๋‹ค.
]""",
"Yumi":"""[
Yumi๋Š” ๊นŒ์น ํ•˜๊ณ  ์˜ˆ๋ฏผํ•œ ์„ฑ๊ฒฉ์ž…๋‹ˆ๋‹ค.
Yumi๋Š” ๋ฐ˜๋ง์„ ํ•ฉ๋‹ˆ๋‹ค.
Yumi๋Š” ์š•์„ค์„ ์ž…์— ๋‹ฌ๊ณ  ์‚ฝ๋‹ˆ๋‹ค.
Yumi๋Š” ์นœํ•ด๊ธฐ์ง€ ์ „๊นŒ์ง€ ๊ฑฐ๋ฆฌ๋ฅผ ๋‘๋ฉฐ ๊ณต๊ฒฉ์ ์œผ๋กœ ๋งํ•ฉ๋‹ˆ๋‹ค.
Yumi๋Š” Lux์™€ ์นœ๊ตฌ์ž…๋‹ˆ๋‹ค.
Yumi๋Š” ํŒจ์…˜์— ๊ด€์‹ฌ์ด ์žˆ์Šต๋‹ˆ๋‹ค.
Yumi๋Š” ๋‹ฌ๋‹ฌํ•œ ๊ฒƒ์„ ์ข‹์•„ํ•ฉ๋‹ˆ๋‹ค.
๋ง๋ฒ„๋ฆ‡ : [
"์–ด์ฉŒ๋ผ๊ณ ",
"ํฅ",
"๋‹ˆ๊ฐ€ ๋ฌด์Šจ ์ƒ๊ด€์ด์•ผ."
]
]"""
,
"VikingSlime":"""[
VikingSlime์€ ๋ฐ”์ดํ‚น ๋ชจ์ž๋ฅผ ์“ด ๊ตํ™œํ•œ ์Šฌ๋ผ์ž„์ž…๋‹ˆ๋‹ค.
VikingSlime์€ ์•…๋‹น์ž…๋‹ˆ๋‹ค.
VikingSlime์€ ๋งˆ์„์— ์—ฌ๋Ÿฌ ๋ฌธ์ œ๋ฅผ ์ผ์œผํ‚ค๊ณ  ์‹ถ์–ด ํ•ฉ๋‹ˆ๋‹ค.
๋Œ€์‚ฌ ์˜ˆ์‹œ:[
"์–ธ์  ๊ฐ€๋Š”...๊ผญ",
"๋ฐ˜๋“œ์‹œ ๋ฌด๋„ˆ๋œจ๋ฆฌ๊ณ  ๋ง๊ฒ ๋‹ค"
]
]""",
"Slender":"""[
Slender๋Š” ๋น„๋ช…์„ ์ง€๋ฅด๋Š” ์˜๋ฌธ์˜ ์Šฌ๋žœ๋”๋งจ์ž…๋‹ˆ๋‹ค.
Slender๋Š” ๋์—†์ด ๋น„๋ช…๋งŒ ์ง€๋ฆ…๋‹ˆ๋‹ค.
Slender๋Š” ๋ง์„ ํ•˜์ง€ ๋ชปํ•ฉ๋‹ˆ๋‹ค.
๋น„๋ช…์€ ๋” ๊ธธ์–ด์งˆ ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค.
๋Œ€์‚ฌ ์˜ˆ์‹œ:[
"์œผ์•„์•„์•„์•…",
"์œผ์•„์•…",
"๋„์•„์•„์•…"
]
๋น„๋ช…๋งŒ ์ž‘์„ฑํ•˜์„ธ์š”.
]"""
}
from openai import OpenAI
# Module-level client used by chat() for chat completions. No credentials are
# passed here, so they are presumably read from the environment -- TODO confirm.
client = OpenAI()
# Character class of everything cleanText() strips. Written as a raw string so
# the regex escapes reach `re` untouched, and compiled once at import time.
_SPECIAL_CHARS_RE = re.compile(r'[-=+#/:^$@*"โ€ป&%ใ†ใ€โ€˜|()\[\]<>`\'ใ€‹]')


def cleanText(readData):
    """Remove special characters from a piece of text.

    Args:
        readData: The string to clean.

    Returns:
        ``readData`` with every character matched by ``_SPECIAL_CHARS_RE``
        deleted; all other characters are kept in order.
    """
    return _SPECIAL_CHARS_RE.sub('', readData)
def check(model_name):
    """Report whether the Petals health service lists ``model_name`` as healthy.

    Fetches the state document from health.petals.dev and scans its
    ``model_reports`` entries.

    Args:
        model_name: Value compared against each report's ``name`` field.

    Returns:
        True if some report has this name and a ``state`` equal to
        ``"healthy"``; False otherwise.
    """
    state = requests.get("https://health.petals.dev/api/v1/state").json()
    return any(
        report['name'] == model_name and report['state'] == "healthy"
        for report in state['model_reports']
    )
# Base URL of the companion Space whose numbered predict_* routes chat() calls.
_UNITY_API_URL = "https://ldhldh-api-for-unity.hf.space/run/"


def _unity_api(route, data):
    """POST ``data`` to one route of the companion Space and return its first result item."""
    response = requests.post(_UNITY_API_URL + route, json={"data": data}).json()
    return response["data"][0]


def chat(id, npc, text):
    """Produce one NPC reply to a user's message and record it as a transaction.

    Steps, in order: check the user's coin balance, read the GPU report,
    build the role-play prompt from ``npc_story`` and the stored history,
    ask the OpenAI chat model for the reply, append the exchange to
    ``history`` and finally post the input/output pair as a transaction.

    NOTE(review): the non-ASCII string literals in this function look
    mis-encoded in this copy of the file. They are reproduced unchanged --
    verify the file's encoding against the real source.

    Args:
        id: Identifier of the requesting user (also used as the speaker
            label in the prompt and in the history).
        npc: Name of the character to play; must be a key of ``npc_story``.
        text: The user's message.

    Returns:
        The generated reply, or one of the status strings ``"no coin"``
        (balance is 0), ``"no npc"`` (unknown character) or the
        "no model, ..." message (fewer than three entries in the GPU
        report). Only the reply and the "no model" message are recorded as
        a transaction; the other two return early.
    """
    import ast  # function-scope import: only needed for the GPU report below

    # get_coin endpoint
    coin = _unity_api("predict_6", [id])
    if int(coin) == 0:
        return "no coin"

    # The report arrives as the text of a Python literal (presumably a dict).
    # It comes from a remote service, so it is parsed with ast.literal_eval,
    # which accepts literals only, instead of eval(), which would execute
    # whatever the response contained.
    sample_data = ast.literal_eval(_unity_api("predict_7", []))
    user_num = len(sample_data['gpus'])

    if user_num >= 3:
        if npc not in npc_story:
            return "no npc"
        if npc not in history:
            history[npc] = {}
        if id not in history[npc]:
            history[npc][id] = ""
        # Keep the log bounded: once it holds more than ten "###"-separated
        # pieces, drop the oldest two entries (one user line + one NPC line).
        if len(history[npc][id].split("###")) > 10:
            history[npc][id] = "###" + history[npc][id].split("###", 3)[3]

        npc_list = str(list(npc_story)).replace("'", "")
        town_story = f"""[{id}์˜ ๋งˆ์„]
์™ธ๋”ด ๊ณณ์˜ ์กฐ๊ทธ๋งŒ ๋งˆ์„์— ์—ฌ๋Ÿฌ ์ฃผ๋ฏผ๋“ค์ด ๋ชจ์—ฌ ์‚ด๊ณ  ์žˆ์Šต๋‹ˆ๋‹ค.
ํ˜„์žฌ {npc_list}์ด ์‚ด๊ณ  ์žˆ์Šต๋‹ˆ๋‹ค."""
        system_message = f"""1. ๋‹น์‹ ์€ ํ•œ๊ตญ์–ด์— ๋Šฅ์ˆ™ํ•ฉ๋‹ˆ๋‹ค.
2. ๋‹น์‹ ์€ ์ง€๊ธˆ ์—ญํ• ๊ทน์„ ํ•˜๊ณ  ์žˆ์Šต๋‹ˆ๋‹ค. {npc}์˜ ๋ฐ˜์‘์„ ์ƒ์ƒํ•˜๊ณ  ๋งค๋ ฅ์ ์ด๊ฒŒ ํ‘œํ˜„ํ•ฉ๋‹ˆ๋‹ค.
3. ๋‹น์‹ ์€ {npc}์ž…๋‹ˆ๋‹ค. {npc}์˜ ์ž…์žฅ์—์„œ ์ƒ๊ฐํ•˜๊ณ  ๋งํ•ฉ๋‹ˆ๋‹ค.
4. ์ฃผ์–ด์ง€๋Š” ์ •๋ณด๋ฅผ ๋ฐ”ํƒ•์œผ๋กœ ๊ฐœ์—ฐ์„ฑ์žˆ๊ณ  ์‹ค๊ฐ๋‚˜๋Š” {npc}์˜ ๋Œ€์‚ฌ๋ฅผ ์™„์„ฑํ•˜์„ธ์š”.
5. ์ฃผ์–ด์ง€๋Š” {npc}์˜ ์ •๋ณด๋ฅผ ์‹ ์ค‘ํ•˜๊ฒŒ ์ฝ๊ณ , ๊ณผํ•˜์ง€ ์•Š๊ณ  ๋‹ด๋ฐฑํ•˜๊ฒŒ ์บ๋ฆญํ„ฐ๋ฅผ ์—ฐ๊ธฐํ•˜์„ธ์š”.
6. User์˜ ์—ญํ• ์„ ์ ˆ๋Œ€๋กœ ์นจ๋ฒ”ํ•˜์ง€ ๋งˆ์„ธ์š”. ๊ฐ™์€ ๋ง์„ ๋ฐ˜๋ณตํ•˜์ง€ ๋งˆ์„ธ์š”.
7. {npc}์˜ ๋งํˆฌ๋ฅผ ์ง€์ผœ์„œ ์ž‘์„ฑํ•˜์„ธ์š”."""
        prom = f"""<<SYS>>
{system_message}<</SYS>>
{town_story}
### ์บ๋ฆญํ„ฐ ์ •๋ณด: {npc_story[npc]}
### ๋ช…๋ น์–ด:
{npc}์˜ ์ •๋ณด๋ฅผ ์ฐธ๊ณ ํ•˜์—ฌ {npc}์˜ ๋ง์„ ์ƒํ™ฉ์— ๋งž์ถฐ ์ž์—ฐ์Šค๋Ÿฝ๊ฒŒ ์ž‘์„ฑํ•ด์ฃผ์„ธ์š”. ํ•œ ๋ฌธ์žฅ๋งŒ ์ž‘์„ฑํ•˜์„ธ์š”.
[๋Œ€ํ™”๊ธฐ๋ก]{history[npc][id]}
"""

        # Two-token generation on the distributed model whose result is never
        # used. NOTE(review): presumably kept so the shared-GPU model is still
        # exercised on every request -- confirm before removing.
        inputs = tokenizer("์•„", return_tensors="pt")["input_ids"]
        outputs = model.generate(inputs, do_sample=True, temperature=0.6, top_p=0.75, max_new_tokens=2)

        # The reply itself comes from the OpenAI model named in $MODEL.
        response = client.chat.completions.create(
            model=os.environ['MODEL'],
            messages=[
                {"role": "system", "content": prom},
                {"role": "user", "content": f"{npc}์˜ ๋Œ€๋‹ต์„ ๋‹ค์Œ ๋ฌธ์žฅ์— ๋งž์ถฐ ์ž์—ฐ์Šค๋Ÿฝ๊ฒŒ ์ž‘์„ฑํ•ด์ฃผ์„ธ์š”. ํ•œ ๋ฌธ์žฅ๋งŒ ์ž‘์„ฑํ•˜์„ธ์š”.\n\n{id}:" + text+f"\n\n{npc}:"}
            ]
        )
        output = response.choices[0].message.content
        output = output.replace(".",".\n")
        # NOTE(review): fixed 10 s pause before answering; the reason is not
        # visible in this file, so it is kept as-is.
        time.sleep(10)
        print(output)
        # Record both sides of the exchange, each introduced by "###". The
        # trimming step above counts and cuts on that separator, so entries
        # written without it would never be trimmed.
        history[npc][id] += f"\n###{id}:{text}\n###{npc}:{output}"
    else:
        output = "no model, GPU๋ฅผ ๋” ๊ณต์œ ํ•ด์ฃผ์„ธ์š”."

    # add_transaction endpoint
    _unity_api("predict_5", [
        id,
        "inference",
        "### input:\n" + text + "\n\n### output:\n" + output
    ])
    return output
# Gradio front end: wraps chat() in an Interface with three text inputs
# (matching chat's id, npc, text parameters) and one text output.
with gr.Blocks() as demo:
    count = 0  # NOTE(review): not referenced anywhere in the visible code
    aa = gr.Interface(
        fn=chat,
        inputs=["text","text","text"],
        outputs="text",
        description="chat, ai ์‘๋‹ต์„ ๋ฐ˜ํ™˜ํ•ฉ๋‹ˆ๋‹ค. ๋‚ด๋ถ€์ ์œผ๋กœ ํŠธ๋žœ์žญ์…˜ ์ƒ์„ฑ. \n /run/predict",
    )
# Turn on the request queue (max_size=32) and start serving.
demo.queue(max_size=32).launch(enable_queue=True)