GodSaveMoney / core /make_reply.py
Jeong-hun Kim
model test2
26f9554
import re
# ์ƒ์„ฑ๋œ ๋ชจ๋“  ๋ด‡ ์‘๋‹ต ๊ธฐ๋ก
def generate_reply(ctx, makePipeLine):
    """Generate one bot reply and append it to the conversation history.

    Args:
        ctx: Conversation context. Must provide ``addHistory(role, text)``
            in addition to whatever ``generate_valid_response`` reads from it.
        makePipeLine: Generation pipeline object, forwarded unchanged to
            ``generate_valid_response``.

    Returns:
        None. The reply is stored through ``ctx.addHistory("bot", ...)``.
    """
    reply = generate_valid_response(ctx, makePipeLine)
    ctx.addHistory("bot", reply)
    # NOTE: a second pass that generated a continuation when the reply looked
    # truncated used to follow here. Per the original author's note it was
    # switched off because it led to unstable responses, so only a single
    # reply is produced per call.
# ๋ด‡ ์‘๋‹ต 1ํšŒ ์ƒ์„ฑ
def generate_valid_response(ctx, makePipeline) -> str:
    """Produce one cleaned bot reply, retrying until a candidate is accepted.

    Each attempt rebuilds the prompt from ``ctx.getHistory()``, prints it for
    debugging, runs ``makePipeline.character_chat`` on it and extracts the
    reply. The first candidate accepted by ``is_valid_response`` is passed
    through ``clean_response`` and returned.

    Note that there is no retry limit: the loop keeps going for as long as
    candidates are rejected.

    Args:
        ctx: Conversation context providing ``getUserName()``,
            ``getBotName()`` and ``getHistory()``.
        makePipeline: Object exposing ``character_chat(prompt)``.

    Returns:
        The cleaned reply text.
    """
    speaker = ctx.getUserName()
    persona = ctx.getBotName()

    while True:
        model_input = build_prompt(ctx.getHistory(), speaker, persona)

        # Debug dump of the exact prompt handed to the model.
        print("\n==========[DEBUG: Prompt]==========")
        print(model_input)
        print("===================================\n")

        candidate = extract_response(makePipeline.character_chat(model_input))
        if not is_valid_response(candidate, speaker, persona):
            continue  # rejected candidate: sample again
        return clean_response(candidate, persona)
# Assemble the input prompt.
def build_prompt(history, user_name, bot_name):
    """Assemble the text prompt that is sent to the model.

    The system prompt is read from ``assets/prompt/init.txt`` (resolved
    against the current working directory). It is followed by at most the 16
    most recent history turns and a trailing, still-open assistant tag.

    Args:
        history: Sequence of turn mappings with ``"role"`` and ``"text"`` keys.
        user_name: Not used by this function.
        bot_name: Not used by this function.

    Returns:
        The complete prompt string.
    """
    with open("assets/prompt/init.txt", "r", encoding="utf-8") as prompt_file:
        instructions = prompt_file.read().strip()

    pieces = [f"<|system|>\n{instructions}\n\n"]
    for entry in history[-16:]:
        # Every role other than "user" is rendered as an assistant turn.
        tag = "<|user|>" if entry["role"] == "user" else "<|assistant|>"
        pieces.append(f"{tag}\n{entry['text']}\n\n")

    # Leave an assistant tag open so the model writes the next reply.
    pieces.append("<|assistant|>\n")
    return "".join(pieces)
# ์ถœ๋ ฅ์—์„œ ์‘๋‹ต ์ถ”์ถœ (HyperCLOVAX ํฌ๋งท์— ๋งž๊ฒŒ)
def extract_response(full_text):
    """Return the reply portion of the raw model output.

    Everything after the last ``### Response:`` marker is kept. When the
    marker does not occur, the whole text is used. Surrounding whitespace is
    stripped in both cases.

    Args:
        full_text: Raw text produced by the model.

    Returns:
        The stripped reply text.
    """
    # rpartition yields the entire input as the tail when the marker is absent.
    _, _, tail = full_text.rpartition("### Response:")
    return tail.strip()
# Validate the response.
def is_valid_response(text: str, user_name, bot_name) -> bool:
    """Tell whether a generated reply is acceptable.

    A reply is rejected when it contains ``"<user_name>:"``, i.e. when the
    model started writing a line on behalf of the user.

    Args:
        text: Candidate reply.
        user_name: Name of the user; combined with ``":"`` to form the tag
            that is searched for.
        bot_name: Not used by this function.

    Returns:
        ``False`` if the user tag occurs anywhere in ``text``, else ``True``.
    """
    speaker_tag = user_name + ":"
    return speaker_tag not in text
# Clean up the output.
def clean_response(text: str, bot_name):
    """Strip the bot's speaker tag from a reply and drop a truncated tail.

    Every occurrence of ``"<bot_name>:"`` together with the whitespace that
    follows it is removed, the result is stripped, and the remainder is
    handed to ``clean_truncated_response``.

    Args:
        text: Reply text extracted from the model output.
        bot_name: Name of the bot whose ``"name:"`` tag should be removed.

    Returns:
        The cleaned reply text.
    """
    # The name is matched literally: without re.escape a name such as "C++"
    # is an invalid pattern and "Mr. Bot" would also match "MrX Bot".
    speaker_tag = re.compile(re.escape(str(bot_name)) + r":\s*")
    without_tag = speaker_tag.sub("", text).strip()
    return clean_truncated_response(without_tag)
# Drop incomplete sentences.
# Segment boundary: whitespace that follows '.', '?', '!' or '~', or any newline.
_SENTENCE_BOUNDARY = re.compile(r"(?<=[\.?!~])\s|\n")

# Characters accepted as the final character of a finished sentence:
# . ? ! ~, the ellipsis (U+2026), U+2639, U+263A, U+2764, and the emoji
# U+1F60A / U+1F622. The emoji are written as full code points; UTF-16
# surrogate halves (\uD83D \uDE0A ...) never match them in a Python str.
_SENTENCE_ENDINGS = frozenset(
    ".?!~\u2026\u2639\u263A\u2764\U0001F60A\U0001F622"
)


def clean_truncated_response(text: str) -> str:
    """Cut a reply back to its leading run of finished sentences.

    The stripped text is split into segments at sentence boundaries. Segments
    are kept in order for as long as each one ends with an accepted closing
    character; the first segment that does not, and everything after it, is
    discarded.

    Args:
        text: Reply text, possibly ending in the middle of a sentence.

    Returns:
        The kept segments joined by single spaces (so newlines between kept
        segments become spaces). If no segment is kept, the stripped input
        is returned unchanged.
    """
    stripped = text.strip()

    kept = []
    for raw in _SENTENCE_BOUNDARY.split(stripped):
        segment = raw.strip()
        if not segment:
            continue  # blank piece produced by consecutive separators
        if segment[-1] not in _SENTENCE_ENDINGS:
            break  # unfinished sentence: drop it and all that follows
        kept.append(segment)

    # Fall back to the input rather than returning an empty reply.
    return " ".join(kept) if kept else stripped