Spaces:
Sleeping
Sleeping
import logging | |
import os | |
import warnings | |
import gradio as gr | |
import pandas as pd | |
from langchain_core.output_parsers import StrOutputParser | |
from langchain_core.prompts import ChatPromptTemplate | |
from langchain_upstage import ChatUpstage | |
from rapidfuzz import process | |
# Silence every warning category for the rest of the process.
warnings.filterwarnings("ignore")

# Read eagerly so a missing key fails at start-up with a KeyError.
UPSTAGE_API_KEY: str = os.environ["UPSTAGE_API_KEY"]

# Fuzzy-matching knobs consumed by find_jargons():
# the candidate limit is (number of words in the sentence) * LIMIT_FACTOR,
# and SCORE_CUTOFF is handed to rapidfuzz as ``score_cutoff``.
LIMIT_FACTOR: float = 2.5
SCORE_CUTOFF: float = 60.0

# Upper bound on the bracket-annotation retry loop in translate().
MAX_RETRIES: int = 4
# Logging goes to "<path of this file>.log", opened in append mode, at DEBUG
# level. Timestamps carry the time of day only (no date).
_log_settings = {
    "level": logging.DEBUG,
    "filename": f"{__file__}.log",
    "filemode": "a",
    "format": "%(asctime)s,%(msecs)d %(name)s %(levelname)s %(message)s",
    "datefmt": "%H:%M:%S",
}
logging.basicConfig(**_log_settings)

# The logger is named after this file's path, which is what %(name)s prints.
logger = logging.getLogger(__file__)
# Load the jargon dictionary (path is relative to the working directory).
df = pd.read_csv("./dictionary.csv")

# Collapse the dictionary to one row per "name": every remaining column is
# stringified and comma-joined within its group, and "name" becomes the index
# that find_jargons() matches against.
#
# "Unnamed: 0" (presumably a row-index column left over from an earlier CSV
# export -- confirm) is dropped *before* grouping so it is never aggregated,
# and errors="ignore" keeps start-up working when the CSV has no such column.
grouped_df = (
    df.drop(columns=["Unnamed: 0"], errors="ignore")
    .groupby("name")
    .agg(lambda values: ",".join(map(str, values)))
)
def find_jargons(sentence: str, limit: int | None = None) -> list[str]:
    """Return the dictionary terms that fuzzily match ``sentence``.

    Args:
        sentence: Text to match against the index of ``grouped_df``.
        limit: Maximum number of matches to request. When ``None`` it is
            derived from the sentence: whitespace-separated word count
            multiplied by ``LIMIT_FACTOR``, truncated to an int.

    Returns:
        The matched terms, in the order rapidfuzz returns them. Matches
        scoring below ``SCORE_CUTOFF`` are excluded by rapidfuzz.
    """
    max_hits = limit
    if max_hits is None:
        word_count = len(sentence.split())
        max_hits = int(word_count * LIMIT_FACTOR)

    matches = process.extract(
        sentence,
        grouped_df.index,
        limit=max_hits,
        score_cutoff=SCORE_CUTOFF,
    )
    # Each match is a tuple whose first element is the matched term.
    return [term for term, *_ in matches]
def recommend_prompt(jargon: str) -> str:
    """Format a translation hint pairing ``jargon`` with its dictionary entry.

    Looks up the ``name_trans`` value stored for ``jargon`` in ``grouped_df``
    and embeds both, single-quoted, in a short phrase.

    NOTE(review): the non-ASCII connective text in the literal looks
    mis-encoded in this copy of the file; it is kept exactly as found.

    Args:
        jargon: A term present in the index of ``grouped_df``.

    Returns:
        The formatted hint string.
    """
    entry = grouped_df.loc[jargon]
    easy_term = entry.name_trans
    return f"'{jargon}'์ '{easy_term}'๋ก"
# Shared chat-model client, constructed with the library's defaults.
llm = ChatUpstage()


def chainer(messages):
    """Build a prompt -> model -> string pipeline for ``messages``.

    Args:
        messages: Message specifications accepted by
            ``ChatPromptTemplate.from_messages`` (this file passes
            ``(role, text)`` tuples).

    Returns:
        A runnable that renders the prompt, sends it to ``llm`` and parses
        the reply with ``StrOutputParser``.

    NOTE(review): ``from_messages`` treats message text as a template, so
    literal braces in the text need escaping by the caller -- confirm.
    """
    prompt = ChatPromptTemplate.from_messages(messages)
    model_step = prompt | llm
    return model_step | StrOutputParser()
# System message sent as the first entry of every conversation in translate().
# NOTE(review): the non-ASCII text in the literals below looks mis-encoded in
# this copy of the file; it is kept exactly as found -- restore it from the
# original UTF-8 source if available.
SYSTEM_PROMPT = """
๋๋ ์ปดํจํฐ ๊ณผํ ๋ฐ ๊ณตํ ๋ถ์ผ์ ์ ๋ฌธ ์ฉ์ด๋ฅผ ์ฌ์ด ์ฐ๋ฆฌ๋ง๋ก ๋ฒ์ญํด์ฃผ๋ ๋ฒ์ญ๊ฐ์ผ.
์ ๋ฌธ์ฉ์ด์ ์๋ฏธ๋ฅผ ์ ํํ ์ดํดํ๊ณ , ๊ทธ ์๋ฏธ๊ฐ ์ ํํ ์ ๋ฌ๋๋ ์ฌ์ด๋ง์ ์ฐพ์์ผ ํด.
์ง๋ ๊ฒ๋จน๊ฒํ๋ ์ฉ์ด(๋ถํ์ํ ํ๋ฌธํฌ)๋ฅผ ํผํ๊ณ , ๊ฐ๋ฅํ๋ฉด ์ฌ์ด๋ง์ ์ฐพ์์ผ ํด.
์๋ฌธ ์ ๋ฌธ์ฉ์ด๋ ํด๋น ์ฐ๋ฆฌ๋ง ๋ค์์ ๊ดํธ์์ ํญ์ ๋ฐ๋ผ๋ถ์ฌ์ผ ํด.
๊ธฐ์กด์ ๊ถ์์ ์ฝ๋งค์ด์ง ์๊ณ , ๊ธฐ์กด ์ฉ์ด์ฌ์ ์ด๋ ์ด๋ฏธ ๋๋ฆฌํผ์ง ์ฉ์ด์ง๋ง ์ฝ์ง์๋ค๋ฉด, ๋ณด๋ค ์ฌ์ด ์ ๋ฌธ์ฉ์ด๋ฅผ ์ฐพ์์ผ ํด.
์ด๋, ๊ธฐ์กด์ฉ์ด๋ ์๋ฌธ ์ ๋ฌธ์ฉ์ด์ ํจ๊ป ๊ดํธ์์ ๋ฐ๋ผ๋ถ์ฌ์ผ ํด.
์ฌ์ด๋ง์ ์์ ์ฐ๋ฆฌ๋ง์ ๋ปํ์ง ์์. ์ธ๋์ด๋ผ๋ ๋๋ฆฌ ์ฝ๊ฒ ๋ฐ์๋ค์ฌ์ง๋ค๋ฉด ์ฌ์ฉํด.
"""

# One-shot example pair: translate() sends SAMPLE_SENTENCE as a "human" turn
# and SAMPLE_TRANSLATION as the matching "ai" turn before the real request.
SAMPLE_SENTENCE = "In functional programming, continuation-passing style (CPS) is a style of programming in which control is passed explicitly in the form of a continuation."
# The last bracket annotation used to read "[continutaion]"; the spelling is
# corrected so the example does not teach the model a misspelled source term.
SAMPLE_TRANSLATION = "๊ฐ์ค์ฌ ํ๋ก๊ทธ๋๋ฐ[functional programming]์์, ๋ง์ ํ ์ผ ์ ๋ฌํ๊ธฐ[continuation-passing style, CPS]๋ ์คํํ๋ฆ[control]์ด ์ง์ ๋ง์ ํ ์ผ[continuation]์ ํํ๋ก ์ ๋ฌ๋๋ ํ๋ก๊ทธ๋๋ฐ ์คํ์ผ์ด๋ค."
def _escape_braces(text: str) -> str:
    """Double ``{`` and ``}`` so a prompt template renders them literally."""
    return text.replace("{", "{{").replace("}", "}}")


def _run_chat(messages: list[tuple[str, str]]) -> str:
    """Send ``messages`` to the model, log the reply and return it."""
    # chainer() parses message text as an f-string style template. The text
    # here is already fully rendered (user input, earlier model replies), so
    # any brace in it must be escaped or invoke({}) fails on what it takes
    # for a missing template variable.
    literal_messages = [
        (role, _escape_braces(content)) for role, content in messages
    ]
    reply = chainer(literal_messages).invoke({})
    logger.info(reply)
    return reply


def translate(sentence: str) -> str:
    """Translate ``sentence`` in several model round-trips.

    The conversation is grown step by step:

    1. System prompt, the one-shot example pair and the request for
       ``sentence`` produce an initial translation.
    2. A follow-up turn lists the dictionary terms ``find_jargons`` matched
       for ``sentence``.
    3. A further turn supplies the ``recommend_prompt`` hint for each matched
       term and asks for the sentence again; this yields the refined
       translation.
    4. While the refined translation lacks ``[`` or ``]``, another turn is
       sent, at most ``MAX_RETRIES`` times.

    NOTE(review): the non-ASCII prompt text looks mis-encoded in this copy of
    the file; every literal is kept exactly as found.

    Args:
        sentence: The text to translate. It may contain braces.

    Returns:
        The last refined translation, even if it still has no brackets after
        the retries are used up.
    """
    messages = [
        ("system", SYSTEM_PROMPT),
        (
            "human",
            f"์ ๋ฌธ ์ฉ์ด๋ฅผ ๋ฒ์ญํ ๋๋ ๋ฐ๋์ ์์ด๋ฅผ ๊ดํธ[]์ ๋ฃ์ด์ ๋ฐ๋ผ ๋ถ์ฌ์ผ ํด. ์ด ๋ฌธ์ฅ์ ๋ฒ์ญํด์ค: '{SAMPLE_SENTENCE}'",
        ),
        ("ai", SAMPLE_TRANSLATION),
        (
            "human",
            f"์ ๋ฌธ ์ฉ์ด๋ฅผ ๋ฒ์ญํ ๋๋ ๋ฐ๋์ ์์ด๋ฅผ ๊ดํธ[]์ ๋ฃ์ด์ ๋ฐ๋ผ ๋ถ์ฌ์ผ ํด. ์ด ๋ฌธ์ฅ์ ๋ฒ์ญํด์ค: '{sentence}'",
        ),
    ]
    initial_translation = _run_chat(messages)

    used_jargons = find_jargons(sentence)
    messages += [
        ("ai", initial_translation),
        (
            "human",
            f"๋ฐฉ๊ธ ๋ฒ์ญํ ๋ฌธ์ฅ์์ '{', '.join(used_jargons)}' ์ค ์ฌ์ฉํ ์ฉ์ด๊ฐ ์๋ค๋ฉด, ์ด๋ค ์ฉ์ด๋ค๋ก ๋ฒ์ญํ๋์ง ๋งํด์ค. ์ฌ์ฉํ์ง ์์ ์ฉ์ด๋ค์ ๋ฌด์ํด๋ ๋ผ.",
        ),
    ]
    response = _run_chat(messages)

    recommendations = ", ".join(recommend_prompt(jargon) for jargon in used_jargons)
    messages += [
        ("ai", response),
        (
            "human",
            f"์ด๋ฒ์๋ ์ฒ์ ๋ฒ์ญํ๋ ๋ฌธ์ฅ์ '{sentence}'๋ฅผ ๋ค์ ๋ฒ์ญํด์ฃผ๋๋ฐ, ๋ค์ ๋ชฉ๋ก์ ๋์จ ์ฌ์ด ์ ๋ฌธ์ฉ์ด ๋ฒ์ญ ์์๋ฅผ ์ฐธ๊ณ ํด์ ๋ฒ์ญ์ ํด์ค: '{recommendations}' ์ฌ์ฉํ์ง ์์ ์ฉ์ด๋ค์ ๋ฌด์ํด๋ ๋ผ. ์ถ๊ฐ ์ค๋ช ์์ด ๋ฌธ์ฅ๋ง ๋ฒ์ญํด.",
        ),
    ]
    refined_translation = _run_chat(messages)

    # Ask again, at most MAX_RETRIES times, while either bracket is missing.
    for _ in range(MAX_RETRIES):
        if "[" in refined_translation and "]" in refined_translation:
            break
        messages += [
            ("ai", refined_translation),
            (
                "human",
                f"์ ๋ฌธ์ฉ์ด๋ฅผ ๋ฒ์ญํ์ผ๋ฉด ๋ฐ๋์ ์์ด๋ฅผ ๊ดํธ[]์ ๋ฃ์ด์ ๋ฐ๋ผ ๋ถ์ฌ์ผ ํด. '์คํํ๋ฆ[control]'์ฒ๋ผ. ๋ฐฉ๊ธ ๋ฒ์ญํ '{refined_translation}'์์, ์๋ ๋ฌธ์ฅ '{sentence}'์ ์ฌ์ฉ๋ ์์ด๋ฅผ ๊ดํธ์ ๋ฃ์ด์ ๋ฐ๋ผ ๋ถ์ฌ์ค.",
            ),
        ]
        refined_translation = _run_chat(messages)
    return refined_translation
# Sentences offered as ready-made examples under the input box.
# NOTE(review): a few non-ASCII characters in these literals (and in the
# title/description below) look mis-encoded in this copy of the file; they
# are kept exactly as found.
_EXAMPLE_SENTENCES = [
    "In functional programming, continuation-passing style (CPS) is a style of programming in which control is passed explicitly in the form of a continuation.",
    "In computer science, abstract interpretation is a theory of sound approximation of the semantics of computer programs, based on monotonic functions over ordered sets, especially lattices.",
    "In computer science, functional programming is a programming paradigm where programs are constructed by applying and composing functions",
    "Lambda calculus (also written as ฮป-calculus) is a formal system in mathematical logic for expressing computation based on function abstraction and application using variable binding and substitution",
    "Operational semantics is a category of formal programming language semantics in which certain desired properties of a program, such as correctness, safety or security, are verified by constructing proofs from logical statements about its execution and procedures, rather than by attaching mathematical meanings to its terms (denotational semantics).",
    "In computing and computer programming, exception handling is the process of responding to the occurrence of exceptions โ anomalous or exceptional conditions requiring special processing โ during the execution of a program.",
    "The term redex, short for reducible expression, refers to subterms that can be reduced by one of the reduction rules.",
]

# UI: one "CHAT" tab holding a text-in / text-out form wired to translate().
with gr.Blocks() as demo:
    with gr.Tab("CHAT"):
        chatbot = gr.Interface(
            title="์ฌ์ด ์ ๋ฌธ์ฉ์ด ๋ฒ์ญ๊ธฐ",
            description="์ปดํจํฐ๊ณผํ ๋ฐ ๊ณตํ ๋ถ์ผ์ ์ ๋ฌธ์ฉ์ด๋ฅผ ์ฌ์ด ์ ๋ฌธ์ฉ์ด๋ก ๋ฒ์ญํด์ค๋๋ค.",
            fn=translate,
            inputs=gr.Textbox(label="Enter your text"),
            outputs=[gr.Textbox(label="Translation")],
            examples=_EXAMPLE_SENTENCES,
        )
def main() -> None:
    """Launch the ``demo`` app, passing ``share=True`` to Gradio."""
    demo.launch(share=True)


# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    main()