# Scraped from a Hugging Face Space file view: author "ldhldh", commit d15397c ("Update app.py").
from threading import Thread
from huggingface_hub import hf_hub_download
import torch
import gradio as gr
import re
import asyncio
import requests
import shutil
from langchain import PromptTemplate, LLMChain
from langchain.retrievers.document_compressors import EmbeddingsFilter
from langchain.retrievers import ContextualCompressionRetriever
from langchain.chains import RetrievalQA
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
import os
from langchain.llms import OpenAI
# Module-level setup, executed once at import time: create the LLM client,
# load and chunk the PDF, embed the chunks into a FAISS index, and wrap the
# index in a compression retriever that gen() uses for every question.
llm = OpenAI(model_name='gpt-3.5-turbo-instruct')
torch_device = "cuda" if torch.cuda.is_available() else "cpu"
print("Running on device:", torch_device)
print("CPU threads:", torch.get_num_threads())
loader = PyPDFLoader("total.pdf")
pages = loader.load()
# Load the data and split the text into chunks of a fixed size
# (600 characters, no overlap between neighbouring chunks).
text_splitter = RecursiveCharacterTextSplitter(chunk_size=600, chunk_overlap=0)
texts = text_splitter.split_documents(pages)
print(f"λ¬Έμ„œμ— {len(texts)}개의 λ¬Έμ„œλ₯Ό 가지고 μžˆμŠ΅λ‹ˆλ‹€.")
# Load the embedding model.
embeddings = HuggingFaceEmbeddings(model_name="intfloat/multilingual-e5-large")
# Embed the text of the chunks and build a FAISS index from them.
index = FAISS.from_documents(
    documents=texts,
    embedding=embeddings,
)
# Save the index locally.
# NOTE(review): the original comment says it is saved as "faiss_db", but the
# path passed here is the empty string — confirm where the files end up.
index.save_local("")
# Load the index back from the same local path.
docsearch = FAISS.load_local("", embeddings)
# NOTE(review): presumably keeps only retrieved chunks whose embedding
# similarity to the query reaches 0.7, limited by k = 2 — confirm the exact
# interaction of both settings against the LangChain EmbeddingsFilter docs.
embeddings_filter = EmbeddingsFilter(
    embeddings=embeddings,
    similarity_threshold=0.7,
    k = 2,
)
# Create the compression retriever.
compression_retriever = ContextualCompressionRetriever(
    # Use embeddings_filter as the compressor.
    base_compressor=embeddings_filter,
    # The base retriever is queried to find text similar to the search query.
    base_retriever=docsearch.as_retriever()
)
# Per-user session state, kept in four parallel lists: slot i of every list
# belongs to the user whose id is stored at id_list[i] (see gen()).
id_list, history = [], []
customer_data_list, customer_agree_list = [], []
# Literal placeholder texts "{context}" and "{question}".
context, question = "{context}", "{question}"
def _escape_braces(text):
    """Double every curly brace so *text* stays literal in a format-style template."""
    return text.replace("{", "{{").replace("}", "}}")


def gen(x, id, customer_data):
    """Produce the bot reply for one chat turn of the user identified by ``id``.

    Session state lives in the module-level parallel lists ``id_list``,
    ``history``, ``customer_data_list`` and ``customer_agree_list``; the
    position of ``id`` in ``id_list`` selects the user's slot in all of them.

    Behaviour:
      * First message from an unknown ``id``: a new slot is created, consent
        is recorded as "Yes" only if ``x`` is the consent command, and a
        greeting is returned (no LLM call).
      * Known ``id`` and ``x`` equal to one of the four literal command strings
        compared below (reset, show subscription info, give consent, withdraw
        consent): the state is updated and a canned reply is returned.
      * Anything else: ``x`` is treated as a question and answered through a
        RetrievalQA chain built on ``llm`` and ``compression_retriever``; the
        exchange is appended to the user's history.

    Args:
        x: The user's message text.
        id: The user identifier (name kept for interface compatibility even
            though it shadows the builtin).
        customer_data: Comma-separated subscription info supplied by the UI.

    Returns:
        The reply text to show to the user.
    """
    greeting = '상담원:무엇을 λ„μ™€λ“œλ¦΄κΉŒμš”?\n\n'
    # Two branches below seed the history with a single trailing newline;
    # kept as-is so stored histories stay unchanged.
    greeting_short = '상담원:무엇을 λ„μ™€λ“œλ¦΄κΉŒμš”?\n'
    agree_cmd = "μ•½κ΄€λ™μ˜_λ™μ˜ν•¨"
    no_data = "κ°€μž…μ •λ³΄μ—†μŒ"

    # --- First contact: create the user's slot and greet. ---
    if id not in id_list:
        id_list.append(id)
        customer_data_list.append(customer_data)
        history.append(greeting)
        if x != agree_cmd:
            customer_agree_list.append("No")
            return "* λ¨Όμ € κ°œμΈμ •λ³΄ 이용 약관에 λ™μ˜ν•˜μ…”μ•Ό μ›ν™œν•œ 상담을 진행할 수 μžˆμŠ΅λ‹ˆλ‹€. \n무엇을 λ„μ™€λ“œλ¦΄κΉŒμš”?"
        customer_agree_list.append("Yes")
        return f"κ°œμΈμ •λ³΄ ν™œμš©μ— λ™μ˜ν•˜μ…¨μŠ΅λ‹ˆλ‹€. κ°€μž… λ³΄ν—˜μ„ μ‘°νšŒν•©λ‹ˆλ‹€.\n\nν˜„μž¬ κ³ κ°λ‹˜κ»˜μ„œ κ°€μž…λœ λ³΄ν—˜μ€ {customer_data}μž…λ‹ˆλ‹€.\n\nκΆκΈˆν•˜μ‹  것이 μžˆμœΌμ‹ κ°€μš”?"

    index = id_list.index(id)
    # Consent is stored as the strings "Yes" / "No".
    agreed = customer_agree_list[index] != "No"

    # --- Command: reset the conversation. ---
    if x == "μ΄ˆκΈ°ν™”":
        # Fix: the history is now cleared for consenting users as well; the
        # reply announces a reset in both cases, but previously only the
        # non-consenting branch actually cleared it.
        history[index] = greeting
        if agreed:
            customer_data_list[index] = customer_data
            return f"λŒ€ν™”κΈ°λ‘μ΄ λͺ¨λ‘ μ΄ˆκΈ°ν™”λ˜μ—ˆμŠ΅λ‹ˆλ‹€.\n\nν˜„μž¬ κ³ κ°λ‹˜κ»˜μ„œ κ°€μž…λœ λ³΄ν—˜μ€ {customer_data}μž…λ‹ˆλ‹€.\n\nκΆκΈˆν•˜μ‹  것이 μžˆμœΌμ‹ κ°€μš”?"
        customer_data_list[index] = no_data
        return "λŒ€ν™”κΈ°λ‘μ΄ λͺ¨λ‘ μ΄ˆκΈ°ν™”λ˜μ—ˆμŠ΅λ‹ˆλ‹€.\n\n* λ¨Όμ € κ°œμΈμ •λ³΄ 이용 약관에 λ™μ˜ν•˜μ…”μ•Ό μ›ν™œν•œ 상담을 진행할 수 μžˆμŠ΅λ‹ˆλ‹€.\n\nκΆκΈˆν•˜μ‹  것이 μžˆμœΌμ‹ κ°€μš”?"

    # --- Command: show the stored subscription info. ---
    if x == "κ°€μž…μ •λ³΄":
        if not agreed:
            history[index] = greeting
            return "* λ¨Όμ € κ°œμΈμ •λ³΄ 이용 약관에 λ™μ˜ν•˜μ…”μ•Ό μ›ν™œν•œ 상담을 진행할 수 μžˆμŠ΅λ‹ˆλ‹€.\n\nκΆκΈˆν•˜μ‹  것이 μžˆμœΌμ‹ κ°€μš”?"
        history[index] = greeting_short
        return f"ν˜„μž¬ κ³ κ°λ‹˜κ»˜μ„œ κ°€μž…λœ λ³΄ν—˜μ€ {customer_data_list[index]}μž…λ‹ˆλ‹€.\n\nκΆκΈˆν•˜μ‹  것이 μžˆμœΌμ‹ κ°€μš”?"

    # --- Command: give consent. ---
    if x == agree_cmd:
        if not agreed:
            history[index] = greeting
            customer_agree_list[index] = "Yes"
            customer_data_list[index] = customer_data
            return f"κ°œμΈμ •λ³΄ ν™œμš©μ— λ™μ˜ν•˜μ…¨μŠ΅λ‹ˆλ‹€. κ°€μž… λ³΄ν—˜μ„ μ‘°νšŒν•©λ‹ˆλ‹€.\n\nν˜„μž¬ κ³ κ°λ‹˜κ»˜μ„œ κ°€μž…λœ λ³΄ν—˜μ€ {customer_data}μž…λ‹ˆλ‹€.\n\nκΆκΈˆν•˜μ‹  것이 μžˆμœΌμ‹ κ°€μš”?"
        history[index] = greeting_short
        return "이미 약관에 λ™μ˜ν•˜μ…¨μŠ΅λ‹ˆλ‹€.\n\nκΆκΈˆν•˜μ‹  것이 μžˆμœΌμ‹ κ°€μš”?"

    # --- Command: withdraw / refuse consent. ---
    if x == "μ•½κ΄€λ™μ˜_λ™μ˜μ•ˆν•¨":
        history[index] = greeting
        if agreed:
            customer_agree_list[index] = "No"
            customer_data_list[index] = no_data
            return "* κ°œμΈμ •λ³΄ ν™œμš© λ™μ˜λ₯Ό μ·¨μ†Œν•˜μ…¨μŠ΅λ‹ˆλ‹€. 이제 κ°€μž… λ³΄ν—˜μ„ μ‘°νšŒν•  수 μ—†μŠ΅λ‹ˆλ‹€.\n\nκΆκΈˆν•˜μ‹  것이 μžˆμœΌμ‹ κ°€μš”?"
        return "* κ°œμΈμ •λ³΄ ν™œμš©μ„ κ±°μ ˆν•˜μ…¨μŠ΅λ‹ˆλ‹€. κ°€μž… λ³΄ν—˜μ„ μ‘°νšŒν•  수 μ—†μŠ΅λ‹ˆλ‹€. \n\nκΆκΈˆν•˜μ‹  것이 μžˆμœΌμ‹ κ°€μš”?"

    # --- Free-form question: answer through the retrieval QA chain. ---
    # The f-string below is rendered first; these two locals re-insert the
    # literal "{context}" / "{question}" placeholders that PromptTemplate
    # fills in afterwards.
    context = "{context}"
    question = "{question}"
    if not agreed:
        customer_data_newline = "ν˜„μž¬ κ°€μž…μ •λ³΄λ₯Ό μ‘°νšŒν•  수 μ—†μŠ΅λ‹ˆλ‹€. κ°€μž…ν•  수 μžˆλŠ” κ΄€λ ¨ λ³΄ν—˜μ„ μœ„μ˜ λͺ©λ‘μ—μ„œ μ†Œκ°œν•΄μ£Όμ„Έμš”."
    else:
        # Fix: user-supplied text must not be able to introduce template
        # variables, so its braces are escaped before interpolation.
        customer_data_newline = _escape_braces(
            customer_data_list[index].replace(",", "\n")
        )
    # Fix: the history contains raw user and model text; escape it as well.
    history_text = _escape_braces(history[index])
    prompt_template = f"""당신은 λ³΄ν—˜ μƒλ‹΄μ›μž…λ‹ˆλ‹€. μ•„λž˜μ— 전체 λ³΄ν—˜ λͺ©λ‘, 질문과 κ΄€λ ¨λœ μ•½κ΄€ 정보, 고객의 λ³΄ν—˜ κ°€μž… 정보, 고객과의 상담기둝이 μ£Όμ–΄μ§‘λ‹ˆλ‹€. μš”μ²­μ„ 적절히 μ™„λ£Œν•˜λŠ” 응닡을 μž‘μ„±ν•˜μ„Έμš”. μ™„μ„±λœ λ¬Έμž₯으둜 κ°„κ²°νžˆ λ‹΅ν•˜μ„Έμš”.
[전체 λ³΄ν—˜ λͺ©λ‘]
λΌμ΄ν”„ν”Œλž˜λ‹›μ •κΈ°λ³΄ν—˜β…‘
λΌμ΄ν”„ν”Œλž˜λ‹›μ’…μ‹ λ³΄ν—˜
λΌμ΄ν”„ν”Œλž˜λ‹›μƒν•΄λ³΄ν—˜
λ§ŒκΈ°κΉŒμ§€λΉ„κ°±μ‹ μ•”λ³΄ν—˜β…‘
λΌμ΄ν”„ν”Œλž˜λ‹›μ•”λ³΄ν—˜β…’
μ•”Β·λ‡ŒΒ·μ‹¬μž₯κ±΄κ°•λ³΄ν—˜
λ‡ŒΒ·μ‹¬μž₯κ±΄κ°•λ³΄ν—˜
μ—¬μ„±κ±΄κ°•λ³΄ν—˜
κ±΄κ°•μΉ˜μ•„λ³΄ν—˜
μž…μ›λΉ„λ³΄ν—˜
μˆ˜μˆ λΉ„λ³΄ν—˜
λΌμ΄ν”„ν”Œλž˜λ‹›ν”ŒλŸ¬μŠ€μ–΄λ¦°μ΄λ³΄ν—˜β…‘
λΌμ΄ν”„ν”Œλž˜λ‹›ν”ŒλŸ¬μŠ€μ–΄λ¦°μ΄μ’…ν•©λ³΄ν—˜
λΌμ΄ν”„ν”Œλž˜λ‹›μ—λ“€μΌ€μ–΄μ €μΆ•λ³΄ν—˜β…‘
λΌμ΄ν”„ν”Œλž˜λ‹›μ—°κΈˆμ €μΆ•λ³΄ν—˜β…‘
1λ…„λΆ€ν„°μ €μΆ•λ³΄ν—˜
λΌμ΄ν”„ν”Œλž˜λ‹›μ—°κΈˆλ³΄ν—˜β…‘
고객은 λ³΄ν—˜ λͺ©λ‘κ³Ό 약관을 λ³Ό 수 μ—†μŠ΅λ‹ˆλ‹€. 직접 μ œμ‹œν•˜μ—¬ μ†Œκ°œν•˜μ„Έμš”.
{context}
[고객의 κ°€μž… 정보]
{customer_data_newline}
### λͺ…λ Ήμ–΄:
μ£Όμ–΄μ§€λŠ” 이전 λŒ€ν™”λ₯Ό 보고 λ§₯락을 νŒŒμ•…ν•˜μ—¬ μƒλ‹΄μ›μœΌλ‘œμ„œ κ³ κ°μ—κ²Œ ν•„μš”ν•œ 정보λ₯Ό μ΅œλŒ€ν•œ κΈΈκ³  μžμ„Έν•˜κ³  μΉœμ ˆν•˜κ²Œ μ œκ³΅ν•˜μ„Έμš”. 일반적인 λ³΄ν—˜ κ΄€λ ¨ 지식은 ν•΄λ‹Ή λ‚΄μš©λ§Œ κ°„κ²°νžˆ λ‹΅λ³€ν•˜μ„Έμš”.
### 질문:
{question}
### μž…λ ₯:
[이전 λŒ€ν™”]
{history_text}
### 응닡:
"""
    # Build the question-answering chain with RetrievalQA.from_chain_type.
    qa = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=compression_retriever,
        return_source_documents=False,
        verbose=True,
        chain_type_kwargs={"prompt": PromptTemplate(
            input_variables=["context", "question"],
            template=prompt_template,
        )},
    )
    response = qa({"query": f"{x}"})
    # Cut the answer after its last full stop to drop a trailing fragment.
    output_str = response['result'].rsplit(".", 1)[0] + "."
    # Strip a leading speaker label. Fix: split only on the FIRST colon so
    # that colons inside the answer no longer truncate it.
    speaker, colon, remainder = output_str.partition(":")
    if colon and speaker == "상담원":
        output_str = remainder
    history[index] += f"고객:{x}\n\n상담원:{output_str}\n\n"
    if not agreed:
        # Remind non-consenting users on every answer (not stored in history).
        output_str = "* λ¨Όμ € κ°œμΈμ •λ³΄ 이용 약관에 λ™μ˜ν•˜μ…”μ•Ό μ›ν™œν•œ 상담을 진행할 수 μžˆμŠ΅λ‹ˆλ‹€.\n\n" + output_str
    return output_str
def reset_textbox():
    """Return a Gradio update whose value is the empty string."""
    cleared = gr.update(value='')
    return cleared
# Gradio UI: a wide column with the chat input, the model output and the
# submit button, and a narrow column with the user id and the customer's
# subscription data. Clicking "Submit" calls gen() with the three inputs and
# writes its return value into the output box.
with gr.Blocks() as demo:
    gr.Markdown(
        "duplicated from beomi/KoRWKV-1.5B, baseModel:Llama-2-ko-7B-chat-gguf-q4_0"
    )
    with gr.Row():
        with gr.Column(scale=4):
            user_text = gr.Textbox(
                placeholder='μž…λ ₯',
                label="User input"
            )
            model_output = gr.Textbox(label="Model output", lines=10, interactive=False)
            button_submit = gr.Button(value="Submit")
        with gr.Column(scale=1):
            id_text = gr.Textbox(
                placeholder='772727',
                label="User id"
            )
            customer_data = gr.Textbox(
                placeholder='(무)1λ…„λΆ€ν„°μ €μΆ•λ³΄ν—˜, (무)μˆ˜μˆ λΉ„λ³΄ν—˜',
                label="customer_data"
            )
    button_submit.click(gen, [user_text, id_text, customer_data], model_output)

# Queuing is already enabled by .queue(); the former launch(enable_queue=True)
# argument was redundant, is deprecated in Gradio 3.x and rejected by newer
# releases, so it is no longer passed.
demo.queue().launch()