Spaces:
Runtime error
Runtime error
File size: 4,528 Bytes
6a11f08 396c3e3 6a11f08 396c3e3 6a11f08 396c3e3 6a11f08 396c3e3 6a11f08 396c3e3 6a11f08 396c3e3 6a11f08 396c3e3 6a11f08 396c3e3 6a11f08 396c3e3 6a11f08 396c3e3 6a11f08 396c3e3 6a11f08 396c3e3 6a11f08 396c3e3 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 |
import gradio as gr
import openai
import os
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import PyPDFLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma
from dotenv import load_dotenv
from langchain.embeddings.cohere import CohereEmbeddings
from langchain.vectorstores.elastic_vector_search import ElasticVectorSearch
from PyPDF2 import PdfWriter
load_dotenv()
os.environ["OPENAI_API_KEY"] = os.getenv('my_secret')
openai.api_key = os.getenv('my_secret')
## Load PDF file
loader = PyPDFLoader("docs.pdf")
documents = loader.load()
## Split Document
text_splitter = CharacterTextSplitter(chunk_size=800, chunk_overlap=0)
texts = text_splitter.split_documents(documents)
## token -> Vector Embedding
embeddings = OpenAIEmbeddings()
vector_store = Chroma.from_documents(texts, embeddings)
retriever = vector_store.as_retriever(search_kwargs={"k": 2})
from langchain.chat_models import ChatOpenAI
from langchain.chains import RetrievalQAWithSourcesChain
from langchain.prompts.chat import (
ChatPromptTemplate,
SystemMessagePromptTemplate,
HumanMessagePromptTemplate,
)
## Build LLM Chain
llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0) # Modify model_name if you have access to GPT-4
chain = RetrievalQAWithSourcesChain.from_chain_type(
llm=llm,
chain_type="stuff",
retriever = retriever,
return_source_documents=True)
system_template="""Use the following pieces of context to answer the users question shortly.
Given the following summaries of a long document and a question, create a final answer with references ("SOURCES"), use "SOURCES" in capital letters regardless of the number of sources.
If you don't know the answer, just say that "I don't know", don't try to make up an answer.
----------------
{summaries}
You MUST answer in Korean and in Markdown format:"""
messages = [
SystemMessagePromptTemplate.from_template(system_template),
HumanMessagePromptTemplate.from_template("{question}")
]
prompt = ChatPromptTemplate.from_messages(messages)
############################
## Local μμ μ λλμ§ νμΈ
chain_type_kwargs = {"prompt": prompt}
llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0) # Modify model_name if you have access to GPT-4
chain = RetrievalQAWithSourcesChain.from_chain_type(
llm=llm,
chain_type="stuff",
retriever = retriever,
return_source_documents=True,
chain_type_kwargs=chain_type_kwargs
)
query = "ν볡ν μΈμμ΄λ?"
result = chain(query)
for doc in result['source_documents']:
print('λ΄μ© : ' + doc.page_content[0:100].replace('\n', ' '))
print('νμΌ : ' + doc.metadata['source'])
print('νμ΄μ§ : ' + str(doc.metadata['page']))
##############################
## Define response method
def respond(message, chat_history): # μ±ν
λ΄μ μλ΅μ μ²λ¦¬νλ ν¨μλ₯Ό μ μν©λλ€.
result = chain(message)
bot_message = result['answer']
for i, doc in enumerate(result['source_documents']):
bot_message += '[' + str(i+1) + '] ' + doc.metadata['source'] + '(' + str(doc.metadata['page']) + ') '
chat_history.append((message, bot_message)) # μ±ν
κΈ°λ‘μ μ¬μ©μμ λ©μμ§μ λ΄μ μλ΅μ μΆκ°ν©λλ€.
return "", chat_history # μμ λ μ±ν
κΈ°λ‘μ λ°νν©λλ€.
## Build Gradio App
with gr.Blocks(theme='gstaff/sketch') as demo: # gr.Blocks()λ₯Ό μ¬μ©νμ¬ μΈν°νμ΄μ€λ₯Ό μμ±ν©λλ€.
gr.Markdown("# μλ
νμΈμ. μΈμ΄λ
Έμ λνν΄λ³΄μΈμ.")
chatbot = gr.Chatbot(label="μ±ν
μ°½") # 'μ±ν
μ°½'μ΄λΌλ λ μ΄λΈμ κ°μ§ μ±ν
λ΄ μ»΄ν¬λνΈλ₯Ό μμ±ν©λλ€.
msg = gr.Textbox(label="μ
λ ₯") # 'μ
λ ₯'μ΄λΌλ λ μ΄λΈμ κ°μ§ ν
μ€νΈλ°μ€λ₯Ό μμ±ν©λλ€.
clear = gr.Button("μ΄κΈ°ν") # 'μ΄κΈ°ν'λΌλ λ μ΄λΈμ κ°μ§ λ²νΌμ μμ±ν©λλ€.
msg.submit(respond, [msg, chatbot], [msg, chatbot]) # ν
μ€νΈλ°μ€μ λ©μμ§λ₯Ό μ
λ ₯νκ³ μ μΆνλ©΄ respond ν¨μκ° νΈμΆλλλ‘ ν©λλ€.
clear.click(lambda: None, None, chatbot, queue=False) # 'μ΄κΈ°ν' λ²νΌμ ν΄λ¦νλ©΄ μ±ν
κΈ°λ‘μ μ΄κΈ°νν©λλ€.
demo.launch(debug=True) # μΈν°νμ΄μ€λ₯Ό μ€νν©λλ€. μ€ννλ©΄ μ¬μ©μλ 'μ
λ ₯' ν
μ€νΈλ°μ€μ λ©μμ§λ₯Ό μμ±νκ³ μ μΆν μ μμΌλ©°, 'μ΄κΈ°ν' λ²νΌμ ν΅ν΄ μ±ν
κΈ°λ‘μ μ΄κΈ°ν ν μ μμ΅λλ€.
|