Ridealist commited on
Commit
6a11f08
β€’
1 Parent(s): ab3bb98

refactor: Add Comments for explaining code and adjust linting

Browse files
Files changed (1) hide show
  1. app.py +27 -27
app.py CHANGED
@@ -1,38 +1,45 @@
 
 
 
 
1
  from langchain.chat_models import ChatOpenAI
2
  from langchain.document_loaders import PyPDFLoader
3
  from langchain.embeddings.openai import OpenAIEmbeddings
4
- from langchain.embeddings.cohere import CohereEmbeddings
5
  from langchain.text_splitter import CharacterTextSplitter
6
- from langchain.vectorstores.elastic_vector_search import ElasticVectorSearch
7
  from langchain.vectorstores import Chroma
8
- from PyPDF2 import PdfWriter
9
- import gradio as gr
10
- import os
11
  from dotenv import load_dotenv
12
- import openai
 
 
 
13
 
14
  load_dotenv()
15
- #λΉ„λ°€ν‚€ κ°€μ Έμ˜€κΈ° μ‹œλ„μ€‘
16
- # api_key = os.getenv('OPENAI_API_KEY') ## .env 파일 μ—…λ‘œλ“œν•˜λ©΄ μˆ¨κ²¨μ§€μ§€ μ•ŠμŒ μ•ˆλ¨
17
- # api_key = os.environ['my_secret'] ## μ•ˆλΆˆλŸ¬μ™€μ§
18
- # api_key = os.getenv('my_secret') ## 3트 .env λŒ€μ‹  secretν‚€λ₯Ό λΆˆλŸ¬μ˜€λŠ” ν˜•νƒœλ‘œ 도전
19
- os.environ["OPENAI_API_KEY"] = os.environ['my_secret']
20
 
21
- loader = PyPDFLoader("/home/user/app/docs.pdf")
 
22
  documents = loader.load()
23
 
 
24
  text_splitter = CharacterTextSplitter(chunk_size=800, chunk_overlap=0)
25
  texts = text_splitter.split_documents(documents)
26
 
27
- #vector embedding
28
  embeddings = OpenAIEmbeddings()
29
  vector_store = Chroma.from_documents(texts, embeddings)
30
  retriever = vector_store.as_retriever(search_kwargs={"k": 2})
31
 
32
  from langchain.chat_models import ChatOpenAI
33
  from langchain.chains import RetrievalQAWithSourcesChain
 
 
 
 
 
34
 
35
- llm = ChatOpenAI(model_name="gpt-4", temperature=0) # Modify model_name if you have access to GPT-4
 
36
 
37
  chain = RetrievalQAWithSourcesChain.from_chain_type(
38
  llm=llm,
@@ -40,12 +47,6 @@ chain = RetrievalQAWithSourcesChain.from_chain_type(
40
  retriever = retriever,
41
  return_source_documents=True)
42
 
43
- from langchain.prompts.chat import (
44
- ChatPromptTemplate,
45
- SystemMessagePromptTemplate,
46
- HumanMessagePromptTemplate,
47
- )
48
-
49
  system_template="""Use the following pieces of context to answer the users question shortly.
50
  Given the following summaries of a long document and a question, create a final answer with references ("SOURCES"), use "SOURCES" in capital letters regardless of the number of sources.
51
  If you don't know the answer, just say that "I don't know", don't try to make up an answer.
@@ -61,12 +62,11 @@ messages = [
61
 
62
  prompt = ChatPromptTemplate.from_messages(messages)
63
 
64
- from langchain.chat_models import ChatOpenAI
65
- from langchain.chains import RetrievalQAWithSourcesChain
66
-
67
  chain_type_kwargs = {"prompt": prompt}
68
 
69
- llm = ChatOpenAI(model_name="gpt-4", temperature=0) # Modify model_name if you have access to GPT-4
70
 
71
  chain = RetrievalQAWithSourcesChain.from_chain_type(
72
  llm=llm,
@@ -84,21 +84,21 @@ for doc in result['source_documents']:
84
  print('λ‚΄μš© : ' + doc.page_content[0:100].replace('\n', ' '))
85
  print('파일 : ' + doc.metadata['source'])
86
  print('νŽ˜μ΄μ§€ : ' + str(doc.metadata['page']))
 
87
 
88
-
89
  def respond(message, chat_history): # μ±„νŒ…λ΄‡μ˜ 응닡을 μ²˜λ¦¬ν•˜λŠ” ν•¨μˆ˜λ₯Ό μ •μ˜ν•©λ‹ˆλ‹€.
90
 
91
  result = chain(message)
92
-
93
  bot_message = result['answer']
94
 
95
  for i, doc in enumerate(result['source_documents']):
96
  bot_message += '[' + str(i+1) + '] ' + doc.metadata['source'] + '(' + str(doc.metadata['page']) + ') '
97
-
98
  chat_history.append((message, bot_message)) # μ±„νŒ… 기둝에 μ‚¬μš©μžμ˜ λ©”μ‹œμ§€μ™€ λ΄‡μ˜ 응닡을 μΆ”κ°€ν•©λ‹ˆλ‹€.
99
 
100
  return "", chat_history # μˆ˜μ •λœ μ±„νŒ… 기둝을 λ°˜ν™˜ν•©λ‹ˆλ‹€.
101
 
 
102
  with gr.Blocks(theme='gstaff/sketch') as demo: # gr.Blocks()λ₯Ό μ‚¬μš©ν•˜μ—¬ μΈν„°νŽ˜μ΄μŠ€λ₯Ό μƒμ„±ν•©λ‹ˆλ‹€.
103
  gr.Markdown("# μ•ˆλ…•ν•˜μ„Έμš”. 세이노와 λŒ€ν™”ν•΄λ³΄μ„Έμš”.")
104
  chatbot = gr.Chatbot(label="μ±„νŒ…μ°½") # 'μ±„νŒ…μ°½'μ΄λΌλŠ” λ ˆμ΄λΈ”μ„ 가진 μ±„νŒ…λ΄‡ μ»΄ν¬λ„ŒνŠΈλ₯Ό μƒμ„±ν•©λ‹ˆλ‹€.
 
1
+ import gradio as gr
2
+ import openai
3
+ import os
4
+
5
  from langchain.chat_models import ChatOpenAI
6
  from langchain.document_loaders import PyPDFLoader
7
  from langchain.embeddings.openai import OpenAIEmbeddings
 
8
  from langchain.text_splitter import CharacterTextSplitter
 
9
  from langchain.vectorstores import Chroma
 
 
 
10
  from dotenv import load_dotenv
11
+
12
+ from langchain.embeddings.cohere import CohereEmbeddings
13
+ from langchain.vectorstores.elastic_vector_search import ElasticVectorSearch
14
+ from PyPDF2 import PdfWriter
15
 
16
  load_dotenv()
17
+ os.environ["OPENAI_API_KEY"] = os.getenv('my_secret')
18
+ openai.api_key = os.getenv('my_secret')
 
 
 
19
 
20
+ ## Load PDF file
21
+ loader = PyPDFLoader("docs.pdf")
22
  documents = loader.load()
23
 
24
+ ## Split Document
25
  text_splitter = CharacterTextSplitter(chunk_size=800, chunk_overlap=0)
26
  texts = text_splitter.split_documents(documents)
27
 
28
+ ## token -> Vector Embedding
29
  embeddings = OpenAIEmbeddings()
30
  vector_store = Chroma.from_documents(texts, embeddings)
31
  retriever = vector_store.as_retriever(search_kwargs={"k": 2})
32
 
33
  from langchain.chat_models import ChatOpenAI
34
  from langchain.chains import RetrievalQAWithSourcesChain
35
+ from langchain.prompts.chat import (
36
+ ChatPromptTemplate,
37
+ SystemMessagePromptTemplate,
38
+ HumanMessagePromptTemplate,
39
+ )
40
 
41
+ ## Build LLM Chain
42
+ llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0) # Modify model_name if you have access to GPT-4
43
 
44
  chain = RetrievalQAWithSourcesChain.from_chain_type(
45
  llm=llm,
 
47
  retriever = retriever,
48
  return_source_documents=True)
49
 
 
 
 
 
 
 
50
  system_template="""Use the following pieces of context to answer the users question shortly.
51
  Given the following summaries of a long document and a question, create a final answer with references ("SOURCES"), use "SOURCES" in capital letters regardless of the number of sources.
52
  If you don't know the answer, just say that "I don't know", don't try to make up an answer.
 
62
 
63
  prompt = ChatPromptTemplate.from_messages(messages)
64
 
65
+ ############################
66
+ ## Local μ—μ„œ 잘 λ˜λŠ”μ§€ 확인
 
67
  chain_type_kwargs = {"prompt": prompt}
68
 
69
+ llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0) # Modify model_name if you have access to GPT-4
70
 
71
  chain = RetrievalQAWithSourcesChain.from_chain_type(
72
  llm=llm,
 
84
  print('λ‚΄μš© : ' + doc.page_content[0:100].replace('\n', ' '))
85
  print('파일 : ' + doc.metadata['source'])
86
  print('νŽ˜μ΄μ§€ : ' + str(doc.metadata['page']))
87
+ ##############################
88
 
89
+ ## Define response method
90
def respond(message, chat_history):
    """Produce the chatbot's answer for *message* and record the exchange.

    Runs the module-level retrieval QA ``chain``, appends numbered source
    citations (file path and page) to the answer, pushes the
    ``(message, bot_message)`` pair onto *chat_history*, and returns an
    empty string (to clear the input box) together with the updated history.
    """
    result = chain(message)
    bot_message = result['answer']

    # Append a citation per retrieved document, e.g. "[1] docs.pdf(3) ".
    for rank, doc in enumerate(result['source_documents'], start=1):
        bot_message += '[' + str(rank) + '] ' + doc.metadata['source'] + '(' + str(doc.metadata['page']) + ') '

    # Record the user's message and the bot's reply in the chat history.
    chat_history.append((message, bot_message))

    # Return the cleared input plus the updated history for Gradio to render.
    return "", chat_history
100
 
101
+ ## Build Gradio App
102
  with gr.Blocks(theme='gstaff/sketch') as demo: # gr.Blocks()λ₯Ό μ‚¬μš©ν•˜μ—¬ μΈν„°νŽ˜μ΄μŠ€λ₯Ό μƒμ„±ν•©λ‹ˆλ‹€.
103
  gr.Markdown("# μ•ˆλ…•ν•˜μ„Έμš”. 세이노와 λŒ€ν™”ν•΄λ³΄μ„Έμš”.")
104
  chatbot = gr.Chatbot(label="μ±„νŒ…μ°½") # 'μ±„νŒ…μ°½'μ΄λΌλŠ” λ ˆμ΄λΈ”μ„ 가진 μ±„νŒ…λ΄‡ μ»΄ν¬λ„ŒνŠΈλ₯Ό μƒμ„±ν•©λ‹ˆλ‹€.