Zwea Htet committed on
Commit
a43a4a7
1 Parent(s): 781a2e4

updated code

Browse files
Files changed (2) hide show
  1. app.py +47 -25
  2. pdf/NDA for Student Interns.pdf +0 -0
app.py CHANGED
@@ -3,16 +3,17 @@
3
 
4
  import streamlit as st
5
  from langchain_community.document_loaders.pdf import PyPDFLoader
6
- from langchain_community.vectorstores import pinecone
7
- from langchain_openai import OpenAIEmbeddings, OpenAI
 
8
  from langchain.memory import ConversationBufferMemory
9
  from langchain_core.prompts import ChatPromptTemplate
10
- from langchain.chains import ConversationalRetrievalChain, RetrievalQA
11
  import openai
12
  from dotenv import load_dotenv
13
  import os
14
 
15
- # import pinecone
16
 
17
  load_dotenv()
18
 
@@ -27,31 +28,47 @@ SAVE_DIR = "pdf"
27
 
28
 
29
  def generate_response(pages, query_text, k, chain_type):
30
- if pages is not None:
31
  pinecone.init(
32
  api_key=os.getenv("PINECONE_API_KEY"),
33
  environment=os.getenv("PINECONE_ENV_NAME"),
34
  )
35
 
36
- vector_db = pinecone.Pinecone.from_documents(
37
- documents=pages, embedding=OpenAIEmbeddings(), index_name="openai-index"
38
  )
39
 
40
  retriever = vector_db.as_retriever(
41
  search_type="similarity", search_kwards={"k": k}
42
  )
43
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44
  # create a chain to answer questions
45
- qa = RetrievalQA.from_chain_type(
46
- llm=OpenAI(),
47
  chain_type=chain_type,
48
  retriever=retriever,
49
- return_source_documents=True
 
50
  )
51
 
52
- response = qa({"query": query_text})
53
  return response
54
 
 
55
  def visual_annotate(document, answer):
56
  # Implement this function according to your specific requirements
57
  # Highlight the part of the document where the answer was found
@@ -80,18 +97,19 @@ with st.sidebar.form(key="sidebar-form"):
80
  )
81
  os.environ["PINECONE_API_KEY"] = pinecone_api_key
82
 
83
- pinecone_env_name = st.text_input("Enter your Pinecone environment name)")
84
  os.environ["PINECONE_ENV_NAME"] = pinecone_env_name
85
 
86
- submitted = st.sidebar.form_submit_button(
87
  label="Submit",
88
- disabled=not (openai_api_key and pinecone_api_key and pinecone_env_name),
89
  )
90
 
91
  left_column, right_column = st.columns(2)
92
 
93
  with left_column:
94
  uploaded_file = st.file_uploader("Choose a pdf file", type="pdf")
 
95
 
96
  if uploaded_file is not None:
97
  # save the uploaded file to the specified directory
@@ -101,7 +119,8 @@ with left_column:
101
  st.success(f"File {uploaded_file.name} is saved at path {file_path}")
102
 
103
  loader = PyPDFLoader(file_path=file_path)
104
- pages = loader.load_and_split()
 
105
 
106
  query_text = st.text_input(
107
  "Enter your question:", placeholder="Please provide a short summary."
@@ -115,20 +134,23 @@ with left_column:
115
 
116
  with st.spinner("Retrieving and generating a response ..."):
117
  response = generate_response(
118
- pages=pages,
119
- query_text=query_text,
120
- k=k,
121
- chain_type=chain_type
122
  )
123
 
124
  with right_column:
125
  st.write("Output of your question")
126
 
127
- st.subheader("Result")
128
- st.write(response['result'])
129
-
130
- st.subheader("source_documents")
131
- st.write(response['source_documents'][0])
 
 
 
 
 
 
132
 
133
 
134
  # with st.form("myform", clear_on_submit=True):
 
3
 
4
  import streamlit as st
5
  from langchain_community.document_loaders.pdf import PyPDFLoader
6
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
7
+ from langchain_community.vectorstores.pinecone import Pinecone
8
+ from langchain_openai import OpenAIEmbeddings, ChatOpenAI
9
  from langchain.memory import ConversationBufferMemory
10
  from langchain_core.prompts import ChatPromptTemplate
11
+ from langchain.chains import ConversationalRetrievalChain, RetrievalQAWithSourcesChain
12
  import openai
13
  from dotenv import load_dotenv
14
  import os
15
 
16
+ import pinecone
17
 
18
  load_dotenv()
19
 
 
28
 
29
 
30
  def generate_response(pages, query_text, k, chain_type):
31
+ if pages:
32
  pinecone.init(
33
  api_key=os.getenv("PINECONE_API_KEY"),
34
  environment=os.getenv("PINECONE_ENV_NAME"),
35
  )
36
 
37
+ vector_db = Pinecone.from_documents(
38
+ documents=pages, embedding=OpenAIEmbeddings(), index_name="document-chat"
39
  )
40
 
41
  retriever = vector_db.as_retriever(
42
  search_type="similarity", search_kwards={"k": k}
43
  )
44
+
45
+ prompt_template = ChatPromptTemplate.from_messages(
46
+ [
47
+ (
48
+ "system",
49
+ "You are a helpful assistant that can answer questions regarding to a document provided by the user.",
50
+ ),
51
+ ("human", "Hello, how are you doing?"),
52
+ ("ai", "I'm doing well, thanks!"),
53
+ ("human", "{user_input}"),
54
+ ]
55
+ )
56
+
57
+ llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)
58
+
59
  # create a chain to answer questions
60
+ qa = RetrievalQAWithSourcesChain.from_chain_type(
61
+ llm=llm,
62
  chain_type=chain_type,
63
  retriever=retriever,
64
+ return_source_documents=True,
65
+ # prompt_template=prompt_template,
66
  )
67
 
68
+ response = qa({"question": query_text})
69
  return response
70
 
71
+
72
  def visual_annotate(document, answer):
73
  # Implement this function according to your specific requirements
74
  # Highlight the part of the document where the answer was found
 
97
  )
98
  os.environ["PINECONE_API_KEY"] = pinecone_api_key
99
 
100
+ pinecone_env_name = st.text_input("Enter your Pinecone environment name")
101
  os.environ["PINECONE_ENV_NAME"] = pinecone_env_name
102
 
103
+ submitted = st.form_submit_button(
104
  label="Submit",
105
+ # disabled=not (openai_api_key and pinecone_api_key and pinecone_env_name),
106
  )
107
 
108
  left_column, right_column = st.columns(2)
109
 
110
  with left_column:
111
  uploaded_file = st.file_uploader("Choose a pdf file", type="pdf")
112
+ pages = []
113
 
114
  if uploaded_file is not None:
115
  # save the uploaded file to the specified directory
 
119
  st.success(f"File {uploaded_file.name} is saved at path {file_path}")
120
 
121
  loader = PyPDFLoader(file_path=file_path)
122
+ text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=0)
123
+ pages = loader.load_and_split(text_splitter=text_splitter)
124
 
125
  query_text = st.text_input(
126
  "Enter your question:", placeholder="Please provide a short summary."
 
134
 
135
  with st.spinner("Retrieving and generating a response ..."):
136
  response = generate_response(
137
+ pages=pages, query_text=query_text, k=k, chain_type=chain_type
 
 
 
138
  )
139
 
140
  with right_column:
141
  st.write("Output of your question")
142
 
143
+ if response:
144
+ st.subheader("Result")
145
+ st.write(response["answer"])
146
+ print("response: ", response)
147
+
148
+ st.subheader("source_documents")
149
+ for each in response["source_documents"]:
150
+ st.write("page: ", each.metadata["page"])
151
+ st.write("source: ", each.metadata["source"])
152
+ else:
153
+ st.write("response not showing at the moment")
154
 
155
 
156
  # with st.form("myform", clear_on_submit=True):
pdf/NDA for Student Interns.pdf ADDED
Binary file (530 kB). View file