Commit a43a4a7
Author: Zwea Htet
Parent(s): 781a2e4

updated code

Files changed:
- app.py (+47 -25)
- pdf/NDA for Student Interns.pdf (+0 -0)
app.py CHANGED

@@ -3,16 +3,17 @@
 
 import streamlit as st
 from langchain_community.document_loaders.pdf import PyPDFLoader
-from …
-from …
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain_community.vectorstores.pinecone import Pinecone
+from langchain_openai import OpenAIEmbeddings, ChatOpenAI
 from langchain.memory import ConversationBufferMemory
 from langchain_core.prompts import ChatPromptTemplate
-from langchain.chains import ConversationalRetrievalChain, …
+from langchain.chains import ConversationalRetrievalChain, RetrievalQAWithSourcesChain
 import openai
 from dotenv import load_dotenv
 import os
 
-…
+import pinecone
 
 load_dotenv()
 
@@ -27,31 +28,47 @@ SAVE_DIR = "pdf"
 
 
 def generate_response(pages, query_text, k, chain_type):
-    if pages…
+    if pages:
         pinecone.init(
             api_key=os.getenv("PINECONE_API_KEY"),
             environment=os.getenv("PINECONE_ENV_NAME"),
         )
 
-        vector_db = …
-            documents=pages, embedding=OpenAIEmbeddings(), index_name="…
+        vector_db = Pinecone.from_documents(
+            documents=pages, embedding=OpenAIEmbeddings(), index_name="document-chat"
         )
 
         retriever = vector_db.as_retriever(
             search_type="similarity", search_kwards={"k": k}
         )
-        …
+
+        prompt_template = ChatPromptTemplate.from_messages(
+            [
+                (
+                    "system",
+                    "You are a helpful assistant that can answer questions regarding to a document provided by the user.",
+                ),
+                ("human", "Hello, how are you doing?"),
+                ("ai", "I'm doing well, thanks!"),
+                ("human", "{user_input}"),
+            ]
+        )
+
+        llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)
+
         # create a chain to answer questions
-        qa = …
-            llm=…
+        qa = RetrievalQAWithSourcesChain.from_chain_type(
+            llm=llm,
             chain_type=chain_type,
             retriever=retriever,
-            return_source_documents=True
+            return_source_documents=True,
+            # prompt_template=prompt_template,
         )
 
-        response = qa({"…
+        response = qa({"question": query_text})
         return response
 
+
 def visual_annotate(document, answer):
     # Implement this function according to your specific requirements
     # Highlight the part of the document where the answer was found
 
@@ -80,18 +97,19 @@ with st.sidebar.form(key="sidebar-form"):
     )
     os.environ["PINECONE_API_KEY"] = pinecone_api_key
 
-    pinecone_env_name = st.text_input("Enter your Pinecone environment name…
+    pinecone_env_name = st.text_input("Enter your Pinecone environment name")
     os.environ["PINECONE_ENV_NAME"] = pinecone_env_name
 
-    submitted = st.…
+    submitted = st.form_submit_button(
         label="Submit",
-        disabled=not (openai_api_key and pinecone_api_key and pinecone_env_name),
+        # disabled=not (openai_api_key and pinecone_api_key and pinecone_env_name),
     )
 
 left_column, right_column = st.columns(2)
 
 with left_column:
     uploaded_file = st.file_uploader("Choose a pdf file", type="pdf")
+    pages = []
 
     if uploaded_file is not None:
         # save the uploaded file to the specified directory
 
@@ -101,7 +119,8 @@ with left_column:
         st.success(f"File {uploaded_file.name} is saved at path {file_path}")
 
         loader = PyPDFLoader(file_path=file_path)
-        …
+        text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=0)
+        pages = loader.load_and_split(text_splitter=text_splitter)
 
         query_text = st.text_input(
             "Enter your question:", placeholder="Please provide a short summary."
 
@@ -115,20 +134,23 @@ with left_column:
 
     with st.spinner("Retrieving and generating a response ..."):
         response = generate_response(
-            pages=pages,
-            query_text=query_text,
-            k=k,
-            chain_type=chain_type
+            pages=pages, query_text=query_text, k=k, chain_type=chain_type
        )
 
 with right_column:
     st.write("Output of your question")
 
-    …
-    …
-    …
-    …
-    …
+    if response:
+        st.subheader("Result")
+        st.write(response["answer"])
+        print("response: ", response)
+
+        st.subheader("source_documents")
+        for each in response["source_documents"]:
+            st.write("page: ", each.metadata["page"])
+            st.write("source: ", each.metadata["source"])
+    else:
+        st.write("response not showing at the moment")
 
 
 # with st.form("myform", clear_on_submit=True):
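Note: the committed retriever call spells the keyword `search_kwards`, so the `k` value is most likely ignored silently (LangChain's retriever model discards unknown fields) and the default top-k is used instead. Below is a minimal standalone sketch of the same retrieval flow, not the committed code: it assumes the pre-v3 `pinecone` client (`pinecone.init`) and the LangChain 0.1-era imports shown in the diff, spells the keyword `search_kwargs`, and reuses the `document-chat` index name and sample PDF path from this commit.

# Hedged sketch of generate_response() as a standalone script.
import os

import pinecone
from dotenv import load_dotenv
from langchain.chains import RetrievalQAWithSourcesChain
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders.pdf import PyPDFLoader
from langchain_community.vectorstores.pinecone import Pinecone
from langchain_openai import ChatOpenAI, OpenAIEmbeddings

load_dotenv()
pinecone.init(
    api_key=os.getenv("PINECONE_API_KEY"),
    environment=os.getenv("PINECONE_ENV_NAME"),
)

# Load and chunk the PDF the same way the Streamlit branch does.
loader = PyPDFLoader(file_path="pdf/NDA for Student Interns.pdf")
splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=0)
pages = loader.load_and_split(text_splitter=splitter)

vector_db = Pinecone.from_documents(
    documents=pages, embedding=OpenAIEmbeddings(), index_name="document-chat"
)
retriever = vector_db.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 4},  # the diff has "search_kwards", likely a typo
)

qa = RetrievalQAWithSourcesChain.from_chain_type(
    llm=ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0),
    chain_type="stuff",
    retriever=retriever,
    return_source_documents=True,
)

# This chain takes a "question" key and returns "answer", "sources",
# and (with return_source_documents=True) "source_documents".
response = qa({"question": "Please provide a short summary."})
print(response["answer"])
for doc in response["source_documents"]:
    print(doc.metadata["page"], doc.metadata["source"])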
|
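The diff builds `prompt_template` but leaves it commented out in the chain call, and `RetrievalQAWithSourcesChain.from_chain_type` has no `prompt_template` parameter. In this LangChain generation a custom prompt normally goes through `chain_type_kwargs`; for the "stuff" chain type it must expose the `{summaries}` and `{question}` variables its combine step fills in. A hedged sketch of one way to wire it in:

# Hedged sketch: pass a custom prompt via chain_type_kwargs; the template
# text here is illustrative, not from the commit.
from langchain_core.prompts import PromptTemplate

custom_prompt = PromptTemplate.from_template(
    "You are a helpful assistant that answers questions about a document "
    "provided by the user.\n\n"
    "Context:\n{summaries}\n\n"
    "Question: {question}\n"
    "Answer:"
)

qa = RetrievalQAWithSourcesChain.from_chain_type(
    llm=ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0),
    chain_type="stuff",
    retriever=retriever,  # retriever from the sketch above
    return_source_documents=True,
    chain_type_kwargs={"prompt": custom_prompt},
)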
pdf/NDA for Student Interns.pdf ADDED
Binary file (530 kB)