Spaces:
Sleeping
Sleeping
dual tabs again
Browse files- app.py +54 -20
- tmp_file.pdf +3 -0
app.py
CHANGED
@@ -2,6 +2,13 @@ import os
|
|
2 |
|
3 |
import streamlit as st
|
4 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5 |
st.set_page_config(page_title="CoreMind AI", layout="wide")
|
6 |
|
7 |
st.header("CoreMind AI")
|
@@ -94,7 +101,6 @@ from langchain.llms import OpenAI
|
|
94 |
# embeddings = OpenAIEmbeddings()
|
95 |
# docsearch = Chroma(persist_directory="data", embedding_function=embeddings)
|
96 |
|
97 |
-
from langchain.chains import RetrievalQA
|
98 |
|
99 |
# qa = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=docsearch.as_retriever())
|
100 |
|
@@ -102,31 +108,59 @@ from langchain.chains import RetrievalQA
|
|
102 |
|
103 |
# st.markdown("----")
|
104 |
|
105 |
-
|
106 |
|
107 |
if openai_key:
|
108 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
109 |
|
110 |
-
|
111 |
-
|
112 |
-
|
113 |
-
|
114 |
|
115 |
-
|
116 |
-
|
117 |
|
118 |
-
|
119 |
-
|
120 |
|
121 |
-
|
122 |
-
|
123 |
-
|
124 |
-
|
125 |
|
126 |
-
|
127 |
-
|
128 |
|
129 |
-
|
130 |
|
131 |
-
|
132 |
-
|
|
|
2 |
|
3 |
import streamlit as st
|
4 |
|
5 |
+
from langchain.chains import RetrievalQA
|
6 |
+
from langchain.document_loaders import PyPDFLoader
|
7 |
+
|
8 |
+
from langchain.vectorstores import Chroma
|
9 |
+
from langchain.document_loaders import TextLoader
|
10 |
+
from langchain.embeddings.openai import OpenAIEmbeddings
|
11 |
+
|
12 |
st.set_page_config(page_title="CoreMind AI", layout="wide")
|
13 |
|
14 |
st.header("CoreMind AI")
|
|
|
101 |
# embeddings = OpenAIEmbeddings()
|
102 |
# docsearch = Chroma(persist_directory="data", embedding_function=embeddings)
|
103 |
|
|
|
104 |
|
105 |
# qa = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=docsearch.as_retriever())
|
106 |
|
|
|
108 |
|
109 |
# st.markdown("----")
|
110 |
|
111 |
+
|
112 |
|
113 |
if openai_key:
|
114 |
+
def question_answer(user_text, qa_temperature):
|
115 |
+
qa = RetrievalQA.from_chain_type(
|
116 |
+
llm=OpenAI(temperature=qa_temperature, model_name="gpt-3.5-turbo"),
|
117 |
+
retriever=docsearch.as_retriever()
|
118 |
+
)
|
119 |
+
response = qa.run(user_text)
|
120 |
+
return response
|
121 |
+
|
122 |
+
|
123 |
+
loader = TextLoader("raw_data.txt")
|
124 |
+
embeddings = OpenAIEmbeddings()
|
125 |
+
docsearch = Chroma(persist_directory="data", embedding_function=embeddings)
|
126 |
+
|
127 |
+
qa_tab, understanding_tab = st.tabs(["Database Understanding", "PDF Understanding"])
|
128 |
+
|
129 |
+
with qa_tab:
|
130 |
+
st.header("Question Answering")
|
131 |
+
st.write("Dataset is Berkshire Hathaway's end of year reports for 1995, 1996 and 1997.")
|
132 |
+
|
133 |
+
qa_query = st.text_area("Enter your query", key="qa_query", help="Got a question you think your docs can answer? Just ask!")
|
134 |
+
|
135 |
+
if qa_query:
|
136 |
+
response = question_answer(qa_query, 0.9)
|
137 |
+
st.write(response)
|
138 |
+
|
139 |
+
|
140 |
+
|
141 |
+
with understanding_tab:
|
142 |
+
llm = OpenAI(temperature=0.9)
|
143 |
|
144 |
+
pdf_file = st.file_uploader("Upload a PDF", type=["pdf"], key="pdf_file")
|
145 |
+
if pdf_file:
|
146 |
+
with open("tmp_file.pdf", "wb") as f:
|
147 |
+
f.write(pdf_file.getbuffer())
|
148 |
|
149 |
+
loader = PyPDFLoader("tmp_file.pdf")
|
150 |
+
pages = loader.load_and_split()
|
151 |
|
152 |
+
from langchain.vectorstores import FAISS
|
153 |
+
from langchain.embeddings.openai import OpenAIEmbeddings
|
154 |
|
155 |
+
faiss_index = FAISS.from_documents(pages, OpenAIEmbeddings())
|
156 |
+
# docs = faiss_index.similarity_search("How will the community be engaged?", k=2)
|
157 |
+
# for doc in docs:
|
158 |
+
# st.write(str(doc.metadata["page"]) + ":", doc.page_content[:300])
|
159 |
|
160 |
+
qa_prompt = st.text_area("Query your pdf", key="qa_prompt")
|
161 |
+
# qa_button = st.button("Let's go!", disabled=not (openai_key and qa_prompt), key="qa_button", help="Make sure you have entered your OpenAI API key and a query.")
|
162 |
|
163 |
+
if qa_prompt:
|
164 |
|
165 |
+
qa = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=faiss_index.as_retriever())
|
166 |
+
st.write(qa(qa_prompt))
|
tmp_file.pdf
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:52138f7464c477b10bf5068b8a211f059f8b90ba6b9c4d16c8ee434193a70be4
|
3 |
+
size 1638808
|