Zenne committed on
Commit
478194b
1 Parent(s): 233a41d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +99 -83
app.py CHANGED
@@ -19,14 +19,19 @@ import shutil
19
  OPENAI_API_KEY = ''
20
  PINECONE_API_KEY = ''
21
  PINECONE_API_ENV = ''
22
- pinecone_index_name = ''
23
- chroma_collection_name = ''
24
- persist_directory = ''
25
- docsearch_ready = False
26
- directory_name = 'tmp_docs'
27
  langchain.verbose = False
28
 
29
 
 
 
 
 
 
 
 
 
 
 
30
  @st.cache_data()
31
  def save_file(files):
32
  # Remove existing files in the directory
@@ -122,7 +127,7 @@ def setup_docsearch(use_pinecone, pinecone_index_name, embeddings, chroma_collec
122
  return docsearch, n_texts
123
 
124
 
125
- def get_response(query, chat_history):
126
  result = CRqa({"question": query, "chat_history": chat_history})
127
  return result['answer'], result['source_documents']
128
 
@@ -137,83 +142,94 @@ def setup_em_llm(OPENAI_API_KEY, temperature):
137
  return embeddings, llm
138
 
139
 
140
- # Get user input of whether to use Pinecone or not
141
- col1, col2, col3 = st.columns([1, 1, 1])
142
- # create the radio buttons and text input fields
143
- with col1:
144
- r_pinecone = st.radio('Use Pinecone?', ('Yes', 'No'))
145
- r_ingest = st.radio(
146
- 'Ingest file(s)?', ('Yes', 'No'))
147
- with col2:
148
- OPENAI_API_KEY = st.text_input(
149
- "OpenAI API key:", type="password")
150
- temperature = st.slider('Temperature', 0.0, 1.0, 0.1)
151
- k_sources = st.slider('# source(s) to print out', 0, 20, 2)
152
- with col3:
153
- if OPENAI_API_KEY:
154
- embeddings, llm = setup_em_llm(OPENAI_API_KEY, temperature)
155
- if r_pinecone.lower() == 'yes':
156
- use_pinecone = True
157
- PINECONE_API_KEY = st.text_input(
158
- "Pinecone API key:", type="password")
159
- PINECONE_API_ENV = st.text_input(
160
- "Pinecone API env:", type="password")
161
- pinecone_index_name = st.text_input('Pinecone index:')
162
- pinecone.init(api_key=PINECONE_API_KEY,
163
- environment=PINECONE_API_ENV)
164
- else:
165
- use_pinecone = False
166
- chroma_collection_name = st.text_input(
167
- '''Chroma collection name of 3-63 characters:''')
168
- persist_directory = "./vectorstore"
169
 
170
- if pinecone_index_name or chroma_collection_name:
 
 
171
  chat_history = []
172
- if r_ingest.lower() == 'yes':
173
- files = st.file_uploader('Upload Files', accept_multiple_files=True)
174
- if files:
175
- save_file(files)
176
- all_texts, n_texts = load_files()
177
- docsearch = ingest(all_texts, use_pinecone, embeddings, pinecone_index_name,
178
- chroma_collection_name, persist_directory)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
179
  docsearch_ready = True
180
- else:
181
- st.write(
182
- 'No data is to be ingested. Make sure the Pinecone index or Chroma collection name you provided contains data.')
183
- docsearch, n_texts = setup_docsearch(use_pinecone, pinecone_index_name,
184
- embeddings, chroma_collection_name, persist_directory)
185
- docsearch_ready = True
186
- if docsearch_ready:
187
- # number of sources (split-documents when ingesting files); default is 4
188
- k = min([20, n_texts])
189
- retriever = setup_retriever(docsearch, k)
190
- CRqa = ConversationalRetrievalChain.from_llm(
191
- llm, retriever=retriever, return_source_documents=True)
192
-
193
- st.title('Chatbot')
194
- # Get user input
195
- query = st.text_area('Enter your question:', height=10,
196
- placeholder='Summarize the context.')
197
- if query:
198
- # Generate a reply based on the user input and chat history
199
- reply, source = get_response(query, chat_history)
200
- # Update the chat history with the user input and system response
201
- chat_history.append(('User', query))
202
- chat_history.append(('Bot', reply))
203
- chat_history_str = '\n'.join(
204
- [f'{x[0]}: {x[1]}' for x in chat_history])
205
- st.text_area('Chat record:', value=chat_history_str, height=250)
206
- # Display sources
207
- for i, source_i in enumerate(source):
208
- if i < k_sources:
209
- if len(source_i.page_content) > 400:
210
- page_content = source_i.page_content[:400]
211
- else:
212
- page_content = source_i.page_content
213
- if source_i.metadata:
214
- metadata_source = source_i.metadata['source']
215
- st.write(
216
- f"**_Source {i+1}:_** {metadata_source}: {page_content}")
217
- st.write(source_i.metadata)
218
- else:
219
- st.write(f"**_Source {i+1}:_** {page_content}")
 
19
  OPENAI_API_KEY = ''
20
  PINECONE_API_KEY = ''
21
  PINECONE_API_ENV = ''
 
 
 
 
 
22
  langchain.verbose = False
23
 
24
 
25
@st.cache_data()
def init():
    """Provide the app's initial (empty) configuration values.

    Returns:
        tuple: (pinecone_index_name, chroma_collection_name,
        persist_directory, docsearch_ready, directory_name) — the three
        store identifiers start blank, nothing is marked as ingested, and
        uploads go to 'tmp_docs'.
    """
    docsearch_ready = False
    directory_name = 'tmp_docs'
    # Vector-store identifiers are empty until the user fills them in.
    return '', '', '', docsearch_ready, directory_name
33
+
34
+
35
  @st.cache_data()
36
  def save_file(files):
37
  # Remove existing files in the directory
 
127
  return docsearch, n_texts
128
 
129
 
130
def get_response(query, chat_history, CRqa):
    """Run one turn of the conversational retrieval chain.

    Args:
        query: The user's question.
        chat_history: Prior (speaker, text) turns fed back to the chain.
        CRqa: A callable (e.g. ConversationalRetrievalChain) that accepts a
            mapping with 'question' and 'chat_history' and returns a dict
            containing 'answer' and 'source_documents'.

    Returns:
        tuple: (answer text, list of source documents).
    """
    payload = {"question": query, "chat_history": chat_history}
    result = CRqa(payload)
    return result["answer"], result["source_documents"]
133
 
 
142
  return embeddings, llm
143
 
144
 
145
# Pull the cached default configuration into module-level names; these are
# handed to main() by the __main__ guard at the bottom of the file.
pinecone_index_name, chroma_collection_name, persist_directory, docsearch_ready, directory_name = init()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
146
 
147
+
148
def main(pinecone_index_name, chroma_collection_name, persist_directory, docsearch_ready, directory_name):
    """Render the Streamlit UI: gather settings, optionally ingest files,
    then run the retrieval-augmented chatbot loop.

    NOTE(review): the incoming docsearch_ready value is overwritten on the
    first line, and directory_name is never referenced in this body — both
    parameters look vestigial; confirm before removing them.
    """
    docsearch_ready = False
    # Chat history is rebuilt every rerun of the script, so it only spans
    # the turns accumulated within a single execution.
    chat_history = []
    # Get user input of whether to use Pinecone or not
    col1, col2, col3 = st.columns([1, 1, 1])
    # create the radio buttons and text input fields
    with col1:
        r_pinecone = st.radio('Use Pinecone?', ('Yes', 'No'))
        r_ingest = st.radio(
            'Ingest file(s)?', ('Yes', 'No'))
    with col2:
        OPENAI_API_KEY = st.text_input(
            "OpenAI API key:", type="password")
        temperature = st.slider('Temperature', 0.0, 1.0, 0.1)
        k_sources = st.slider('# source(s) to print out', 0, 20, 2)
    with col3:
        # Embeddings/LLM are only set up once an OpenAI key is supplied;
        # code further down assumes this branch ran (embeddings/llm would
        # otherwise be unbound).
        if OPENAI_API_KEY:
            embeddings, llm = setup_em_llm(OPENAI_API_KEY, temperature)
            if r_pinecone.lower() == 'yes':
                use_pinecone = True
                PINECONE_API_KEY = st.text_input(
                    "Pinecone API key:", type="password")
                PINECONE_API_ENV = st.text_input(
                    "Pinecone API env:", type="password")
                pinecone_index_name = st.text_input('Pinecone index:')
                pinecone.init(api_key=PINECONE_API_KEY,
                              environment=PINECONE_API_ENV)
            else:
                # Fall back to a local Chroma store persisted on disk.
                use_pinecone = False
                chroma_collection_name = st.text_input(
                    '''Chroma collection name of 3-63 characters:''')
                persist_directory = "./vectorstore"

    # Proceed only when the user has named a store (Pinecone index or
    # Chroma collection).
    if pinecone_index_name or chroma_collection_name:
        if r_ingest.lower() == 'yes':
            files = st.file_uploader(
                'Upload Files', accept_multiple_files=True)
            if files:
                save_file(files)
                all_texts, n_texts = load_files()
                docsearch = ingest(all_texts, use_pinecone, embeddings, pinecone_index_name,
                                   chroma_collection_name, persist_directory)
                docsearch_ready = True
        else:
            # No ingestion requested: attach to an already-populated store.
            st.write(
                'No data is to be ingested. Make sure the Pinecone index or Chroma collection name you provided contains data.')
            docsearch, n_texts = setup_docsearch(use_pinecone, pinecone_index_name,
                                                 embeddings, chroma_collection_name, persist_directory)
            docsearch_ready = True
    if docsearch_ready:
        # number of sources (split-documents when ingesting files); default is 4
        k = min([20, n_texts])
        retriever = setup_retriever(docsearch, k)
        CRqa = ConversationalRetrievalChain.from_llm(
            llm, retriever=retriever, return_source_documents=True)

        st.title('Chatbot')
        # Get user input
        query = st.text_area('Enter your question:', height=10,
                             placeholder='Summarize the context.')
        if query:
            # Generate a reply based on the user input and chat history
            reply, source = get_response(query, chat_history, CRqa)
            # Update the chat history with the user input and system response
            chat_history.append(('User', query))
            chat_history.append(('Bot', reply))
            chat_history_str = '\n'.join(
                [f'{x[0]}: {x[1]}' for x in chat_history])
            st.text_area('Chat record:', value=chat_history_str, height=250)
            # Display sources
            for i, source_i in enumerate(source):
                # Cap the display at the user-chosen number of sources and
                # truncate each excerpt to 400 characters.
                if i < k_sources:
                    if len(source_i.page_content) > 400:
                        page_content = source_i.page_content[:400]
                    else:
                        page_content = source_i.page_content
                    if source_i.metadata:
                        metadata_source = source_i.metadata['source']
                        st.write(
                            f"**_Source {i+1}:_** {metadata_source}: {page_content}")
                        st.write(source_i.metadata)
                    else:
                        st.write(f"**_Source {i+1}:_** {page_content}")
231
+
232
+
233
# Standard entry-point guard: Streamlit re-executes the script top-to-bottom
# on every interaction, and this keeps main() from running on a plain import.
if __name__ == '__main__':
    main(pinecone_index_name, chroma_collection_name, persist_directory,
         docsearch_ready, directory_name)