Chintan Donda committed on
Commit
ac64082
·
1 Parent(s): e11921b

Add a new widget to display the data sources from which the indices have been created

Browse files
Files changed (2) hide show
  1. app.py +57 -1
  2. src/langchain_utils.py +22 -0
app.py CHANGED
@@ -1,6 +1,7 @@
1
  import gradio as gr
2
  import os
3
  import datetime
 
4
 
5
  import src.constants as constants_utils
6
  import src.kkms_kssw as kkms_kssw
@@ -32,6 +33,7 @@ class DomState:
32
  self.weather_forecast = ''
33
  self.weather_forecast_summary = ''
34
  self.indic_translation = ''
 
35
 
36
  # Initialize index (vector store) - This will create a new index from scratch if load_from_existing_index_file == False
37
  self.kkms_kssw_obj = kkms_kssw.KKMS_KSSW()
@@ -160,6 +162,30 @@ class DomState:
160
  return self.kkms_kssw_obj.weather_utils_obj.get_weather_forecast(state, district)
161
 
162
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
163
  def _upload_file(self, files):
164
  file_paths = [file.name for file in files]
165
  return file_paths
@@ -176,6 +202,7 @@ class DomState:
176
  gr.update(visible=False),
177
  gr.update(visible=False),
178
  gr.update(visible=False),
 
179
  ]
180
 
181
  elif choice == "General (AgGPT)":
@@ -185,6 +212,7 @@ class DomState:
185
  gr.update(visible=False),
186
  gr.update(visible=False),
187
  gr.update(visible=False),
 
188
  ]
189
 
190
  elif choice == "Mandi Price":
@@ -194,6 +222,7 @@ class DomState:
194
  gr.update(visible=True),
195
  gr.update(visible=False),
196
  gr.update(visible=False),
 
197
  ]
198
 
199
  elif choice == "Weather":
@@ -203,10 +232,22 @@ class DomState:
203
  gr.update(visible=False),
204
  gr.update(visible=True),
205
  gr.update(visible=False),
 
206
  ]
207
 
208
  elif choice == "Load Custom Data":
209
  return [
 
 
 
 
 
 
 
 
 
 
 
210
  gr.update(visible=False),
211
  gr.update(visible=False),
212
  gr.update(visible=False),
@@ -276,7 +317,8 @@ with gr.Blocks(title='KKMS-Smart-Search-Demo') as demo:
276
  "General (AgGPT)",
277
  "Mandi Price",
278
  "Weather",
279
- "Load Custom Data"
 
280
  ],
281
  label="Query related to",
282
  value="Custom Query"
@@ -519,6 +561,19 @@ with gr.Blocks(title='KKMS-Smart-Search-Demo') as demo:
519
  rowUploadUrls,
520
  ],
521
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
522
 
523
 
524
  widgets.change(
@@ -530,6 +585,7 @@ with gr.Blocks(title='KKMS-Smart-Search-Demo') as demo:
530
  rowMandiPrice,
531
  rowWeather,
532
  rowLoadCustomData,
 
533
  ],
534
  )
535
 
 
1
  import gradio as gr
2
  import os
3
  import datetime
4
+ import json
5
 
6
  import src.constants as constants_utils
7
  import src.kkms_kssw as kkms_kssw
 
33
  self.weather_forecast = ''
34
  self.weather_forecast_summary = ''
35
  self.indic_translation = ''
36
+ self.kb_sources = {}
37
 
38
  # Initialize index (vector store) - This will create a new index from scratch if load_from_existing_index_file == False
39
  self.kkms_kssw_obj = kkms_kssw.KKMS_KSSW()
 
162
  return self.kkms_kssw_obj.weather_utils_obj.get_weather_forecast(state, district)
163
 
164
 
165
def click_handler_for_get_kb_sources(
    self
):
    """Build the text shown in the "Display Data Sources" widget.

    Fetches the mapping ``{index_category: {doc_type: sources}}`` from
    ``self.kkms_kssw_obj.langchain_utils_obj.get_index_category_wise_data_sources()``
    and renders it as one block of text: a 100-character ``=`` rule and a
    ``Question Category`` heading per category, then a 50-character ``=``
    rule and a ``Document type`` heading per non-empty doc type, followed
    by one source per line.

    Returns:
        The rendered text. It is also stored on ``self.kb_sources``,
        replacing the mapping that was held there while rendering.
    """
    self.kb_sources = self.kkms_kssw_obj.langchain_utils_obj.get_index_category_wise_data_sources()

    # Collect fragments and join once. The previous implementation
    # re-assigned the accumulator (`=` instead of `+=`) at the start of each
    # category, so only the last category ever reached the widget.
    parts = []
    for index_category, doc_type in self.kb_sources.items():
        parts.append('\n' + '=' * 100 + '\n')
        parts.append(f'Question Category: {index_category}')
        for dt, source in doc_type.items():
            # Skip empty doc types before emitting the rule, so the output
            # has no dangling separators with nothing beneath them.
            if not source:
                continue
            parts.append('\n' + '=' * 50 + '\n')
            parts.append(f'Document type: {dt}')
            # Sources arrive as sets; sort them so the display order is
            # stable between clicks. key=str keeps this safe for non-str items.
            # NOTE(review): the tabs precede the newline, so they act as
            # trailing whitespace rather than indentation - confirm intent.
            parts.extend(f'\t\t\t\n{doc}' for doc in sorted(source, key=str))

    self.kb_sources = ''.join(parts)
    return self.kb_sources
187
+
188
+
189
  def _upload_file(self, files):
190
  file_paths = [file.name for file in files]
191
  return file_paths
 
202
  gr.update(visible=False),
203
  gr.update(visible=False),
204
  gr.update(visible=False),
205
+ gr.update(visible=False),
206
  ]
207
 
208
  elif choice == "General (AgGPT)":
 
212
  gr.update(visible=False),
213
  gr.update(visible=False),
214
  gr.update(visible=False),
215
+ gr.update(visible=False),
216
  ]
217
 
218
  elif choice == "Mandi Price":
 
222
  gr.update(visible=True),
223
  gr.update(visible=False),
224
  gr.update(visible=False),
225
+ gr.update(visible=False),
226
  ]
227
 
228
  elif choice == "Weather":
 
232
  gr.update(visible=False),
233
  gr.update(visible=True),
234
  gr.update(visible=False),
235
+ gr.update(visible=False),
236
  ]
237
 
238
  elif choice == "Load Custom Data":
239
  return [
240
+ gr.update(visible=False),
241
+ gr.update(visible=False),
242
+ gr.update(visible=False),
243
+ gr.update(visible=False),
244
+ gr.update(visible=True),
245
+ gr.update(visible=False),
246
+ ]
247
+
248
+ elif choice == "Display Data Sources":
249
+ return [
250
+ gr.update(visible=False),
251
  gr.update(visible=False),
252
  gr.update(visible=False),
253
  gr.update(visible=False),
 
317
  "General (AgGPT)",
318
  "Mandi Price",
319
  "Weather",
320
+ "Load Custom Data",
321
+ "Display Data Sources",
322
  ],
323
  label="Query related to",
324
  value="Custom Query"
 
561
  rowUploadUrls,
562
  ],
563
  )
564
+
565
+
566
+ #############################################################################
567
+ # Widget to display which PDFs, text files, and URLs have been ingested and indexed for querying in the KB (Knowledge Base)
568
+ with gr.Row(visible=False) as rowDisplayDataSources:
569
+ with gr.Column(scale=1, min_width=600):
570
+ with gr.Tab(label='Following PDFs, Text files, and URLs have been ingested and indexed in the Knowledge Base and are available for querying.'):
571
+ kb_sources = gr.Textbox(label=f"Data loaded from:", value=dom.kb_sources, interactive=False)
572
+ b_kb_sources = gr.Button("Display Data Sources").style(size='sm')
573
+ b_kb_sources.click(
574
+ fn=dom.click_handler_for_get_kb_sources,
575
+ outputs=kb_sources
576
+ )
577
 
578
 
579
  widgets.change(
 
585
  rowMandiPrice,
586
  rowWeather,
587
  rowLoadCustomData,
588
+ rowDisplayDataSources,
589
  ],
590
  )
591
 
src/langchain_utils.py CHANGED
@@ -75,6 +75,9 @@ class LANGCHAIN_UTILS:
75
  }
76
  ]
77
 
 
 
 
78
 
79
  def generate_prompt_template(
80
  self,
@@ -889,3 +892,22 @@ class LANGCHAIN_UTILS:
889
 
890
  # Or just nuke the persist directory
891
  # !rm -rf self.index_filepath
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
75
  }
76
  ]
77
 
78
+ # Data sources grouped by index category and then by doc_type, for display in the data-sources widget
79
+ self.index_category_doc_type_wise_data_sources = {}
80
+
81
 
82
  def generate_prompt_template(
83
  self,
 
892
 
893
  # Or just nuke the persist directory
894
  # !rm -rf self.index_filepath
895
+
896
+
897
def get_index_category_wise_data_sources(
    self
):
    """Collect the source of every indexed document, grouped by category and doc type.

    Walks ``self.index_category_doc_type_wise_index`` and, for every doc
    type except ``'master'``, adds each document's non-empty
    ``metadata['source']`` to the set stored at
    ``self.index_category_doc_type_wise_data_sources[category][doc_type]``.
    Entries are created on demand and never cleared here, so results
    accumulate across calls.

    Returns:
        The same ``self.index_category_doc_type_wise_data_sources`` mapping
        (``{index_category: {doc_type: set_of_sources}}``), updated in place.
    """
    sources_by_category = self.index_category_doc_type_wise_data_sources

    for category, indices_by_type in self.index_category_doc_type_wise_index.items():
        sources_by_type = sources_by_category.setdefault(category, {})

        for type_name, index in indices_by_type.items():
            if type_name == 'master':
                continue

            # Register the doc type even when it has no index, so it still
            # shows up (with an empty set) in the result.
            collected = sources_by_type.setdefault(type_name, set())
            if not index:
                continue

            # NOTE(review): reads the docstore's private `_dict` mapping;
            # presumably an in-memory docstore - confirm against the index type.
            for stored_doc in index.docstore._dict.values():
                metadata = stored_doc.metadata
                if 'source' not in metadata:
                    continue
                origin = metadata['source']
                if origin:
                    collected.add(origin)

    return sources_by_category