Spaces:
Runtime error
Runtime error
Chintan Donda
commited on
Commit
•
1921a14
1
Parent(s):
8f40cff
Updating widgets, automatically populate dropdowns/textboxes upon event trigger
Browse files- app.py +99 -52
- requirements.txt +2 -1
- src/constants.py +53 -46
- src/data_loader.py +5 -4
- src/langchain_utils.py +4 -4
- src/weather.py +48 -162
app.py
CHANGED
@@ -4,6 +4,7 @@ import datetime
|
|
4 |
|
5 |
import src.constants as constants_utils
|
6 |
import src.kkms_kssw as kkms_kssw
|
|
|
7 |
|
8 |
os.environ["CURL_CA_BUNDLE"] = ""
|
9 |
|
@@ -140,6 +141,25 @@ class DomState:
|
|
140 |
return self.indic_translation
|
141 |
|
142 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
143 |
def _upload_file(self, files):
|
144 |
file_paths = [file.name for file in files]
|
145 |
return file_paths
|
@@ -269,7 +289,7 @@ with gr.Blocks(title='KKMS-KSSW Demo') as demo:
|
|
269 |
with gr.Tab(label='Relevant paragraphs'):
|
270 |
question_category = gr.Dropdown(
|
271 |
constants_utils.INDEX_CATEGORY,
|
272 |
-
label="Select
|
273 |
question = gr.Textbox(label="Enter your question", placeholder='Type the question here')
|
274 |
# Get the Relevant paragraphs for the question asked
|
275 |
relevant_paragraphs = gr.Textbox(label="Relevant paragraphs are:", value=dom.relevant_paragraphs, interactive=False)
|
@@ -284,34 +304,37 @@ with gr.Blocks(title='KKMS-KSSW Demo') as demo:
|
|
284 |
with gr.Tab(label='Sources of relevant paragraphs'):
|
285 |
# Get the Sources of relevant paragraphs
|
286 |
sources_relevant_paragraphs = gr.Textbox(label="Sources of relevant paragraphs are:", interactive=False)
|
287 |
-
|
288 |
-
|
|
|
|
|
|
|
289 |
|
290 |
-
|
291 |
-
|
292 |
-
|
293 |
-
|
294 |
-
|
295 |
-
|
296 |
-
|
297 |
-
|
298 |
-
|
299 |
-
|
300 |
-
|
301 |
-
|
302 |
-
|
303 |
-
|
304 |
-
|
305 |
-
|
306 |
-
|
307 |
-
|
308 |
-
|
309 |
-
|
310 |
-
|
311 |
-
|
312 |
-
|
313 |
-
|
314 |
-
|
315 |
|
316 |
|
317 |
#############################################################################
|
@@ -362,52 +385,76 @@ with gr.Blocks(title='KKMS-KSSW Demo') as demo:
|
|
362 |
label="Select state"
|
363 |
)
|
364 |
|
365 |
-
#
|
366 |
-
|
367 |
-
|
368 |
-
|
369 |
-
|
370 |
|
371 |
-
|
372 |
-
|
373 |
-
|
374 |
-
|
375 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
376 |
with gr.Column(scale=1, min_width=600):
|
377 |
with gr.Tab(label='Weather Forecast Summary'):
|
378 |
# Get the summary of the weather forecast
|
379 |
-
weather_forecast_summary = gr.Textbox(label="Weather Forecast Summary is:",
|
380 |
-
|
381 |
-
|
|
|
|
|
|
|
382 |
|
383 |
# Covert the weather forcast summary in Indian language
|
384 |
with gr.Column(scale=1, min_width=600):
|
385 |
with gr.Tab(label='Weather Forecast Summary in selected language'):
|
386 |
# Select the language
|
387 |
language = gr.Dropdown(
|
388 |
-
|
389 |
label="Select language")
|
390 |
-
indic_weather_forecast_summary = gr.Textbox(label="Weather Forecast Summary in the selected language is:",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
391 |
b_indic_weather_forecast_summary = gr.Button("Get answer in selected language").style(size='sm')
|
392 |
b_indic_weather_forecast_summary.click(fn=dom.click_handler_for_get_indic_translation, inputs=[weather_forecast_summary, language], outputs=[indic_weather_forecast_summary])
|
393 |
|
394 |
with gr.Column(scale=1, min_width=600):
|
395 |
-
|
396 |
-
|
397 |
-
|
398 |
-
|
399 |
-
|
|
|
|
|
400 |
|
401 |
|
402 |
#############################################################################
|
403 |
# Widget to load and process from the custom data source
|
404 |
with gr.Row(visible=False) as rowLoadCustomData:
|
405 |
with gr.Column(scale=1, min_width=600):
|
406 |
-
with gr.Tab(label='Load Custom Data'):
|
407 |
question_category = gr.Dropdown(
|
408 |
constants_utils.INDEX_CATEGORY,
|
409 |
label="Select Query Type")
|
410 |
-
|
411 |
doc_type = gr.Radio(
|
412 |
list(constants_utils.DATA_SOURCES.keys()),
|
413 |
label="Select data source (Supports uploading multiple Files/URLs)",
|
@@ -431,7 +478,7 @@ with gr.Blocks(title='KKMS-KSSW Demo') as demo:
|
|
431 |
|
432 |
with gr.Row(visible=False) as rowUploadOnlinePdf:
|
433 |
with gr.Column(scale=1, min_width=600):
|
434 |
-
urls = gr.Textbox(label="Enter URLs for Online PDF (Supports uploading from multiple URLs. Enter the URLs in comma (,) separated format)", placeholder='Type the URLs here')
|
435 |
b_urls = gr.Button("Load Online PDFs").style(size='sm')
|
436 |
b_urls.click(
|
437 |
fn=dom.click_handler_for_load_files_urls,
|
@@ -455,7 +502,7 @@ with gr.Blocks(title='KKMS-KSSW Demo') as demo:
|
|
455 |
|
456 |
with gr.Row(visible=False) as rowUploadUrls:
|
457 |
with gr.Column(scale=1, min_width=600):
|
458 |
-
urls = gr.Textbox(label="Enter URLs (Supports uploading from multiple URLs. Enter the URLs in comma (,) separated format)", placeholder='Type the URLs here')
|
459 |
b_urls = gr.Button("Load URLs").style(size='sm')
|
460 |
b_urls.click(
|
461 |
fn=dom.click_handler_for_load_files_urls,
|
|
|
4 |
|
5 |
import src.constants as constants_utils
|
6 |
import src.kkms_kssw as kkms_kssw
|
7 |
+
import src.weather as weather_utils
|
8 |
|
9 |
os.environ["CURL_CA_BUNDLE"] = ""
|
10 |
|
|
|
141 |
return self.indic_translation
|
142 |
|
143 |
|
144 |
+
def click_handler_for_weather_forecast_districts_dropdown_list_update(
|
145 |
+
self,
|
146 |
+
state,
|
147 |
+
district
|
148 |
+
):
|
149 |
+
return gr.update(
|
150 |
+
choices=self.kkms_kssw_obj.weather_utils_obj.get_district_names(state)
|
151 |
+
)
|
152 |
+
|
153 |
+
|
154 |
+
def click_handler_for_weather_forecast_district(
|
155 |
+
self,
|
156 |
+
state,
|
157 |
+
district,
|
158 |
+
weather
|
159 |
+
):
|
160 |
+
return self.kkms_kssw_obj.weather_utils_obj.get_weather_forecast(state, district)
|
161 |
+
|
162 |
+
|
163 |
def _upload_file(self, files):
|
164 |
file_paths = [file.name for file in files]
|
165 |
return file_paths
|
|
|
289 |
with gr.Tab(label='Relevant paragraphs'):
|
290 |
question_category = gr.Dropdown(
|
291 |
constants_utils.INDEX_CATEGORY,
|
292 |
+
label="Select Question Category")
|
293 |
question = gr.Textbox(label="Enter your question", placeholder='Type the question here')
|
294 |
# Get the Relevant paragraphs for the question asked
|
295 |
relevant_paragraphs = gr.Textbox(label="Relevant paragraphs are:", value=dom.relevant_paragraphs, interactive=False)
|
|
|
304 |
with gr.Tab(label='Sources of relevant paragraphs'):
|
305 |
# Get the Sources of relevant paragraphs
|
306 |
sources_relevant_paragraphs = gr.Textbox(label="Sources of relevant paragraphs are:", interactive=False)
|
307 |
+
relevant_paragraphs.change(
|
308 |
+
dom.click_handler_for_relevant_paragraphs_source,
|
309 |
+
relevant_paragraphs,
|
310 |
+
sources_relevant_paragraphs
|
311 |
+
)
|
312 |
|
313 |
+
# Get the exact answer for the question asked from the retrieved Relevant paragraphs
|
314 |
+
with gr.Column(scale=1, min_width=600):
|
315 |
+
with gr.Tab(label='Answer'):
|
316 |
+
answer = gr.Textbox(label="Answer is:", value=dom.answer, interactive=False)
|
317 |
+
relevant_paragraphs.change(
|
318 |
+
dom.click_handler_for_get_answer,
|
319 |
+
[relevant_paragraphs, question],
|
320 |
+
answer
|
321 |
+
)
|
322 |
+
|
323 |
+
# Covert the answer to Indian language
|
324 |
+
with gr.Column(scale=1, min_width=600):
|
325 |
+
with gr.Tab(label='Answer in selected language'):
|
326 |
+
# Select the language
|
327 |
+
language = gr.Dropdown(
|
328 |
+
list(constants_utils.INDIC_LANGUAGE.keys()),
|
329 |
+
label="Select language")
|
330 |
+
indic_lang_answer = gr.Textbox(label="Answer in the selected language is:", interactive=False)
|
331 |
+
answer.change(
|
332 |
+
dom.click_handler_for_get_indic_translation,
|
333 |
+
answer,
|
334 |
+
indic_lang_answer
|
335 |
+
)
|
336 |
+
b_indic_lang_answer = gr.Button("Get answer in selected language").style(size='sm')
|
337 |
+
b_indic_lang_answer.click(fn=dom.click_handler_for_get_indic_translation, inputs=[answer, language], outputs=[indic_lang_answer])
|
338 |
|
339 |
|
340 |
#############################################################################
|
|
|
385 |
label="Select state"
|
386 |
)
|
387 |
|
388 |
+
# Select District
|
389 |
+
district = gr.Dropdown(
|
390 |
+
choices=[],
|
391 |
+
label="Select District"
|
392 |
+
)
|
393 |
|
394 |
+
# Get districts of the selected state
|
395 |
+
state.change(
|
396 |
+
dom.click_handler_for_weather_forecast_districts_dropdown_list_update,
|
397 |
+
state,
|
398 |
+
district
|
399 |
+
)
|
400 |
+
|
401 |
+
# Get weather forecast on district selection event
|
402 |
+
district_weather = gr.Textbox(label=f"Weather forecast is:", interactive=False)
|
403 |
+
district.change(
|
404 |
+
dom.click_handler_for_weather_forecast_district,
|
405 |
+
[state, district],
|
406 |
+
district_weather
|
407 |
+
)
|
408 |
+
|
409 |
with gr.Column(scale=1, min_width=600):
|
410 |
with gr.Tab(label='Weather Forecast Summary'):
|
411 |
# Get the summary of the weather forecast
|
412 |
+
weather_forecast_summary = gr.Textbox(label="Weather Forecast Summary is:", interactive=False)
|
413 |
+
district.change(
|
414 |
+
dom.click_handler_for_weather_forecast_summary,
|
415 |
+
district_weather,
|
416 |
+
weather_forecast_summary
|
417 |
+
)
|
418 |
|
419 |
# Covert the weather forcast summary in Indian language
|
420 |
with gr.Column(scale=1, min_width=600):
|
421 |
with gr.Tab(label='Weather Forecast Summary in selected language'):
|
422 |
# Select the language
|
423 |
language = gr.Dropdown(
|
424 |
+
list(constants_utils.INDIC_LANGUAGE.keys()),
|
425 |
label="Select language")
|
426 |
+
indic_weather_forecast_summary = gr.Textbox(label="Weather Forecast Summary in the selected language is:", interactive=False)
|
427 |
+
|
428 |
+
# By default display weather forecast summary in Hindi. User can change it later on.
|
429 |
+
weather_forecast_summary.change(
|
430 |
+
dom.click_handler_for_get_indic_translation,
|
431 |
+
weather_forecast_summary,
|
432 |
+
indic_weather_forecast_summary
|
433 |
+
)
|
434 |
+
|
435 |
+
# User can get the weather forecast summary in their preferred language as well
|
436 |
b_indic_weather_forecast_summary = gr.Button("Get answer in selected language").style(size='sm')
|
437 |
b_indic_weather_forecast_summary.click(fn=dom.click_handler_for_get_indic_translation, inputs=[weather_forecast_summary, language], outputs=[indic_weather_forecast_summary])
|
438 |
|
439 |
with gr.Column(scale=1, min_width=600):
|
440 |
+
with gr.Tab(label='Weather Info'):
|
441 |
+
weather = gr.Textbox(label=f"Current weather is:", interactive=False)
|
442 |
+
district.change(
|
443 |
+
dom.click_handler_for_get_weather,
|
444 |
+
district,
|
445 |
+
weather
|
446 |
+
)
|
447 |
|
448 |
|
449 |
#############################################################################
|
450 |
# Widget to load and process from the custom data source
|
451 |
with gr.Row(visible=False) as rowLoadCustomData:
|
452 |
with gr.Column(scale=1, min_width=600):
|
453 |
+
with gr.Tab(label='Load Custom Data (Do not upload data from the same file/url again. Once it is uploaded, it gets stored forever.)'):
|
454 |
question_category = gr.Dropdown(
|
455 |
constants_utils.INDEX_CATEGORY,
|
456 |
label="Select Query Type")
|
457 |
+
|
458 |
doc_type = gr.Radio(
|
459 |
list(constants_utils.DATA_SOURCES.keys()),
|
460 |
label="Select data source (Supports uploading multiple Files/URLs)",
|
|
|
478 |
|
479 |
with gr.Row(visible=False) as rowUploadOnlinePdf:
|
480 |
with gr.Column(scale=1, min_width=600):
|
481 |
+
urls = gr.Textbox(label="Enter URLs for Online PDF (Supports uploading from multiple URLs. Enter the URLs in comma (,) separated format.)", placeholder='Type the URLs here')
|
482 |
b_urls = gr.Button("Load Online PDFs").style(size='sm')
|
483 |
b_urls.click(
|
484 |
fn=dom.click_handler_for_load_files_urls,
|
|
|
502 |
|
503 |
with gr.Row(visible=False) as rowUploadUrls:
|
504 |
with gr.Column(scale=1, min_width=600):
|
505 |
+
urls = gr.Textbox(label="Enter URLs (Supports uploading from multiple URLs. Enter the URLs in comma (,) separated format.)", placeholder='Type the URLs here')
|
506 |
b_urls = gr.Button("Load URLs").style(size='sm')
|
507 |
b_urls.click(
|
508 |
fn=dom.click_handler_for_load_files_urls,
|
requirements.txt
CHANGED
@@ -18,4 +18,5 @@ tiktoken
|
|
18 |
googletrans==3.1.0a0
|
19 |
BeautifulSoup4
|
20 |
pypdf
|
21 |
-
PyPDF2
|
|
|
|
18 |
googletrans==3.1.0a0
|
19 |
BeautifulSoup4
|
20 |
pypdf
|
21 |
+
PyPDF2
|
22 |
+
html2text
|
src/constants.py
CHANGED
@@ -1,5 +1,6 @@
|
|
1 |
import os
|
2 |
import src.web_crawler as web_crawler_utils
|
|
|
3 |
|
4 |
LOAD_FROM_EXISTING_INDEX_STORE = False
|
5 |
INDEX_TYPE = 'FAISS'
|
@@ -17,10 +18,10 @@ if not os.path.exists(OUTPUT_PATH):
|
|
17 |
INDEX_CATEGORY = [
|
18 |
'crops',
|
19 |
'fruits',
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
'general',
|
25 |
'vegetables'
|
26 |
]
|
@@ -28,25 +29,27 @@ INDEX_CATEGORY = [
|
|
28 |
# Doctype of the master index of each index category. Master index for each index category would be stored under this key.
|
29 |
INDEX_CATEGORY_MASTER_INDEX_DOC_TYPE = 'master'
|
30 |
|
31 |
-
# Output index name if creating the index/vector store using GPTSimpleVectorIndex
|
32 |
-
INDEX_FILENAME = os.path.join(OUTPUT_PATH, 'index.json')
|
33 |
-
|
34 |
# List of data sources/types & from where to load the data and create the index/vector store
|
35 |
# 2nd item is the type of source from where the data would be loaded. Currently it could come from either a file or URL.
|
36 |
DATA_SOURCES = {
|
37 |
'PDF': 'pdf',
|
38 |
'Text File': 'textfile',
|
39 |
'Online PDF': 'online_pdf', # web_crawler_utils.get_ipm_packages_pdfs_urls()[:1]
|
40 |
-
|
41 |
}
|
42 |
|
43 |
# LangChain related constants
|
|
|
|
|
|
|
44 |
TEXT_SPLITTER_CHUNK_SIZE = 1000
|
45 |
TEXT_SPLITTER_CHUNK_OVERLAP = 0
|
46 |
TEXT_SPLITTER_SEPARATOR = '\n\n'
|
47 |
|
48 |
|
49 |
URLS = [
|
|
|
|
|
50 |
'https://agricoop.nic.in/#gsc.tab=0',
|
51 |
|
52 |
'https://dmi.gov.in/Documents/GrantCAGrapes.pdf',
|
@@ -120,44 +123,48 @@ MANDI_PRICE_STATES = [
|
|
120 |
]
|
121 |
|
122 |
# State list used in the Weather forecast widget dropdown list
|
123 |
-
|
124 |
-
|
125 |
-
|
126 |
-
|
127 |
-
'
|
128 |
-
'
|
129 |
-
'
|
130 |
-
'
|
131 |
-
'
|
132 |
-
'
|
133 |
-
'
|
134 |
-
'
|
135 |
-
'
|
136 |
-
'
|
137 |
-
'
|
138 |
-
'
|
139 |
-
'
|
140 |
-
|
141 |
-
'
|
142 |
-
'
|
143 |
-
'
|
144 |
-
'
|
145 |
-
'
|
146 |
-
'
|
147 |
-
'
|
148 |
-
'
|
149 |
-
'
|
150 |
-
'
|
151 |
-
'
|
152 |
-
'
|
153 |
-
'
|
154 |
-
'
|
155 |
-
'
|
156 |
-
'
|
157 |
-
'
|
158 |
-
'
|
159 |
-
'
|
160 |
-
|
|
|
|
|
|
|
|
|
161 |
|
162 |
# LIST OF PESTICIDES WHICH ARE BANNED AND RESTRICTED USE (List created from: https://pib.gov.in/PressReleaseIframePage.aspx?PRID=1896140)
|
163 |
BANNED_PESTICIDES_FORMULATIONS = [
|
|
|
1 |
import os
|
2 |
import src.web_crawler as web_crawler_utils
|
3 |
+
import src.weather as weather_utils
|
4 |
|
5 |
LOAD_FROM_EXISTING_INDEX_STORE = False
|
6 |
INDEX_TYPE = 'FAISS'
|
|
|
18 |
INDEX_CATEGORY = [
|
19 |
'crops',
|
20 |
'fruits',
|
21 |
+
'pest_management',
|
22 |
+
'govt_policy',
|
23 |
+
'insurance',
|
24 |
+
'soil',
|
25 |
'general',
|
26 |
'vegetables'
|
27 |
]
|
|
|
29 |
# Doctype of the master index of each index category. Master index for each index category would be stored under this key.
|
30 |
INDEX_CATEGORY_MASTER_INDEX_DOC_TYPE = 'master'
|
31 |
|
|
|
|
|
|
|
32 |
# List of data sources/types & from where to load the data and create the index/vector store
|
33 |
# 2nd item is the type of source from where the data would be loaded. Currently it could come from either a file or URL.
|
34 |
DATA_SOURCES = {
|
35 |
'PDF': 'pdf',
|
36 |
'Text File': 'textfile',
|
37 |
'Online PDF': 'online_pdf', # web_crawler_utils.get_ipm_packages_pdfs_urls()[:1]
|
38 |
+
'URLs': 'urls',
|
39 |
}
|
40 |
|
41 |
# LangChain related constants
|
42 |
+
SIMILARITY_TOP_K = 1
|
43 |
+
MODE = 'embedding'
|
44 |
+
RESPONSE_MODE = 'default'
|
45 |
TEXT_SPLITTER_CHUNK_SIZE = 1000
|
46 |
TEXT_SPLITTER_CHUNK_OVERLAP = 0
|
47 |
TEXT_SPLITTER_SEPARATOR = '\n\n'
|
48 |
|
49 |
|
50 |
URLS = [
|
51 |
+
# Govt. Schemes
|
52 |
+
'https://agricoop.nic.in/en/Major#gsc.tab=0'
|
53 |
'https://agricoop.nic.in/#gsc.tab=0',
|
54 |
|
55 |
'https://dmi.gov.in/Documents/GrantCAGrapes.pdf',
|
|
|
123 |
]
|
124 |
|
125 |
# State list used in the Weather forecast widget dropdown list
|
126 |
+
weather_utils_obj = weather_utils.WEATHER()
|
127 |
+
WEATHER_FORECAST_STATES = weather_utils_obj.get_state_names()
|
128 |
+
|
129 |
+
WEATHER_FORECAST_STATE_CODES = {
|
130 |
+
'Andaman-Nicobar': '01',
|
131 |
+
'Andhra-Pradesh': '02',
|
132 |
+
'Arunachal-Pradesh': '03',
|
133 |
+
'Assam': '04',
|
134 |
+
'Bihar': '05',
|
135 |
+
'Chandigarh': '06',
|
136 |
+
'Chhattisgarh': '07',
|
137 |
+
'Dadra-and-Nagar-Haveli': '08',
|
138 |
+
'Daman-and-Diu': '09',
|
139 |
+
'Delhi': '10',
|
140 |
+
'Goa': '11',
|
141 |
+
'Gujarat': '12',
|
142 |
+
'Haryana': '13',
|
143 |
+
# 14
|
144 |
+
'Himachal-Pradesh': '15',
|
145 |
+
'Jammu-Kashmir': '16',
|
146 |
+
'Jharkhand': '17',
|
147 |
+
'Karnataka': '18',
|
148 |
+
'Kerala': '19',
|
149 |
+
'Lakshadweep': '20',
|
150 |
+
'Madhya-Pradesh': '21',
|
151 |
+
'Maharashtra': '22',
|
152 |
+
'Manipur': '23',
|
153 |
+
'Meghalaya': '24',
|
154 |
+
'Mizoram': '25',
|
155 |
+
'Nagaland': '26',
|
156 |
+
'Odisha': '27',
|
157 |
+
'Pondicherry': '28',
|
158 |
+
'Punjab': '29',
|
159 |
+
'Rajasthan': '30',
|
160 |
+
'Sikkim': '31',
|
161 |
+
'Tamilnadu': '32',
|
162 |
+
'Telangana': '33',
|
163 |
+
'Tripura': '34',
|
164 |
+
'Uttar-Pradesh': '35',
|
165 |
+
'Uttarakhand': '36',
|
166 |
+
'West-Bengal': '37',
|
167 |
+
}
|
168 |
|
169 |
# LIST OF PESTICIDES WHICH ARE BANNED AND RESTRICTED USE (List created from: https://pib.gov.in/PressReleaseIframePage.aspx?PRID=1896140)
|
170 |
BANNED_PESTICIDES_FORMULATIONS = [
|
src/data_loader.py
CHANGED
@@ -4,11 +4,12 @@ import pandas as pd
|
|
4 |
from pathlib import Path
|
5 |
import glob
|
6 |
|
7 |
-
from llama_index import GPTSimpleVectorIndex, download_loader, SimpleDirectoryReader
|
8 |
from langchain.document_loaders import PyPDFLoader, TextLoader
|
9 |
from langchain.agents import initialize_agent, Tool
|
10 |
from langchain.llms import OpenAI
|
11 |
from langchain.chains.conversation.memory import ConversationBufferMemory
|
|
|
12 |
|
13 |
import src.utils as utils
|
14 |
|
@@ -115,10 +116,10 @@ class DATA_LOADER:
|
|
115 |
urls=urls,
|
116 |
url_type=doc_type
|
117 |
)
|
118 |
-
BeautifulSoupWebReader = download_loader("BeautifulSoupWebReader")
|
119 |
-
loader = BeautifulSoupWebReader()
|
120 |
# Load data from URLs
|
121 |
-
|
|
|
|
|
122 |
|
123 |
# Load data from text file(s)
|
124 |
elif doc_type == 'textfile':
|
|
|
4 |
from pathlib import Path
|
5 |
import glob
|
6 |
|
7 |
+
from llama_index import GPTSimpleVectorIndex, download_loader, SimpleDirectoryReader, SimpleWebPageReader
|
8 |
from langchain.document_loaders import PyPDFLoader, TextLoader
|
9 |
from langchain.agents import initialize_agent, Tool
|
10 |
from langchain.llms import OpenAI
|
11 |
from langchain.chains.conversation.memory import ConversationBufferMemory
|
12 |
+
from langchain.docstore.document import Document
|
13 |
|
14 |
import src.utils as utils
|
15 |
|
|
|
116 |
urls=urls,
|
117 |
url_type=doc_type
|
118 |
)
|
|
|
|
|
119 |
# Load data from URLs
|
120 |
+
docs = SimpleWebPageReader(html_to_text=True).load_data(valid_urls)
|
121 |
+
docs = [Document(page_content=doc.text) for doc in docs]
|
122 |
+
documents.extend(docs)
|
123 |
|
124 |
# Load data from text file(s)
|
125 |
elif doc_type == 'textfile':
|
src/langchain_utils.py
CHANGED
@@ -645,9 +645,9 @@ class LANGCHAIN_UTILS:
|
|
645 |
def query(self,
|
646 |
question,
|
647 |
question_category,
|
648 |
-
mode=
|
649 |
-
response_mode=
|
650 |
-
similarity_top_k=
|
651 |
required_keywords=[],
|
652 |
exclude_keywords=[],
|
653 |
verbose=False
|
@@ -759,7 +759,7 @@ class LANGCHAIN_UTILS:
|
|
759 |
files_or_urls,
|
760 |
index_category
|
761 |
):
|
762 |
-
logger.info(f'Uploading data for: {index_category}
|
763 |
|
764 |
self.documents = []
|
765 |
self.index = None
|
|
|
645 |
def query(self,
|
646 |
question,
|
647 |
question_category,
|
648 |
+
mode=constants_utils.MODE,
|
649 |
+
response_mode=constants_utils.RESPONSE_MODE,
|
650 |
+
similarity_top_k=constants_utils.SIMILARITY_TOP_K,
|
651 |
required_keywords=[],
|
652 |
exclude_keywords=[],
|
653 |
verbose=False
|
|
|
759 |
files_or_urls,
|
760 |
index_category
|
761 |
):
|
762 |
+
logger.info(f'Uploading data for: {index_category}; from: {doc_type}')
|
763 |
|
764 |
self.documents = []
|
765 |
self.index = None
|
src/weather.py
CHANGED
@@ -1,178 +1,61 @@
|
|
1 |
import requests
|
2 |
from bs4 import BeautifulSoup as bs
|
|
|
3 |
|
4 |
|
5 |
-
|
6 |
-
|
7 |
-
|
8 |
-
'Arunachal-Pradesh': '03',
|
9 |
-
'Assam': '04',
|
10 |
-
'Bihar': '05',
|
11 |
-
'Chandigarh': '06',
|
12 |
-
'Chhattisgarh': '07',
|
13 |
-
'Dadra-and-Nagar-Haveli': '08',
|
14 |
-
'Daman-and-Diu': '09',
|
15 |
-
'Delhi': '10',
|
16 |
-
'Goa': '11',
|
17 |
-
'Gujarat': '12',
|
18 |
-
'Haryana': '13',
|
19 |
-
# 14
|
20 |
-
'Himachal-Pradesh': '15',
|
21 |
-
'Jammu-Kashmir': '16',
|
22 |
-
'Jharkhand': '17',
|
23 |
-
'Karnataka': '18',
|
24 |
-
'Kerala': '19',
|
25 |
-
'Lakshadweep': '20',
|
26 |
-
'Madhya-Pradesh': '21',
|
27 |
-
'Maharashtra': '22',
|
28 |
-
'Manipur': '23',
|
29 |
-
'Meghalaya': '24',
|
30 |
-
'Mizoram': '25',
|
31 |
-
'Nagaland': '26',
|
32 |
-
'Odisha': '27',
|
33 |
-
'Pondicherry': '28',
|
34 |
-
'Punjab': '29',
|
35 |
-
'Rajasthan': '30',
|
36 |
-
'Sikkim': '31',
|
37 |
-
'Tamilnadu': '32',
|
38 |
-
'Telangana': '33',
|
39 |
-
'Tripura': '34',
|
40 |
-
'Uttar-Pradesh': '35',
|
41 |
-
'Uttarakhand': '36',
|
42 |
-
'West-Bengal': '37',
|
43 |
-
}
|
44 |
-
|
45 |
-
# List of states that are given as the input selection to https://nwp.imd.gov.in/blf/blf_temp/ to get the weather forecast
|
46 |
-
STATES = {
|
47 |
-
'Andaman-Nicobar': {},
|
48 |
-
|
49 |
-
'Andhra-Pradesh': {},
|
50 |
-
|
51 |
-
'Arunachal-Pradesh': {},
|
52 |
-
|
53 |
-
'Assam': {},
|
54 |
-
|
55 |
-
'Bihar': {},
|
56 |
-
|
57 |
-
'Chandigarh': {},
|
58 |
-
|
59 |
-
'Chhattisgarh': {},
|
60 |
-
|
61 |
-
'Dadra-and-Nagar-Haveli': {},
|
62 |
-
|
63 |
-
'Daman-and-Diu': {},
|
64 |
-
|
65 |
-
'Delhi': {
|
66 |
-
'CENTRAL-DELHI': ['CENTRAL-DELHI'],
|
67 |
-
'EAST-DELHI': ['EAST-DELHI'],
|
68 |
-
'NEW-DELHI': ['NEW-DELHI'],
|
69 |
-
'NORTH-DELHI': ['NORTH-DELHI'],
|
70 |
-
'NORTH-EAST-DELHI': ['NORTH-EAST-DELHI'],
|
71 |
-
'NORTH-WEST-DELHI': ['NORTH-WEST-DELHI'],
|
72 |
-
'SHAHDARA': ['SHAHDARA'],
|
73 |
-
'SOUTH-DELHI': ['SOUTH-DELHI'],
|
74 |
-
'SOUTH-EAST-DELHI': ['SOUTH-EAST-DELHI'],
|
75 |
-
'SOUTH-WEST-DELHI': ['SOUTH-WEST-DELHI'],
|
76 |
-
'WEST-DELHI': ['WEST-DELHI'],
|
77 |
-
},
|
78 |
-
|
79 |
-
'Goa': {},
|
80 |
-
|
81 |
-
'Gujarat': {
|
82 |
-
'AHMADABAD': ['AHMEDABAD-CITY', 'BAVLA', 'DASKROI', 'DETROJ-RAMPURA', 'DHANDHUKA', 'DHOLERA', 'DHOLKA', 'MANDAL', 'SANAND', 'VIRAMGAM'],
|
83 |
-
'AMRELI': ['AMRELI', 'BABRA', 'BAGASARA', 'DHARI', 'JAFRABAD', 'KHAMBHA', 'KUNKAVAV-VADIA', 'LATHI', 'LILIA', 'RAJULA', 'SAVERKUNDLA'],
|
84 |
-
'ANAND': [],
|
85 |
-
'ARVALLI': [],
|
86 |
-
'BANASKANTHA': [],
|
87 |
-
'BHARUCH': [],
|
88 |
-
'BHAVNAGAR': [],
|
89 |
-
'BOTAD': [],
|
90 |
-
'CHHOTAUDEPUR': [],
|
91 |
-
'DANG': [],
|
92 |
-
'DEVBHUMI-DWARKA': [],
|
93 |
-
'DOHAD': [],
|
94 |
-
'GANDHINAGAR': [],
|
95 |
-
'GIR-SOMNATH': [],
|
96 |
-
'JAMNAGAR': [],
|
97 |
-
'JUNAGADH': [],
|
98 |
-
'KACHCHH': [],
|
99 |
-
'KHEDA': [],
|
100 |
-
'MAHESANA': [],
|
101 |
-
'MAHISAGAR': [],
|
102 |
-
'MORBI': [],
|
103 |
-
'NARMADA': [],
|
104 |
-
'NAVSARI': [],
|
105 |
-
'PANCH-MAHALS': [],
|
106 |
-
'PATAN': [],
|
107 |
-
'PORBANDAR': [],
|
108 |
-
'RAJKOT': [],
|
109 |
-
'SABAR-KANTHA': [],
|
110 |
-
'SURAT': ['BARDOLI', 'CHORASI', 'KAMREJ', 'MAHUVA', 'MANDVI', 'MANGROL', 'OLPAD', 'PALSANA', 'SURAT-CITY', 'UMARPADA'],
|
111 |
-
'SURENDRANAGAR': [],
|
112 |
-
'TAPI': [],
|
113 |
-
'VADODARA': [],
|
114 |
-
'VALSAD': [],
|
115 |
-
},
|
116 |
-
|
117 |
-
'Haryana': {},
|
118 |
-
|
119 |
-
'Himachal-Pradesh': {},
|
120 |
-
|
121 |
-
'Jammu-Kashmir': {},
|
122 |
-
|
123 |
-
'Jharkhand': {},
|
124 |
-
|
125 |
-
'Karnataka': {},
|
126 |
-
|
127 |
-
'Kerala': {},
|
128 |
-
|
129 |
-
'Lakshadweep': {},
|
130 |
-
|
131 |
-
'Madhya-Pradesh': {},
|
132 |
-
|
133 |
-
'Maharashtra': {},
|
134 |
-
|
135 |
-
'Manipur': {},
|
136 |
-
|
137 |
-
'Meghalaya': {},
|
138 |
-
|
139 |
-
'Mizoram': {},
|
140 |
-
|
141 |
-
'Nagaland': {},
|
142 |
-
|
143 |
-
'Odisha': {},
|
144 |
-
|
145 |
-
'Pondicherry': {},
|
146 |
-
|
147 |
-
'Punjab': {},
|
148 |
-
|
149 |
-
'Rajasthan': {},
|
150 |
-
|
151 |
-
'Sikkim': {},
|
152 |
-
|
153 |
-
'Tamilnadu': {},
|
154 |
-
|
155 |
-
'Telangana': {},
|
156 |
|
157 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
158 |
|
159 |
-
'Uttar-Pradesh': {},
|
160 |
|
161 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
162 |
|
163 |
-
|
164 |
-
}
|
|
|
|
|
165 |
|
166 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
167 |
|
168 |
-
|
169 |
-
|
170 |
-
self.
|
|
|
|
|
171 |
|
172 |
|
173 |
# Weather forecast from Govt. website
|
174 |
-
def get_weather_forecast(
|
175 |
-
self
|
|
|
|
|
|
|
|
|
|
|
176 |
self.block_url = f'{self.base_url}/table2.php'
|
177 |
|
178 |
response = requests.get(self.district_url if not is_block_level else self.block_url)
|
@@ -182,7 +65,10 @@ class WEATHER:
|
|
182 |
|
183 |
|
184 |
# Weather using Google weather API
|
185 |
-
def get_weather(
|
|
|
|
|
|
|
186 |
city = city + " weather"
|
187 |
city = city.replace(" ", "+")
|
188 |
|
|
|
1 |
import requests
|
2 |
from bs4 import BeautifulSoup as bs
|
3 |
+
import src.constants as constants_utils
|
4 |
|
5 |
|
6 |
+
class WEATHER:
|
7 |
+
def __init__(self):
|
8 |
+
self.base_url = 'https://nwp.imd.gov.in/blf/blf_temp'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
9 |
|
10 |
+
self.states = []
|
11 |
+
self.districts = []
|
12 |
+
self.states_districts = dict(
|
13 |
+
(ds, None) for ds in list(constants_utils.DATA_SOURCES.values()))
|
14 |
+
|
15 |
+
self.headers = {
|
16 |
+
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
|
17 |
+
}
|
18 |
|
|
|
19 |
|
20 |
+
def get_state_names(
|
21 |
+
self
|
22 |
+
):
|
23 |
+
response = requests.get(
|
24 |
+
self.base_url,
|
25 |
+
headers=self.headers,
|
26 |
+
)
|
27 |
|
28 |
+
soup = bs(response.text, 'html.parser')
|
29 |
+
self.states = soup.findAll('select', {'onchange': 'window.location.href=this.value'}, limit=None)
|
30 |
+
self.states = [state.strip() for state in self.states[0].text.split('\n') if state and state != 'Select']
|
31 |
+
return self.states
|
32 |
|
33 |
|
34 |
+
def get_district_names(
|
35 |
+
self,
|
36 |
+
state_name
|
37 |
+
):
|
38 |
+
url = f"{self.base_url}/dis.php?value={constants_utils.WEATHER_FORECAST_STATE_CODES.get(state_name, '') + state_name}"
|
39 |
+
response = requests.get(
|
40 |
+
url,
|
41 |
+
headers=self.headers,
|
42 |
+
)
|
43 |
|
44 |
+
soup = bs(response.text, 'html.parser')
|
45 |
+
self.districts = soup.findAll('select', {'name': 'dis'}, limit=None)
|
46 |
+
self.districts = [district.strip() for district in self.districts[0].text.split('\n') if district and district != 'Select']
|
47 |
+
# self.districts = [district for district in self.districts[0].text.split('\n\n') if district]
|
48 |
+
return self.districts
|
49 |
|
50 |
|
51 |
# Weather forecast from Govt. website
|
52 |
+
def get_weather_forecast(
|
53 |
+
self,
|
54 |
+
state,
|
55 |
+
district,
|
56 |
+
is_block_level=False
|
57 |
+
):
|
58 |
+
self.district_url = f"{self.base_url}/block.php?dis={constants_utils.WEATHER_FORECAST_STATE_CODES.get(state, '') + district}"
|
59 |
self.block_url = f'{self.base_url}/table2.php'
|
60 |
|
61 |
response = requests.get(self.district_url if not is_block_level else self.block_url)
|
|
|
65 |
|
66 |
|
67 |
# Weather using Google weather API
|
68 |
+
def get_weather(
|
69 |
+
self,
|
70 |
+
city
|
71 |
+
):
|
72 |
city = city + " weather"
|
73 |
city = city.replace(" ", "+")
|
74 |
|