Chintan Donda committed on
Commit
a447435
1 Parent(s): e52b65f

Application file, KKMS repo and utils files

Browse files
app.py ADDED
@@ -0,0 +1,226 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import openai
3
+ import os
4
+ import datetime
5
+ import kkms_kssw as kkms_kssw
6
+ import utils.constants as constants_utils
7
+
8
+
9
+ # Create and launch Gradio Web UI
10
class DomState:
    """Per-session UI state for the Gradio app, plus the click handlers
    that populate each widget's output box."""

    def __init__(self):
        # Cached widget outputs (used as the initial values of the textboxes).
        self.relevant_paragraphs = ''
        self.answer = ''
        self.summary = ''
        self.mandi_price = ''
        # Default Mandi price window: two days ago .. today (yyyy-mm-dd).
        self.mandi_from_date = (datetime.datetime.now() - datetime.timedelta(days=2)).strftime('%Y-%m-%d')
        self.mandi_to_date = datetime.datetime.now().strftime('%Y-%m-%d')
        self.weather_info = ''
        self.weather_forecast = ''
        self.weather_forecast_summary = ''
        self.indic_lang_answer = ''

        # Initialize index (vector store).
        # BUG FIX: KKMS_KSSW.initialize_index() takes (save_index_to_disk,
        # index_type); the original call passed INDEX_FILENAME positionally
        # into save_index_to_disk, which only "worked" because a non-empty
        # string is truthy. Pass keyword arguments instead.
        self.kkms_kssw_obj = kkms_kssw.KKMS_KSSW()
        self.kkms_kssw_obj.initialize_index(index_type='GPTSimpleVectorIndex')

    def click_handler_for_get_relevant_paragraphs(self,
                                                  question,
                                                  mode='default',
                                                  response_mode='default',
                                                  similarity_top_k=2,
                                                  ):
        """Query the vector index and cache/return the top-k relevant paragraphs."""
        self.relevant_paragraphs = self.kkms_kssw_obj.query(question,
                                                            mode=mode,
                                                            response_mode=response_mode,
                                                            similarity_top_k=similarity_top_k,
                                                            # required_keywords=required_keywords_list,
                                                            # exclude_keywords=exclude_keywords_list,
                                                            )
        return self.relevant_paragraphs

    def click_handler_for_summary(self, answer):
        """Return an extractive summary of `answer`.

        BUG FIX: the original stored the result in a misspelled attribute
        (`self.sumamry`), so `self.summary` stayed permanently empty.
        """
        self.summary = self.kkms_kssw_obj.langchain_utils_obj.get_textual_summary(answer)
        return self.summary

    def click_handler_for_get_answer(self,
                                     relevant_paragraphs,
                                     question
                                     ):
        """Extract the exact answer to `question` from the retrieved paragraphs."""
        self.answer = self.kkms_kssw_obj.langchain_utils_obj.get_answer_from_para(relevant_paragraphs, question)
        return self.answer

    def click_handler_for_mandi_price(self,
                                      state_name,
                                      apmc_name,
                                      commodity_name,
                                      from_date,
                                      to_date
                                      ):
        """Fetch the Mandi (wholesale market) price; all fields are required."""
        if state_name and apmc_name and commodity_name and from_date and to_date:
            self.mandi_price = self.kkms_kssw_obj.mandi_utils_obj.get_mandi_price(state_name, apmc_name, commodity_name, from_date, to_date)
        # When any field is missing, the previously cached value (or '') is returned.
        return self.mandi_price

    def click_handler_for_get_weather(self, city):
        """Fetch and format the current weather for `city`."""
        time, info, temperature = self.kkms_kssw_obj.weather_utils_obj.get_weather(city)
        self.weather_info = f'Weather in {city.capitalize()} on {time} is {temperature} with {info}.'
        return self.weather_info

    def click_handler_for_get_weather_forecast(self, state, district):
        """Fetch the multi-day weather forecast for the given state/district."""
        self.weather_forecast = self.kkms_kssw_obj.weather_utils_obj.get_weather_forecast(state, district)
        return self.weather_forecast

    def click_handler_for_weather_forecast_summary(self, weather_forecast):
        """Summarize a raw weather forecast into human-friendly text."""
        self.weather_forecast_summary = self.kkms_kssw_obj.langchain_utils_obj.get_weather_forecast_summary(weather_forecast)
        return self.weather_forecast_summary

    def click_handler_for_get_indic_answer(self, eng_ans, language='Hindi'):
        """Translate the English answer into the selected Indian language."""
        self.indic_lang_answer = self.kkms_kssw_obj.translator_utils_obj.get_indic_google_translate(eng_ans, language)
        return self.indic_lang_answer

    def select_widget(self, choice):
        """Toggle visibility of the three widget rows based on the radio choice.

        Returns one gr.update per output row (General, Mandi Price, Weather);
        an unknown choice hides everything via a single update.
        """
        if choice == "General":
            return [
                gr.update(visible=True),
                gr.update(visible=False),
                gr.update(visible=False)
            ]

        elif choice == "Mandi Price":
            return [
                gr.update(visible=False),
                gr.update(visible=True),
                gr.update(visible=False)
            ]

        elif choice == "Weather":
            return [
                gr.update(visible=False),
                gr.update(visible=False),
                gr.update(visible=True)
            ]

        else:
            return gr.update(visible=False)
115
+
116
+
117
# Build and launch the Gradio Web UI.
with gr.Blocks(title='KKMS-KSSW Demo') as demo:
    dom = DomState()

    radio = gr.Radio(
        ["General", "Mandi Price", "Weather"], label="Query related to"
    )

    ########################### Widget for Govt. Policy #################################################
    with gr.Row(visible=True) as rowGeneral:
        with gr.Column(scale=1, min_width=600):
            with gr.Tab(label='Relevant paragraphs'):
                question = gr.Textbox(label="Enter your question", placeholder='Type the question here')
                # Get the Relevant paragraphs for the question asked
                relevant_paragraphs = gr.Textbox(label="Relevant paragraphs are:", value=dom.relevant_paragraphs, interactive=False)
                b_relevant_paragraphs = gr.Button("Get Relevant paragraphs").style(size='sm')
                b_relevant_paragraphs.click(fn=dom.click_handler_for_get_relevant_paragraphs, inputs=question, outputs=[relevant_paragraphs])

        with gr.Column(scale=1, min_width=600):
            with gr.Tab(label='Extractive Summary'):
                # Get the extractive text summary from the retrieved Relevant paragraphs
                summary = gr.Textbox(label="Extractive Summary is:", value=dom.summary, interactive=False)
                b_summary = gr.Button("Extract Summary").style(size='sm')
                b_summary.click(fn=dom.click_handler_for_summary, inputs=relevant_paragraphs, outputs=[summary])

    # Get the exact answer for the question asked from the retrieved Relevant paragraphs
    # NOTE(review): this row and the translation row below are not listed in the
    # select_widget() outputs, so they stay visible for every radio choice — confirm intended.
    with gr.Row():
        with gr.Column(scale=1, min_width=600):
            with gr.Tab(label='Answer'):
                answer = gr.Textbox(label="Answer is:", value=dom.answer, interactive=False)
                b_answer = gr.Button("Get Answer").style(size='sm')
                b_answer.click(fn=dom.click_handler_for_get_answer, inputs=[relevant_paragraphs, question], outputs=[answer])

    # Convert the answer to the selected Indian language
    with gr.Row():
        with gr.Column(scale=1, min_width=600):
            with gr.Tab(label='Answer in selected language'):
                # Select the language
                language = gr.Dropdown(
                    ['English', 'Hindi', 'Gujarati', 'Marathi', 'Kannada', 'Bengali', 'Panjabi', 'Telugu', 'Tamil', 'Malayalam'],
                    label="Select language")
                indic_lang_answer = gr.Textbox(label="Answer in the selected language is:", value=dom.indic_lang_answer, interactive=False)
                b_indic_lang_answer = gr.Button("Get answer in selected language").style(size='sm')
                b_indic_lang_answer.click(fn=dom.click_handler_for_get_indic_answer, inputs=[answer, language], outputs=[indic_lang_answer])

    ########################## Widget for Mandi Price ###################################################
    with gr.Row(visible=False) as rowMandiPrice:
        with gr.Column(scale=1, min_width=600):
            # Select State
            state_name = gr.Dropdown(['ANDAMAN AND NICOBAR ISLANDS', 'ANDHRA PRADESH', 'ASSAM', 'BIHAR', 'CHANDIGARH', 'CHHATTISGARH', 'GOA', 'GUJARAT', 'HARYANA', 'HIMACHAL PRADESH', 'JAMMU AND KASHMIR', 'JHARKHAND', 'KARNATAKA', 'KERALA', 'MADHYA PRADESH', 'MAHARASHTRA', 'NAGALAND', 'ODISHA', 'PUDUCHERRY', 'PUNJAB', 'RAJASTHAN', 'TAMIL NADU', 'TELANGANA', 'TRIPURA', 'UTTAR PRADESH', 'UTTARAKHAND', 'WEST BENGAL'], label="Select state")

            # APMC name
            apmc_name = gr.Textbox(label="Enter APMC name", placeholder='Type the APMC name here')

            # Commodity name (the original comment repeated "APMC name" here)
            commodity_name = gr.Textbox(label="Enter Commodity name", placeholder='Type the Commodity name here')

            # From/To date in yyyy-mm-dd format
            from_date = gr.Textbox(label="From date?", value=dom.mandi_from_date, placeholder='Please enter the From date here in yyyy-mm-dd format')
            to_date = gr.Textbox(label="To date?", value=dom.mandi_to_date, placeholder='Please enter the To date here in yyyy-mm-dd format')

        with gr.Column(scale=1, min_width=600):
            mandi_price = gr.Textbox(label="Mandi Price is:", value=dom.mandi_price, interactive=False)
            # BUG FIX: renamed from `b_summary`, which silently rebound the
            # Extractive Summary button variable defined above.
            b_mandi_price = gr.Button("Get Mandi Price").style(size='sm')
            b_mandi_price.click(fn=dom.click_handler_for_mandi_price, inputs=[state_name, apmc_name, commodity_name, from_date, to_date], outputs=[mandi_price])

    ########################## Widget for Weather Info ###################################################
    with gr.Row(visible=False) as rowWeather:
        with gr.Column(scale=1, min_width=600):
            with gr.Tab(label='Weather Info'):
                city = gr.Textbox(label="Enter city name", placeholder='Type the city name here')
                weather = gr.Textbox(label="Current weather is:", value=dom.weather_info, interactive=False)
                b_weather = gr.Button("Get weather info").style(size='sm')
                b_weather.click(fn=dom.click_handler_for_get_weather, inputs=city, outputs=[weather])

        ########### Weather Forecast ###########
        with gr.Column(scale=1, min_width=600):
            with gr.Tab(label='Weather Forecast for next 5 days'):
                # Select the State
                state = gr.Dropdown(
                    ['Andaman-Nicobar', 'Andhra-Pradesh', 'Arunachal-Pradesh', 'Assam', 'Bihar', 'Chandigarh', 'Chhattisgarh', 'Dadra-and-Nagar-Haveli', 'Daman-and-Diu', 'Delhi', 'Goa', 'Gujarat', 'Haryana', 'Himachal-Pradesh', 'Jammu-Kashmir', 'Jharkhand', 'Karnataka', 'Kerala', 'Lakshadweep', 'Madhya-Pradesh', 'Maharashtra', 'Manipur', 'Meghalaya', 'Mizoram', 'Nagaland', 'Odisha', 'Pondicherry', 'Punjab', 'Rajasthan', 'Sikkim', 'Tamilnadu', 'Telangana', 'Tripura', 'Uttar-Pradesh', 'Uttarakhand', 'West-Bengal'],
                    label="Select state"
                )

                # # Select district
                # district = gr.Dropdown(
                #     weather_utils.STATES.get(state, {}),
                #     label="Select district"
                # )

                district = gr.Textbox(label="Enter district name", placeholder='Type the district name here')
                district_weather = gr.Textbox(label="Weather forecast is:", value=dom.weather_forecast, interactive=False)
                bd_weather = gr.Button("Get weather forecast").style(size='sm')
                bd_weather.click(fn=dom.click_handler_for_get_weather_forecast, inputs=[state, district], outputs=[district_weather])

        with gr.Column(scale=1, min_width=600):
            with gr.Tab(label='Weather Forecast Summary'):
                # Get the summary of the weather forecast
                weather_forecast_summary = gr.Textbox(label="Weather Forecast Summary is:", value=dom.weather_forecast_summary, interactive=False)
                b_weather_forecast_summary = gr.Button("Get Weather Forecast Summary").style(size='sm')
                b_weather_forecast_summary.click(fn=dom.click_handler_for_weather_forecast_summary, inputs=district_weather, outputs=[weather_forecast_summary])

    radio.change(fn=dom.select_widget, inputs=radio, outputs=[rowGeneral, rowMandiPrice, rowWeather])


# BUG FIX: the original ended with a call to an undefined `run()`, which would
# raise NameError at startup. demo.launch() already starts the app.
demo.launch(share=True)
kkms_kssw.py ADDED
@@ -0,0 +1,125 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+
3
+ import utils.constants as constants_utils
4
+ import utils.data_loader as data_loader_utils
5
+ import utils.langchain_utils as langchain_utils
6
+ import utils.weather as weather_utils
7
+ import utils.mandi_price as mandi_utils
8
+ import utils.translator as translator_utils
9
+
10
+ from llama_index import GPTSimpleVectorIndex, SimpleDirectoryReader, GPTListIndex
11
+ from langchain.indexes import VectorstoreIndexCreator
12
+ from langchain.embeddings.openai import OpenAIEmbeddings
13
+ from langchain.vectorstores import FAISS
14
+
15
+ import warnings
16
+ warnings.filterwarnings('ignore')
17
+
18
+
19
+
20
class KKMS_KSSW:
    """Facade that owns the vector index and the helper utilities
    (LangChain, Mandi price, weather, translator) used by the app."""

    def __init__(self):
        self.index = None
        self.documents = []
        self.response = None

        # Instantiate langchain_utils class object
        self.langchain_utils_obj = langchain_utils.LANGCHAIN_UTILS()
        # Instantiate Mandi Price utils class object
        self.mandi_utils_obj = mandi_utils.MANDI_PRICE()
        # Instantiate Weather class object
        self.weather_utils_obj = weather_utils.WEATHER()
        # Instantiate translator_utils class object
        self.translator_utils_obj = translator_utils.TRANSLATOR()

    # Initialize index (vector store)
    def initialize_index(self, save_index_to_disk=True, index_type='GPTSimpleVectorIndex'):
        """Load the index from INDEX_FILENAME if present, otherwise build it
        from the documents under DATA_PATH and optionally persist it.

        Args:
            save_index_to_disk: persist a newly built index to INDEX_FILENAME.
            index_type: 'GPTSimpleVectorIndex' or 'FAISS'.
        """
        # NOTE(review): the original printed '<file> deleted.' here although the
        # actual `rm` was commented out — nothing was ever deleted. The
        # misleading print has been removed.
        if os.path.exists(constants_utils.INDEX_FILENAME):
            # Load the index from the saved index.json file.
            # BUG FIX: pass the requested index_type through instead of the
            # hard-coded 'GPTSimpleVectorIndex'.
            print(f'Loading pre-generated index from: {constants_utils.INDEX_FILENAME}')
            self.index = self.langchain_utils_obj.load_index(index_type=index_type, filepath=constants_utils.INDEX_FILENAME)
        else:
            # Load data from Docs
            if os.path.exists(constants_utils.DATA_PATH):
                doc_documents = SimpleDirectoryReader(constants_utils.DATA_PATH).load_data()

                # Load data from PDFs only
                # pdf_documents = data_loader_utils.load_document(doc_type='pdf', doc_filepath=doc_filepath)

                # Load data from URLs & append it to the documents that we read from PDFs
                # url_documents = data_loader_utils.load_document(doc_type='url', urls=urls)

                # Merge documents of different data sources
                self.documents = doc_documents[:]
                # self.documents.extend(url_documents)

            # Build the Vector store for docs
            if index_type == 'GPTSimpleVectorIndex':
                self.index = GPTSimpleVectorIndex.from_documents(self.documents)
            elif index_type == 'FAISS':
                self.index = FAISS.from_documents(
                    self.documents,
                    OpenAIEmbeddings(openai_api_key=os.getenv('OPENAI_API_KEY'))
                )

            # BUG FIX: in the original, this save block sat after the `return`
            # of a misplaced nested helper function and was never executed.
            if save_index_to_disk:
                # Save index to a index.json file
                print(f'Saving newly generated index: {constants_utils.INDEX_FILENAME}')
                if index_type == 'GPTSimpleVectorIndex':
                    self.index.save_to_disk(constants_utils.INDEX_FILENAME)
                elif index_type == 'FAISS':
                    self.index.save_local(constants_utils.INDEX_FILENAME)

    def merge_documents_from_different_sources(self, doc_documents, url_documents):
        """Merge a docs index and a URLs index into a single GPTListIndex.

        BUG FIX: the original definition was missing `self` even though it
        assigned to `self.index`, and it was dropped into the middle of
        initialize_index(); it is now a proper method.
        """
        # Build the Vector store for docs
        doc_index = GPTSimpleVectorIndex.from_documents(doc_documents)
        # Build the Vector store for URLs
        url_index = GPTSimpleVectorIndex.from_documents(url_documents)

        # Set summary of each index
        doc_index.set_text("index_from_docs")
        url_index.set_text("index_from_urls")

        # Merge index of different data sources
        self.index = GPTListIndex([doc_index])
        self.index.insert(url_index)  # can also be passed directly as GPTListIndex([doc_index, url_index])

        return self.index

    # Define query on index to retrieve the most relevant top K documents from the vector store
    def query(self,
              question,
              mode='default',
              response_mode="default",
              similarity_top_k=1,
              required_keywords=None,
              exclude_keywords=None,
              verbose=False
              ):
        '''
        Args:
            mode: can be any of [default, embedding]
            response_mode: can be any of [default, compact, tree_summarize]
            required_keywords/exclude_keywords: optional keyword filters
                (None means no filter; BUG FIX: mutable [] defaults removed).
        '''
        required_keywords = required_keywords if required_keywords is not None else []
        exclude_keywords = exclude_keywords if exclude_keywords is not None else []

        # Querying the index
        self.response = self.index.query(question,
                                         mode=mode,
                                         response_mode=response_mode,
                                         similarity_top_k=similarity_top_k,
                                         required_keywords=required_keywords,
                                         exclude_keywords=exclude_keywords,
                                         verbose=verbose)

        return self.response
utils/constants.py ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Filesystem locations for the input documents and the generated index.
DATA_PATH = './data/crops/'
OUTPUT_PATH = './output'
# The vector-store index is persisted here (see kkms_kssw.initialize_index).
INDEX_FILENAME = f'{OUTPUT_PATH}/index.json'

# Candidate source URLs for building the knowledge base.
URLS = [
    'https://dmi.gov.in/Documents/GrantCAGrapes.pdf',
    'https://dmi.gov.in/Documents/organicfaq.pdf',
    'https://dmi.gov.in/Documents/CAGMOrganic-III.pdf',
    'https://dmi.gov.in/GradesStandard.aspx',
    'https://www.india.gov.in/topics/agriculture',
    'https://www.india.gov.in/farmers-portal',

    # Pest Management related
    'https://niphm.gov.in/IPMPackages/Maize.pdf',

    # Mandi Price related
    'https://agmarknet.gov.in/',
    'https://enam.gov.in/web/dashboard/trade-data',

    # General information related: Information of interests are present on the 2nd level url
    'https://agricoop.nic.in/#gsc.tab=0',
    'https://www.manage.gov.in/nf/nf.asp',

    # Weather forecast related
    'https://nwp.imd.gov.in/blf/blf_temp/',  # need to select state -> district (on the new page) -> displays detailed table -> can get info at the block level as well from the same page on selection
    'https://nwp.imd.gov.in/blf/blf_temp/dis.php?value=12gujarat',  # to get weather forecast for the given state
    'https://nwp.imd.gov.in/blf/blf_temp/block.php?dis=12BHAVNAGAR',  # to get the weather forecast for the given district
]


# Supported Indian languages for translating the English text to an Indian language.
# Maps the display name (shown in the app's language dropdown) to the
# two-letter language code used by utils/translator.py.
INDIC_LANGUAGE = {
    'Hindi': 'hi',
    'Gujarati': 'gu',
    'Kannada': 'kn',
    'Marathi': 'mr',
    'Panjabi': 'pa',
    'Bengali': "bn",
    'Telugu': 'te',
    'Tamil': 'ta',
    'Malayalam': 'ml',
}
utils/data_loader.py ADDED
@@ -0,0 +1,104 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import pandas as pd
3
+ from pathlib import Path
4
+ from llama_index import GPTSimpleVectorIndex, download_loader
5
+ from langchain.agents import initialize_agent, Tool
6
+ from langchain.llms import OpenAI
7
+ from langchain.chains.conversation.memory import ConversationBufferMemory
8
+
9
+
10
+
11
class DATA_LOADER:
    """Loads documents (PDFs, URLs, knowledge-base sites) into llama_index
    Document objects, plus small pandas helpers for link spreadsheets."""

    def __init__(self):
        # No state to set up; kept for symmetry with the other utility classes.
        print()

    def clean_df(self, df, dropna=True, fillna=False):
        """Return `df` with NaNs handled and duplicate rows removed.

        Args:
            df: input pandas DataFrame (mutated in place by fillna/dropna).
            dropna: drop rows containing NaN (applied after optional fillna).
            fillna: replace NaN with '' before any dropna.
        """
        if fillna:
            df.fillna('', inplace=True)
        if dropna:
            df.dropna(inplace=True)
        # df = df[~df.isna()]
        df = df.drop_duplicates().reset_index(drop=True)
        return df

    def load_external_links_used_by_FTAs(self,
                                         sheet_filepath='./data/urls_used_by_ftas/external_links_used_by_FTAs.xlsx'
                                         ):
        """Concatenate the per-sheet link tables of the FTA workbook into one frame."""
        xls = pd.ExcelFile(sheet_filepath)
        df = pd.DataFrame(columns=['S.No.', 'Link used for', 'Link type', 'Link'])
        for sheet_name in xls.sheet_names:
            sheet = pd.read_excel(xls, sheet_name)
            if sheet.shape[0] > 0:
                df = pd.concat([df, sheet])
            else:
                print(f'{sheet_name} has no content.')

        df = df[['Link used for', 'Link type', 'Link']]
        # BUG FIX: clean_df is a method; the original called it as a bare
        # function (`clean_df(df)`), which raises NameError at runtime.
        df = self.clean_df(df)
        print(f'Total links available across all cities: {df.shape[0]}')
        return df

    def load_document(self,
                      doc_type='pdf',
                      doc_filepath='',
                      urls=None
                      ):
        """Load documents from a PDF, plain URLs, or a knowledge-base site.

        Args:
            doc_type: one of 'pdf', 'url', 'url-kb', 'url-chatgpt'.
            doc_filepath: path to the PDF (for doc_type='pdf').
            urls: list of URLs (for the url* doc types).

        Returns:
            list of llama_index Documents (empty when the source is missing).
        """
        # BUG FIX: avoid a mutable default argument for `urls`.
        urls = urls if urls is not None else []
        documents = []

        if doc_type == 'pdf':
            PDFReader = download_loader("PDFReader")
            loader = PDFReader()
            if os.path.exists(doc_filepath):
                documents = loader.load_data(file=Path(doc_filepath))

        elif doc_type == 'url':
            BeautifulSoupWebReader = download_loader("BeautifulSoupWebReader")
            loader = BeautifulSoupWebReader()
            if len(urls) > 0:
                # Load data from URLs
                documents = loader.load_data(urls=urls)

        elif doc_type == 'url-kb':
            KnowledgeBaseWebReader = download_loader("KnowledgeBaseWebReader")
            loader = KnowledgeBaseWebReader()
            for url in urls:
                doc = loader.load_data(
                    root_url=url,
                    link_selectors=['.article-list a', '.article-list a'],
                    article_path='/articles',
                    body_selector='.article-body',
                    title_selector='.article-title',
                    subtitle_selector='.article-subtitle',
                )
                documents.extend(doc)

        elif doc_type == 'url-chatgpt':
            BeautifulSoupWebReader = download_loader("BeautifulSoupWebReader")
            loader = BeautifulSoupWebReader()
            if len(urls) > 0:
                # Load data from URLs
                documents = loader.load_data(urls=urls)
            # Build the Vector database
            index = GPTSimpleVectorIndex(documents)
            tools = [
                Tool(
                    name="Website Index",
                    func=lambda q: index.query(q),
                    description=f"Useful when you want answer questions about the text retrieved from websites.",
                ),
            ]

            # Call ChatGPT API
            llm = OpenAI(temperature=0)  # Keep temperature=0 to search from the given urls only
            memory = ConversationBufferMemory(memory_key="chat_history")
            agent_chain = initialize_agent(
                tools, llm, agent="zero-shot-react-description", memory=memory
            )

            # NOTE(review): this output is computed but never returned or used —
            # confirm whether it should be surfaced to the caller.
            output = agent_chain.run(input="What language is on this website?")

        return documents
utils/langchain_utils.py ADDED
@@ -0,0 +1,169 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain.llms import OpenAI
2
+ from langchain.text_splitter import CharacterTextSplitter
3
+ from langchain.chains.summarize import load_summarize_chain
4
+ from langchain.docstore.document import Document
5
+ from langchain.embeddings.openai import OpenAIEmbeddings
6
+ from langchain.vectorstores import Chroma
7
+ from langchain.chains.question_answering import load_qa_chain
8
+ from langchain.chains.qa_with_sources import load_qa_with_sources_chain
9
+ from langchain.prompts import PromptTemplate
10
+ from llama_index import GPTSimpleVectorIndex
11
+ from langchain.vectorstores import FAISS
12
+
13
+ import pickle
14
+ import os
15
# Re-export the API keys into the process environment.
# BUG FIX: `os.environ[key] = os.getenv(key)` raises TypeError when the
# variable is unset (os.environ values must be str). Guard against None.
# NOTE(review): when the variable IS set this assignment is a no-op, since
# os.getenv reads the same environment — confirm this block is still needed.
if os.getenv('OPENAI_API_KEY') is not None:
    os.environ['OPENAI_API_KEY'] = os.getenv('OPENAI_API_KEY')
if os.getenv('HUGGINGFACEHUB_API_TOKEN') is not None:
    os.environ['HUGGINGFACEHUB_API_TOKEN'] = os.getenv('HUGGINGFACEHUB_API_TOKEN')
17
+
18
+
19
+
20
class LANGCHAIN_UTILS:
    """LangChain helpers: summarization, paragraph QA, and index (de)serialization."""

    def __init__(self):
        # No state; instances only group the helper methods.
        print()

    def generate_prompt_template(self, prompt_type='general'):
        """Return the summarization prompt template for `prompt_type`.

        Args:
            prompt_type: 'general' or 'weather'; any other value yields ''.
        """
        prompt_template = ''

        if prompt_type == 'general':
            prompt_template = """Write a concise summary of the following:

{text}

CONCISE SUMMARY IN ENGLISH:"""

        elif prompt_type == 'weather':
            prompt_template = """
What would be the weather based on the below data:
{text}
"""

        return prompt_template

    def get_textual_summary(self,
                            text,
                            chain_type="stuff",
                            custom_prompt=True,
                            prompt_type='general'
                            ):
        """Summarize `text` with an OpenAI summarize chain.

        Args:
            chain_type: LangChain summarize chain type (e.g. 'stuff').
            custom_prompt: use generate_prompt_template() instead of the default.
            prompt_type: which custom template to use when custom_prompt is True.
        """
        texts = [text]
        # texts always has length 1 here; the [:3] cap is kept from the original.
        docs = [Document(page_content=t) for t in texts[:3]]

        llm = OpenAI(temperature=0)
        if custom_prompt:
            prompt_template = self.generate_prompt_template(prompt_type)
            PROMPT = PromptTemplate(template=prompt_template, input_variables=["text"])
            chain = load_summarize_chain(llm, chain_type=chain_type, prompt=PROMPT)
        else:
            chain = load_summarize_chain(llm, chain_type=chain_type)

        text_summary = chain.run(docs)
        return text_summary

    def get_weather_forecast_summary(self,
                                     text,
                                     chain_type="stuff"
                                     ):
        """Summarize raw weather-forecast data into layman-friendly text."""
        text = f"""
What would be the weather based on the below data:
{text}

Give simple response without technical numbers which can be explained to human.
"""
        texts = [text]
        docs = [Document(page_content=t) for t in texts[:3]]

        llm = OpenAI(temperature=0)
        chain = load_summarize_chain(llm, chain_type=chain_type)
        text_summary = chain.run(docs)

        return text_summary

    def get_answer_from_para(self,
                             para,
                             question,
                             chain_type="stuff",
                             custom_prompt=True
                             ):
        """Answer `question` using only the given paragraph(s).

        Splits `para` into chunks, embeds them into a throwaway Chroma store,
        retrieves the single most similar chunk, and runs a QA chain on it.
        """
        # Prepare data (Split paragraph into chunks of small documents)
        text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
        texts = text_splitter.split_text(para)

        # Find similar docs that are relevant to the question
        embeddings = OpenAIEmbeddings()
        docsearch = Chroma.from_texts(
            texts, embeddings,
            metadatas=[{"source": str(i)} for i in range(len(texts))]
        )

        # Search for the similar docs
        docs = docsearch.similarity_search(question, k=1)

        llm = OpenAI(temperature=0)
        # Create a Chain for question answering
        if custom_prompt:
            prompt_template = """Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.

{context}

Question: {question}
Answer in English:"""

            PROMPT = PromptTemplate(
                template=prompt_template, input_variables=["context", "question"]
            )
            chain = load_qa_chain(llm, chain_type=chain_type, prompt=PROMPT)
        else:
            # chain = load_qa_with_sources_chain(llm, chain_type=chain_type)
            chain = load_qa_chain(llm, chain_type=chain_type)
            # chain.run(input_documents=docs, question=question)

        out_dict = chain({"input_documents": docs, "question": question}, return_only_outputs=True)
        return out_dict['output_text']

    def store_index(self,
                    index,
                    index_type='GPTSimpleVectorIndex',
                    filepath='./output/index.json'
                    ):
        """Persist `index` to `filepath` using the scheme matching `index_type`.

        Unknown index_type values are silently ignored (kept from the original).
        """
        if index_type == 'GPTSimpleVectorIndex':
            index.save_to_disk(filepath)

        elif index_type == 'pickle':
            with open(filepath, "wb") as f:
                pickle.dump(index, f)

        elif index_type == 'FAISS':
            index.save_local(filepath)

    def load_index(self,
                   index_type='GPTSimpleVectorIndex',
                   filepath='./output/index.json'
                   ):
        """Load a previously stored index from `filepath`.

        Raises:
            UnboundLocalError if `index_type` is not one of the supported
            values (kept from the original behavior).
        """
        if index_type == 'GPTSimpleVectorIndex':
            index = GPTSimpleVectorIndex.load_from_disk(filepath)

        elif index_type == 'pickle':
            with open(filepath, "rb") as f:
                index = pickle.load(f)

        elif index_type == 'FAISS':
            index = FAISS.load_local(filepath, OpenAIEmbeddings())  # can we use open-source embeddings?

        return index

    def convert_text_to_documents(self, text_list=None):
        """
        Converts the list of text data to Documents format that can be fed to
        GPT API to build the Vector store.

        BUG FIX: the original used a mutable default argument (text_list=[]).
        """
        text_list = text_list if text_list is not None else []
        from llama_index import Document
        documents = [Document(t) for t in text_list]
        return documents
utils/mandi_price.py ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+
3
+
4
class MANDI_PRICE:
    """Fetches Mandi (wholesale market) trade prices from the eNAM dashboard."""

    def __init__(self):
        # AJAX endpoint backing https://enam.gov.in/web/dashboard/trade-data
        self.base_url = "https://enam.gov.in/web/Ajax_ctrl/trade_data_list"
        # "https://enam.gov.in/web/dashboard/trade-data",
        # "https://enam.gov.in/web/dashboard/trade_data_list",

    def get_mandi_price(self,
                        state_name,
                        apmc_name,
                        commodity_name,
                        from_date,
                        to_date
                        ):
        """POST a trade-data query and return the parsed JSON response.

        Args:
            from_date / to_date: dates in yyyy-mm-dd format.

        Raises:
            requests.RequestException on network failure; ValueError if the
            endpoint does not return JSON.
        """
        # Prepare the form-encoded payload for the POST request.
        # NOTE(review): the field values are not URL-escaped; names containing
        # '&' or '=' would corrupt the payload — confirm inputs are plain text.
        payload = f"language=en&stateName={state_name}&apmcName={apmc_name}&commodityName={commodity_name}&fromDate={from_date}&toDate={to_date}"

        headers = {
            "Content-type": "application/x-www-form-urlencoded; charset=UTF-8",
            "Referer": "https://enam.gov.in/web/dashboard/trade-data",
            "Accept": "application/json, text/javascript, */*; q=0.01",
        }

        # BUG FIX: the original passed the pre-encoded string via `json=`,
        # which JSON-serializes it (wrapping it in quotes) and contradicts the
        # form-urlencoded Content-type header. `data=` sends it as the raw body.
        response = requests.post(
            self.base_url,
            data=payload,
            headers=headers,
        )

        return response.json()
utils/translator.py ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import utils.constants as constants_utils
2
+ import requests
3
+ from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
4
+ from mosestokenizer import *
5
+ from indicnlp.tokenize import sentence_tokenize
6
+ from googletrans import Translator, constants
7
+
8
+
9
class TRANSLATOR:
    """English-to-Indic translation via NLLB-200 (HuggingFace) or Google Translate."""

    def __init__(self):
        # No state. NOTE(review): the NLLB model is re-downloaded/loaded on
        # every call below — consider caching it here.
        print()

    def split_sentences(self, paragraph, language):
        """Split `paragraph` into sentences; `language` is an ISO code ('en', 'hi', ...).

        Returns None for languages that are neither English nor a supported
        Indic language (kept from the original behavior).
        """
        if language == "en":
            with MosesSentenceSplitter(language) as splitter:
                return splitter([paragraph])
        elif language in constants_utils.INDIC_LANGUAGE:
            return sentence_tokenize.sentence_split(paragraph, lang=language)

    def get_in_hindi(self, payload):
        """Translate payload['inputs'] (English text) to Hindi, sentence by sentence.

        BUG FIX: the original accumulated results with
        `out_text = out_text.join(translated_sent)`, which uses the running
        text as a *separator* between the characters of the new sentence,
        producing garbage after the first sentence. Collect the translated
        sentences and join them with spaces instead.
        """
        tokenizer = AutoTokenizer.from_pretrained("facebook/nllb-200-distilled-600M")
        model = AutoModelForSeq2SeqLM.from_pretrained("facebook/nllb-200-distilled-600M")
        article = self.split_sentences(payload['inputs'], 'en')
        # inputs = tokenizer(payload['input'], return_tensors="pt")
        translated_sentences = []
        for a in article:
            inputs = tokenizer(a, return_tensors="pt")
            translated_tokens = model.generate(**inputs, forced_bos_token_id=tokenizer.lang_code_to_id["hin_Deva"], max_length=100)
            translated_sentences.append(tokenizer.batch_decode(translated_tokens, skip_special_tokens=True)[0])
        return ' '.join(translated_sentences)

    def get_in_indic(self, text, language='Hindi'):
        """Translate English `text` into the given language with NLLB-200.

        Only Hindi and Marathi are mapped to target codes; any other value
        falls back to the English code and returns (near-)unchanged text.
        """
        tokenizer = AutoTokenizer.from_pretrained("facebook/nllb-200-distilled-600M")
        model = AutoModelForSeq2SeqLM.from_pretrained("facebook/nllb-200-distilled-600M")
        inputs = tokenizer(text, return_tensors="pt")

        code = "eng_Latn"
        if language == 'Hindi':
            code = "hin_Deva"
        elif language == 'Marathi':
            code = "mar_Deva"

        translated_tokens = model.generate(
            **inputs,
            forced_bos_token_id=tokenizer.lang_code_to_id[code],
            max_length=1000
        )

        out_text = tokenizer.batch_decode(translated_tokens, skip_special_tokens=True)[0]
        return out_text

    def get_indic_google_translate(self, text, language='Hindi'):
        """Translate `text` via the googletrans API; unknown languages fall back to English."""
        # Init the Google API translator
        translator = Translator()
        translations = translator.translate(text, dest=constants_utils.INDIC_LANGUAGE.get(language, 'en'))
        return str(translations.text)
utils/weather.py ADDED
@@ -0,0 +1,202 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+ from bs4 import BeautifulSoup as bs
3
+
4
+
5
# Numeric codes the IMD portal (https://nwp.imd.gov.in/blf/blf_temp) uses to
# identify Indian states / union territories in its query URLs; keys match
# the state names used as inputs elsewhere in this module.
STATE_CODES = {
    'Andaman-Nicobar': '01',
    'Andhra-Pradesh': '02',
    'Arunachal-Pradesh': '03',
    'Assam': '04',
    'Bihar': '05',
    'Chandigarh': '06',
    'Chhattisgarh': '07',
    'Dadra-and-Nagar-Haveli': '08',
    'Daman-and-Diu': '09',
    'Delhi': '10',
    'Goa': '11',
    'Gujarat': '12',
    'Haryana': '13',
    # 14  -- NOTE(review): code '14' is unassigned here; presumably skipped
    # by the portal itself — confirm against the IMD site.
    'Himachal-Pradesh': '15',
    'Jammu-Kashmir': '16',
    'Jharkhand': '17',
    'Karnataka': '18',
    'Kerala': '19',
    'Lakshadweep': '20',
    'Madhya-Pradesh': '21',
    'Maharashtra': '22',
    'Manipur': '23',
    'Meghalaya': '24',
    'Mizoram': '25',
    'Nagaland': '26',
    'Odisha': '27',
    'Pondicherry': '28',
    'Punjab': '29',
    'Rajasthan': '30',
    'Sikkim': '31',
    'Tamilnadu': '32',
    'Telangana': '33',
    'Tripura': '34',
    'Uttar-Pradesh': '35',
    'Uttarakhand': '36',
    'West-Bengal': '37',
}
44
+
45
+ # List of states that are given as the input selection to https://nwp.imd.gov.in/blf/blf_temp/ to get the weather forecast
46
# List of states that are given as the input selection to
# https://nwp.imd.gov.in/blf/blf_temp/ to get the weather forecast.
# Structure: state name -> {district name -> [block names]}.
# Only Delhi and Gujarat are (partially) filled in; every other state maps
# to an empty dict, and most Gujarat districts map to empty block lists —
# presumably placeholders to be populated later.
STATES = {
    'Andaman-Nicobar': {},

    'Andhra-Pradesh': {},

    'Arunachal-Pradesh': {},

    'Assam': {},

    'Bihar': {},

    'Chandigarh': {},

    'Chhattisgarh': {},

    'Dadra-and-Nagar-Haveli': {},

    'Daman-and-Diu': {},

    # Delhi: each district is its own single "block".
    'Delhi': {
        'CENTRAL-DELHI': ['CENTRAL-DELHI'],
        'EAST-DELHI': ['EAST-DELHI'],
        'NEW-DELHI': ['NEW-DELHI'],
        'NORTH-DELHI': ['NORTH-DELHI'],
        'NORTH-EAST-DELHI': ['NORTH-EAST-DELHI'],
        'NORTH-WEST-DELHI': ['NORTH-WEST-DELHI'],
        'SHAHDARA': ['SHAHDARA'],
        'SOUTH-DELHI': ['SOUTH-DELHI'],
        'SOUTH-EAST-DELHI': ['SOUTH-EAST-DELHI'],
        'SOUTH-WEST-DELHI': ['SOUTH-WEST-DELHI'],
        'WEST-DELHI': ['WEST-DELHI'],
    },

    'Goa': {},

    # Gujarat: only AHMADABAD, AMRELI and SURAT have their blocks listed.
    'Gujarat': {
        'AHMADABAD': ['AHMEDABAD-CITY', 'BAVLA', 'DASKROI', 'DETROJ-RAMPURA', 'DHANDHUKA', 'DHOLERA', 'DHOLKA', 'MANDAL', 'SANAND', 'VIRAMGAM'],
        'AMRELI': ['AMRELI', 'BABRA', 'BAGASARA', 'DHARI', 'JAFRABAD', 'KHAMBHA', 'KUNKAVAV-VADIA', 'LATHI', 'LILIA', 'RAJULA', 'SAVERKUNDLA'],
        'ANAND': [],
        'ARVALLI': [],
        'BANASKANTHA': [],
        'BHARUCH': [],
        'BHAVNAGAR': [],
        'BOTAD': [],
        'CHHOTAUDEPUR': [],
        'DANG': [],
        'DEVBHUMI-DWARKA': [],
        'DOHAD': [],
        'GANDHINAGAR': [],
        'GIR-SOMNATH': [],
        'JAMNAGAR': [],
        'JUNAGADH': [],
        'KACHCHH': [],
        'KHEDA': [],
        'MAHESANA': [],
        'MAHISAGAR': [],
        'MORBI': [],
        'NARMADA': [],
        'NAVSARI': [],
        'PANCH-MAHALS': [],
        'PATAN': [],
        'PORBANDAR': [],
        'RAJKOT': [],
        'SABAR-KANTHA': [],
        'SURAT': ['BARDOLI', 'CHORASI', 'KAMREJ', 'MAHUVA', 'MANDVI', 'MANGROL', 'OLPAD', 'PALSANA', 'SURAT-CITY', 'UMARPADA'],
        'SURENDRANAGAR': [],
        'TAPI': [],
        'VADODARA': [],
        'VALSAD': [],
    },

    'Haryana': {},

    'Himachal-Pradesh': {},

    'Jammu-Kashmir': {},

    'Jharkhand': {},

    'Karnataka': {},

    'Kerala': {},

    'Lakshadweep': {},

    'Madhya-Pradesh': {},

    'Maharashtra': {},

    'Manipur': {},

    'Meghalaya': {},

    'Mizoram': {},

    'Nagaland': {},

    'Odisha': {},

    'Pondicherry': {},

    'Punjab': {},

    'Rajasthan': {},

    'Sikkim': {},

    'Tamilnadu': {},

    'Telangana': {},

    'Tripura': {},

    'Uttar-Pradesh': {},

    'Uttarakhand': {},

    'West-Bengal': {},
}
165
+
166
+
167
+
168
class WEATHER:
    """Fetch weather information from the IMD portal and from Google search.

    Both methods scrape HTML pages, so they are inherently fragile against
    upstream layout changes.
    """

    def __init__(self):
        # Base URL of the IMD (India Meteorological Department) block-level
        # weather-forecast portal.
        self.base_url = 'https://nwp.imd.gov.in/blf/blf_temp'

    # Weather forecast from Govt. website
    def get_weather_forecast(self, state, district, is_block_level=False):
        """Return the forecast text scraped from the IMD portal.

        Args:
            state: State name; must be a key of ``STATE_CODES``.
            district: District identifier appended to the state code.
            is_block_level: If True, query the block-level page
                (``table2.php``) instead of the district page.

        Returns:
            str: Text of the first ``<font>`` element on the page, or ''
            when the page contains none.

        Raises:
            requests.HTTPError: On a non-2xx response.
            requests.Timeout: If the portal does not answer in time.
        """
        self.district_url = f"{self.base_url}/block.php?dis={STATE_CODES.get(state, '') + district}"
        self.block_url = f'{self.base_url}/table2.php'

        url = self.block_url if is_block_level else self.district_url
        # BUGFIX: the original call had no timeout, so a stalled government
        # server could hang the app forever; it also never checked the
        # HTTP status.
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        soup = bs(response.text, 'html.parser')
        # find_all is the current bs4 spelling of the deprecated findAll;
        # guard against a missing <font> tag instead of a bare IndexError.
        fonts = soup.find_all('font')
        return fonts[0].text if fonts else ''

    # Weather using Google weather API
    def get_weather(self, city):
        """Scrape Google's weather card for *city*.

        Args:
            city: City name (spaces allowed).

        Returns:
            tuple[str, str, str]: (time/date string, condition description,
            temperature with a trailing "°C").

        Raises:
            requests.HTTPError / requests.Timeout: On network failure.
            IndexError: If Google does not return a weather card (e.g.
            unknown city, layout change, or bot detection).
        """
        city = city + " weather"
        city = city.replace(" ", "+")

        # Desktop browser UA — without it Google serves a page without the
        # #wob_* weather-card elements.
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'
        }
        # BUGFIX: added timeout and status check (see get_weather_forecast).
        response = requests.get(
            f'https://www.google.com/search?q={city}&oq={city}&aqs=chrome.0.35i39l2j0l4j46j69i60.6128j1j7&sourceid=chrome&ie=UTF-8', headers=headers, timeout=30)
        response.raise_for_status()

        soup = bs(response.text, 'html.parser')
        location = soup.select('#wob_loc')[0].getText().strip()
        time = soup.select('#wob_dts')[0].getText().strip()
        info = soup.select('#wob_dc')[0].getText().strip()
        temperature = soup.select('#wob_tm')[0].getText().strip()
        temperature = temperature + "°C"

        # NOTE(review): `location` is scraped but never returned — confirm
        # whether callers need it before widening the return value.
        return time, info, temperature