Spaces: Runtime error
Chintan Donda committed
Commit • a447435
Parent(s): e52b65f
Application file, KKMS repo and utils files
- app.py +226 -0
- kkms_kssw.py +125 -0
- utils/constants.py +42 -0
- utils/data_loader.py +104 -0
- utils/langchain_utils.py +169 -0
- utils/mandi_price.py +33 -0
- utils/translator.py +61 -0
- utils/weather.py +202 -0
app.py
ADDED
@@ -0,0 +1,226 @@
import gradio as gr
import openai
import os
import datetime
import kkms_kssw as kkms_kssw
import utils.constants as constants_utils


# Create and launch the Gradio Web UI
class DomState:
    def __init__(self):
        self.relevant_paragraphs = ''
        self.answer = ''
        self.summary = ''
        self.mandi_price = ''
        self.mandi_from_date = (datetime.datetime.now() - datetime.timedelta(days=2)).strftime('%Y-%m-%d')
        self.mandi_to_date = datetime.datetime.now().strftime('%Y-%m-%d')
        self.weather_info = ''
        self.weather_forecast = ''
        self.weather_forecast_summary = ''
        self.indic_lang_answer = ''

        # Initialize the index (vector store). initialize_index() takes
        # save_index_to_disk and index_type; the index filepath comes from
        # constants_utils.INDEX_FILENAME inside KKMS_KSSW itself.
        self.kkms_kssw_obj = kkms_kssw.KKMS_KSSW()
        self.kkms_kssw_obj.initialize_index(index_type='GPTSimpleVectorIndex')

    def click_handler_for_get_relevant_paragraphs(self,
                                                  question,
                                                  mode='default',
                                                  response_mode='default',
                                                  similarity_top_k=2,
                                                  ):
        self.relevant_paragraphs = self.kkms_kssw_obj.query(question,
                                                            mode=mode,
                                                            response_mode=response_mode,
                                                            similarity_top_k=similarity_top_k,
                                                            # required_keywords=required_keywords_list,
                                                            # exclude_keywords=exclude_keywords_list,
                                                            )
        return self.relevant_paragraphs

    def click_handler_for_summary(self, answer):
        self.summary = self.kkms_kssw_obj.langchain_utils_obj.get_textual_summary(answer)
        return self.summary

    def click_handler_for_get_answer(self, relevant_paragraphs, question):
        self.answer = self.kkms_kssw_obj.langchain_utils_obj.get_answer_from_para(relevant_paragraphs, question)
        return self.answer

    def click_handler_for_mandi_price(self, state_name, apmc_name, commodity_name, from_date, to_date):
        if state_name and apmc_name and commodity_name and from_date and to_date:
            self.mandi_price = self.kkms_kssw_obj.mandi_utils_obj.get_mandi_price(state_name, apmc_name, commodity_name, from_date, to_date)
        return self.mandi_price

    def click_handler_for_get_weather(self, city):
        time, info, temperature = self.kkms_kssw_obj.weather_utils_obj.get_weather(city)
        self.weather_info = f'Weather in {city.capitalize()} on {time} is {temperature} with {info}.'
        return self.weather_info

    def click_handler_for_get_weather_forecast(self, state, district):
        self.weather_forecast = self.kkms_kssw_obj.weather_utils_obj.get_weather_forecast(state, district)
        return self.weather_forecast

    def click_handler_for_weather_forecast_summary(self, weather_forecast):
        self.weather_forecast_summary = self.kkms_kssw_obj.langchain_utils_obj.get_weather_forecast_summary(weather_forecast)
        return self.weather_forecast_summary

    def click_handler_for_get_indic_answer(self, eng_ans, language='Hindi'):
        self.indic_lang_answer = self.kkms_kssw_obj.translator_utils_obj.get_indic_google_translate(eng_ans, language)
        return self.indic_lang_answer

    def select_widget(self, choice):
        if choice == "General":
            return [
                gr.update(visible=True),
                gr.update(visible=False),
                gr.update(visible=False)
            ]
        elif choice == "Mandi Price":
            return [
                gr.update(visible=False),
                gr.update(visible=True),
                gr.update(visible=False)
            ]
        elif choice == "Weather":
            return [
                gr.update(visible=False),
                gr.update(visible=False),
                gr.update(visible=True)
            ]
        else:
            # The callback feeds three output rows, so return one update per row
            return [gr.update(visible=False)] * 3


with gr.Blocks(title='KKMS-KSSW Demo') as demo:
    dom = DomState()

    radio = gr.Radio(
        ["General", "Mandi Price", "Weather"], label="Query related to"
    )

    ########################### Widget for Govt. Policy #################################################
    with gr.Row(visible=True) as rowGeneral:
        with gr.Column(scale=1, min_width=600):
            with gr.Tab(label='Relevant paragraphs'):
                question = gr.Textbox(label="Enter your question", placeholder='Type the question here')
                # Get the relevant paragraphs for the question asked
                relevant_paragraphs = gr.Textbox(label="Relevant paragraphs are:", value=dom.relevant_paragraphs, interactive=False)
                b_relevant_paragraphs = gr.Button("Get Relevant paragraphs").style(size='sm')
                b_relevant_paragraphs.click(fn=dom.click_handler_for_get_relevant_paragraphs, inputs=question, outputs=[relevant_paragraphs])

        with gr.Column(scale=1, min_width=600):
            with gr.Tab(label='Extractive Summary'):
                # Get the extractive text summary from the retrieved relevant paragraphs
                summary = gr.Textbox(label="Extractive Summary is:", value=dom.summary, interactive=False)
                b_summary = gr.Button("Extract Summary").style(size='sm')
                b_summary.click(fn=dom.click_handler_for_summary, inputs=relevant_paragraphs, outputs=[summary])

        # Get the exact answer for the question asked from the retrieved relevant paragraphs
        with gr.Row():
            with gr.Column(scale=1, min_width=600):
                with gr.Tab(label='Answer'):
                    answer = gr.Textbox(label="Answer is:", value=dom.answer, interactive=False)
                    b_answer = gr.Button("Get Answer").style(size='sm')
                    b_answer.click(fn=dom.click_handler_for_get_answer, inputs=[relevant_paragraphs, question], outputs=[answer])

        # Convert the answer to an Indian language
        with gr.Row():
            with gr.Column(scale=1, min_width=600):
                with gr.Tab(label='Answer in selected language'):
                    # Select the language
                    language = gr.Dropdown(
                        ['English', 'Hindi', 'Gujarati', 'Marathi', 'Kannada', 'Bengali', 'Panjabi', 'Telugu', 'Tamil', 'Malayalam'],
                        label="Select language")
                    indic_lang_answer = gr.Textbox(label="Answer in the selected language is:", value=dom.indic_lang_answer, interactive=False)
                    b_indic_lang_answer = gr.Button("Get answer in selected language").style(size='sm')
                    b_indic_lang_answer.click(fn=dom.click_handler_for_get_indic_answer, inputs=[answer, language], outputs=[indic_lang_answer])

    ########################## Widget for Mandi Price ###################################################
    with gr.Row(visible=False) as rowMandiPrice:
        with gr.Column(scale=1, min_width=600):
            # Select state
            state_name = gr.Dropdown(['ANDAMAN AND NICOBAR ISLANDS', 'ANDHRA PRADESH', 'ASSAM', 'BIHAR', 'CHANDIGARH', 'CHHATTISGARH', 'GOA', 'GUJARAT', 'HARYANA', 'HIMACHAL PRADESH', 'JAMMU AND KASHMIR', 'JHARKHAND', 'KARNATAKA', 'KERALA', 'MADHYA PRADESH', 'MAHARASHTRA', 'NAGALAND', 'ODISHA', 'PUDUCHERRY', 'PUNJAB', 'RAJASTHAN', 'TAMIL NADU', 'TELANGANA', 'TRIPURA', 'UTTAR PRADESH', 'UTTARAKHAND', 'WEST BENGAL'], label="Select state")

            # APMC name
            apmc_name = gr.Textbox(label="Enter APMC name", placeholder='Type the APMC name here')

            # Commodity name
            commodity_name = gr.Textbox(label="Enter Commodity name", placeholder='Type the Commodity name here')

            # From/To date in yyyy-mm-dd format
            from_date = gr.Textbox(label="From date?", value=dom.mandi_from_date, placeholder='Please enter the From date here in yyyy-mm-dd format')
            to_date = gr.Textbox(label="To date?", value=dom.mandi_to_date, placeholder='Please enter the To date here in yyyy-mm-dd format')

        with gr.Column(scale=1, min_width=600):
            mandi_price = gr.Textbox(label="Mandi Price is:", value=dom.mandi_price, interactive=False)
            b_mandi_price = gr.Button("Get Mandi Price").style(size='sm')
            b_mandi_price.click(fn=dom.click_handler_for_mandi_price, inputs=[state_name, apmc_name, commodity_name, from_date, to_date], outputs=[mandi_price])

    ########################## Widget for Weather Info ###################################################
    with gr.Row(visible=False) as rowWeather:
        with gr.Column(scale=1, min_width=600):
            with gr.Tab(label='Weather Info'):
                city = gr.Textbox(label="Enter city name", placeholder='Type the city name here')
                weather = gr.Textbox(label="Current weather is:", value=dom.weather_info, interactive=False)
                b_weather = gr.Button("Get weather info").style(size='sm')
                b_weather.click(fn=dom.click_handler_for_get_weather, inputs=city, outputs=[weather])

        ########### Weather Forecast ###########
        with gr.Column(scale=1, min_width=600):
            with gr.Tab(label='Weather Forecast for next 5 days'):
                # Select the state
                state = gr.Dropdown(
                    ['Andaman-Nicobar', 'Andhra-Pradesh', 'Arunachal-Pradesh', 'Assam', 'Bihar', 'Chandigarh', 'Chhattisgarh', 'Dadra-and-Nagar-Haveli', 'Daman-and-Diu', 'Delhi', 'Goa', 'Gujarat', 'Haryana', 'Himachal-Pradesh', 'Jammu-Kashmir', 'Jharkhand', 'Karnataka', 'Kerala', 'Lakshadweep', 'Madhya-Pradesh', 'Maharashtra', 'Manipur', 'Meghalaya', 'Mizoram', 'Nagaland', 'Odisha', 'Pondicherry', 'Punjab', 'Rajasthan', 'Sikkim', 'Tamilnadu', 'Telangana', 'Tripura', 'Uttar-Pradesh', 'Uttarakhand', 'West-Bengal'],
                    label="Select state"
                )

                # # Select district
                # district = gr.Dropdown(
                #     weather_utils.STATES.get(state, {}),
                #     label="Select district"
                # )

                district = gr.Textbox(label="Enter district name", placeholder='Type the district name here')
                district_weather = gr.Textbox(label="Weather forecast is:", value=dom.weather_forecast, interactive=False)
                bd_weather = gr.Button("Get weather forecast").style(size='sm')
                bd_weather.click(fn=dom.click_handler_for_get_weather_forecast, inputs=[state, district], outputs=[district_weather])

        with gr.Column(scale=1, min_width=600):
            with gr.Tab(label='Weather Forecast Summary'):
                # Get the summary of the weather forecast
                weather_forecast_summary = gr.Textbox(label="Weather Forecast Summary is:", value=dom.weather_forecast_summary, interactive=False)
                b_weather_forecast_summary = gr.Button("Get Weather Forecast Summary").style(size='sm')
                b_weather_forecast_summary.click(fn=dom.click_handler_for_weather_forecast_summary, inputs=district_weather, outputs=[weather_forecast_summary])

    radio.change(fn=dom.select_widget, inputs=radio, outputs=[rowGeneral, rowMandiPrice, rowWeather])


demo.launch(share=True)
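The radio.change wiring above is the standard Gradio pattern for toggling between panels: a single callback returns one gr.update(visible=...) per output component, in the same order as the outputs list. A minimal self-contained sketch of the same pattern (names here are illustrative, not from this repo):

import gradio as gr

def toggle(choice):
    # One gr.update per output row, matching the order of `outputs=` below
    return [gr.update(visible=(choice == name)) for name in ("A", "B")]

with gr.Blocks() as mini:
    choice = gr.Radio(["A", "B"], label="Panel")
    with gr.Row(visible=True) as row_a:
        gr.Textbox(label="Panel A")
    with gr.Row(visible=False) as row_b:
        gr.Textbox(label="Panel B")
    choice.change(fn=toggle, inputs=choice, outputs=[row_a, row_b])

# mini.launch()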
kkms_kssw.py
ADDED
@@ -0,0 +1,125 @@
import os

import utils.constants as constants_utils
import utils.data_loader as data_loader_utils
import utils.langchain_utils as langchain_utils
import utils.weather as weather_utils
import utils.mandi_price as mandi_utils
import utils.translator as translator_utils

from llama_index import GPTSimpleVectorIndex, SimpleDirectoryReader, GPTListIndex
from langchain.indexes import VectorstoreIndexCreator
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import FAISS

import warnings
warnings.filterwarnings('ignore')


class KKMS_KSSW:
    def __init__(self):
        self.index = None
        self.documents = []
        self.response = None

        # Instantiate langchain_utils class object
        self.langchain_utils_obj = langchain_utils.LANGCHAIN_UTILS()
        # Instantiate Mandi Price utils class object
        self.mandi_utils_obj = mandi_utils.MANDI_PRICE()
        # Instantiate Weather class object
        self.weather_utils_obj = weather_utils.WEATHER()
        # Instantiate translator_utils class object
        self.translator_utils_obj = translator_utils.TRANSLATOR()

    # Initialize index (vector store)
    def initialize_index(self, save_index_to_disk=True, index_type='GPTSimpleVectorIndex'):
        # Delete the old index file to force a rebuild (disabled by default)
        # if os.path.exists(constants_utils.INDEX_FILENAME):
        #     os.system(f'rm -rf {constants_utils.INDEX_FILENAME}')
        #     print(f'{constants_utils.INDEX_FILENAME} deleted.')

        if os.path.exists(constants_utils.INDEX_FILENAME):
            # Load the index from the saved index.json file
            print(f'Loading pre-generated index from: {constants_utils.INDEX_FILENAME}')
            self.index = self.langchain_utils_obj.load_index(index_type='GPTSimpleVectorIndex', filepath=constants_utils.INDEX_FILENAME)
        else:
            # Load data from Docs
            if os.path.exists(constants_utils.DATA_PATH):
                doc_documents = SimpleDirectoryReader(constants_utils.DATA_PATH).load_data()

            # Load data from PDFs only
            # pdf_documents = data_loader_utils.load_document(doc_type='pdf', doc_filepath=doc_filepath)

            # Load data from URLs & append it to the documents that we read from PDFs
            # url_documents = data_loader_utils.load_document(doc_type='url', urls=urls)

            # Merge documents of different data sources
            self.documents = doc_documents[:]
            # self.documents.extend(url_documents)

            # Build the vector store for docs
            if index_type == 'GPTSimpleVectorIndex':
                self.index = GPTSimpleVectorIndex.from_documents(self.documents)
            elif index_type == 'FAISS':
                self.index = FAISS.from_documents(
                    self.documents,
                    OpenAIEmbeddings(openai_api_key=os.getenv('OPENAI_API_KEY'))
                )

            if save_index_to_disk:
                # Save the index to an index.json file
                print(f'Saving newly generated index: {constants_utils.INDEX_FILENAME}')

                if index_type == 'GPTSimpleVectorIndex':
                    self.index.save_to_disk(constants_utils.INDEX_FILENAME)
                elif index_type == 'FAISS':
                    self.index.save_local(constants_utils.INDEX_FILENAME)

    def merge_documents_from_different_sources(self, doc_documents, url_documents):
        # Build the vector store for docs
        doc_index = GPTSimpleVectorIndex.from_documents(doc_documents)
        # Build the vector store for URLs
        url_index = GPTSimpleVectorIndex.from_documents(url_documents)

        # Set the summary text of each index
        doc_index.set_text("index_from_docs")
        url_index.set_text("index_from_urls")

        # Merge the indices of the different data sources
        self.index = GPTListIndex([doc_index])
        self.index.insert(url_index)  # can also be passed directly as GPTListIndex([doc_index, url_index])

        return self.index

    # Query the index to retrieve the most relevant top-K documents from the vector store
    def query(self,
              question,
              mode='default',
              response_mode="default",
              similarity_top_k=1,
              required_keywords=[],
              exclude_keywords=[],
              verbose=False
              ):
        '''
        Args:
            mode: can be any of [default, embedding]
            response_mode: can be any of [default, compact, tree_summarize]
        '''

        # Query the index
        self.response = self.index.query(question,
                                         mode=mode,
                                         response_mode=response_mode,
                                         similarity_top_k=similarity_top_k,
                                         required_keywords=required_keywords,
                                         exclude_keywords=exclude_keywords,
                                         verbose=verbose)

        return self.response
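A minimal usage sketch of the KKMS_KSSW facade above, assuming OPENAI_API_KEY is set and ./data/crops/ contains documents; the question string is a placeholder:

import kkms_kssw

kkms = kkms_kssw.KKMS_KSSW()
# Loads ./output/index.json if present, otherwise builds and saves a fresh index
kkms.initialize_index(index_type='GPTSimpleVectorIndex')
response = kkms.query('Which crops are covered in the knowledge base?', similarity_top_k=2)
print(response)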
utils/constants.py
ADDED
@@ -0,0 +1,42 @@
DATA_PATH = './data/crops/'
OUTPUT_PATH = './output'
INDEX_FILENAME = f'{OUTPUT_PATH}/index.json'

URLS = [
    'https://dmi.gov.in/Documents/GrantCAGrapes.pdf',
    'https://dmi.gov.in/Documents/organicfaq.pdf',
    'https://dmi.gov.in/Documents/CAGMOrganic-III.pdf',
    'https://dmi.gov.in/GradesStandard.aspx',
    'https://www.india.gov.in/topics/agriculture',
    'https://www.india.gov.in/farmers-portal',

    # Pest Management related
    'https://niphm.gov.in/IPMPackages/Maize.pdf',

    # Mandi Price related
    'https://agmarknet.gov.in/',
    'https://enam.gov.in/web/dashboard/trade-data',

    # General information related: information of interest is present on the 2nd-level URL
    'https://agricoop.nic.in/#gsc.tab=0',
    'https://www.manage.gov.in/nf/nf.asp',

    # Weather forecast related
    'https://nwp.imd.gov.in/blf/blf_temp/',  # need to select state -> district (on the new page) -> displays a detailed table; block-level info is available from the same page on selection
    'https://nwp.imd.gov.in/blf/blf_temp/dis.php?value=12gujarat',  # weather forecast for the given state
    'https://nwp.imd.gov.in/blf/blf_temp/block.php?dis=12BHAVNAGAR',  # weather forecast for the given district
]


# Supported Indian languages for translating the English text to an Indian language
INDIC_LANGUAGE = {
    'Hindi': 'hi',
    'Gujarati': 'gu',
    'Kannada': 'kn',
    'Marathi': 'mr',
    'Panjabi': 'pa',
    'Bengali': 'bn',
    'Telugu': 'te',
    'Tamil': 'ta',
    'Malayalam': 'ml',
}
utils/data_loader.py
ADDED
@@ -0,0 +1,104 @@
import os
import pandas as pd
from pathlib import Path
from llama_index import GPTSimpleVectorIndex, download_loader
from langchain.agents import initialize_agent, Tool
from langchain.llms import OpenAI
from langchain.chains.conversation.memory import ConversationBufferMemory


class DATA_LOADER:
    def __init__(self):
        pass

    def clean_df(self, df, dropna=True, fillna=False):
        if fillna:
            df.fillna('', inplace=True)
        if dropna:
            df.dropna(inplace=True)
        # df = df[~df.isna()]
        df = df.drop_duplicates().reset_index(drop=True)
        return df

    def load_external_links_used_by_FTAs(self,
                                         sheet_filepath='./data/urls_used_by_ftas/external_links_used_by_FTAs.xlsx'
                                         ):
        xls = pd.ExcelFile(sheet_filepath)
        df = pd.DataFrame(columns=['S.No.', 'Link used for', 'Link type', 'Link'])
        for sheet_name in xls.sheet_names:
            sheet = pd.read_excel(xls, sheet_name)
            if sheet.shape[0] > 0:
                df = pd.concat([df, sheet])
            else:
                print(f'{sheet_name} has no content.')

        df = df[['Link used for', 'Link type', 'Link']]
        # Clean df
        df = self.clean_df(df)
        print(f'Total links available across all cities: {df.shape[0]}')
        return df

    def load_document(self,
                      doc_type='pdf',
                      doc_filepath='',
                      urls=[]
                      ):
        documents = []

        if doc_type == 'pdf':
            PDFReader = download_loader("PDFReader")
            loader = PDFReader()
            if os.path.exists(doc_filepath):
                documents = loader.load_data(file=Path(doc_filepath))

        elif doc_type == 'url':
            BeautifulSoupWebReader = download_loader("BeautifulSoupWebReader")
            loader = BeautifulSoupWebReader()
            if len(urls) > 0:
                # Load data from URLs
                documents = loader.load_data(urls=urls)

        elif doc_type == 'url-kb':
            KnowledgeBaseWebReader = download_loader("KnowledgeBaseWebReader")
            loader = KnowledgeBaseWebReader()
            for url in urls:
                doc = loader.load_data(
                    root_url=url,
                    link_selectors=['.article-list a', '.article-list a'],
                    article_path='/articles',
                    body_selector='.article-body',
                    title_selector='.article-title',
                    subtitle_selector='.article-subtitle',
                )
                documents.extend(doc)

        elif doc_type == 'url-chatgpt':
            BeautifulSoupWebReader = download_loader("BeautifulSoupWebReader")
            loader = BeautifulSoupWebReader()
            if len(urls) > 0:
                # Load data from URLs
                documents = loader.load_data(urls=urls)
            # Build the vector index over the scraped pages
            index = GPTSimpleVectorIndex(documents)
            tools = [
                Tool(
                    name="Website Index",
                    func=lambda q: index.query(q),
                    description="Useful when you want to answer questions about the text retrieved from websites.",
                ),
            ]

            # Call the ChatGPT API through a LangChain agent
            llm = OpenAI(temperature=0)  # keep temperature=0 to search from the given URLs only
            memory = ConversationBufferMemory(memory_key="chat_history")
            agent_chain = initialize_agent(
                tools, llm, agent="zero-shot-react-description", memory=memory
            )

            output = agent_chain.run(input="What language is on this website?")

        return documents
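A usage sketch for the loader, assuming network access and OPENAI_API_KEY for the agent branch; llama_index's download_loader fetches the reader implementations on first use, and the PDF path shown is a hypothetical file:

from utils.data_loader import DATA_LOADER
import utils.constants as constants_utils

loader = DATA_LOADER()
# pdf_docs = loader.load_document(doc_type='pdf', doc_filepath='./data/crops/some_crop.pdf')  # hypothetical path
url_docs = loader.load_document(doc_type='url', urls=constants_utils.URLS[:2])
print(f'Loaded {len(url_docs)} documents.')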
utils/langchain_utils.py
ADDED
@@ -0,0 +1,169 @@
from langchain.llms import OpenAI
from langchain.text_splitter import CharacterTextSplitter
from langchain.chains.summarize import load_summarize_chain
from langchain.docstore.document import Document
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.chains.question_answering import load_qa_chain
from langchain.chains.qa_with_sources import load_qa_with_sources_chain
from langchain.prompts import PromptTemplate
from llama_index import GPTSimpleVectorIndex
from langchain.vectorstores import FAISS

import pickle
import os
os.environ['OPENAI_API_KEY'] = os.getenv('OPENAI_API_KEY')
os.environ['HUGGINGFACEHUB_API_TOKEN'] = os.getenv('HUGGINGFACEHUB_API_TOKEN')


class LANGCHAIN_UTILS:
    def __init__(self):
        pass

    def generate_prompt_template(self, prompt_type='general'):
        prompt_template = ''

        if prompt_type == 'general':
            prompt_template = """Write a concise summary of the following:

{text}

CONCISE SUMMARY IN ENGLISH:"""

        elif prompt_type == 'weather':
            prompt_template = """
What would be the weather based on the below data:
{text}
"""

        return prompt_template

    def get_textual_summary(self,
                            text,
                            chain_type="stuff",
                            custom_prompt=True,
                            prompt_type='general'
                            ):
        texts = [text]
        docs = [Document(page_content=t) for t in texts[:3]]

        llm = OpenAI(temperature=0)
        if custom_prompt:
            prompt_template = self.generate_prompt_template(prompt_type)
            PROMPT = PromptTemplate(template=prompt_template, input_variables=["text"])
            chain = load_summarize_chain(llm, chain_type=chain_type, prompt=PROMPT)
        else:
            chain = load_summarize_chain(llm, chain_type=chain_type)

        text_summary = chain.run(docs)
        return text_summary

    def get_weather_forecast_summary(self,
                                     text,
                                     chain_type="stuff"
                                     ):
        text = f"""
What would be the weather based on the below data:
{text}

Give a simple response without technical numbers which can be explained to a human.
"""
        texts = [text]
        docs = [Document(page_content=t) for t in texts[:3]]

        llm = OpenAI(temperature=0)
        chain = load_summarize_chain(llm, chain_type=chain_type)
        text_summary = chain.run(docs)

        return text_summary

    def get_answer_from_para(self,
                             para,
                             question,
                             chain_type="stuff",
                             custom_prompt=True
                             ):
        # Prepare data (split the paragraph into chunks of small documents)
        text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
        texts = text_splitter.split_text(para)

        # Find similar docs that are relevant to the question
        embeddings = OpenAIEmbeddings()
        docsearch = Chroma.from_texts(
            texts, embeddings,
            metadatas=[{"source": str(i)} for i in range(len(texts))]
        )

        # Search for the similar docs
        docs = docsearch.similarity_search(question, k=1)

        llm = OpenAI(temperature=0)
        # Create a chain for question answering
        if custom_prompt:
            prompt_template = """Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.

{context}

Question: {question}
Answer in English:"""

            PROMPT = PromptTemplate(
                template=prompt_template, input_variables=["context", "question"]
            )
            chain = load_qa_chain(llm, chain_type=chain_type, prompt=PROMPT)
        else:
            # chain = load_qa_with_sources_chain(llm, chain_type=chain_type)
            chain = load_qa_chain(llm, chain_type=chain_type)
        # chain.run(input_documents=docs, question=question)

        out_dict = chain({"input_documents": docs, "question": question}, return_only_outputs=True)
        return out_dict['output_text']

    def store_index(self,
                    index,
                    index_type='GPTSimpleVectorIndex',
                    filepath='./output/index.json'
                    ):
        if index_type == 'GPTSimpleVectorIndex':
            index.save_to_disk(filepath)

        elif index_type == 'pickle':
            with open(filepath, "wb") as f:
                pickle.dump(index, f)

        elif index_type == 'FAISS':
            index.save_local(filepath)

    def load_index(self,
                   index_type='GPTSimpleVectorIndex',
                   filepath='./output/index.json'
                   ):
        if index_type == 'GPTSimpleVectorIndex':
            index = GPTSimpleVectorIndex.load_from_disk(filepath)

        elif index_type == 'pickle':
            with open(filepath, "rb") as f:
                index = pickle.load(f)

        elif index_type == 'FAISS':
            index = FAISS.load_local(filepath, OpenAIEmbeddings())  # can we use open-source embeddings?

        return index

    def convert_text_to_documents(self, text_list=[]):
        """
        Converts the list of text data to the Documents format that can be fed to the GPT API to build the vector store
        """
        from llama_index import Document
        documents = [Document(t) for t in text_list]
        return documents
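A usage sketch for the QA and summarization helpers above, assuming OPENAI_API_KEY is set; the paragraph and question are placeholder text:

from utils.langchain_utils import LANGCHAIN_UTILS

lc = LANGCHAIN_UTILS()
para = 'Organic certification for grapes is granted after the prescribed conversion period.'  # placeholder text
print(lc.get_textual_summary(para))
print(lc.get_answer_from_para(para, 'When is organic certification granted?'))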
utils/mandi_price.py
ADDED
@@ -0,0 +1,33 @@
import requests


class MANDI_PRICE:
    def __init__(self):
        self.base_url = "https://enam.gov.in/web/Ajax_ctrl/trade_data_list"
        # "https://enam.gov.in/web/dashboard/trade-data",
        # "https://enam.gov.in/web/dashboard/trade_data_list",

    def get_mandi_price(self,
                        state_name,
                        apmc_name,
                        commodity_name,
                        from_date,
                        to_date
                        ):
        # Prepare the payload for the POST request
        payload = f"language=en&stateName={state_name}&apmcName={apmc_name}&commodityName={commodity_name}&fromDate={from_date}&toDate={to_date}"

        headers = {
            "Content-type": "application/x-www-form-urlencoded; charset=UTF-8",
            "Referer": "https://enam.gov.in/web/dashboard/trade-data",
            "Accept": "application/json, text/javascript, */*; q=0.01",
        }

        # The payload is a form-urlencoded string (see the Content-type header),
        # so it must go in the request body via `data=`, not `json=`.
        response = requests.post(
            self.base_url,
            data=payload,
            headers=headers,
        )

        return response.json()
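A usage sketch for the eNAM scraper; the state, APMC, and commodity values below are illustrative and must match the spellings the eNAM dashboard expects, and the endpoint may change or rate-limit:

from utils.mandi_price import MANDI_PRICE

mandi = MANDI_PRICE()
prices = mandi.get_mandi_price(
    state_name='GUJARAT',      # illustrative values
    apmc_name='Amreli',
    commodity_name='Cotton',
    from_date='2023-03-01',
    to_date='2023-03-03',
)
print(prices)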
utils/translator.py
ADDED
@@ -0,0 +1,61 @@
import utils.constants as constants_utils
import requests
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
from mosestokenizer import *
from indicnlp.tokenize import sentence_tokenize
from googletrans import Translator, constants


class TRANSLATOR:
    def __init__(self):
        pass

    def split_sentences(self, paragraph, language):
        if language == "en":
            with MosesSentenceSplitter(language) as splitter:
                return splitter([paragraph])
        elif language in constants_utils.INDIC_LANGUAGE.values():
            # INDIC_LANGUAGE maps language names to ISO codes; sentence_split expects the code
            return sentence_tokenize.sentence_split(paragraph, lang=language)

    def get_in_hindi(self, payload):
        tokenizer = AutoTokenizer.from_pretrained("facebook/nllb-200-distilled-600M")
        model = AutoModelForSeq2SeqLM.from_pretrained("facebook/nllb-200-distilled-600M")
        article = self.split_sentences(payload['inputs'], 'en')
        # inputs = tokenizer(payload['input'], return_tensors="pt")
        out_text = ""
        for a in article:
            inputs = tokenizer(a, return_tensors="pt")
            translated_tokens = model.generate(**inputs, forced_bos_token_id=tokenizer.lang_code_to_id["hin_Deva"], max_length=100)
            translated_sent = tokenizer.batch_decode(translated_tokens, skip_special_tokens=True)[0]
            # Concatenate the translated sentences (str.join here would interleave characters)
            out_text += translated_sent + ' '
        return out_text.strip()

    def get_in_indic(self, text, language='Hindi'):
        tokenizer = AutoTokenizer.from_pretrained("facebook/nllb-200-distilled-600M")
        model = AutoModelForSeq2SeqLM.from_pretrained("facebook/nllb-200-distilled-600M")
        inputs = tokenizer(text, return_tensors="pt")

        code = "eng_Latn"
        if language == 'Hindi':
            code = "hin_Deva"
        elif language == 'Marathi':
            code = "mar_Deva"

        translated_tokens = model.generate(
            **inputs,
            forced_bos_token_id=tokenizer.lang_code_to_id[code],
            max_length=1000
        )

        out_text = tokenizer.batch_decode(translated_tokens, skip_special_tokens=True)[0]
        return out_text

    def get_indic_google_translate(self, text, language='Hindi'):
        # Initialize the Google API translator
        translator = Translator()
        translations = translator.translate(text, dest=constants_utils.INDIC_LANGUAGE.get(language, 'en'))
        return str(translations.text)
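A usage sketch for the translators above: get_indic_google_translate only needs network access, while the NLLB-200 paths download a multi-gigabyte checkpoint from the Hugging Face Hub on first call; the sample sentence is a placeholder:

from utils.translator import TRANSLATOR

t = TRANSLATOR()
print(t.get_indic_google_translate('The weather is clear today.', language='Hindi'))
# print(t.get_in_indic('The weather is clear today.', language='Marathi'))  # heavier NLLB-200 path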
utils/weather.py
ADDED
@@ -0,0 +1,202 @@
import requests
from bs4 import BeautifulSoup as bs


STATE_CODES = {
    'Andaman-Nicobar': '01',
    'Andhra-Pradesh': '02',
    'Arunachal-Pradesh': '03',
    'Assam': '04',
    'Bihar': '05',
    'Chandigarh': '06',
    'Chhattisgarh': '07',
    'Dadra-and-Nagar-Haveli': '08',
    'Daman-and-Diu': '09',
    'Delhi': '10',
    'Goa': '11',
    'Gujarat': '12',
    'Haryana': '13',
    # 14
    'Himachal-Pradesh': '15',
    'Jammu-Kashmir': '16',
    'Jharkhand': '17',
    'Karnataka': '18',
    'Kerala': '19',
    'Lakshadweep': '20',
    'Madhya-Pradesh': '21',
    'Maharashtra': '22',
    'Manipur': '23',
    'Meghalaya': '24',
    'Mizoram': '25',
    'Nagaland': '26',
    'Odisha': '27',
    'Pondicherry': '28',
    'Punjab': '29',
    'Rajasthan': '30',
    'Sikkim': '31',
    'Tamilnadu': '32',
    'Telangana': '33',
    'Tripura': '34',
    'Uttar-Pradesh': '35',
    'Uttarakhand': '36',
    'West-Bengal': '37',
}

# List of states that are given as the input selection to https://nwp.imd.gov.in/blf/blf_temp/ to get the weather forecast
STATES = {
    'Andaman-Nicobar': {},
    'Andhra-Pradesh': {},
    'Arunachal-Pradesh': {},
    'Assam': {},
    'Bihar': {},
    'Chandigarh': {},
    'Chhattisgarh': {},
    'Dadra-and-Nagar-Haveli': {},
    'Daman-and-Diu': {},
    'Delhi': {
        'CENTRAL-DELHI': ['CENTRAL-DELHI'],
        'EAST-DELHI': ['EAST-DELHI'],
        'NEW-DELHI': ['NEW-DELHI'],
        'NORTH-DELHI': ['NORTH-DELHI'],
        'NORTH-EAST-DELHI': ['NORTH-EAST-DELHI'],
        'NORTH-WEST-DELHI': ['NORTH-WEST-DELHI'],
        'SHAHDARA': ['SHAHDARA'],
        'SOUTH-DELHI': ['SOUTH-DELHI'],
        'SOUTH-EAST-DELHI': ['SOUTH-EAST-DELHI'],
        'SOUTH-WEST-DELHI': ['SOUTH-WEST-DELHI'],
        'WEST-DELHI': ['WEST-DELHI'],
    },
    'Goa': {},
    'Gujarat': {
        'AHMADABAD': ['AHMEDABAD-CITY', 'BAVLA', 'DASKROI', 'DETROJ-RAMPURA', 'DHANDHUKA', 'DHOLERA', 'DHOLKA', 'MANDAL', 'SANAND', 'VIRAMGAM'],
        'AMRELI': ['AMRELI', 'BABRA', 'BAGASARA', 'DHARI', 'JAFRABAD', 'KHAMBHA', 'KUNKAVAV-VADIA', 'LATHI', 'LILIA', 'RAJULA', 'SAVERKUNDLA'],
        'ANAND': [],
        'ARVALLI': [],
        'BANASKANTHA': [],
        'BHARUCH': [],
        'BHAVNAGAR': [],
        'BOTAD': [],
        'CHHOTAUDEPUR': [],
        'DANG': [],
        'DEVBHUMI-DWARKA': [],
        'DOHAD': [],
        'GANDHINAGAR': [],
        'GIR-SOMNATH': [],
        'JAMNAGAR': [],
        'JUNAGADH': [],
        'KACHCHH': [],
        'KHEDA': [],
        'MAHESANA': [],
        'MAHISAGAR': [],
        'MORBI': [],
        'NARMADA': [],
        'NAVSARI': [],
        'PANCH-MAHALS': [],
        'PATAN': [],
        'PORBANDAR': [],
        'RAJKOT': [],
        'SABAR-KANTHA': [],
        'SURAT': ['BARDOLI', 'CHORASI', 'KAMREJ', 'MAHUVA', 'MANDVI', 'MANGROL', 'OLPAD', 'PALSANA', 'SURAT-CITY', 'UMARPADA'],
        'SURENDRANAGAR': [],
        'TAPI': [],
        'VADODARA': [],
        'VALSAD': [],
    },
    'Haryana': {},
    'Himachal-Pradesh': {},
    'Jammu-Kashmir': {},
    'Jharkhand': {},
    'Karnataka': {},
    'Kerala': {},
    'Lakshadweep': {},
    'Madhya-Pradesh': {},
    'Maharashtra': {},
    'Manipur': {},
    'Meghalaya': {},
    'Mizoram': {},
    'Nagaland': {},
    'Odisha': {},
    'Pondicherry': {},
    'Punjab': {},
    'Rajasthan': {},
    'Sikkim': {},
    'Tamilnadu': {},
    'Telangana': {},
    'Tripura': {},
    'Uttar-Pradesh': {},
    'Uttarakhand': {},
    'West-Bengal': {},
}


class WEATHER:
    def __init__(self):
        self.base_url = 'https://nwp.imd.gov.in/blf/blf_temp'

    # Weather forecast from the Govt. website
    def get_weather_forecast(self, state, district, is_block_level=False):
        self.district_url = f"{self.base_url}/block.php?dis={STATE_CODES.get(state, '') + district}"
        self.block_url = f'{self.base_url}/table2.php'

        response = requests.get(self.district_url if not is_block_level else self.block_url)
        soup = bs(response.text, 'html.parser')
        scripts = soup.findAll('font')[0]
        return scripts.text

    # Weather scraped from the Google search results page
    def get_weather(self, city):
        city = city + " weather"
        city = city.replace(" ", "+")

        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'
        }
        response = requests.get(
            f'https://www.google.com/search?q={city}&oq={city}&aqs=chrome.0.35i39l2j0l4j46j69i60.6128j1j7&sourceid=chrome&ie=UTF-8', headers=headers)

        soup = bs(response.text, 'html.parser')
        location = soup.select('#wob_loc')[0].getText().strip()
        time = soup.select('#wob_dts')[0].getText().strip()
        info = soup.select('#wob_dc')[0].getText().strip()
        temperature = soup.select('#wob_tm')[0].getText().strip()
        temperature = temperature + "°C"

        return time, info, temperature
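A usage sketch, assuming the IMD page layout and Google's result-page element IDs stay as scraped above (both are brittle scraping targets); the city and district values are illustrative, with the district spelling matching the IMD URL format (e.g. dis=12BHAVNAGAR):

from utils.weather import WEATHER

w = WEATHER()
time, info, temperature = w.get_weather('Ahmedabad')  # illustrative city
print(f'{time}: {temperature}, {info}')
print(w.get_weather_forecast(state='Gujarat', district='BHAVNAGAR'))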