sampathlonka commited on
Commit
377ed3a
1 Parent(s): a6b7040
Files changed (4) hide show
  1. Tools.py +248 -0
  2. database.py +43 -0
  3. requirements.txt +3 -1
  4. utils.py +120 -0
Tools.py ADDED
@@ -0,0 +1,248 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import chardet
3
+ import pandas as pd
4
+ import streamlit as st
5
+ import pymysql
6
+ import ast
7
+ import re
8
+ from utils import word_sentence_similarity, extract_meaning_by_language, get_list_meaning_word, get_details_mantra_json
9
+ from llama_index.core.tools.tool_spec.base import BaseToolSpec
10
+ from database import execute_query
11
+ import pandas as pd
12
+ import json
13
+ import ast
14
+ import logging
15
+
16
+
17
# Locations of the CSV data files read by the tool specs below.
# The paths are relative, so they resolve against the current working directory.
SCRIPTURE_DESCRIPTIONS_CSV_PATH = "Data/scripture_descriptions.csv"
VEDAMANTRA_CSV_PATH = "Data/veda_content_modified_v3.csv"
PADA_CSV_PATH = "Data/term_data_processed_v2.csv"
21
+
22
class ScriptureDescriptionToolSpec(BaseToolSpec):
    """Tool spec that looks up scripture descriptions in a CSV table.

    The CSV at ``SCRIPTURE_DESCRIPTIONS_CSV_PATH`` is loaded once when the
    spec is constructed. ``get_description`` filters that table by scripture
    name and by up to three hierarchy levels.
    """

    spec_functions = ["get_description"]

    def __init__(self):
        super().__init__()
        # The file's encoding is not assumed: detect it from the raw bytes
        # first, then let pandas decode the CSV with the detected encoding.
        with open(SCRIPTURE_DESCRIPTIONS_CSV_PATH, 'rb') as f:
            result = chardet.detect(f.read())
        encoding = result['encoding']
        self.df = pd.read_csv(SCRIPTURE_DESCRIPTIONS_CSV_PATH, encoding=encoding)

    def _query_description(self, conditions):
        """Return the first row selected by ``conditions`` as a dict.

        Args:
            conditions: Boolean mask aligned with ``self.df``.

        Raises:
            ValueError: If no row matches.
        """
        result = self.df[conditions]
        if result.empty:
            # Same message the caller has always received; raised directly
            # instead of via an intermediate IndexError.
            raise ValueError(
                "Failed to get scripture description: Scripture description not found."
            )
        return result.iloc[0].to_dict()

    def get_description(self, level_0, level_1=None, level_2=None, level_3=None):
        """Return the description row for a scripture, optionally narrowed by level.

        Args:
            level_0: Scripture name, matched case-insensitively.
            level_1: Optional value for the ``level_1`` column.
            level_2: Optional value for the ``level_2`` column.
            level_3: Optional value for the ``level_3`` column.

        Returns:
            The first matching row as a dict, or ``{"error": message}`` when
            nothing matches.
        """
        try:
            # str() keeps a non-string scripture name from raising an
            # AttributeError that the handler below would not catch.
            conditions = (self.df['scripture_name'].str.lower() == str(level_0).lower())
            # Filter only on the levels actually supplied. Previously an
            # omitted shallower level was compared against the string 'None',
            # which could never match.
            for column, level in (('level_1', level_1), ('level_2', level_2), ('level_3', level_3)):
                if level is not None:
                    conditions &= (self.df[column] == str(level))
            return self._query_description(conditions)
        except ValueError as e:
            return {"error": str(e)}
54
+
55
class MantraToolSpec(BaseToolSpec):
    '''
    To obtain the vedamantra details such as vedamantra, padapata, devata, chandah, rishi etc. of vedamantras
    (or mantras or hymns) from all vedas (RigVeda, AtharvaVeda, SamaVeda, KrishnaYajurVeda, and ShuklaYajurVeda)
    use the function `get_vedamantra_details`. The mantra summary such as anvaya, mantraVishaya,
    bhavartha/meaning (adhibautic, adhyatmic, adhidaivic), purpose, usage and tippani of a vedamantra is
    accessible using the function `get_vedamantra_summary`.
    Sample Query:
    1. What is the vedamantra of the mantra from Rigveda, first mandala, first shukta, and first mantra?
    2. What is the devata of the vedamantra from Rigveda, first mandala, first shukta, and first mantra?
    3. What is the meaning of the vedamantra from Rigveda, first mandala, first shukta, and first mantra written by Tulsi Ram?
    4. What is the (adhibautic) meaning of the vedamantra from RigVeda, first mandala, first shukta, and first mantra?
    5. What is the mantraVishaya of the vedamantra from RigVeda, first mandala, first shukta, and first mantra?
    '''
    spec_functions = ["get_vedamantra_details", "get_vedamantra_summary"]

    def __init__(self):
        super().__init__()
        # Local lookup table used to resolve a scripture location to a mantra number.
        self.df_vedamantra = pd.read_csv(VEDAMANTRA_CSV_PATH, encoding='utf-8')

    def _get_mantra_details(self, query):
        """Run ``query`` and return entry 1 of the mantra header's language list.

        Raises:
            ValueError: If the query or the JSON lookup fails for any reason.
        """
        try:
            details = get_details_mantra_json(query)
            return details['mantraHeader']['language'][1]
        except Exception as e:
            raise ValueError(f"Failed to get mantra details: {e}") from e

    def _query_db(self, conditions):
        """Return the first ``mantra_number`` among rows selected by ``conditions``.

        Raises:
            ValueError: "Failed to query database." when the lookup itself
                fails, or "Mantra not found." when no row matches.
        """
        try:
            result = self.df_vedamantra[conditions]['mantra_number'].values
        except Exception as e:
            raise ValueError("Failed to query database.") from e
        # Checked outside the try so the specific "not found" message is not
        # replaced by the generic lookup-failure message.
        if len(result) == 0:
            raise ValueError("Mantra not found.")
        return result[0]

    def _get_query_conditions(self, scripture_name, **kwargs):
        """Build a boolean mask over the vedamantra table.

        ``scripture_name`` is matched case-insensitively; every keyword
        argument is treated as ``column == value``.
        """
        conditions = (self.df_vedamantra['scripture_name'].str.lower() == scripture_name.lower())
        for key, value in kwargs.items():
            conditions &= (self.df_vedamantra[key] == value)
        return conditions

    def _get_mantra_id(self, scripture_name, **kwargs):
        """Resolve a scripture name plus column filters to a mantra number."""
        conditions = self._get_query_conditions(scripture_name, **kwargs)
        return self._query_db(conditions)

    def _build_mantra_query(self, mantraid, scripture_name, **kwargs):
        """Return the SQL that selects ``mantra_json`` for the requested mantra.

        Uses ``mantraid`` when given; otherwise resolves the id from
        ``scripture_name`` and the column filters in ``kwargs``.

        Raises:
            ValueError: If neither ``mantraid`` nor ``scripture_name`` is given,
                or the mantra cannot be resolved.
        """
        if not mantraid:
            if scripture_name is None:
                raise ValueError("Either mantraid or scripture_name must be provided.")
            mantraid = self._get_mantra_id(scripture_name, **kwargs)
        # The id may come straight from tool input, so escape it before it is
        # embedded in the SQL string literal.
        safe_id = pymysql.converters.escape_string(str(mantraid))
        return f"SELECT mantra_json FROM veda_content WHERE mantra_number = '{safe_id}'"

    def get_vedamantra_details(self, mantraid=None, scripture_name=None, **kwargs):
        """Get vedamantra header details by mantra id or by scripture location.

        Pass either ``mantraid``, or ``scripture_name`` plus location filters as
        keyword arguments. Returns the details, or ``{"error": message}``.
        """
        try:
            query = self._build_mantra_query(mantraid, scripture_name, **kwargs)
            return self._get_mantra_details(query)
        except Exception as e:
            return {"error": str(e)}

    def get_vedamantra_summary(self, mantraid=None, scripture_name=None, **kwargs):
        """Get vedamantra summaries by mantra id or by scripture location.

        Pass either ``mantraid``, or ``scripture_name`` plus location filters as
        keyword arguments. Returns a dict of summaries, or ``{"error": message}``.
        """
        try:
            query = self._build_mantra_query(mantraid, scripture_name, **kwargs)
            json_dict = get_details_mantra_json(query)
            mantra_summary = json_dict['mantraSummary']['language']
            # Entry 1 of the language list is reported under the Roman-IAST key.
            summary_dict = {"Roman-IAST summary of vedamantra": mantra_summary[1]}
            for item in mantra_summary:
                if item['languageName'] == 'English':
                    mahatma = item['mahatma']['mahatmaName']
                    summary_dict[f"English summary of vedamantra by {mahatma}"] = item
            return summary_dict
        except Exception as e:
            return {"error": str(e)}
126
+
127
class PadaToolSpec(BaseToolSpec):
    '''
    Purpose: To obtain a complete or meaningful meaning of a word or pada based on context information.
    1. The function 'get_pada_meaning' is used to get all the possible meanings of the pada.
    2. The function 'get_adibauatic_adidaivic_adhyatmic_meaning_of_pada' is used to get the adibhautic,
       adidaivic and adhyatmic meaning of a word based on context information.
    Use the context to generate a meaningful meaning of the pada in the vedamantra.
    Sample query:
    1. What is the meaning of the word apratidhṛṣṭa-śavasam?
    2. What is the adibauatic meaning of the word apratidhṛṣṭa-śavasam?
    3. Whats the adidaivic meaning of the word apratidhṛṣṭa-śavasam?
    4. What is the adyatmic meaning of the word apratidhṛṣṭa-śavasam?
    '''
    spec_functions = ["get_pada_meaning", "get_adibauatic_adidaivic_adhyatmic_meaning_of_pada"]

    def __init__(self):
        super().__init__()
        # Nullable Int64 keeps the numbering columns integral even when some
        # rows have no value for them.
        self.df_terms = pd.read_csv(PADA_CSV_PATH, dtype={'AnuvakNumber': 'Int64', 'PrapatakNumber': 'Int64', 'KandahNumber': 'Int64', 'ShuktaNumber': 'Int64', 'ArchikahNumber': 'Int64', 'AdhyayaNumber': 'Int64', 'MandalaNumber': 'Int64', 'ParyayaNumber': 'Int64'}, encoding='utf-8')
        self.df_vedic_content = pd.read_csv(VEDAMANTRA_CSV_PATH, encoding='utf-8')

    def _get_pada_details_by_scripture(self, pada, scripture_name=None, **kwargs):
        """Return the terms-table rows for ``pada``, or ``None``.

        Rows are narrowed by ``scripture_name`` (case-insensitive) when given
        and by every non-``None`` keyword argument as ``column == value``.
        ``None`` is returned when nothing matches or the lookup fails; failures
        are logged.
        """
        try:
            condition = (self.df_terms['Pada'] == pada)
            if scripture_name:
                condition &= (self.df_terms['scripture_name'].str.lower() == scripture_name.lower())
            for key, value in kwargs.items():
                if value is not None:
                    condition &= (self.df_terms[key] == value)
            filtered_df = self.df_terms[condition]
            return filtered_df if not filtered_df.empty else None
        except Exception as e:
            logging.error(f"Error in _get_pada_details_by_scripture: {e}")
            return None

    def _get_vedamantra_meaning(self, mantraID, MahatmaName=None):
        """Return the adibhautic/adidaivic/adhyatmic meanings of a mantra.

        Among the English summaries (restricted to ``MahatmaName`` when at
        least one summary by that name exists), the one with the most
        non-empty categories is chosen; on a tie the later one wins.

        Returns:
            A dict keyed by the three category names, or ``{"error": message}``.
        """
        try:
            # mantraID is always taken from a row of the local terms table,
            # not directly from caller input.
            query = f"SELECT mantra_json FROM veda_content WHERE mantra_number = '{mantraID}'"
            jsonDict = get_details_mantra_json(query)
            mantraSummary = jsonDict['mantraSummary']['language']
            if MahatmaName is not None:
                filtered_summary = [data_dict for data_dict in mantraSummary if data_dict.get('mahatma', {}).get('mahatmaName') == MahatmaName]
                if filtered_summary:
                    mantraSummary = filtered_summary
            categories = ['adibhautic', 'adidaivic', 'adhyatmic']
            best_meaning = None
            best_count = 0
            for data_dict in mantraSummary:
                if data_dict.get('languageName') == "English":
                    meanings = data_dict['mahatma']['bhavartha']
                    count = sum(bool(meanings.get(cat, None)) for cat in categories)
                    if count >= best_count:
                        best_meaning = {cat: meanings.get(cat, None) for cat in categories}
                        best_count = count
            return best_meaning if best_meaning else {"error": "Required meaning associated with vedamantra is not available."}
        except Exception as e:
            logging.error(f"Error in _get_vedamantra_meaning: {e}")
            return {"error": f"An error occurred: {e}"}

    def _get_pada_morphology(self, term_details, meanings):
        """Pair each stem/root in a term's morphology with its best-scoring meaning.

        Args:
            term_details: A terms-table row whose ``Morphology`` field holds a
                Python-literal list of dicts.
            meanings: Text the candidate meanings are scored against.

        Returns:
            One dict per morphology entry; an empty list on any failure
            (the failure is logged).
        """
        try:
            morphology_list = ast.literal_eval(term_details['Morphology'])
            term_morph_list = []
            for morphs in morphology_list:
                term_info = {}
                for field in ['stem', 'root']:
                    morph_word = morphs.get(field)
                    if morph_word:
                        meaning = word_sentence_similarity(meanings, morph_word)
                        term_info[f'{field}_word'] = morph_word
                        # The result is sorted best-first, so entry 0 is the top match.
                        term_info[f'{field}_meaning'] = meaning[0][0] if meaning else None
                        term_info[f'{field}_score'] = meaning[0][1] if meaning else None
                term_info['grammar'] = morphs['grammar']
                term_morph_list.append(term_info)
            return term_morph_list
        except Exception as e:
            logging.error(f"Error in _get_pada_morphology: {e}")
            return []

    def get_pada_meaning(self, pada):
        """Get the dictionary meanings of every stem and root of ``pada``.

        Returns a list of dictionary lookups, or ``{"error": message}``.
        """
        try:
            pada_details = self.df_terms[self.df_terms['Pada'] == pada]
            meanings_list = []
            for morphs in ast.literal_eval(pada_details['Morphology'].values[0]):
                for field in ['stem', 'root']:
                    word = morphs.get(field)
                    if word:
                        meanings_list.append(get_list_meaning_word(word))
            return meanings_list
        except Exception as e:
            logging.error(f"Error in get_pada_meaning: {e}")
            return {"error": f"Required meaning associated with pada is not available. {e}"}

    def get_adibauatic_adidaivic_adhyatmic_meaning_of_pada(self, pada, Pada_position=None, mantraid=None, scripture_name=None, **kwargs):
        """Get the adibhautic, adidaivic and adhyatmic meaning of ``pada`` in context.

        Locate the pada by ``mantraid``, or by ``scripture_name`` plus location
        filters passed as keyword arguments. ``MahatmaName`` selects whose
        summary to use. Returns a dict of meanings, or ``{"error": message}``.
        """
        try:
            mahatma_name = kwargs.get('MahatmaName')
            # MahatmaName selects a summary author; it is assumed not to be a
            # column of the terms table, so it is kept out of the row filters
            # (passing it through made the lookup fail on an unknown column).
            location_filters = {key: value for key, value in kwargs.items() if key != 'MahatmaName'}
            if mantraid:
                details = self.df_terms[(self.df_terms['mantra_id'] == mantraid) & (self.df_terms['Pada'] == pada)]
            else:
                # Also covers the case where scripture_name is None: the helper
                # then searches by pada (and filters) alone, so ``details`` is
                # always bound.
                details = self._get_pada_details_by_scripture(pada, scripture_name, **location_filters)
            if details is not None and Pada_position:
                details = details[details['Pada_position'] == Pada_position]
            # The helper returns None (not an empty frame) when nothing matches.
            if details is None or details.empty:
                return {"error": f"No details found for pada '{pada}'"}
            pada_details = details.iloc[0]
            mantraID = pada_details['mantra_id']
            meanings = self._get_vedamantra_meaning(mantraID, MahatmaName=mahatma_name)
            if 'error' in meanings:
                return meanings
            ab_term_morph_list = self._get_pada_morphology(pada_details, meanings['adibhautic'])
            ad_term_morph_list = self._get_pada_morphology(pada_details, meanings['adidaivic'])
            at_term_morph_list = self._get_pada_morphology(pada_details, meanings['adhyatmic'])
            return {
                f'adibhautic_info_{pada}': ab_term_morph_list,
                'vedamantra_adibhautic_meaning': meanings['adibhautic'],
                f'adidavic_info_{pada}': ad_term_morph_list,
                'vedamantra_adidavic_meaning': meanings['adidaivic'],
                f'adhyatmic_info_{pada}': at_term_morph_list,
                'vedamantra_adhyatmic_meaning': meanings['adhyatmic']
            }
        except Exception as e:
            logging.error(f"Error in get_adibauatic_adidaivic_adhyatmic_meaning_of_pada: {e}")
            return {"error": f"Failed to get meaning of the word {pada}. {e}"}
database.py ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pymysql
2
+ import json
3
+ import pandas as pd
4
+ import re
5
+
6
+
7
def initialize_database():
    """Open and return a new PyMySQL connection to the Veda database.

    Each setting can be overridden through an environment variable
    (``VEDA_DB_HOST``, ``VEDA_DB_USER``, ``VEDA_DB_PASSWORD``, ``VEDA_DB_PORT``,
    ``VEDA_DB_NAME``). When a variable is unset, the value that used to be
    hard-coded here is used, so existing deployments behave as before.

    Returns:
        The connection object returned by ``pymysql.connect``.
    """
    # Local import: this module's import block does not bring in ``os``.
    import os

    db_params = {
        "host": os.getenv("VEDA_DB_HOST", "localhost"),
        "user": os.getenv("VEDA_DB_USER", "cms-readonly-user"),
        # NOTE(review): this default is a credential committed to source
        # control. It is kept only for backward compatibility; set
        # VEDA_DB_PASSWORD and rotate this value.
        "password": os.getenv("VEDA_DB_PASSWORD", "%Reed!!"),
        "port": int(os.getenv("VEDA_DB_PORT", "3307")),
        "database": os.getenv("VEDA_DB_NAME", "veda_prod_v1"),
    }
    return pymysql.connect(**db_params)
17
+
18
def execute_query(query, params=None):
    """Run ``query`` on a fresh connection and return its description and rows.

    Args:
        query: SQL text. When ``params`` is given, use ``%s`` placeholders.
        params: Optional sequence or mapping of values bound to the
            placeholders by the driver. Omit it to execute ``query`` as-is,
            which is what existing callers do.

    Returns:
        ``(description, rows)`` on success, where ``description`` is the
        cursor's column metadata and ``rows`` is everything ``fetchall``
        returned. ``None`` if executing the query raised; the error is printed
        and the transaction rolled back.
    """
    db = initialize_database()
    try:
        # The context manager closes the cursor even if execution fails.
        with db.cursor() as cursor:
            cursor.execute(query, params)
            description = cursor.description
            result = cursor.fetchall()  # Fetch all rows from the result set
        db.commit()
        return description, result
    except Exception as e:
        print("Error executing query:", e)
        db.rollback()
        return None  # Callers must handle None before unpacking
    finally:
        db.close()
33
+
34
+
35
def _get_details_mantra_json(self, query):
    """Run ``query`` and return the first row's ``mantra_json`` column, parsed.

    Args:
        self: Unused. NOTE(review): this is a module-level function, so the
            parameter looks like a leftover from a method; it is kept so any
            existing call sites keep working.
        query: SQL text whose result set contains a ``mantra_json`` column.

    Returns:
        The decoded JSON after markup-like ``<...>`` spans are removed from the
        stored text.

    Raises:
        ValueError: If the query failed or returned no rows.
    """
    outcome = execute_query(query)
    # execute_query reports failure by returning None; unpacking that directly
    # would raise an unrelated-looking TypeError.
    if outcome is None:
        raise ValueError("Query failed; no mantra JSON could be retrieved.")
    description, data = outcome
    if not data:
        raise ValueError("Query returned no rows; no mantra JSON is available.")
    df = pd.DataFrame(data)
    # Column names are the first field of each cursor description entry.
    df.columns = [x[0] for x in description]
    mantra_json = df['mantra_json'].values[0]
    cleaned_data = re.sub('<[^<]+?>', '', mantra_json)
    return json.loads(cleaned_data)
42
+
43
+
requirements.txt CHANGED
@@ -3,7 +3,9 @@ sentence_transformers==2.4.0
3
  llama_index==0.10.4
4
  llama-index-vector-stores-pinecone
5
  llama-index-embeddings-huggingface
 
6
  pinecone-client==3.1.0
7
  cohere==4.50
8
  chardet==5.2.0
9
- streamlit==1.31.1
 
 
3
  llama_index==0.10.4
4
  llama-index-vector-stores-pinecone
5
  llama-index-embeddings-huggingface
6
+ llama_index-embeddings-nomic
7
  pinecone-client==3.1.0
8
  cohere==4.50
9
  chardet==5.2.0
10
+ streamlit==1.31.1
11
+ aksharamukha==2.1.2
utils.py ADDED
@@ -0,0 +1,120 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import re
3
+ import requests
4
+ import json
5
+ import numpy as np
6
+ import pandas as pd
7
+ from bs4 import BeautifulSoup
8
+ from database import execute_query
9
+ from aksharamukha import transliterate
10
+ from sentence_transformers import util
11
+ from llama_index.embeddings.nomic import NomicEmbedding
12
+
13
# API key for the Nomic embedding service, taken from the environment
# (None when NOMIC_API_KEY is not set).
nomic_api_key = os.environ.get('NOMIC_API_KEY')

# Shared Nomic embedding model used to compute similarity scores.
nomic_embed_model = NomicEmbedding(
    model_name="nomic-embed-text-v1.5",
    dimensionality=128,
    api_key=nomic_api_key,
)
20
+
21
+
22
+
23
def get_list_meaning_word(word):
    """Fetch dictionary entries for ``word`` from ambuda.org.

    The page for the Monier-Williams, Shabda-Sagara and Apte dictionaries is
    downloaded and each dictionary's section is parsed independently, so a
    failure in one section leaves the others intact.

    Args:
        word: The word to look up; it is inserted into the request URL as-is.

    Returns:
        A dict with the key ``'pada'`` (the word) and one list of entries per
        dictionary. A list stays empty when that section could not be parsed
        or the request failed; failures are printed, not raised.
    """
    pada_meanings = {'pada': word,
                     'Monier-Williams Sanskrit-English Dictionary (1899)': [],
                     'Shabda-Sagara (1900)': [],
                     'Apte-Practical Sanskrit-English Dictionary (1890)': [],
                     }
    url = f"https://ambuda.org/tools/dictionaries/mw,shabdasagara,apte/{word}"

    try:
        # A timeout keeps a stalled server from hanging the caller forever;
        # a timeout is a RequestException and is reported by the handler below.
        response = requests.get(url, timeout=10)
        response.raise_for_status()

        # Parse HTML with BeautifulSoup
        soup = BeautifulSoup(response.text, 'html.parser')

        # One matching <div> per dictionary, in the order requested in the URL
        divs = soup.find_all('div', class_='my-4', attrs={'x-show': 'show'})

        try:
            # Monier-Williams entries are <li> items inside the section's <ul>
            div_items_0 = divs[0].find('ul').find_all('li', class_='dict-entry mw-entry')
            dive_text_0 = [li_tag.get_text(strip=True) for li_tag in div_items_0]
            text_0_trans = [transliterate.process(src='Devanagari', tgt='IAST', txt=text) for text in dive_text_0]
            pada_meanings['Monier-Williams Sanskrit-English Dictionary (1899)'] = text_0_trans
        except Exception:
            print("Error: Unable to find Monier-Williams Sanskrit-English Dictionary (1899) data.")

        try:
            div_items_1 = divs[1].find_all('div')
            dive_text_1 = [item.get_text(strip=True) for item in div_items_1]
            text_1_trans = [transliterate.process(src='Devanagari', tgt='IAST', txt=text) for text in dive_text_1]
            pada_meanings['Shabda-Sagara (1900)'] = text_1_trans
        except Exception:
            print("Error: Unable to find Shabda-Sagara (1900) data.")

        try:
            apte_meanings = []
            for tag in divs[2].find_all('b'):
                if tag.text.strip() != '—':
                    sibling = tag.find_next_sibling()  # First tag after the <b>
                    text2 = tag.next_sibling.strip() + ' '  # Text directly after the <b>
                    # Collect text up to the next <div>, transliterating any
                    # <span> content to IAST along the way.
                    while sibling.name != 'div':
                        if sibling.name is None:  # Handling non-tag text
                            text2 += " "
                        elif sibling.name == 'span':
                            IAST_text = transliterate.process(src='Devanagari', tgt='IAST', txt=sibling.text.strip())
                            text2 += IAST_text + ' ' + sibling.next_sibling.strip()
                        else:
                            text2 += sibling.text.strip() + ' ' + sibling.next_sibling.strip()
                        sibling = sibling.find_next_sibling()
                    apte_meanings.append(text2)
            # The final collected entry is dropped, as in the original code.
            pada_meanings['Apte-Practical Sanskrit-English Dictionary (1890)'] = apte_meanings[:-1]
        except Exception:
            print("Error: Unable to find Apte-Practical Sanskrit-English Dictionary (1890) data.")

    except requests.exceptions.RequestException as e:
        print(f"Error: Failed to fetch data from {url}. {e}")

    return pada_meanings
85
+
86
+ #get similarity scores
87
def word_sentence_similarity(meanings, root_stem_word):
    """Rank the dictionary meanings of a word by similarity to a text.

    Args:
        meanings: Text to compare against.
        root_stem_word: Word whose Monier-Williams and Shabda-Sagara entries
            are scored.

    Returns:
        A list of ``(dictionary_meaning, cosine_similarity)`` tuples sorted by
        similarity, highest first. ``None`` when either argument is empty or
        ``None``.
    """
    # Nothing to compare when either side is missing.
    if not meanings or not root_stem_word:
        return None

    meaning_embedding = np.array(nomic_embed_model.get_text_embedding(meanings))

    # Look the word up once: each lookup is an HTTP request, and the same
    # response contains both dictionaries.
    dictionary_entries = get_list_meaning_word(root_stem_word)
    all_meanings = [
        *dictionary_entries['Monier-Williams Sanskrit-English Dictionary (1899)'],
        *dictionary_entries['Shabda-Sagara (1900)'],
    ]

    word_score_pair = []
    for word_meaning in all_meanings:
        root_stem_word_meaning_embedding = np.array(nomic_embed_model.get_text_embedding(word_meaning))
        # Calculate cosine similarity
        similarity_score = util.pytorch_cos_sim(meaning_embedding, root_stem_word_meaning_embedding).item()
        word_score_pair.append((word_meaning, similarity_score))

    # Sort in descending order of similarity score
    return sorted(word_score_pair, key=lambda x: x[1], reverse=True)
105
+
106
+ #extract the adhibautic meaning of the mantra from the vedamantra
107
def extract_meaning_by_language(data_list, target_language='English'):
    """Return the ``'mahatma'`` value of the first entry in ``target_language``.

    Args:
        data_list: Iterable of dicts, each optionally carrying
            ``'languageName'`` and ``'mahatma'`` keys.
        target_language: Language name to match against ``'languageName'``.

    Returns:
        The first matching entry's ``'mahatma'`` value (``{}`` when that entry
        has no such key), or ``None`` when no entry matches.
    """
    matching_values = (
        entry.get('mahatma', {})
        for entry in data_list
        if entry.get('languageName') == target_language
    )
    return next(matching_values, None)
112
+
113
+ #mantra_json_details
114
def get_details_mantra_json(query):
    """Run ``query`` and return the first row's ``mantra_json`` column, parsed.

    Args:
        query: SQL text whose result set contains a ``mantra_json`` column.

    Returns:
        The decoded JSON after markup-like ``<...>`` spans are removed from the
        stored text.

    Raises:
        ValueError: If the query failed or returned no rows.
    """
    outcome = execute_query(query)
    # execute_query reports failure by returning None; unpacking that directly
    # would raise an unrelated-looking TypeError.
    if outcome is None:
        raise ValueError("Query failed; no mantra JSON could be retrieved.")
    description, data = outcome
    if not data:
        raise ValueError("Query returned no rows; no mantra JSON is available.")
    df = pd.DataFrame(data)
    # Column names are the first field of each cursor description entry.
    df.columns = [x[0] for x in description]
    mantra_json = df['mantra_json'].values[0]
    cleaned_data = re.sub('<[^<]+?>', '', mantra_json)
    return json.loads(cleaned_data)