samlonka commited on
Commit
9a19c9e
1 Parent(s): 3f76d13

Add new files for Veda Bot 2.0

Browse files
Files changed (10) hide show
  1. .streamlit/secrets.toml +14 -0
  2. Docs/ramana_docs_ids.pkl +3 -0
  3. app.py +0 -0
  4. cache.py +75 -0
  5. crag.py +357 -0
  6. crag_app.py +158 -0
  7. database.py +87 -0
  8. function_tools.py +614 -0
  9. requirements.txt +19 -0
  10. utils.py +147 -0
.streamlit/secrets.toml ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ OPENAI_API_KEY="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"
2
+ PINECONE_API_KEY_SAM = "a8f6c5af-e23c-4d57-8038-e47a3c0b3a42"
3
+ #OPENAI_ORGANIZATION=YOUR_ORG_ID_HERE
4
+ LANGCHAIN_API_KEY="ls__3dd443fbc5e5487b91b6d169692925c9"
5
+ GROQ_API_KEY='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'
6
+ NOMIC_API_KEY="nk-TsmqZHH1Y-rwWOuLgoUtJ7iihIljDeJTOIEalgnODhM"
7
+ TAVILY_API_KEY="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"
8
+ MIXED_API_KEY="emb_32ce4f9ad64a153cff0b41f865daf46ad87d3222b7084b6e"
9
+ #database details — SECURITY WARNING: this file commits live credentials (API keys above and the DB password below) to the repository; .streamlit/secrets.toml must be gitignored and every exposed key/password rotated immediately
10
+ DB_HOST="dev-veda-cms-database-1.cjopjxk5nm26.us-west-2.rds.amazonaws.com"
11
+ DB_USER="veda_bot_user"
12
+ DB_PASSWORD="Vdea#bp0t"
13
+ DB_PORT=3306
14
+ DB="veda_data_bot"
Docs/ramana_docs_ids.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b132a67107aed2df316c947a72bda5bcad9eae4917d0a062e82e00733eba31ed
3
+ size 13480212
app.py ADDED
File without changes
cache.py ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain_openai import OpenAI
2
+ import os
3
+ import time
4
+ from langchain.cache import SQLAlchemyCache
5
+ from sqlalchemy import Column, Computed, Index, Integer, Sequence, String, create_engine
6
+ from sqlalchemy.orm import sessionmaker, declarative_base
7
+ from datetime import datetime
8
+
9
+
10
+
11
+
12
+ #load postgres engine
13
+ engine = create_engine("postgresql://postgres:sampath@localhost:5432/postgres")
14
+
15
+
16
+
17
+
18
+ Base = declarative_base()
19
+
20
+
21
class FeedBackCache(Base):
    """Postgres table storing user feedback on bot answers.

    One row per feedback event: the user/assistant message pair plus the
    score and optional free-text explanation collected in the chat UI.
    """

    __tablename__ = "veda_bot_feedback"
    # Surrogate primary key backed by the "cache_id" sequence.
    id = Column(Integer, Sequence("cache_id"), primary_key=True)
    # The user's message content (nullable — feedback may arrive detached).
    user_message = Column(String, nullable=True)
    # The assistant reply the feedback refers to.
    assistant_message = Column(String, nullable=True)
    # Score emitted by the feedback widget, stored as text.
    feedback_score = Column(String, nullable=True)
    # Optional free-form explanation from the user.
    feedback_text = Column(String, nullable=True)
30
+
31
+ # Create the table in the database
32
+ Base.metadata.create_all(engine)
33
+
34
+
35
def write_to_db(u_message, a_message, f_score, f_text):
    """Persist one user/assistant message pair plus feedback to Postgres.

    Args:
        u_message: Chat message dict for the user turn; its "content" key is stored.
        a_message: Chat message dict for the assistant turn; its "content" key is stored.
        f_score: Feedback score (stored as text).
        f_text: Optional free-form feedback text.
    """
    # Create the session BEFORE the try block: the original built it inside
    # `try`, so a failure in sessionmaker()/Session() reached the except and
    # finally handlers with `session` unbound, raising NameError instead of
    # the real error.
    Session = sessionmaker(bind=engine)
    session = Session()
    try:
        message = FeedBackCache(
            user_message=u_message["content"],
            assistant_message=a_message["content"],
            feedback_score=f_score,
            feedback_text=f_text,
        )
        session.add(message)
        # Commit to persist the row.
        session.commit()
        print("Feedback written to DB successfully!")
    except Exception as e:
        # Roll back on any failure so the connection is left clean.
        session.rollback()
        print("Error occurred while writing feedback to DB:", e)
    finally:
        # Always release the session.
        session.close()
65
+
66
+
67
def current_time() -> str:
    """Return the current local time as a compact YYYYMMDDHHMMSS string.

    Used as part of the session UUID.
    """
    return datetime.now().strftime("%Y%m%d%H%M%S")
crag.py ADDED
@@ -0,0 +1,357 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import streamlit as st
3
+ from vector_tool import ensemble_retriever
4
+ from langgraph.prebuilt import ToolInvocation
5
+ from langchain_core.messages import ToolMessage
6
+ import json
7
+ # Set up the tools to execute them from the graph
8
+ from langgraph.prebuilt import ToolExecutor
9
+ # tools retrieval
10
+ from function_tools import tool_chain
11
+ from vector_tool import ensemble_retriever
12
+
13
+ os.environ['OPENAI_API_KEY'] = st.secrets["OPENAI_API_KEY"]
14
+ os.environ['TAVILY_API_KEY'] = st.secrets["TAVILY_API_KEY"]
15
+
16
+ ### Retrieval Grader
17
+
18
+ from langchain_openai import ChatOpenAI
19
+ from langchain_core.prompts import ChatPromptTemplate
20
+ from langchain_core.pydantic_v1 import BaseModel, Field
21
+
22
+ #LLM models
23
+ llm_AI4 = ChatOpenAI(model="gpt-4-1106-preview", temperature=0)
24
+
25
+ # Data model
26
class GradeDocuments(BaseModel):
    """Binary score for relevance check on retrieved documents.

    Used as the structured-output schema for the grader LLM: it must answer
    with exactly 'yes' or 'no'.
    """

    # "yes"/"no" emitted by the structured-output grader.
    binary_score: str = Field(description="Documents are relevant to the question, 'yes' or 'no'")
30
+
31
+ # LLM with function call
32
+ structured_llm_grader = llm_AI4.with_structured_output(GradeDocuments)
33
+
34
+ # Prompt
35
+ system = """You are a grader assessing relevance of a retrieved document to a user question. \n
36
+ If the document contains keyword(s) or semantic meaning related to the question, grade it as relevant. \n
37
+ Give a binary score 'yes' or 'no' score to indicate whether the document is relevant to the question."""
38
+ grade_prompt = ChatPromptTemplate.from_messages(
39
+ [
40
+ ("system", system),
41
+ ("human", "Retrieved document: \n\n {document} \n\n User question: {question}"),
42
+ ]
43
+ )
44
+
45
+ retrieval_grader = grade_prompt | structured_llm_grader
46
+
47
+ ### Generate
48
+ from langchain import hub
49
+ from langchain.prompts import MessagesPlaceholder
50
+ from langchain.agents.output_parsers.openai_tools import OpenAIToolsAgentOutputParser
51
+ from langchain.prompts import MessagesPlaceholder
52
+ from langchain.agents.format_scratchpad.openai_tools import (
53
+ format_to_openai_tool_messages
54
+ )
55
+ from langchain_core.messages import AIMessage, FunctionMessage, HumanMessage
56
+ from langchain_core.output_parsers import StrOutputParser
57
+ from typing import Any, List, Union
58
+ # Prompt
59
+ #prompt = hub.pull("rlm/rag-prompt")
60
+ system_message = '''You are an AI assistant for answering questions about vedas and scriptures.
61
+ \nYou are given the following extracted documents from Svarupa Knowledge Base (https://svarupa.org/) and other documents and a question.
62
+ Provide a conversational answer.\nIf you are not provided with any documents, say \"I did not get any relevant context for this but
63
+ I will reply to the best of my knowledge\" and then write your answer\nIf you don't know the answer, just say \"Hmm, I'm not sure. \" Don't try to make up an answer.
64
+ \nIf the question is not about vedas and scriptures, politely inform them that you are tuned to only answer questions about that.\n\n'''
65
+ '''
66
+ prompt = ChatPromptTemplate.from_messages(
67
+ [
68
+ ("system",system_message),
69
+ # Please note the ordering of the fields in the prompt!
70
+ # The correct ordering is:
71
+ # 1. history - the past messages between the user and the agent
72
+ # 2. user - the user's current input
73
+ # 3. agent_scratchpad - the agent's working space for thinking and
74
+ # invoking tools to respond to the user's input.
75
+ # If you change the ordering, the agent will not work correctly since
76
+ # the messages will be shown to the underlying LLM in the wrong order.
77
+ MessagesPlaceholder(variable_name="context"),
78
+ ("user", "{question}"),
79
+ ]
80
+ )
81
+ '''
82
# Prompt used by the generation chain. Fixes the "queation" typo so the LLM
# receives clean instructions. Placeholders: {context} (retrieved documents)
# and {question} (the user query).
generate_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_message),
        ("human", "Here is the given context {context}, question: {question} \n\n Formulate an answer."),
    ]
)
88
+ # LLM
89
+ llm_AI = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)
90
+
91
+ # Post-processing
92
def format_docs(docs):
    """Join the page contents of *docs* into one blank-line-separated string."""
    bodies = [doc.page_content for doc in docs]
    return "\n\n".join(bodies)
94
+
95
+ # Chain
96
+ rag_chain = generate_prompt | llm_AI4 | StrOutputParser() #OpenAIToolsAgentOutputParser()
97
+
98
+ ####-----------------TESTING
99
+ prompt = ChatPromptTemplate.from_messages(
100
+ [
101
+ (
102
+ "system",
103
+ "You are a helpful assistant. Answer all questions to the best of your ability.",
104
+ ),
105
+ MessagesPlaceholder(variable_name="chat_history"),
106
+ ("human", "{question}"),
107
+ ]
108
+ )
109
+ from langchain_core.runnables.history import RunnableWithMessageHistory
110
+ from langchain.memory import ChatMessageHistory
111
+
112
+ chat_history_for_chain = ChatMessageHistory()
113
+
114
+ chain_with_message_history = RunnableWithMessageHistory(
115
+ rag_chain,
116
+ lambda session_id: chat_history_for_chain,
117
+ input_messages_key="question",
118
+ history_messages_key="chat_history",
119
+ )
120
+
121
+ ### Question Re-writer
122
+
123
+ # LLM
124
# LLM used only for query rewriting.
llm = ChatOpenAI(model="gpt-3.5-turbo-0125", temperature=0)

# Prompt for the rewriter. Typos fixed so the instruction the LLM sees is
# clean: "You a" -> "You are a", "sematic" -> "semantic".
system = """You are a question re-writer that converts an input question to a better version that is optimized \n
for a search. Look at the input and try to reason about the underlying semantic intent / meaning."""
re_write_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system),
        ("human", "Here is the initial question: \n\n {question} \n Formulate an improved question."),
    ]
)

# question -> rewritten question (plain string).
question_rewriter = re_write_prompt | llm | StrOutputParser()
137
+
138
+
139
+
140
+ ### Search
141
+
142
+ from langchain_community.tools.tavily_search import TavilySearchResults
143
+ web_search_tool = TavilySearchResults(k=2)
144
+
145
+ from typing_extensions import TypedDict
146
+ from typing import List
147
+ from typing import TypedDict, Annotated, Sequence
148
+ import operator
149
+ from langchain_core.messages import BaseMessage
150
+
151
class GraphState(TypedDict):
    """
    State dict passed between nodes of the CRAG graph.

    Attributes:
        question: the current (possibly rewritten) user question
        generation: the LLM-generated answer, set by the `generate` node
        web_search: "Yes"/"No" flag set by `grade_documents` when no relevant
            document survived and a web-search fallback is required
        messages: the retrieved/filtered documents accumulated so far
    """
    question : str
    generation : str
    web_search : str
    # NOTE(review): nodes actually store langchain Document objects here,
    # not plain strings — the annotation is looser than real usage.
    messages: List[str] #Union[dict[str, Any]]
165
+
166
+ from langchain.schema import Document
167
+
168
+
169
+
170
def retrieve(state):
    """
    Fetch candidate documents for the current question from the ensemble retriever.

    Args:
        state (dict): Current graph state; only "question" is read.

    Returns:
        dict: State update with the retrieved documents under "messages"
        and the unchanged "question".
    """
    print("---VECTOR RETRIEVE---")
    question = state["question"]
    docs = ensemble_retriever.get_relevant_documents(question)
    # Normalize each document's 'source' metadata down to a bare file name
    # so the UI can render readable reference links.
    for doc in docs:
        try:
            full_path = doc.metadata['source']
            doc.metadata['source'] = os.path.split(full_path)[1]
        except KeyError:
            # No 'source' in metadata — mark it explicitly.
            doc.metadata['source'] = 'unavailable'
        except Exception as e:
            # Any other problem: report and leave the document untouched.
            print(f"An error occurred while processing document: {e}")
    return {"messages": docs, "question": question}
199
+
200
+
201
def generate(state):
    """
    Produce the final answer from the accumulated context documents.

    Args:
        state (dict): Current graph state; reads "question" and "messages".

    Returns:
        dict: State update adding the LLM output under "generation".
    """
    print("---GENERATE---")
    question = state["question"]
    context_docs = state["messages"]
    print(context_docs)
    # RAG generation through the history-aware chain; a single shared
    # session id is used for the in-process chat history.
    generation = chain_with_message_history.invoke(
        {"context": context_docs, "question": question},
        {"configurable": {"session_id": "unused"}},
    )
    return {"messages": context_docs, "question": question, "generation": generation}
218
+
219
def grade_documents(state):
    """
    Determines whether the retrieved documents are relevant to the question.

    Each vector-retrieved document is scored by the grader LLM; relevant ones
    are kept. Tool-based retrieval results are then appended unconditionally.
    If nothing survives, the web_search flag is raised so the graph falls
    back to query rewriting + web search.

    Args:
        state (dict): The current graph state

    Returns:
        state (dict): Updates messages key with only filtered relevant
        documents, plus the "web_search" Yes/No flag.
    """

    print("---CHECK DOCUMENT RELEVANCE TO QUESTION---")
    question = state["question"]
    messages = state["messages"]

    # Score each doc with the binary yes/no grader.
    filtered_docs = []
    web_search = "No"
    for d in messages:
        score = retrieval_grader.invoke({"question": question, "document": d.page_content})
        grade = score.binary_score
        if grade == "yes":
            print("---GRADE: DOCUMENT RELEVANT---")
            filtered_docs.append(d)
        else:
            print("---GRADE: DOCUMENT NOT RELEVANT---")
            continue
    # Function-tool retrieval runs regardless of the grading outcome; its
    # results are wrapped as Documents and are NOT graded.
    print("---TOOLS RETRIEVE---")
    tool_documents = tool_chain.invoke(question)
    #print(tool_documents)
    if tool_documents:
        for item in tool_documents:
            filtered_docs.append(Document(page_content=str(item['output']),metadata={"source": 'https://svarupa.org/home',"name":item['name']}))
    # If filtered_docs is empty, request the web-search fallback path.
    if not filtered_docs:
        print("--PERFORMING WEB SEARCH--")
        web_search = "Yes"

    return {"messages": filtered_docs, "question": question, "web_search": web_search}
258
+
259
+
260
+
261
def transform_query(state):
    """
    Rewrite the user question into a form better suited for search.

    Args:
        state (dict): Current graph state; reads "question" and "messages".

    Returns:
        dict: State update with the rewritten question; documents pass
        through unchanged.
    """
    print("---TRANSFORM QUERY---")
    docs = state["messages"]
    # Ask the rewriter chain for an improved phrasing.
    improved = question_rewriter.invoke({"question": state["question"]})
    return {"messages": docs, "question": improved}
279
+
280
def web_search(state):
    """
    Augment the context with Tavily web-search results for the question.

    Args:
        state (dict): Current graph state; reads "question" and "messages".

    Returns:
        dict: State update with web results appended to "messages".
    """
    print("---WEB SEARCH---")
    question = state["question"]
    context = state["messages"]

    # Query the web-search tool and wrap each hit as a Document so it is
    # interchangeable with vector-retrieved context.
    hits = web_search_tool.invoke({"query": question})
    found = [
        Document(page_content=hit["content"], metadata={"source": hit["url"]})
        for hit in hits
    ]
    print(f"Web Results: {found}")
    context.extend(found)
    return {"messages": context, "question": question}
302
+
303
+ ### Edges
304
+
305
def decide_to_generate(state):
    """
    Route after grading: rewrite the query (then web-search) or answer now.

    Args:
        state (dict): The current graph state; only "web_search" drives
        the decision.

    Returns:
        str: "transform_query" when no relevant documents survived grading,
        otherwise "generate".
    """

    print("---ASSESS GRADED DOCUMENTS---")
    # The original also read "question" and "messages" into locals but never
    # used them; only the web_search flag matters for routing.
    if state["web_search"] == "Yes":
        # All documents were filtered out: re-generate the query and fall
        # back to web search.
        print("---DECISION: ALL DOCUMENTS ARE NOT RELEVANT TO QUESTION, TRANSFORM QUERY---")
        return "transform_query"
    # At least one relevant document: go straight to generation.
    print("---DECISION: GENERATE---")
    return "generate"
330
from langgraph.graph import END, StateGraph

# Assemble the CRAG control-flow graph:
# retrieve -> grade_documents -> (generate | transform_query -> web_search_node -> generate) -> END
workflow = StateGraph(GraphState)

# Define the nodes
workflow.add_node("retrieve", retrieve)  # vector retrieval
workflow.add_node("grade_documents", grade_documents)  # relevance grading + tool retrieval
workflow.add_node("generate", generate)  # answer generation
workflow.add_node("transform_query", transform_query)  # query rewriting
workflow.add_node("web_search_node", web_search)  # web-search fallback

# Build graph
workflow.set_entry_point("retrieve")
workflow.add_edge("retrieve", "grade_documents")
# After grading, either answer directly or rewrite the query and hit the web.
workflow.add_conditional_edges(
    "grade_documents",
    decide_to_generate,
    {
        "transform_query": "transform_query",
        "generate": "generate",
    },
)
workflow.add_edge("transform_query", "web_search_node")
workflow.add_edge("web_search_node", "generate")
workflow.add_edge("generate", END)

# Compile into the runnable app imported by crag_app.py.
crag_app = workflow.compile()
crag_app.py ADDED
@@ -0,0 +1,158 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import os
3
+ import uuid
4
+ from streamlit_feedback import streamlit_feedback
5
+ import streamlit as st
6
+ from langchain_openai import ChatOpenAI
7
+ from langchain_core.messages import HumanMessage
8
+ from langchain.memory import ChatMessageHistory
9
+ from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
10
+ from langchain_core.messages import AIMessage, HumanMessage, SystemMessage
11
+ from langchain_core.prompts import HumanMessagePromptTemplate
12
+ import tiktoken
13
+ #from agent import app
14
+ from crag import crag_app
15
+ from datetime import timedelta
16
+ from sqlalchemy import create_engine
17
+ from cache import (write_to_db,
18
+ current_time)
19
+
20
+
21
+ #load postgres engine
22
+ engine = create_engine("postgresql://postgres:sampath@localhost:5432/postgres")
23
+ #load keys
24
+ os.environ['OPENAI_API_KEY'] = st.secrets["OPENAI_API_KEY"]
25
+ chat_history = ChatMessageHistory()
26
+
27
+ system_message = '''You are an AI assistant for answering questions about vedas and scriptures.
28
+ \nYou are given the following extracted documents from Svarupa Knowledge Base (https://svarupa.org/) and other documents and a question.
29
+ Provide a conversational answer. If there are any unicode characters in the final answer, please encode and provide readable answer to the user.
30
+ \nIf you are not provided with any documents, say \"I did not get any relevant context for this but
31
+ I will reply to the best of my knowledge\" and then write your answer\nIf you don't know the answer, just say \"Hmm, I'm not sure. \" Don't try to make up an answer.
32
+ \nIf the question is not about vedas and scriptures, politely inform them that you are tuned to only answer questions about that.\n\n'''
33
+ generate_prompt = ChatPromptTemplate.from_messages(
34
+ [
35
+ ("system", system_message),
36
+ ("human", "Here is the given context {context}, queation: {question} \n\n Formulate an answer."),
37
+ ]
38
+ )
39
#@st.cache_resource(show_spinner=False)  # caching left disabled: responses depend on mutable chat state
def bot_response(user_input):
    """Run the compiled CRAG graph on *user_input* and return its final state dict."""
    return crag_app.invoke({"question": user_input})
43
+
44
+
45
+ ##======
46
+ # Main chatbot function
47
def veda_bot(sidebar: bool = True) -> None:
    """Render the Veda Bot Streamlit chat page and handle one interaction turn.

    Sets up CSS, session state (messages, session UUID, feedback, memory),
    replays the chat history, sends the user's prompt through the CRAG app,
    renders the answer with source references, and collects feedback.

    Args:
        sidebar: currently unused — kept for interface compatibility.
    """
    # Define custom CSS to tighten the header and hide Streamlit chrome.
    custom_css = """
        <style>
            /* Adjust the selector as needed */
            .stHeadingContainer {
                margin-top: -100px; /* Reduce the top margin */
            }
            #MainMenu {visibility: hidden;}
            footer {visibility: hidden;}
            header {visibility: hidden;}
        </style>
    """

    # Apply the custom CSS
    st.markdown(custom_css, unsafe_allow_html=True)

    # Streamlit Components Initialization
    st.title("Veda Bot")
    st.write("This bot is developed based on the content from the [Svarupa](https://svarupa.org/home) website.")
    # NOTE(review): chat_history is module-level, so this greeting is appended
    # on every rerun, not once per session.
    chat_history.add_message(SystemMessage(content="Welcome! I am your Veda Bot. How can I assist you today?"))
    # Initialize session state variables (first run of this session only).
    if "messages" not in st.session_state.keys():
        st.session_state.messages = [{"role": "assistant", "content": "Hi. I am an AI Assistant. Ask me a question about Vedas!"}]

    if "session_uuid" not in st.session_state:
        # Timestamp prefix keeps session ids sortable by creation time.
        st.session_state["session_uuid"] = f"{current_time()}-{str(uuid.uuid4())}"

    if "feedback" not in st.session_state:
        st.session_state["feedback"] = None

    if "chat_engine" not in st.session_state.keys():
        st.session_state.chat_engine = bot_response

    if "memory" not in st.session_state:
        st.session_state["memory"] = ChatMessageHistory()
        st.session_state["memory"].add_message(generate_prompt)
        st.session_state["memory"].add_message({"role":"user","content":"Hi/Hello or Any Greating"})
        st.session_state["memory"].add_message({"role":"assistant","content":"Hi. Please ask the question about vedas!"})
    # Display chat history
    for message in st.session_state.messages:
        with st.chat_message(message["role"]):
            st.markdown(message["content"])

    # Get user input
    prompt = st.chat_input("Enter your question!")

    if prompt:
        # Display user message in chat message container
        with st.chat_message("user"):
            st.markdown(prompt)

        # Log user message
        st.session_state["messages"].append({"role": "user", "content": prompt})
        st.session_state["memory"].add_message({"role": "user", "content": prompt})

    # Generate bot response only when the last message is from the user.
    if st.session_state.messages[-1]["role"] != "assistant":
        with st.spinner("Thinking..."):
            references = []
            message_placeholder = st.empty()
            full_response = ""

            # Get bot response from the CRAG graph.
            response_bot = st.session_state.chat_engine(prompt)
            generation = response_bot['generation']
            full_response += generation
            web_search = response_bot['web_search']

            # Extract references (document sources) from the bot response.
            if response_bot['messages']:
                try:
                    references.extend([doc.metadata['source'] for doc in response_bot['messages']])
                except Exception as e:
                    print("Error:", e)
            #message_placeholder.markdown(full_response + "▌")

            # Add deduplicated references to the full response.
            if references:
                unique_references = set(references)
                full_response += "\n\n**References:**\n\n"
                for reference in unique_references:
                    full_response += f"- {reference}\n"

            #message_placeholder.markdown(full_response + "▌")
            # Submit Feedback widget ("faces" scale + optional text).
            streamlit_feedback(
                feedback_type="faces",
                on_submit=None,
                optional_text_label="[Optional] Please provide an explanation",
                key="feedback",
            )
            message_placeholder.markdown(full_response)
            # Persist only the raw generation (without references) to history.
            st.session_state["messages"].append({"role": "assistant", "content": generation})
            st.session_state["memory"].add_message({"role": "assistant", "content": generation})
            print(f"Response added to memory: {full_response}")

    # Log feedback and messages.
    # NOTE(review): the actual DB write is commented out, so feedback is
    # collected but never persisted.
    if st.session_state['feedback']:
        user_feedback ={
            "user_message": st.session_state["messages"][-2],
            "assistant_message": st.session_state["messages"][-1],
            "feedback_score": st.session_state["feedback"]["score"],
            "feedback_text": st.session_state["feedback"]["text"],
        }
        #write_to_db(u_message=user_feedback["user_message"],
        #            a_message=user_feedback["assistant_message"],
        #            f_score=user_feedback["feedback_score"],
        #            f_text=user_feedback["feedback_text"])
156
+
157
+ if __name__ == "__main__":
158
+ veda_bot()
database.py ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pymysql
2
+ import streamlit as st
3
+ import logging
4
+ import json
5
+ import pandas as pd
6
+ import re
7
+ import os
8
+ from langchain_community.utilities.sql_database import SQLDatabase
9
+
10
+
11
# Connection parameters for the MySQL content database.
# BUG FIX: st.secrets is a mapping, not a callable — the original
# `st.secrets("DB_HOST")` raised TypeError at import time; use item access.
db_params = {"host": st.secrets["DB_HOST"],
             "user": os.getenv("DB_USER"),
             "password": os.getenv("DB_PASSWORD"),
             "port": 3306,
             "database": os.getenv("DB")
             }
17
+
18
+
19
+
20
def initialize_database():
    """Open a raw pymysql connection using the module-level `db_params`.

    Returns:
        A live pymysql connection on success, or None on failure
        (the error is surfaced in the UI via st.error).
    """
    try:
        connection = pymysql.connect(**db_params)
    except Exception as exc:
        st.error(f"Database connection failed: {exc}")
        return None
    st.success("Database connection successful!")
    return connection
29
+
30
+
31
def get_db():
    """Build a langchain SQLDatabase wrapper over the veda content tables.

    Returns:
        SQLDatabase instance on success, or None on failure
        (the error is surfaced in the UI via st.error).
    """
    uri = (
        f"mysql+pymysql://{db_params['user']}:{db_params['password']}"
        f"@{db_params['host']}/{db_params['database']}"
    )
    try:
        return SQLDatabase.from_uri(
            uri,
            include_tables=['term_details_modified', 'veda_content_details', 'veda_content_modified'],
        )
    except Exception as exc:
        st.error(f"Database connection failed: {exc}")
        return None
42
+
43
+
44
def execute_query(query):
    """Run *query* on a fresh connection and return (description, rows).

    Args:
        query: SQL string to execute.

    Returns:
        tuple(cursor.description, rows) on success, or None on any failure —
        including failure to connect.
    """
    db = initialize_database()
    # initialize_database() returns None on connection failure; the original
    # then crashed with AttributeError on db.cursor() instead of reporting.
    if db is None:
        return None
    cursor = db.cursor()
    try:
        cursor.execute(query)
        description = cursor.description
        result = cursor.fetchall()  # Fetch all rows from the result set
        db.commit()
        return description, result
    except Exception as e:
        print("Error executing query:", e)
        db.rollback()
        return None  # Return None if an error occurs
    finally:
        # Close the cursor too (the original leaked it).
        cursor.close()
        db.close()
59
+
60
+
61
def execute_sql_query(query, parameters=None):
    """Execute *query* (optionally parameterized) and return rows as dicts.

    Args:
        query: SQL string with %s placeholders.
        parameters: Optional sequence of values bound to the placeholders.

    Returns:
        list of dict rows on success, or None on failure — including
        failure to connect.
    """
    db = initialize_database()
    # Guard against a failed connection; the original raised AttributeError
    # on db.cursor() when initialize_database() returned None.
    if db is None:
        return None
    # Dictionary cursor so each row comes back as {column: value}.
    cursor = db.cursor(pymysql.cursors.DictCursor)
    try:
        if parameters:
            cursor.execute(query, parameters)
        else:
            cursor.execute(query)
        return cursor.fetchall()
    except Exception as e:
        logging.error(f"Error executing SQL query: {e}")
        return None
    finally:
        # Close the cursor too (the original leaked it).
        cursor.close()
        db.close()
77
+
78
+
79
def get_details_mantra_json(query):
    """Fetch rows for *query* and decode the first row's `mantra_json` column.

    HTML tags embedded in the stored payload are stripped before decoding.

    Args:
        query: SQL string selecting at least a `mantra_json` column.

    Returns:
        The decoded JSON object.

    Raises:
        ValueError: if the query failed — the original unpacked
        execute_query()'s None return and died with an opaque TypeError.
    """
    result = execute_query(query)
    if result is None:
        raise ValueError(f"Query failed or returned no result: {query}")
    description, data = result
    df = pd.DataFrame(data)
    df.columns = [x[0] for x in description]
    mantra_json = df['mantra_json'].values[0]
    # Strip HTML markup from the stored JSON payload before parsing.
    cleaned_data = re.sub('<[^<]+?>', '', mantra_json)
    return json.loads(cleaned_data)
86
+
87
+
function_tools.py ADDED
@@ -0,0 +1,614 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Optional
2
+ import json
3
+ import logging
4
+ from utils import iast_process, get_list_meaning_word, get_details_mantra_json, word_sentence_similarity, extract_meaning_by_language
5
+ import ast
6
+ from langchain_core.tools import tool
7
+ from database import execute_sql_query, get_details_mantra_json
8
+ from langchain.pydantic_v1 import BaseModel, Field
9
+ from langchain.tools import StructuredTool
10
+ from typing import Optional
11
+ import streamlit as st
12
+ from langchain_core.utils.function_calling import convert_to_openai_function
13
+ from langchain_core.messages import AIMessage
14
+ from langchain_core.runnables import Runnable
15
+ import os
16
+
17
+ os.environ['OPENAI_API_KEY'] = st.secrets["OPENAI_API_KEY"]
18
+ from langchain_openai import ChatOpenAI
19
+
20
+ #LLM
21
+ llm = ChatOpenAI(model="gpt-3.5-turbo-0125")
22
+ llm_AI4 = ChatOpenAI(model="gpt-4-1106-preview", temperature=0)
23
+
24
+
25
+
26
+ #Classes
27
class MantraInput(BaseModel):
    """Locator arguments for fetching a single vedamantra.

    Either a dotted `mantraid` or a scripture name plus the relevant
    section numbers identifies the mantra; every field is optional.
    """
    mantraid: Optional[str] = Field(None, description="The mantra id. For example, 1.1.1.1, 2.1.1,3.1.1.2,4.2.3.1, and 5.0.1.1.2")
    scripture_name: Optional[str] = Field(None, description="Name of the scripture like RigVeda, SamaVeda, AtharvaVeda, KrishnaYajurVeda, and ShuklaYajurVeda")
    KandahNumber: Optional[int] = Field(None, description="Kandah Number of Vedamantra")
    MandalaNumber: Optional[int] = Field(None, description="Mandala Number of Vedamantra")
    ArchikahNumber: Optional[int] = Field(None, description="Archikah Number of Vedamantra")
    ShuktaNumber: Optional[int] = Field(None, description="Shukta Number of Vedamantra")
    PrapatakNumber: Optional[int] = Field(None, description="Prapatak Number of Vedamantra")
    MantraNumber: Optional[int] = Field(None, description="Mantra Number of Vedamantra")
    AnuvakNumber: Optional[int] = Field(None, description="Anuvak Number of Vedamantra")
    AdhyayaNumber: Optional[int] = Field(None, description="Adhyaya Number of Vedamantra")
+
39
class PadaMeaningInput(BaseModel):
    """Argument schema for looking up the meaning of a single pada (word)."""
    pada: str = Field(description="The pada or word that is being meaning checked")
41
+
42
class PadaAAAInput(BaseModel):
    """Argument schema for pada lookups scoped to a specific mantra.

    Combines a required `pada` with the same optional mantra locators
    as MantraInput.
    """
    pada: str = Field(description="The pada or word that is being meaning checked")
    mantraid: Optional[str] = Field(None, description="The mantra id. For example, 1.1.1.1, 2.1.1,3.1.1.2,4.2.3.1, and 5.0.1.1.2")
    scripture_name: Optional[str] = Field(None, description="Name of the scripture like RigVeda, SamaVeda, AtharvaVeda, KrishnaYajurVeda, and ShuklaYajurVeda")
    KandahNumber: Optional[int] = Field(None, description="Kandah Number of Vedamantra")
    MandalaNumber: Optional[int] = Field(None, description="Mandala Number of Vedamantra")
    ArchikahNumber: Optional[int] = Field(None, description="Archikah Number of Vedamantra")
    ShuktaNumber: Optional[int] = Field(None, description="Shukta Number of Vedamantra")
    PrapatakNumber: Optional[int] = Field(None, description="Prapatak Number of Vedamantra")
    MantraNumber: Optional[int] = Field(None, description="Mantra Number of Vedamantra")
    AnuvakNumber: Optional[int] = Field(None, description="Anuvak Number of Vedamantra")
    AdhyayaNumber: Optional[int] = Field(None, description="Adhyaya Number of Vedamantra")
54
+
55
class NLSQLResponse(BaseModel):
    """Argument schema for the natural-language-to-SQL tool."""
    user_query: str = Field(description="user query")
57
+
58
class VectorResponse(BaseModel):
    """Argument schema for the vector-retrieval tool."""
    query: str = Field(description="User query")
60
+
61
class Response(BaseModel):
    """Structured answer returned by the tools: a result plus its derivation."""
    result: str = Field(description="The result based on the context. Provide the text in a readable format if there are unicode characters. Use only available context. If there is no context, return as 'unknown'. Do not use prior knowledge.")
    explanation: str = Field(description="Explanation of the steps taken to get the result")
64
+
65
+ #function tools for mantra level
66
def _get_mantra_details(query):
    """Execute *query* (selecting a ``mantra_json`` column) and return the
    second ``mantraHeader.language`` entry of the decoded document.

    NOTE(review): index 1 is presumably the Roman-IAST rendering — confirm
    against the mantra_json schema.

    Raises:
        ValueError: if the query fails or the JSON lacks the expected keys.
    """
    try:
        details = get_details_mantra_json(query)
        return details['mantraHeader']['language'][1]
    except Exception as e:
        # Chain the original exception so the root cause survives in tracebacks
        # (the original raise discarded it).
        raise ValueError(f"Failed to get mantra details: {e}") from e
72
+
73
+
74
def _get_mantra_details_by_scripture(scripture_name=None, KandahNumber=None, MandalaNumber=None, ArchikahNumber=None,
                                     ShuktaNumber=None, PrapatakNumber=None, MantraNumber=None, AnuvakNumber=None,
                                     AdhyayaNumber=None):
    """Fetch rows from ``veda_content_details`` matching the supplied filters.

    Builds a parameterized WHERE clause from whichever location filters are
    provided (``scripture_name`` is lower-cased to match storage) and returns
    the result rows, or ``None`` when nothing matches or the query fails.
    """
    try:
        query = "SELECT * FROM veda_content_details WHERE 1 = 1"
        parameters = []

        # (column, value) pairs; a falsy value means "filter not supplied".
        # Replaces nine copy-pasted if-blocks with one data-driven loop.
        filters = (
            ("scripture_name", scripture_name.lower() if scripture_name else None),
            ("KandahNumber", KandahNumber),
            ("MandalaNumber", MandalaNumber),
            ("ArchikahNumber", ArchikahNumber),
            ("ShuktaNumber", ShuktaNumber),
            ("PrapatakNumber", PrapatakNumber),
            ("MantraNumber", MantraNumber),
            ("AnuvakNumber", AnuvakNumber),
            ("AdhyayaNumber", AdhyayaNumber),
        )
        for column, value in filters:
            if value:
                query += f" AND {column} = %s"
                parameters.append(value)

        results = execute_sql_query(query, parameters)
        return results if results else None
    except Exception as e:
        logging.error(f"Error in _get_mantra_details_by_scripture: {e}")
        # Explicit: the original fell off the end and returned None implicitly.
        return None
120
+
121
+
122
+
123
def get_vedamantra_details(mantraid=None, scripture_name=None, KandahNumber=None, MandalaNumber=None, ArchikahNumber=None,
                           ShuktaNumber=None, PrapatakNumber=None, MantraNumber=None,
                           AnuvakNumber=None, AdhyayaNumber=None):
    '''
    This function is used to get the vedamantra such as vedamantra, padapatha, devata, chandah,
    and rishi, from all Vedas (RigVeda, AtharvaVeda, SamaVeda, KrishnaYajurVeda, and ShuklaYajurVeda).
    The Vedic scriptures has the following structure: \
    RigVeda->Mandala->Shukta->Mantra\
    SamaVeda->Archikah->Shukta->Mantra\
    AtharvaVeda->Kandah->Shukta->Mantra\
    ShuklaYajurVeda->Adhyaya->Mantra\
    KrishnaYajurVeda->Kandah->Prapatak->Anuvak->Mantra\
    Sample Questions:
    1. Obtain the vedamantra of the mantra whose id is 1.1.1.1?
    2. Retrieve the devata of the vedamantra from Rigveda, first mandala, first shukta, and first mantra.
    3. Provide the meaning of the vedamantra from Rigveda, first mandala, first shukta, and first mantra written by Tulsi Ram.
    4. Explain the adhibautic meaning of the first mantra from RigVeda, first mandala, and first shukta.
    5. Identify the mantraVishaya of the vedamantra from RigVeda, first mandala, first shukta, and first mantra.
    6. What is the adibhautic meaning of the mantra 1.1.1.9?
    7. What is the adhyatmic meaning of the mantra 1.1.1.7?
    8. What is the adhidyvic meaning of the 6th mantra from RigVeda, first mandala, and first shukta?
    '''
    try:
        if not mantraid:
            # Resolve the mantra id from the scripture-location filters.
            rows = _get_mantra_details_by_scripture(scripture_name=scripture_name, KandahNumber=KandahNumber,
                                                    MandalaNumber=MandalaNumber, ArchikahNumber=ArchikahNumber,
                                                    ShuktaNumber=ShuktaNumber, PrapatakNumber=PrapatakNumber,
                                                    MantraNumber=MantraNumber, AnuvakNumber=AnuvakNumber,
                                                    AdhyayaNumber=AdhyayaNumber)
            if rows is None:
                # Bug fix: the original left `query` as "" here and executed an
                # empty SQL statement; fail with a clear message instead.
                return json.dumps({"error": "No mantra matches the given scripture filters."})
            mantraid = rows[0]['mantra_id']
        query = f'''SELECT mantra_json FROM veda_content WHERE mantra_number = "{mantraid}"
        '''
        return _get_mantra_details(query)
    except Exception as e:
        return json.dumps({"error": str(e)})
161
+
162
def get_vedamantra_summary(mantraid=None, scripture_name=None, KandahNumber=None,MandalaNumber=None, ArchikahNumber=None,
                           ShuktaNumber=None, PrapatakNumber=None, MantraNumber=None,
                           AnuvakNumber=None, AdhyayaNumber=None):
    '''
    Use the function `get_vedamantra_summary` to access the information such as adibhautic meaning of the mantra, anvaya of the mantra, mantraVishaya of the mantra,
    adhibautic (or adhyatmic or adhidyvic) meaning (or bhavarth) of the mantra, purpose of the mantra, usage of the mantra, and tippani of the mantra.
    Sample Query:
    1. Obtain the anvaya of the mantra whose id (mantraid) is 1.1.1.1?
    2. Retrieve tha adibhautic meaning of the first mantra from RigVeda, first mandala, and first shukta.

    Returns a dict keyed by summary variant, an {"error": ...} dict on failure,
    or None when no mantra matches the scripture filters.
    '''
    try:
        if mantraid:
            query = f"SELECT mantra_json FROM veda_content WHERE mantra_number = '{mantraid}'"
        else:
            # Resolve the mantra id from the scripture-location filters.
            filtered_df = _get_mantra_details_by_scripture(scripture_name=scripture_name, KandahNumber=KandahNumber,MandalaNumber=MandalaNumber, ArchikahNumber=ArchikahNumber,
                                                           ShuktaNumber=ShuktaNumber, PrapatakNumber=PrapatakNumber, MantraNumber=MantraNumber,
                                                           AnuvakNumber=AnuvakNumber, AdhyayaNumber=AdhyayaNumber)
            if filtered_df is not None:
                mantra_id = filtered_df[0]['mantra_id']
                query = f"SELECT mantra_json FROM veda_content WHERE mantra_number = '{mantra_id}'"
            else:
                return None
        json_dict = get_details_mantra_json(query)
        mantra_summary = json_dict['mantraSummary']['language']
        # Index 1 is assumed to be the Roman-IAST rendering — TODO confirm
        # against the mantra_json schema.
        summary_dict = {"Roman-IAST summary of vedamantra": json_dict['mantraSummary']['language'][1]}
        # Collect one English summary per commentator (mahatma); later entries
        # with the same mahatmaName overwrite earlier ones.
        for item in mantra_summary:
            if item['languageName'] == 'English':
                mahatma = item['mahatma']['mahatmaName']
                summary_dict[f"English summary of vedamantra by {mahatma}"] = item
        return summary_dict
    except Exception as e:
        return {"error": str(e)}
194
+
195
+
196
+
197
def get_pada_meaning(pada):
    '''
    Purpose: For given sanskrit word, you have collection of meanings for available roots and stems of it.\
    You need to process this information as context and provide possible meanings for given word.
    Sample query:
    1. What is the meaning of the word apratidhṛṣṭa-śavasam?

    Returns a list of per-root/stem dictionary-meaning dicts, or an
    {"error": ...} dict when the pada is unknown or lookup fails.
    '''
    #pada=iast_process(pada)
    try:
        query = f'''
        SELECT * FROM term_details_modified WHERE Pada = "{pada}"
        '''
        # Execute the query to get details from the database
        details = execute_sql_query(query)
        if not details:
            # Bug fix: the original indexed details[0] unguarded, turning an
            # empty result set into an IndexError swallowed by the except below.
            return {"error": f"Required meaning associated with pada is not available. No entry for '{pada}'."}
        pada_details = details[0]
        meanings_list = []
        # Morphology is stored as a stringified Python literal (list of dicts).
        for morphs in ast.literal_eval(pada_details['Morphology']):
            for field in ['stem', 'root']:
                word = morphs.get(field)
                if word:
                    meanings_list.append(get_list_meaning_word(word))
        return meanings_list
    except Exception as e:
        logging.error(f"Error in get_pada_meaning: {e}")
        return {"error": f"Required meaning associated with pada is not available. {e}"}
224
+
225
+
226
+
227
def _get_pada_details_by_scripture(pada, scripture_name=None, KandahNumber=None, MandalaNumber=None, ArchikahNumber=None,
                                   ShuktaNumber=None, PrapatakNumber=None, MantraNumber=None, AnuvakNumber=None,
                                   AdhyayaNumber=None):
    """Fetch rows from ``term_details_modified`` for *pada*, narrowed by any
    supplied scripture-location filters.

    Returns the result rows, or ``None`` when nothing matches or the query
    fails.

    NOTE(review): unlike ``_get_mantra_details_by_scripture`` this does NOT
    lower-case ``scripture_name`` — confirm which casing the table stores.
    """
    try:
        query = "SELECT * FROM term_details_modified WHERE Pada = %s"
        parameters = [pada]

        # (column, value) pairs; a falsy value means "filter not supplied".
        # Replaces nine copy-pasted if-blocks with one data-driven loop.
        filters = (
            ("scripture_name", scripture_name),
            ("KandahNumber", KandahNumber),
            ("MandalaNumber", MandalaNumber),
            ("ArchikahNumber", ArchikahNumber),
            ("ShuktaNumber", ShuktaNumber),
            ("PrapatakNumber", PrapatakNumber),
            ("MantraNumber", MantraNumber),
            ("AnuvakNumber", AnuvakNumber),
            ("AdhyayaNumber", AdhyayaNumber),
        )
        for column, value in filters:
            if value:
                query += f" AND {column} = %s"
                parameters.append(value)

        results = execute_sql_query(query, parameters)
        return results if results else None

    except Exception as e:
        logging.error(f"Error in _get_pada_details_by_scripture: {e}")
        return None
275
+
276
def _get_vedamantra_meaning(mantraID, MahatmaName=None):
    """Pick the most complete English adibhautic/adidaivic/adhyatmic meaning
    set for a mantra.

    When *MahatmaName* is given, only that commentator's summaries are
    considered (falling back to all summaries if none match). Among English
    summaries, the one filling the most of the three meaning categories wins;
    with ``>=`` the last best-scoring entry is kept.

    NOTE(review): returns a dict on success but a JSON *string* on failure —
    callers rely on ``'error' in result`` working for both. Consider
    unifying the contract.
    """
    try:
        query = f"SELECT mantra_json FROM veda_content WHERE mantra_number = '{mantraID}'"
        jsonDict = get_details_mantra_json(query)
        mantraSummary = jsonDict['mantraSummary']['language']
        if MahatmaName is not None:
            filtered_summary = [data_dict for data_dict in mantraSummary if data_dict.get('mahatma', {}).get('mahatmaName') == MahatmaName]
            if filtered_summary:
                mantraSummary = filtered_summary
        best_meaning = None
        best_count = 0
        for data_dict in mantraSummary:
            if data_dict.get('languageName') == "English":
                meanings = data_dict['mahatma']['bhavartha']
                # Score = how many of the three meaning categories are non-empty.
                count = sum(bool(meanings.get(cat, None)) for cat in ['adibhautic', 'adidaivic', 'adhyatmic'])
                if count >= best_count:
                    best_meaning = {cat: meanings.get(cat, None) for cat in ['adibhautic', 'adidaivic', 'adhyatmic']}
                    best_count = count
        return best_meaning if best_meaning else json.dumps({"error": "Required meaning associated with vedamantra is not available."})
    except Exception as e:
        logging.error(f"Error in _get_vedamantra_meaning: {e}")
        return json.dumps({"error": f"An error occurred: {e}"})
298
+
299
def _get_pada_morphology(term_details, meanings):
    """For each morphological analysis of a pada, attach the best-matching
    dictionary meaning (and similarity score) of its stem and root.

    Returns a list of per-analysis dicts; an empty list on any failure.
    """
    try:
        analyses = ast.literal_eval(term_details['Morphology'])
        results = []
        for analysis in analyses:
            entry = {}
            for part in ('stem', 'root'):
                candidate = analysis.get(part)
                if candidate:
                    ranked = word_sentence_similarity(meanings, candidate)
                    entry[f'{part}_word'] = candidate
                    # ranked is sorted best-first; take the top pair if any.
                    entry[f'{part}_meaning'] = ranked[0][0] if ranked else None
                    entry[f'{part}_score'] = ranked[0][1] if ranked else None
            entry['grammar'] = analysis['grammar']
            results.append(entry)
        return results
    except Exception as e:
        logging.error(f"Error in _get_pada_morphology: {e}")
        return []
318
+
319
def get_morphological_info_of_pada(pada, mantraid=None, scripture_name=None, KandahNumber=None, MandalaNumber=None,
                                   ArchikahNumber=None, ShuktaNumber=None, PrapatakNumber=None, MantraNumber=None,
                                   AnuvakNumber=None, AdhyayaNumber=None):
    '''
    This help to get segmentation and morphological information about the word.

    Returns {"morphology_info": {"segmentation": ..., "morphology": ...}} on
    success, otherwise an {"error": ...} dict.
    '''
    try:
        # Bug fix: the original branched on `if pada:` which is always true for
        # this required argument, so the scripture-location filters were never
        # applied. Use the filtered lookup whenever any filter is supplied.
        location_given = any((scripture_name, KandahNumber, MandalaNumber, ArchikahNumber,
                              ShuktaNumber, PrapatakNumber, MantraNumber, AnuvakNumber, AdhyayaNumber))
        if location_given:
            details = _get_pada_details_by_scripture(pada, scripture_name=scripture_name, KandahNumber=KandahNumber,
                                                     MandalaNumber=MandalaNumber, ArchikahNumber=ArchikahNumber,
                                                     ShuktaNumber=ShuktaNumber, PrapatakNumber=PrapatakNumber,
                                                     MantraNumber=MantraNumber, AnuvakNumber=AnuvakNumber,
                                                     AdhyayaNumber=AdhyayaNumber)
        else:
            query = f'''SELECT * FROM term_details_modified WHERE Pada = "{pada}"
            '''
            details = execute_sql_query(query)

        if not details:
            return {"error": "No details found for pada."}

        if mantraid is not None:
            # Narrow the candidate rows to the one mantra the caller asked about.
            for record in details:
                if record["mantra_id"] == mantraid:
                    return {"morphology_info": {"segmentation": record["Segmentation"],
                                                "morphology": record["Morphology"]}}
            return {"error": f"No details found for mantraid '{mantraid}'"}

        pada_details = details[0]
        return {"morphology_info": {"segmentation": pada_details["Segmentation"],
                                    "morphology": pada_details["Morphology"]}}

    except Exception as e:
        logging.error(f"Error in get_morphological_info_of_pada: {e}")
        return {"error": f"Failed to get meaning of the word {pada}. {e}"}
358
+
359
+
360
def get_adibauatic_adidaivic_adhyatmic_meaning_of_pada(pada, mantraid=None, scripture_name=None,
                                                       KandahNumber=None,MandalaNumber=None, ArchikahNumber=None,
                                                       ShuktaNumber=None, PrapatakNumber=None, MantraNumber=None,
                                                       AnuvakNumber=None, AdhyayaNumber=None,MahatmaName=None):
    '''
    Derive the adibhautic / adidaivic / adhyatmic sense of a pada by matching
    its stem/root dictionary meanings against the whole mantra's three
    meanings. Returns a JSON string (either the combined info or {"error": ...}).
    Sample query:
    1. What is the adibhautic meaning of pada 'agnim' from RigVeda, first mandala, first shukta and first mantra?
    2. What is the adhyatmic meaning of the pada agnim in the context of the mantra whose id is '1.1.1.1?'
    '''
    try:
        if mantraid:
            query = f'''
            SELECT * FROM term_details_modified WHERE mantra_id = '{mantraid}' AND Pada = "{pada}"
            '''
            # Execute the query to get details from the database
            details = execute_sql_query(query)
        else:
            # Call the function to get details by scripture
            details = _get_pada_details_by_scripture(pada, scripture_name=scripture_name, KandahNumber=KandahNumber,MandalaNumber=MandalaNumber, ArchikahNumber=ArchikahNumber,
                                                     ShuktaNumber=ShuktaNumber, PrapatakNumber=PrapatakNumber, MantraNumber=MantraNumber,
                                                     AnuvakNumber=AnuvakNumber, AdhyayaNumber=AdhyayaNumber)

        if details:
            pada_details = details[0]  # Assuming details is a list of dictionaries, select the first item
            mantraID = pada_details['mantra_id']
            meanings = _get_vedamantra_meaning(mantraID,MahatmaName=MahatmaName)
            # NOTE(review): meanings is a dict on success but a JSON string on
            # failure; the `in` test below works for both representations.
            if 'error' in meanings:
                return json.dumps(meanings)
            ab_term_morph_list = _get_pada_morphology(pada_details, meanings['adibhautic'])
            ad_term_morph_list = _get_pada_morphology(pada_details, meanings['adidaivic'])
            at_term_morph_list = _get_pada_morphology(pada_details, meanings['adhyatmic'])
            return json.dumps({
                f'adibhautic_info_{pada}': ab_term_morph_list,
                'vedamantra_adibhautic_meaning': meanings['adibhautic'],
                f'adidavic_info_{pada}': ad_term_morph_list,
                'vedamantra_adidavic_meaning': meanings['adidaivic'],
                f'adhyatmic_info_{pada}': at_term_morph_list,
                'vedamantra_adhyatmic_meaning': meanings['adhyatmic']
            })
        else:
            return json.dumps({"error": f"No details found for pada '{pada}'"})
    except Exception as e:
        logging.error(f"Error in get_adibauatic_adidaivic_adhyatmic_meaning_of_pada: {e}")
        return json.dumps({"error": f"Failed to get meaning of the word {pada}. {e}"})
404
+
405
+ # sql agent
406
+ from langchain_community.utilities.sql_database import SQLDatabase
407
+ from database import get_db
408
+ from langchain_community.agent_toolkits import create_sql_agent
409
+ from langchain_openai import ChatOpenAI
410
+ from langchain.pydantic_v1 import BaseModel, Field
411
+ from langchain.tools import StructuredTool
412
+ from typing import Optional
413
+ import json
414
+
415
+
416
class NLSQLResponse(BaseModel):
    """Tool-argument schema for the SQL agent tool.

    NOTE(review): duplicate of the ``NLSQLResponse`` defined earlier in this
    module; this definition shadows the first — deduplicate.
    """
    user_query:str = Field(description="user query")
418
+
419
# Build the SQL agent once at import time against the project database.
db = get_db()
# NOTE(review): llm_AI4 is not defined in this section — presumably created
# earlier in the module; confirm.
agent_executor = create_sql_agent(llm_AI4, db=db, agent_type="openai-tools", verbose=False)

def get_response(user_query):
    """Run *user_query* through the SQL agent and return its response payload."""
    response = agent_executor.invoke(user_query)
    return response
425
+
426
# Tool: natural-language question -> SQL -> natural-language answer.
# The description is prompt text the LLM uses for tool routing; the original
# wording was garbled ("obtains a information", "exceting") and is fixed here.
sql_tool = StructuredTool.from_function(
    func = get_response,
    name = "nl_sql_query",
    description="""Translate a natural-language question into a SQL query, execute it, and return a natural-language answer.
    Sample Query:
    1. How many mantras are there in RigVeda?
    2. What is the segmentation of the word 'prathasva' from KrishnaYajurVeda?""",
    args_schema=NLSQLResponse,
    return_direct=True,
)
436
+
437
# Tool: segmentation / morphology / grammar lookup for a single pada.
pada_morphological_tool = StructuredTool.from_function(
    func=get_morphological_info_of_pada,
    name="pada_morphology",
    description="""Purpose: To obtain morphological information such as segmentation, morphology, and grammar of a word.\
    Sample query:
    1. What is the segmentation and morphology of the word 'apratidhṛṣṭa-śavasam' from RigVeda?
    2. What is the grammar of the word 'prathasva' from KrishnaYajurVeda?
    """,
    args_schema=PadaAAAInput,
    return_direct=False
)
448
+
449
# Tool: dictionary meanings for a pada's roots and stems.
pada_meaning_tool = StructuredTool.from_function(
    func=get_pada_meaning,
    name="pada_meaning",
    description="""Purpose: For given sanskrit word, you have collection of meanings for available roots and stems of it.\
    You need to process this information as context and provide possible meanings for given word.
    Sample query:
    1. What is the meaning of the word apratidhṛṣṭa-śavasam?
    """,
    args_schema=PadaMeaningInput,
    return_direct=False
)
460
+
461
# Tool: context-aware adibhautic/adidaivic/adhyatmic sense of a pada.
pada_word_sense_tool = StructuredTool.from_function(
    func=get_adibauatic_adidaivic_adhyatmic_meaning_of_pada,
    name="pada_AAA_meaning",
    description="""To obtain a complete or meaningful adibauatic/adhidaivic/adhyatmic meaning of a word or pada based on context information.\n
    Sample query:
    1. What is the adibhautic meaning of pada 'agnim' from RigVeda, first mandala, first shukta and first mantra?
    2. What is the adhyatmic meaning of the pada agnim in the context of the mantra whose id is '1.1.1.1'?
    """,
    args_schema=PadaAAAInput,
    return_direct=False
)
472
+
473
# Tool: vedamantra text and header details (padapatha, devata, chandah, rishi).
vedamantra_tool = StructuredTool.from_function(
    func=get_vedamantra_details,
    name="vedamantra_details",
    description='''This function is used to get the vedamantra such as vedamantra, padapatha, devata, chandah,
    and rishi, from all Vedas (RigVeda, AtharvaVeda, SamaVeda, KrishnaYajurVeda, and ShuklaYajurVeda).
    Sample Questions:
    1. Obtain the vedamantra of the mantra whose id is 1.1.1.1?
    2. Retrieve the devata of the vedamantra from Rigveda, first mandala, first shukta, and first mantra.
    3. Provide the meaning of the vedamantra from Rigveda, first mandala, first shukta, and first mantra written by Tulsi Ram.
    4. Explain the adhibautic meaning of the first mantra from RigVeda, first mandala, and first shukta.
    ''',
    args_schema=MantraInput,
    return_direct=False
)
487
# Tool: mantra-level summaries (anvaya, bhavarth, mantraVishaya, tippani, ...).
vedamantra_summary_tool =StructuredTool.from_function(
    func=get_vedamantra_summary,
    name="vedamantra_summary",
    description="""Use the function `get_vedamantra_summary` to access the information such as adibhautic meaning of the mantra, anvaya of the mantra, mantraVishaya of the mantra,
    adhibautic (or adhyatmic or adhidyvic) meaning (or bhavarth) of the mantra, purpose of the mantra, usage of the mantra, and tippani of the mantra.
    Sample Query:
    1. Obtain the anvaya of the mantra whose id (mantraid) is 1.1.1.1?
    2. Retrieve tha adibhautic meaning of the first mantra from RigVeda, first mandala, and first shukta.
    3. Provide the adhyatmic meaning of the mantra 1.1.1.9?
    4. What is the tippani of the mantra 1.1.1.7?
    5. What is the adhyatmic meaning of the mantra 1.1.1.7?
    6. What is the mantravishaya of the 6th mantra from RigVeda, first mandala, and first shukta?""",
    args_schema=MantraInput,
    return_direct=False
)
502
+
503
+ ## vector tool
504
+ import os
505
+ import time
506
+ import pickle
507
+ import streamlit as st
508
+ from dotenv import load_dotenv
509
+ from pinecone import Pinecone, ServerlessSpec
510
+ from utils import load_pickle, initialize_embedding_model
511
+ from langchain_community.retrievers import BM25Retriever
512
+ from langchain_pinecone import PineconeVectorStore
513
+ from langchain.retrievers import EnsembleRetriever
514
+ from langchain.tools.retriever import create_retriever_tool
515
+
516
+
517
+
518
# Load .env file
load_dotenv()

# Constants
INDEX_NAME = "veda-index-v2"
MODEL_NAME = "BAAI/bge-large-en-v1.5"
# Portability fix: the original r"Docs\ramana_docs_ids.pkl" raw string only
# resolved correctly on Windows; build the path with os.path.join instead.
DOCS_DIRECTORY = os.path.join("Docs", "ramana_docs_ids.pkl")
CURRENT_DIRECTORY = os.getcwd()


# Initialize Pinecone client
PINECONE_API_KEY = os.getenv("PINECONE_API_KEY_SAM")
pc = Pinecone(api_key=PINECONE_API_KEY)
531
+
532
#@st.cache_resource
def create_or_load_index():
    """Return a handle to the Pinecone index, creating it first if absent.

    Creates a 1024-dimensional dot-product serverless index (AWS us-east-1)
    and blocks until Pinecone reports it ready.
    """
    # Check if index already exists
    if INDEX_NAME not in pc.list_indexes().names():
        # Create index if it does not exist
        pc.create_index(
            INDEX_NAME,
            dimension=1024,
            metric='dotproduct',
            spec=ServerlessSpec(
                cloud="aws",
                region="us-east-1"
            )
        )
        # Wait for index to be initialized
        while not pc.describe_index(INDEX_NAME).status['ready']:
            time.sleep(1)
    # Connect to index
    return pc.Index(INDEX_NAME)
551
+
552
# Load documents
docs = load_pickle(DOCS_DIRECTORY)
# Initialize embedding model
embedding = initialize_embedding_model(MODEL_NAME)
# Create or load index
index = create_or_load_index()

# Initialize BM25 retriever over the raw document texts (keyword matching).
# Each element of docs is assumed to be {'document': Document} — confirm
# against how ramana_docs_ids.pkl was built.
bm25_retriever = BM25Retriever.from_texts(
    [text['document'].page_content for text in docs],
    metadatas=[text['document'].metadata for text in docs]
)
bm25_retriever.k = 2

# Dense retriever over the Pinecone index, using MMR for result diversity.
vector_store = PineconeVectorStore(index, embedding)
retriever = vector_store.as_retriever(search_type="mmr")

# Hybrid retrieval: 20% BM25 keyword score, 80% dense vector score.
ensemble_retriever = EnsembleRetriever(
    retrievers=[bm25_retriever, retriever], weights=[0.2, 0.8]
)
574
+
575
class VectorResponse(BaseModel):
    """Tool-argument schema for vector retrieval.

    NOTE(review): duplicate of the ``VectorResponse`` defined earlier in this
    module; this definition shadows the first — deduplicate.
    """
    query:str = Field(description="user query")
577
+
578
def vector_retrieve(query):
    """Return the documents most relevant to *query* from the dense retriever."""
    matches = retriever.get_relevant_documents(query)
    return matches
581
+
582
# Tool: semantic document search over the Pinecone vector index.
vector_tool = StructuredTool.from_function(
    func = vector_retrieve,
    name = "vector_retrieve",
    description="Search and return documents related user query from the vector index.",
    args_schema=VectorResponse,
    return_direct=False
)
589
+
590
# Tools exposed to the agent. vector_tool is intentionally left out for now
# (see commented entry below).
tools_list = [pada_morphological_tool, sql_tool, pada_meaning_tool, pada_word_sense_tool, vedamantra_tool, vedamantra_summary_tool]
#vector_tool,
592
+
593
# Convert tools to OpenAI function schemas so the model can emit tool calls.
tools_all = [convert_to_openai_function(tool) for tool in tools_list]
# Set up the tools to execute them from the graph
from langgraph.prebuilt import ToolExecutor

tool_executor = ToolExecutor(tools_list)
# NOTE(review): llm_AI4 is not defined in this section — presumably created
# earlier in the module; confirm.
llm_with_tools = llm_AI4.bind_tools(tools_all)
602
+
603
+
604
def call_tools(msg: AIMessage) -> Runnable:
    """Simple sequential tool calling helper.

    Executes every tool call proposed by *msg* in order, attaching each
    result under the call's "output" key, and returns the call list.
    """
    registry = {tool.name: tool for tool in tools_list}
    invocations = msg.tool_calls.copy()
    for invocation in invocations:
        invocation["output"] = registry[invocation["name"]].invoke(invocation["args"])
    return invocations
611
+
612
# Pipeline: the tool-bound LLM proposes tool calls, call_tools executes them.
tool_chain = llm_with_tools | call_tools
614
+
requirements.txt ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ langchain==0.1.12
2
+ langsmith==0.1.29
3
+ langgraph==0.0.28
4
+ streamlit==1.31.1
5
+ langchain_openai==0.0.8
6
+ openai==1.14.1
7
+ langchain-groq==0.0.1
8
+ chardet==5.2.0
9
+ aksharamukha==2.1.2
10
+ sentence_transformers==2.4.0
11
+ langchain-nomic==0.0.2
12
+ beautifulsoup4==4.12.3
13
+ pymysql==1.1.0
14
+ langchain_core==0.1.34
15
+ mysql-connector-python==8.3.0
16
+ pinecone-text==0.9.0
17
+ rank-bm25==0.2.2
18
+ langchain-mistralai==0.1.2
19
+ momento==1.20.1
utils.py ADDED
@@ -0,0 +1,147 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import re
3
+ import requests
4
+ import json
5
+ import pickle
6
+ import numpy as np
7
+ import pandas as pd
8
+ from typing import List
9
+ from typing import Optional
10
+ from typing import Union
11
+ import streamlit as st
12
+ from database import execute_sql_query
13
+ from bs4 import BeautifulSoup
14
+ from aksharamukha import transliterate
15
+ from sentence_transformers import util
16
+ from langchain_nomic.embeddings import NomicEmbeddings
17
+ from langchain_community.embeddings import HuggingFaceBgeEmbeddings
18
+
19
+
20
+
21
def load_pickle(path):
    """Deserialize and return the object stored in the pickle file at *path*."""
    with open(path, 'rb') as handle:
        return pickle.load(handle)
25
+
26
+
27
+
28
def initialize_embedding_model(model_name, device="cpu", normalize_embeddings=True):
    """Build a HuggingFace BGE embedding model for *model_name*.

    *device* selects the torch device; *normalize_embeddings* requests
    unit-norm output vectors.
    """
    return HuggingFaceBgeEmbeddings(
        model_name=model_name,
        model_kwargs={"device": device},
        encode_kwargs={"normalize_embeddings": normalize_embeddings},
    )
32
+
33
+
34
+
35
# embedding model for quick calculations
nomic_api_key = os.getenv('NOMIC_API_KEY')
# nomic embed model used for similarity scores (128-dim for speed)
# NOTE(review): nomic_api_key is read but never passed to NomicEmbeddings —
# presumably the client picks it up from the environment; confirm.
nomic_embed_model = NomicEmbeddings(
    dimensionality=128,
    model="nomic-embed-text-v1.5",
)
42
+
43
+
44
+
45
def get_list_meaning_word(word):
    '''
    Scrape ambuda.org for dictionary meanings of *word* from three dictionaries
    (Monier-Williams 1899, Shabda-Sagara 1900, Apte 1890), transliterating
    Devanagari fragments to IAST.

    Returns a dict with the word under 'pada' and one meaning-list per
    dictionary; lists stay empty for sections that could not be parsed.
    '''
    pada_meanings = {'pada': word,
                     'Monier-Williams Sanskrit-English Dictionary (1899)': [],
                     'Shabda-Sagara (1900)': [],
                     'Apte-Practical Sanskrit-English Dictionary (1890)': [],
                     }
    url = f"https://ambuda.org/tools/dictionaries/mw,shabdasagara,apte/{word}"

    try:
        # Fetch HTML content
        response = requests.get(url)
        response.raise_for_status()

        # Parse HTML with BeautifulSoup
        soup = BeautifulSoup(response.text, 'html.parser')

        # One <div class="my-4"> per dictionary, in the order requested in the URL.
        divs = soup.find_all('div', class_='my-4', attrs={'x-show': 'show'})

        # Bug fix throughout: the original used bare `except:` clauses, which
        # also swallow KeyboardInterrupt/SystemExit; narrowed to Exception.
        try:
            # Find all list items <li> within the specified <ul> tag
            div_items_0 = divs[0].find('ul').find_all('li', class_='dict-entry mw-entry')
            dive_text_0 = [li_tag.get_text(strip=True) for li_tag in div_items_0]
            text_0_trans = [transliterate.process(src='Devanagari', tgt='IAST', txt=text) for text in dive_text_0]
            pada_meanings['Monier-Williams Sanskrit-English Dictionary (1899)'] = text_0_trans
        except Exception:
            print("Error: Unable to find Monier-Williams Sanskrit-English Dictionary (1899) data.")

        try:
            div_items_1 = divs[1].find_all('div')
            dive_text_1 = [item.get_text(strip=True) for item in div_items_1]
            text_1_trans = [transliterate.process(src='Devanagari', tgt='IAST', txt=text) for text in dive_text_1]
            pada_meanings['Shabda-Sagara (1900)'] = text_1_trans
        except Exception:
            print("Error: Unable to find Shabda-Sagara (1900) data.")

        try:
            # The Apte section interleaves English text with Devanagari <span>s;
            # walk the siblings of each <b> sense marker, transliterating spans,
            # until the next <div> ends the entry.
            apte_meanings = []
            for tag in divs[2].find_all('b'):
                if tag.text.strip() != '—':
                    text1 = tag.text.strip()  # English text within <b> tag
                    sibling = tag.find_next_sibling()  # Text following <b> tag
                    text2 = tag.next_sibling.strip() + ' '  # English text following <b> tag
                    while sibling.name != 'div':
                        if sibling.name is None:  # Handling non-tag text
                            text2 += " "
                        elif sibling.name == 'span':  # Devanagari fragment
                            IAST_text = transliterate.process(src='Devanagari', tgt='IAST', txt=sibling.text.strip())
                            text2 += IAST_text + ' ' + sibling.next_sibling.strip()
                        else:
                            text2 += sibling.text.strip() + ' ' + sibling.next_sibling.strip()
                        sibling = sibling.find_next_sibling()
                    apte_meanings.append(text2)
            # Last collected chunk is trailing boilerplate — drop it.
            pada_meanings['Apte-Practical Sanskrit-English Dictionary (1890)'] = apte_meanings[:-1]
        except Exception:
            print("Error: Unable to find Apte-Practical Sanskrit-English Dictionary (1890) data.")

    except requests.exceptions.RequestException as e:
        print(f"Error: Failed to fetch data from {url}. {e}")

    return pada_meanings
107
+
108
+ #get similarity scores
109
+ def word_sentence_similarity(meanings, root_stem_word):
110
+ # Check if the word embeddings are not empty
111
+ if not meanings or not root_stem_word:
112
+ return None
113
+
114
+ meaning_embedding = np.array(nomic_embed_model.embed_query(meanings))
115
+ all_meanings = []
116
+ word_score_pair = []
117
+ all_meanings.extend(get_list_meaning_word(root_stem_word)['Monier-Williams Sanskrit-English Dictionary (1899)'])
118
+ all_meanings.extend(get_list_meaning_word(root_stem_word)['Shabda-Sagara (1900)'])
119
+ for word_meaning in all_meanings:
120
+ root_stem_word_meaning_embedding = np.array(nomic_embed_model.embed_query(word_meaning))
121
+ # Calculate cosine similarity
122
+ similarity_score = util.pytorch_cos_sim(meaning_embedding, root_stem_word_meaning_embedding).item()
123
+ word_score_pair.append((word_meaning,similarity_score))
124
+ # Sort the list in descending order based on similarity scores
125
+ sorted_word_score_pairs = sorted(word_score_pair, key=lambda x: x[1], reverse=True)
126
+ return sorted_word_score_pairs
127
+
128
+ #extract the adhibautic meaning of the mantra from the vedamantra
129
+ def extract_meaning_by_language(data_list, target_language='English'):
130
+ for data_dict in data_list:
131
+ if data_dict.get('languageName') == target_language:
132
+ return data_dict.get('mahatma', {})
133
+ return None
134
+
135
#mantra_json_details
def get_details_mantra_json(query):
    """Execute *query* and decode the ``mantra_json`` column of the first row.

    NOTE(review): here ``execute_sql_query`` is unpacked as a
    (description, rows) pair, while other modules call it with a parameter
    list and index the result directly — confirm the helper's contract.
    """
    description, data = execute_sql_query(query)
    df = pd.DataFrame(data)
    # Name the columns from the cursor description.
    df.columns = [x[0] for x in description]
    mantra_json = df['mantra_json'].values[0]
    # Strip embedded HTML tags before JSON-decoding.
    cleaned_data = re.sub('<[^<]+?>', '', mantra_json)
    return json.loads(cleaned_data)
143
+
144
def iast_process(input_text):
    """Strip Vedic accent marks from IAST text.

    Removes U+0951–U+0954 (Vedic tone marks), U+200D (zero-width joiner) and
    U+0331 (combining macron below).

    Bug fix: the original character class '[\\u0951-\\u0954,\\u200d,\\u0331]'
    listed literal commas as separators, so every ',' in the input was also
    deleted.
    """
    output_text = re.sub('[\u0951-\u0954\u200d\u0331]', '', input_text)
    return output_text
147
+