Upload folder using huggingface_hub

Files changed:
- .gitattributes +2 -0
- .github/workflows/update_space.yml +28 -0
- .gitignore +12 -0
- .python-version +1 -0
- README.md +3 -9
- app.py +74 -0
- assets/adiyen_bot.jpg +3 -0
- assets/adiyen_bot.png +3 -0
- assets/avatar_bot.png +0 -0
- assets/avatar_user.png +0 -0
- config.py +108 -0
- db.py +35 -0
- drive_downloader.py +44 -0
- embeddings.py +9 -0
- graph_helper.py +103 -0
- main.py +22 -0
- push_notifications_helper.py +24 -0
- pyproject.toml +21 -0
- requirements.txt +556 -0
- sanatan_assistant.py +98 -0
- serperdev_helper.py +14 -0
- tools.py +28 -0
- uv.lock +0 -0

.gitattributes CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+assets/adiyen_bot.jpg filter=lfs diff=lfs merge=lfs -text
+assets/adiyen_bot.png filter=lfs diff=lfs merge=lfs -text

.github/workflows/update_space.yml ADDED
@@ -0,0 +1,28 @@
+name: Run Python script
+
+on:
+  push:
+    branches:
+      - main
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v2
+
+      - name: Set up Python
+        uses: actions/setup-python@v2
+        with:
+          python-version: '3.9'
+
+      - name: Install Gradio
+        run: python -m pip install gradio
+
+      - name: Log in to Hugging Face
+        run: python -c 'import huggingface_hub; huggingface_hub.login(token="${{ secrets.hf_token }}")'
+
+      - name: Deploy to Spaces
+        run: gradio deploy

.gitignore ADDED
@@ -0,0 +1,12 @@
+# Python-generated files
+__pycache__/
+*.py[oc]
+build/
+dist/
+wheels/
+*.egg-info
+
+# Virtual environments
+.venv
+.env
+chromadb-store/

.python-version ADDED
@@ -0,0 +1 @@
+3.12

README.md CHANGED
@@ -1,12 +1,6 @@
 ---
-title:
-emoji: 😻
-colorFrom: blue
-colorTo: red
-sdk: gradio
-sdk_version: 5.39.0
+title: sanatan_ai
 app_file: app.py
-
+sdk: gradio
+sdk_version: 5.38.0
 ---
-
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

app.py ADDED
@@ -0,0 +1,74 @@
+import os
+import gradio as gr
+from config import SanatanConfig
+from drive_downloader import ZipDownloader
+from graph_helper import generate_graph
+import uuid
+import logging
+from dotenv import load_dotenv
+
+logging.basicConfig()
+logger = logging.getLogger()
+logger.setLevel(logging.INFO)
+
+graph = generate_graph()
+
+def init():
+    load_dotenv(override=True)
+
+    downloader = ZipDownloader(
+        service_account_json=os.getenv("GOOGLE_SERVICE_ACCOUNT_JSON")
+    )
+    # 1. Download the zip file
+    zip_path = downloader.download_zip_from_drive(
+        file_id=os.getenv("CHROMADB_FILE_ID"),
+        output_path=SanatanConfig.dbStorePath,
+    )
+
+    # 2. Extract it
+    downloader.unzip(zip_path, extract_to="./")
+
+def init_session():
+    # Generate a unique ID per browser session
+    return str(uuid.uuid4())
+
+
+def chat(message, history, thread_id):
+    print("received chat message for thread:", thread_id)
+    config = {"configurable": {"thread_id": thread_id}}
+    response = graph.invoke(
+        {"messages": [{"role": "user", "content": message}]}, config=config
+    )
+    return response["messages"][-1].content
+
+
+thread_id = gr.State(init_session)
+supported_scriptures = "\n - ".join(
+    [scripture["title"] for scripture in SanatanConfig.scriptures]
+)
+intro_messages = [
+    {"role": "assistant", "content": f"Namaskaram! I can assist with the following scriptures: \n - {supported_scriptures}"},
+    {"role": "assistant", "content": """
+#### You can ask me questions like:
+- How do all scriptures describe the form of Vishnu?
+- What did Krishna teach in Gita?
+- How did Arjun react on seeing the Vishwarupa form of Krishna?
+- Give some names of Vishnu from Sahasranamam.
+"""}
+]
+chatbot = gr.Chatbot(
+    elem_id="chatbot",
+    avatar_images=("assets/avatar_user.png", "assets/adiyen_bot.png"),
+    value=intro_messages,
+    label="Sanatan-Bot",
+    show_copy_button=True,
+    show_copy_all_button=True,
+    type="messages",
+    height=800,
+)
+chatInterface = gr.ChatInterface(
+    title="Sanatan-AI", fn=chat, additional_inputs=[thread_id], chatbot=chatbot
+)
+# initialize the database
+init()
+chatInterface.launch()
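
app.py rebuilds the ChromaDB store at startup: init() downloads a zip of the chromadb-store directory from Google Drive and extracts it before the Gradio interface launches, and the chat callback routes each message through the LangGraph graph with a per-session thread_id from gr.State. A hedged sketch of the environment this code reads (variable names are the ones used across the files in this commit; the values are placeholders, not part of the commit):

# .env (placeholder values, not part of this commit)
# OPENAI_API_KEY=...              # used by ChatOpenAI in graph_helper.py and main.py
# GOOGLE_SERVICE_ACCOUNT_JSON=... # raw service-account JSON parsed by drive_downloader.py
# CHROMADB_FILE_ID=...            # Drive file id of the zipped chromadb-store
# PUSHOVER_TOKEN=... / PUSHOVER_USER=...  # read by push_notifications_helper.py
# SERPER_API_KEY=...              # read by GoogleSerperAPIWrapper in serperdev_helper.py
# Then run: python app.py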

assets/adiyen_bot.jpg ADDED
Git LFS Details

assets/adiyen_bot.png ADDED
Git LFS Details

assets/avatar_bot.png ADDED

assets/avatar_user.png ADDED

config.py ADDED
@@ -0,0 +1,108 @@
+class SanatanConfig:
+    # shuklaYajurVedamPdfPath: str = "./data/shukla-yajur-veda.pdf"
+    # shuklaYajurVedamSmallPdfPath: str = "./data/shukla-yajur-veda-small.pdf"
+    # vishnuPuranamPdfPath = "./data/vishnu_puranam.pdf"
+    # datastores = [{"name": "sanskrit_001", "dbStorePath": "./chromadb-store"}, {"name": "nalayiram", "dbStorePath": "./chromadb-store-4000"}]
+    dbStorePath: str = "./chromadb-store"
+    # shuklaYajurVedamCollectionName: str = "shukla_yajur_vedam"
+    # vishnuPuranamCollectionName: str = "vishnu_puranam"
+    # shuklaYajurVedamOutputDir = "./output/shukla_yajur_vedam"
+    # vishnuPuranamOutputDir = "./output/vishnu_puranam"
+    scriptures = [
+        {
+            "name": "vishnu_puranam",
+            "title": "Sri Vishnu Puranam",
+            "output_dir": "./output/vishnu_puranam",
+            "collection_name": "vishnu_puranam",
+            "pdf_path": "./data/vishnu_puranam.pdf",
+            "language": "san+eng",
+            "example_labels": [
+                "Vishnu's form",
+                "About the five elements",
+                "About Garuda",
+                "Weapons of Vishnu",
+            ],
+            "examples": [
+                "describe Vishnu's form",
+                "five elements and their significance",
+                "What is the significance of Garuda? Show some verses that describe him.",
+                "What weapons does Vishnu hold?",
+            ],
+        },
+        {
+            "name": "shukla_yajur_vedam",
+            "title": "Shukla Yajur Vedam",
+            "output_dir": "./output/shukla_yajur_vedam",
+            "collection_name": "shukla_yajur_vedam",
+            "pdf_path": "./data/shukla-yajur-veda.pdf",
+            "language": "san+eng",
+            "example_labels": [
+                "About Vedam",
+                "About the five elements",
+                "About Brahma",
+            ],
+            "examples": [
+                "Gist of Shukla Yajur Vedam. Give me some sanskrit verses.",
+                "What is the significance of fire and water. show some sanskrit verses",
+                "Brahma",
+            ],
+        },
+        {
+            "name": "bhagavat_gita",
+            "title": "Bhagavat Gita",
+            "output_dir": "./output/bhagavat_gita",
+            "collection_name": "bhagavat_gita",
+            "pdf_path": "./data/bhagavat_gita.pdf",
+            "language": "san+eng",
+            "example_labels": [
+                "About Arjuna",
+                "About Karma",
+                "About birth and death",
+                "About the battle field",
+                "About Krishna's form",
+            ],
+            "examples": [
+                "Show some verses where Krishna advises Arjuna",
+                "What does Krishna say about Karma",
+                "What does Krishna say about birth and death",
+                "describe the battle field",
+                "Vishwarupa",
+            ],
+        },
+        {
+            "name": "valmiki_ramayanam",
+            "title": "Valmiki Ramayanam",
+            "output_dir": "./output/valmiki_ramayanam",
+            "collection_name": "valmiki_ramayanam",
+            "pdf_path": "./data/valmiki_ramayanam.pdf",
+            "language": "san+eng",
+            "example_labels": [
+                "About Jatayu",
+                "About Hanuman",
+                "About Vali",
+                "About Sita",
+                "About Ravana",
+            ],
+            "examples": [
+                "What is the significance of Jatayu? show some sanskrit verses to support the argument",
+                "Show some verses where Hanuman is mentioned",
+                "How did Rama kill Vali",
+                "How was Sita abducted",
+                "How did Rama kill Ravana?",
+            ],
+        },
+        {
+            "name": "vishnu_sahasranamam",
+            "title": "Vishnu Sahasranamam",
+            "output_dir": "./output/vishnu_sahasranamam",
+            "collection_name": "vishnu_sahasranamam",
+            "pdf_path": "./data/vishnu_sahasranamam.pdf",
+            "language": "san+eng",
+            "example_labels": ["Vanamali", "1000 names", "Sanskrit text search"],
+            "examples": [
+                "Vanamali",
+                "Show some of the 1000 names of Vishnu along with their meaning",
+                "show the verse that begins with शुक्लाम्बरधरं",
+            ],
+        },
+    ]

db.py ADDED
@@ -0,0 +1,35 @@
+import chromadb
+
+from config import SanatanConfig
+from embeddings import get_embedding
+import logging
+
+logger = logging.getLogger(__name__)
+logger.setLevel(logging.INFO)
+
+
+class SanatanDatabase:
+    def __init__(self) -> None:
+        self.chroma_client = chromadb.PersistentClient(path=SanatanConfig.dbStorePath)
+
+    def does_data_exist(self, collection_name: str) -> bool:
+        collection = self.chroma_client.get_or_create_collection(name=collection_name)
+        num_rows = collection.count()
+        logger.info("num_rows in %s = %d", collection_name, num_rows)
+        return num_rows > 0
+
+    def load(self, collection_name: str, ids, documents, embeddings, metadatas):
+        collection = self.chroma_client.get_or_create_collection(name=collection_name)
+        collection.add(
+            ids=ids,
+            documents=documents,
+            embeddings=embeddings,
+            metadatas=metadatas,
+        )
+
+    def search(self, collection_name: str, query: str, n_results=2):
+        collection = self.chroma_client.get_or_create_collection(name=collection_name)
+        response = collection.query(
+            query_embeddings=[get_embedding(query)], n_results=n_results
+        )
+        return response
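
SanatanDatabase wraps a persistent ChromaDB client: search() embeds the query text with get_embedding() and runs a nearest-neighbour lookup against the named collection. A minimal usage sketch, assuming the chromadb-store directory has already been downloaded and extracted (collection name taken from config.py):

from db import SanatanDatabase

db = SanatanDatabase()
if db.does_data_exist("bhagavat_gita"):
    hits = db.search("bhagavat_gita", query="What does Krishna say about Karma", n_results=3)
    # Chroma returns parallel lists; documents[0] holds the matches for the first query
    for doc in hits["documents"][0]:
        print(doc[:120])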

drive_downloader.py ADDED
@@ -0,0 +1,44 @@
+import os
+import json
+import io
+import zipfile
+
+from google.oauth2.service_account import Credentials
+from googleapiclient.discovery import build
+from googleapiclient.http import MediaIoBaseDownload
+
+
+class ZipDownloader:
+    def __init__(self, service_account_json: str):
+        self.creds = Credentials.from_service_account_info(json.loads(service_account_json))
+        self.service = build("drive", "v3", credentials=self.creds)
+
+    def download_zip_from_drive(self, file_id: str, output_path: str) -> str:
+        """
+        Downloads a ZIP file from Google Drive and saves it locally.
+        Returns the path to the downloaded ZIP.
+        """
+        request = self.service.files().get_media(fileId=file_id)
+        local_zip_path = os.path.join(output_path, "downloaded.zip")
+
+        os.makedirs(output_path, exist_ok=True)
+        fh = io.FileIO(local_zip_path, 'wb')
+        downloader = MediaIoBaseDownload(fh, request)
+
+        print(f"⬇️ Downloading ZIP file from Drive ID: {file_id}")
+        done = False
+        while not done:
+            status, done = downloader.next_chunk()
+            print(f" ⏬ Progress: {int(status.progress() * 100)}%")
+
+        print(f"✅ ZIP downloaded to: {local_zip_path}")
+        return local_zip_path
+
+    def unzip(self, zip_path: str, extract_to: str):
+        """
+        Unzips the downloaded ZIP file to a specified directory.
+        """
+        print(f"📂 Extracting ZIP: {zip_path} -> {extract_to}")
+        with zipfile.ZipFile(zip_path, 'r') as zip_ref:
+            zip_ref.extractall(extract_to)
+        print("✅ Extraction complete.")

embeddings.py ADDED
@@ -0,0 +1,9 @@
+
+from sentence_transformers import SentenceTransformer
+
+# Step 1: Load SentenceTransformer model
+# model = SentenceTransformer("all-MiniLM-L6-v2")
+model = SentenceTransformer("sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2")
+
+def get_embedding(text: str) -> list:
+    return model.encode(text).tolist()
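
The multilingual MiniLM model puts Devanagari and English queries in the same embedding space, which is what lets the examples in config.py mix Sanskrit and English. A small hedged sketch of the helper in isolation (the dimension comment reflects this MiniLM variant):

from embeddings import get_embedding

vec = get_embedding("शुक्लाम्बरधरं विष्णुं")
print(len(vec))  # 384-dimensional sentence embedding for this MiniLM variant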

graph_helper.py ADDED
@@ -0,0 +1,103 @@
+from typing import Annotated, TypedDict
+from langgraph.graph import StateGraph, START, END
+from langgraph.checkpoint.memory import MemorySaver
+from langgraph.graph.message import add_messages
+from langchain_openai import ChatOpenAI
+from langgraph.graph.state import CompiledStateGraph
+from tools import (
+    tool_format_scripture_answer,
+    tool_search_db,
+    tool_search_web,
+    tool_push,
+)
+from langgraph.prebuilt import ToolNode, tools_condition
+from langchain_core.messages import SystemMessage, ToolMessage, HumanMessage
+import logging
+
+logger = logging.getLogger(__name__)
+logger.setLevel(logging.INFO)
+
+
+class ChatState(TypedDict):
+    messages: Annotated[list[str], add_messages]
+
+
+def generate_graph() -> CompiledStateGraph:
+    memory = MemorySaver()
+    tools = [tool_search_web, tool_push, tool_search_db, tool_format_scripture_answer]
+    llm = ChatOpenAI(model="gpt-4o-mini").bind_tools(tools)
+
+    def chatNode(state: ChatState) -> ChatState:
+        # logger.info("messages before LLM: %s", str(state["messages"]))
+        response = llm.invoke(state["messages"])
+        # return {"messages": [response]}
+        return {"messages": state["messages"] + [response]}
+
+    def init_system_prompt_node(state: ChatState) -> ChatState:
+        messages = state["messages"] or []
+
+        # Check if system prompts were already added
+        already_has_prompt = any(
+            isinstance(m, SystemMessage) and "format_scripture_answer" in m.content
+            for m in messages
+        )
+
+        if not already_has_prompt:
+            messages += [
+                SystemMessage(
+                    content="⚠️ Do NOT summarize or compress the output from the `query` tool. It will be passed directly to `format_scripture_answer` tool that formats the answer **AS IS**. DO NOT REMOVE SANSKRIT TEXTS"
+                ),
+                SystemMessage(
+                    content="You MUST call the `format_scripture_answer` tool if the user question is about scripture content and the `query` tool has returned a result."
+                ),
+                SystemMessage(
+                    content="If the user's question is about any scripture content (even if multiple scriptures), you must use the `tool_search_db`. Only use `tool_search_web` for general non-scriptural questions."
+                ),
+            ]
+
+        return {"messages": messages}
+
+    # def chatNode(state: ChatState) -> ChatState:
+    #     messages = state["messages"]
+    #     system_prompt = None
+    #     new_messages = []
+
+    #     for m in messages:
+    #         if isinstance(m, ToolMessage):
+    #             print("m.name = ", m.name)
+    #             if m.name == "format_scripture_answer":
+    #                 system_prompt = m.content
+    #         else:
+    #             new_messages.append(m)
+
+    #     if system_prompt:
+    #         full_history = [
+    #             SystemMessage(content=system_prompt),
+    #             SystemMessage(
+    #                 content="⚠️ Do NOT summarize or compress the context from the query tool. It will be passed directly to another tool that formats the answer."
+    #             ),
+    #             SystemMessage(
+    #                 content="You MUST call the `format_scripture_answer` tool if the user question is about scripture content and the query tool has returned a result."
+    #             ),
+    #         ] + new_messages
+    #     else:
+    #         full_history = messages
+
+    #     # 🔍 Debug log (optional)
+    #     # print("\n🧠 LLM Full History:")
+    #     # for m in full_history:
+    #     #     print(f"- {m.type.upper()}: {m.content[:100]}...\n")
+
+    #     ai_response = llm.invoke(full_history)
+    #     return {"messages": messages + [ai_response]}
+
+    graph = StateGraph(ChatState)
+    graph.add_node("init", init_system_prompt_node)
+    graph.add_node("chat", chatNode)
+    graph.add_node("tools", ToolNode(tools))
+    graph.add_edge(START, "init")
+    graph.add_edge("init", "chat")
+    graph.add_conditional_edges("chat", tools_condition, "tools")
+    graph.add_edge("tools", "chat")
+
+    return graph.compile(checkpointer=memory)
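
The compiled graph runs init (inject the tool-routing system prompts once) then loops chat → tools → chat until the model stops emitting tool calls, and MemorySaver keeps a separate history per thread_id. A hedged sketch of how the checkpointer isolates two conversations (thread ids are arbitrary example values; requires OPENAI_API_KEY and the tool-related env vars):

from graph_helper import generate_graph

graph = generate_graph()

def ask(thread, text):
    cfg = {"configurable": {"thread_id": thread}}
    out = graph.invoke({"messages": [{"role": "user", "content": text}]}, config=cfg)
    return out["messages"][-1].content

print(ask("thread-a", "What does Krishna say about Karma?"))
print(ask("thread-b", "Who is Jatayu?"))  # keeps its own history, independent of thread-a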

main.py ADDED
@@ -0,0 +1,22 @@
+from langchain_openai import ChatOpenAI
+from dotenv import load_dotenv
+from push_notifications_helper import push
+from langchain.agents import Tool
+
+from tools import tool_push, tool_search_web
+
+load_dotenv(override=True)
+
+
+def main():
+    print("Hello from sanatan-ai!")
+    llm = ChatOpenAI()
+    llm_response = llm.invoke("Generate a simple question to ask the user about geography.")
+    # print(response)
+    response = tool_search_web.invoke(llm_response.content)
+    message = "Question: " + llm_response.content + "\nAnswer: " + response
+    tool_push.invoke(message)
+
+
+if __name__ == "__main__":
+    main()

push_notifications_helper.py ADDED
@@ -0,0 +1,24 @@
+import os
+import requests
+from dotenv import load_dotenv
+import logging
+
+logger = logging.getLogger()
+logger.setLevel(logging.INFO)
+
+load_dotenv(override=True)
+
+pushover_token = os.getenv("PUSHOVER_TOKEN")
+pushover_user = os.getenv("PUSHOVER_USER")
+pushover_url = "https://api.pushover.net/1/messages.json"
+
+
+def push(text: str):
+    """Send a push notification to the user"""
+    logger.info("Sending a push notification for %s", text)
+    response = requests.post(
+        pushover_url,
+        data={"token": pushover_token, "user": pushover_user, "message": text},
+    )
+    logger.info("response = %s", response.json())
+    logger.info("Sent notification")

pyproject.toml ADDED
@@ -0,0 +1,21 @@
+[project]
+name = "sanatan-ai"
+version = "0.1.0"
+description = "Add your description here"
+readme = "README.md"
+requires-python = ">=3.12"
+dependencies = [
+    "chromadb>=1.0.15",
+    "dotenv>=0.9.9",
+    "google-api-python-client>=2.177.0",
+    "google-auth-httplib2>=0.2.0",
+    "google-auth-oauthlib>=1.2.2",
+    "gradio>=5.39.0",
+    "gspread>=6.2.1",
+    "langchain>=0.3.27",
+    "langchain-community>=0.3.27",
+    "langchain-openai>=0.3.28",
+    "langgraph>=0.6.2",
+    "oauth2client>=4.1.3",
+    "sentence-transformers>=5.0.0",
+]

requirements.txt ADDED
@@ -0,0 +1,556 @@
+# This file was autogenerated by uv via the following command:
+#    uv pip compile pyproject.toml -o requirements.txt
+aiofiles==24.1.0
+    # via gradio
+aiohappyeyeballs==2.6.1
+    # via aiohttp
+aiohttp==3.12.15
+    # via langchain-community
+aiosignal==1.4.0
+    # via aiohttp
+annotated-types==0.7.0
+    # via pydantic
+anyio==4.9.0
+    # via
+    #   gradio
+    #   httpx
+    #   openai
+    #   starlette
+    #   watchfiles
+attrs==25.3.0
+    # via
+    #   aiohttp
+    #   jsonschema
+    #   referencing
+backoff==2.2.1
+    # via posthog
+bcrypt==4.3.0
+    # via chromadb
+brotli==1.1.0
+    # via gradio
+build==1.2.2.post1
+    # via chromadb
+cachetools==5.5.2
+    # via google-auth
+certifi==2025.7.14
+    # via
+    #   httpcore
+    #   httpx
+    #   kubernetes
+    #   requests
+charset-normalizer==3.4.2
+    # via requests
+chromadb==1.0.15
+    # via sanatan-ai (pyproject.toml)
+click==8.2.1
+    # via
+    #   typer
+    #   uvicorn
+colorama==0.4.6
+    # via
+    #   build
+    #   click
+    #   tqdm
+    #   uvicorn
+coloredlogs==15.0.1
+    # via onnxruntime
+dataclasses-json==0.6.7
+    # via langchain-community
+distro==1.9.0
+    # via
+    #   openai
+    #   posthog
+dotenv==0.9.9
+    # via sanatan-ai (pyproject.toml)
+durationpy==0.10
+    # via kubernetes
+fastapi==0.116.1
+    # via gradio
+ffmpy==0.6.1
+    # via gradio
+filelock==3.18.0
+    # via
+    #   huggingface-hub
+    #   torch
+    #   transformers
+flatbuffers==25.2.10
+    # via onnxruntime
+frozenlist==1.7.0
+    # via
+    #   aiohttp
+    #   aiosignal
+fsspec==2025.7.0
+    # via
+    #   gradio-client
+    #   huggingface-hub
+    #   torch
+google-api-core==2.25.1
+    # via google-api-python-client
+google-api-python-client==2.177.0
+    # via sanatan-ai (pyproject.toml)
+google-auth==2.40.3
+    # via
+    #   google-api-core
+    #   google-api-python-client
+    #   google-auth-httplib2
+    #   google-auth-oauthlib
+    #   gspread
+    #   kubernetes
+google-auth-httplib2==0.2.0
+    # via
+    #   sanatan-ai (pyproject.toml)
+    #   google-api-python-client
+google-auth-oauthlib==1.2.2
+    # via
+    #   sanatan-ai (pyproject.toml)
+    #   gspread
+googleapis-common-protos==1.70.0
+    # via
+    #   google-api-core
+    #   opentelemetry-exporter-otlp-proto-grpc
+gradio==5.39.0
+    # via sanatan-ai (pyproject.toml)
+gradio-client==1.11.0
+    # via gradio
+greenlet==3.2.3
+    # via sqlalchemy
+groovy==0.1.2
+    # via gradio
+grpcio==1.74.0
+    # via
+    #   chromadb
+    #   opentelemetry-exporter-otlp-proto-grpc
+gspread==6.2.1
+    # via sanatan-ai (pyproject.toml)
+h11==0.16.0
+    # via
+    #   httpcore
+    #   uvicorn
+httpcore==1.0.9
+    # via httpx
+httplib2==0.22.0
+    # via
+    #   google-api-python-client
+    #   google-auth-httplib2
+    #   oauth2client
+httptools==0.6.4
+    # via uvicorn
+httpx==0.28.1
+    # via
+    #   chromadb
+    #   gradio
+    #   gradio-client
+    #   langgraph-sdk
+    #   langsmith
+    #   openai
+    #   safehttpx
+httpx-sse==0.4.1
+    # via langchain-community
+huggingface-hub==0.34.3
+    # via
+    #   gradio
+    #   gradio-client
+    #   sentence-transformers
+    #   tokenizers
+    #   transformers
+humanfriendly==10.0
+    # via coloredlogs
+idna==3.10
+    # via
+    #   anyio
+    #   httpx
+    #   requests
+    #   yarl
+importlib-metadata==8.7.0
+    # via opentelemetry-api
+importlib-resources==6.5.2
+    # via chromadb
+jinja2==3.1.6
+    # via
+    #   gradio
+    #   torch
+jiter==0.10.0
+    # via openai
+joblib==1.5.1
+    # via scikit-learn
+jsonpatch==1.33
+    # via langchain-core
+jsonpointer==3.0.0
+    # via jsonpatch
+jsonschema==4.25.0
+    # via chromadb
+jsonschema-specifications==2025.4.1
+    # via jsonschema
+kubernetes==33.1.0
+    # via chromadb
+langchain==0.3.27
+    # via
+    #   sanatan-ai (pyproject.toml)
+    #   langchain-community
+langchain-community==0.3.27
+    # via sanatan-ai (pyproject.toml)
+langchain-core==0.3.72
+    # via
+    #   langchain
+    #   langchain-community
+    #   langchain-openai
+    #   langchain-text-splitters
+    #   langgraph
+    #   langgraph-checkpoint
+    #   langgraph-prebuilt
+langchain-openai==0.3.28
+    # via sanatan-ai (pyproject.toml)
+langchain-text-splitters==0.3.9
+    # via langchain
+langgraph==0.6.2
+    # via sanatan-ai (pyproject.toml)
+langgraph-checkpoint==2.1.1
+    # via
+    #   langgraph
+    #   langgraph-prebuilt
+langgraph-prebuilt==0.6.2
+    # via langgraph
+langgraph-sdk==0.2.0
+    # via langgraph
+langsmith==0.4.9
+    # via
+    #   langchain
+    #   langchain-community
+    #   langchain-core
+markdown-it-py==3.0.0
+    # via rich
+markupsafe==3.0.2
+    # via
+    #   gradio
+    #   jinja2
+marshmallow==3.26.1
+    # via dataclasses-json
+mdurl==0.1.2
+    # via markdown-it-py
+mmh3==5.2.0
+    # via chromadb
+mpmath==1.3.0
+    # via sympy
+multidict==6.6.3
+    # via
+    #   aiohttp
+    #   yarl
+mypy-extensions==1.1.0
+    # via typing-inspect
+networkx==3.5
+    # via torch
+numpy==2.3.2
+    # via
+    #   chromadb
+    #   gradio
+    #   langchain-community
+    #   onnxruntime
+    #   pandas
+    #   scikit-learn
+    #   scipy
+    #   transformers
+oauth2client==4.1.3
+    # via sanatan-ai (pyproject.toml)
+oauthlib==3.3.1
+    # via
+    #   kubernetes
+    #   requests-oauthlib
+onnxruntime==1.22.1
+    # via chromadb
+openai==1.98.0
+    # via langchain-openai
+opentelemetry-api==1.36.0
+    # via
+    #   chromadb
+    #   opentelemetry-exporter-otlp-proto-grpc
+    #   opentelemetry-sdk
+    #   opentelemetry-semantic-conventions
+opentelemetry-exporter-otlp-proto-common==1.36.0
+    # via opentelemetry-exporter-otlp-proto-grpc
+opentelemetry-exporter-otlp-proto-grpc==1.36.0
+    # via chromadb
+opentelemetry-proto==1.36.0
+    # via
+    #   opentelemetry-exporter-otlp-proto-common
+    #   opentelemetry-exporter-otlp-proto-grpc
+opentelemetry-sdk==1.36.0
+    # via
+    #   chromadb
+    #   opentelemetry-exporter-otlp-proto-grpc
+opentelemetry-semantic-conventions==0.57b0
+    # via opentelemetry-sdk
+orjson==3.11.1
+    # via
+    #   chromadb
+    #   gradio
+    #   langgraph-sdk
+    #   langsmith
+ormsgpack==1.10.0
+    # via langgraph-checkpoint
+overrides==7.7.0
+    # via chromadb
+packaging==25.0
+    # via
+    #   build
+    #   gradio
+    #   gradio-client
+    #   huggingface-hub
+    #   langchain-core
+    #   langsmith
+    #   marshmallow
+    #   onnxruntime
+    #   transformers
+pandas==2.3.1
+    # via gradio
+pillow==11.3.0
+    # via
+    #   gradio
+    #   sentence-transformers
+posthog==5.4.0
+    # via chromadb
+propcache==0.3.2
+    # via
+    #   aiohttp
+    #   yarl
+proto-plus==1.26.1
+    # via google-api-core
+protobuf==6.31.1
+    # via
+    #   google-api-core
+    #   googleapis-common-protos
+    #   onnxruntime
+    #   opentelemetry-proto
+    #   proto-plus
+pyasn1==0.6.1
+    # via
+    #   oauth2client
+    #   pyasn1-modules
+    #   rsa
+pyasn1-modules==0.4.2
+    # via
+    #   google-auth
+    #   oauth2client
+pybase64==1.4.2
+    # via chromadb
+pydantic==2.11.7
+    # via
+    #   chromadb
+    #   fastapi
+    #   gradio
+    #   langchain
+    #   langchain-core
+    #   langgraph
+    #   langsmith
+    #   openai
+    #   pydantic-settings
+pydantic-core==2.33.2
+    # via pydantic
+pydantic-settings==2.10.1
+    # via langchain-community
+pydub==0.25.1
+    # via gradio
+pygments==2.19.2
+    # via rich
+pyparsing==3.2.3
+    # via httplib2
+pypika==0.48.9
+    # via chromadb
+pyproject-hooks==1.2.0
+    # via build
+pyreadline3==3.5.4
+    # via humanfriendly
+python-dateutil==2.9.0.post0
+    # via
+    #   kubernetes
+    #   pandas
+    #   posthog
+python-dotenv==1.1.1
+    # via
+    #   dotenv
+    #   pydantic-settings
+    #   uvicorn
+python-multipart==0.0.20
+    # via gradio
+pytz==2025.2
+    # via pandas
+pyyaml==6.0.2
+    # via
+    #   chromadb
+    #   gradio
+    #   huggingface-hub
+    #   kubernetes
+    #   langchain
+    #   langchain-community
+    #   langchain-core
+    #   transformers
+    #   uvicorn
+referencing==0.36.2
+    # via
+    #   jsonschema
+    #   jsonschema-specifications
+regex==2025.7.34
+    # via
+    #   tiktoken
+    #   transformers
+requests==2.32.4
+    # via
+    #   google-api-core
+    #   huggingface-hub
+    #   kubernetes
+    #   langchain
+    #   langchain-community
+    #   langsmith
+    #   posthog
+    #   requests-oauthlib
+    #   requests-toolbelt
+    #   tiktoken
+    #   transformers
+requests-oauthlib==2.0.0
+    # via
+    #   google-auth-oauthlib
+    #   kubernetes
+requests-toolbelt==1.0.0
+    # via langsmith
+rich==14.1.0
+    # via
+    #   chromadb
+    #   typer
+rpds-py==0.26.0
+    # via
+    #   jsonschema
+    #   referencing
+rsa==4.9.1
+    # via
+    #   google-auth
+    #   oauth2client
+ruff==0.12.7
+    # via gradio
+safehttpx==0.1.6
+    # via gradio
+safetensors==0.5.3
+    # via transformers
+scikit-learn==1.7.1
+    # via sentence-transformers
+scipy==1.16.1
+    # via
+    #   scikit-learn
+    #   sentence-transformers
+semantic-version==2.10.0
+    # via gradio
+sentence-transformers==5.0.0
+    # via sanatan-ai (pyproject.toml)
+setuptools==80.9.0
+    # via torch
+shellingham==1.5.4
+    # via typer
+six==1.17.0
+    # via
+    #   kubernetes
+    #   oauth2client
+    #   posthog
+    #   python-dateutil
+sniffio==1.3.1
+    # via
+    #   anyio
+    #   openai
+sqlalchemy==2.0.42
+    # via
+    #   langchain
+    #   langchain-community
+starlette==0.47.2
+    # via
+    #   fastapi
+    #   gradio
+sympy==1.14.0
+    # via
+    #   onnxruntime
+    #   torch
+tenacity==9.1.2
+    # via
+    #   chromadb
+    #   langchain-community
+    #   langchain-core
+threadpoolctl==3.6.0
+    # via scikit-learn
+tiktoken==0.9.0
+    # via langchain-openai
+tokenizers==0.21.4
+    # via
+    #   chromadb
+    #   transformers
+tomlkit==0.13.3
+    # via gradio
+torch==2.7.1
+    # via sentence-transformers
+tqdm==4.67.1
+    # via
+    #   chromadb
+    #   huggingface-hub
+    #   openai
+    #   sentence-transformers
+    #   transformers
+transformers==4.54.1
+    # via sentence-transformers
+typer==0.16.0
+    # via
+    #   chromadb
+    #   gradio
+typing-extensions==4.14.1
+    # via
+    #   aiosignal
+    #   anyio
+    #   chromadb
+    #   fastapi
+    #   gradio
+    #   gradio-client
+    #   huggingface-hub
+    #   langchain-core
+    #   openai
+    #   opentelemetry-api
+    #   opentelemetry-exporter-otlp-proto-grpc
+    #   opentelemetry-sdk
+    #   opentelemetry-semantic-conventions
+    #   pydantic
+    #   pydantic-core
+    #   referencing
+    #   sentence-transformers
+    #   sqlalchemy
+    #   starlette
+    #   torch
+    #   typer
+    #   typing-inspect
+    #   typing-inspection
+typing-inspect==0.9.0
+    # via dataclasses-json
+typing-inspection==0.4.1
+    # via
+    #   pydantic
+    #   pydantic-settings
+tzdata==2025.2
+    # via pandas
+uritemplate==4.2.0
+    # via google-api-python-client
+urllib3==2.5.0
+    # via
+    #   kubernetes
+    #   requests
+uvicorn==0.35.0
+    # via
+    #   chromadb
+    #   gradio
+watchfiles==1.1.0
+    # via uvicorn
+websocket-client==1.8.0
+    # via kubernetes
+websockets==15.0.1
+    # via
+    #   gradio-client
+    #   uvicorn
+xxhash==3.5.0
+    # via langgraph
+yarl==1.20.1
+    # via aiohttp
+zipp==3.23.0
+    # via importlib-metadata
+zstandard==0.23.0
+    # via langsmith

sanatan_assistant.py ADDED
@@ -0,0 +1,98 @@
+import logging
+from typing import Literal
+from dotenv import load_dotenv
+from config import SanatanConfig
+from db import SanatanDatabase
+
+load_dotenv(override=True)
+logger = logging.getLogger(__name__)
+logger.setLevel(logging.INFO)
+
+sanatanDatabase = SanatanDatabase()
+allowedCollections = Literal[
+    *[scripture["collection_name"] for scripture in SanatanConfig.scriptures]
+]
+
+
+def format_scripture_answer(scripture_title: allowedCollections, question: str, query_tool_output: str):
+    """
+    Use this tool to generate a custom system prompt based on the scripture title, question, and query_tool_output.
+
+    This is especially useful when the user has asked a question about a scripture, and the relevant context has been fetched using the `query` tool.
+
+    The generated prompt will guide the assistant to respond using only that scripture's content, with a clear format including Sanskrit verses, English explanations, and source chapters.
+    """
+
+    prompt = f"""You are a knowledgeable assistant on the scripture *{scripture_title}*, well-versed in both **Sanskrit** and **English**.
+
+You must answer the question using **only** the content from *{scripture_title}* provided in the context below.
+- Do **not** bring in information from **any other scripture or source**, or from prior knowledge, even if the answer seems obvious or well-known.
+- Do **not** quote any Sanskrit verses unless they appear **explicitly** in the provided context.
+- Do **not** use verse numbers or line references unless clearly mentioned in the context.
+- If the answer cannot be found in the context, clearly say:
+  **"I do not have enough information from the {scripture_title} to answer this."**
+
+If the answer is not directly stated in the verses but is present in explanatory notes within the context, you may interpret — but **explicitly mention that it is an interpretation**.
+
+If the user query is not small talk, use the following response format (in Markdown):
+
+### 🧾 Answer
+- Present the explanation in clear, concise **English**.
+- If it is an interpretation, say so explicitly.
+
+### 🔍 Scripture
+- Mention the scripture from which the references were taken.
+
+### 🔍 Chapter
+- Mention the chapter(s) from which the references were taken.
+- Only mention chapters if they are explicitly part of the context.
+
+### 📜 Supporting Sanskrit Verse(s)
+- Quote **only the most relevant** Sanskrit verse(s) from the context.
+- Do **not** add verses from memory or outside the context.
+- Format each verse clearly, one per line.
+- **Avoid transliteration unless no Devanagari is available.**
+- Do **not** provide English text in this section.
+
+### 🔍 English Translation
+- Provide the **corresponding English meaning** for each Sanskrit verse shown.
+- Keep it readable and aligned with the verse above.
+
+### Notes
+- Bullet any extra points or cross-references from explanatory notes **only if present in the context**.
+- Do **not** include anything that is not supported or implied in the context.
+
+**Question:**
+{question}
+
+---
+
+**Context:**
+{query_tool_output}
+
+---
+
+Respond in **Markdown** format only. Ensure Sanskrit verses are always clearly shown and translated. If a section does not apply (e.g. no verses), you may omit it.
+"""
+
+    return prompt
+
+
+
+def query(collection_name: allowedCollections, query: str, n_results=5):
+    """
+    Search a scripture collection.
+
+    Parameters:
+    - collection_name (str): The name of the scripture collection to search. ...
+    - query (str): The search query.
+    - n_results (int): Number of results to return. Default is 5.
+
+    Returns:
+    - A list of matching results.
+    """
+    logger.info("Searching collection [%s] for [%s]", collection_name, query)
+    response = sanatanDatabase.search(
+        collection_name=collection_name, query=query, n_results=n_results
+    )
+    return "\n\n".join(response["documents"][0])
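
The two functions are meant to be chained: query() pulls raw context from Chroma and format_scripture_answer() turns it into the system prompt the LLM answers from. A hedged sketch of that chain called directly, bypassing the agent (collection and question are examples from config.py; requires the extracted chromadb-store):

from sanatan_assistant import query, format_scripture_answer

context = query("vishnu_sahasranamam", "names of Vishnu", n_results=3)
prompt = format_scripture_answer(
    scripture_title="Vishnu Sahasranamam",
    question="Show some of the 1000 names of Vishnu along with their meaning",
    query_tool_output=context,
)
print(prompt[:300])  # markdown instructions followed by the retrieved context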

serperdev_helper.py ADDED
@@ -0,0 +1,14 @@
+from langchain_community.utilities import GoogleSerperAPIWrapper
+from dotenv import load_dotenv
+import logging
+
+load_dotenv(override=True)
+logger = logging.getLogger(__name__)
+logger.setLevel(logging.INFO)
+
+serper = GoogleSerperAPIWrapper()
+
+def search(query: str):
+    """Search the web for any given query"""
+    logger.info("Searching the web for %s", query)
+    return serper.run(query)

tools.py ADDED
@@ -0,0 +1,28 @@
+from langchain.agents import Tool
+from langchain_core.tools import StructuredTool
+
+from config import SanatanConfig
+from push_notifications_helper import push
+from serperdev_helper import search as search_web
+from sanatan_assistant import format_scripture_answer, query
+
+tool_push = Tool(
+    name="push", description="Send a push notification to the user", func=push
+)
+
+allowed_collections = [s["collection_name"] for s in SanatanConfig.scriptures]
+
+tool_search_db = StructuredTool.from_function(
+    query,
+    description=(
+        "Search within a specific scripture collection. "
+        f"The collection_name must be one of: {', '.join(allowed_collections)}."
+        " Use this to find relevant scripture verses or explanations."
+    ),
+)
+
+tool_search_web = Tool(
+    name="search_web", description="Search the web for information", func=search_web
+)
+
+tool_format_scripture_answer = StructuredTool.from_function(format_scripture_answer)
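
tool_search_db is built with StructuredTool.from_function, so its arguments mirror query()'s signature and the agent has to supply one of the collection names listed in its description. A hedged sketch of invoking the tool directly with a dict of arguments:

from tools import tool_search_db

result = tool_search_db.invoke(
    {"collection_name": "valmiki_ramayanam", "query": "Jatayu", "n_results": 2}
)
print(result)  # newline-joined documents returned by SanatanDatabase.search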

uv.lock ADDED
The diff for this file is too large to render. See raw diff.