Spaces:

vikramvasudevan
/

sanatan_ai

Running on CPU Upgrade

App Files Community

vikramvasudevan committed on Sep 2

Commit

75a5b18

verified ·

1 Parent(s): 6086d23

Upload folder using huggingface_hub

Browse files

Files changed (3) hide show

config.py +69 -31
modules/nodes/init.py +19 -12
tools.py +1 -1

config.py CHANGED Viewed

@@ -1,6 +1,7 @@
 from metadata import MetadataWhereClause
 from typing import List, Dict
 class SanatanConfig:
     # shuklaYajurVedamPdfPath: str = "./data/shukla-yajur-veda.pdf"
     # shuklaYajurVedamSmallPdfPath: str = "./data/shukla-yajur-veda-small.pdf"
@@ -18,9 +19,13 @@ class SanatanConfig:
             "output_dir": "./output/vishnu_puranam",
             "collection_name": "vishnu_puranam_openai",
             "collection_embedding_fn": "openai",
-            "unit" : "page",
             "metadata_fields": [
-                {"name": "file", "datatype": "str", "desc" : "name of the file from which the information was extracted"},
                 {"name": "num_chars", "datatype": "str"},
                 {"name": "page", "datatype": "int"},
             ],
@@ -48,9 +53,13 @@ class SanatanConfig:
             "title": "Shukla Yajur Vedam",
             "output_dir": "./output/shukla_yajur_vedam",
             "collection_name": "shukla_yajur_vedam",
-            "unit" : "page",
             "metadata_fields": [
-                {"name": "file", "datatype": "str", "desc" : "name of the file from which the information was extracted"},
                 {"name": "num_chars", "datatype": "str"},
                 {"name": "page", "datatype": "int"},
             ],
@@ -75,9 +84,13 @@ class SanatanConfig:
             "output_dir": "./output/bhagavat_gita",
             "collection_name": "bhagavat_gita_openai",
             "collection_embedding_fn": "openai",
-            "unit" : "page",
             "metadata_fields": [
-                {"name": "file", "datatype": "str", "desc" : "name of the file from which the information was extracted"},
                 {"name": "num_chars", "datatype": "str"},
                 {"name": "page", "datatype": "int"},
             ],
@@ -108,9 +121,13 @@ class SanatanConfig:
             "output_dir": "./output/valmiki_ramayanam",
             "collection_name": "valmiki_ramayanam_openai",
             "collection_embedding_fn": "openai",
-            "unit" : "page",
             "metadata_fields": [
-                {"name": "file", "datatype": "str", "desc" : "name of the file from which the information was extracted"},
                 {"name": "num_chars", "datatype": "str"},
                 {"name": "page", "datatype": "int"},
             ],
@@ -143,14 +160,30 @@ class SanatanConfig:
             "output_dir": "./output/vishnu_sahasranamam",
             "collection_name": "vishnu_sahasranamam_openai",
             "collection_embedding_fn": "openai",
-            "unit" : "verse",
             "metadata_fields": [
                 {"name": "chapter", "datatype": "str"},
                 {"name": "page_number", "datatype": "int"},
-                {"name": "sanskrit", "datatype": "str", "desc" : "The original sloka in sanskrit."},
-                {"name": "translation", "datatype": "str", "desc" : "The english translation."},
-                {"name": "transliteration", "datatype": "str", "desc" : "The english transliteration."},
-                {"name": "verse", "datatype": "int", "desc" : "The verse number of the sloka."},
             ],
             "pdf_path": "./data/vishnu_sahasranamam.pdf",
             "source": "https://www.swami-krishnananda.org/vishnu/Sri_Vishnu_Sahasranama_Stotram.pdf",
@@ -169,7 +202,7 @@ class SanatanConfig:
             "output_dir": "./output/divya_prabandham",
             "collection_name": "divya_prabandham",
             "collection_embedding_fn": "openai",
-            "unit" : "verse",
             "metadata_fields": [
                 {
                     "name": "prabandham_code",
@@ -198,12 +231,16 @@ class SanatanConfig:
                     "name": "title",
                     "datatype": "str",
                     "description": (
-                        "Title of this pasuram."
-                        "Use this when a specific prabandham code or name is given along with a relative verse number."
-                        "for example:\n"
-                        "- `TVM 1.8.3`\n"
-                        "- if the user query is 'give me 3rd pasuram in the 8th Thiruvaimozhi of the 1st decade.' - you must convert this representation to the format '{prabandham_code} {nth_decade}.{nth_chapter}.{nth_pasuram}' and pass as filter vaoue to the `title` field. \n"
-                        "If no decade is provided but a prabandham name is provided, assume decade = 1"
                     ),
                 },
                 {
@@ -272,9 +309,13 @@ class SanatanConfig:
             "title": "Bhagavatha Puranam",
             "output_dir": "./output/bhagavata_purana",
             "collection_name": "bhagavata_purana",
-            "unit" : "page",
             "metadata_fields": [
-                {"name": "file", "datatype": "str", "desc" : "name of the file from which the information was extracted"},
                 {"name": "num_chars", "datatype": "str"},
                 {"name": "page", "datatype": "int"},
             ],
@@ -293,7 +334,7 @@ class SanatanConfig:
             "title": "Kamba Ramayanam (English)",
             "output_dir": "./output/kamba_ramayanam",
             "collection_name": "kamba_ramayanam_en",
-            "unit" : "verse",
             "metadata_fields": [
                 {
                     "name": "kandam",
@@ -337,7 +378,7 @@ class SanatanConfig:
             "title": "Kamba Ramayanam (Tamil)",
             "output_dir": "./output/kamba_ramayanam",
             "collection_name": "kamba_ramayanam",
-            "unit" : "chunk",
             "metadata_fields": [
                 {
                     "name": "chunk_index",
@@ -374,7 +415,7 @@ class SanatanConfig:
             "title": "Chathusloki by Sri Alavandar",
             "output_dir": "./output/chathusloki",
             "collection_name": "chathusloki",
-            "unit" : "slokam",
             "metadata_fields": [
                 {
                     "name": "sloka_number",
@@ -403,7 +444,7 @@ class SanatanConfig:
             "title": "Sri Stavam by Sri Koorathazhwar",
             "output_dir": "./output/sri_stavam",
             "collection_name": "sri_stavam",
-            "unit" : "slokam",
             "metadata_fields": [
                 {
                     "name": "sloka_number",
@@ -445,7 +486,7 @@ class SanatanConfig:
             "output_dir": "./output/yt_metadata",
             "collection_name": "yt_metadata",
             "collection_embedding_fn": "openai",
-            "unit" : "video",
             "metadata_fields": [
                 {
                     "name": "video_id",
@@ -514,10 +555,7 @@ class SanatanConfig:
             embedding_fn = scripture["collection_embedding_fn"]  # overridden in config
         return embedding_fn
-    def filter_scriptures_fields(
-        self,
-        fields_to_keep: List[str]
-    ) -> List[Dict]:
         """
         Return a list of scripture dicts containing only the specified fields.
         """

 from metadata import MetadataWhereClause
 from typing import List, Dict
 class SanatanConfig:
     # shuklaYajurVedamPdfPath: str = "./data/shukla-yajur-veda.pdf"
     # shuklaYajurVedamSmallPdfPath: str = "./data/shukla-yajur-veda-small.pdf"
             "output_dir": "./output/vishnu_puranam",
             "collection_name": "vishnu_puranam_openai",
             "collection_embedding_fn": "openai",
+            "unit": "page",
             "metadata_fields": [
+                {
+                    "name": "file",
+                    "datatype": "str",
+                    "desc": "name of the file from which the information was extracted",
+                },
                 {"name": "num_chars", "datatype": "str"},
                 {"name": "page", "datatype": "int"},
             ],
             "title": "Shukla Yajur Vedam",
             "output_dir": "./output/shukla_yajur_vedam",
             "collection_name": "shukla_yajur_vedam",
+            "unit": "page",
             "metadata_fields": [
+                {
+                    "name": "file",
+                    "datatype": "str",
+                    "desc": "name of the file from which the information was extracted",
+                },
                 {"name": "num_chars", "datatype": "str"},
                 {"name": "page", "datatype": "int"},
             ],
             "output_dir": "./output/bhagavat_gita",
             "collection_name": "bhagavat_gita_openai",
             "collection_embedding_fn": "openai",
+            "unit": "page",
             "metadata_fields": [
+                {
+                    "name": "file",
+                    "datatype": "str",
+                    "desc": "name of the file from which the information was extracted",
+                },
                 {"name": "num_chars", "datatype": "str"},
                 {"name": "page", "datatype": "int"},
             ],
             "output_dir": "./output/valmiki_ramayanam",
             "collection_name": "valmiki_ramayanam_openai",
             "collection_embedding_fn": "openai",
+            "unit": "page",
             "metadata_fields": [
+                {
+                    "name": "file",
+                    "datatype": "str",
+                    "desc": "name of the file from which the information was extracted",
+                },
                 {"name": "num_chars", "datatype": "str"},
                 {"name": "page", "datatype": "int"},
             ],
             "output_dir": "./output/vishnu_sahasranamam",
             "collection_name": "vishnu_sahasranamam_openai",
             "collection_embedding_fn": "openai",
+            "unit": "verse",
             "metadata_fields": [
                 {"name": "chapter", "datatype": "str"},
                 {"name": "page_number", "datatype": "int"},
+                {
+                    "name": "sanskrit",
+                    "datatype": "str",
+                    "desc": "The original sloka in sanskrit.",
+                },
+                {
+                    "name": "translation",
+                    "datatype": "str",
+                    "desc": "The english translation.",
+                },
+                {
+                    "name": "transliteration",
+                    "datatype": "str",
+                    "desc": "The english transliteration.",
+                },
+                {
+                    "name": "verse",
+                    "datatype": "int",
+                    "desc": "The verse number of the sloka.",
+                },
             ],
             "pdf_path": "./data/vishnu_sahasranamam.pdf",
             "source": "https://www.swami-krishnananda.org/vishnu/Sri_Vishnu_Sahasranama_Stotram.pdf",
             "output_dir": "./output/divya_prabandham",
             "collection_name": "divya_prabandham",
             "collection_embedding_fn": "openai",
+            "unit": "verse",
             "metadata_fields": [
                 {
                     "name": "prabandham_code",
                     "name": "title",
                     "datatype": "str",
                     "description": (
+                        "Exact title of a pasuram in one of the following formats:\n"
+                        "1. '{prabandham_code} {decade}.{chapter}.{pasuram}' — use when the prabandham has decades.\n"
+                        "2. '{prabandham_code} {chapter}.{pasuram}' — use when the prabandham does not have decades.\n\n"
+                        "⚠️ Use this field ONLY when the user provides a specific prabandham and a relative verse number.\n"
+                        "Examples of valid usage:\n"
+                        "- User query: '3rd pasuram in the 8th Thiruvaimozhi of the 1st decade.'\n"
+                        "  → Convert to: '{prabandham_code} 1.8.3' and pass as `title` filter.\n"
+                        "- User query: '2nd pasuram of chapter 5 in [Prabandham with no decades].'\n"
+                        "  → Convert to: '{prabandham_code} 5.2' and pass as `title` filter.\n"
+                        "Do NOT use `title` for general queries or keyword searches — leave it empty in those cases."
                     ),
                 },
                 {
             "title": "Bhagavatha Puranam",
             "output_dir": "./output/bhagavata_purana",
             "collection_name": "bhagavata_purana",
+            "unit": "page",
             "metadata_fields": [
+                {
+                    "name": "file",
+                    "datatype": "str",
+                    "desc": "name of the file from which the information was extracted",
+                },
                 {"name": "num_chars", "datatype": "str"},
                 {"name": "page", "datatype": "int"},
             ],
             "title": "Kamba Ramayanam (English)",
             "output_dir": "./output/kamba_ramayanam",
             "collection_name": "kamba_ramayanam_en",
+            "unit": "verse",
             "metadata_fields": [
                 {
                     "name": "kandam",
             "title": "Kamba Ramayanam (Tamil)",
             "output_dir": "./output/kamba_ramayanam",
             "collection_name": "kamba_ramayanam",
+            "unit": "chunk",
             "metadata_fields": [
                 {
                     "name": "chunk_index",
             "title": "Chathusloki by Sri Alavandar",
             "output_dir": "./output/chathusloki",
             "collection_name": "chathusloki",
+            "unit": "slokam",
             "metadata_fields": [
                 {
                     "name": "sloka_number",
             "title": "Sri Stavam by Sri Koorathazhwar",
             "output_dir": "./output/sri_stavam",
             "collection_name": "sri_stavam",
+            "unit": "slokam",
             "metadata_fields": [
                 {
                     "name": "sloka_number",
             "output_dir": "./output/yt_metadata",
             "collection_name": "yt_metadata",
             "collection_embedding_fn": "openai",
+            "unit": "video",
             "metadata_fields": [
                 {
                     "name": "video_id",
             embedding_fn = scripture["collection_embedding_fn"]  # overridden in config
         return embedding_fn
+    def filter_scriptures_fields(self, fields_to_keep: List[str]) -> List[Dict]:
         """
         Return a list of scripture dicts containing only the specified fields.
         """

modules/nodes/init.py CHANGED Viewed

@@ -96,20 +96,26 @@ If the answer asks for translation to another language of their choice and you a
 When generating a response, follow these rules strictly:
-1. **No information in context**
    → Respond in {user_preferred_language}:
    "Can you give me more context please?"
-2. **Some results found, but low confidence**
    → Respond in {user_preferred_language}:
    "I may have some results but I am not sure of their accuracy. Would you like me to show them?"
-3. **No relevant answer found in context**
    → Respond in {user_preferred_language}:
    "I do not have enough information in the context provided from the {scripture} to answer this. I searched using {search_methodology}. Do you want me to try another search like {alternative_searchmethod}?"
-4. **Answer found in context with confidence**
-   → Respond in {user_preferred_language} using the following Markdown format:
 ### 🧾 Answer
 - Present a brief summary of your response in concise **{user_preferred_language}**. Mention only the scripture(s), chapter(s) and verse number(s) available if multiple matches are available.
@@ -157,20 +163,20 @@ The following format should be used to show only the most relevant match. Do not
 - Otherwise, provide the transliterations in {user_preferred_language}, matching the order of verses above.
 ### 📜 {user_preferred_language} - Translation(s)
-- Provide the **{user_preferred_language} meaning** for each verse listed above.
 - Again, follow the **same order**.
 - Do **not** repeat the original verse here — just the translation.
-### 📜 Notes
-- Bullet any extra points or cross-references in {user_preferred_language} from explanatory notes **only if present in the context**.
 - Do **not** include anything that is not supported or implied in the context.
 ⚠️ Do **not duplicate content** across sections.
 - Each section has a distinct purpose.
-- If a verse is shown in `📜 Supporting Verse(s)`, do **not** repeat it in the Translation section.
 - Only transliterations and meanings should appear in their respective sections.
 **Question:**
 {question}
@@ -184,6 +190,7 @@ The following format should be used to show only the most relevant match. Do not
 Respond in **Markdown** format only. Ensure native Sanskrit/Tamil verses are always clearly shown and translated. If a section does not apply (e.g. no verses), you may omit it.
 """
             ),
         ]
         state["initialized"] = True
@@ -196,8 +203,8 @@ Respond in **Markdown** format only. Ensure native Sanskrit/Tamil verses are alw
                 f"While translating, meticulously correct any spelling mistakes, typos, conversion errors, "
                 f"and remove any untranslated words or foreign characters. "
                 f"Ensure the output text is **fully natural, grammatically correct, and orthographically valid** "
-                f"Take *EXTRA* care in ensuring names of the authors, the title of their work is not mis-spelled or misrepresented."
-                f"in {state['language']}."
             )
         )
     )

 When generating a response, follow these rules strictly:
+1. **Check for existing context first**
+   - If relevant context from previous interactions or retrieved context exists, use it.
+   - Only call the DB tool if:
+     a) No relevant context exists, OR
+     b) You need additional passages to answer the query accurately.
+2. **No information found after checking context and/or DB tool**
    → Respond in {user_preferred_language}:
    "Can you give me more context please?"
+3. **Some results found, but low confidence**
    → Respond in {user_preferred_language}:
    "I may have some results but I am not sure of their accuracy. Would you like me to show them?"
+4. **No relevant answer found after full retrieval**
    → Respond in {user_preferred_language}:
    "I do not have enough information in the context provided from the {scripture} to answer this. I searched using {search_methodology}. Do you want me to try another search like {alternative_searchmethod}?"
+5. **Answer found in context with confidence**
+   → Respond in {user_preferred_language} using the following Markdown format:
 ### 🧾 Answer
 - Present a brief summary of your response in concise **{user_preferred_language}**. Mention only the scripture(s), chapter(s) and verse number(s) available if multiple matches are available.
 - Otherwise, provide the transliterations in {user_preferred_language}, matching the order of verses above.
 ### 📜 {user_preferred_language} - Translation(s)
+- Provide the translation in {user_preferred_language} for each verse listed above.
 - Again, follow the **same order**.
 - Do **not** repeat the original verse here — just the translation.
+### 📜 {user_preferred_language} - Detailed Notes
+- Skip this section if there is no `explanatory_notes_english` or `purport_english` or `wbw_english` or `wbw_ta` available
+- Summarize content from `explanatory_notes_english` | `purport_english` |  `wbw_english` | `wbw_ta` and translate them to {user_preferred_language}.
 - Do **not** include anything that is not supported or implied in the context.
 ⚠️ Do **not duplicate content** across sections.
 - Each section has a distinct purpose.
+- If a verse is shown in `📜 Original Verse(s)`, do **not** repeat it in the Translation section.
 - Only transliterations and meanings should appear in their respective sections.
 **Question:**
 {question}
 Respond in **Markdown** format only. Ensure native Sanskrit/Tamil verses are always clearly shown and translated. If a section does not apply (e.g. no verses), you may omit it.
 """
             ),
         ]
         state["initialized"] = True
                 f"While translating, meticulously correct any spelling mistakes, typos, conversion errors, "
                 f"and remove any untranslated words or foreign characters. "
                 f"Ensure the output text is **fully natural, grammatically correct, and orthographically valid** "
+                f"Take *EXTRA* care in ensuring names of the authors, the title of their work is not mis-spelled or misrepresented in the language {state['language']}."
+                f"Ensure to replace all occurences of the literal {{user_preferred_language}} with {state['language']}."
             )
         )
     )

tools.py CHANGED Viewed

@@ -57,7 +57,7 @@ tool_search_db_by_metadata = StructuredTool.from_function(
         "Use this tool **only when the user provides explicit metadata criteria**, such as: azhwar name, pasuram number, verse number, decade, prabandham name, or divya desam name."
         " This is not meant for general queries."
         f" The collection_name must be one of: {', '.join(allowed_collections)}."
-        "you must ALWAYS call one of the standardization tools available to get the correct entity name before using this tool."
         "If the user asks for a specific azhwar, use `tool_get_standardized_azhwar_names` first."
         "If the user asks for a specific prabandham, use `tool_get_standardized_prabandham_names` first."
         "If the user mentions a divya desam, use `tool_get_standardized_divya_desam_names` first."

         "Use this tool **only when the user provides explicit metadata criteria**, such as: azhwar name, pasuram number, verse number, decade, prabandham name, or divya desam name."
         " This is not meant for general queries."
         f" The collection_name must be one of: {', '.join(allowed_collections)}."
+        "You *MUST* ALWAYS call one of the standardization tools available to get the correct entity name before using this tool."
         "If the user asks for a specific azhwar, use `tool_get_standardized_azhwar_names` first."
         "If the user asks for a specific prabandham, use `tool_get_standardized_prabandham_names` first."
         "If the user mentions a divya desam, use `tool_get_standardized_divya_desam_names` first."