vikramvasudevan committed on
Commit
75a5b18
·
verified ·
1 Parent(s): 6086d23

Upload folder using huggingface_hub

Browse files
Files changed (3) hide show
  1. config.py +69 -31
  2. modules/nodes/init.py +19 -12
  3. tools.py +1 -1
config.py CHANGED
@@ -1,6 +1,7 @@
1
  from metadata import MetadataWhereClause
2
  from typing import List, Dict
3
 
 
4
  class SanatanConfig:
5
  # shuklaYajurVedamPdfPath: str = "./data/shukla-yajur-veda.pdf"
6
  # shuklaYajurVedamSmallPdfPath: str = "./data/shukla-yajur-veda-small.pdf"
@@ -18,9 +19,13 @@ class SanatanConfig:
18
  "output_dir": "./output/vishnu_puranam",
19
  "collection_name": "vishnu_puranam_openai",
20
  "collection_embedding_fn": "openai",
21
- "unit" : "page",
22
  "metadata_fields": [
23
- {"name": "file", "datatype": "str", "desc" : "name of the file from which the information was extracted"},
 
 
 
 
24
  {"name": "num_chars", "datatype": "str"},
25
  {"name": "page", "datatype": "int"},
26
  ],
@@ -48,9 +53,13 @@ class SanatanConfig:
48
  "title": "Shukla Yajur Vedam",
49
  "output_dir": "./output/shukla_yajur_vedam",
50
  "collection_name": "shukla_yajur_vedam",
51
- "unit" : "page",
52
  "metadata_fields": [
53
- {"name": "file", "datatype": "str", "desc" : "name of the file from which the information was extracted"},
 
 
 
 
54
  {"name": "num_chars", "datatype": "str"},
55
  {"name": "page", "datatype": "int"},
56
  ],
@@ -75,9 +84,13 @@ class SanatanConfig:
75
  "output_dir": "./output/bhagavat_gita",
76
  "collection_name": "bhagavat_gita_openai",
77
  "collection_embedding_fn": "openai",
78
- "unit" : "page",
79
  "metadata_fields": [
80
- {"name": "file", "datatype": "str", "desc" : "name of the file from which the information was extracted"},
 
 
 
 
81
  {"name": "num_chars", "datatype": "str"},
82
  {"name": "page", "datatype": "int"},
83
  ],
@@ -108,9 +121,13 @@ class SanatanConfig:
108
  "output_dir": "./output/valmiki_ramayanam",
109
  "collection_name": "valmiki_ramayanam_openai",
110
  "collection_embedding_fn": "openai",
111
- "unit" : "page",
112
  "metadata_fields": [
113
- {"name": "file", "datatype": "str", "desc" : "name of the file from which the information was extracted"},
 
 
 
 
114
  {"name": "num_chars", "datatype": "str"},
115
  {"name": "page", "datatype": "int"},
116
  ],
@@ -143,14 +160,30 @@ class SanatanConfig:
143
  "output_dir": "./output/vishnu_sahasranamam",
144
  "collection_name": "vishnu_sahasranamam_openai",
145
  "collection_embedding_fn": "openai",
146
- "unit" : "verse",
147
  "metadata_fields": [
148
  {"name": "chapter", "datatype": "str"},
149
  {"name": "page_number", "datatype": "int"},
150
- {"name": "sanskrit", "datatype": "str", "desc" : "The original sloka in sanskrit."},
151
- {"name": "translation", "datatype": "str", "desc" : "The english translation."},
152
- {"name": "transliteration", "datatype": "str", "desc" : "The english transliteration."},
153
- {"name": "verse", "datatype": "int", "desc" : "The verse number of the sloka."},
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
154
  ],
155
  "pdf_path": "./data/vishnu_sahasranamam.pdf",
156
  "source": "https://www.swami-krishnananda.org/vishnu/Sri_Vishnu_Sahasranama_Stotram.pdf",
@@ -169,7 +202,7 @@ class SanatanConfig:
169
  "output_dir": "./output/divya_prabandham",
170
  "collection_name": "divya_prabandham",
171
  "collection_embedding_fn": "openai",
172
- "unit" : "verse",
173
  "metadata_fields": [
174
  {
175
  "name": "prabandham_code",
@@ -198,12 +231,16 @@ class SanatanConfig:
198
  "name": "title",
199
  "datatype": "str",
200
  "description": (
201
- "Title of this pasuram."
202
- "Use this when a specific prabandham code or name is given along with a relative verse number."
203
- "for example:\n"
204
- "- `TVM 1.8.3`\n"
205
- "- if the user query is 'give me 3rd pasuram in the 8th Thiruvaimozhi of the 1st decade.' - you must convert this representation to the format '{prabandham_code} {nth_decade}.{nth_chapter}.{nth_pasuram}' and pass as filter vaoue to the `title` field. \n"
206
- "If no decade is provided but a prabandham name is provided, assume decade = 1"
 
 
 
 
207
  ),
208
  },
209
  {
@@ -272,9 +309,13 @@ class SanatanConfig:
272
  "title": "Bhagavatha Puranam",
273
  "output_dir": "./output/bhagavata_purana",
274
  "collection_name": "bhagavata_purana",
275
- "unit" : "page",
276
  "metadata_fields": [
277
- {"name": "file", "datatype": "str", "desc" : "name of the file from which the information was extracted"},
 
 
 
 
278
  {"name": "num_chars", "datatype": "str"},
279
  {"name": "page", "datatype": "int"},
280
  ],
@@ -293,7 +334,7 @@ class SanatanConfig:
293
  "title": "Kamba Ramayanam (English)",
294
  "output_dir": "./output/kamba_ramayanam",
295
  "collection_name": "kamba_ramayanam_en",
296
- "unit" : "verse",
297
  "metadata_fields": [
298
  {
299
  "name": "kandam",
@@ -337,7 +378,7 @@ class SanatanConfig:
337
  "title": "Kamba Ramayanam (Tamil)",
338
  "output_dir": "./output/kamba_ramayanam",
339
  "collection_name": "kamba_ramayanam",
340
- "unit" : "chunk",
341
  "metadata_fields": [
342
  {
343
  "name": "chunk_index",
@@ -374,7 +415,7 @@ class SanatanConfig:
374
  "title": "Chathusloki by Sri Alavandar",
375
  "output_dir": "./output/chathusloki",
376
  "collection_name": "chathusloki",
377
- "unit" : "slokam",
378
  "metadata_fields": [
379
  {
380
  "name": "sloka_number",
@@ -403,7 +444,7 @@ class SanatanConfig:
403
  "title": "Sri Stavam by Sri Koorathazhwar",
404
  "output_dir": "./output/sri_stavam",
405
  "collection_name": "sri_stavam",
406
- "unit" : "slokam",
407
  "metadata_fields": [
408
  {
409
  "name": "sloka_number",
@@ -445,7 +486,7 @@ class SanatanConfig:
445
  "output_dir": "./output/yt_metadata",
446
  "collection_name": "yt_metadata",
447
  "collection_embedding_fn": "openai",
448
- "unit" : "video",
449
  "metadata_fields": [
450
  {
451
  "name": "video_id",
@@ -514,10 +555,7 @@ class SanatanConfig:
514
  embedding_fn = scripture["collection_embedding_fn"] # overridden in config
515
  return embedding_fn
516
 
517
- def filter_scriptures_fields(
518
- self,
519
- fields_to_keep: List[str]
520
- ) -> List[Dict]:
521
  """
522
  Return a list of scripture dicts containing only the specified fields.
523
  """
 
1
  from metadata import MetadataWhereClause
2
  from typing import List, Dict
3
 
4
+
5
  class SanatanConfig:
6
  # shuklaYajurVedamPdfPath: str = "./data/shukla-yajur-veda.pdf"
7
  # shuklaYajurVedamSmallPdfPath: str = "./data/shukla-yajur-veda-small.pdf"
 
19
  "output_dir": "./output/vishnu_puranam",
20
  "collection_name": "vishnu_puranam_openai",
21
  "collection_embedding_fn": "openai",
22
+ "unit": "page",
23
  "metadata_fields": [
24
+ {
25
+ "name": "file",
26
+ "datatype": "str",
27
+ "desc": "name of the file from which the information was extracted",
28
+ },
29
  {"name": "num_chars", "datatype": "str"},
30
  {"name": "page", "datatype": "int"},
31
  ],
 
53
  "title": "Shukla Yajur Vedam",
54
  "output_dir": "./output/shukla_yajur_vedam",
55
  "collection_name": "shukla_yajur_vedam",
56
+ "unit": "page",
57
  "metadata_fields": [
58
+ {
59
+ "name": "file",
60
+ "datatype": "str",
61
+ "desc": "name of the file from which the information was extracted",
62
+ },
63
  {"name": "num_chars", "datatype": "str"},
64
  {"name": "page", "datatype": "int"},
65
  ],
 
84
  "output_dir": "./output/bhagavat_gita",
85
  "collection_name": "bhagavat_gita_openai",
86
  "collection_embedding_fn": "openai",
87
+ "unit": "page",
88
  "metadata_fields": [
89
+ {
90
+ "name": "file",
91
+ "datatype": "str",
92
+ "desc": "name of the file from which the information was extracted",
93
+ },
94
  {"name": "num_chars", "datatype": "str"},
95
  {"name": "page", "datatype": "int"},
96
  ],
 
121
  "output_dir": "./output/valmiki_ramayanam",
122
  "collection_name": "valmiki_ramayanam_openai",
123
  "collection_embedding_fn": "openai",
124
+ "unit": "page",
125
  "metadata_fields": [
126
+ {
127
+ "name": "file",
128
+ "datatype": "str",
129
+ "desc": "name of the file from which the information was extracted",
130
+ },
131
  {"name": "num_chars", "datatype": "str"},
132
  {"name": "page", "datatype": "int"},
133
  ],
 
160
  "output_dir": "./output/vishnu_sahasranamam",
161
  "collection_name": "vishnu_sahasranamam_openai",
162
  "collection_embedding_fn": "openai",
163
+ "unit": "verse",
164
  "metadata_fields": [
165
  {"name": "chapter", "datatype": "str"},
166
  {"name": "page_number", "datatype": "int"},
167
+ {
168
+ "name": "sanskrit",
169
+ "datatype": "str",
170
+ "desc": "The original sloka in sanskrit.",
171
+ },
172
+ {
173
+ "name": "translation",
174
+ "datatype": "str",
175
+ "desc": "The english translation.",
176
+ },
177
+ {
178
+ "name": "transliteration",
179
+ "datatype": "str",
180
+ "desc": "The english transliteration.",
181
+ },
182
+ {
183
+ "name": "verse",
184
+ "datatype": "int",
185
+ "desc": "The verse number of the sloka.",
186
+ },
187
  ],
188
  "pdf_path": "./data/vishnu_sahasranamam.pdf",
189
  "source": "https://www.swami-krishnananda.org/vishnu/Sri_Vishnu_Sahasranama_Stotram.pdf",
 
202
  "output_dir": "./output/divya_prabandham",
203
  "collection_name": "divya_prabandham",
204
  "collection_embedding_fn": "openai",
205
+ "unit": "verse",
206
  "metadata_fields": [
207
  {
208
  "name": "prabandham_code",
 
231
  "name": "title",
232
  "datatype": "str",
233
  "description": (
234
+ "Exact title of a pasuram in one of the following formats:\n"
235
+ "1. '{prabandham_code} {decade}.{chapter}.{pasuram}' — use when the prabandham has decades.\n"
236
+ "2. '{prabandham_code} {chapter}.{pasuram}' — use when the prabandham does not have decades.\n\n"
237
+ "⚠️ Use this field ONLY when the user provides a specific prabandham and a relative verse number.\n"
238
+ "Examples of valid usage:\n"
239
+ "- User query: '3rd pasuram in the 8th Thiruvaimozhi of the 1st decade.'\n"
240
+ " → Convert to: '{prabandham_code} 1.8.3' and pass as `title` filter.\n"
241
+ "- User query: '2nd pasuram of chapter 5 in [Prabandham with no decades].'\n"
242
+ " → Convert to: '{prabandham_code} 5.2' and pass as `title` filter.\n"
243
+ "Do NOT use `title` for general queries or keyword searches — leave it empty in those cases."
244
  ),
245
  },
246
  {
 
309
  "title": "Bhagavatha Puranam",
310
  "output_dir": "./output/bhagavata_purana",
311
  "collection_name": "bhagavata_purana",
312
+ "unit": "page",
313
  "metadata_fields": [
314
+ {
315
+ "name": "file",
316
+ "datatype": "str",
317
+ "desc": "name of the file from which the information was extracted",
318
+ },
319
  {"name": "num_chars", "datatype": "str"},
320
  {"name": "page", "datatype": "int"},
321
  ],
 
334
  "title": "Kamba Ramayanam (English)",
335
  "output_dir": "./output/kamba_ramayanam",
336
  "collection_name": "kamba_ramayanam_en",
337
+ "unit": "verse",
338
  "metadata_fields": [
339
  {
340
  "name": "kandam",
 
378
  "title": "Kamba Ramayanam (Tamil)",
379
  "output_dir": "./output/kamba_ramayanam",
380
  "collection_name": "kamba_ramayanam",
381
+ "unit": "chunk",
382
  "metadata_fields": [
383
  {
384
  "name": "chunk_index",
 
415
  "title": "Chathusloki by Sri Alavandar",
416
  "output_dir": "./output/chathusloki",
417
  "collection_name": "chathusloki",
418
+ "unit": "slokam",
419
  "metadata_fields": [
420
  {
421
  "name": "sloka_number",
 
444
  "title": "Sri Stavam by Sri Koorathazhwar",
445
  "output_dir": "./output/sri_stavam",
446
  "collection_name": "sri_stavam",
447
+ "unit": "slokam",
448
  "metadata_fields": [
449
  {
450
  "name": "sloka_number",
 
486
  "output_dir": "./output/yt_metadata",
487
  "collection_name": "yt_metadata",
488
  "collection_embedding_fn": "openai",
489
+ "unit": "video",
490
  "metadata_fields": [
491
  {
492
  "name": "video_id",
 
555
  embedding_fn = scripture["collection_embedding_fn"] # overridden in config
556
  return embedding_fn
557
 
558
+ def filter_scriptures_fields(self, fields_to_keep: List[str]) -> List[Dict]:
 
 
 
559
  """
560
  Return a list of scripture dicts containing only the specified fields.
561
  """
modules/nodes/init.py CHANGED
@@ -96,20 +96,26 @@ If the answer asks for translation to another language of their choice and you a
96
 
97
  When generating a response, follow these rules strictly:
98
 
99
- 1. **No information in context**
 
 
 
 
 
 
100
  → Respond in {user_preferred_language}:
101
  "Can you give me more context please?"
102
 
103
- 2. **Some results found, but low confidence**
104
  → Respond in {user_preferred_language}:
105
  "I may have some results but I am not sure of their accuracy. Would you like me to show them?"
106
 
107
- 3. **No relevant answer found in context**
108
  → Respond in {user_preferred_language}:
109
  "I do not have enough information in the context provided from the {scripture} to answer this. I searched using {search_methodology}. Do you want me to try another search like {alternative_searchmethod}?"
110
 
111
- 4. **Answer found in context with confidence**
112
- → Respond in {user_preferred_language} using the following Markdown format:
113
 
114
  ### 🧾 Answer
115
  - Present a brief summary of your response in concise **{user_preferred_language}**. Mention only the scripture(s), chapter(s) and verse number(s) available if multiple matches are available.
@@ -157,20 +163,20 @@ The following format should be used to show only the most relevant match. Do not
157
  - Otherwise, provide the transliterations in {user_preferred_language}, matching the order of verses above.
158
 
159
  ### 📜 {user_preferred_language} - Translation(s)
160
- - Provide the **{user_preferred_language} meaning** for each verse listed above.
161
  - Again, follow the **same order**.
162
  - Do **not** repeat the original verse here — just the translation.
163
 
164
- ### 📜 Notes
165
- - Bullet any extra points or cross-references in {user_preferred_language} from explanatory notes **only if present in the context**.
 
166
  - Do **not** include anything that is not supported or implied in the context.
167
 
168
  ⚠️ Do **not duplicate content** across sections.
169
  - Each section has a distinct purpose.
170
- - If a verse is shown in `📜 Supporting Verse(s)`, do **not** repeat it in the Translation section.
171
  - Only transliterations and meanings should appear in their respective sections.
172
 
173
-
174
  **Question:**
175
  {question}
176
 
@@ -184,6 +190,7 @@ The following format should be used to show only the most relevant match. Do not
184
  Respond in **Markdown** format only. Ensure native Sanskrit/Tamil verses are always clearly shown and translated. If a section does not apply (e.g. no verses), you may omit it.
185
  """
186
  ),
 
187
  ]
188
  state["initialized"] = True
189
 
@@ -196,8 +203,8 @@ Respond in **Markdown** format only. Ensure native Sanskrit/Tamil verses are alw
196
  f"While translating, meticulously correct any spelling mistakes, typos, conversion errors, "
197
  f"and remove any untranslated words or foreign characters. "
198
  f"Ensure the output text is **fully natural, grammatically correct, and orthographically valid** "
199
- f"Take *EXTRA* care in ensuring names of the authors, the title of their work is not mis-spelled or misrepresented."
200
- f"in {state['language']}."
201
  )
202
  )
203
  )
 
96
 
97
  When generating a response, follow these rules strictly:
98
 
99
+ 1. **Check for existing context first**
100
+ - If relevant context from previous interactions or retrieved context exists, use it.
101
+ - Only call the DB tool if:
102
+ a) No relevant context exists, OR
103
+ b) You need additional passages to answer the query accurately.
104
+
105
+ 2. **No information found after checking context and/or DB tool**
106
  → Respond in {user_preferred_language}:
107
  "Can you give me more context please?"
108
 
109
+ 3. **Some results found, but low confidence**
110
  → Respond in {user_preferred_language}:
111
  "I may have some results but I am not sure of their accuracy. Would you like me to show them?"
112
 
113
+ 4. **No relevant answer found after full retrieval**
114
  → Respond in {user_preferred_language}:
115
  "I do not have enough information in the context provided from the {scripture} to answer this. I searched using {search_methodology}. Do you want me to try another search like {alternative_searchmethod}?"
116
 
117
+ 5. **Answer found in context with confidence**
118
+ → Respond in {user_preferred_language} using the following Markdown format:
119
 
120
  ### 🧾 Answer
121
  - Present a brief summary of your response in concise **{user_preferred_language}**. Mention only the scripture(s), chapter(s) and verse number(s) available if multiple matches are available.
 
163
  - Otherwise, provide the transliterations in {user_preferred_language}, matching the order of verses above.
164
 
165
  ### 📜 {user_preferred_language} - Translation(s)
166
+ - Provide the translation in {user_preferred_language} for each verse listed above.
167
  - Again, follow the **same order**.
168
  - Do **not** repeat the original verse here — just the translation.
169
 
170
+ ### 📜 {user_preferred_language} - Detailed Notes
171
+ - Skip this section if there is no `explanatory_notes_english` or `purport_english` or `wbw_english` or `wbw_ta` available
172
+ - Summarize content from `explanatory_notes_english` | `purport_english` | `wbw_english` | `wbw_ta` and translate them to {user_preferred_language}.
173
  - Do **not** include anything that is not supported or implied in the context.
174
 
175
  ⚠️ Do **not duplicate content** across sections.
176
  - Each section has a distinct purpose.
177
+ - If a verse is shown in `📜 Original Verse(s)`, do **not** repeat it in the Translation section.
178
  - Only transliterations and meanings should appear in their respective sections.
179
 
 
180
  **Question:**
181
  {question}
182
 
 
190
  Respond in **Markdown** format only. Ensure native Sanskrit/Tamil verses are always clearly shown and translated. If a section does not apply (e.g. no verses), you may omit it.
191
  """
192
  ),
193
+
194
  ]
195
  state["initialized"] = True
196
 
 
203
  f"While translating, meticulously correct any spelling mistakes, typos, conversion errors, "
204
  f"and remove any untranslated words or foreign characters. "
205
  f"Ensure the output text is **fully natural, grammatically correct, and orthographically valid** "
206
+ f"Take *EXTRA* care in ensuring names of the authors, the title of their work is not mis-spelled or misrepresented in the language {state['language']}."
207
+ f"Ensure to replace all occurences of the literal {{user_preferred_language}} with {state['language']}."
208
  )
209
  )
210
  )
tools.py CHANGED
@@ -57,7 +57,7 @@ tool_search_db_by_metadata = StructuredTool.from_function(
57
  "Use this tool **only when the user provides explicit metadata criteria**, such as: azhwar name, pasuram number, verse number, decade, prabandham name, or divya desam name."
58
  " This is not meant for general queries."
59
  f" The collection_name must be one of: {', '.join(allowed_collections)}."
60
- "you must ALWAYS call one of the standardization tools available to get the correct entity name before using this tool."
61
  "If the user asks for a specific azhwar, use `tool_get_standardized_azhwar_names` first."
62
  "If the user asks for a specific prabandham, use `tool_get_standardized_prabandham_names` first."
63
  "If the user mentions a divya desam, use `tool_get_standardized_divya_desam_names` first."
 
57
  "Use this tool **only when the user provides explicit metadata criteria**, such as: azhwar name, pasuram number, verse number, decade, prabandham name, or divya desam name."
58
  " This is not meant for general queries."
59
  f" The collection_name must be one of: {', '.join(allowed_collections)}."
60
+ "You *MUST* ALWAYS call one of the standardization tools available to get the correct entity name before using this tool."
61
  "If the user asks for a specific azhwar, use `tool_get_standardized_azhwar_names` first."
62
  "If the user asks for a specific prabandham, use `tool_get_standardized_prabandham_names` first."
63
  "If the user mentions a divya desam, use `tool_get_standardized_divya_desam_names` first."