vikramvasudevan committed on
Commit
f2034cd
·
verified ·
1 Parent(s): 8b5be8c

Upload folder using huggingface_hub

Browse files
.gitignore CHANGED
@@ -15,4 +15,7 @@ outputs/
15
  chromadb-store_20251112.zip
16
  chromadb-store_20251118.zip
17
  chroma_exports/
18
- chroma_exports.zip
 
 
 
 
15
  chromadb-store_20251112.zip
16
  chromadb-store_20251118.zip
17
  chroma_exports/
18
+ chroma_exports.zip
19
+ chromadb-store-CORRUPTED/
20
+ chromadb-store_BLOATED.zip
21
+ chromadb-store_OLD.zip
copy_chromadb.py CHANGED
@@ -102,7 +102,102 @@ db_config = {
102
  "source_db_path": "../bhagavata_purana_ai/chromadb_store",
103
  "source_collection_name": "bhagavata_purana",
104
  "destination_collection_name": "bhagavata_purana",
105
- },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
106
  }
107
 
108
  parser = argparse.ArgumentParser(description="My app with database parameter")
 
102
  "source_db_path": "../bhagavata_purana_ai/chromadb_store",
103
  "source_collection_name": "bhagavata_purana",
104
  "destination_collection_name": "bhagavata_purana",
105
+ },
106
+ "agnipuranam": {
107
+ "source_db_path": "../puranas_ai/chromadb_store",
108
+ "source_collection_name": "agnipuranam",
109
+ "destination_collection_name": "agnipuranam"
110
+ },
111
+ "bhavishyapuranam": {
112
+ "source_db_path": "../puranas_ai/chromadb_store",
113
+ "source_collection_name": "bhavishyapuranam",
114
+ "destination_collection_name": "bhavishyapuranam"
115
+ },
116
+ "brahmandpuranam": {
117
+ "source_db_path": "../puranas_ai/chromadb_store",
118
+ "source_collection_name": "brahmandpuranam",
119
+ "destination_collection_name": "brahmandpuranam"
120
+ },
121
+ "brahmapuranam": {
122
+ "source_db_path": "../puranas_ai/chromadb_store",
123
+ "source_collection_name": "brahmapuranam",
124
+ "destination_collection_name": "brahmapuranam"
125
+ },
126
+ "brahmavaivarthapurana": {
127
+ "source_db_path": "../puranas_ai/chromadb_store",
128
+ "source_collection_name": "brahmavaivarthapurana",
129
+ "destination_collection_name": "brahmavaivarthapurana"
130
+ },
131
+ "garudapuranam": {
132
+ "source_db_path": "../puranas_ai/chromadb_store",
133
+ "source_collection_name": "garudapuranam",
134
+ "destination_collection_name": "garudapuranam"
135
+ },
136
+ "harivanshapuraanam": {
137
+ "source_db_path": "../puranas_ai/chromadb_store",
138
+ "source_collection_name": "harivanshapuraanam",
139
+ "destination_collection_name": "harivanshapuraanam"
140
+ },
141
+ "kurmapuranam": {
142
+ "source_db_path": "../puranas_ai/chromadb_store",
143
+ "source_collection_name": "kurmapuranam",
144
+ "destination_collection_name": "kurmapuranam"
145
+ },
146
+ "lingapuranam": {
147
+ "source_db_path": "../puranas_ai/chromadb_store",
148
+ "source_collection_name": "lingapuranam",
149
+ "destination_collection_name": "lingapuranam"
150
+ },
151
+ "markandeypuranam": {
152
+ "source_db_path": "../puranas_ai/chromadb_store",
153
+ "source_collection_name": "markandeypuranam",
154
+ "destination_collection_name": "markandeypuranam"
155
+ },
156
+ "matsyapuranam": {
157
+ "source_db_path": "../puranas_ai/chromadb_store",
158
+ "source_collection_name": "matsyapuranam",
159
+ "destination_collection_name": "matsyapuranam"
160
+ },
161
+ "naradapuranam": {
162
+ "source_db_path": "../puranas_ai/chromadb_store",
163
+ "source_collection_name": "naradapuranam",
164
+ "destination_collection_name": "naradapuranam"
165
+ },
166
+ "padmapuranam": {
167
+ "source_db_path": "../puranas_ai/chromadb_store",
168
+ "source_collection_name": "padmapuranam",
169
+ "destination_collection_name": "padmapuranam"
170
+ },
171
+ "shivapuraanam": {
172
+ "source_db_path": "../puranas_ai/chromadb_store",
173
+ "source_collection_name": "shivapuraanam",
174
+ "destination_collection_name": "shivapuraanam"
175
+ },
176
+ "skandapuranam": {
177
+ "source_db_path": "../puranas_ai/chromadb_store",
178
+ "source_collection_name": "skandapuranam",
179
+ "destination_collection_name": "skandapuranam"
180
+ },
181
+ "vaamanapuraanam": {
182
+ "source_db_path": "../puranas_ai/chromadb_store",
183
+ "source_collection_name": "vaamanapuraanam",
184
+ "destination_collection_name": "vaamanapuraanam"
185
+ },
186
+ "vaayupuraanam": {
187
+ "source_db_path": "../puranas_ai/chromadb_store",
188
+ "source_collection_name": "vaayupuraanam",
189
+ "destination_collection_name": "vaayupuraanam"
190
+ },
191
+ "varahapuranam": {
192
+ "source_db_path": "../puranas_ai/chromadb_store",
193
+ "source_collection_name": "varahapuranam",
194
+ "destination_collection_name": "varahapuranam"
195
+ },
196
+ "vishnupuranam": {
197
+ "source_db_path": "../puranas_ai/chromadb_store",
198
+ "source_collection_name": "vishnupuranam",
199
+ "destination_collection_name": "vishnu_puranam_openai"
200
+ }
201
  }
202
 
203
  parser = argparse.ArgumentParser(description="My app with database parameter")
db.py CHANGED
@@ -824,15 +824,26 @@ class SanatanDatabase:
824
  )
825
 
826
  # Fetch all records (keep embeddings for upsert)
827
- try:
828
- results = collection.get(include=["metadatas", "documents", "embeddings"])
829
- except Exception as e:
830
- logger.error(
831
- "build_global_index_for_all_scriptures:%s Error getting data from chromadb",
832
- scripture_name,
833
- exc_info=True,
834
- )
835
- return
 
 
 
 
 
 
 
 
 
 
 
836
 
837
  ids = results["ids"]
838
  metadatas = results["metadatas"]
 
824
  )
825
 
826
  # Fetch all records (keep embeddings for upsert)
827
+ MAX_RETRIES = 3
828
+ RETRY_DELAY = 5 # seconds
829
+
830
+ for attempt in range(1, MAX_RETRIES + 1):
831
+ try:
832
+ results = collection.get(include=["metadatas", "documents", "embeddings"])
833
+ break # success → exit loop
834
+ except Exception as e:
835
+ logger.error(
836
+ "build_global_index_for_all_scriptures:%s Error getting data from chromadb (attempt %s/%s)",
837
+ scripture_name,
838
+ attempt,
839
+ MAX_RETRIES,
840
+ exc_info=True,
841
+ )
842
+
843
+ if attempt == MAX_RETRIES:
844
+ # still failing after 3 attempts
845
+ return
846
+ time.sleep(RETRY_DELAY) # wait before retry
847
 
848
  ids = results["ids"]
849
  metadatas = results["metadatas"]
modules/config/__init__.py CHANGED
@@ -1,26 +1,44 @@
 
1
  from modules.config.bhagavat_gita import bhagavat_gita_config
2
  from modules.config.bhagavata_purana import bhagavata_purana_config
 
3
  from modules.config.brahma_sutra import brahma_sutra_config
 
 
 
4
  from modules.config.chathusloki import chathusloki_config
5
  from modules.config.desika_prabandham import desika_prabandham_config
6
  from modules.config.divya_prabandham import divya_prabandham_config
7
  from modules.config.divya_prabandham_taniyans import divya_prabandham_taniyans_config
 
 
8
  from modules.config.kamba_ramayanam import kamba_ramayanam_config
9
  from modules.config.kamba_ramayanam_en import kamba_ramayanam_en_config
10
  from modules.config.katakam import katakam_config
 
 
 
 
 
11
  from modules.config.narayaneeyam import narayaneeyam_config
 
12
  from modules.config.pancha_sooktham import pancha_sooktham_config
13
  from modules.config.raghuveera_gadhyam import raghuveera_gadhyam_config
14
  from modules.config.shanthi_panchakam import shanthi_panchakam_config
 
15
  from modules.config.shukla_yajur_vedam import shukla_yajur_vedam_config
 
16
  from modules.config.sri_stavam import sri_stavam_config
17
  from modules.config.sri_vachana_bhushanam import sri_vachana_bhushanam_config
18
  from modules.config.taitriya_aranyakam import taitriya_aranyakam_config
19
  from modules.config.taitriya_brahmanam import taitriya_brahmanam_config
20
  from modules.config.taitriya_samhitha import taitriya_samhitha_config
21
  from modules.config.taitriya_upanishad import taitriya_upanishad_config
 
 
22
  from modules.config.valmiki_ramayanam import valmiki_ramayanam_config
23
- from modules.config.vishnu_puranam import vishnu_puranam_config
 
24
  from modules.config.vishnu_sahasranamam import vishnu_sahasranamam_config
25
  from modules.config.yt_metadata import yt_metadata_config
26
 
@@ -43,7 +61,7 @@ scripture_configurations = [
43
  taitriya_aranyakam_config,
44
  taitriya_upanishad_config,
45
  valmiki_ramayanam_config,
46
- vishnu_puranam_config,
47
  vishnu_sahasranamam_config,
48
  yt_metadata_config,
49
  brahma_sutra_config,
@@ -51,4 +69,22 @@ scripture_configurations = [
51
  desika_prabandham_config,
52
  raghuveera_gadhyam_config,
53
  narayaneeyam_config,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
54
  ]
 
1
+ from modules.config.agnipuranam import agnipuranam_config
2
  from modules.config.bhagavat_gita import bhagavat_gita_config
3
  from modules.config.bhagavata_purana import bhagavata_purana_config
4
+ from modules.config.bhavishyapuranam import bhavishyapuranam_config
5
  from modules.config.brahma_sutra import brahma_sutra_config
6
+ from modules.config.brahmandpuranam import brahmandpuranam_config
7
+ from modules.config.brahmapuranam import brahmapuranam_config
8
+ from modules.config.brahmavaivarthapurana import brahmavaivarthapurana_config
9
  from modules.config.chathusloki import chathusloki_config
10
  from modules.config.desika_prabandham import desika_prabandham_config
11
  from modules.config.divya_prabandham import divya_prabandham_config
12
  from modules.config.divya_prabandham_taniyans import divya_prabandham_taniyans_config
13
+ from modules.config.garudapuranam import garudapuranam_config
14
+ from modules.config.harivanshapuraanam import harivanshapuraanam_config
15
  from modules.config.kamba_ramayanam import kamba_ramayanam_config
16
  from modules.config.kamba_ramayanam_en import kamba_ramayanam_en_config
17
  from modules.config.katakam import katakam_config
18
+ from modules.config.kurmapuranam import kurmapuranam_config
19
+ from modules.config.lingapuranam import lingapuranam_config
20
+ from modules.config.markandeypuranam import markandeypuranam_config
21
+ from modules.config.matsyapuranam import matsyapuranam_config
22
+ from modules.config.naradapuranam import naradapuranam_config
23
  from modules.config.narayaneeyam import narayaneeyam_config
24
+ from modules.config.padmapuranam import padmapuranam_config
25
  from modules.config.pancha_sooktham import pancha_sooktham_config
26
  from modules.config.raghuveera_gadhyam import raghuveera_gadhyam_config
27
  from modules.config.shanthi_panchakam import shanthi_panchakam_config
28
+ from modules.config.shivapuraanam import shivapuraanam_config
29
  from modules.config.shukla_yajur_vedam import shukla_yajur_vedam_config
30
+ from modules.config.skandapuranam import skandapuranam_config
31
  from modules.config.sri_stavam import sri_stavam_config
32
  from modules.config.sri_vachana_bhushanam import sri_vachana_bhushanam_config
33
  from modules.config.taitriya_aranyakam import taitriya_aranyakam_config
34
  from modules.config.taitriya_brahmanam import taitriya_brahmanam_config
35
  from modules.config.taitriya_samhitha import taitriya_samhitha_config
36
  from modules.config.taitriya_upanishad import taitriya_upanishad_config
37
+ from modules.config.vaamanapuraanam import vaamanapuraanam_config
38
+ from modules.config.vaayupuraanam import vaayupuraanam_config
39
  from modules.config.valmiki_ramayanam import valmiki_ramayanam_config
40
+ from modules.config.varahapuranam import varahapuranam_config
41
+ from modules.config.vishnu_puranam import vishnupuranam_config
42
  from modules.config.vishnu_sahasranamam import vishnu_sahasranamam_config
43
  from modules.config.yt_metadata import yt_metadata_config
44
 
 
61
  taitriya_aranyakam_config,
62
  taitriya_upanishad_config,
63
  valmiki_ramayanam_config,
64
+ vishnupuranam_config,
65
  vishnu_sahasranamam_config,
66
  yt_metadata_config,
67
  brahma_sutra_config,
 
69
  desika_prabandham_config,
70
  raghuveera_gadhyam_config,
71
  narayaneeyam_config,
72
+ agnipuranam_config,
73
+ bhavishyapuranam_config,
74
+ brahmandpuranam_config,
75
+ brahmapuranam_config,
76
+ brahmavaivarthapurana_config,
77
+ garudapuranam_config,
78
+ harivanshapuraanam_config,
79
+ kurmapuranam_config,
80
+ lingapuranam_config,
81
+ markandeypuranam_config,
82
+ matsyapuranam_config,
83
+ naradapuranam_config,
84
+ padmapuranam_config,
85
+ shivapuraanam_config,
86
+ skandapuranam_config,
87
+ vaamanapuraanam_config,
88
+ vaayupuraanam_config,
89
+ varahapuranam_config,
90
  ]
modules/config/agnipuranam.py ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from modules.config.categories import ScriptureCategoryConstants
2
+ from modules.google_drive.google_drive_utils import convert_drive_url_to_direct
3
+
4
+ agnipuranam_config = {
5
+ "name": "agnipuranam",
6
+ "title": "Agni Puranam",
7
+ "banner_url": convert_drive_url_to_direct(
8
+ "https://drive.google.com/file/d/1YjOR3wLNBbNovKC01aUXHa1osbjgLYbw/view?usp=drive_link"
9
+ ),
10
+ "category": ScriptureCategoryConstants.PURANAM,
11
+ "output_dir": "./output/agnipuranam",
12
+ "collection_name": "agnipuranam",
13
+ "collection_embedding_fn": "openai",
14
+ "unit": "sloka",
15
+ "unit_field": "_global_index",
16
+
17
+ "field_mapping": {
18
+ "text": "lyrics_sa",
19
+ "chapter_name": "chapter_name",
20
+ "unit_index": "_global_index",
21
+ "relative_path": lambda doc: (
22
+ " | ".join(
23
+ [
24
+ doc[key]
25
+ for key in sorted(
26
+ [k for k in doc.keys() if k.startswith("title_") and doc[k]],
27
+ key=lambda x: int(x.split("_")[1])
28
+ )
29
+ ]
30
+ )
31
+ + f" || {doc.get('_global_index', '')} ||"
32
+ )
33
+ },
34
+
35
+ "metadata_fields": [
36
+ {
37
+ "name": "_global_index",
38
+ "datatype": "int",
39
+ "label": "Verse Index",
40
+ "description": "Absolute verse index",
41
+ "show_as_filter": True,
42
+ "is_unique": True,
43
+ },
44
+ {
45
+ "name": "chapter_name",
46
+ "datatype": "str",
47
+ "label": "Chapter",
48
+ "description": "Original chapter title",
49
+ "show_as_filter": True,
50
+ },
51
+ {
52
+ "name": "lyrics_sa",
53
+ "datatype": "str",
54
+ "label": "Sanskrit",
55
+ "description": "Verse text in Sanskrit",
56
+ },
57
+ ],
58
+
59
+ "pdf_path": "./data/agnipuranam.pdf",
60
+ "source": "",
61
+ "language": "san",
62
+ "example_labels": [],
63
+ "examples": [],
64
+ "llm_hints": [],
65
+ "credits": {"art": [], "data": [
66
+ {
67
+ "name": "Sanskritam",
68
+ "url": "https://www.sanskritam.world/puranas",
69
+ "role": "Data provider",
70
+ },
71
+ ], "audio": [], "video": []},
72
+ }
modules/config/bhavishyapuranam.py ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from modules.config.categories import ScriptureCategoryConstants
2
+ from modules.google_drive.google_drive_utils import convert_drive_url_to_direct
3
+
4
+ bhavishyapuranam_config = {
5
+ "name": "bhavishyapuranam",
6
+ "title": "Bhavishya Puranam",
7
+ "banner_url": convert_drive_url_to_direct("https://drive.google.com/file/d/1ENd6Es_Ng7mj5ofoqmT2UGCbJM8ZFFVl/view?usp=drive_link"),
8
+ "category": ScriptureCategoryConstants.PURANAM,
9
+ "output_dir": "./output/bhavishyapuranam",
10
+ "collection_name": "bhavishyapuranam",
11
+ "collection_embedding_fn": "openai",
12
+
13
+ "unit": "sloka",
14
+ "unit_field": "_global_index",
15
+
16
+ "field_mapping": {
17
+ "text": "lyrics_sa",
18
+ "chapter_name": "chapter_name",
19
+ "unit_index": "_global_index",
20
+ "relative_path": lambda doc: (
21
+ " | ".join(
22
+ [
23
+ doc[key]
24
+ for key in sorted(
25
+ [k for k in doc.keys() if k.startswith("title_") and doc[k]],
26
+ key=lambda x: int(x.split("_")[1])
27
+ )
28
+ ]
29
+ )
30
+ + f" || {doc.get('_global_index', '')} ||"
31
+ )
32
+ },
33
+
34
+ "metadata_fields": [
35
+ {
36
+ "name": "_global_index",
37
+ "datatype": "int",
38
+ "label": "Verse Index",
39
+ "description": "Absolute verse index",
40
+ "show_as_filter": True,
41
+ "is_unique": True,
42
+ },
43
+ {
44
+ "name": "chapter_name",
45
+ "datatype": "str",
46
+ "label": "Chapter",
47
+ "description": "Original chapter title",
48
+ "show_as_filter": True,
49
+ },
50
+ {
51
+ "name": "lyrics_sa",
52
+ "datatype": "str",
53
+ "label": "Sanskrit",
54
+ "description": "Verse text in Sanskrit",
55
+ },
56
+ ],
57
+
58
+ "pdf_path": "./data/bhavishyapuranam.pdf",
59
+ "source": "",
60
+ "language": "san",
61
+ "example_labels": [],
62
+ "examples": [],
63
+ "llm_hints": [],
64
+ "credits": {"art": [], "data": [], "audio": [], "video": []},
65
+ }
modules/config/brahmandpuranam.py ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from modules.config.categories import ScriptureCategoryConstants
2
+ from modules.google_drive.google_drive_utils import convert_drive_url_to_direct
3
+
4
+ brahmandpuranam_config = {
5
+ "name": "brahmandpuranam",
6
+ "title": "Brahmanda Puranam",
7
+ "banner_url": convert_drive_url_to_direct("https://drive.google.com/file/d/1r0fV1AqASV0LsipTOPzJ7hmgswT5hNBZ/view?usp=drive_link"),
8
+ "category": ScriptureCategoryConstants.PURANAM,
9
+ "output_dir": "./output/brahmandpuranam",
10
+ "collection_name": "brahmandpuranam",
11
+ "collection_embedding_fn": "openai",
12
+
13
+ "unit": "sloka",
14
+ "unit_field": "_global_index",
15
+
16
+ "field_mapping": {
17
+ "text": "lyrics_sa",
18
+ "chapter_name": "chapter_name",
19
+ "unit_index": "_global_index",
20
+ "relative_path": lambda doc: (
21
+ " | ".join(
22
+ [
23
+ doc[key]
24
+ for key in sorted(
25
+ [k for k in doc.keys() if k.startswith("title_") and doc[k]],
26
+ key=lambda x: int(x.split("_")[1])
27
+ )
28
+ ]
29
+ )
30
+ + f" || {doc.get('_global_index', '')} ||"
31
+ )
32
+ },
33
+
34
+ "metadata_fields": [
35
+ {
36
+ "name": "_global_index",
37
+ "datatype": "int",
38
+ "label": "Verse Index",
39
+ "description": "Absolute verse index",
40
+ "show_as_filter": True,
41
+ "is_unique": True,
42
+ },
43
+ {
44
+ "name": "chapter_name",
45
+ "datatype": "str",
46
+ "label": "Chapter",
47
+ "description": "Original chapter title",
48
+ "show_as_filter": True,
49
+ },
50
+ {
51
+ "name": "lyrics_sa",
52
+ "datatype": "str",
53
+ "label": "Sanskrit",
54
+ "description": "Verse text in Sanskrit",
55
+ },
56
+ ],
57
+
58
+ "pdf_path": "./data/brahmandpuranam.pdf",
59
+ "source": "",
60
+ "language": "san",
61
+ "example_labels": [],
62
+ "examples": [],
63
+ "llm_hints": [],
64
+ "credits": {"art": [], "data": [], "audio": [], "video": []},
65
+ }
modules/config/brahmapuranam.py ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from modules.config.categories import ScriptureCategoryConstants
2
+ from modules.google_drive.google_drive_utils import convert_drive_url_to_direct
3
+
4
+ brahmapuranam_config = {
5
+ "name": "brahmapuranam",
6
+ "title": "Brahma Puranam",
7
+ "banner_url": convert_drive_url_to_direct("https://drive.google.com/file/d/1CQn25LkeekY9c592tnzo8Msv0Ib8XiiE/view?usp=drive_link"),
8
+ "category": ScriptureCategoryConstants.PURANAM,
9
+ "output_dir": "./output/brahmapuranam",
10
+ "collection_name": "brahmapuranam",
11
+ "collection_embedding_fn": "openai",
12
+
13
+ "unit": "sloka",
14
+ "unit_field": "_global_index",
15
+
16
+ "field_mapping": {
17
+ "text": "lyrics_sa",
18
+ "chapter_name": "chapter_name",
19
+ "unit_index": "_global_index",
20
+ "relative_path": lambda doc: (
21
+ " | ".join(
22
+ [
23
+ doc[key]
24
+ for key in sorted(
25
+ [k for k in doc.keys() if k.startswith("title_") and doc[k]],
26
+ key=lambda x: int(x.split("_")[1])
27
+ )
28
+ ]
29
+ )
30
+ + f" || {doc.get('_global_index', '')} ||"
31
+ )
32
+ },
33
+
34
+ "metadata_fields": [
35
+ {
36
+ "name": "_global_index",
37
+ "datatype": "int",
38
+ "label": "Verse Index",
39
+ "description": "Absolute verse index",
40
+ "show_as_filter": True,
41
+ "is_unique": True,
42
+ },
43
+ {
44
+ "name": "chapter_name",
45
+ "datatype": "str",
46
+ "label": "Chapter",
47
+ "description": "Original chapter title",
48
+ "show_as_filter": True,
49
+ },
50
+ {
51
+ "name": "lyrics_sa",
52
+ "datatype": "str",
53
+ "label": "Sanskrit",
54
+ "description": "Verse text in Sanskrit",
55
+ },
56
+ ],
57
+
58
+ "pdf_path": "./data/brahmapuranam.pdf",
59
+ "source": "",
60
+ "language": "san",
61
+ "example_labels": [],
62
+ "examples": [],
63
+ "llm_hints": [],
64
+ "credits": {"art": [], "data": [], "audio": [], "video": []},
65
+ }
modules/config/brahmavaivarthapurana.py ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from modules.config.categories import ScriptureCategoryConstants
2
+ from modules.google_drive.google_drive_utils import convert_drive_url_to_direct
3
+
4
+ brahmavaivarthapurana_config = {
5
+ "name": "brahmavaivarthapurana",
6
+ "title": "Brahma Vaivarta Puranam",
7
+ "banner_url": convert_drive_url_to_direct("https://drive.google.com/file/d/1SRLUvGvJaQ3SUaoEQMgy0ACTPGh1Sw6S/view?usp=drive_link"),
8
+ "category": ScriptureCategoryConstants.PURANAM,
9
+ "output_dir": "./output/brahmavaivarthapurana",
10
+ "collection_name": "brahmavaivarthapurana",
11
+ "collection_embedding_fn": "openai",
12
+
13
+ "unit": "sloka",
14
+ "unit_field": "_global_index",
15
+
16
+ "field_mapping": {
17
+ "text": "lyrics_sa",
18
+ "chapter_name": "chapter_name",
19
+ "unit_index": "_global_index",
20
+ "relative_path": lambda doc: (
21
+ " | ".join(
22
+ [
23
+ doc[key]
24
+ for key in sorted(
25
+ [k for k in doc.keys() if k.startswith("title_") and doc[k]],
26
+ key=lambda x: int(x.split("_")[1])
27
+ )
28
+ ]
29
+ )
30
+ + f" || {doc.get('_global_index', '')} ||"
31
+ )
32
+ },
33
+
34
+ "metadata_fields": [
35
+ {
36
+ "name": "_global_index",
37
+ "datatype": "int",
38
+ "label": "Verse Index",
39
+ "description": "Absolute verse index",
40
+ "show_as_filter": True,
41
+ "is_unique": True,
42
+ },
43
+ {
44
+ "name": "chapter_name",
45
+ "datatype": "str",
46
+ "label": "Chapter",
47
+ "description": "Original chapter title",
48
+ "show_as_filter": True,
49
+ },
50
+ {
51
+ "name": "lyrics_sa",
52
+ "datatype": "str",
53
+ "label": "Sanskrit",
54
+ "description": "Verse text in Sanskrit",
55
+ },
56
+ ],
57
+
58
+ "pdf_path": "./data/brahmavaivarthapurana.pdf",
59
+ "source": "",
60
+ "language": "san",
61
+ "example_labels": [],
62
+ "examples": [],
63
+ "llm_hints": [],
64
+ "credits": {"art": [], "data": [], "audio": [], "video": []},
65
+ }
modules/config/garudapuranam.py ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from modules.config.categories import ScriptureCategoryConstants
2
+ from modules.google_drive.google_drive_utils import convert_drive_url_to_direct
3
+
4
+ garudapuranam_config = {
5
+ "name": "garudapuranam",
6
+ "title": "Garuda Puranam",
7
+ "banner_url": convert_drive_url_to_direct("https://drive.google.com/file/d/1jhGecKwco5RzGQzSVa8PiuFKJ26LHJ0p/view?usp=drive_link"),
8
+ "category": ScriptureCategoryConstants.PURANAM,
9
+ "output_dir": "./output/garudapuranam",
10
+ "collection_name": "garudapuranam",
11
+ "collection_embedding_fn": "openai",
12
+
13
+ "unit": "sloka",
14
+ "unit_field": "_global_index",
15
+
16
+ "field_mapping": {
17
+ "text": "lyrics_sa",
18
+ "chapter_name": "chapter_name",
19
+ "unit_index": "_global_index",
20
+ "relative_path": lambda doc: (
21
+ " | ".join(
22
+ [
23
+ doc[key]
24
+ for key in sorted(
25
+ [k for k in doc.keys() if k.startswith("title_") and doc[k]],
26
+ key=lambda x: int(x.split("_")[1])
27
+ )
28
+ ]
29
+ )
30
+ + f" || {doc.get('_global_index', '')} ||"
31
+ )
32
+ },
33
+
34
+ "metadata_fields": [
35
+ {
36
+ "name": "_global_index",
37
+ "datatype": "int",
38
+ "label": "Verse Index",
39
+ "description": "Absolute verse index",
40
+ "show_as_filter": True,
41
+ "is_unique": True,
42
+ },
43
+ {
44
+ "name": "chapter_name",
45
+ "datatype": "str",
46
+ "label": "Chapter",
47
+ "description": "Original chapter title",
48
+ "show_as_filter": True,
49
+ },
50
+ {
51
+ "name": "lyrics_sa",
52
+ "datatype": "str",
53
+ "label": "Sanskrit",
54
+ "description": "Verse text in Sanskrit",
55
+ },
56
+ ],
57
+
58
+ "pdf_path": "./data/garudapuranam.pdf",
59
+ "source": "",
60
+ "language": "san",
61
+ "example_labels": [],
62
+ "examples": [],
63
+ "llm_hints": [],
64
+ "credits": {"art": [], "data": [], "audio": [], "video": []},
65
+ }
modules/config/harivanshapuraanam.py ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from modules.config.categories import ScriptureCategoryConstants
2
+ from modules.google_drive.google_drive_utils import convert_drive_url_to_direct
3
+
4
+ harivanshapuraanam_config = {
5
+ "name": "harivanshapuraanam",
6
+ "title": "Harivansha Puranam",
7
+ "banner_url": convert_drive_url_to_direct("https://drive.google.com/file/d/1uWkzBJWin19aS7_TE545E5cwOXGpTPrx/view?usp=drive_link"),
8
+ "category": ScriptureCategoryConstants.PURANAM,
9
+ "output_dir": "./output/harivanshapuraanam",
10
+ "collection_name": "harivanshapuraanam",
11
+ "collection_embedding_fn": "openai",
12
+
13
+ "unit": "sloka",
14
+ "unit_field": "_global_index",
15
+
16
+ "field_mapping": {
17
+ "text": "lyrics_sa",
18
+ "chapter_name": "chapter_name",
19
+ "unit_index": "_global_index",
20
+ "relative_path": lambda doc: (
21
+ " | ".join(
22
+ [
23
+ doc[key]
24
+ for key in sorted(
25
+ [k for k in doc.keys() if k.startswith("title_") and doc[k]],
26
+ key=lambda x: int(x.split("_")[1])
27
+ )
28
+ ]
29
+ )
30
+ + f" || {doc.get('_global_index', '')} ||"
31
+ )
32
+ },
33
+
34
+ "metadata_fields": [
35
+ {
36
+ "name": "_global_index",
37
+ "datatype": "int",
38
+ "label": "Verse Index",
39
+ "description": "Absolute verse index",
40
+ "show_as_filter": True,
41
+ "is_unique": True,
42
+ },
43
+ {
44
+ "name": "chapter_name",
45
+ "datatype": "str",
46
+ "label": "Chapter",
47
+ "description": "Original chapter title",
48
+ "show_as_filter": True,
49
+ },
50
+ {
51
+ "name": "lyrics_sa",
52
+ "datatype": "str",
53
+ "label": "Sanskrit",
54
+ "description": "Verse text in Sanskrit",
55
+ },
56
+ ],
57
+
58
+ "pdf_path": "./data/harivanshapuraanam.pdf",
59
+ "source": "",
60
+ "language": "san",
61
+ "example_labels": [],
62
+ "examples": [],
63
+ "llm_hints": [],
64
+ "credits": {"art": [], "data": [], "audio": [], "video": []},
65
+ }
modules/config/kurmapuranam.py ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from modules.config.categories import ScriptureCategoryConstants
2
+ from modules.google_drive.google_drive_utils import convert_drive_url_to_direct
3
+
4
+ kurmapuranam_config = {
5
+ "name": "kurmapuranam",
6
+ "title": "Kurma Puranam",
7
+ "banner_url": convert_drive_url_to_direct("https://drive.google.com/file/d/1Y1epvVgGzyKlWSFEI2ep8bNwT6G7G6II/view?usp=drive_link"),
8
+ "category": ScriptureCategoryConstants.PURANAM,
9
+ "output_dir": "./output/kurmapuranam",
10
+ "collection_name": "kurmapuranam",
11
+ "collection_embedding_fn": "openai",
12
+
13
+ "unit": "sloka",
14
+ "unit_field": "_global_index",
15
+
16
+ "field_mapping": {
17
+ "text": "lyrics_sa",
18
+ "chapter_name": "chapter_name",
19
+ "unit_index": "_global_index",
20
+ "relative_path": lambda doc: (
21
+ " | ".join(
22
+ [
23
+ doc[key]
24
+ for key in sorted(
25
+ [k for k in doc.keys() if k.startswith("title_") and doc[k]],
26
+ key=lambda x: int(x.split("_")[1])
27
+ )
28
+ ]
29
+ )
30
+ + f" || {doc.get('_global_index', '')} ||"
31
+ )
32
+ },
33
+
34
+ "metadata_fields": [
35
+ {
36
+ "name": "_global_index",
37
+ "datatype": "int",
38
+ "label": "Verse Index",
39
+ "description": "Absolute verse index",
40
+ "show_as_filter": True,
41
+ "is_unique": True,
42
+ },
43
+ {
44
+ "name": "chapter_name",
45
+ "datatype": "str",
46
+ "label": "Chapter",
47
+ "description": "Original chapter title",
48
+ "show_as_filter": True,
49
+ },
50
+ {
51
+ "name": "lyrics_sa",
52
+ "datatype": "str",
53
+ "label": "Sanskrit",
54
+ "description": "Verse text in Sanskrit",
55
+ },
56
+ ],
57
+
58
+ "pdf_path": "./data/kurmapuranam.pdf",
59
+ "source": "",
60
+ "language": "san",
61
+ "example_labels": [],
62
+ "examples": [],
63
+ "llm_hints": [],
64
+ "credits": {"art": [], "data": [], "audio": [], "video": []},
65
+ }
modules/config/lingapuranam.py ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from modules.config.categories import ScriptureCategoryConstants
2
+ from modules.google_drive.google_drive_utils import convert_drive_url_to_direct
3
+
4
+ lingapuranam_config = {
5
+ "name": "lingapuranam",
6
+ "title": "Linga Puranam",
7
+ "banner_url": convert_drive_url_to_direct("https://drive.google.com/file/d/12Jg8WOnwwp2dCvFLKf2UN1LrFc4bpK7Q/view?usp=drive_link"),
8
+ "category": ScriptureCategoryConstants.PURANAM,
9
+ "output_dir": "./output/lingapuranam",
10
+ "collection_name": "lingapuranam",
11
+ "collection_embedding_fn": "openai",
12
+
13
+ "unit": "sloka",
14
+ "unit_field": "_global_index",
15
+
16
+ "field_mapping": {
17
+ "text": "lyrics_sa",
18
+ "chapter_name": "chapter_name",
19
+ "unit_index": "_global_index",
20
+ "relative_path": lambda doc: (
21
+ " | ".join(
22
+ [
23
+ doc[key]
24
+ for key in sorted(
25
+ [k for k in doc.keys() if k.startswith("title_") and doc[k]],
26
+ key=lambda x: int(x.split("_")[1])
27
+ )
28
+ ]
29
+ )
30
+ + f" || {doc.get('_global_index', '')} ||"
31
+ )
32
+ },
33
+
34
+ "metadata_fields": [
35
+ {
36
+ "name": "_global_index",
37
+ "datatype": "int",
38
+ "label": "Verse Index",
39
+ "description": "Absolute verse index",
40
+ "show_as_filter": True,
41
+ "is_unique": True,
42
+ },
43
+ {
44
+ "name": "chapter_name",
45
+ "datatype": "str",
46
+ "label": "Chapter",
47
+ "description": "Original chapter title",
48
+ "show_as_filter": True,
49
+ },
50
+ {
51
+ "name": "lyrics_sa",
52
+ "datatype": "str",
53
+ "label": "Sanskrit",
54
+ "description": "Verse text in Sanskrit",
55
+ },
56
+ ],
57
+
58
+ "pdf_path": "./data/lingapuranam.pdf",
59
+ "source": "",
60
+ "language": "san",
61
+ "example_labels": [],
62
+ "examples": [],
63
+ "llm_hints": [],
64
+ "credits": {"art": [], "data": [], "audio": [], "video": []},
65
+ }
modules/config/markandeypuranam.py ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from modules.config.categories import ScriptureCategoryConstants
2
+ from modules.google_drive.google_drive_utils import convert_drive_url_to_direct
3
+
4
+ markandeypuranam_config = {
5
+ "name": "markandeypuranam",
6
+ "title": "Markandeya Puranam",
7
+ "banner_url": convert_drive_url_to_direct("https://drive.google.com/file/d/101ibf90vyBpKdcMQCvooQ30JEBL2XMlm/view?usp=drive_link"),
8
+ "category": ScriptureCategoryConstants.PURANAM,
9
+ "output_dir": "./output/markandeypuranam",
10
+ "collection_name": "markandeypuranam",
11
+ "collection_embedding_fn": "openai",
12
+
13
+ "unit": "sloka",
14
+ "unit_field": "_global_index",
15
+
16
+ "field_mapping": {
17
+ "text": "lyrics_sa",
18
+ "chapter_name": "chapter_name",
19
+ "unit_index": "_global_index",
20
+ "relative_path": lambda doc: (
21
+ " | ".join(
22
+ [
23
+ doc[key]
24
+ for key in sorted(
25
+ [k for k in doc.keys() if k.startswith("title_") and doc[k]],
26
+ key=lambda x: int(x.split("_")[1])
27
+ )
28
+ ]
29
+ )
30
+ + f" || {doc.get('_global_index', '')} ||"
31
+ )
32
+ },
33
+
34
+ "metadata_fields": [
35
+ {
36
+ "name": "_global_index",
37
+ "datatype": "int",
38
+ "label": "Verse Index",
39
+ "description": "Absolute verse index",
40
+ "show_as_filter": True,
41
+ "is_unique": True,
42
+ },
43
+ {
44
+ "name": "chapter_name",
45
+ "datatype": "str",
46
+ "label": "Chapter",
47
+ "description": "Original chapter title",
48
+ "show_as_filter": True,
49
+ },
50
+ {
51
+ "name": "lyrics_sa",
52
+ "datatype": "str",
53
+ "label": "Sanskrit",
54
+ "description": "Verse text in Sanskrit",
55
+ },
56
+ ],
57
+
58
+ "pdf_path": "./data/markandeypuranam.pdf",
59
+ "source": "",
60
+ "language": "san",
61
+ "example_labels": [],
62
+ "examples": [],
63
+ "llm_hints": [],
64
+ "credits": {"art": [], "data": [], "audio": [], "video": []},
65
+ }
modules/config/matsyapuranam.py ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from modules.config.categories import ScriptureCategoryConstants
2
+ from modules.google_drive.google_drive_utils import convert_drive_url_to_direct
3
+
4
def _title_rank(key):
    """Return the integer following the first underscore in *key*, or None.

    Keys whose suffix is not an integer (e.g. ``"title_"`` or ``"title_en"``)
    yield None so they can be skipped instead of raising ValueError while
    the title keys are being sorted.
    """
    try:
        return int(key.split("_")[1])
    except ValueError:
        return None


def _relative_path(doc):
    """Build the display path string for one document.

    Joins the truthy ``title_<n>`` values of *doc* with " | " in ascending
    ``n`` order, then appends " || <_global_index> ||" (the index is left
    empty when the key is absent).
    """
    title_keys = [
        key
        for key in doc
        if key.startswith("title_") and doc[key] and _title_rank(key) is not None
    ]
    # list.sort is stable, so keys with equal rank keep their mapping order.
    title_keys.sort(key=_title_rank)
    titles = " | ".join(doc[key] for key in title_keys)
    return titles + f" || {doc.get('_global_index', '')} ||"


# Configuration for the Matsya Puranam scripture collection.
matsyapuranam_config = {
    "name": "matsyapuranam",
    "title": "Matsya Puranam",
    "banner_url": convert_drive_url_to_direct(
        "https://drive.google.com/file/d/1DxlpvcRqHGTFsE0tDgxThzUy-F0gmlMb/view?usp=drive_link"
    ),
    "category": ScriptureCategoryConstants.PURANAM,
    "output_dir": "./output/matsyapuranam",
    "collection_name": "matsyapuranam",
    "collection_embedding_fn": "openai",

    # One unit is a sloka, identified by the document's "_global_index".
    "unit": "sloka",
    "unit_field": "_global_index",

    # Values are either a document key or a callable taking the document.
    "field_mapping": {
        "text": "lyrics_sa",
        "chapter_name": "chapter_name",
        "unit_index": "_global_index",
        "relative_path": _relative_path,
    },

    "metadata_fields": [
        {
            "name": "_global_index",
            "datatype": "int",
            "label": "Verse Index",
            "description": "Absolute verse index",
            "show_as_filter": True,
            "is_unique": True,
        },
        {
            "name": "chapter_name",
            "datatype": "str",
            "label": "Chapter",
            "description": "Original chapter title",
            "show_as_filter": True,
        },
        {
            "name": "lyrics_sa",
            "datatype": "str",
            "label": "Sanskrit",
            "description": "Verse text in Sanskrit",
        },
    ],

    "pdf_path": "./data/matsyapuranam.pdf",
    "source": "",
    "language": "san",
    "example_labels": [],
    "examples": [],
    "llm_hints": [],
    "credits": {"art": [], "data": [], "audio": [], "video": []},
}
modules/config/naradapuranam.py ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from modules.config.categories import ScriptureCategoryConstants
2
+ from modules.google_drive.google_drive_utils import convert_drive_url_to_direct
3
+
4
def _title_rank(key):
    """Return the integer following the first underscore in *key*, or None.

    Keys whose suffix is not an integer (e.g. ``"title_"`` or ``"title_en"``)
    yield None so they can be skipped instead of raising ValueError while
    the title keys are being sorted.
    """
    try:
        return int(key.split("_")[1])
    except ValueError:
        return None


def _relative_path(doc):
    """Build the display path string for one document.

    Joins the truthy ``title_<n>`` values of *doc* with " | " in ascending
    ``n`` order, then appends " || <_global_index> ||" (the index is left
    empty when the key is absent).
    """
    title_keys = [
        key
        for key in doc
        if key.startswith("title_") and doc[key] and _title_rank(key) is not None
    ]
    # list.sort is stable, so keys with equal rank keep their mapping order.
    title_keys.sort(key=_title_rank)
    titles = " | ".join(doc[key] for key in title_keys)
    return titles + f" || {doc.get('_global_index', '')} ||"


# Configuration for the Narada Puranam scripture collection.
naradapuranam_config = {
    "name": "naradapuranam",
    "title": "Narada Puranam",
    "banner_url": convert_drive_url_to_direct(
        "https://drive.google.com/file/d/1M-qDqwo9iCweONGDzhqUyjYcjJFgWpRd/view?usp=drive_link"
    ),
    "category": ScriptureCategoryConstants.PURANAM,
    "output_dir": "./output/naradapuranam",
    "collection_name": "naradapuranam",
    "collection_embedding_fn": "openai",

    # One unit is a sloka, identified by the document's "_global_index".
    "unit": "sloka",
    "unit_field": "_global_index",

    # Values are either a document key or a callable taking the document.
    "field_mapping": {
        "text": "lyrics_sa",
        "chapter_name": "chapter_name",
        "unit_index": "_global_index",
        "relative_path": _relative_path,
    },

    "metadata_fields": [
        {
            "name": "_global_index",
            "datatype": "int",
            "label": "Verse Index",
            "description": "Absolute verse index",
            "show_as_filter": True,
            "is_unique": True,
        },
        {
            "name": "chapter_name",
            "datatype": "str",
            "label": "Chapter",
            "description": "Original chapter title",
            "show_as_filter": True,
        },
        {
            "name": "lyrics_sa",
            "datatype": "str",
            "label": "Sanskrit",
            "description": "Verse text in Sanskrit",
        },
    ],

    "pdf_path": "./data/naradapuranam.pdf",
    "source": "",
    "language": "san",
    "example_labels": [],
    "examples": [],
    "llm_hints": [],
    "credits": {"art": [], "data": [], "audio": [], "video": []},
}
modules/config/padmapuranam.py ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from modules.config.categories import ScriptureCategoryConstants
2
+ from modules.google_drive.google_drive_utils import convert_drive_url_to_direct
3
+
4
def _title_rank(key):
    """Return the integer following the first underscore in *key*, or None.

    Keys whose suffix is not an integer (e.g. ``"title_"`` or ``"title_en"``)
    yield None so they can be skipped instead of raising ValueError while
    the title keys are being sorted.
    """
    try:
        return int(key.split("_")[1])
    except ValueError:
        return None


def _relative_path(doc):
    """Build the display path string for one document.

    Joins the truthy ``title_<n>`` values of *doc* with " | " in ascending
    ``n`` order, then appends " || <_global_index> ||" (the index is left
    empty when the key is absent).
    """
    title_keys = [
        key
        for key in doc
        if key.startswith("title_") and doc[key] and _title_rank(key) is not None
    ]
    # list.sort is stable, so keys with equal rank keep their mapping order.
    title_keys.sort(key=_title_rank)
    titles = " | ".join(doc[key] for key in title_keys)
    return titles + f" || {doc.get('_global_index', '')} ||"


# Configuration for the Padma Puranam scripture collection.
padmapuranam_config = {
    "name": "padmapuranam",
    "title": "Padma Puranam",
    "banner_url": convert_drive_url_to_direct(
        "https://drive.google.com/file/d/1BC5tIabDwAFw4-UmXdZcae3FrI7vR3ek/view?usp=drive_link"
    ),
    "category": ScriptureCategoryConstants.PURANAM,
    "output_dir": "./output/padmapuranam",
    "collection_name": "padmapuranam",
    "collection_embedding_fn": "openai",

    # One unit is a sloka, identified by the document's "_global_index".
    "unit": "sloka",
    "unit_field": "_global_index",

    # Values are either a document key or a callable taking the document.
    "field_mapping": {
        "text": "lyrics_sa",
        "chapter_name": "chapter_name",
        "unit_index": "_global_index",
        "relative_path": _relative_path,
    },

    "metadata_fields": [
        {
            "name": "_global_index",
            "datatype": "int",
            "label": "Verse Index",
            "description": "Absolute verse index",
            "show_as_filter": True,
            "is_unique": True,
        },
        {
            "name": "chapter_name",
            "datatype": "str",
            "label": "Chapter",
            "description": "Original chapter title",
            "show_as_filter": True,
        },
        {
            "name": "lyrics_sa",
            "datatype": "str",
            "label": "Sanskrit",
            "description": "Verse text in Sanskrit",
        },
    ],

    "pdf_path": "./data/padmapuranam.pdf",
    "source": "",
    "language": "san",
    "example_labels": [],
    "examples": [],
    "llm_hints": [],
    "credits": {"art": [], "data": [], "audio": [], "video": []},
}
modules/config/shivapuraanam.py ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from modules.config.categories import ScriptureCategoryConstants
2
+ from modules.google_drive.google_drive_utils import convert_drive_url_to_direct
3
+
4
def _title_rank(key):
    """Return the integer following the first underscore in *key*, or None.

    Keys whose suffix is not an integer (e.g. ``"title_"`` or ``"title_en"``)
    yield None so they can be skipped instead of raising ValueError while
    the title keys are being sorted.
    """
    try:
        return int(key.split("_")[1])
    except ValueError:
        return None


def _relative_path(doc):
    """Build the display path string for one document.

    Joins the truthy ``title_<n>`` values of *doc* with " | " in ascending
    ``n`` order, then appends " || <_global_index> ||" (the index is left
    empty when the key is absent).
    """
    title_keys = [
        key
        for key in doc
        if key.startswith("title_") and doc[key] and _title_rank(key) is not None
    ]
    # list.sort is stable, so keys with equal rank keep their mapping order.
    title_keys.sort(key=_title_rank)
    titles = " | ".join(doc[key] for key in title_keys)
    return titles + f" || {doc.get('_global_index', '')} ||"


# Configuration for the Shiva Puranam scripture collection.
shivapuraanam_config = {
    "name": "shivapuraanam",
    "title": "Shiva Puranam",
    "banner_url": convert_drive_url_to_direct(
        "https://drive.google.com/file/d/1kgLfcskt00H_lkJtSgf8FDyJqiikl3Eu/view?usp=drive_link"
    ),
    "category": ScriptureCategoryConstants.PURANAM,
    "output_dir": "./output/shivapuraanam",
    "collection_name": "shivapuraanam",
    "collection_embedding_fn": "openai",

    # One unit is a sloka, identified by the document's "_global_index".
    "unit": "sloka",
    "unit_field": "_global_index",

    # Values are either a document key or a callable taking the document.
    "field_mapping": {
        "text": "lyrics_sa",
        "chapter_name": "chapter_name",
        "unit_index": "_global_index",
        "relative_path": _relative_path,
    },

    "metadata_fields": [
        {
            "name": "_global_index",
            "datatype": "int",
            "label": "Verse Index",
            "description": "Absolute verse index",
            "show_as_filter": True,
            "is_unique": True,
        },
        {
            "name": "chapter_name",
            "datatype": "str",
            "label": "Chapter",
            "description": "Original chapter title",
            "show_as_filter": True,
        },
        {
            "name": "lyrics_sa",
            "datatype": "str",
            "label": "Sanskrit",
            "description": "Verse text in Sanskrit",
        },
    ],

    "pdf_path": "./data/shivapuraanam.pdf",
    "source": "",
    "language": "san",
    "example_labels": [],
    "examples": [],
    "llm_hints": [],
    "credits": {"art": [], "data": [], "audio": [], "video": []},
}
modules/config/skandapuranam.py ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from modules.config.categories import ScriptureCategoryConstants
2
+ from modules.google_drive.google_drive_utils import convert_drive_url_to_direct
3
+
4
def _title_rank(key):
    """Return the integer following the first underscore in *key*, or None.

    Keys whose suffix is not an integer (e.g. ``"title_"`` or ``"title_en"``)
    yield None so they can be skipped instead of raising ValueError while
    the title keys are being sorted.
    """
    try:
        return int(key.split("_")[1])
    except ValueError:
        return None


def _relative_path(doc):
    """Build the display path string for one document.

    Joins the truthy ``title_<n>`` values of *doc* with " | " in ascending
    ``n`` order, then appends " || <_global_index> ||" (the index is left
    empty when the key is absent).
    """
    title_keys = [
        key
        for key in doc
        if key.startswith("title_") and doc[key] and _title_rank(key) is not None
    ]
    # list.sort is stable, so keys with equal rank keep their mapping order.
    title_keys.sort(key=_title_rank)
    titles = " | ".join(doc[key] for key in title_keys)
    return titles + f" || {doc.get('_global_index', '')} ||"


# Configuration for the Skanda Puranam scripture collection.
skandapuranam_config = {
    "name": "skandapuranam",
    "title": "Skanda Puranam",
    "banner_url": convert_drive_url_to_direct(
        "https://drive.google.com/file/d/1J-yM-vFJFye0yV2oOsUAo23yv7PBfy2G/view?usp=drive_link"
    ),
    "category": ScriptureCategoryConstants.PURANAM,
    "output_dir": "./output/skandapuranam",
    "collection_name": "skandapuranam",
    "collection_embedding_fn": "openai",

    # One unit is a sloka, identified by the document's "_global_index".
    "unit": "sloka",
    "unit_field": "_global_index",

    # Values are either a document key or a callable taking the document.
    "field_mapping": {
        "text": "lyrics_sa",
        "chapter_name": "chapter_name",
        "unit_index": "_global_index",
        "relative_path": _relative_path,
    },

    "metadata_fields": [
        {
            "name": "_global_index",
            "datatype": "int",
            "label": "Verse Index",
            "description": "Absolute verse index",
            "show_as_filter": True,
            "is_unique": True,
        },
        {
            "name": "chapter_name",
            "datatype": "str",
            "label": "Chapter",
            "description": "Original chapter title",
            "show_as_filter": True,
        },
        {
            "name": "lyrics_sa",
            "datatype": "str",
            "label": "Sanskrit",
            "description": "Verse text in Sanskrit",
        },
    ],

    "pdf_path": "./data/skandapuranam.pdf",
    "source": "",
    "language": "san",
    "example_labels": [],
    "examples": [],
    "llm_hints": [],
    "credits": {"art": [], "data": [], "audio": [], "video": []},
}
modules/config/vaamanapuraanam.py ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from modules.config.categories import ScriptureCategoryConstants
2
+ from modules.google_drive.google_drive_utils import convert_drive_url_to_direct
3
+
4
def _title_rank(key):
    """Return the integer following the first underscore in *key*, or None.

    Keys whose suffix is not an integer (e.g. ``"title_"`` or ``"title_en"``)
    yield None so they can be skipped instead of raising ValueError while
    the title keys are being sorted.
    """
    try:
        return int(key.split("_")[1])
    except ValueError:
        return None


def _relative_path(doc):
    """Build the display path string for one document.

    Joins the truthy ``title_<n>`` values of *doc* with " | " in ascending
    ``n`` order, then appends " || <_global_index> ||" (the index is left
    empty when the key is absent).
    """
    title_keys = [
        key
        for key in doc
        if key.startswith("title_") and doc[key] and _title_rank(key) is not None
    ]
    # list.sort is stable, so keys with equal rank keep their mapping order.
    title_keys.sort(key=_title_rank)
    titles = " | ".join(doc[key] for key in title_keys)
    return titles + f" || {doc.get('_global_index', '')} ||"


# Configuration for the Vaamana Puranam scripture collection.
vaamanapuraanam_config = {
    "name": "vaamanapuraanam",
    "title": "Vaamana Puranam",
    "banner_url": convert_drive_url_to_direct(
        "https://drive.google.com/file/d/1xnRARh9wnfQ7oy7GxSr7-1mKvRj8Iq64/view?usp=drive_link"
    ),
    "category": ScriptureCategoryConstants.PURANAM,
    "output_dir": "./output/vaamanapuraanam",
    "collection_name": "vaamanapuraanam",
    "collection_embedding_fn": "openai",

    # One unit is a sloka, identified by the document's "_global_index".
    "unit": "sloka",
    "unit_field": "_global_index",

    # Values are either a document key or a callable taking the document.
    "field_mapping": {
        "text": "lyrics_sa",
        "chapter_name": "chapter_name",
        "unit_index": "_global_index",
        "relative_path": _relative_path,
    },

    "metadata_fields": [
        {
            "name": "_global_index",
            "datatype": "int",
            "label": "Verse Index",
            "description": "Absolute verse index",
            "show_as_filter": True,
            "is_unique": True,
        },
        {
            "name": "chapter_name",
            "datatype": "str",
            "label": "Chapter",
            "description": "Original chapter title",
            "show_as_filter": True,
        },
        {
            "name": "lyrics_sa",
            "datatype": "str",
            "label": "Sanskrit",
            "description": "Verse text in Sanskrit",
        },
    ],

    "pdf_path": "./data/vaamanapuraanam.pdf",
    "source": "",
    "language": "san",
    "example_labels": [],
    "examples": [],
    "llm_hints": [],
    "credits": {"art": [], "data": [], "audio": [], "video": []},
}
modules/config/vaayupuraanam.py ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from modules.config.categories import ScriptureCategoryConstants
2
+ from modules.google_drive.google_drive_utils import convert_drive_url_to_direct
3
+
4
def _title_rank(key):
    """Return the integer following the first underscore in *key*, or None.

    Keys whose suffix is not an integer (e.g. ``"title_"`` or ``"title_en"``)
    yield None so they can be skipped instead of raising ValueError while
    the title keys are being sorted.
    """
    try:
        return int(key.split("_")[1])
    except ValueError:
        return None


def _relative_path(doc):
    """Build the display path string for one document.

    Joins the truthy ``title_<n>`` values of *doc* with " | " in ascending
    ``n`` order, then appends " || <_global_index> ||" (the index is left
    empty when the key is absent).
    """
    title_keys = [
        key
        for key in doc
        if key.startswith("title_") and doc[key] and _title_rank(key) is not None
    ]
    # list.sort is stable, so keys with equal rank keep their mapping order.
    title_keys.sort(key=_title_rank)
    titles = " | ".join(doc[key] for key in title_keys)
    return titles + f" || {doc.get('_global_index', '')} ||"


# Configuration for the Vaayu Puranam scripture collection.
vaayupuraanam_config = {
    "name": "vaayupuraanam",
    "title": "Vaayu Puranam",
    "banner_url": convert_drive_url_to_direct(
        "https://drive.google.com/file/d/1nQqZKmGdLnFxSDcqsaq7PP5na1SZQ23h/view?usp=drive_link"
    ),
    "category": ScriptureCategoryConstants.PURANAM,
    "output_dir": "./output/vaayupuraanam",
    "collection_name": "vaayupuraanam",
    "collection_embedding_fn": "openai",

    # One unit is a sloka, identified by the document's "_global_index".
    "unit": "sloka",
    "unit_field": "_global_index",

    # Values are either a document key or a callable taking the document.
    "field_mapping": {
        "text": "lyrics_sa",
        "chapter_name": "chapter_name",
        "unit_index": "_global_index",
        "relative_path": _relative_path,
    },

    "metadata_fields": [
        {
            "name": "_global_index",
            "datatype": "int",
            "label": "Verse Index",
            "description": "Absolute verse index",
            "show_as_filter": True,
            "is_unique": True,
        },
        {
            "name": "chapter_name",
            "datatype": "str",
            "label": "Chapter",
            "description": "Original chapter title",
            "show_as_filter": True,
        },
        {
            "name": "lyrics_sa",
            "datatype": "str",
            "label": "Sanskrit",
            "description": "Verse text in Sanskrit",
        },
    ],

    "pdf_path": "./data/vaayupuraanam.pdf",
    "source": "",
    "language": "san",
    "example_labels": [],
    "examples": [],
    "llm_hints": [],
    "credits": {"art": [], "data": [], "audio": [], "video": []},
}
modules/config/varahapuranam.py ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from modules.config.categories import ScriptureCategoryConstants
2
+ from modules.google_drive.google_drive_utils import convert_drive_url_to_direct
3
+
4
def _title_rank(key):
    """Return the integer following the first underscore in *key*, or None.

    Keys whose suffix is not an integer (e.g. ``"title_"`` or ``"title_en"``)
    yield None so they can be skipped instead of raising ValueError while
    the title keys are being sorted.
    """
    try:
        return int(key.split("_")[1])
    except ValueError:
        return None


def _relative_path(doc):
    """Build the display path string for one document.

    Joins the truthy ``title_<n>`` values of *doc* with " | " in ascending
    ``n`` order, then appends " || <_global_index> ||" (the index is left
    empty when the key is absent).
    """
    title_keys = [
        key
        for key in doc
        if key.startswith("title_") and doc[key] and _title_rank(key) is not None
    ]
    # list.sort is stable, so keys with equal rank keep their mapping order.
    title_keys.sort(key=_title_rank)
    titles = " | ".join(doc[key] for key in title_keys)
    return titles + f" || {doc.get('_global_index', '')} ||"


# Configuration for the Varaaha Puranam scripture collection.
varahapuranam_config = {
    "name": "varahapuranam",
    "title": "Varaaha Puranam",
    "banner_url": convert_drive_url_to_direct(
        "https://drive.google.com/file/d/1jAZe95sUSH5iuNw2wdQKW7cLcf37ArgK/view?usp=drive_link"
    ),
    "category": ScriptureCategoryConstants.PURANAM,
    "output_dir": "./output/varahapuranam",
    "collection_name": "varahapuranam",
    "collection_embedding_fn": "openai",

    # One unit is a sloka, identified by the document's "_global_index".
    "unit": "sloka",
    "unit_field": "_global_index",

    # Values are either a document key or a callable taking the document.
    "field_mapping": {
        "text": "lyrics_sa",
        "chapter_name": "chapter_name",
        "unit_index": "_global_index",
        "relative_path": _relative_path,
    },

    "metadata_fields": [
        {
            "name": "_global_index",
            "datatype": "int",
            "label": "Verse Index",
            "description": "Absolute verse index",
            "show_as_filter": True,
            "is_unique": True,
        },
        {
            "name": "chapter_name",
            "datatype": "str",
            "label": "Chapter",
            "description": "Original chapter title",
            "show_as_filter": True,
        },
        {
            "name": "lyrics_sa",
            "datatype": "str",
            "label": "Sanskrit",
            "description": "Verse text in Sanskrit",
        },
    ],

    "pdf_path": "./data/varahapuranam.pdf",
    "source": "",
    "language": "san",
    "example_labels": [],
    "examples": [],
    "llm_hints": [],
    "credits": {"art": [], "data": [], "audio": [], "video": []},
}
modules/config/vishnu_puranam.py CHANGED
@@ -1,46 +1,65 @@
1
  from modules.config.categories import ScriptureCategoryConstants
 
2
 
3
-
4
- vishnu_puranam_config = {
5
- "name": "vishnu_puranam",
6
- "title": "Sri Vishnu Puranam",
7
- "category" : ScriptureCategoryConstants.PURANAM,
8
- "output_dir": "./output/vishnu_puranam",
9
  "collection_name": "vishnu_puranam_openai",
10
  "collection_embedding_fn": "openai",
11
- "unit": "page",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  "metadata_fields": [
13
  {
14
- "name": "file",
15
- "label": "File Name",
16
- "datatype": "str",
17
- "description": "name of the file from which the information was extracted",
18
- },
19
- {
20
- "name": "page",
21
  "datatype": "int",
22
- "label": "Page Number",
23
- "description": "Page number from the source",
24
  "show_as_filter": True,
25
  "is_unique": True,
26
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
27
  ],
28
- "pdf_path": "./data/vishnu_puranam.pdf",
29
- "source": "https://dn720005.ca.archive.org/0/items/vishnu-purana-sanskrit-english-ocr/VISHNU-PURANA-Sanskrit-English-OCR.pdf",
30
- "language": "san+eng",
31
- "example_labels": [
32
- "Vishnu's form",
33
- "About the five elements",
34
- "About Garuda",
35
- "Weapons of Vishnu",
36
- "Vishnu's form (all scriptures)",
37
- ],
38
- "examples": [
39
- "describe Vishnu's form as defined in vishnu puranam",
40
- "five elements and their significance as per vishnu puranam",
41
- "What is the significance of Garuda? Show some verses from vishnu puranam that describe him.",
42
- "What weapons does Vishnu hold as mentioned in vishnu puranam?",
43
- "How is the form of Vishnu described across the scriptures?",
44
- ],
45
  "llm_hints": [],
 
46
  }
 
1
  from modules.config.categories import ScriptureCategoryConstants
2
+ from modules.google_drive.google_drive_utils import convert_drive_url_to_direct
3
 
4
def _title_rank(key):
    """Return the integer following the first underscore in *key*, or None.

    Keys whose suffix is not an integer (e.g. ``"title_"`` or ``"title_en"``)
    yield None so they can be skipped instead of raising ValueError while
    the title keys are being sorted.
    """
    try:
        return int(key.split("_")[1])
    except ValueError:
        return None


def _relative_path(doc):
    """Build the display path string for one document.

    Joins the truthy ``title_<n>`` values of *doc* with " | " in ascending
    ``n`` order, then appends " || <_global_index> ||" (the index is left
    empty when the key is absent).
    """
    title_keys = [
        key
        for key in doc
        if key.startswith("title_") and doc[key] and _title_rank(key) is not None
    ]
    # list.sort is stable, so keys with equal rank keep their mapping order.
    title_keys.sort(key=_title_rank)
    titles = " | ".join(doc[key] for key in title_keys)
    return titles + f" || {doc.get('_global_index', '')} ||"


# Configuration for the Vishnu Puranam scripture collection.
vishnupuranam_config = {
    "name": "vishnupuranam",
    "title": "Vishnu Puranam",
    "banner_url": convert_drive_url_to_direct(
        "https://drive.google.com/file/d/1ny6M0c6jfCLtYRaROffb1erRc3u8WB2u/view?usp=drive_link"
    ),
    "category": ScriptureCategoryConstants.PURANAM,
    "output_dir": "./output/vishnupuranam",
    # NOTE(review): the collection name does not follow "name" here
    # ("vishnu_puranam_openai" vs "vishnupuranam") - confirm this is intended.
    "collection_name": "vishnu_puranam_openai",
    "collection_embedding_fn": "openai",

    # One unit is a sloka, identified by the document's "_global_index".
    "unit": "sloka",
    "unit_field": "_global_index",

    # Values are either a document key or a callable taking the document.
    "field_mapping": {
        "text": "lyrics_sa",
        "chapter_name": "chapter_name",
        "unit_index": "_global_index",
        "relative_path": _relative_path,
    },

    "metadata_fields": [
        {
            "name": "_global_index",
            "datatype": "int",
            "label": "Verse Index",
            "description": "Absolute verse index",
            "show_as_filter": True,
            "is_unique": True,
        },
        {
            "name": "chapter_name",
            "datatype": "str",
            "label": "Chapter",
            "description": "Original chapter title",
            "show_as_filter": True,
        },
        {
            "name": "lyrics_sa",
            "datatype": "str",
            "label": "Sanskrit",
            "description": "Verse text in Sanskrit",
        },
    ],

    "pdf_path": "./data/vishnupuranam.pdf",
    "source": "",
    "language": "san",
    "example_labels": [],
    "examples": [],
    "llm_hints": [],
    "credits": {"art": [], "data": [], "audio": [], "video": []},
}