Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
Upload folder using huggingface_hub
Browse files- .gitignore +4 -1
- copy_chromadb.py +96 -1
- db.py +20 -9
- modules/config/__init__.py +38 -2
- modules/config/agnipuranam.py +72 -0
- modules/config/bhavishyapuranam.py +65 -0
- modules/config/brahmandpuranam.py +65 -0
- modules/config/brahmapuranam.py +65 -0
- modules/config/brahmavaivarthapurana.py +65 -0
- modules/config/garudapuranam.py +65 -0
- modules/config/harivanshapuraanam.py +65 -0
- modules/config/kurmapuranam.py +65 -0
- modules/config/lingapuranam.py +65 -0
- modules/config/markandeypuranam.py +65 -0
- modules/config/matsyapuranam.py +65 -0
- modules/config/naradapuranam.py +65 -0
- modules/config/padmapuranam.py +65 -0
- modules/config/shivapuraanam.py +65 -0
- modules/config/skandapuranam.py +65 -0
- modules/config/vaamanapuraanam.py +65 -0
- modules/config/vaayupuraanam.py +65 -0
- modules/config/varahapuranam.py +65 -0
- modules/config/vishnu_puranam.py +52 -33
.gitignore
CHANGED
|
@@ -15,4 +15,7 @@ outputs/
|
|
| 15 |
chromadb-store_20251112.zip
|
| 16 |
chromadb-store_20251118.zip
|
| 17 |
chroma_exports/
|
| 18 |
-
chroma_exports.zip
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
chromadb-store_20251112.zip
|
| 16 |
chromadb-store_20251118.zip
|
| 17 |
chroma_exports/
|
| 18 |
+
chroma_exports.zip
|
| 19 |
+
chromadb-store-CORRUPTED/
|
| 20 |
+
chromadb-store_BLOATED.zip
|
| 21 |
+
chromadb-store_OLD.zip
|
copy_chromadb.py
CHANGED
|
@@ -102,7 +102,102 @@ db_config = {
|
|
| 102 |
"source_db_path": "../bhagavata_purana_ai/chromadb_store",
|
| 103 |
"source_collection_name": "bhagavata_purana",
|
| 104 |
"destination_collection_name": "bhagavata_purana",
|
| 105 |
-
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 106 |
}
|
| 107 |
|
| 108 |
parser = argparse.ArgumentParser(description="My app with database parameter")
|
|
|
|
| 102 |
"source_db_path": "../bhagavata_purana_ai/chromadb_store",
|
| 103 |
"source_collection_name": "bhagavata_purana",
|
| 104 |
"destination_collection_name": "bhagavata_purana",
|
| 105 |
+
},
|
| 106 |
+
"agnipuranam": {
|
| 107 |
+
"source_db_path": "../puranas_ai/chromadb_store",
|
| 108 |
+
"source_collection_name": "agnipuranam",
|
| 109 |
+
"destination_collection_name": "agnipuranam"
|
| 110 |
+
},
|
| 111 |
+
"bhavishyapuranam": {
|
| 112 |
+
"source_db_path": "../puranas_ai/chromadb_store",
|
| 113 |
+
"source_collection_name": "bhavishyapuranam",
|
| 114 |
+
"destination_collection_name": "bhavishyapuranam"
|
| 115 |
+
},
|
| 116 |
+
"brahmandpuranam": {
|
| 117 |
+
"source_db_path": "../puranas_ai/chromadb_store",
|
| 118 |
+
"source_collection_name": "brahmandpuranam",
|
| 119 |
+
"destination_collection_name": "brahmandpuranam"
|
| 120 |
+
},
|
| 121 |
+
"brahmapuranam": {
|
| 122 |
+
"source_db_path": "../puranas_ai/chromadb_store",
|
| 123 |
+
"source_collection_name": "brahmapuranam",
|
| 124 |
+
"destination_collection_name": "brahmapuranam"
|
| 125 |
+
},
|
| 126 |
+
"brahmavaivarthapurana": {
|
| 127 |
+
"source_db_path": "../puranas_ai/chromadb_store",
|
| 128 |
+
"source_collection_name": "brahmavaivarthapurana",
|
| 129 |
+
"destination_collection_name": "brahmavaivarthapurana"
|
| 130 |
+
},
|
| 131 |
+
"garudapuranam": {
|
| 132 |
+
"source_db_path": "../puranas_ai/chromadb_store",
|
| 133 |
+
"source_collection_name": "garudapuranam",
|
| 134 |
+
"destination_collection_name": "garudapuranam"
|
| 135 |
+
},
|
| 136 |
+
"harivanshapuraanam": {
|
| 137 |
+
"source_db_path": "../puranas_ai/chromadb_store",
|
| 138 |
+
"source_collection_name": "harivanshapuraanam",
|
| 139 |
+
"destination_collection_name": "harivanshapuraanam"
|
| 140 |
+
},
|
| 141 |
+
"kurmapuranam": {
|
| 142 |
+
"source_db_path": "../puranas_ai/chromadb_store",
|
| 143 |
+
"source_collection_name": "kurmapuranam",
|
| 144 |
+
"destination_collection_name": "kurmapuranam"
|
| 145 |
+
},
|
| 146 |
+
"lingapuranam": {
|
| 147 |
+
"source_db_path": "../puranas_ai/chromadb_store",
|
| 148 |
+
"source_collection_name": "lingapuranam",
|
| 149 |
+
"destination_collection_name": "lingapuranam"
|
| 150 |
+
},
|
| 151 |
+
"markandeypuranam": {
|
| 152 |
+
"source_db_path": "../puranas_ai/chromadb_store",
|
| 153 |
+
"source_collection_name": "markandeypuranam",
|
| 154 |
+
"destination_collection_name": "markandeypuranam"
|
| 155 |
+
},
|
| 156 |
+
"matsyapuranam": {
|
| 157 |
+
"source_db_path": "../puranas_ai/chromadb_store",
|
| 158 |
+
"source_collection_name": "matsyapuranam",
|
| 159 |
+
"destination_collection_name": "matsyapuranam"
|
| 160 |
+
},
|
| 161 |
+
"naradapuranam": {
|
| 162 |
+
"source_db_path": "../puranas_ai/chromadb_store",
|
| 163 |
+
"source_collection_name": "naradapuranam",
|
| 164 |
+
"destination_collection_name": "naradapuranam"
|
| 165 |
+
},
|
| 166 |
+
"padmapuranam": {
|
| 167 |
+
"source_db_path": "../puranas_ai/chromadb_store",
|
| 168 |
+
"source_collection_name": "padmapuranam",
|
| 169 |
+
"destination_collection_name": "padmapuranam"
|
| 170 |
+
},
|
| 171 |
+
"shivapuraanam": {
|
| 172 |
+
"source_db_path": "../puranas_ai/chromadb_store",
|
| 173 |
+
"source_collection_name": "shivapuraanam",
|
| 174 |
+
"destination_collection_name": "shivapuraanam"
|
| 175 |
+
},
|
| 176 |
+
"skandapuranam": {
|
| 177 |
+
"source_db_path": "../puranas_ai/chromadb_store",
|
| 178 |
+
"source_collection_name": "skandapuranam",
|
| 179 |
+
"destination_collection_name": "skandapuranam"
|
| 180 |
+
},
|
| 181 |
+
"vaamanapuraanam": {
|
| 182 |
+
"source_db_path": "../puranas_ai/chromadb_store",
|
| 183 |
+
"source_collection_name": "vaamanapuraanam",
|
| 184 |
+
"destination_collection_name": "vaamanapuraanam"
|
| 185 |
+
},
|
| 186 |
+
"vaayupuraanam": {
|
| 187 |
+
"source_db_path": "../puranas_ai/chromadb_store",
|
| 188 |
+
"source_collection_name": "vaayupuraanam",
|
| 189 |
+
"destination_collection_name": "vaayupuraanam"
|
| 190 |
+
},
|
| 191 |
+
"varahapuranam": {
|
| 192 |
+
"source_db_path": "../puranas_ai/chromadb_store",
|
| 193 |
+
"source_collection_name": "varahapuranam",
|
| 194 |
+
"destination_collection_name": "varahapuranam"
|
| 195 |
+
},
|
| 196 |
+
"vishnupuranam": {
|
| 197 |
+
"source_db_path": "../puranas_ai/chromadb_store",
|
| 198 |
+
"source_collection_name": "vishnupuranam",
|
| 199 |
+
"destination_collection_name": "vishnu_puranam_openai"
|
| 200 |
+
}
|
| 201 |
}
|
| 202 |
|
| 203 |
parser = argparse.ArgumentParser(description="My app with database parameter")
|
db.py
CHANGED
|
@@ -824,15 +824,26 @@ class SanatanDatabase:
|
|
| 824 |
)
|
| 825 |
|
| 826 |
# Fetch all records (keep embeddings for upsert)
|
| 827 |
-
|
| 828 |
-
|
| 829 |
-
|
| 830 |
-
|
| 831 |
-
|
| 832 |
-
|
| 833 |
-
|
| 834 |
-
|
| 835 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 836 |
|
| 837 |
ids = results["ids"]
|
| 838 |
metadatas = results["metadatas"]
|
|
|
|
| 824 |
)
|
| 825 |
|
| 826 |
# Fetch all records (keep embeddings for upsert)
|
| 827 |
+
MAX_RETRIES = 3
|
| 828 |
+
RETRY_DELAY = 5 # seconds
|
| 829 |
+
|
| 830 |
+
for attempt in range(1, MAX_RETRIES + 1):
|
| 831 |
+
try:
|
| 832 |
+
results = collection.get(include=["metadatas", "documents", "embeddings"])
|
| 833 |
+
break # success → exit loop
|
| 834 |
+
except Exception as e:
|
| 835 |
+
logger.error(
|
| 836 |
+
"build_global_index_for_all_scriptures:%s Error getting data from chromadb (attempt %s/%s)",
|
| 837 |
+
scripture_name,
|
| 838 |
+
attempt,
|
| 839 |
+
MAX_RETRIES,
|
| 840 |
+
exc_info=True,
|
| 841 |
+
)
|
| 842 |
+
|
| 843 |
+
if attempt == MAX_RETRIES:
|
| 844 |
+
# still failing after 3 attempts
|
| 845 |
+
return
|
| 846 |
+
time.sleep(RETRY_DELAY) # wait before retry
|
| 847 |
|
| 848 |
ids = results["ids"]
|
| 849 |
metadatas = results["metadatas"]
|
modules/config/__init__.py
CHANGED
|
@@ -1,26 +1,44 @@
|
|
|
|
|
| 1 |
from modules.config.bhagavat_gita import bhagavat_gita_config
|
| 2 |
from modules.config.bhagavata_purana import bhagavata_purana_config
|
|
|
|
| 3 |
from modules.config.brahma_sutra import brahma_sutra_config
|
|
|
|
|
|
|
|
|
|
| 4 |
from modules.config.chathusloki import chathusloki_config
|
| 5 |
from modules.config.desika_prabandham import desika_prabandham_config
|
| 6 |
from modules.config.divya_prabandham import divya_prabandham_config
|
| 7 |
from modules.config.divya_prabandham_taniyans import divya_prabandham_taniyans_config
|
|
|
|
|
|
|
| 8 |
from modules.config.kamba_ramayanam import kamba_ramayanam_config
|
| 9 |
from modules.config.kamba_ramayanam_en import kamba_ramayanam_en_config
|
| 10 |
from modules.config.katakam import katakam_config
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
from modules.config.narayaneeyam import narayaneeyam_config
|
|
|
|
| 12 |
from modules.config.pancha_sooktham import pancha_sooktham_config
|
| 13 |
from modules.config.raghuveera_gadhyam import raghuveera_gadhyam_config
|
| 14 |
from modules.config.shanthi_panchakam import shanthi_panchakam_config
|
|
|
|
| 15 |
from modules.config.shukla_yajur_vedam import shukla_yajur_vedam_config
|
|
|
|
| 16 |
from modules.config.sri_stavam import sri_stavam_config
|
| 17 |
from modules.config.sri_vachana_bhushanam import sri_vachana_bhushanam_config
|
| 18 |
from modules.config.taitriya_aranyakam import taitriya_aranyakam_config
|
| 19 |
from modules.config.taitriya_brahmanam import taitriya_brahmanam_config
|
| 20 |
from modules.config.taitriya_samhitha import taitriya_samhitha_config
|
| 21 |
from modules.config.taitriya_upanishad import taitriya_upanishad_config
|
|
|
|
|
|
|
| 22 |
from modules.config.valmiki_ramayanam import valmiki_ramayanam_config
|
| 23 |
-
from modules.config.
|
|
|
|
| 24 |
from modules.config.vishnu_sahasranamam import vishnu_sahasranamam_config
|
| 25 |
from modules.config.yt_metadata import yt_metadata_config
|
| 26 |
|
|
@@ -43,7 +61,7 @@ scripture_configurations = [
|
|
| 43 |
taitriya_aranyakam_config,
|
| 44 |
taitriya_upanishad_config,
|
| 45 |
valmiki_ramayanam_config,
|
| 46 |
-
|
| 47 |
vishnu_sahasranamam_config,
|
| 48 |
yt_metadata_config,
|
| 49 |
brahma_sutra_config,
|
|
@@ -51,4 +69,22 @@ scripture_configurations = [
|
|
| 51 |
desika_prabandham_config,
|
| 52 |
raghuveera_gadhyam_config,
|
| 53 |
narayaneeyam_config,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 54 |
]
|
|
|
|
| 1 |
+
from modules.config.agnipuranam import agnipuranam_config
|
| 2 |
from modules.config.bhagavat_gita import bhagavat_gita_config
|
| 3 |
from modules.config.bhagavata_purana import bhagavata_purana_config
|
| 4 |
+
from modules.config.bhavishyapuranam import bhavishyapuranam_config
|
| 5 |
from modules.config.brahma_sutra import brahma_sutra_config
|
| 6 |
+
from modules.config.brahmandpuranam import brahmandpuranam_config
|
| 7 |
+
from modules.config.brahmapuranam import brahmapuranam_config
|
| 8 |
+
from modules.config.brahmavaivarthapurana import brahmavaivarthapurana_config
|
| 9 |
from modules.config.chathusloki import chathusloki_config
|
| 10 |
from modules.config.desika_prabandham import desika_prabandham_config
|
| 11 |
from modules.config.divya_prabandham import divya_prabandham_config
|
| 12 |
from modules.config.divya_prabandham_taniyans import divya_prabandham_taniyans_config
|
| 13 |
+
from modules.config.garudapuranam import garudapuranam_config
|
| 14 |
+
from modules.config.harivanshapuraanam import harivanshapuraanam_config
|
| 15 |
from modules.config.kamba_ramayanam import kamba_ramayanam_config
|
| 16 |
from modules.config.kamba_ramayanam_en import kamba_ramayanam_en_config
|
| 17 |
from modules.config.katakam import katakam_config
|
| 18 |
+
from modules.config.kurmapuranam import kurmapuranam_config
|
| 19 |
+
from modules.config.lingapuranam import lingapuranam_config
|
| 20 |
+
from modules.config.markandeypuranam import markandeypuranam_config
|
| 21 |
+
from modules.config.matsyapuranam import matsyapuranam_config
|
| 22 |
+
from modules.config.naradapuranam import naradapuranam_config
|
| 23 |
from modules.config.narayaneeyam import narayaneeyam_config
|
| 24 |
+
from modules.config.padmapuranam import padmapuranam_config
|
| 25 |
from modules.config.pancha_sooktham import pancha_sooktham_config
|
| 26 |
from modules.config.raghuveera_gadhyam import raghuveera_gadhyam_config
|
| 27 |
from modules.config.shanthi_panchakam import shanthi_panchakam_config
|
| 28 |
+
from modules.config.shivapuraanam import shivapuraanam_config
|
| 29 |
from modules.config.shukla_yajur_vedam import shukla_yajur_vedam_config
|
| 30 |
+
from modules.config.skandapuranam import skandapuranam_config
|
| 31 |
from modules.config.sri_stavam import sri_stavam_config
|
| 32 |
from modules.config.sri_vachana_bhushanam import sri_vachana_bhushanam_config
|
| 33 |
from modules.config.taitriya_aranyakam import taitriya_aranyakam_config
|
| 34 |
from modules.config.taitriya_brahmanam import taitriya_brahmanam_config
|
| 35 |
from modules.config.taitriya_samhitha import taitriya_samhitha_config
|
| 36 |
from modules.config.taitriya_upanishad import taitriya_upanishad_config
|
| 37 |
+
from modules.config.vaamanapuraanam import vaamanapuraanam_config
|
| 38 |
+
from modules.config.vaayupuraanam import vaayupuraanam_config
|
| 39 |
from modules.config.valmiki_ramayanam import valmiki_ramayanam_config
|
| 40 |
+
from modules.config.varahapuranam import varahapuranam_config
|
| 41 |
+
from modules.config.vishnu_puranam import vishnupuranam_config
|
| 42 |
from modules.config.vishnu_sahasranamam import vishnu_sahasranamam_config
|
| 43 |
from modules.config.yt_metadata import yt_metadata_config
|
| 44 |
|
|
|
|
| 61 |
taitriya_aranyakam_config,
|
| 62 |
taitriya_upanishad_config,
|
| 63 |
valmiki_ramayanam_config,
|
| 64 |
+
vishnupuranam_config,
|
| 65 |
vishnu_sahasranamam_config,
|
| 66 |
yt_metadata_config,
|
| 67 |
brahma_sutra_config,
|
|
|
|
| 69 |
desika_prabandham_config,
|
| 70 |
raghuveera_gadhyam_config,
|
| 71 |
narayaneeyam_config,
|
| 72 |
+
agnipuranam_config,
|
| 73 |
+
bhavishyapuranam_config,
|
| 74 |
+
brahmandpuranam_config,
|
| 75 |
+
brahmapuranam_config,
|
| 76 |
+
brahmavaivarthapurana_config,
|
| 77 |
+
garudapuranam_config,
|
| 78 |
+
harivanshapuraanam_config,
|
| 79 |
+
kurmapuranam_config,
|
| 80 |
+
lingapuranam_config,
|
| 81 |
+
markandeypuranam_config,
|
| 82 |
+
matsyapuranam_config,
|
| 83 |
+
naradapuranam_config,
|
| 84 |
+
padmapuranam_config,
|
| 85 |
+
shivapuraanam_config,
|
| 86 |
+
skandapuranam_config,
|
| 87 |
+
vaamanapuraanam_config,
|
| 88 |
+
vaayupuraanam_config,
|
| 89 |
+
varahapuranam_config,
|
| 90 |
]
|
modules/config/agnipuranam.py
ADDED
|
@@ -0,0 +1,72 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from modules.config.categories import ScriptureCategoryConstants
|
| 2 |
+
from modules.google_drive.google_drive_utils import convert_drive_url_to_direct
|
| 3 |
+
|
| 4 |
+
agnipuranam_config = {
|
| 5 |
+
"name": "agnipuranam",
|
| 6 |
+
"title": "Agni Puranam",
|
| 7 |
+
"banner_url": convert_drive_url_to_direct(
|
| 8 |
+
"https://drive.google.com/file/d/1YjOR3wLNBbNovKC01aUXHa1osbjgLYbw/view?usp=drive_link"
|
| 9 |
+
),
|
| 10 |
+
"category": ScriptureCategoryConstants.PURANAM,
|
| 11 |
+
"output_dir": "./output/agnipuranam",
|
| 12 |
+
"collection_name": "agnipuranam",
|
| 13 |
+
"collection_embedding_fn": "openai",
|
| 14 |
+
"unit": "sloka",
|
| 15 |
+
"unit_field": "_global_index",
|
| 16 |
+
|
| 17 |
+
"field_mapping": {
|
| 18 |
+
"text": "lyrics_sa",
|
| 19 |
+
"chapter_name": "chapter_name",
|
| 20 |
+
"unit_index": "_global_index",
|
| 21 |
+
"relative_path": lambda doc: (
|
| 22 |
+
" | ".join(
|
| 23 |
+
[
|
| 24 |
+
doc[key]
|
| 25 |
+
for key in sorted(
|
| 26 |
+
[k for k in doc.keys() if k.startswith("title_") and doc[k]],
|
| 27 |
+
key=lambda x: int(x.split("_")[1])
|
| 28 |
+
)
|
| 29 |
+
]
|
| 30 |
+
)
|
| 31 |
+
+ f" || {doc.get('_global_index', '')} ||"
|
| 32 |
+
)
|
| 33 |
+
},
|
| 34 |
+
|
| 35 |
+
"metadata_fields": [
|
| 36 |
+
{
|
| 37 |
+
"name": "_global_index",
|
| 38 |
+
"datatype": "int",
|
| 39 |
+
"label": "Verse Index",
|
| 40 |
+
"description": "Absolute verse index",
|
| 41 |
+
"show_as_filter": True,
|
| 42 |
+
"is_unique": True,
|
| 43 |
+
},
|
| 44 |
+
{
|
| 45 |
+
"name": "chapter_name",
|
| 46 |
+
"datatype": "str",
|
| 47 |
+
"label": "Chapter",
|
| 48 |
+
"description": "Original chapter title",
|
| 49 |
+
"show_as_filter": True,
|
| 50 |
+
},
|
| 51 |
+
{
|
| 52 |
+
"name": "lyrics_sa",
|
| 53 |
+
"datatype": "str",
|
| 54 |
+
"label": "Sanskrit",
|
| 55 |
+
"description": "Verse text in Sanskrit",
|
| 56 |
+
},
|
| 57 |
+
],
|
| 58 |
+
|
| 59 |
+
"pdf_path": "./data/agnipuranam.pdf",
|
| 60 |
+
"source": "",
|
| 61 |
+
"language": "san",
|
| 62 |
+
"example_labels": [],
|
| 63 |
+
"examples": [],
|
| 64 |
+
"llm_hints": [],
|
| 65 |
+
"credits": {"art": [], "data": [
|
| 66 |
+
{
|
| 67 |
+
"name": "Sanskritam",
|
| 68 |
+
"url": "https://www.sanskritam.world/puranas",
|
| 69 |
+
"role": "Data provider",
|
| 70 |
+
},
|
| 71 |
+
], "audio": [], "video": []},
|
| 72 |
+
}
|
modules/config/bhavishyapuranam.py
ADDED
|
@@ -0,0 +1,65 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from modules.config.categories import ScriptureCategoryConstants
|
| 2 |
+
from modules.google_drive.google_drive_utils import convert_drive_url_to_direct
|
| 3 |
+
|
| 4 |
+
bhavishyapuranam_config = {
|
| 5 |
+
"name": "bhavishyapuranam",
|
| 6 |
+
"title": "Bhavishya Puranam",
|
| 7 |
+
"banner_url": convert_drive_url_to_direct("https://drive.google.com/file/d/1ENd6Es_Ng7mj5ofoqmT2UGCbJM8ZFFVl/view?usp=drive_link"),
|
| 8 |
+
"category": ScriptureCategoryConstants.PURANAM,
|
| 9 |
+
"output_dir": "./output/bhavishyapuranam",
|
| 10 |
+
"collection_name": "bhavishyapuranam",
|
| 11 |
+
"collection_embedding_fn": "openai",
|
| 12 |
+
|
| 13 |
+
"unit": "sloka",
|
| 14 |
+
"unit_field": "_global_index",
|
| 15 |
+
|
| 16 |
+
"field_mapping": {
|
| 17 |
+
"text": "lyrics_sa",
|
| 18 |
+
"chapter_name": "chapter_name",
|
| 19 |
+
"unit_index": "_global_index",
|
| 20 |
+
"relative_path": lambda doc: (
|
| 21 |
+
" | ".join(
|
| 22 |
+
[
|
| 23 |
+
doc[key]
|
| 24 |
+
for key in sorted(
|
| 25 |
+
[k for k in doc.keys() if k.startswith("title_") and doc[k]],
|
| 26 |
+
key=lambda x: int(x.split("_")[1])
|
| 27 |
+
)
|
| 28 |
+
]
|
| 29 |
+
)
|
| 30 |
+
+ f" || {doc.get('_global_index', '')} ||"
|
| 31 |
+
)
|
| 32 |
+
},
|
| 33 |
+
|
| 34 |
+
"metadata_fields": [
|
| 35 |
+
{
|
| 36 |
+
"name": "_global_index",
|
| 37 |
+
"datatype": "int",
|
| 38 |
+
"label": "Verse Index",
|
| 39 |
+
"description": "Absolute verse index",
|
| 40 |
+
"show_as_filter": True,
|
| 41 |
+
"is_unique": True,
|
| 42 |
+
},
|
| 43 |
+
{
|
| 44 |
+
"name": "chapter_name",
|
| 45 |
+
"datatype": "str",
|
| 46 |
+
"label": "Chapter",
|
| 47 |
+
"description": "Original chapter title",
|
| 48 |
+
"show_as_filter": True,
|
| 49 |
+
},
|
| 50 |
+
{
|
| 51 |
+
"name": "lyrics_sa",
|
| 52 |
+
"datatype": "str",
|
| 53 |
+
"label": "Sanskrit",
|
| 54 |
+
"description": "Verse text in Sanskrit",
|
| 55 |
+
},
|
| 56 |
+
],
|
| 57 |
+
|
| 58 |
+
"pdf_path": "./data/bhavishyapuranam.pdf",
|
| 59 |
+
"source": "",
|
| 60 |
+
"language": "san",
|
| 61 |
+
"example_labels": [],
|
| 62 |
+
"examples": [],
|
| 63 |
+
"llm_hints": [],
|
| 64 |
+
"credits": {"art": [], "data": [], "audio": [], "video": []},
|
| 65 |
+
}
|
modules/config/brahmandpuranam.py
ADDED
|
@@ -0,0 +1,65 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from modules.config.categories import ScriptureCategoryConstants
|
| 2 |
+
from modules.google_drive.google_drive_utils import convert_drive_url_to_direct
|
| 3 |
+
|
| 4 |
+
brahmandpuranam_config = {
|
| 5 |
+
"name": "brahmandpuranam",
|
| 6 |
+
"title": "Brahmanda Puranam",
|
| 7 |
+
"banner_url": convert_drive_url_to_direct("https://drive.google.com/file/d/1r0fV1AqASV0LsipTOPzJ7hmgswT5hNBZ/view?usp=drive_link"),
|
| 8 |
+
"category": ScriptureCategoryConstants.PURANAM,
|
| 9 |
+
"output_dir": "./output/brahmandpuranam",
|
| 10 |
+
"collection_name": "brahmandpuranam",
|
| 11 |
+
"collection_embedding_fn": "openai",
|
| 12 |
+
|
| 13 |
+
"unit": "sloka",
|
| 14 |
+
"unit_field": "_global_index",
|
| 15 |
+
|
| 16 |
+
"field_mapping": {
|
| 17 |
+
"text": "lyrics_sa",
|
| 18 |
+
"chapter_name": "chapter_name",
|
| 19 |
+
"unit_index": "_global_index",
|
| 20 |
+
"relative_path": lambda doc: (
|
| 21 |
+
" | ".join(
|
| 22 |
+
[
|
| 23 |
+
doc[key]
|
| 24 |
+
for key in sorted(
|
| 25 |
+
[k for k in doc.keys() if k.startswith("title_") and doc[k]],
|
| 26 |
+
key=lambda x: int(x.split("_")[1])
|
| 27 |
+
)
|
| 28 |
+
]
|
| 29 |
+
)
|
| 30 |
+
+ f" || {doc.get('_global_index', '')} ||"
|
| 31 |
+
)
|
| 32 |
+
},
|
| 33 |
+
|
| 34 |
+
"metadata_fields": [
|
| 35 |
+
{
|
| 36 |
+
"name": "_global_index",
|
| 37 |
+
"datatype": "int",
|
| 38 |
+
"label": "Verse Index",
|
| 39 |
+
"description": "Absolute verse index",
|
| 40 |
+
"show_as_filter": True,
|
| 41 |
+
"is_unique": True,
|
| 42 |
+
},
|
| 43 |
+
{
|
| 44 |
+
"name": "chapter_name",
|
| 45 |
+
"datatype": "str",
|
| 46 |
+
"label": "Chapter",
|
| 47 |
+
"description": "Original chapter title",
|
| 48 |
+
"show_as_filter": True,
|
| 49 |
+
},
|
| 50 |
+
{
|
| 51 |
+
"name": "lyrics_sa",
|
| 52 |
+
"datatype": "str",
|
| 53 |
+
"label": "Sanskrit",
|
| 54 |
+
"description": "Verse text in Sanskrit",
|
| 55 |
+
},
|
| 56 |
+
],
|
| 57 |
+
|
| 58 |
+
"pdf_path": "./data/brahmandpuranam.pdf",
|
| 59 |
+
"source": "",
|
| 60 |
+
"language": "san",
|
| 61 |
+
"example_labels": [],
|
| 62 |
+
"examples": [],
|
| 63 |
+
"llm_hints": [],
|
| 64 |
+
"credits": {"art": [], "data": [], "audio": [], "video": []},
|
| 65 |
+
}
|
modules/config/brahmapuranam.py
ADDED
|
@@ -0,0 +1,65 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from modules.config.categories import ScriptureCategoryConstants
|
| 2 |
+
from modules.google_drive.google_drive_utils import convert_drive_url_to_direct
|
| 3 |
+
|
| 4 |
+
brahmapuranam_config = {
|
| 5 |
+
"name": "brahmapuranam",
|
| 6 |
+
"title": "Brahma Puranam",
|
| 7 |
+
"banner_url": convert_drive_url_to_direct("https://drive.google.com/file/d/1CQn25LkeekY9c592tnzo8Msv0Ib8XiiE/view?usp=drive_link"),
|
| 8 |
+
"category": ScriptureCategoryConstants.PURANAM,
|
| 9 |
+
"output_dir": "./output/brahmapuranam",
|
| 10 |
+
"collection_name": "brahmapuranam",
|
| 11 |
+
"collection_embedding_fn": "openai",
|
| 12 |
+
|
| 13 |
+
"unit": "sloka",
|
| 14 |
+
"unit_field": "_global_index",
|
| 15 |
+
|
| 16 |
+
"field_mapping": {
|
| 17 |
+
"text": "lyrics_sa",
|
| 18 |
+
"chapter_name": "chapter_name",
|
| 19 |
+
"unit_index": "_global_index",
|
| 20 |
+
"relative_path": lambda doc: (
|
| 21 |
+
" | ".join(
|
| 22 |
+
[
|
| 23 |
+
doc[key]
|
| 24 |
+
for key in sorted(
|
| 25 |
+
[k for k in doc.keys() if k.startswith("title_") and doc[k]],
|
| 26 |
+
key=lambda x: int(x.split("_")[1])
|
| 27 |
+
)
|
| 28 |
+
]
|
| 29 |
+
)
|
| 30 |
+
+ f" || {doc.get('_global_index', '')} ||"
|
| 31 |
+
)
|
| 32 |
+
},
|
| 33 |
+
|
| 34 |
+
"metadata_fields": [
|
| 35 |
+
{
|
| 36 |
+
"name": "_global_index",
|
| 37 |
+
"datatype": "int",
|
| 38 |
+
"label": "Verse Index",
|
| 39 |
+
"description": "Absolute verse index",
|
| 40 |
+
"show_as_filter": True,
|
| 41 |
+
"is_unique": True,
|
| 42 |
+
},
|
| 43 |
+
{
|
| 44 |
+
"name": "chapter_name",
|
| 45 |
+
"datatype": "str",
|
| 46 |
+
"label": "Chapter",
|
| 47 |
+
"description": "Original chapter title",
|
| 48 |
+
"show_as_filter": True,
|
| 49 |
+
},
|
| 50 |
+
{
|
| 51 |
+
"name": "lyrics_sa",
|
| 52 |
+
"datatype": "str",
|
| 53 |
+
"label": "Sanskrit",
|
| 54 |
+
"description": "Verse text in Sanskrit",
|
| 55 |
+
},
|
| 56 |
+
],
|
| 57 |
+
|
| 58 |
+
"pdf_path": "./data/brahmapuranam.pdf",
|
| 59 |
+
"source": "",
|
| 60 |
+
"language": "san",
|
| 61 |
+
"example_labels": [],
|
| 62 |
+
"examples": [],
|
| 63 |
+
"llm_hints": [],
|
| 64 |
+
"credits": {"art": [], "data": [], "audio": [], "video": []},
|
| 65 |
+
}
|
modules/config/brahmavaivarthapurana.py
ADDED
|
@@ -0,0 +1,65 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from modules.config.categories import ScriptureCategoryConstants
|
| 2 |
+
from modules.google_drive.google_drive_utils import convert_drive_url_to_direct
|
| 3 |
+
|
| 4 |
+
brahmavaivarthapurana_config = {
|
| 5 |
+
"name": "brahmavaivarthapurana",
|
| 6 |
+
"title": "Brahma Vaivarta Puranam",
|
| 7 |
+
"banner_url": convert_drive_url_to_direct("https://drive.google.com/file/d/1SRLUvGvJaQ3SUaoEQMgy0ACTPGh1Sw6S/view?usp=drive_link"),
|
| 8 |
+
"category": ScriptureCategoryConstants.PURANAM,
|
| 9 |
+
"output_dir": "./output/brahmavaivarthapurana",
|
| 10 |
+
"collection_name": "brahmavaivarthapurana",
|
| 11 |
+
"collection_embedding_fn": "openai",
|
| 12 |
+
|
| 13 |
+
"unit": "sloka",
|
| 14 |
+
"unit_field": "_global_index",
|
| 15 |
+
|
| 16 |
+
"field_mapping": {
|
| 17 |
+
"text": "lyrics_sa",
|
| 18 |
+
"chapter_name": "chapter_name",
|
| 19 |
+
"unit_index": "_global_index",
|
| 20 |
+
"relative_path": lambda doc: (
|
| 21 |
+
" | ".join(
|
| 22 |
+
[
|
| 23 |
+
doc[key]
|
| 24 |
+
for key in sorted(
|
| 25 |
+
[k for k in doc.keys() if k.startswith("title_") and doc[k]],
|
| 26 |
+
key=lambda x: int(x.split("_")[1])
|
| 27 |
+
)
|
| 28 |
+
]
|
| 29 |
+
)
|
| 30 |
+
+ f" || {doc.get('_global_index', '')} ||"
|
| 31 |
+
)
|
| 32 |
+
},
|
| 33 |
+
|
| 34 |
+
"metadata_fields": [
|
| 35 |
+
{
|
| 36 |
+
"name": "_global_index",
|
| 37 |
+
"datatype": "int",
|
| 38 |
+
"label": "Verse Index",
|
| 39 |
+
"description": "Absolute verse index",
|
| 40 |
+
"show_as_filter": True,
|
| 41 |
+
"is_unique": True,
|
| 42 |
+
},
|
| 43 |
+
{
|
| 44 |
+
"name": "chapter_name",
|
| 45 |
+
"datatype": "str",
|
| 46 |
+
"label": "Chapter",
|
| 47 |
+
"description": "Original chapter title",
|
| 48 |
+
"show_as_filter": True,
|
| 49 |
+
},
|
| 50 |
+
{
|
| 51 |
+
"name": "lyrics_sa",
|
| 52 |
+
"datatype": "str",
|
| 53 |
+
"label": "Sanskrit",
|
| 54 |
+
"description": "Verse text in Sanskrit",
|
| 55 |
+
},
|
| 56 |
+
],
|
| 57 |
+
|
| 58 |
+
"pdf_path": "./data/brahmavaivarthapurana.pdf",
|
| 59 |
+
"source": "",
|
| 60 |
+
"language": "san",
|
| 61 |
+
"example_labels": [],
|
| 62 |
+
"examples": [],
|
| 63 |
+
"llm_hints": [],
|
| 64 |
+
"credits": {"art": [], "data": [], "audio": [], "video": []},
|
| 65 |
+
}
|
modules/config/garudapuranam.py
ADDED
|
@@ -0,0 +1,65 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from modules.config.categories import ScriptureCategoryConstants
|
| 2 |
+
from modules.google_drive.google_drive_utils import convert_drive_url_to_direct
|
| 3 |
+
|
| 4 |
+
garudapuranam_config = {
|
| 5 |
+
"name": "garudapuranam",
|
| 6 |
+
"title": "Garuda Puranam",
|
| 7 |
+
"banner_url": convert_drive_url_to_direct("https://drive.google.com/file/d/1jhGecKwco5RzGQzSVa8PiuFKJ26LHJ0p/view?usp=drive_link"),
|
| 8 |
+
"category": ScriptureCategoryConstants.PURANAM,
|
| 9 |
+
"output_dir": "./output/garudapuranam",
|
| 10 |
+
"collection_name": "garudapuranam",
|
| 11 |
+
"collection_embedding_fn": "openai",
|
| 12 |
+
|
| 13 |
+
"unit": "sloka",
|
| 14 |
+
"unit_field": "_global_index",
|
| 15 |
+
|
| 16 |
+
"field_mapping": {
|
| 17 |
+
"text": "lyrics_sa",
|
| 18 |
+
"chapter_name": "chapter_name",
|
| 19 |
+
"unit_index": "_global_index",
|
| 20 |
+
"relative_path": lambda doc: (
|
| 21 |
+
" | ".join(
|
| 22 |
+
[
|
| 23 |
+
doc[key]
|
| 24 |
+
for key in sorted(
|
| 25 |
+
[k for k in doc.keys() if k.startswith("title_") and doc[k]],
|
| 26 |
+
key=lambda x: int(x.split("_")[1])
|
| 27 |
+
)
|
| 28 |
+
]
|
| 29 |
+
)
|
| 30 |
+
+ f" || {doc.get('_global_index', '')} ||"
|
| 31 |
+
)
|
| 32 |
+
},
|
| 33 |
+
|
| 34 |
+
"metadata_fields": [
|
| 35 |
+
{
|
| 36 |
+
"name": "_global_index",
|
| 37 |
+
"datatype": "int",
|
| 38 |
+
"label": "Verse Index",
|
| 39 |
+
"description": "Absolute verse index",
|
| 40 |
+
"show_as_filter": True,
|
| 41 |
+
"is_unique": True,
|
| 42 |
+
},
|
| 43 |
+
{
|
| 44 |
+
"name": "chapter_name",
|
| 45 |
+
"datatype": "str",
|
| 46 |
+
"label": "Chapter",
|
| 47 |
+
"description": "Original chapter title",
|
| 48 |
+
"show_as_filter": True,
|
| 49 |
+
},
|
| 50 |
+
{
|
| 51 |
+
"name": "lyrics_sa",
|
| 52 |
+
"datatype": "str",
|
| 53 |
+
"label": "Sanskrit",
|
| 54 |
+
"description": "Verse text in Sanskrit",
|
| 55 |
+
},
|
| 56 |
+
],
|
| 57 |
+
|
| 58 |
+
"pdf_path": "./data/garudapuranam.pdf",
|
| 59 |
+
"source": "",
|
| 60 |
+
"language": "san",
|
| 61 |
+
"example_labels": [],
|
| 62 |
+
"examples": [],
|
| 63 |
+
"llm_hints": [],
|
| 64 |
+
"credits": {"art": [], "data": [], "audio": [], "video": []},
|
| 65 |
+
}
|
modules/config/harivanshapuraanam.py
ADDED
|
@@ -0,0 +1,65 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from modules.config.categories import ScriptureCategoryConstants
|
| 2 |
+
from modules.google_drive.google_drive_utils import convert_drive_url_to_direct
|
| 3 |
+
|
| 4 |
+
harivanshapuraanam_config = {
|
| 5 |
+
"name": "harivanshapuraanam",
|
| 6 |
+
"title": "Harivansha Puranam",
|
| 7 |
+
"banner_url": convert_drive_url_to_direct("https://drive.google.com/file/d/1uWkzBJWin19aS7_TE545E5cwOXGpTPrx/view?usp=drive_link"),
|
| 8 |
+
"category": ScriptureCategoryConstants.PURANAM,
|
| 9 |
+
"output_dir": "./output/harivanshapuraanam",
|
| 10 |
+
"collection_name": "harivanshapuraanam",
|
| 11 |
+
"collection_embedding_fn": "openai",
|
| 12 |
+
|
| 13 |
+
"unit": "sloka",
|
| 14 |
+
"unit_field": "_global_index",
|
| 15 |
+
|
| 16 |
+
"field_mapping": {
|
| 17 |
+
"text": "lyrics_sa",
|
| 18 |
+
"chapter_name": "chapter_name",
|
| 19 |
+
"unit_index": "_global_index",
|
| 20 |
+
"relative_path": lambda doc: (
|
| 21 |
+
" | ".join(
|
| 22 |
+
[
|
| 23 |
+
doc[key]
|
| 24 |
+
for key in sorted(
|
| 25 |
+
[k for k in doc.keys() if k.startswith("title_") and doc[k]],
|
| 26 |
+
key=lambda x: int(x.split("_")[1])
|
| 27 |
+
)
|
| 28 |
+
]
|
| 29 |
+
)
|
| 30 |
+
+ f" || {doc.get('_global_index', '')} ||"
|
| 31 |
+
)
|
| 32 |
+
},
|
| 33 |
+
|
| 34 |
+
"metadata_fields": [
|
| 35 |
+
{
|
| 36 |
+
"name": "_global_index",
|
| 37 |
+
"datatype": "int",
|
| 38 |
+
"label": "Verse Index",
|
| 39 |
+
"description": "Absolute verse index",
|
| 40 |
+
"show_as_filter": True,
|
| 41 |
+
"is_unique": True,
|
| 42 |
+
},
|
| 43 |
+
{
|
| 44 |
+
"name": "chapter_name",
|
| 45 |
+
"datatype": "str",
|
| 46 |
+
"label": "Chapter",
|
| 47 |
+
"description": "Original chapter title",
|
| 48 |
+
"show_as_filter": True,
|
| 49 |
+
},
|
| 50 |
+
{
|
| 51 |
+
"name": "lyrics_sa",
|
| 52 |
+
"datatype": "str",
|
| 53 |
+
"label": "Sanskrit",
|
| 54 |
+
"description": "Verse text in Sanskrit",
|
| 55 |
+
},
|
| 56 |
+
],
|
| 57 |
+
|
| 58 |
+
"pdf_path": "./data/harivanshapuraanam.pdf",
|
| 59 |
+
"source": "",
|
| 60 |
+
"language": "san",
|
| 61 |
+
"example_labels": [],
|
| 62 |
+
"examples": [],
|
| 63 |
+
"llm_hints": [],
|
| 64 |
+
"credits": {"art": [], "data": [], "audio": [], "video": []},
|
| 65 |
+
}
|
modules/config/kurmapuranam.py
ADDED
|
@@ -0,0 +1,65 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from modules.config.categories import ScriptureCategoryConstants
|
| 2 |
+
from modules.google_drive.google_drive_utils import convert_drive_url_to_direct
|
| 3 |
+
|
| 4 |
+
kurmapuranam_config = {
|
| 5 |
+
"name": "kurmapuranam",
|
| 6 |
+
"title": "Kurma Puranam",
|
| 7 |
+
"banner_url": convert_drive_url_to_direct("https://drive.google.com/file/d/1Y1epvVgGzyKlWSFEI2ep8bNwT6G7G6II/view?usp=drive_link"),
|
| 8 |
+
"category": ScriptureCategoryConstants.PURANAM,
|
| 9 |
+
"output_dir": "./output/kurmapuranam",
|
| 10 |
+
"collection_name": "kurmapuranam",
|
| 11 |
+
"collection_embedding_fn": "openai",
|
| 12 |
+
|
| 13 |
+
"unit": "sloka",
|
| 14 |
+
"unit_field": "_global_index",
|
| 15 |
+
|
| 16 |
+
"field_mapping": {
|
| 17 |
+
"text": "lyrics_sa",
|
| 18 |
+
"chapter_name": "chapter_name",
|
| 19 |
+
"unit_index": "_global_index",
|
| 20 |
+
"relative_path": lambda doc: (
|
| 21 |
+
" | ".join(
|
| 22 |
+
[
|
| 23 |
+
doc[key]
|
| 24 |
+
for key in sorted(
|
| 25 |
+
[k for k in doc.keys() if k.startswith("title_") and doc[k]],
|
| 26 |
+
key=lambda x: int(x.split("_")[1])
|
| 27 |
+
)
|
| 28 |
+
]
|
| 29 |
+
)
|
| 30 |
+
+ f" || {doc.get('_global_index', '')} ||"
|
| 31 |
+
)
|
| 32 |
+
},
|
| 33 |
+
|
| 34 |
+
"metadata_fields": [
|
| 35 |
+
{
|
| 36 |
+
"name": "_global_index",
|
| 37 |
+
"datatype": "int",
|
| 38 |
+
"label": "Verse Index",
|
| 39 |
+
"description": "Absolute verse index",
|
| 40 |
+
"show_as_filter": True,
|
| 41 |
+
"is_unique": True,
|
| 42 |
+
},
|
| 43 |
+
{
|
| 44 |
+
"name": "chapter_name",
|
| 45 |
+
"datatype": "str",
|
| 46 |
+
"label": "Chapter",
|
| 47 |
+
"description": "Original chapter title",
|
| 48 |
+
"show_as_filter": True,
|
| 49 |
+
},
|
| 50 |
+
{
|
| 51 |
+
"name": "lyrics_sa",
|
| 52 |
+
"datatype": "str",
|
| 53 |
+
"label": "Sanskrit",
|
| 54 |
+
"description": "Verse text in Sanskrit",
|
| 55 |
+
},
|
| 56 |
+
],
|
| 57 |
+
|
| 58 |
+
"pdf_path": "./data/kurmapuranam.pdf",
|
| 59 |
+
"source": "",
|
| 60 |
+
"language": "san",
|
| 61 |
+
"example_labels": [],
|
| 62 |
+
"examples": [],
|
| 63 |
+
"llm_hints": [],
|
| 64 |
+
"credits": {"art": [], "data": [], "audio": [], "video": []},
|
| 65 |
+
}
|
modules/config/lingapuranam.py
ADDED
|
@@ -0,0 +1,65 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from modules.config.categories import ScriptureCategoryConstants
|
| 2 |
+
from modules.google_drive.google_drive_utils import convert_drive_url_to_direct
|
| 3 |
+
|
| 4 |
+
lingapuranam_config = {
|
| 5 |
+
"name": "lingapuranam",
|
| 6 |
+
"title": "Linga Puranam",
|
| 7 |
+
"banner_url": convert_drive_url_to_direct("https://drive.google.com/file/d/12Jg8WOnwwp2dCvFLKf2UN1LrFc4bpK7Q/view?usp=drive_link"),
|
| 8 |
+
"category": ScriptureCategoryConstants.PURANAM,
|
| 9 |
+
"output_dir": "./output/lingapuranam",
|
| 10 |
+
"collection_name": "lingapuranam",
|
| 11 |
+
"collection_embedding_fn": "openai",
|
| 12 |
+
|
| 13 |
+
"unit": "sloka",
|
| 14 |
+
"unit_field": "_global_index",
|
| 15 |
+
|
| 16 |
+
"field_mapping": {
|
| 17 |
+
"text": "lyrics_sa",
|
| 18 |
+
"chapter_name": "chapter_name",
|
| 19 |
+
"unit_index": "_global_index",
|
| 20 |
+
"relative_path": lambda doc: (
|
| 21 |
+
" | ".join(
|
| 22 |
+
[
|
| 23 |
+
doc[key]
|
| 24 |
+
for key in sorted(
|
| 25 |
+
[k for k in doc.keys() if k.startswith("title_") and doc[k]],
|
| 26 |
+
key=lambda x: int(x.split("_")[1])
|
| 27 |
+
)
|
| 28 |
+
]
|
| 29 |
+
)
|
| 30 |
+
+ f" || {doc.get('_global_index', '')} ||"
|
| 31 |
+
)
|
| 32 |
+
},
|
| 33 |
+
|
| 34 |
+
"metadata_fields": [
|
| 35 |
+
{
|
| 36 |
+
"name": "_global_index",
|
| 37 |
+
"datatype": "int",
|
| 38 |
+
"label": "Verse Index",
|
| 39 |
+
"description": "Absolute verse index",
|
| 40 |
+
"show_as_filter": True,
|
| 41 |
+
"is_unique": True,
|
| 42 |
+
},
|
| 43 |
+
{
|
| 44 |
+
"name": "chapter_name",
|
| 45 |
+
"datatype": "str",
|
| 46 |
+
"label": "Chapter",
|
| 47 |
+
"description": "Original chapter title",
|
| 48 |
+
"show_as_filter": True,
|
| 49 |
+
},
|
| 50 |
+
{
|
| 51 |
+
"name": "lyrics_sa",
|
| 52 |
+
"datatype": "str",
|
| 53 |
+
"label": "Sanskrit",
|
| 54 |
+
"description": "Verse text in Sanskrit",
|
| 55 |
+
},
|
| 56 |
+
],
|
| 57 |
+
|
| 58 |
+
"pdf_path": "./data/lingapuranam.pdf",
|
| 59 |
+
"source": "",
|
| 60 |
+
"language": "san",
|
| 61 |
+
"example_labels": [],
|
| 62 |
+
"examples": [],
|
| 63 |
+
"llm_hints": [],
|
| 64 |
+
"credits": {"art": [], "data": [], "audio": [], "video": []},
|
| 65 |
+
}
|
modules/config/markandeypuranam.py
ADDED
|
@@ -0,0 +1,65 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from modules.config.categories import ScriptureCategoryConstants
|
| 2 |
+
from modules.google_drive.google_drive_utils import convert_drive_url_to_direct
|
| 3 |
+
|
| 4 |
+
markandeypuranam_config = {
|
| 5 |
+
"name": "markandeypuranam",
|
| 6 |
+
"title": "Markandeya Puranam",
|
| 7 |
+
"banner_url": convert_drive_url_to_direct("https://drive.google.com/file/d/101ibf90vyBpKdcMQCvooQ30JEBL2XMlm/view?usp=drive_link"),
|
| 8 |
+
"category": ScriptureCategoryConstants.PURANAM,
|
| 9 |
+
"output_dir": "./output/markandeypuranam",
|
| 10 |
+
"collection_name": "markandeypuranam",
|
| 11 |
+
"collection_embedding_fn": "openai",
|
| 12 |
+
|
| 13 |
+
"unit": "sloka",
|
| 14 |
+
"unit_field": "_global_index",
|
| 15 |
+
|
| 16 |
+
"field_mapping": {
|
| 17 |
+
"text": "lyrics_sa",
|
| 18 |
+
"chapter_name": "chapter_name",
|
| 19 |
+
"unit_index": "_global_index",
|
| 20 |
+
"relative_path": lambda doc: (
|
| 21 |
+
" | ".join(
|
| 22 |
+
[
|
| 23 |
+
doc[key]
|
| 24 |
+
for key in sorted(
|
| 25 |
+
[k for k in doc.keys() if k.startswith("title_") and doc[k]],
|
| 26 |
+
key=lambda x: int(x.split("_")[1])
|
| 27 |
+
)
|
| 28 |
+
]
|
| 29 |
+
)
|
| 30 |
+
+ f" || {doc.get('_global_index', '')} ||"
|
| 31 |
+
)
|
| 32 |
+
},
|
| 33 |
+
|
| 34 |
+
"metadata_fields": [
|
| 35 |
+
{
|
| 36 |
+
"name": "_global_index",
|
| 37 |
+
"datatype": "int",
|
| 38 |
+
"label": "Verse Index",
|
| 39 |
+
"description": "Absolute verse index",
|
| 40 |
+
"show_as_filter": True,
|
| 41 |
+
"is_unique": True,
|
| 42 |
+
},
|
| 43 |
+
{
|
| 44 |
+
"name": "chapter_name",
|
| 45 |
+
"datatype": "str",
|
| 46 |
+
"label": "Chapter",
|
| 47 |
+
"description": "Original chapter title",
|
| 48 |
+
"show_as_filter": True,
|
| 49 |
+
},
|
| 50 |
+
{
|
| 51 |
+
"name": "lyrics_sa",
|
| 52 |
+
"datatype": "str",
|
| 53 |
+
"label": "Sanskrit",
|
| 54 |
+
"description": "Verse text in Sanskrit",
|
| 55 |
+
},
|
| 56 |
+
],
|
| 57 |
+
|
| 58 |
+
"pdf_path": "./data/markandeypuranam.pdf",
|
| 59 |
+
"source": "",
|
| 60 |
+
"language": "san",
|
| 61 |
+
"example_labels": [],
|
| 62 |
+
"examples": [],
|
| 63 |
+
"llm_hints": [],
|
| 64 |
+
"credits": {"art": [], "data": [], "audio": [], "video": []},
|
| 65 |
+
}
|
modules/config/matsyapuranam.py
ADDED
|
@@ -0,0 +1,65 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from modules.config.categories import ScriptureCategoryConstants
|
| 2 |
+
from modules.google_drive.google_drive_utils import convert_drive_url_to_direct
|
| 3 |
+
|
| 4 |
+
matsyapuranam_config = {
|
| 5 |
+
"name": "matsyapuranam",
|
| 6 |
+
"title": "Matsya Puranam",
|
| 7 |
+
"banner_url": convert_drive_url_to_direct("https://drive.google.com/file/d/1DxlpvcRqHGTFsE0tDgxThzUy-F0gmlMb/view?usp=drive_link"),
|
| 8 |
+
"category": ScriptureCategoryConstants.PURANAM,
|
| 9 |
+
"output_dir": "./output/matsyapuranam",
|
| 10 |
+
"collection_name": "matsyapuranam",
|
| 11 |
+
"collection_embedding_fn": "openai",
|
| 12 |
+
|
| 13 |
+
"unit": "sloka",
|
| 14 |
+
"unit_field": "_global_index",
|
| 15 |
+
|
| 16 |
+
"field_mapping": {
|
| 17 |
+
"text": "lyrics_sa",
|
| 18 |
+
"chapter_name": "chapter_name",
|
| 19 |
+
"unit_index": "_global_index",
|
| 20 |
+
"relative_path": lambda doc: (
|
| 21 |
+
" | ".join(
|
| 22 |
+
[
|
| 23 |
+
doc[key]
|
| 24 |
+
for key in sorted(
|
| 25 |
+
[k for k in doc.keys() if k.startswith("title_") and doc[k]],
|
| 26 |
+
key=lambda x: int(x.split("_")[1])
|
| 27 |
+
)
|
| 28 |
+
]
|
| 29 |
+
)
|
| 30 |
+
+ f" || {doc.get('_global_index', '')} ||"
|
| 31 |
+
)
|
| 32 |
+
},
|
| 33 |
+
|
| 34 |
+
"metadata_fields": [
|
| 35 |
+
{
|
| 36 |
+
"name": "_global_index",
|
| 37 |
+
"datatype": "int",
|
| 38 |
+
"label": "Verse Index",
|
| 39 |
+
"description": "Absolute verse index",
|
| 40 |
+
"show_as_filter": True,
|
| 41 |
+
"is_unique": True,
|
| 42 |
+
},
|
| 43 |
+
{
|
| 44 |
+
"name": "chapter_name",
|
| 45 |
+
"datatype": "str",
|
| 46 |
+
"label": "Chapter",
|
| 47 |
+
"description": "Original chapter title",
|
| 48 |
+
"show_as_filter": True,
|
| 49 |
+
},
|
| 50 |
+
{
|
| 51 |
+
"name": "lyrics_sa",
|
| 52 |
+
"datatype": "str",
|
| 53 |
+
"label": "Sanskrit",
|
| 54 |
+
"description": "Verse text in Sanskrit",
|
| 55 |
+
},
|
| 56 |
+
],
|
| 57 |
+
|
| 58 |
+
"pdf_path": "./data/matsyapuranam.pdf",
|
| 59 |
+
"source": "",
|
| 60 |
+
"language": "san",
|
| 61 |
+
"example_labels": [],
|
| 62 |
+
"examples": [],
|
| 63 |
+
"llm_hints": [],
|
| 64 |
+
"credits": {"art": [], "data": [], "audio": [], "video": []},
|
| 65 |
+
}
|
modules/config/naradapuranam.py
ADDED
|
@@ -0,0 +1,65 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from modules.config.categories import ScriptureCategoryConstants
|
| 2 |
+
from modules.google_drive.google_drive_utils import convert_drive_url_to_direct
|
| 3 |
+
|
| 4 |
+
naradapuranam_config = {
|
| 5 |
+
"name": "naradapuranam",
|
| 6 |
+
"title": "Narada Puranam",
|
| 7 |
+
"banner_url": convert_drive_url_to_direct("https://drive.google.com/file/d/1M-qDqwo9iCweONGDzhqUyjYcjJFgWpRd/view?usp=drive_link"),
|
| 8 |
+
"category": ScriptureCategoryConstants.PURANAM,
|
| 9 |
+
"output_dir": "./output/naradapuranam",
|
| 10 |
+
"collection_name": "naradapuranam",
|
| 11 |
+
"collection_embedding_fn": "openai",
|
| 12 |
+
|
| 13 |
+
"unit": "sloka",
|
| 14 |
+
"unit_field": "_global_index",
|
| 15 |
+
|
| 16 |
+
"field_mapping": {
|
| 17 |
+
"text": "lyrics_sa",
|
| 18 |
+
"chapter_name": "chapter_name",
|
| 19 |
+
"unit_index": "_global_index",
|
| 20 |
+
"relative_path": lambda doc: (
|
| 21 |
+
" | ".join(
|
| 22 |
+
[
|
| 23 |
+
doc[key]
|
| 24 |
+
for key in sorted(
|
| 25 |
+
[k for k in doc.keys() if k.startswith("title_") and doc[k]],
|
| 26 |
+
key=lambda x: int(x.split("_")[1])
|
| 27 |
+
)
|
| 28 |
+
]
|
| 29 |
+
)
|
| 30 |
+
+ f" || {doc.get('_global_index', '')} ||"
|
| 31 |
+
)
|
| 32 |
+
},
|
| 33 |
+
|
| 34 |
+
"metadata_fields": [
|
| 35 |
+
{
|
| 36 |
+
"name": "_global_index",
|
| 37 |
+
"datatype": "int",
|
| 38 |
+
"label": "Verse Index",
|
| 39 |
+
"description": "Absolute verse index",
|
| 40 |
+
"show_as_filter": True,
|
| 41 |
+
"is_unique": True,
|
| 42 |
+
},
|
| 43 |
+
{
|
| 44 |
+
"name": "chapter_name",
|
| 45 |
+
"datatype": "str",
|
| 46 |
+
"label": "Chapter",
|
| 47 |
+
"description": "Original chapter title",
|
| 48 |
+
"show_as_filter": True,
|
| 49 |
+
},
|
| 50 |
+
{
|
| 51 |
+
"name": "lyrics_sa",
|
| 52 |
+
"datatype": "str",
|
| 53 |
+
"label": "Sanskrit",
|
| 54 |
+
"description": "Verse text in Sanskrit",
|
| 55 |
+
},
|
| 56 |
+
],
|
| 57 |
+
|
| 58 |
+
"pdf_path": "./data/naradapuranam.pdf",
|
| 59 |
+
"source": "",
|
| 60 |
+
"language": "san",
|
| 61 |
+
"example_labels": [],
|
| 62 |
+
"examples": [],
|
| 63 |
+
"llm_hints": [],
|
| 64 |
+
"credits": {"art": [], "data": [], "audio": [], "video": []},
|
| 65 |
+
}
|
modules/config/padmapuranam.py
ADDED
|
@@ -0,0 +1,65 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from modules.config.categories import ScriptureCategoryConstants
|
| 2 |
+
from modules.google_drive.google_drive_utils import convert_drive_url_to_direct
|
| 3 |
+
|
| 4 |
+
padmapuranam_config = {
|
| 5 |
+
"name": "padmapuranam",
|
| 6 |
+
"title": "Padma Puranam",
|
| 7 |
+
"banner_url": convert_drive_url_to_direct("https://drive.google.com/file/d/1BC5tIabDwAFw4-UmXdZcae3FrI7vR3ek/view?usp=drive_link"),
|
| 8 |
+
"category": ScriptureCategoryConstants.PURANAM,
|
| 9 |
+
"output_dir": "./output/padmapuranam",
|
| 10 |
+
"collection_name": "padmapuranam",
|
| 11 |
+
"collection_embedding_fn": "openai",
|
| 12 |
+
|
| 13 |
+
"unit": "sloka",
|
| 14 |
+
"unit_field": "_global_index",
|
| 15 |
+
|
| 16 |
+
"field_mapping": {
|
| 17 |
+
"text": "lyrics_sa",
|
| 18 |
+
"chapter_name": "chapter_name",
|
| 19 |
+
"unit_index": "_global_index",
|
| 20 |
+
"relative_path": lambda doc: (
|
| 21 |
+
" | ".join(
|
| 22 |
+
[
|
| 23 |
+
doc[key]
|
| 24 |
+
for key in sorted(
|
| 25 |
+
[k for k in doc.keys() if k.startswith("title_") and doc[k]],
|
| 26 |
+
key=lambda x: int(x.split("_")[1])
|
| 27 |
+
)
|
| 28 |
+
]
|
| 29 |
+
)
|
| 30 |
+
+ f" || {doc.get('_global_index', '')} ||"
|
| 31 |
+
)
|
| 32 |
+
},
|
| 33 |
+
|
| 34 |
+
"metadata_fields": [
|
| 35 |
+
{
|
| 36 |
+
"name": "_global_index",
|
| 37 |
+
"datatype": "int",
|
| 38 |
+
"label": "Verse Index",
|
| 39 |
+
"description": "Absolute verse index",
|
| 40 |
+
"show_as_filter": True,
|
| 41 |
+
"is_unique": True,
|
| 42 |
+
},
|
| 43 |
+
{
|
| 44 |
+
"name": "chapter_name",
|
| 45 |
+
"datatype": "str",
|
| 46 |
+
"label": "Chapter",
|
| 47 |
+
"description": "Original chapter title",
|
| 48 |
+
"show_as_filter": True,
|
| 49 |
+
},
|
| 50 |
+
{
|
| 51 |
+
"name": "lyrics_sa",
|
| 52 |
+
"datatype": "str",
|
| 53 |
+
"label": "Sanskrit",
|
| 54 |
+
"description": "Verse text in Sanskrit",
|
| 55 |
+
},
|
| 56 |
+
],
|
| 57 |
+
|
| 58 |
+
"pdf_path": "./data/padmapuranam.pdf",
|
| 59 |
+
"source": "",
|
| 60 |
+
"language": "san",
|
| 61 |
+
"example_labels": [],
|
| 62 |
+
"examples": [],
|
| 63 |
+
"llm_hints": [],
|
| 64 |
+
"credits": {"art": [], "data": [], "audio": [], "video": []},
|
| 65 |
+
}
|
modules/config/shivapuraanam.py
ADDED
|
@@ -0,0 +1,65 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from modules.config.categories import ScriptureCategoryConstants
|
| 2 |
+
from modules.google_drive.google_drive_utils import convert_drive_url_to_direct
|
| 3 |
+
|
| 4 |
+
shivapuraanam_config = {
|
| 5 |
+
"name": "shivapuraanam",
|
| 6 |
+
"title": "Shiva Puranam",
|
| 7 |
+
"banner_url": convert_drive_url_to_direct("https://drive.google.com/file/d/1kgLfcskt00H_lkJtSgf8FDyJqiikl3Eu/view?usp=drive_link"),
|
| 8 |
+
"category": ScriptureCategoryConstants.PURANAM,
|
| 9 |
+
"output_dir": "./output/shivapuraanam",
|
| 10 |
+
"collection_name": "shivapuraanam",
|
| 11 |
+
"collection_embedding_fn": "openai",
|
| 12 |
+
|
| 13 |
+
"unit": "sloka",
|
| 14 |
+
"unit_field": "_global_index",
|
| 15 |
+
|
| 16 |
+
"field_mapping": {
|
| 17 |
+
"text": "lyrics_sa",
|
| 18 |
+
"chapter_name": "chapter_name",
|
| 19 |
+
"unit_index": "_global_index",
|
| 20 |
+
"relative_path": lambda doc: (
|
| 21 |
+
" | ".join(
|
| 22 |
+
[
|
| 23 |
+
doc[key]
|
| 24 |
+
for key in sorted(
|
| 25 |
+
[k for k in doc.keys() if k.startswith("title_") and doc[k]],
|
| 26 |
+
key=lambda x: int(x.split("_")[1])
|
| 27 |
+
)
|
| 28 |
+
]
|
| 29 |
+
)
|
| 30 |
+
+ f" || {doc.get('_global_index', '')} ||"
|
| 31 |
+
)
|
| 32 |
+
},
|
| 33 |
+
|
| 34 |
+
"metadata_fields": [
|
| 35 |
+
{
|
| 36 |
+
"name": "_global_index",
|
| 37 |
+
"datatype": "int",
|
| 38 |
+
"label": "Verse Index",
|
| 39 |
+
"description": "Absolute verse index",
|
| 40 |
+
"show_as_filter": True,
|
| 41 |
+
"is_unique": True,
|
| 42 |
+
},
|
| 43 |
+
{
|
| 44 |
+
"name": "chapter_name",
|
| 45 |
+
"datatype": "str",
|
| 46 |
+
"label": "Chapter",
|
| 47 |
+
"description": "Original chapter title",
|
| 48 |
+
"show_as_filter": True,
|
| 49 |
+
},
|
| 50 |
+
{
|
| 51 |
+
"name": "lyrics_sa",
|
| 52 |
+
"datatype": "str",
|
| 53 |
+
"label": "Sanskrit",
|
| 54 |
+
"description": "Verse text in Sanskrit",
|
| 55 |
+
},
|
| 56 |
+
],
|
| 57 |
+
|
| 58 |
+
"pdf_path": "./data/shivapuraanam.pdf",
|
| 59 |
+
"source": "",
|
| 60 |
+
"language": "san",
|
| 61 |
+
"example_labels": [],
|
| 62 |
+
"examples": [],
|
| 63 |
+
"llm_hints": [],
|
| 64 |
+
"credits": {"art": [], "data": [], "audio": [], "video": []},
|
| 65 |
+
}
|
modules/config/skandapuranam.py
ADDED
|
@@ -0,0 +1,65 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from modules.config.categories import ScriptureCategoryConstants
|
| 2 |
+
from modules.google_drive.google_drive_utils import convert_drive_url_to_direct
|
| 3 |
+
|
| 4 |
+
skandapuranam_config = {
|
| 5 |
+
"name": "skandapuranam",
|
| 6 |
+
"title": "Skanda Puranam",
|
| 7 |
+
"banner_url": convert_drive_url_to_direct("https://drive.google.com/file/d/1J-yM-vFJFye0yV2oOsUAo23yv7PBfy2G/view?usp=drive_link"),
|
| 8 |
+
"category": ScriptureCategoryConstants.PURANAM,
|
| 9 |
+
"output_dir": "./output/skandapuranam",
|
| 10 |
+
"collection_name": "skandapuranam",
|
| 11 |
+
"collection_embedding_fn": "openai",
|
| 12 |
+
|
| 13 |
+
"unit": "sloka",
|
| 14 |
+
"unit_field": "_global_index",
|
| 15 |
+
|
| 16 |
+
"field_mapping": {
|
| 17 |
+
"text": "lyrics_sa",
|
| 18 |
+
"chapter_name": "chapter_name",
|
| 19 |
+
"unit_index": "_global_index",
|
| 20 |
+
"relative_path": lambda doc: (
|
| 21 |
+
" | ".join(
|
| 22 |
+
[
|
| 23 |
+
doc[key]
|
| 24 |
+
for key in sorted(
|
| 25 |
+
[k for k in doc.keys() if k.startswith("title_") and doc[k]],
|
| 26 |
+
key=lambda x: int(x.split("_")[1])
|
| 27 |
+
)
|
| 28 |
+
]
|
| 29 |
+
)
|
| 30 |
+
+ f" || {doc.get('_global_index', '')} ||"
|
| 31 |
+
)
|
| 32 |
+
},
|
| 33 |
+
|
| 34 |
+
"metadata_fields": [
|
| 35 |
+
{
|
| 36 |
+
"name": "_global_index",
|
| 37 |
+
"datatype": "int",
|
| 38 |
+
"label": "Verse Index",
|
| 39 |
+
"description": "Absolute verse index",
|
| 40 |
+
"show_as_filter": True,
|
| 41 |
+
"is_unique": True,
|
| 42 |
+
},
|
| 43 |
+
{
|
| 44 |
+
"name": "chapter_name",
|
| 45 |
+
"datatype": "str",
|
| 46 |
+
"label": "Chapter",
|
| 47 |
+
"description": "Original chapter title",
|
| 48 |
+
"show_as_filter": True,
|
| 49 |
+
},
|
| 50 |
+
{
|
| 51 |
+
"name": "lyrics_sa",
|
| 52 |
+
"datatype": "str",
|
| 53 |
+
"label": "Sanskrit",
|
| 54 |
+
"description": "Verse text in Sanskrit",
|
| 55 |
+
},
|
| 56 |
+
],
|
| 57 |
+
|
| 58 |
+
"pdf_path": "./data/skandapuranam.pdf",
|
| 59 |
+
"source": "",
|
| 60 |
+
"language": "san",
|
| 61 |
+
"example_labels": [],
|
| 62 |
+
"examples": [],
|
| 63 |
+
"llm_hints": [],
|
| 64 |
+
"credits": {"art": [], "data": [], "audio": [], "video": []},
|
| 65 |
+
}
|
modules/config/vaamanapuraanam.py
ADDED
|
@@ -0,0 +1,65 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from modules.config.categories import ScriptureCategoryConstants
|
| 2 |
+
from modules.google_drive.google_drive_utils import convert_drive_url_to_direct
|
| 3 |
+
|
| 4 |
+
vaamanapuraanam_config = {
|
| 5 |
+
"name": "vaamanapuraanam",
|
| 6 |
+
"title": "Vaamana Puranam",
|
| 7 |
+
"banner_url": convert_drive_url_to_direct("https://drive.google.com/file/d/1xnRARh9wnfQ7oy7GxSr7-1mKvRj8Iq64/view?usp=drive_link"),
|
| 8 |
+
"category": ScriptureCategoryConstants.PURANAM,
|
| 9 |
+
"output_dir": "./output/vaamanapuraanam",
|
| 10 |
+
"collection_name": "vaamanapuraanam",
|
| 11 |
+
"collection_embedding_fn": "openai",
|
| 12 |
+
|
| 13 |
+
"unit": "sloka",
|
| 14 |
+
"unit_field": "_global_index",
|
| 15 |
+
|
| 16 |
+
"field_mapping": {
|
| 17 |
+
"text": "lyrics_sa",
|
| 18 |
+
"chapter_name": "chapter_name",
|
| 19 |
+
"unit_index": "_global_index",
|
| 20 |
+
"relative_path": lambda doc: (
|
| 21 |
+
" | ".join(
|
| 22 |
+
[
|
| 23 |
+
doc[key]
|
| 24 |
+
for key in sorted(
|
| 25 |
+
[k for k in doc.keys() if k.startswith("title_") and doc[k]],
|
| 26 |
+
key=lambda x: int(x.split("_")[1])
|
| 27 |
+
)
|
| 28 |
+
]
|
| 29 |
+
)
|
| 30 |
+
+ f" || {doc.get('_global_index', '')} ||"
|
| 31 |
+
)
|
| 32 |
+
},
|
| 33 |
+
|
| 34 |
+
"metadata_fields": [
|
| 35 |
+
{
|
| 36 |
+
"name": "_global_index",
|
| 37 |
+
"datatype": "int",
|
| 38 |
+
"label": "Verse Index",
|
| 39 |
+
"description": "Absolute verse index",
|
| 40 |
+
"show_as_filter": True,
|
| 41 |
+
"is_unique": True,
|
| 42 |
+
},
|
| 43 |
+
{
|
| 44 |
+
"name": "chapter_name",
|
| 45 |
+
"datatype": "str",
|
| 46 |
+
"label": "Chapter",
|
| 47 |
+
"description": "Original chapter title",
|
| 48 |
+
"show_as_filter": True,
|
| 49 |
+
},
|
| 50 |
+
{
|
| 51 |
+
"name": "lyrics_sa",
|
| 52 |
+
"datatype": "str",
|
| 53 |
+
"label": "Sanskrit",
|
| 54 |
+
"description": "Verse text in Sanskrit",
|
| 55 |
+
},
|
| 56 |
+
],
|
| 57 |
+
|
| 58 |
+
"pdf_path": "./data/vaamanapuraanam.pdf",
|
| 59 |
+
"source": "",
|
| 60 |
+
"language": "san",
|
| 61 |
+
"example_labels": [],
|
| 62 |
+
"examples": [],
|
| 63 |
+
"llm_hints": [],
|
| 64 |
+
"credits": {"art": [], "data": [], "audio": [], "video": []},
|
| 65 |
+
}
|
modules/config/vaayupuraanam.py
ADDED
|
@@ -0,0 +1,65 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from modules.config.categories import ScriptureCategoryConstants
|
| 2 |
+
from modules.google_drive.google_drive_utils import convert_drive_url_to_direct
|
| 3 |
+
|
| 4 |
+
vaayupuraanam_config = {
|
| 5 |
+
"name": "vaayupuraanam",
|
| 6 |
+
"title": "Vaayu Puranam",
|
| 7 |
+
"banner_url": convert_drive_url_to_direct("https://drive.google.com/file/d/1nQqZKmGdLnFxSDcqsaq7PP5na1SZQ23h/view?usp=drive_link"),
|
| 8 |
+
"category": ScriptureCategoryConstants.PURANAM,
|
| 9 |
+
"output_dir": "./output/vaayupuraanam",
|
| 10 |
+
"collection_name": "vaayupuraanam",
|
| 11 |
+
"collection_embedding_fn": "openai",
|
| 12 |
+
|
| 13 |
+
"unit": "sloka",
|
| 14 |
+
"unit_field": "_global_index",
|
| 15 |
+
|
| 16 |
+
"field_mapping": {
|
| 17 |
+
"text": "lyrics_sa",
|
| 18 |
+
"chapter_name": "chapter_name",
|
| 19 |
+
"unit_index": "_global_index",
|
| 20 |
+
"relative_path": lambda doc: (
|
| 21 |
+
" | ".join(
|
| 22 |
+
[
|
| 23 |
+
doc[key]
|
| 24 |
+
for key in sorted(
|
| 25 |
+
[k for k in doc.keys() if k.startswith("title_") and doc[k]],
|
| 26 |
+
key=lambda x: int(x.split("_")[1])
|
| 27 |
+
)
|
| 28 |
+
]
|
| 29 |
+
)
|
| 30 |
+
+ f" || {doc.get('_global_index', '')} ||"
|
| 31 |
+
)
|
| 32 |
+
},
|
| 33 |
+
|
| 34 |
+
"metadata_fields": [
|
| 35 |
+
{
|
| 36 |
+
"name": "_global_index",
|
| 37 |
+
"datatype": "int",
|
| 38 |
+
"label": "Verse Index",
|
| 39 |
+
"description": "Absolute verse index",
|
| 40 |
+
"show_as_filter": True,
|
| 41 |
+
"is_unique": True,
|
| 42 |
+
},
|
| 43 |
+
{
|
| 44 |
+
"name": "chapter_name",
|
| 45 |
+
"datatype": "str",
|
| 46 |
+
"label": "Chapter",
|
| 47 |
+
"description": "Original chapter title",
|
| 48 |
+
"show_as_filter": True,
|
| 49 |
+
},
|
| 50 |
+
{
|
| 51 |
+
"name": "lyrics_sa",
|
| 52 |
+
"datatype": "str",
|
| 53 |
+
"label": "Sanskrit",
|
| 54 |
+
"description": "Verse text in Sanskrit",
|
| 55 |
+
},
|
| 56 |
+
],
|
| 57 |
+
|
| 58 |
+
"pdf_path": "./data/vaayupuraanam.pdf",
|
| 59 |
+
"source": "",
|
| 60 |
+
"language": "san",
|
| 61 |
+
"example_labels": [],
|
| 62 |
+
"examples": [],
|
| 63 |
+
"llm_hints": [],
|
| 64 |
+
"credits": {"art": [], "data": [], "audio": [], "video": []},
|
| 65 |
+
}
|
modules/config/varahapuranam.py
ADDED
|
@@ -0,0 +1,65 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from modules.config.categories import ScriptureCategoryConstants
|
| 2 |
+
from modules.google_drive.google_drive_utils import convert_drive_url_to_direct
|
| 3 |
+
|
| 4 |
+
varahapuranam_config = {
|
| 5 |
+
"name": "varahapuranam",
|
| 6 |
+
"title": "Varaaha Puranam",
|
| 7 |
+
"banner_url": convert_drive_url_to_direct("https://drive.google.com/file/d/1jAZe95sUSH5iuNw2wdQKW7cLcf37ArgK/view?usp=drive_link"),
|
| 8 |
+
"category": ScriptureCategoryConstants.PURANAM,
|
| 9 |
+
"output_dir": "./output/varahapuranam",
|
| 10 |
+
"collection_name": "varahapuranam",
|
| 11 |
+
"collection_embedding_fn": "openai",
|
| 12 |
+
|
| 13 |
+
"unit": "sloka",
|
| 14 |
+
"unit_field": "_global_index",
|
| 15 |
+
|
| 16 |
+
"field_mapping": {
|
| 17 |
+
"text": "lyrics_sa",
|
| 18 |
+
"chapter_name": "chapter_name",
|
| 19 |
+
"unit_index": "_global_index",
|
| 20 |
+
"relative_path": lambda doc: (
|
| 21 |
+
" | ".join(
|
| 22 |
+
[
|
| 23 |
+
doc[key]
|
| 24 |
+
for key in sorted(
|
| 25 |
+
[k for k in doc.keys() if k.startswith("title_") and doc[k]],
|
| 26 |
+
key=lambda x: int(x.split("_")[1])
|
| 27 |
+
)
|
| 28 |
+
]
|
| 29 |
+
)
|
| 30 |
+
+ f" || {doc.get('_global_index', '')} ||"
|
| 31 |
+
)
|
| 32 |
+
},
|
| 33 |
+
|
| 34 |
+
"metadata_fields": [
|
| 35 |
+
{
|
| 36 |
+
"name": "_global_index",
|
| 37 |
+
"datatype": "int",
|
| 38 |
+
"label": "Verse Index",
|
| 39 |
+
"description": "Absolute verse index",
|
| 40 |
+
"show_as_filter": True,
|
| 41 |
+
"is_unique": True,
|
| 42 |
+
},
|
| 43 |
+
{
|
| 44 |
+
"name": "chapter_name",
|
| 45 |
+
"datatype": "str",
|
| 46 |
+
"label": "Chapter",
|
| 47 |
+
"description": "Original chapter title",
|
| 48 |
+
"show_as_filter": True,
|
| 49 |
+
},
|
| 50 |
+
{
|
| 51 |
+
"name": "lyrics_sa",
|
| 52 |
+
"datatype": "str",
|
| 53 |
+
"label": "Sanskrit",
|
| 54 |
+
"description": "Verse text in Sanskrit",
|
| 55 |
+
},
|
| 56 |
+
],
|
| 57 |
+
|
| 58 |
+
"pdf_path": "./data/varahapuranam.pdf",
|
| 59 |
+
"source": "",
|
| 60 |
+
"language": "san",
|
| 61 |
+
"example_labels": [],
|
| 62 |
+
"examples": [],
|
| 63 |
+
"llm_hints": [],
|
| 64 |
+
"credits": {"art": [], "data": [], "audio": [], "video": []},
|
| 65 |
+
}
|
modules/config/vishnu_puranam.py
CHANGED
|
@@ -1,46 +1,65 @@
|
|
| 1 |
from modules.config.categories import ScriptureCategoryConstants
|
|
|
|
| 2 |
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
"
|
| 6 |
-
"
|
| 7 |
-
"category"
|
| 8 |
-
"output_dir": "./output/
|
| 9 |
"collection_name": "vishnu_puranam_openai",
|
| 10 |
"collection_embedding_fn": "openai",
|
| 11 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
"metadata_fields": [
|
| 13 |
{
|
| 14 |
-
"name": "
|
| 15 |
-
"label": "File Name",
|
| 16 |
-
"datatype": "str",
|
| 17 |
-
"description": "name of the file from which the information was extracted",
|
| 18 |
-
},
|
| 19 |
-
{
|
| 20 |
-
"name": "page",
|
| 21 |
"datatype": "int",
|
| 22 |
-
"label": "
|
| 23 |
-
"description": "
|
| 24 |
"show_as_filter": True,
|
| 25 |
"is_unique": True,
|
| 26 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
],
|
| 28 |
-
|
| 29 |
-
"
|
| 30 |
-
"
|
| 31 |
-
"
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
"About Garuda",
|
| 35 |
-
"Weapons of Vishnu",
|
| 36 |
-
"Vishnu's form (all scriptures)",
|
| 37 |
-
],
|
| 38 |
-
"examples": [
|
| 39 |
-
"describe Vishnu's form as defined in vishnu puranam",
|
| 40 |
-
"five elements and their significance as per vishnu puranam",
|
| 41 |
-
"What is the significance of Garuda? Show some verses from vishnu puranam that describe him.",
|
| 42 |
-
"What weapons does Vishnu hold as mentioned in vishnu puranam?",
|
| 43 |
-
"How is the form of Vishnu described across the scriptures?",
|
| 44 |
-
],
|
| 45 |
"llm_hints": [],
|
|
|
|
| 46 |
}
|
|
|
|
| 1 |
from modules.config.categories import ScriptureCategoryConstants
|
| 2 |
+
from modules.google_drive.google_drive_utils import convert_drive_url_to_direct
|
| 3 |
|
| 4 |
+
vishnupuranam_config = {
|
| 5 |
+
"name": "vishnupuranam",
|
| 6 |
+
"title": "Vishnu Puranam",
|
| 7 |
+
"banner_url": convert_drive_url_to_direct("https://drive.google.com/file/d/1ny6M0c6jfCLtYRaROffb1erRc3u8WB2u/view?usp=drive_link"),
|
| 8 |
+
"category": ScriptureCategoryConstants.PURANAM,
|
| 9 |
+
"output_dir": "./output/vishnupuranam",
|
| 10 |
"collection_name": "vishnu_puranam_openai",
|
| 11 |
"collection_embedding_fn": "openai",
|
| 12 |
+
|
| 13 |
+
"unit": "sloka",
|
| 14 |
+
"unit_field": "_global_index",
|
| 15 |
+
|
| 16 |
+
"field_mapping": {
|
| 17 |
+
"text": "lyrics_sa",
|
| 18 |
+
"chapter_name": "chapter_name",
|
| 19 |
+
"unit_index": "_global_index",
|
| 20 |
+
"relative_path": lambda doc: (
|
| 21 |
+
" | ".join(
|
| 22 |
+
[
|
| 23 |
+
doc[key]
|
| 24 |
+
for key in sorted(
|
| 25 |
+
[k for k in doc.keys() if k.startswith("title_") and doc[k]],
|
| 26 |
+
key=lambda x: int(x.split("_")[1])
|
| 27 |
+
)
|
| 28 |
+
]
|
| 29 |
+
)
|
| 30 |
+
+ f" || {doc.get('_global_index', '')} ||"
|
| 31 |
+
)
|
| 32 |
+
},
|
| 33 |
+
|
| 34 |
"metadata_fields": [
|
| 35 |
{
|
| 36 |
+
"name": "_global_index",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37 |
"datatype": "int",
|
| 38 |
+
"label": "Verse Index",
|
| 39 |
+
"description": "Absolute verse index",
|
| 40 |
"show_as_filter": True,
|
| 41 |
"is_unique": True,
|
| 42 |
},
|
| 43 |
+
{
|
| 44 |
+
"name": "chapter_name",
|
| 45 |
+
"datatype": "str",
|
| 46 |
+
"label": "Chapter",
|
| 47 |
+
"description": "Original chapter title",
|
| 48 |
+
"show_as_filter": True,
|
| 49 |
+
},
|
| 50 |
+
{
|
| 51 |
+
"name": "lyrics_sa",
|
| 52 |
+
"datatype": "str",
|
| 53 |
+
"label": "Sanskrit",
|
| 54 |
+
"description": "Verse text in Sanskrit",
|
| 55 |
+
},
|
| 56 |
],
|
| 57 |
+
|
| 58 |
+
"pdf_path": "./data/vishnupuranam.pdf",
|
| 59 |
+
"source": "",
|
| 60 |
+
"language": "san",
|
| 61 |
+
"example_labels": [],
|
| 62 |
+
"examples": [],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 63 |
"llm_hints": [],
|
| 64 |
+
"credits": {"art": [], "data": [], "audio": [], "video": []},
|
| 65 |
}
|