Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
Upload folder using huggingface_hub
Browse files
config.py
CHANGED
|
@@ -431,7 +431,17 @@ class SanatanConfig:
|
|
| 431 |
"title": "Chathusloki by Sri Alavandar",
|
| 432 |
"output_dir": "./output/chathusloki",
|
| 433 |
"collection_name": "chathusloki",
|
| 434 |
-
"unit": "
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 435 |
"metadata_fields": [
|
| 436 |
{
|
| 437 |
"name": "sloka_number",
|
|
@@ -641,8 +651,12 @@ class SanatanConfig:
|
|
| 641 |
canonical_doc["language"] = config.get("language")
|
| 642 |
canonical_doc["unit"] = config.get("unit")
|
| 643 |
canonical_doc["document"] = document_text
|
| 644 |
-
if
|
|
|
|
|
|
|
|
|
|
| 645 |
canonical_doc["text"] = canonical_doc["document"]
|
|
|
|
| 646 |
canonical_doc["verse"] = resolve_field("verse")
|
| 647 |
|
| 648 |
return canonical_doc
|
|
|
|
| 431 |
"title": "Chathusloki by Sri Alavandar",
|
| 432 |
"output_dir": "./output/chathusloki",
|
| 433 |
"collection_name": "chathusloki",
|
| 434 |
+
"unit": "verse",
|
| 435 |
+
"unit_field" : "sloka_number",
|
| 436 |
+
"field_mapping": {
|
| 437 |
+
"text": "sloka_devanagari",
|
| 438 |
+
"title": lambda doc: f"Verse {doc.get('verse','')}",
|
| 439 |
+
"unit_index": "sloka_number",
|
| 440 |
+
"transliteration": "sloka_english_transliteration",
|
| 441 |
+
"word_by_word_native": "meaning",
|
| 442 |
+
"author": lambda doc: f"Sri Aalavandhaar",
|
| 443 |
+
"relative_path": lambda doc: f"Chathusloki-{doc.get("verse","")}",
|
| 444 |
+
},
|
| 445 |
"metadata_fields": [
|
| 446 |
{
|
| 447 |
"name": "sloka_number",
|
|
|
|
| 651 |
canonical_doc["language"] = config.get("language")
|
| 652 |
canonical_doc["unit"] = config.get("unit")
|
| 653 |
canonical_doc["document"] = document_text
|
| 654 |
+
if (
|
| 655 |
+
canonical_doc.get("text", "-") == "-"
|
| 656 |
+
or canonical_doc.get("text", None) is None
|
| 657 |
+
):
|
| 658 |
canonical_doc["text"] = canonical_doc["document"]
|
| 659 |
+
canonical_doc["document"] = "-"
|
| 660 |
canonical_doc["verse"] = resolve_field("verse")
|
| 661 |
|
| 662 |
return canonical_doc
|
db.py
CHANGED
|
@@ -129,10 +129,17 @@ class SanatanDatabase:
|
|
| 129 |
}
|
| 130 |
Or a dict with "error" key if something went wrong.
|
| 131 |
"""
|
| 132 |
-
logger.info("
|
| 133 |
collection = self.chroma_client.get_or_create_collection(name=collection_name)
|
| 134 |
|
| 135 |
try:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 136 |
response = collection.get(
|
| 137 |
limit=1,
|
| 138 |
# offset=index, # pagination via offset
|
|
|
|
| 129 |
}
|
| 130 |
Or a dict with "error" key if something went wrong.
|
| 131 |
"""
|
| 132 |
+
logger.info("fetching document #%d from [%s]", index, collection_name)
|
| 133 |
collection = self.chroma_client.get_or_create_collection(name=collection_name)
|
| 134 |
|
| 135 |
try:
|
| 136 |
+
# show a sample data record
|
| 137 |
+
response = collection.get(
|
| 138 |
+
limit=1,
|
| 139 |
+
# offset=index, # pagination via offset
|
| 140 |
+
include=["metadatas", "documents"],
|
| 141 |
+
)
|
| 142 |
+
print(response)
|
| 143 |
response = collection.get(
|
| 144 |
limit=1,
|
| 145 |
# offset=index, # pagination via offset
|
server.py
CHANGED
|
@@ -214,7 +214,7 @@ async def get_scripture(req: ScriptureRequest):
|
|
| 214 |
raw_doc = SanatanDatabase().fetch_document_by_index(
|
| 215 |
collection_name=config["collection_name"],
|
| 216 |
index=req.unit_index,
|
| 217 |
-
unit_name=config
|
| 218 |
)
|
| 219 |
|
| 220 |
if not raw_doc or isinstance(raw_doc, str):
|
|
|
|
| 214 |
raw_doc = SanatanDatabase().fetch_document_by_index(
|
| 215 |
collection_name=config["collection_name"],
|
| 216 |
index=req.unit_index,
|
| 217 |
+
unit_name=config.get("unit_field", config.get("unit"))
|
| 218 |
)
|
| 219 |
|
| 220 |
if not raw_doc or isinstance(raw_doc, str):
|