vikramvasudevan committed on
Commit
730c13e
·
verified ·
1 Parent(s): faece1b

Upload folder using huggingface_hub

Browse files
Files changed (3) hide show
  1. config.py +16 -2
  2. db.py +8 -1
  3. server.py +1 -1
config.py CHANGED
@@ -431,7 +431,17 @@ class SanatanConfig:
431
  "title": "Chathusloki by Sri Alavandar",
432
  "output_dir": "./output/chathusloki",
433
  "collection_name": "chathusloki",
434
- "unit": "slokam",
 
 
 
 
 
 
 
 
 
 
435
  "metadata_fields": [
436
  {
437
  "name": "sloka_number",
@@ -641,8 +651,12 @@ class SanatanConfig:
641
  canonical_doc["language"] = config.get("language")
642
  canonical_doc["unit"] = config.get("unit")
643
  canonical_doc["document"] = document_text
644
- if canonical_doc["text"] == "-" or canonical_doc["text"] is None:
 
 
 
645
  canonical_doc["text"] = canonical_doc["document"]
 
646
  canonical_doc["verse"] = resolve_field("verse")
647
 
648
  return canonical_doc
 
431
  "title": "Chathusloki by Sri Alavandar",
432
  "output_dir": "./output/chathusloki",
433
  "collection_name": "chathusloki",
434
+ "unit": "verse",
435
+ "unit_field" : "sloka_number",
436
+ "field_mapping": {
437
+ "text": "sloka_devanagari",
438
+ "title": lambda doc: f"Verse {doc.get('verse','')}",
439
+ "unit_index": "sloka_number",
440
+ "transliteration": "sloka_english_transliteration",
441
+ "word_by_word_native": "meaning",
442
+ "author": lambda doc: f"Sri Aalavandhaar",
443
+ "relative_path": lambda doc: f"Chathusloki-{doc.get("verse","")}",
444
+ },
445
  "metadata_fields": [
446
  {
447
  "name": "sloka_number",
 
651
  canonical_doc["language"] = config.get("language")
652
  canonical_doc["unit"] = config.get("unit")
653
  canonical_doc["document"] = document_text
654
+ if (
655
+ canonical_doc.get("text", "-") == "-"
656
+ or canonical_doc.get("text", None) is None
657
+ ):
658
  canonical_doc["text"] = canonical_doc["document"]
659
+ canonical_doc["document"] = "-"
660
  canonical_doc["verse"] = resolve_field("verse")
661
 
662
  return canonical_doc
db.py CHANGED
@@ -129,10 +129,17 @@ class SanatanDatabase:
129
  }
130
  Or a dict with "error" key if something went wrong.
131
  """
132
- logger.info("Fetch document #%d from [%s]", index, collection_name)
133
  collection = self.chroma_client.get_or_create_collection(name=collection_name)
134
 
135
  try:
 
 
 
 
 
 
 
136
  response = collection.get(
137
  limit=1,
138
  # offset=index, # pagination via offset
 
129
  }
130
  Or a dict with "error" key if something went wrong.
131
  """
132
+ logger.info("fetching document #%d from [%s]", index, collection_name)
133
  collection = self.chroma_client.get_or_create_collection(name=collection_name)
134
 
135
  try:
136
+ # show a sample data record
137
+ response = collection.get(
138
+ limit=1,
139
+ # offset=index, # pagination via offset
140
+ include=["metadatas", "documents"],
141
+ )
142
+ print(response)
143
  response = collection.get(
144
  limit=1,
145
  # offset=index, # pagination via offset
server.py CHANGED
@@ -214,7 +214,7 @@ async def get_scripture(req: ScriptureRequest):
214
  raw_doc = SanatanDatabase().fetch_document_by_index(
215
  collection_name=config["collection_name"],
216
  index=req.unit_index,
217
- unit_name=config["unit"]
218
  )
219
 
220
  if not raw_doc or isinstance(raw_doc, str):
 
214
  raw_doc = SanatanDatabase().fetch_document_by_index(
215
  collection_name=config["collection_name"],
216
  index=req.unit_index,
217
+ unit_name=config.get("unit_field", config.get("unit"))
218
  )
219
 
220
  if not raw_doc or isinstance(raw_doc, str):