File size: 6,941 Bytes
fd1b271
bbb5184
fd1b271
 
d434239
fd1b271
 
 
 
 
 
6a252fb
fd1b271
6a252fb
fd1b271
e8bcdf0
 
 
fd1b271
210796c
d023803
210796c
fd1b271
 
 
 
 
210796c
fd1b271
 
d023803
fd1b271
d023803
fd1b271
210796c
fd1b271
 
 
 
63d1774
fd1b271
 
06f448a
fd1b271
d4b0094
6a252fb
fd1b271
9879992
06f448a
fd1b271
9879992
d023803
 
9879992
d023803
 
63d1774
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d023803
2919500
 
 
fd1b271
9879992
2919500
 
 
fd1b271
9879992
fd1b271
 
 
2919500
 
 
 
 
 
fd1b271
 
 
 
 
 
 
 
 
 
210796c
fd1b271
 
 
 
 
a1180f7
845dddf
a1180f7
 
f3f0477
a1180f7
845dddf
 
a1180f7
845dddf
 
a1180f7
 
 
f3f0477
a1180f7
845dddf
 
 
 
a1180f7
 
 
 
 
 
 
 
 
 
 
d023803
a1180f7
 
 
 
 
d023803
 
 
a1180f7
845dddf
 
d434239
845dddf
a1180f7
c6893be
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
import logging
from typing import Any, Literal
from dotenv import load_dotenv
from config import SanatanConfig
from db import MetadataWhereClause, SanatanDatabase

# Load settings from the local .env file before anything else is constructed.
load_dotenv(override=True)

# Module-level logger, pinned to INFO.
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)

# Shared handles used by the functions defined in this module.
sanatanDatabase = SanatanDatabase()
sanatanConfig = SanatanConfig()

# Literal types built at import time from the configured scriptures, so that
# function signatures can restrict arguments to the known collection names
# and titles. Subscripting Literal with a tuple is the same as listing the
# values separated by commas.
allowedCollections = Literal[
    tuple(entry["collection_name"] for entry in sanatanConfig.scriptures)
]
allowedScriptureTitles = Literal[
    tuple(entry["title"] for entry in sanatanConfig.scriptures)
]

def format_scripture_answer(
    collection_name: allowedCollections, question: str, query_tool_output: str
) -> str:
    """
    Use this tool to generate a custom system prompt based on the scripture collection, question, and query_tool_output.

    This is especially useful when the user has asked a question about a scripture, and the relevant context has been fetched using the `query` tool.

    The generated prompt will guide the assistant to respond using only that scripture's content, with a clear format including Sanskrit/Tamil verses, English explanations, and source chapters.

    Parameters:
    - collection_name: The scripture collection the answer must be restricted to. It is embedded in the prompt and used to look up the scripture title from the configuration.
    - question (str): The user's question, embedded verbatim in the prompt.
    - query_tool_output (str): The context text to answer from, embedded verbatim in the prompt.

    Returns:
    - str: The fully rendered prompt text.
    """
    # Resolve the display title once, up front, so the template below stays readable.
    scripture_title = sanatanConfig.get_scripture_by_collection(
        collection_name=collection_name
    )["title"]

    # NOTE: the trailing double spaces on several lines are Markdown hard line
    # breaks and are intentional.
    prompt = f"""You are a knowledgeable assistant on the scripture *{collection_name}*, well-versed in **Sanskrit**, **English** and **Tamil**.

You must answer the question using **only** the content from *{collection_name}* provided in the context below.  
- Do **not** bring in information from **any other scripture or source**, or from prior knowledge, even if the answer seems obvious or well-known.  
- Do **not** quote any Sanskrit/Tamil verses unless they appear **explicitly** in the provided context.  
- Do **not** use verse numbers or line references unless clearly mentioned in the context.  

If the answer is not directly stated in the verses but is present in explanatory notes within the context, you may interpret — but **explicitly mention that it is an interpretation**.

If the answer WAS indeed found in the context, use the following response format (in Markdown) otherwise clearly state **"I do not have enough information from the {collection_name} to answer this."**

### 🧾 Answer  
- Present a brief summary of your response in concise **English**.  

### 🕉️ Scripture  
- {scripture_title}

### 🕮 Chapter Title  
- Mention the chapter(s) from which the references were taken.  Use the field *title* here from the context if available. For example `TVM 1.8.3`

### 🕮 Verse Number
- Mention the *verse number* from which the references were taken.  

### 🔗 Reference Link(s)
- Provide reference link(s) (`html_url`) if one is available in the context.

### 📜 Native Verse(s) - Original
- Include the original native verses as-is

### 📜 Native Verse(s) - Sanitized

- Task: Sanitize the native verses **without adding, removing, or inventing text**. Only fix obvious encoding or typographical errors.
- Sanitization rules:
  1. Correct garbled Unicode characters.
  2. Fix broken diacritics, pulli markers, vowel signs, and punctuation.
  3. Preserve **original spacing, line breaks, and character order**.
- Do not translate, transliterate, or interpret.
- Do not hallucinate or generate new verses.
- Output should only be the **cleaned, original verses**.
- The output in this section **MUST** be in native script not english or transliterated english.
> If you are unsure about a character, leave it as it is rather than guessing.


### 📜 English Transliteration  
- For each verse above, provide the **matching English transliteration**.  
- Maintain the **same order** as the verses listed above.

### 📜 English Translation  
- Provide the **English meaning** for each verse listed above.  
- Again, follow the **same order**.  
- Do **not** repeat the original verse here — just the translation.

### 📜 Notes  
- Bullet any extra points or cross-references from explanatory notes **only if present in the context**.  
- Do **not** include anything that is not supported or implied in the context.

⚠️ Do **not duplicate content** across sections.  
- Each section has a distinct purpose.  
- If a verse is shown in the `📜 Native Verse(s)` sections, do **not** repeat it in the Translation section.  
- Only transliterations and meanings should appear in their respective sections.


**Question:**  
{question}

---

**Context:**  
{query_tool_output}

---

Respond in **Markdown** format only. Ensure Sanskrit/Tamil verses are always clearly shown and translated. If a section does not apply (e.g. no verses), you may omit it.
"""

    return prompt


def query(
    collection_name: allowedCollections,
    query: str | None = None,
    metadata_where_clause: MetadataWhereClause | None = None,
    n_results: int = 3,
    search_type: Literal["semantic", "literal", "random"] = "semantic",
) -> str:
    """
    Search a scripture collection.

    Parameters:
    - collection_name (str): The name of the scripture collection to search (use the exact name from the metadata configuration).
    - query (str): The search query - this is the semantic or literal query you want to search for. If you want to perform a random search or just want to search by metadata only, this can be passed as None.
    - metadata_where_clause: MetadataWhereClause - Set to None if no metadata filters are requested. Always set when user mentions a specific prabandham, azhwar, or any other known field from the configuration. Example: {"prabandham_name": "Thiruvaimozhi"}. Use the `conditional_operator` to filter based on $and or $or conditions. Use `groups` to combine multiple queries into one.
    - n_results (int): Number of results to return. Default is 3.
    - search_type: can be one of semantic, literal or random.

    Returns:
    - str: One entry per matching result, each listing its Document, Metadata and ID on separate lines; entries are separated by a blank line.

    Raises:
    - ValueError: If search_type is not "random" and neither query nor metadata_where_clause is provided.
    - Any exception raised by the metadata validation or by the database search propagates unchanged.
    """
    logger.info(
        "%s Search: collection [%s] for [%s] | metadata_where_clause=%s",
        search_type,
        collection_name,
        query,
        metadata_where_clause,
    )

    # A non-random search needs something to search by: text, a filter, or both.
    if search_type != "random" and metadata_where_clause is None and query is None:
        raise ValueError(
            "Invalid input: when search type is not random, either metadata_where_clause or query should be provided"
        )

    if metadata_where_clause is not None:
        # NOTE(review): the return value is not inspected here, so this
        # presumably signals a disallowed field by raising — confirm against
        # SanatanConfig. Whatever it raises is left to propagate to the caller.
        sanatanConfig.is_metadata_field_allowed(
            collection_name=collection_name,
            metadata_where_clause=metadata_where_clause,
        )

    response = sanatanDatabase.search(
        collection_name=collection_name,
        query=query,
        metadata_where_clause=metadata_where_clause,
        n_results=n_results,
        search_type=search_type,
    )

    # Walk the three parallel result lists in lockstep and render one text
    # entry per hit.
    return "\n\n".join(
        f"Document: {doc}\nMetadata: {meta}\nID: {id_}"
        for doc, meta, id_ in zip(
            response["documents"], response["metadatas"], response["ids"]
        )
    )