vikramvasudevan committed
Commit f3f0477 · verified · 1 Parent(s): abda69a

Upload folder using huggingface_hub

Files changed (3):
  1. app.py +18 -0
  2. graph_helper.py +73 -62
  3. sanatan_assistant.py +6 -6
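
For reference, a commit with this message is produced by the `huggingface_hub` folder-upload API. A minimal sketch is below; the repo ID and local path are placeholders, not values from this commit.

```python
# Hedged sketch of the upload that generates a commit message like the one above.
# repo_id and folder_path are illustrative placeholders.
from huggingface_hub import HfApi

api = HfApi()
api.upload_folder(
    repo_id="your-username/your-space",  # placeholder Space ID
    repo_type="space",
    folder_path=".",                     # local project folder
    commit_message="Upload folder using huggingface_hub",
)
```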
app.py CHANGED
@@ -359,6 +359,24 @@ chatInterface = gr.ChatInterface(
     additional_inputs=[thread_id, debug_checkbox],
     chatbot=chatbot,
     css="""
+    table {
+        border-collapse: collapse;
+        width: 90%;
+    }
+
+    table, th, td {
+        border: 1px solid #ddd;
+        padding: 6px;
+        font-size: small;
+    }
+
+    td {
+        word-wrap: break-word;
+        white-space: pre-wrap; /* preserves line breaks but wraps long lines */
+        max-width: 300px; /* control width */
+        vertical-align: top;
+    }
+
     .spinner {
         display: inline-block;
         width: 1em;
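
The new rules take effect because `gr.ChatInterface` accepts a `css` string that is injected into the page it builds, here used to style the validator's HTML table output. Below is a minimal sketch of that wiring with a stub handler; only the `additional_inputs`, `chatbot`, and `css` arguments mirror the diff, everything else is illustrative.

```python
# Minimal sketch, assuming a placeholder chat handler; component labels and the
# respond() stub are not taken from app.py.
import gradio as gr

chatbot = gr.Chatbot()
thread_id = gr.Textbox(label="Thread ID")
debug_checkbox = gr.Checkbox(label="Debug")

def respond(message, history, thread_id, debug):
    # Placeholder handler; the real app delegates to the assistant pipeline.
    return f"Echo: {message}"

chatInterface = gr.ChatInterface(
    fn=respond,
    additional_inputs=[thread_id, debug_checkbox],
    chatbot=chatbot,
    css="""
    table { border-collapse: collapse; width: 90%; }
    table, th, td { border: 1px solid #ddd; padding: 6px; font-size: small; }
    td { word-wrap: break-word; white-space: pre-wrap; max-width: 300px; vertical-align: top; }
    """,
)

if __name__ == "__main__":
    chatInterface.launch()
```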
graph_helper.py CHANGED
@@ -73,68 +73,79 @@ def generate_graph() -> CompiledStateGraph:
     SystemMessage(
         content=(
             """
- You are a response validator. Your job is to:
-
- 1. Fix encoding issues in native-language verses (e.g., Tamil, Sanskrit) **only** for garbled/malformed characters. Do NOT paraphrase, invent, or alter meaning.
-
- 2. Validate content authenticity:
- - Extract key entities from the original user query and from the assistant's response. Key entities include pasuram/verse numbers, azhwar names, prabandham names, divya desam names, deity/context (e.g., "Krishna playing flute"), etc.
- - Compare them literally (exact matches for numeric IDs such as pasuram numbers). If a required entity (e.g., pasuram number) does not match exactly, mark it INCORRECT.
- - List any missing, altered, or hallucinated entities.
-
- 3. Return the corrected assistant message **first**. After the corrected message, append a standalone HTML evaluation block that begins on its own line (i.e., include a blank line before the block). **Do not** wrap the evaluation block in Markdown fences. The evaluation block MUST be either:
-
- A. An HTML block using a proper HTML `<table>` (preferred). Use the following structure and styles:
-
- <div style="font-size:0.9em; opacity:0.5; margin-top:10px;">
- <hr>
- <strong>Original user query:</strong> ...<br/><br/>
- <strong>Entity comparison:</strong><br/>
- <table style="border-collapse:collapse; margin-top:6px;">
- <thead>
- <tr>
- <th style="border:1px solid #ddd; padding:6px; text-align:left;">Expected</th>
- <th style="border:1px solid #ddd; padding:6px; text-align:left;">Found</th>
- </tr>
- </thead>
- <tbody>
- <tr><td style="border:1px solid #ddd; padding:6px;">value</td><td style="border:1px solid #ddd; padding:6px;">value</td></tr>
- </tbody>
- </table>
- <br/>
- <strong>Verdict:</strong> ...<br/>
- <strong>Confidence:</strong> ...<br/>
- <strong>Badge:</strong> <span style="display:inline-block; padding:3px 8px; border-radius:6px; color:#fff; font-weight:600;">...</span><br/>
- <strong>Note:</strong> ...
- </div>
-
- B. **If** the environment strips `<table>` tags, fall back to an ASCII table inside a `<pre>` within the styled `<div>` (monospace + same opacity). Example fallback:
-
- <div style="font-size:0.9em; opacity:0.75; margin-top:10px; font-family:monospace;">
- <pre>
- Original user query: ...
-
- Entity comparison:
-
- | Expected | Found |
- |----------|-------|
- | 1145 | 696 |
-
- Verdict: Incorrect
- Confidence: 40% — The verse number does not match.
- Badge: 🔴
- Note: ...
- </pre>
- </div>
-
- 4. Always leave a blank line before the HTML `<div>` so renderers treat it as a separate block.
-
- 5. Be strict: if the user asked for pasuram 1145 but the assistant returned 696 (or any other number), mark the verdict **Incorrect** and set confidence under 50% (e.g., 30–45%) with a short justification.
-
- 6. If you fixed native text, explicitly list the fixes in the Note.
-
- Return only: (a) the corrected assistant message, and then (b) the standalone evaluation HTML block described above (no extra explanation).
-
+ You are a strict validator for LLM responses to scripture queries.
+
+ Your tasks:
+ 0. Treat your input as `original_llm_response`.
+ 1. Compare the original user query to the LLM's answer.
+ 2. Identify the scripture context (e.g., Divya Prabandham, Bhagavad Gita, Upanishads, Ramayana, etc.).
+ 3. Based on the scripture context, dynamically choose the appropriate entity columns for validation:
+ - **Divya Prabandham** → azhwar, prabandham, location/deity
+ - **Bhagavad Gita** → chapter, verse number(s), speaker, listener
+ - **Upanishads** → section, mantra number, rishi, deity
+ - **Ramayana/Mahabharata** → book/kanda, section/sarga, character(s), location
+ - **Other** → pick the 3–4 most relevant contextual entities from the scripture's metadata.
+ 4. Verify (from `original_llm_response`):
+ - Correct verse number(s)
+ - Keyword/context match
+ - All scripture-specific entity fields
+ - Native verse text quality
+ 5. **Repair any garbled Tamil/Sanskrit characters** in the verse:
+ - Restore correct letters, diacritics, and punctuation.
+ - Replace broken Unicode with proper characters.
+ - Correct vowel signs, consonants, and pulli markers.
+ - Preserve original spacing and line breaks.
+ The repaired version is `fixed_llm_response`.
+ 6. Output in this exact order:
+ ---
+ <!-- **Step 1 – Repaired LLM Response in Markdown:** -->
+ <!-- BEGIN_MARKDOWN -->
+ fixed_llm_response
+ <!-- END_MARKDOWN -->
+
+ <!-- **Step 2 – Validation Table:** -->
+ <div style="font-size: small; opacity: 0.6;">
+ <hr>
+ <b>Original user query:</b> {{original_user_query}}
+
+ <table border="1" cellpadding="4" cellspacing="0" style="border-collapse: collapse; width: 100%;">
+ <tr>
+ <th>Expected</th>
+ <th>Found</th>
+ <th>Match?</th>
+ </tr>
+ <tr>
+ <td>verse number(s)</td>
+ <td>{{found_verse_numbers}}</td>
+ <td>{{match_status_for_verse}}</td>
+ </tr>
+ <tr>
+ <td>keyword/context</td>
+ <td>{{found_keyword}}</td>
+ <td>{{match_status_for_keyword}}</td>
+ </tr>
+ {{dynamic_entity_rows}}
+ <tr>
+ <td>native verse text</td>
+ <td style="white-space: normal; word-break: break-word;">{{cleaned_native_text}}</td>
+ <td>{{garbled_fix_status}}</td>
+ </tr>
+ </table>
+
+ <p><b>Verdict:</b> {{Verdict}}<br>
+ <b>Confidence score:</b> {{Confidence}}% {{Justification}}<br>
+ <span style="background-color:{{badge_color_code}}; color:white; padding:2px 6px; border-radius:4px;">{{badge_emoji}}</span></p>
+ </div>
+
+ ---
+
+ Where:
+ - `{{dynamic_entity_rows}}` stands for the context-specific entity rows.
+ - `{{cleaned_native_text}}` must be taken from the repaired `fixed_llm_response`.
+ - ✅, ❌, ⚠️ remain the match indicators.
+ - Hidden markers (`<!-- BEGIN_MARKDOWN -->`) make it parseable without showing literal marker text.
+
+
             """
         )
     ),
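
The prompt above is the content of a `SystemMessage` built inside `generate_graph()`. As a rough sketch (not the repository's actual graph), a validator node in a LangGraph pipeline typically prepends such a message before re-invoking the model; the node names, state schema, and model binding below are assumptions, while `SystemMessage` and the `CompiledStateGraph` return type come from the diff.

```python
# Minimal sketch, assuming a LangGraph StateGraph whose validation node prepends
# the validator SystemMessage before calling the model. Node/state names and the
# model provider are illustrative assumptions, not taken from graph_helper.py.
from langchain_core.messages import SystemMessage
from langgraph.graph import StateGraph, MessagesState, START, END
from langgraph.graph.state import CompiledStateGraph
from langchain_openai import ChatOpenAI  # assumed model provider

llm = ChatOpenAI(model="gpt-4o-mini")

VALIDATOR_PROMPT = SystemMessage(
    content="You are a strict validator for LLM responses to scripture queries. ..."  # truncated
)

def validate(state: MessagesState) -> dict:
    # Re-run the conversation with the validator prompt prepended so the model
    # returns the repaired response followed by the HTML validation table.
    result = llm.invoke([VALIDATOR_PROMPT, *state["messages"]])
    return {"messages": [result]}

def generate_graph_sketch() -> CompiledStateGraph:
    graph = StateGraph(MessagesState)
    graph.add_node("validate", validate)
    graph.add_edge(START, "validate")
    graph.add_edge("validate", END)
    return graph.compile()
```

Downstream code can then split the node output on the hidden `<!-- BEGIN_MARKDOWN -->` / `<!-- END_MARKDOWN -->` markers to separate the repaired response from the validation table, as the prompt's closing note intends.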
sanatan_assistant.py CHANGED
@@ -94,14 +94,14 @@ Respond in **Markdown** format only. Ensure Sanskrit/Tamil verses are always cle
     return prompt


-def query(collection_name: allowedCollections, query: str, n_results=5):
+def query(collection_name: allowedCollections, query: str, n_results=3):
     """
     Search a scripture collection.

     Parameters:
     - collection_name (str): The name of the scripture collection to search. ...
     - query (str): The search query.
-    - n_results (int): Number of results to return. Default is 5.
+    - n_results (int): Number of results to return. Default is 3.

     Returns:
     - A list of matching results.
@@ -126,7 +126,7 @@ def query_by_metadata_field(
         "$in", "$eq", "$gt", "$gte", "$lt", "$lte", "$ne", "$nin"
     ],
     metadata_value: Any,
-    n_results=5,
+    n_results=3,
 ):
     """
     Search a scripture collection by metadata. Do NOT use this for semantic search. Only use when a specific metadata field is provided.
@@ -137,7 +137,7 @@ def query_by_metadata_field(
     - metadata_field (str) : The name of the metadata field. e.g. azhwar_name
     - metadata_search_operator (str) : The search operator e.g. $eq or $in. DO NOT use $regex.
     - metadata_value : Value to search for can be any primitive datatype like str or int (or a list[str] if metadata_search_operator = '$in'). for e.g. Thirumangai Azhwar or '2233' or 2233
-    - n_results (int): Number of results to return. Default is 5.
+    - n_results (int): Number of results to return. Default is 3.

     Returns:
     - A list of matching results.
@@ -169,7 +169,7 @@ def query_by_literal_text(
 def query_by_literal_text(
     collection_name: allowedCollections,
     literal_to_search_for: str,
-    n_results=5,
+    n_results=3,
 ):
     """
     Search a scripture collection by a literal. Do NOT use this for semantic search. Only use when the user specifically asks for literal search.
@@ -177,7 +177,7 @@ def query_by_literal_text(
     Parameters:
     - collection_name (str): The name of the scripture collection to search. ...
     - literal_to_search_for (str): The search query.
-    - n_results (int): Number of results to return. Default is 5.
+    - n_results (int): Number of results to return. Default is 3.

     Returns:
     - A list of matching results.
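
The `$eq`/`$in`/`$gt` operators and the `n_results` parameter in these docstrings match ChromaDB's `where` filter and query interfaces, so the helpers plausibly wrap a Chroma collection. The sketch below shows how the lowered `n_results=3` default would flow into such calls; the Chroma backing, client setup, and function bodies are assumptions, not taken from sanatan_assistant.py.

```python
# Hedged sketch of the two query styles described above, assuming the scripture
# collections live in ChromaDB. Paths and names are illustrative placeholders.
from typing import Any

import chromadb

client = chromadb.PersistentClient(path="./chroma_db")  # assumed storage path

def query_sketch(collection_name: str, query: str, n_results: int = 3):
    """Semantic search; the lower default of 3 trims the context sent to the LLM."""
    collection = client.get_collection(collection_name)
    return collection.query(query_texts=[query], n_results=n_results)

def query_by_metadata_field_sketch(
    collection_name: str,
    metadata_field: str,
    metadata_search_operator: str,  # e.g. "$eq" or "$in"
    metadata_value: Any,
    n_results: int = 3,
):
    """Metadata-only lookup via a Chroma `where` filter, with no embedding search."""
    collection = client.get_collection(collection_name)
    # e.g. {"azhwar_name": {"$eq": "Thirumangai Azhwar"}}
    where = {metadata_field: {metadata_search_operator: metadata_value}}
    return collection.get(where=where, limit=n_results)
```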