Spaces:
Running
on
Zero
Running
on
Zero
import re | |
import html | |
import json | |
# HTML entities that sometimes arrive without their terminating semicolon.
# The negative lookahead leaves already-terminated entities alone.
_INCOMPLETE_ENTITY_RE = re.compile(r"&(#x27|quot|lt|gt|amp)(?!;)")

# Backslash escape sequences that are decoded to their literal characters.
_BACKSLASH_ESCAPE_RE = re.compile(r"""\\(["'nt\\])""")
_BACKSLASH_ESCAPES = {'"': '"', "'": "'", "n": "\n", "t": "\t", "\\": "\\"}

# Typographic quotes, backtick and acute accent -> plain ASCII quotes.
# Written as escapes so the table survives any re-encoding of this file.
_QUOTE_TABLE = str.maketrans({
    "\u201c": '"',   # left double quotation mark
    "\u201d": '"',   # right double quotation mark
    "\u2018": "'",   # left single quotation mark
    "\u2019": "'",   # right single quotation mark
    "`": "'",
    "\u00b4": "'",   # acute accent
})


def clean_text(text):
    """Clean text with common issues like HTML entities and escaped quotes.

    Steps, in order:
      1. Add the missing ``;`` to a few known semicolon-less HTML entities.
      2. Convert HTML entities to characters with ``html.unescape``.
      3. Decode the backslash escapes ``\\"``, ``\\'``, ``\\n``, ``\\t`` and
         ``\\\\`` in one left-to-right pass.
      4. Replace typographic quotes, backticks and acute accents with plain
         ASCII quotes.
      5. If the text (ignoring trailing whitespace) ends with backslashes,
         drop that trailing whitespace and those backslashes.

    Args:
        text: The value to clean.

    Returns:
        The cleaned string. Falsy values and non-string values are returned
        unchanged.
    """
    if not text or not isinstance(text, str):
        return text

    # Fix incomplete HTML entities so the unescape step decodes them as the
    # intended entity (e.g. "&#x27s" must not be read as the code point 0x27...).
    text = _INCOMPLETE_ENTITY_RE.sub(r"&\1;", text)

    # Convert HTML entities to characters. Best effort: html.unescape can raise
    # ValueError on absurdly long decimal character references (the int
    # conversion limit in Python 3.11+); in that case keep the text as it is.
    try:
        text = html.unescape(text)
    except ValueError:
        pass

    # Decode backslash escapes in a single pass. Doing this with sequential
    # str.replace calls mis-decodes an escaped backslash followed by "n" or
    # "t" (the second backslash would be consumed as part of "\n").
    text = _BACKSLASH_ESCAPE_RE.sub(
        lambda match: _BACKSLASH_ESCAPES[match.group(1)], text
    )

    # Normalize fancy quotes.
    text = text.translate(_QUOTE_TABLE)

    # Remove trailing backslashes (and the whitespace after them) if present.
    stripped = text.rstrip()
    if stripped.endswith("\\"):
        text = stripped.rstrip("\\")

    return text
# Fallback explanation shown when an example is flagged as insufficient but
# carries no specific reason.
_DEFAULT_INSUFFICIENT_REASON_HTML = "<p>The context may not contain enough information to fully answer the question, or the question might be ambiguous. Models should ideally indicate this limitation or refuse to answer.</p>"


def _full_context_text(context_item):
    """Return the plain text of one ``full_contexts`` entry as a string."""
    if isinstance(context_item, str):
        return context_item
    if isinstance(context_item, dict) and 'content' in context_item:
        content = context_item['content']
        if content is None:
            # html.escape() cannot handle None; show an empty box instead.
            return ""
        return content if isinstance(content, str) else str(content)
    # Anything else (including dicts without a 'content' key) is stringified.
    return str(context_item)


def get_context_html(example, show_full=False):
    """Format context chunks into HTML for display.

    Args:
        example: Mapping describing one example. Keys read here:
            ``insufficient`` (truthy -> prepend a warning box),
            ``insufficient_reason`` (text shown inside that box),
            ``full_contexts`` (items rendered when ``show_full`` is true) and
            ``contexts`` (items rendered otherwise). Items may be strings or
            dicts with a ``content`` key; ``contexts`` dicts may also carry
            ``is_primary``.
        show_full: When true, render ``full_contexts`` with their text
            HTML-escaped. When false, render ``contexts`` and insert their
            content as-is.

    Returns:
        An HTML string: an optional "Insufficient Context" alert followed by
        a ``context-items-container`` div holding one ``context-item`` div per
        rendered context.
    """
    parts = []

    # Process insufficient context warning if needed.
    if example.get("insufficient", False):
        insufficient_reason = example.get("insufficient_reason", "")
        # NOTE(review): the reason is interpolated without escaping; this
        # presumably relies on the data being trusted -- confirm with callers.
        if insufficient_reason:
            reason_html = f"<p>{insufficient_reason}</p>"
        else:
            reason_html = _DEFAULT_INSUFFICIENT_REASON_HTML
        parts.append(f"""
        <div class="insufficient-alert">
            <strong>
                <svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" style="vertical-align: middle; margin-right: 5px;">
                    <path d="m21.73 18-8-14a2 2 0 0 0-3.48 0l-8 14A2 2 0 0 0 4 21h16a2 2 0 0 0 1.73-3Z"></path>
                    <line x1="12" y1="9" x2="12" y2="13"></line>
                    <line x1="12" y1="17" x2="12.01" y2="17"></line>
                </svg>
                Insufficient Context
            </strong>
            {reason_html}
        </div>
        """)

    parts.append('<div class="context-items-container">')

    # Display full contexts or highlighted contexts based on toggle.
    if show_full:
        # Full view: raw text, escaped for safe display, no headers.
        for context_item in example.get("full_contexts") or ():
            escaped_content = html.escape(_full_context_text(context_item))
            parts.append(f'<div class="context-item">{escaped_content}</div>')
    else:
        contexts = example.get("contexts")
        if contexts:
            for context_item in contexts:
                if isinstance(context_item, dict):
                    content = context_item.get('content', '')
                    if content is None:
                        # Avoid rendering the literal text "None".
                        content = ''
                    # Extra class for primary context styling.
                    extra_class = " primary-context" if context_item.get('is_primary', False) else ""
                    # Content is used directly: it already has HTML highlighting.
                    parts.append(f'<div class="context-item{extra_class}">{content}</div>')
                elif isinstance(context_item, str):
                    # Direct string contexts are also inserted as-is.
                    parts.append(f'<div class="context-item">{context_item}</div>')
                # Items of any other type are skipped.
        else:
            parts.append('<div class="context-item">No context available. Try toggling to full context view.</div>')

    parts.append('</div>')
    return "".join(parts)