Spaces:
Running
on
Zero
Running
on
Zero
File size: 4,451 Bytes
8a142a6 d9de1e9 8a142a6 347797e d9de1e9 8a142a6 347797e 8a142a6 347797e 8a142a6 347797e 8a142a6 347797e 8a142a6 347797e 8a142a6 347797e d9de1e9 8a142a6 d9de1e9 8a142a6 347797e 8a142a6 d9de1e9 8a142a6 347797e 8a142a6 d9de1e9 8a142a6 347797e 8a142a6 347797e 8a142a6 347797e 8a142a6 d9de1e9 8a142a6 d9de1e9 8a142a6 d9de1e9 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 |
import re
import html
import json
# Entities that commonly arrive without their closing ";".
# NOTE(review): this list was reconstructed from a rendered copy of the file
# in which the entity text had been decoded -- confirm against the original.
_INCOMPLETE_ENTITY_RE = re.compile(r"(&#39|&quot|&lt|&gt|&amp)(?!;)")

# Backslash escape sequences that appear as literal text, keyed by the
# character following the backslash.
_ESCAPED_CHARS = {'"': '"', "'": "'", "n": "\n", "t": "\t", "\\": "\\"}
_ESCAPE_SEQUENCE_RE = re.compile(r"\\([\"'nt\\])")

# Typographic quotes, backtick and acute accent -> plain ASCII quotes.
# Written as \u escapes so the keys cannot be silently normalized by tooling.
_QUOTE_TABLE = str.maketrans({
    "\u201c": '"',  # left double quotation mark
    "\u201d": '"',  # right double quotation mark
    "\u2018": "'",  # left single quotation mark
    "\u2019": "'",  # right single quotation mark
    "`": "'",
    "\u00b4": "'",  # acute accent
})


def clean_text(text):
    """Clean text with common issues like HTML entities and escaped quotes.

    Steps, in order:

    1. Append the missing ``;`` to a small set of unterminated HTML entities.
    2. Decode HTML entities with ``html.unescape``.
    3. Turn the literal escape sequences ``\\"``, ``\\'``, ``\\n``, ``\\t``
       and ``\\\\`` into the characters they denote.
    4. Normalize typographic quotes, backticks and acute accents to ASCII
       quotes.
    5. If the text (ignoring trailing whitespace) ends in backslashes, drop
       that trailing whitespace and those backslashes.

    Args:
        text: The value to clean.

    Returns:
        The cleaned string. Falsy values and non-string values are returned
        unchanged.
    """
    if not text or not isinstance(text, str):
        return text

    # Terminate incomplete entities so the decoder sees well-formed input.
    text = _INCOMPLETE_ENTITY_RE.sub(r"\1;", text)

    # html.unescape does not raise for str input, so no guard is needed.
    text = html.unescape(text)

    # Decode escapes in a single left-to-right pass. Chained str.replace calls
    # would mis-handle an escaped backslash followed by "n" or "t": the "\n"
    # replacement would fire before the "\\" one and inject a real newline.
    text = _ESCAPE_SEQUENCE_RE.sub(
        lambda match: _ESCAPED_CHARS[match.group(1)], text
    )

    text = text.translate(_QUOTE_TABLE)

    # Remove trailing backslash(es), along with any whitespace after them.
    stripped = text.rstrip()
    if stripped.endswith("\\"):
        text = stripped.rstrip("\\")
    return text
def get_context_html(example, show_full=False):
    """Format context chunks into HTML for display.

    Args:
        example: Mapping describing one example. Keys read here:
            ``insufficient`` (truthy to show the warning box),
            ``insufficient_reason`` (text for the warning box),
            ``full_contexts`` (items rendered when ``show_full`` is true) and
            ``contexts`` (items rendered otherwise).
        show_full: When true, render ``full_contexts`` with their content
            HTML-escaped. When false, render ``contexts`` with their content
            inserted as-is.

    Returns:
        An HTML string: an optional "Insufficient Context" alert followed by
        a ``context-items-container`` div holding one ``context-item`` div
        per rendered item.
    """
    parts = []

    # Process insufficient context warning if needed
    if example.get("insufficient", False):
        insufficient_reason = example.get("insufficient_reason", "")
        # NOTE(review): the reason is interpolated without escaping; confirm
        # it can never carry untrusted markup.
        if insufficient_reason:
            reason_html = f"<p>{insufficient_reason}</p>"
        else:
            reason_html = "<p>The context may not contain enough information to fully answer the question, or the question might be ambiguous. Models should ideally indicate this limitation or refuse to answer.</p>"
        parts.append(f"""
        <div class="insufficient-alert">
            <strong>
                <svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" style="vertical-align: middle; margin-right: 5px;">
                    <path d="m21.73 18-8-14a2 2 0 0 0-3.48 0l-8 14A2 2 0 0 0 4 21h16a2 2 0 0 0 1.73-3Z"></path>
                    <line x1="12" y1="9" x2="12" y2="13"></line>
                    <line x1="12" y1="17" x2="12.01" y2="17"></line>
                </svg>
                Insufficient Context
            </strong>
            {reason_html}
        </div>
        """)

    parts.append('<div class="context-items-container">')

    if show_full:
        # Full view: every item is rendered as escaped text, no headers.
        for context_item in example.get("full_contexts") or []:
            if isinstance(context_item, dict) and 'content' in context_item:
                content = context_item['content']
            else:
                content = context_item
            # html.escape only accepts str; coerce so a None or numeric
            # content value renders instead of raising AttributeError.
            text = "" if content is None else str(content)
            parts.append(f'<div class="context-item">{html.escape(text)}</div>')
    else:
        contexts = example.get("contexts")
        if contexts:
            for context_item in contexts:
                if isinstance(context_item, dict):
                    content = context_item.get('content', '')
                    # Extra class for primary context styling
                    extra_class = " primary-context" if context_item.get('is_primary', False) else ""
                    # Content is inserted unescaped: it already carries the
                    # HTML highlighting markup.
                    parts.append(f'<div class="context-item{extra_class}">{content}</div>')
                elif isinstance(context_item, str):
                    parts.append(f'<div class="context-item">{context_item}</div>')
                # Items of any other type are skipped in this view.
        else:
            parts.append('<div class="context-item">No context available. Try toggling to full context view.</div>')

    parts.append('</div>')
    return "".join(parts)