SLM-RAG-Arena

Running on Zero

File size: 4,451 Bytes

import re
import html
import json

def clean_text(text):
    """Clean up common artifacts in scraped or serialized text.

    The steps run in this order:

    1. Add the missing ``;`` to the entities ``&#x27``, ``&quot``, ``&lt``,
       ``&gt`` and ``&amp`` when they appear without one.
    2. Decode HTML entities into characters.
    3. Turn literal backslash escapes (backslash followed by a double quote,
       single quote, ``n``, ``t`` or another backslash) into the character
       they stand for.
    4. Normalize curly quotes, backticks and acute accents to plain ASCII
       quotes.
    5. Drop trailing whitespace and trailing backslashes, but only when the
       text (ignoring trailing whitespace) ends with a backslash.

    Args:
        text: The value to clean. Anything that is falsy or not a ``str`` is
            returned unchanged.

    Returns:
        The cleaned string, or ``text`` itself when it is not a non-empty
        string.
    """
    if not text or not isinstance(text, str):
        return text

    # Terminate entities that lost their ';' so the decoder below sees them
    # as complete, e.g. "It&#x27s" -> "It&#x27;s".
    for entity in ('&#x27', '&quot', '&lt', '&gt', '&amp'):
        text = re.sub(f"{re.escape(entity)}(?!;)", f"{entity};", text)

    # html.unescape never raises for str input, so no guard is needed here.
    text = html.unescape(text)

    # Literal backslash escapes, applied sequentially in this exact order.
    escape_sequences = (
        (r'\"', '"'),
        (r"\'", "'"),
        (r"\n", "\n"),
        (r"\t", "\t"),
        (r"\\", "\\"),
    )
    for literal, replacement in escape_sequences:
        text = text.replace(literal, replacement)

    # Typographic quotes are spelled as escapes so the keys cannot be
    # silently flattened to ASCII when the file is re-encoded.
    quote_table = str.maketrans({
        "\u201c": '"',   # left double quotation mark
        "\u201d": '"',   # right double quotation mark
        "\u2018": "'",   # left single quotation mark
        "\u2019": "'",   # right single quotation mark
        "`": "'",        # backtick
        "\u00b4": "'",   # acute accent
    })
    text = text.translate(quote_table)

    # A dangling backslash at the end is treated as a truncated escape.
    stripped = text.rstrip()
    if stripped.endswith('\\'):
        text = stripped.rstrip('\\')

    return text

def get_context_html(example, show_full=False):
    """Format the context chunks of an example as an HTML fragment.

    Args:
        example: Mapping describing one example. The keys read here are
            ``insufficient``, ``insufficient_reason``, ``full_contexts`` and
            ``contexts``; all are optional.
        show_full: When true, render ``full_contexts`` with their text
            HTML-escaped. Otherwise render ``contexts`` verbatim, since that
            content is expected to already carry highlighting markup.

    Returns:
        A string with an optional "Insufficient Context" alert followed by a
        ``context-items-container`` div holding one ``context-item`` div per
        chunk.
    """
    parts = []

    # Optional warning banner shown above the context list.
    if example.get("insufficient", False):
        insufficient_reason = example.get("insufficient_reason", "")
        # NOTE(review): the reason is inserted without escaping; confirm it
        # never carries untrusted markup.
        reason_html = f"<p>{insufficient_reason}</p>" if insufficient_reason else "<p>The context may not contain enough information to fully answer the question, or the question might be ambiguous. Models should ideally indicate this limitation or refuse to answer.</p>"

        parts.append(f"""
        <div class="insufficient-alert">
            <strong>
                <svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" style="vertical-align: middle; margin-right: 5px;">
                    <path d="m21.73 18-8-14a2 2 0 0 0-3.48 0l-8 14A2 2 0 0 0 4 21h16a2 2 0 0 0 1.73-3Z"></path>
                    <line x1="12" y1="9" x2="12" y2="13"></line>
                    <line x1="12" y1="17" x2="12.01" y2="17"></line>
                </svg>
                Insufficient Context
            </strong>
            {reason_html}
        </div>
        """)

    parts.append('<div class="context-items-container">')

    if show_full:
        # Full view: plain text chunks, escaped for safe display.
        for context_item in example.get("full_contexts") or ():
            if isinstance(context_item, dict) and 'content' in context_item:
                raw = context_item['content']
                # A non-string payload would make html.escape raise, so
                # coerce it; None is shown as an empty box.
                content = "" if raw is None else str(raw)
            else:
                content = str(context_item)

            parts.append(f'<div class="context-item">{html.escape(content)}</div>')
    else:
        # Highlighted view: content already contains HTML, so it is not
        # escaped.
        contexts = example.get("contexts")
        if contexts:
            for context_item in contexts:
                if isinstance(context_item, dict):
                    content = context_item.get('content', '')
                    # Primary chunks get an extra class for styling.
                    extra_class = " primary-context" if context_item.get('is_primary', False) else ""
                    parts.append(f'<div class="context-item{extra_class}">{content}</div>')
                elif isinstance(context_item, str):
                    parts.append(f'<div class="context-item">{context_item}</div>')
                # Items of any other type are skipped in this view.
        else:
            parts.append('<div class="context-item">No context available. Try toggling to full context view.</div>')

    parts.append('</div>')

    return "".join(parts)