File size: 8,547 Bytes
8a142a6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
import re
import json

def debug_text(text, label="Text"):
    """Print a short diagnostic report about ``text`` to stdout.

    The report shows the length of ``text``, its first 100 characters, and
    whether either highlight start marker (``[[highlight_start]]`` or
    ``[[start_highlight]]``) occurs in it. ``label`` is shown in the banner
    line so several reports can be told apart.
    """
    report_lines = [
        f"\n--- DEBUG {label} ---",
        f"Length: {len(text)}",
        f"First 100 chars: {text[:100]}",
        f"Contains highlight_start: {'[[highlight_start]]' in text}",
        f"Contains start_highlight: {'[[start_highlight]]' in text}",
        "-------------------------\n",
    ]
    # One print call; the newline-joined output matches printing line by line.
    print("\n".join(report_lines))

def clean_json_text(text):
    """
    Remove leftover JSON-style quote escaping from text.

    Text that was escaped once too often arrives with literal backslashes in
    front of quotes, e.g. ``the sky isn\\'t falling``. This strips a single
    backslash that directly precedes a single or double quote, yielding
    ``the sky isn't falling``.

    Only escaped quotes are touched. Other backslash sequences (for example a
    literal backslash followed by ``n``) are returned unchanged.

    Parameters:
    - text: The text to clean. Non-string values are returned as-is.

    Returns the cleaned text.
    """
    # Round-tripping through json.dumps/json.loads returns the input
    # unchanged, so the escapes have to be removed explicitly.
    if not isinstance(text, str):
        return text

    # Fast path: nothing to unescape.
    if '\\' not in text:
        return text

    return re.sub(r"""\\(['"])""", r"\1", text)

def process_highlights(text):
    """
    Replace highlight markers in text with HTML highlight spans.

    Two marker spellings are recognised:
    - ``[[highlight_start]]...[[highlight_end]]``
    - ``[[start_highlight]]...[[end_highlight]]``

    Each marked region becomes ``<span class="highlight">...</span>``.
    A marked region may span several lines. The text is passed through
    ``clean_json_text`` before the markers are processed.

    Parameters:
    - text: The text that may contain highlight markers.

    Returns the text with every complete marker pair replaced by a span.
    """
    text = clean_json_text(text)

    replacement = r'<span class="highlight">\1</span>'
    marker_patterns = (
        r'\[\[highlight_start\]\](.*?)\[\[highlight_end\]\]',
        r'\[\[start_highlight\]\](.*?)\[\[end_highlight\]\]',
    )

    for pattern in marker_patterns:
        # re.DOTALL lets "." match newlines; without it a highlight that
        # spans several lines is never matched and its markers stay visible.
        text = re.sub(pattern, replacement, text, flags=re.DOTALL)

    return text

def process_table_with_highlights(markdown_table):
    """
    Render a markdown table that may contain highlight markers as HTML.

    The table is stripped and split on newlines, highlight markers are
    processed one line at a time, and the re-joined text is handed to
    ``convert_markdown_table_to_html``, whose result is returned.
    """
    highlighted_rows = map(process_highlights, markdown_table.strip().split('\n'))
    return convert_markdown_table_to_html('\n'.join(highlighted_rows))

def convert_markdown_table_to_html(markdown_text):
    """
    Convert a markdown table into an HTML ``<table class="md-table">``.

    Only lines whose stripped form starts with ``|`` are treated as table
    rows; all other lines are ignored. If fewer than two such rows exist,
    the text (after ``clean_json_text``) is returned instead of a table.

    When the second row contains ``---`` it is taken as the separator: the
    first row is rendered as a ``<thead>`` and the rows after the separator
    as the ``<tbody>``. Otherwise every row goes into the ``<tbody>``.
    Each cell is stripped and passed through ``process_highlights``.
    """
    markdown_text = clean_json_text(markdown_text)

    rows = [
        candidate
        for candidate in markdown_text.strip().split('\n')
        if candidate.strip().startswith('|')
    ]
    if len(rows) < 2:
        # Not enough rows for a header plus separator: give the text back.
        return markdown_text

    def highlighted_cells(row):
        # Drop the empty piece before the leading pipe, and the piece after
        # the trailing pipe when the row ends with one.
        pieces = row.split('|')
        raw_cells = pieces[1:-1] if row.strip().endswith('|') else pieces[1:]
        return [process_highlights(cell.strip()) for cell in raw_cells]

    fragments = ['<table class="md-table">']

    if '---' in rows[1]:
        header_html = ''.join(f'<th>{cell}</th>' for cell in highlighted_cells(rows[0]))
        fragments.append(f'<thead><tr>{header_html}</tr></thead>')
        # Index 1 is the separator row; data starts after it.
        data_rows = rows[2:]
    else:
        data_rows = rows

    fragments.append('<tbody>')
    for row in data_rows:
        row_html = ''.join(f'<td>{cell}</td>' for cell in highlighted_cells(row))
        fragments.append(f'<tr>{row_html}</tr>')
    fragments.append('</tbody>')

    fragments.append('</table>')
    return ''.join(fragments)

def _render_context_item(content, extra_class=""):
    """Wrap one context chunk in a ``context-item`` div, rendering it as a table when it looks like one."""
    # A chunk whose content is missing or None renders as an empty item
    # instead of raising on the membership checks below.
    content = content or ""

    # Table heuristic: a pipe somewhere, plus a newline immediately followed
    # by a pipe or a dash.
    looks_like_table = '|' in content and ('\n|' in content or '\n-' in content)
    if looks_like_table:
        body = process_table_with_highlights(content)
    else:
        body = process_highlights(content)

    return f'<div class="context-item{extra_class}">{body}</div>'


def get_context_html(example, show_full=False):
    """
    Formats the context chunks into an HTML string for display using specific CSS classes.
    Includes an alert for insufficient context and applies highlighting.

    Parameters:
    - example: The example data. Keys read here: "insufficient",
      "insufficient_reason", "full_contexts" and "contexts". Each context
      item is read through its 'content' key, and items under "contexts"
      also through 'is_primary'.
    - show_full: Boolean indicating whether to show full context. When True
      and "full_contexts" is non-empty, those chunks are shown; otherwise
      the chunks under "contexts" are shown.

    Returns the HTML string: an optional insufficient-context alert followed
    by a "context-items-container" div with one "context-item" div per chunk,
    or a single placeholder item when there is nothing to show.
    """
    parts = []

    # Add insufficient context warning if needed
    if example.get("insufficient", False):
        insufficient_reason = example.get("insufficient_reason", "")
        reason_html = f"<p>{insufficient_reason}</p>" if insufficient_reason else "<p>The context may not contain enough information to fully answer the question, or the question might be ambiguous. Models should ideally indicate this limitation or refuse to answer.</p>"

        parts.append(f"""
        <div class="insufficient-alert">
            <strong>
                <svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" style="vertical-align: middle; margin-right: 5px;">
                    <path d="m21.73 18-8-14a2 2 0 0 0-3.48 0l-8 14A2 2 0 0 0 4 21h16a2 2 0 0 0 1.73-3Z"></path>
                    <line x1="12" y1="9" x2="12" y2="13"></line>
                    <line x1="12" y1="17" x2="12.01" y2="17"></line>
                </svg>
                Insufficient Context
            </strong>
            {reason_html}
        </div>
        """)

    # Create container div for all context items
    parts.append('<div class="context-items-container">')

    if show_full and example.get("full_contexts"):
        # Full view: one plain item per chunk.
        parts.extend(
            _render_context_item(context_item.get('content', ''))
            for context_item in example["full_contexts"]
        )
    elif example.get("contexts"):
        # Highlighted view: primary chunks get an extra CSS class.
        for context_item in example["contexts"]:
            extra_class = " primary-context" if context_item.get('is_primary', False) else ""
            parts.append(_render_context_item(context_item.get('content', ''), extra_class))
    else:
        # If no contexts available, show a message
        parts.append('<div class="context-item">No context available. Try toggling to full context view.</div>')

    # Close the container div
    parts.append('</div>')

    return "".join(parts)