"""Render context chunks (plain text or markdown tables) as HTML.

Highlight markers of the form ``[[highlight_start]]...[[highlight_end]]`` or
``[[start_highlight]]...[[end_highlight]]`` are turned into highlighted spans.

NOTE(review): the reviewed copy of this file had its HTML tags stripped from
the string literals.  The tags and class names used below (``highlight``,
``md-table``, ``context-item``, ``context-items-container``,
``insufficient-alert``) are a reconstruction; only ``primary-context`` was
visible.  Confirm them against the stylesheet before merging.
"""

import json  # noqa: F401  (kept: this may be a partial view of the file)
import re

# Both marker spellings are supported.  DOTALL lets a highlight span a newline.
_HIGHLIGHT_PATTERNS = (
    re.compile(r'\[\[highlight_start\]\](.*?)\[\[highlight_end\]\]', re.DOTALL),
    re.compile(r'\[\[start_highlight\]\](.*?)\[\[end_highlight\]\]', re.DOTALL),
)
_HIGHLIGHT_REPLACEMENT = r'<span class="highlight">\1</span>'

# A backslash immediately followed by a single or double quote.
_ESCAPED_QUOTE = re.compile(r"\\(['\"])")

_DEFAULT_INSUFFICIENT_MESSAGE = (
    "The context may not contain enough information to fully answer the "
    "question, or the question might be ambiguous. Models should ideally "
    "indicate this limitation or refuse to answer."
)


def debug_text(text, label="Text"):
    """Print a short diagnostic summary of *text* to stdout.

    Parameters:
    - text: The string being inspected
    - label: Name shown in the banner line
    """
    print(f"\n--- DEBUG {label} ---")
    print(f"Length: {len(text)}")
    print(f"First 100 chars: {text[:100]}")
    print(f"Contains highlight_start: {'[[highlight_start]]' in text}")
    print(f"Contains start_highlight: {'[[start_highlight]]' in text}")
    print("-------------------------\n")


def clean_json_text(text):
    r"""Remove leftover backslash escapes in front of quotes.

    Example: ``the sky isn\'t falling`` becomes ``the sky isn't falling``.

    The previous implementation round-tripped the text through
    ``json.dumps``/``json.loads``, which always yields the input unchanged.
    Only escaped quotes are touched here; other backslash sequences are left
    alone because decoding them could corrupt legitimate text.

    Parameters:
    - text: The text to clean; non-string values are returned unchanged

    Returns the cleaned text.
    """
    if not isinstance(text, str) or '\\' not in text:
        return text
    return _ESCAPED_QUOTE.sub(r'\1', text)


def process_highlights(text):
    """Convert highlight markers in *text* into HTML highlight spans.

    Handles both marker spellings and unescapes backslash-escaped quotes
    first.  ``None`` is treated as empty text; other non-string values are
    converted with ``str``.

    NOTE(review): the text is inserted into HTML without escaping.

    Returns the text with each marked region wrapped in a highlight span.
    """
    if text is None:
        return ""
    if not isinstance(text, str):
        text = str(text)

    highlighted_text = clean_json_text(text)
    for pattern in _HIGHLIGHT_PATTERNS:
        highlighted_text = pattern.sub(_HIGHLIGHT_REPLACEMENT, highlighted_text)
    return highlighted_text


def process_table_with_highlights(markdown_table):
    """Apply highlights to a markdown table line by line, then convert it.

    Processing each line separately keeps a highlight from swallowing the
    row boundaries of the table.

    Returns the HTML produced by ``convert_markdown_table_to_html``.
    """
    lines = markdown_table.strip().split('\n')
    processed = '\n'.join(process_highlights(line) for line in lines)
    return convert_markdown_table_to_html(processed)


def _split_table_row(line):
    """Return the stripped cell texts of one ``|``-delimited table row."""
    cells = line.split('|')
    # Drop the empty piece before the leading pipe and, when the row is
    # closed by a pipe, the piece after the trailing one.
    cells = cells[1:-1] if line.strip().endswith('|') else cells[1:]
    return [cell.strip() for cell in cells]


def _render_table_row(line, cell_tag):
    """Render one table row as ``<tr>`` with ``<cell_tag>`` cells."""
    cells = ''.join(
        f'<{cell_tag}>{process_highlights(cell)}</{cell_tag}>'
        for cell in _split_table_row(line)
    )
    return f'<tr>{cells}</tr>'


def convert_markdown_table_to_html(markdown_text):
    """Convert a markdown table to an HTML table.

    Only lines whose first non-blank character is ``|`` are used; any other
    lines in the input are dropped from the output.  If fewer than two such
    lines exist, the (quote-unescaped) input is returned unchanged.  When the
    second table line contains ``---`` the first line is rendered as a header
    and the separator line is skipped.

    Returns the HTML table, or the input text if it is not a table.
    """
    markdown_text = clean_json_text(markdown_text)
    lines = markdown_text.strip().split('\n')
    table_lines = [line for line in lines if line.strip().startswith('|')]

    if len(table_lines) < 2:  # Need at least header and separator
        return markdown_text

    parts = ['<table class="md-table">']
    if '---' in table_lines[1]:
        parts.append('<thead>')
        parts.append(_render_table_row(table_lines[0], 'th'))
        parts.append('</thead>')
        body_lines = table_lines[2:]  # skip the separator row
    else:
        # No header row, treat all rows as data
        body_lines = table_lines

    parts.append('<tbody>')
    parts.extend(_render_table_row(line, 'td') for line in body_lines)
    parts.append('</tbody></table>')
    return ''.join(parts)


def _looks_like_table(text):
    """Heuristic: a pipe plus a line starting with ``|`` or ``-``."""
    return '|' in text and ('\n|' in text or '\n-' in text)


def _render_context_item(text, extra_class=""):
    """Render one context chunk as a ``context-item`` div."""
    text = "" if text is None else str(text)
    if _looks_like_table(text):
        inner = process_table_with_highlights(text)
    else:
        inner = process_highlights(text)
    return f'<div class="context-item{extra_class}">{inner}</div>'


def get_context_html(example, show_full=False):
    """Format the context chunks of *example* as an HTML string.

    Adds an alert when the example is flagged as having insufficient
    context, and applies highlighting to every chunk.

    Parameters:
    - example: Mapping that may contain ``insufficient``,
      ``insufficient_reason``, ``contexts`` and ``full_contexts``; each
      context item is a mapping with ``content`` and optionally ``is_primary``
    - show_full: When true and ``full_contexts`` is non-empty, render those
      instead of ``contexts``

    NOTE(review): ``insufficient_reason`` and chunk contents are inserted
    without HTML escaping.

    Returns the HTML string.
    """
    parts = []

    # Add insufficient context warning if needed
    if example.get("insufficient", False):
        insufficient_reason = example.get("insufficient_reason", "")
        reason_text = insufficient_reason or _DEFAULT_INSUFFICIENT_MESSAGE
        reason_html = f"<p>{reason_text}</p>"
        parts.append(f"""
<div class="insufficient-alert">
    <strong>Insufficient Context</strong>
    {reason_html}
</div>
""")

    # Container div for all context items
    parts.append('<div class="context-items-container">')

    if show_full and example.get("full_contexts"):
        for context_item in example["full_contexts"]:
            parts.append(_render_context_item(context_item.get('content')))
    elif example.get("contexts"):
        for context_item in example["contexts"]:
            is_primary = context_item.get('is_primary', False)
            extra_class = " primary-context" if is_primary else ""
            parts.append(
                _render_context_item(context_item.get('content'), extra_class)
            )
    else:
        parts.append(
            '<div class="context-item">No context available. '
            'Try toggling to full context view.</div>'
        )

    parts.append('</div>')
    return ''.join(parts)