Spaces:
Running
Running
from __future__ import annotations | |
"""Conversation display helpers for vis_gradio. | |
This module encapsulates everything related to: | |
• safely parsing model responses (lists / dicts / JSON strings) | |
• pretty-printing embedded dictionaries for readability | |
• converting multiple conversation formats to the OpenAI chat list format | |
• rendering that list as HTML (including accordion grouping + raw JSON viewer). | |
Moving this logic out of utils.py keeps the latter lean and focussed on general | |
analytics utilities. | |
""" | |
from typing import List, Dict, Any | |
import ast | |
import json | |
import html | |
import markdown | |
import re | |
__all__: List[str] = [ | |
"convert_to_openai_format", | |
"display_openai_conversation_html", | |
"pretty_print_embedded_dicts", | |
] | |
# --------------------------------------------------------------------------- | |
# Pretty-printing helpers | |
# --------------------------------------------------------------------------- | |
def _find_balanced_spans(text: str): | |
"""Return (start, end) spans of balanced {...} or [...] regions in *text*.""" | |
spans, stack = [], [] | |
for i, ch in enumerate(text): | |
if ch in "{[": | |
stack.append((ch, i)) | |
elif ch in "]}" and stack: | |
opener, start = stack.pop() | |
if (opener, ch) in {("{", "}"), ("[", "]")} and not stack: | |
spans.append((start, i + 1)) | |
return spans | |
def _try_parse_slice(slice_: str): | |
"""Attempt to parse *slice_* into a Python object; return None on failure.""" | |
try: | |
return ast.literal_eval(slice_) | |
except Exception: | |
try: | |
return json.loads(slice_) | |
except Exception: | |
return None | |
def _find_code_spans(text: str) -> List[tuple]: | |
"""Return spans for markdown code regions to be preserved as-is. | |
Includes: | |
- fenced code blocks delimited by ``` ... ``` | |
- inline code segments delimited by `...` | |
""" | |
spans: List[tuple] = [] | |
# Fenced blocks ``` ... ``` (language spec allowed after opening fence) | |
idx = 0 | |
while True: | |
start = text.find("```", idx) | |
if start == -1: | |
break | |
# Find the end fence | |
end = text.find("```", start + 3) | |
if end == -1: | |
# Unclosed fence: treat rest of string as code | |
spans.append((start, len(text))) | |
break | |
spans.append((start, end + 3)) | |
idx = end + 3 | |
# Inline code `...` | |
for m in re.finditer(r"`[^`]*`", text, flags=re.DOTALL): | |
spans.append((m.start(), m.end())) | |
# Sort and merge overlapping spans | |
spans.sort() | |
merged: List[tuple] = [] | |
for s, e in spans: | |
if not merged or s > merged[-1][1]: | |
merged.append((s, e)) | |
else: | |
merged[-1] = (merged[-1][0], max(merged[-1][1], e)) | |
return merged | |
def _is_inside_any_span(start: int, end: int, spans: List[tuple]) -> bool: | |
for s, e in spans: | |
if start >= s and end <= e: | |
return True | |
return False | |
def pretty_print_embedded_dicts(text: str) -> str: | |
"""Replace dicts or list-of-dicts with a `<pre>` block, except inside code. | |
Dict-like regions that fall within markdown code spans (inline backticks | |
or fenced code blocks) are left untouched so code examples render verbatim. | |
""" | |
if not text: | |
return text | |
code_spans = _find_code_spans(text) | |
new_parts, last_idx = [], 0 | |
for start, end in _find_balanced_spans(text): | |
candidate = text[start:end] | |
parsed = _try_parse_slice(candidate) | |
is_good = isinstance(parsed, dict) or ( | |
isinstance(parsed, list) and parsed and all(isinstance(d, dict) for d in parsed) | |
) | |
if is_good and not _is_inside_any_span(start, end, code_spans): | |
new_parts.append(html.escape(text[last_idx:start], quote=False)) | |
pretty = json.dumps(parsed, indent=2, ensure_ascii=False) | |
new_parts.append( | |
f"<pre style='background:#f8f9fa;padding:10px;border-radius:4px;overflow-x:auto;'>{pretty}</pre>" | |
) | |
last_idx = end | |
new_parts.append(html.escape(text[last_idx:], quote=False)) | |
return "".join(new_parts) | |
# --------------------------------------------------------------------------- | |
# Format conversion | |
# --------------------------------------------------------------------------- | |
def convert_to_openai_format(response_data: Any): | |
"""Convert various response payloads into the OpenAI chat format list.""" | |
if isinstance(response_data, list): | |
return response_data | |
if isinstance(response_data, str): | |
# Try Python literal first (handles single quotes) | |
try: | |
parsed = ast.literal_eval(response_data) | |
if isinstance(parsed, list): | |
return parsed | |
except (ValueError, SyntaxError): | |
pass | |
# Try JSON | |
try: | |
parsed = json.loads(response_data) | |
if isinstance(parsed, list): | |
return parsed | |
except json.JSONDecodeError: | |
pass | |
# Fallback plain-text assistant message | |
return [{"role": "assistant", "content": response_data}] | |
# Fallback for any other type | |
return [{"role": "assistant", "content": str(response_data)}] | |
# --------------------------------------------------------------------------- | |
# HTML rendering | |
# --------------------------------------------------------------------------- | |
def _markdown(text: str, *, pretty_print_dicts: bool = True) -> str: | |
"""Render markdown, optionally pretty-printing any embedded dicts.""" | |
processed = pretty_print_embedded_dicts(text) if pretty_print_dicts else html.escape(text, quote=False) | |
# Configure extensions for proper code block handling | |
extensions = ["fenced_code"] | |
extension_configs = {} | |
try: | |
import pygments | |
extensions.append("codehilite") | |
extension_configs['codehilite'] = { | |
'css_class': 'highlight', | |
'use_pygments': True, | |
'guess_lang': True, | |
'linenums': False | |
} | |
except ImportError: | |
pass | |
# Convert newlines to <br> only outside of code blocks | |
# Process fenced code blocks first, then handle line breaks | |
result = markdown.markdown(processed, extensions=extensions, extension_configs=extension_configs) | |
# Add line breaks for non-code content (simple approach) | |
# This replaces single newlines with <br> but preserves code blocks | |
import re | |
# Split by code blocks to avoid affecting them | |
code_block_pattern = r'(<pre[^>]*>.*?</pre>|<code[^>]*>.*?</code>)' | |
parts = re.split(code_block_pattern, result, flags=re.DOTALL) | |
for i in range(0, len(parts), 2): # Process non-code parts only | |
if i < len(parts): | |
# Replace single newlines with <br>, but not double newlines (paragraphs) | |
parts[i] = re.sub(r'(?<!\n)\n(?!\n)', '<br>\n', parts[i]) | |
return ''.join(parts) | |
def display_openai_conversation_html(conversation_data: List[Dict[str, Any]], *, use_accordion: bool = True, pretty_print_dicts: bool = True, evidence: Any = None) -> str: | |
"""Convert an OpenAI-style conversation list into styled HTML for Gradio.""" | |
from .examples_helpers import annotate_text_with_evidence_placeholders, HIGHLIGHT_START, HIGHLIGHT_END | |
if not conversation_data: | |
return "<p>No conversation data available</p>" | |
# Collapsed raw JSON section for debugging | |
raw_json = json.dumps(conversation_data, indent=2, ensure_ascii=False) | |
html_out = f""" | |
<details style="margin: 8px 0;"> | |
<summary style="cursor: pointer; font-weight: 600;"> | |
Click to see raw response ({len(conversation_data)}) | |
</summary> | |
<div style="padding: 8px 15px;"> | |
<pre style="white-space: pre-wrap; word-wrap: break-word; background: #f8f9fa; padding: 10px; border-radius: 4px; overflow-x: auto;">{raw_json}</pre> | |
</div> | |
</details> | |
""" | |
role_colors = { | |
"system": "#ff6b6b", | |
"info": "#4ecdc4", | |
"assistant": "#45b7d1", | |
"tool": "#96ceb4", | |
"user": "#feca57", | |
} | |
def _maybe_annotate(content_str: str) -> str: | |
if evidence is None or not isinstance(content_str, str) or not content_str.strip(): | |
return content_str | |
return annotate_text_with_evidence_placeholders(content_str, evidence) | |
def _replace_placeholders_with_mark(html_str: str) -> str: | |
if not html_str: | |
return html_str | |
return ( | |
html_str | |
.replace(HIGHLIGHT_START, "<mark class=\"evidence-highlight\">") | |
.replace(HIGHLIGHT_END, "</mark>") | |
) | |
def _format_msg(role: str, content: Any) -> str: | |
if isinstance(content, dict) or (isinstance(content, list) and content and all(isinstance(d, dict) for d in content)): | |
if pretty_print_dicts: | |
content_html = ( | |
f"<pre style='background: #f8f9fa; padding: 10px; border-radius: 4px; overflow-x: auto;'>{json.dumps(content, indent=2, ensure_ascii=False)}</pre>" | |
) | |
else: | |
content_html = f"<code>{html.escape(json.dumps(content, ensure_ascii=False))}</code>" | |
elif isinstance(content, str): | |
# Insert highlight placeholders before markdown so offsets make sense in plain text | |
annotated = _maybe_annotate(content) | |
content_html = _markdown(annotated, pretty_print_dicts=pretty_print_dicts) | |
# Convert placeholders to <mark> after markdown | |
content_html = _replace_placeholders_with_mark(content_html) | |
elif content is None: | |
content_html = "<em>(No content)</em>" | |
else: | |
content_html = str(content) | |
color = role_colors.get(role.lower(), "#95a5a6") | |
return ( | |
f"<div style='border-left: 4px solid {color}; margin: 8px 0; background-color: #ffffff; padding: 12px; border-radius: 0 8px 8px 0;'>" | |
f"<div style='font-weight: 600; color: {color}; margin-bottom: 8px; text-transform: capitalize; font-size: 14px;'>{role}</div>" | |
f"<div style='color: #333; line-height: 1.6; font-family: \"Segoe UI\", Tahoma, Geneva, Verdana, sans-serif;'>{content_html}</div>" | |
"</div>" | |
) | |
if use_accordion: | |
system_msgs, info_msgs, other_msgs = [], [], [] | |
for m in conversation_data: | |
if not isinstance(m, dict): | |
continue | |
role = m.get("role", "unknown").lower() | |
content = m.get("content", "") | |
if isinstance(content, dict) and "text" in content: | |
content = content["text"] | |
if role == "system": | |
system_msgs.append((role, content)) | |
elif role == "info": | |
info_msgs.append((role, content)) | |
else: | |
other_msgs.append((role, content)) | |
def _accordion(title: str, items: List): | |
if not items: | |
return "" | |
inner = "".join(_format_msg(r, c) for r, c in items) | |
return ( | |
f"<details style='margin: 8px 0;'>" | |
f"<summary style='cursor: pointer; font-weight: 600;'>" | |
f"{html.escape(title)} ({len(items)})" # e.g. "Click to see system messages (3)" | |
f"</summary>" | |
f"<div style='padding: 8px 15px;'>{inner}</div>" | |
"</details>" | |
) | |
html_out += _accordion("Click to see system messages", system_msgs) | |
html_out += _accordion("Click to see info messages", info_msgs) | |
for r, c in other_msgs: | |
html_out += _format_msg(r, c) | |
else: | |
# No accordion: just render everything | |
for m in conversation_data: | |
if not isinstance(m, dict): | |
continue | |
role = m.get("role", "unknown").lower() | |
content = m.get("content", "") | |
if isinstance(content, dict) and "text" in content: | |
content = content["text"] | |
html_out += _format_msg(role, content) | |
# CSS for proper code block styling and summary hover effects | |
css_styles = """ | |
<style> | |
.evidence-highlight { background: #ffff8b; padding: 0 2px; } | |
:root { | |
/* Code block color palette - GitHub Light inspired */ | |
--code-bg: #f6f8fa; | |
--code-text: #24292f; | |
--code-comment: #6a737d; | |
--code-keyword: #d73a49; | |
--code-string: #032f62; | |
--code-number: #005cc5; | |
--code-operator: #24292f; | |
--code-function: #6f42c1; | |
--code-border: #d0d7de; | |
/* Inline code colors - same light theme */ | |
--inline-code-bg: #f3f4f6; | |
--inline-code-text: #24292f; | |
--inline-code-border: #d1d5db; | |
/* Code block structure */ | |
--code-border-radius: 8px; | |
--code-padding: 16px; | |
--code-font-size: 14px; | |
--code-line-height: 1.5; | |
--code-font-family: 'JetBrains Mono', 'Fira Code', 'Cascadia Code', 'SF Mono', Consolas, 'Liberation Mono', Menlo, Courier, monospace; | |
} | |
/* Base code styling */ | |
pre, code { | |
font-family: var(--code-font-family) !important; | |
font-size: var(--code-font-size) !important; | |
line-height: var(--code-line-height) !important; | |
font-variant-ligatures: normal !important; | |
-webkit-font-smoothing: antialiased !important; | |
-moz-osx-font-smoothing: grayscale !important; | |
} | |
/* Fenced code blocks - light theme */ | |
.highlight, .codehilite, pre.highlight, pre.codehilite, | |
.language-python, .language-text, .language-bash { | |
background: var(--code-bg) !important; | |
color: var(--code-text) !important; | |
border: 1px solid var(--code-border) !important; | |
border-radius: var(--code-border-radius) !important; | |
padding: var(--code-padding) !important; | |
margin: 12px 0 !important; | |
overflow-x: auto !important; | |
box-shadow: 0 2px 4px rgba(0, 0, 0, 0.05) !important; | |
position: relative !important; | |
white-space: pre !important; | |
display: block !important; | |
} | |
.highlight pre, .codehilite pre { | |
background: transparent !important; | |
color: inherit !important; | |
margin: 0 !important; | |
padding: 0 !important; | |
border: none !important; | |
border-radius: 0 !important; | |
overflow: visible !important; | |
white-space: pre !important; | |
display: block !important; | |
} | |
/* Ensure code blocks preserve formatting */ | |
.highlight code, .codehilite code { | |
white-space: pre !important; | |
display: block !important; | |
padding: 0 !important; | |
margin: 0 !important; | |
background: transparent !important; | |
border: none !important; | |
font-size: inherit !important; | |
line-height: inherit !important; | |
} | |
/* Add language label for fenced blocks */ | |
.highlight::before, .codehilite::before { | |
content: 'python'; | |
position: absolute; | |
top: 8px; | |
right: 12px; | |
background: rgba(0, 0, 0, 0.05); | |
color: #586069; | |
padding: 2px 8px; | |
border-radius: 4px; | |
font-size: 11px; | |
font-weight: 500; | |
text-transform: uppercase; | |
letter-spacing: 0.5px; | |
} | |
/* Syntax highlighting for Python - Light theme */ | |
.highlight .k, .codehilite .k, /* keywords */ | |
.highlight .kn, .codehilite .kn, /* keyword.namespace */ | |
.highlight .kp, .codehilite .kp, /* keyword.pseudo */ | |
.highlight .kr, .codehilite .kr, /* keyword.reserved */ | |
.highlight .kt, .codehilite .kt /* keyword.type */ | |
{ | |
color: var(--code-keyword) !important; | |
font-weight: 600 !important; | |
} | |
.highlight .s, .codehilite .s, /* strings */ | |
.highlight .s1, .codehilite .s1, /* string.single */ | |
.highlight .s2, .codehilite .s2, /* string.double */ | |
.highlight .se, .codehilite .se /* string.escape */ | |
{ | |
color: var(--code-string) !important; | |
} | |
.highlight .c, .codehilite .c, /* comments */ | |
.highlight .c1, .codehilite .c1, /* comment.single */ | |
.highlight .cm, .codehilite .cm /* comment.multiline */ | |
{ | |
color: var(--code-comment) !important; | |
font-style: italic !important; | |
} | |
.highlight .m, .codehilite .m, /* numbers */ | |
.highlight .mi, .codehilite .mi, /* number.integer */ | |
.highlight .mf, .codehilite .mf, /* number.float */ | |
.highlight .mo, .codehilite .mo /* number.octal */ | |
{ | |
color: var(--code-number) !important; | |
font-weight: 600 !important; | |
} | |
.highlight .nf, .codehilite .nf, /* function names */ | |
.highlight .fm, .codehilite .fm /* function.magic */ | |
{ | |
color: var(--code-function) !important; | |
font-weight: 600 !important; | |
} | |
.highlight .o, .codehilite .o, /* operators */ | |
.highlight .ow, .codehilite .ow /* operator.word */ | |
{ | |
color: var(--code-operator) !important; | |
} | |
/* Inline code - light theme */ | |
p code, li code, div code, span code, | |
h1 code, h2 code, h3 code, h4 code, h5 code, h6 code { | |
background: var(--inline-code-bg) !important; | |
color: var(--inline-code-text) !important; | |
border: 1px solid var(--inline-code-border) !important; | |
padding: 2px 6px !important; | |
border-radius: 4px !important; | |
font-size: 0.9em !important; | |
font-weight: 600 !important; | |
white-space: nowrap !important; | |
box-shadow: none !important; | |
display: inline !important; | |
} | |
/* Code blocks inside paragraphs should not be treated as inline */ | |
p pre, li pre, div pre { | |
background: var(--code-bg) !important; | |
color: var(--code-text) !important; | |
border: 1px solid var(--code-border) !important; | |
border-radius: var(--code-border-radius) !important; | |
padding: var(--code-padding) !important; | |
margin: 8px 0 !important; | |
white-space: pre !important; | |
overflow-x: auto !important; | |
display: block !important; | |
} | |
/* Scrollbar styling for code blocks - light theme */ | |
.highlight::-webkit-scrollbar, .codehilite::-webkit-scrollbar, | |
pre::-webkit-scrollbar { | |
height: 8px !important; | |
background: #f1f3f4 !important; | |
border-radius: 4px !important; | |
} | |
.highlight::-webkit-scrollbar-thumb, .codehilite::-webkit-scrollbar-thumb, | |
pre::-webkit-scrollbar-thumb { | |
background: #c1c8cd !important; | |
border-radius: 4px !important; | |
} | |
.highlight::-webkit-scrollbar-thumb:hover, .codehilite::-webkit-scrollbar-thumb:hover, | |
pre::-webkit-scrollbar-thumb:hover { | |
background: #a8b3ba !important; | |
} | |
</style> | |
""" | |
if use_accordion: | |
css_styles += """ | |
/* Accordion styling */ | |
details > summary { | |
list-style: none !important; | |
cursor: pointer !important; | |
} | |
details > summary:hover { | |
background-color: transparent !important; | |
box-shadow: none !important; | |
transform: none !important; | |
} | |
details > summary::-webkit-details-marker, | |
details > summary::marker { | |
display: none !important; | |
} | |
""" | |
css_styles += "</style>" | |
html_out = css_styles + html_out | |
return html_out |