visualize_eval_results / src /substitutions_visualizer.py
Yoad
Add subs visualizer
7b3ae60
import html

import pandas as pd
import streamlit as st
from jiwer import process_words

from visual_eval.evaluator import extract_substitution_samples, HebrewTextNormalizer
# Inline stylesheet that is emitted ahead of the substitutions table markup.
# Each substitution is drawn as two consecutive rows: a green ".ref" row
# followed by a red ".hyp" row that carries the bottom border separating
# pairs. The ":hover", "+ .sub-row" and ":has(+ .sub-row:hover)" selectors
# exist so that hovering either row of a pair highlights both; they rely on
# the header row occupying the first child position, which makes every ref
# row even-numbered and every hyp row odd-numbered.
subs_table_styles = """
<style>
.sub-table {
background: white;
width: 100%;
border-collapse: collapse;
color: black;
}
.sub-row {
cursor: pointer;
transition: all 0.2s;
}
.sub-row .txt {
text-align: center;
}
.sub-row:nth-child(even):hover {
background: #eee;
}
.sub-row:nth-child(odd):hover {
background: #eee;
}
.sub-row:nth-child(even):hover + .sub-row {
background: #eee;
}
.sub-row:nth-child(even):has(+ .sub-row:hover) {
background: #eee;
}
.sub-row.ref {
color: green;
}
.sub-row.ref .ctx {
text-align: end;
}
.sub-row.hyp {
color: red;
border-bottom: 1px solid black;
}
.sub-row.hyp .ctx {
text-align: start;
}
</style>
"""
def _substitution_row_html(ref_ctx, ref, hyp, hyp_ctx):
    """Return the ref-row / hyp-row HTML fragment for a single substitution.

    All four arguments are plain text. They are HTML-escaped here so that
    characters such as ``<``, ``>``, ``&`` or quotes appearing in a transcript
    are displayed literally instead of being parsed as markup.
    """
    ref_ctx, ref, hyp, hyp_ctx = map(html.escape, (ref_ctx, ref, hyp, hyp_ctx))
    return f"""
    <tr class="sub-row ref">
        <td class="ctx">{ref_ctx}</td>
        <td class="txt">{ref}</td>
        <td></td>
    </tr>
    <tr class="sub-row hyp">
        <td></td>
        <td class="txt">{hyp}</td>
        <td class="ctx">{hyp_ctx}</td>
    </tr>
    """


@st.cache_data
def visualize_substitutions(ref, hyp):
    """Render a "Substitutions List" table comparing ``ref`` against ``hyp``.

    Both inputs are passed through ``HebrewTextNormalizer`` and aligned with
    ``jiwer.process_words``. Every sample yielded by
    ``extract_substitution_samples`` becomes a pair of table rows: the
    reference words with their context, then the hypothesis words with
    theirs. The result is written to the page with ``st.subheader`` and
    ``st.html``; nothing is returned.

    Args:
        ref: Reference transcript, forwarded to the normalizer.
        hyp: Hypothesis transcript, forwarded to the normalizer.
    """
    norm = HebrewTextNormalizer()
    wer_word_output = process_words(norm(ref), norm(hyp))

    # Only the first entry of references/hypotheses is indexed, as in the
    # original code. NOTE(review): this assumes a single ref/hyp pair and that
    # each *_context_span unpacks into valid slice() arguments -- confirm
    # against extract_substitution_samples.
    sub_rows_html = [
        _substitution_row_html(
            ref_ctx=" ".join(
                wer_word_output.references[0][slice(*sample.ref_context_span)]
            ),
            ref=" ".join(sample.ref),
            hyp=" ".join(sample.hyp),
            hyp_ctx=" ".join(
                wer_word_output.hypotheses[0][slice(*sample.hyp_context_span)]
            ),
        )
        for sample in extract_substitution_samples(wer_word_output)
    ]

    st.subheader("Substitutions List")

    # The stylesheet is emitted together with the table so the markup is
    # self-contained in a single st.html call.
    table_html = f"""
    {subs_table_styles}
    <table class="sub-table" dir="rtl" lang="he">
        <tr>
            <th style="text-align: end;">Ref Context</th>
            <th style="text-align: center;">Ref/Hyp</th>
            <th style="text-align: start;">Hyp Context</th>
        </tr>
        {"".join(sub_rows_html)}
    </table>
    """
    st.html(table_html)