Spaces:
Running
Running
feat: add multiple example plots
Browse files- README.md +1 -8
- data/bigwig_dataset.csv +0 -0
- src/streamlit_app.py +538 -6
README.md
CHANGED
|
@@ -10,11 +10,4 @@ tags:
|
|
| 10 |
pinned: false
|
| 11 |
short_description: NTv3 Benchmark
|
| 12 |
license: apache-2.0
|
| 13 |
-
---
|
| 14 |
-
|
| 15 |
-
# Welcome to Streamlit!
|
| 16 |
-
|
| 17 |
-
Edit `/src/streamlit_app.py` to customize this app to your heart's desire. :heart:
|
| 18 |
-
|
| 19 |
-
If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
|
| 20 |
-
forums](https://discuss.streamlit.io).
|
|
|
|
| 10 |
pinned: false
|
| 11 |
short_description: NTv3 Benchmark
|
| 12 |
license: apache-2.0
|
| 13 |
+
---
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
data/bigwig_dataset.csv
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
src/streamlit_app.py
CHANGED
|
@@ -4,6 +4,8 @@ import os
|
|
| 4 |
import pandas as pd
|
| 5 |
import streamlit as st
|
| 6 |
import plotly.express as px
|
|
|
|
|
|
|
| 7 |
|
| 8 |
# ---------------------------------------------------------------------
|
| 9 |
# Page config (must be the first Streamlit command)
|
|
@@ -68,6 +70,15 @@ MODEL_COLORS = {
|
|
| 68 |
|
| 69 |
MODEL_NAMES = list(MODEL_COLORS.keys())
|
| 70 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 71 |
_LAST_UPDATED = "Dec 10, 2025"
|
| 72 |
_INTRO = """
|
| 73 |
Benchmark across gene annotation and functionnal tracks.
|
|
@@ -181,12 +192,12 @@ _ALL_ASSAYS = (
|
|
| 181 |
_ALL_MODELS = MODEL_NAMES[:]
|
| 182 |
|
| 183 |
_BENCHMARKS = {
|
| 184 |
-
"
|
| 185 |
"df": _PEARSON_DF,
|
| 186 |
"metric_label": "Pearson correlation",
|
| 187 |
"has_assay_type": True,
|
| 188 |
},
|
| 189 |
-
"
|
| 190 |
"df": _MCC_DF,
|
| 191 |
"metric_label": "MCC",
|
| 192 |
"has_assay_type": False,
|
|
@@ -260,6 +271,10 @@ def build_leaderboard(
|
|
| 260 |
|
| 261 |
agg = agg.sort_values("Mean score", ascending=False).reset_index(drop=True)
|
| 262 |
agg = agg[["Model", "Num entries", "Mean score"]]
|
|
|
|
|
|
|
|
|
|
|
|
|
| 263 |
return agg
|
| 264 |
|
| 265 |
|
|
@@ -276,6 +291,303 @@ def build_bar_df(
|
|
| 276 |
)
|
| 277 |
|
| 278 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 279 |
# ---------------------------------------------------------------------
|
| 280 |
# UI helpers
|
| 281 |
# ---------------------------------------------------------------------
|
|
@@ -318,12 +630,30 @@ def main():
|
|
| 318 |
|
| 319 |
# Species toggles, but only for species present in this benchmark
|
| 320 |
st.sidebar.subheader("Species")
|
| 321 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 322 |
selected_species: List[str] = []
|
| 323 |
for sp in available_species:
|
| 324 |
-
if sidebar_toggle(sp, value=True, key=f"species_{benchmark_name}_{sp}"):
|
| 325 |
selected_species.append(sp)
|
| 326 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 327 |
# Assay toggles (Pearson only), based on filtered species
|
| 328 |
if cfg.get("has_assay_type", False):
|
| 329 |
st.sidebar.subheader("Assay types")
|
|
@@ -343,9 +673,10 @@ def main():
|
|
| 343 |
else:
|
| 344 |
selected_assays = []
|
| 345 |
|
|
|
|
| 346 |
# Bed track / dataset toggles (MCC only), based on species selection
|
| 347 |
selected_datasets: List[str] = []
|
| 348 |
-
if benchmark_name == "
|
| 349 |
st.sidebar.subheader("Genome annotations")
|
| 350 |
if selected_species:
|
| 351 |
df_for_tracks = df_bench[df_bench["species"].isin(selected_species)]
|
|
@@ -377,33 +708,234 @@ def main():
|
|
| 377 |
|
| 378 |
with col1:
|
| 379 |
st.subheader("๐
Leaderboard (per model)")
|
|
|
|
|
|
|
|
|
|
| 380 |
if leaderboard_df.empty:
|
| 381 |
st.info("No data for the selected filters.")
|
| 382 |
else:
|
| 383 |
st.dataframe(leaderboard_df, use_container_width=True)
|
| 384 |
|
|
|
|
| 385 |
with col2:
|
| 386 |
st.subheader("๐ Mean score per model")
|
| 387 |
if bar_df.empty:
|
| 388 |
st.info("No data for the selected filters.")
|
| 389 |
else:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 390 |
fig = px.bar(
|
| 391 |
bar_df,
|
| 392 |
x="Model",
|
| 393 |
y="Mean score",
|
| 394 |
color="Model",
|
| 395 |
color_discrete_map=MODEL_COLORS,
|
|
|
|
| 396 |
)
|
| 397 |
fig.update_layout(
|
| 398 |
barmode="group",
|
| 399 |
height=500,
|
| 400 |
-
xaxis_title="
|
| 401 |
yaxis_title="Mean score",
|
| 402 |
plot_bgcolor="rgba(0,0,0,0)",
|
| 403 |
paper_bgcolor="rgba(0,0,0,0)",
|
|
|
|
| 404 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 405 |
st.plotly_chart(fig, use_container_width=True)
|
| 406 |
|
| 407 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 408 |
if __name__ == "__main__":
|
| 409 |
main()
|
|
|
|
| 4 |
import pandas as pd
|
| 5 |
import streamlit as st
|
| 6 |
import plotly.express as px
|
| 7 |
+
from plotly.subplots import make_subplots
|
| 8 |
+
import plotly.graph_objects as go
|
| 9 |
|
| 10 |
# ---------------------------------------------------------------------
|
| 11 |
# Page config (must be the first Streamlit command)
|
|
|
|
| 70 |
|
| 71 |
MODEL_NAMES = list(MODEL_COLORS.keys())
|
| 72 |
|
| 73 |
+
PLANT_SPECIES = ["tomato", "rice", "maize", "arabidopsis"]
|
| 74 |
+
ANIMAL_SPECIES = ["human", "chicken", "cattle"]
|
| 75 |
+
|
| 76 |
+
SPECIES_GROUPS = {
|
| 77 |
+
"Plants": PLANT_SPECIES,
|
| 78 |
+
"Animals": ANIMAL_SPECIES, # (your code calls these HUMAN_SPECIES, but they're the "animal" set)
|
| 79 |
+
}
|
| 80 |
+
|
| 81 |
+
|
| 82 |
_LAST_UPDATED = "Dec 10, 2025"
|
| 83 |
_INTRO = """
|
| 84 |
Benchmark across gene annotation and functionnal tracks.
|
|
|
|
| 192 |
_ALL_MODELS = MODEL_NAMES[:]
|
| 193 |
|
| 194 |
_BENCHMARKS = {
|
| 195 |
+
"Functional Tracks": {
|
| 196 |
"df": _PEARSON_DF,
|
| 197 |
"metric_label": "Pearson correlation",
|
| 198 |
"has_assay_type": True,
|
| 199 |
},
|
| 200 |
+
"Genome Annotation": {
|
| 201 |
"df": _MCC_DF,
|
| 202 |
"metric_label": "MCC",
|
| 203 |
"has_assay_type": False,
|
|
|
|
| 271 |
|
| 272 |
agg = agg.sort_values("Mean score", ascending=False).reset_index(drop=True)
|
| 273 |
agg = agg[["Model", "Num entries", "Mean score"]]
|
| 274 |
+
|
| 275 |
+
# Ensure the index starts with 1
|
| 276 |
+
agg.index += 1
|
| 277 |
+
|
| 278 |
return agg
|
| 279 |
|
| 280 |
|
|
|
|
| 291 |
)
|
| 292 |
|
| 293 |
|
| 294 |
+
def build_category_model_df(
    benchmark_name: str,
    selected_species: List[str],
    selected_assays: List[str],
    selected_models: List[str],
    selected_datasets: List[str],
) -> pd.DataFrame:
    """
    Average the score for every (category, model) pair of the filtered data.

    The rows are first narrowed with ``filter_base_df`` using the given
    selections. The category is the ``assay_type`` column when the benchmark
    config sets ``has_assay_type`` and that column is present; otherwise it is
    the ``datasets`` column.

    Returns a frame with columns ``Category``, ``Model`` and ``Mean score``
    (rounded to 3 decimals). A non-empty result also carries a display label
    for the category in ``attrs["category_label"]``. When nothing survives the
    filters, or the category column is missing, an empty frame with the same
    three columns is returned.
    """
    result_columns = ["Category", "Model", "Mean score"]

    benchmark_cfg = _BENCHMARKS[benchmark_name]
    filtered = filter_base_df(
        benchmark_name,
        selected_species,
        selected_assays,
        selected_models,
        selected_datasets,
    )
    if filtered.empty:
        return pd.DataFrame(columns=result_columns)

    # Decide which column the scores are broken down by.
    by_assay = (
        benchmark_cfg.get("has_assay_type", False)
        and "assay_type" in filtered.columns
    )
    if by_assay:
        group_col, label = "assay_type", "Assay type"
    else:
        group_col, label = "datasets", "Dataset"

    if group_col not in filtered.columns:
        return pd.DataFrame(columns=result_columns)

    means = filtered.groupby([group_col, "Model"], as_index=False)["Score"].mean()
    means = means.rename(columns={group_col: "Category", "Score": "Mean score"})
    means["Mean score"] = means["Mean score"].round(3)
    means.attrs["category_label"] = label  # used for a nicer axis title
    return means
|
| 335 |
+
|
| 336 |
+
|
| 337 |
+
def plot_breakdown_facets_sorted_models(
    breakdown_df: pd.DataFrame,
    metric_label: str,
    height: int = 420,
):
    """
    Draw one bar panel per category, all on a single row with a shared y scale.

    Args:
        breakdown_df: Frame with ``Category``, ``Model`` and ``Mean score``
            columns. Rows whose ``Category`` is missing get no panel.
        metric_label: Title shown on the y-axis of the leftmost panel.
        height: Figure height passed to the layout.

    Returns:
        The figure produced by ``make_subplots``, or ``None`` when
        ``breakdown_df`` contains no non-null category.
    """
    categories = sorted(breakdown_df["Category"].dropna().unique())
    if not categories:
        return None

    n_cols = len(categories)  # everything sits in one row

    # Global y-range so every panel uses the same scale.
    y_min = breakdown_df["Mean score"].min()
    y_max = breakdown_df["Mean score"].max()
    pad = 0.05 * (y_max - y_min if y_max > y_min else 1.0)
    y_range = [y_min - pad, y_max + pad]

    # make_subplots rejects a horizontal spacing above 1 / (cols - 1), and a
    # spacing close to that limit leaves no width for the panels themselves.
    # Keep the tight 0.04 spacing for small grids, but never let the gaps take
    # more than half of the figure width when there are many categories.
    spacing = 0.04
    if n_cols > 1:
        spacing = min(spacing, 0.5 / (n_cols - 1))

    fig = make_subplots(
        rows=1,
        cols=n_cols,
        subplot_titles=categories,
        shared_yaxes=True,
        horizontal_spacing=spacing,
    )

    for col, cat in enumerate(categories, start=1):
        # Within each panel, bars go from the lowest to the highest score.
        sub = (
            breakdown_df[breakdown_df["Category"] == cat]
            .sort_values("Mean score", ascending=True)
        )

        fig.add_trace(
            go.Bar(
                x=sub["Model"],
                y=sub["Mean score"],
                marker_color=[MODEL_COLORS.get(m, "#808080") for m in sub["Model"]],
                showlegend=False,
            ),
            row=1,
            col=col,
        )

        fig.update_xaxes(showticklabels=False, title_text="", row=1, col=col)
        fig.update_yaxes(range=y_range, title_text="", row=1, col=col)

    fig.update_layout(
        height=height,
        plot_bgcolor="rgba(0,0,0,0)",
        paper_bgcolor="rgba(0,0,0,0)",
        margin=dict(t=60, l=10, r=10, b=10),
    )

    # Single y-axis label on the leftmost panel.
    fig.update_yaxes(title_text=metric_label, row=1, col=1)

    return fig
|
| 400 |
+
|
| 401 |
+
|
| 402 |
+
def build_radar_df(
    benchmark_name: str,
    selected_species: List[str],
    selected_assays: List[str],
    selected_models: List[str],
    selected_datasets: List[str],
) -> pd.DataFrame:
    """
    Mean score per (axis, model) pair, shaped for the radar chart.

    The rows are first narrowed with ``filter_base_df`` using the given
    selections. The radar axis is the ``assay_type`` column when the benchmark
    config sets ``has_assay_type`` and that column is present; otherwise it is
    the ``datasets`` column.

    Returns a frame with columns ``Axis``, ``Model`` and ``Value``, carrying a
    display label for the axis in ``attrs["axis_label"]``. An empty frame is
    returned when no rows survive the filters or the axis column is missing.
    """
    cfg = _BENCHMARKS[benchmark_name]

    df = filter_base_df(
        benchmark_name,
        selected_species,
        selected_assays,
        selected_models,
        selected_datasets,
    )

    if df.empty:
        return pd.DataFrame()

    # Choose axis column
    if cfg.get("has_assay_type", False) and "assay_type" in df.columns:
        axis_col, axis_label = "assay_type", "Assay type"
    else:
        axis_col, axis_label = "datasets", "Dataset"

    # Same guard as build_category_model_df: without the breakdown column the
    # groupby below would raise KeyError, so report "no data" instead.
    if axis_col not in df.columns:
        return pd.DataFrame()

    radar_df = (
        df.groupby([axis_col, "Model"], as_index=False)["Score"]
        .mean()
        .rename(columns={axis_col: "Axis", "Score": "Value"})
    )

    radar_df.attrs["axis_label"] = axis_label
    return radar_df
|
| 438 |
+
|
| 439 |
+
|
| 440 |
+
def build_pairwise_scatter_df(
    benchmark_name: str,
    selected_species: List[str],
    selected_assays: List[str],
    selected_models: List[str],
    selected_datasets: List[str],
    model_a: str,
    model_b: str,
) -> pd.DataFrame:
    """
    Build the per-track table behind the head-to-head scatter plot.

    Each row is one track (``datasets``, plus ``assay_type`` when the benchmark
    has assay types, plus ``species`` unless exactly one species is selected).
    The result has the columns ``Model A`` and ``Model B`` holding the mean
    score of ``model_a`` and ``model_b`` on that track, and a ``Track`` display
    label. Only tracks where both models have a score are kept.

    ``model_a`` and ``model_b`` may be the same model; both columns then hold
    the same values.

    Returns an empty frame when no rows survive the filters or when either
    model has no scores at all.
    """
    cfg = _BENCHMARKS[benchmark_name]

    # Filter using the same UI toggles, but ensure the chosen models are
    # included. dict.fromkeys de-duplicates while keeping a stable order.
    if selected_models:
        models_for_filter = list(dict.fromkeys([*selected_models, model_a, model_b]))
    else:
        models_for_filter = [model_a, model_b]

    df = filter_base_df(
        benchmark_name,
        selected_species,
        selected_assays,
        models_for_filter,
        selected_datasets,
    )

    if df.empty:
        return pd.DataFrame()

    # Define what "a specific track" means
    track_cols = ["datasets"]
    if cfg.get("has_assay_type", False) and "assay_type" in df.columns:
        track_cols = ["assay_type", "datasets"]

    # Keep species in the row identity unless exactly one species is selected
    keep_species = "species" in df.columns and (
        selected_species is None or len(selected_species) != 1
    )

    id_cols = (["species"] if keep_species else []) + track_cols

    # The two pickers can resolve to the same model (e.g. only one model is
    # selected), so work with the distinct model names.
    same_model = model_a == model_b
    pair = [model_a] if same_model else [model_a, model_b]

    # Pivot into one column per compared model
    wide = (
        df[df["Model"].isin(pair)]
        .pivot_table(index=id_cols, columns="Model", values="Score", aggfunc="mean")
        .reset_index()
    )

    # Require both values to exist for a dot
    if any(model not in wide.columns for model in pair):
        return pd.DataFrame()

    wide = wide.dropna(subset=pair)

    # Nice "Track" label for display
    if "assay_type" in wide.columns:
        wide["Track"] = wide["assay_type"].astype(str) + " / " + wide["datasets"].astype(str)
    else:
        wide["Track"] = wide["datasets"].astype(str)

    # Rename for plotting. A single rename dict cannot map one source column
    # to two targets, so the same-model case duplicates the column explicitly;
    # otherwise "Model A" would be missing and callers indexing it would fail.
    if same_model:
        wide = wide.rename(columns={model_a: "Model A"})
        wide["Model B"] = wide["Model A"]
    else:
        wide = wide.rename(columns={model_a: "Model A", model_b: "Model B"})

    return wide
|
| 503 |
+
|
| 504 |
+
|
| 505 |
+
def build_violin_df(
    benchmark_name: str,
    selected_species: List[str],
    selected_assays: List[str],
    selected_models: List[str],
    selected_datasets: List[str],
) -> pd.DataFrame:
    """
    Return the filtered per-row scores used for the violin plot.

    Rows are narrowed with ``filter_base_df`` and are not aggregated. The
    result is a copy holding ``Model`` and ``Score`` plus whichever of
    ``species``, ``assay_type`` and ``datasets`` exist in the filtered frame.
    """
    filtered = filter_base_df(
        benchmark_name,
        selected_species,
        selected_assays,
        selected_models,
        selected_datasets,
    )
    # Descriptive columns are optional; keep only those that are present.
    extras = [
        name
        for name in ("species", "assay_type", "datasets")
        if name in filtered.columns
    ]
    return filtered[["Model", "Score", *extras]].copy()
|
| 526 |
+
|
| 527 |
+
|
| 528 |
+
def plot_radar(
    radar_df: pd.DataFrame,
    metric_label: str,
    height: int = 600,
):
    """
    Build a radar (polar) chart with one filled trace per model.

    Args:
        radar_df: Frame with ``Axis``, ``Model`` and ``Value`` columns.
        metric_label: Title of the radial axis.
        height: Figure height passed to the layout.

    Returns:
        A ``go.Figure``, or ``None`` when ``radar_df`` is empty.
    """
    if radar_df.empty:
        return None

    axis_names = radar_df["Axis"].unique().tolist()

    # One radial range for all traces, padded by 5% of the value span
    # (or of 1.0 when all values coincide).
    lowest = radar_df["Value"].min()
    highest = radar_df["Value"].max()
    span = highest - lowest if highest > lowest else 1.0
    margin_r = 0.05 * span
    radial_range = [lowest - margin_r, highest + margin_r]

    grid_color = "rgba(0,0,0,0.15)"  # subtle grid
    transparent = "rgba(0,0,0,0)"

    fig = go.Figure()

    for model_name in radar_df["Model"].unique():
        model_rows = radar_df[radar_df["Model"] == model_name]
        # Align every model on the same axis order.
        aligned = model_rows.set_index("Axis").reindex(axis_names)

        trace = go.Scatterpolar(
            r=aligned["Value"],
            theta=axis_names,
            fill="toself",
            name=model_name,
            line_color=MODEL_COLORS.get(model_name),
            opacity=0.75,
        )
        fig.add_trace(trace)

    radial_axis = dict(
        title=metric_label,
        range=radial_range,
        tickformat=".2f",
        showgrid=True,
        gridcolor=grid_color,
    )
    angular_axis = dict(
        showgrid=True,
        gridcolor=grid_color,
    )

    fig.update_layout(
        height=height,
        polar=dict(
            bgcolor=transparent,  # polar background
            radialaxis=radial_axis,
            angularaxis=angular_axis,
        ),
        paper_bgcolor=transparent,  # entire figure background
        plot_bgcolor=transparent,  # plot area
        showlegend=True,
        legend_title_text="Model",
        margin=dict(t=40, b=40, l=40, r=40),
    )

    return fig
|
| 588 |
+
|
| 589 |
+
|
| 590 |
+
|
| 591 |
# ---------------------------------------------------------------------
|
| 592 |
# UI helpers
|
| 593 |
# ---------------------------------------------------------------------
|
|
|
|
| 630 |
|
| 631 |
# Species toggles, but only for species present in this benchmark
|
| 632 |
st.sidebar.subheader("Species")
|
| 633 |
+
|
| 634 |
+
# Toggle: Plants vs Animals
|
| 635 |
+
species_group = st.sidebar.radio(
|
| 636 |
+
"Group",
|
| 637 |
+
options=["Animals", "Plants"],
|
| 638 |
+
index=0,
|
| 639 |
+
horizontal=True,
|
| 640 |
+
key=f"species_group_{benchmark_name}",
|
| 641 |
+
)
|
| 642 |
+
|
| 643 |
+
available_species_all = sorted(df_bench["species"].unique())
|
| 644 |
+
allowed_species = set(SPECIES_GROUPS[species_group]).intersection(available_species_all)
|
| 645 |
+
available_species = sorted(allowed_species)
|
| 646 |
+
|
| 647 |
selected_species: List[str] = []
|
| 648 |
for sp in available_species:
|
| 649 |
+
if sidebar_toggle(sp, value=True, key=f"species_{benchmark_name}_{species_group}_{sp}"):
|
| 650 |
selected_species.append(sp)
|
| 651 |
|
| 652 |
+
# (Optional) If no species exist for that group in this benchmark
|
| 653 |
+
if not available_species:
|
| 654 |
+
st.sidebar.info(f"No {species_group.lower()} species available for this benchmark.")
|
| 655 |
+
|
| 656 |
+
|
| 657 |
# Assay toggles (Pearson only), based on filtered species
|
| 658 |
if cfg.get("has_assay_type", False):
|
| 659 |
st.sidebar.subheader("Assay types")
|
|
|
|
| 673 |
else:
|
| 674 |
selected_assays = []
|
| 675 |
|
| 676 |
+
|
| 677 |
# Bed track / dataset toggles (MCC only), based on species selection
|
| 678 |
selected_datasets: List[str] = []
|
| 679 |
+
if benchmark_name == "Genome Annotation":
|
| 680 |
st.sidebar.subheader("Genome annotations")
|
| 681 |
if selected_species:
|
| 682 |
df_for_tracks = df_bench[df_bench["species"].isin(selected_species)]
|
|
|
|
| 708 |
|
| 709 |
with col1:
|
| 710 |
st.subheader("๐
Leaderboard (per model)")
|
| 711 |
+
st.write("\n") # ๐ spacer to match plotly padding
|
| 712 |
+
st.write("\n")
|
| 713 |
+
st.write("\n")
|
| 714 |
if leaderboard_df.empty:
|
| 715 |
st.info("No data for the selected filters.")
|
| 716 |
else:
|
| 717 |
st.dataframe(leaderboard_df, use_container_width=True)
|
| 718 |
|
| 719 |
+
|
| 720 |
with col2:
|
| 721 |
st.subheader("๐ Mean score per model")
|
| 722 |
if bar_df.empty:
|
| 723 |
st.info("No data for the selected filters.")
|
| 724 |
else:
|
| 725 |
+
# Order models by performance (least -> most)
|
| 726 |
+
bar_df = bar_df.sort_values("Mean score", ascending=True)
|
| 727 |
+
|
| 728 |
+
model_order = bar_df["Model"].tolist()
|
| 729 |
+
|
| 730 |
fig = px.bar(
|
| 731 |
bar_df,
|
| 732 |
x="Model",
|
| 733 |
y="Mean score",
|
| 734 |
color="Model",
|
| 735 |
color_discrete_map=MODEL_COLORS,
|
| 736 |
+
category_orders={"Model": model_order}, # enforce ordering on x
|
| 737 |
)
|
| 738 |
fig.update_layout(
|
| 739 |
barmode="group",
|
| 740 |
height=500,
|
| 741 |
+
xaxis_title="",
|
| 742 |
yaxis_title="Mean score",
|
| 743 |
plot_bgcolor="rgba(0,0,0,0)",
|
| 744 |
paper_bgcolor="rgba(0,0,0,0)",
|
| 745 |
+
bargap=0.08,
|
| 746 |
)
|
| 747 |
+
|
| 748 |
+
# Hide x-axis model names (same style as the panels)
|
| 749 |
+
fig.update_xaxes(showticklabels=False)
|
| 750 |
+
|
| 751 |
st.plotly_chart(fig, use_container_width=True)
|
| 752 |
|
| 753 |
|
| 754 |
+
# --- Breakdown plot: assay_type (Functional Tracks) OR datasets (Genome Annotation) ---
|
| 755 |
+
breakdown_df = build_category_model_df(
|
| 756 |
+
benchmark_name, selected_species, selected_assays, selected_models, selected_datasets
|
| 757 |
+
)
|
| 758 |
+
|
| 759 |
+
st.subheader("๐งช Mean score by assay type / dataset (all models)")
|
| 760 |
+
if breakdown_df.empty:
|
| 761 |
+
st.info("No data for the selected filters.")
|
| 762 |
+
else:
|
| 763 |
+
fig_breakdown = plot_breakdown_facets_sorted_models(
|
| 764 |
+
breakdown_df,
|
| 765 |
+
metric_label=cfg["metric_label"],
|
| 766 |
+
height=300,
|
| 767 |
+
)
|
| 768 |
+
st.plotly_chart(fig_breakdown, use_container_width=True)
|
| 769 |
+
|
| 770 |
+
st.subheader("๐ธ๏ธ Performance by assay type / dataset (radar)")
|
| 771 |
+
radar_df = build_radar_df(
|
| 772 |
+
benchmark_name,
|
| 773 |
+
selected_species,
|
| 774 |
+
selected_assays,
|
| 775 |
+
selected_models,
|
| 776 |
+
selected_datasets,
|
| 777 |
+
)
|
| 778 |
+
|
| 779 |
+
if radar_df.empty:
|
| 780 |
+
st.info("No data for the selected filters.")
|
| 781 |
+
else:
|
| 782 |
+
fig_radar = plot_radar(
|
| 783 |
+
radar_df,
|
| 784 |
+
metric_label=cfg["metric_label"],
|
| 785 |
+
)
|
| 786 |
+
st.plotly_chart(fig_radar, use_container_width=True)
|
| 787 |
+
|
| 788 |
+
st.subheader("โ๏ธ Model comparison")
|
| 789 |
+
|
| 790 |
+
left, right = st.columns([1, 1], gap="large")
|
| 791 |
+
|
| 792 |
+
with left:
|
| 793 |
+
st.markdown("#### Head-to-head (per track)")
|
| 794 |
+
|
| 795 |
+
model_picker_options = selected_models if selected_models else _ALL_MODELS
|
| 796 |
+
default_a = model_picker_options[0] if model_picker_options else _ALL_MODELS[0]
|
| 797 |
+
default_b = model_picker_options[1] if len(model_picker_options) > 1 else (
|
| 798 |
+
_ALL_MODELS[1] if len(_ALL_MODELS) > 1 else default_a
|
| 799 |
+
)
|
| 800 |
+
|
| 801 |
+
cA, cB = st.columns([1, 1])
|
| 802 |
+
with cA:
|
| 803 |
+
model_a = st.selectbox(
|
| 804 |
+
"Model A (y-axis)",
|
| 805 |
+
options=model_picker_options,
|
| 806 |
+
index=model_picker_options.index(default_a) if default_a in model_picker_options else 0,
|
| 807 |
+
key=f"pair_model_a_{benchmark_name}",
|
| 808 |
+
)
|
| 809 |
+
with cB:
|
| 810 |
+
b_options = [m for m in model_picker_options if m != model_a] or model_picker_options
|
| 811 |
+
model_b = st.selectbox(
|
| 812 |
+
"Model B (x-axis)",
|
| 813 |
+
options=b_options,
|
| 814 |
+
index=0,
|
| 815 |
+
key=f"pair_model_b_{benchmark_name}",
|
| 816 |
+
)
|
| 817 |
+
|
| 818 |
+
scatter_df = build_pairwise_scatter_df(
|
| 819 |
+
benchmark_name,
|
| 820 |
+
selected_species,
|
| 821 |
+
selected_assays,
|
| 822 |
+
selected_models,
|
| 823 |
+
selected_datasets,
|
| 824 |
+
model_a,
|
| 825 |
+
model_b,
|
| 826 |
+
)
|
| 827 |
+
|
| 828 |
+
if scatter_df.empty:
|
| 829 |
+
st.info("No overlapping tracks for the selected filters (or one model is missing values).")
|
| 830 |
+
else:
|
| 831 |
+
min_v = float(min(scatter_df["Model A"].min(), scatter_df["Model B"].min()))
|
| 832 |
+
max_v = float(max(scatter_df["Model A"].max(), scatter_df["Model B"].max()))
|
| 833 |
+
pad = 0.05 * (max_v - min_v if max_v > min_v else 1.0)
|
| 834 |
+
axis_range = [min_v - pad, max_v + pad]
|
| 835 |
+
tick_step = (axis_range[1] - axis_range[0]) / 5
|
| 836 |
+
|
| 837 |
+
hover_cols = ["Track"]
|
| 838 |
+
for c in ["species", "assay_type", "datasets"]:
|
| 839 |
+
if c in scatter_df.columns:
|
| 840 |
+
hover_cols.append(c)
|
| 841 |
+
|
| 842 |
+
# Model A on Y, Model B on X
|
| 843 |
+
fig_scatter = px.scatter(
|
| 844 |
+
scatter_df,
|
| 845 |
+
x="Model B",
|
| 846 |
+
y="Model A",
|
| 847 |
+
hover_name="Track",
|
| 848 |
+
hover_data=hover_cols,
|
| 849 |
+
)
|
| 850 |
+
|
| 851 |
+
# Red diagonal y=x
|
| 852 |
+
fig_scatter.add_shape(
|
| 853 |
+
type="line",
|
| 854 |
+
x0=axis_range[0], y0=axis_range[0],
|
| 855 |
+
x1=axis_range[1], y1=axis_range[1],
|
| 856 |
+
xref="x", yref="y",
|
| 857 |
+
line=dict(color="red", dash="dot", width=2),
|
| 858 |
+
)
|
| 859 |
+
|
| 860 |
+
# Square + identical scale/ticks (works even with use_container_width=True)
|
| 861 |
+
fig_scatter.update_layout(
|
| 862 |
+
height=550,
|
| 863 |
+
margin=dict(l=60, r=20, t=20, b=60),
|
| 864 |
+
xaxis=dict(
|
| 865 |
+
title=f"{model_b} โ {cfg['metric_label']}",
|
| 866 |
+
range=axis_range,
|
| 867 |
+
dtick=tick_step,
|
| 868 |
+
constrain="domain",
|
| 869 |
+
),
|
| 870 |
+
yaxis=dict(
|
| 871 |
+
title=f"{model_a} โ {cfg['metric_label']}",
|
| 872 |
+
range=axis_range,
|
| 873 |
+
dtick=tick_step,
|
| 874 |
+
scaleanchor="x", # lock y to x
|
| 875 |
+
scaleratio=1,
|
| 876 |
+
constrain="domain",
|
| 877 |
+
),
|
| 878 |
+
plot_bgcolor="rgba(0,0,0,0)",
|
| 879 |
+
paper_bgcolor="rgba(0,0,0,0)",
|
| 880 |
+
)
|
| 881 |
+
|
| 882 |
+
st.plotly_chart(fig_scatter, use_container_width=True)
|
| 883 |
+
|
| 884 |
+
with right:
|
| 885 |
+
st.markdown("#### All models (distribution across tracks)")
|
| 886 |
+
|
| 887 |
+
violin_df = build_violin_df(
|
| 888 |
+
benchmark_name,
|
| 889 |
+
selected_species,
|
| 890 |
+
selected_assays,
|
| 891 |
+
selected_models,
|
| 892 |
+
selected_datasets,
|
| 893 |
+
)
|
| 894 |
+
|
| 895 |
+
if violin_df.empty:
|
| 896 |
+
st.info("No data for the selected filters.")
|
| 897 |
+
else:
|
| 898 |
+
# Order models by median performance (least -> most)
|
| 899 |
+
model_order = (
|
| 900 |
+
violin_df
|
| 901 |
+
.groupby("Model")["Score"]
|
| 902 |
+
.median()
|
| 903 |
+
.sort_values(ascending=True)
|
| 904 |
+
.index
|
| 905 |
+
.tolist()
|
| 906 |
+
)
|
| 907 |
+
|
| 908 |
+
fig_violin = px.violin(
|
| 909 |
+
violin_df,
|
| 910 |
+
x="Model",
|
| 911 |
+
y="Score",
|
| 912 |
+
color="Model",
|
| 913 |
+
color_discrete_map=MODEL_COLORS,
|
| 914 |
+
box=True, # keep inner boxplot
|
| 915 |
+
points=False, # ๐ remove all dots
|
| 916 |
+
category_orders={"Model": model_order}, # ๐ enforce ordering
|
| 917 |
+
)
|
| 918 |
+
|
| 919 |
+
fig_violin.update_layout(
|
| 920 |
+
height=650,
|
| 921 |
+
xaxis_title="",
|
| 922 |
+
yaxis_title=cfg["metric_label"],
|
| 923 |
+
plot_bgcolor="rgba(0,0,0,0)",
|
| 924 |
+
paper_bgcolor="rgba(0,0,0,0)",
|
| 925 |
+
showlegend=False,
|
| 926 |
+
)
|
| 927 |
+
|
| 928 |
+
fig_violin.update_traces(
|
| 929 |
+
box_visible=True,
|
| 930 |
+
meanline_visible=False,
|
| 931 |
+
)
|
| 932 |
+
|
| 933 |
+
# Optional: hide model names if you prefer a cleaner look
|
| 934 |
+
# fig_violin.update_xaxes(showticklabels=False)
|
| 935 |
+
|
| 936 |
+
st.plotly_chart(fig_violin, use_container_width=True)
|
| 937 |
+
|
| 938 |
+
|
| 939 |
+
|
| 940 |
if __name__ == "__main__":
|
| 941 |
main()
|