Spaces:

EmbodiedCity
/

iWorld-Bench

Running

App Files Files Community

iWorldBench committed on Mar 1

Commit

4097ba4

0 Parent(s):

Initial commit: iWorld-Bench leaderboard with full code and data

Browse files

Files changed (11) hide show

.gitignore +8 -0
app.py +229 -0
data/results.csv +15 -0
requirements.txt +6 -0
src/__init__.py +0 -0
src/data_loader.py +103 -0
src/leaderboard.py +68 -0
src/plotter.py +54 -0
src/radar_plotter.py +52 -0
src/styling.py +128 -0
src/utils.py +31 -0

.gitignore ADDED Viewed

	@@ -0,0 +1,8 @@

+bench/
+__pycache__/
+*.pyc
+*.pyo
+*.pyd
+.vscode/
+.idea/
+.DS_Store

app.py ADDED Viewed

	@@ -0,0 +1,229 @@

+"""iWorld-Bench Leaderboard - Hugging Face Space"""
+from typing import Optional, List
+import gradio as gr
+import pandas as pd
+import matplotlib.pyplot as plt
+from src.data_loader import DataLoader
+from src.leaderboard import Leaderboard
+from src.plotter import Plotter
+from src.radar_plotter import RadarPlotter
+from src.styling import dataframe_to_html, get_academic_css
+from src.utils import get_metric_choices, clean_metric_names
+# Initialize components
+# The leaderboard and both plotters receive the same DataLoader instance.
+data_loader = DataLoader(results_dir="./data")
+leaderboard = Leaderboard(data_loader)
+plotter = Plotter(data_loader)
+radar_plotter = RadarPlotter(data_loader)
+# Display label (including its emoji marker) of the metric pre-selected in the UI.
+DEFAULT_METRIC = "Average ⭐"
+def reload_data():
+    msg = data_loader.reload_data()
+    if data_loader.df_all is None or data_loader.df_all.empty:
+        dummy_fig, ax = plt.subplots(figsize=(6, 3))
+        ax.text(0.5, 0.5, msg, ha="center", va="center")
+        ax.axis("off")
+        placeholder_html = "<div class='placeholder'>No data available</div>"
+        return msg, gr.update(choices=["All"], value="All"), \
+               gr.update(choices=["All"], value="All"), \
+               gr.update(choices=["All"], value="All"), \
+               placeholder_html, dummy_fig
+    open_source_choices = data_loader.get_open_source_choices()
+    year_choices = data_loader.get_year_choices()
+    category_choices = data_loader.get_category_choices()
+    all_metrics_with_markers = [m for m in get_metric_choices() if m != "Average ⭐"]
+    table_df = leaderboard.update_leaderboard(
+        metric="Average",
+        top_k=25,
+        model_filter="",
+        open_source_filter="All",
+        year_filter="All",
+        category_filter="All",
+        sort_mode="Auto",
+        selected_metrics=clean_metric_names(all_metrics_with_markers),
+    )
+    radar_fig = radar_plotter.create_radar_chart()
+    html_table = dataframe_to_html(table_df)
+    return msg, \
+           gr.update(choices=open_source_choices, value="All"), \
+           gr.update(choices=year_choices, value="All"), \
+           gr.update(choices=category_choices, value="All"), \
+           html_table, radar_fig
+def update_leaderboard_wrapper(metric, top_k, model_filter, open_source_filter,
+                               year_filter, category_filter, sort_mode, selected_metrics):
+    clean_metric = clean_metric_names([metric])[0]
+    clean_selected_metrics = clean_metric_names(selected_metrics)
+    table_df = leaderboard.update_leaderboard(
+        clean_metric, top_k, model_filter, open_source_filter,
+        year_filter, category_filter, sort_mode, clean_selected_metrics
+    )
+    displayed_models = table_df["Model"].tolist() if not table_df.empty else []
+    if displayed_models and data_loader.df_all is not None:
+        radar_df = data_loader.df_all[data_loader.df_all["Model"].isin(displayed_models)].copy()
+    else:
+        radar_df = pd.DataFrame()
+    radar_fig = radar_plotter.create_radar_chart(radar_df)
+    html_table = dataframe_to_html(table_df)
+    return html_table, radar_fig
+def create_comparison_plot_wrapper(model_filter, open_source_filter, year_filter,
+                                  category_filter, selected_plot_metric, plot_sort_mode):
+    clean_metric = clean_metric_names([selected_plot_metric])[0]
+    return plotter.create_comparison_plot(model_filter, open_source_filter,
+                                         year_filter, category_filter,
+                                         clean_metric, plot_sort_mode)
+# Define CSS once (outside the main block to be used in Blocks)
+academic_css = get_academic_css()
+with gr.Blocks(css=academic_css) as demo:
+    # Page header. The paper / code / website URLs below still contain "xxx" placeholders.
+    gr.Markdown(
+        """
+# <span class="emoji">🌍</span> iWorld-Bench Leaderboard
+<span class="subtitle">A Benchmark for Interactive World Models with a Unified Action Generation Framework</span>
+**[📄 Paper](https://arxiv.org/abs/xxx) | [💻 Code](https://github.com/xxx/iworld-bench) | [🌐 Website](https://xxx.github.io/iworld-bench)**
+        """,
+        elem_id="title"
+    )
+    # Status line; overwritten with the message returned by reload_data.
+    status_box = gr.Markdown("Loading results...", elem_id="status")
+    # Ranking controls: primary metric, sort order and number of rows.
+    with gr.Row():
+        with gr.Column(scale=2):
+            metric_choices = get_metric_choices()
+            metric_dropdown = gr.Dropdown(
+                label="Primary Ranking Metric",
+                choices=metric_choices,
+                value=DEFAULT_METRIC,
+                interactive=True,
+            )
+        with gr.Column(scale=1):
+            sort_mode_radio = gr.Radio(
+                label="Sort Order",
+                choices=["Auto", "Ascending (low → high)", "Descending (high → low)"],
+                value="Auto",
+                interactive=True,
+            )
+            topk_slider = gr.Slider(
+                label="Display Top-K Models",
+                minimum=3, maximum=50, value=25, step=1,
+                interactive=True,
+            )
+    # Extra columns: every metric except "Average ⭐" is offered and pre-selected.
+    with gr.Row():
+        metrics_select = gr.CheckboxGroup(
+            label="Additional Metrics to Display (📊 indicates dimension metrics)",
+            choices=[m for m in metric_choices if m != "Average ⭐"],
+            value=[m for m in metric_choices if m != "Average ⭐"],
+            interactive=True,
+        )
+    # Filters. The dropdowns start with only "All"; reload_data supplies the real choices.
+    with gr.Row():
+        with gr.Column(scale=1):
+            model_filter_box = gr.Textbox(
+                label="Filter by Model Name",
+                placeholder="Enter model name (partial match)",
+                interactive=True,
+            )
+        with gr.Column(scale=1):
+            open_source_dropdown = gr.Dropdown(
+                label="Filter by Open Source",
+                choices=["All"],
+                value="All",
+                interactive=True,
+            )
+        with gr.Column(scale=1):
+            year_dropdown = gr.Dropdown(
+                label="Filter by Year",
+                choices=["All"],
+                value="All",
+                interactive=True,
+            )
+        with gr.Column(scale=1):
+            category_dropdown = gr.Dropdown(
+                label="Filter by Category",
+                choices=["All"],
+                value="All",
+                interactive=True,
+            )
+    # Action buttons.
+    with gr.Row():
+        reload_button = gr.Button("🔄 Reload Data", variant="secondary", size="sm")
+        update_button = gr.Button("✅ Update Leaderboard", variant="primary", size="sm")
+    # Outputs: the leaderboard table (raw HTML) and the radar chart.
+    leaderboard_html = gr.HTML(
+        label="Leaderboard Table",
+        value="<div class='placeholder'>Leaderboard will be displayed here...</div>"
+    )
+    with gr.Row():
+        radar_plot = gr.Plot(label="Dimension Radar Chart", format="png")
+    # Comparison plot controls and output.
+    with gr.Row():
+        with gr.Column(scale=2):
+            plot_metric_radio = gr.Radio(
+                label="Select Metric for Comparison Plot",
+                choices=metric_choices,
+                value=DEFAULT_METRIC,
+                interactive=True,
+            )
+        with gr.Column(scale=1):
+            plot_sort_radio = gr.Radio(
+                label="Plot Sort Order",
+                choices=["Ascending (low → high)", "Descending (high → low)"],
+                value="Descending (high → low)",
+                interactive=True,
+            )
+            plot_update_button = gr.Button("📊 Generate Comparison Plot", variant="primary", size="sm")
+    comparison_plot = gr.Plot(label="Model Comparison Visualization", format="png")
+    # Event wiring. The order of each outputs list must match the tuple the callback returns.
+    reload_button.click(
+        fn=reload_data,
+        inputs=[],
+        outputs=[status_box, open_source_dropdown, year_dropdown, category_dropdown, leaderboard_html, radar_plot],
+    )
+    update_button.click(
+        fn=update_leaderboard_wrapper,
+        inputs=[
+            metric_dropdown, topk_slider, model_filter_box,
+            open_source_dropdown, year_dropdown, category_dropdown,
+            sort_mode_radio, metrics_select,
+        ],
+        outputs=[leaderboard_html, radar_plot],
+    )
+    plot_update_button.click(
+        fn=create_comparison_plot_wrapper,
+        inputs=[
+            model_filter_box, open_source_dropdown, year_dropdown, category_dropdown,
+            plot_metric_radio, plot_sort_radio,
+        ],
+        outputs=[comparison_plot],
+    )
+    # Registered on the app's load event with the same callback as the reload button.
+    demo.load(
+        fn=reload_data,
+        inputs=[],
+        outputs=[status_box, open_source_dropdown, year_dropdown, category_dropdown, leaderboard_html, radar_plot],
+    )
+# Script entry point: serve the app on all network interfaces, port 7860.
+if __name__ == "__main__":
+    demo.launch(
+        server_name="0.0.0.0",
+        server_port=7860,
+    )

data/results.csv ADDED Viewed

	@@ -0,0 +1,15 @@

+Model,Category,Average,Image Quality,Brightness Consistency,Color Temperature,Sharpness Retention,Motion Smoothness,Trajectory Accuracy,Memory Symmetry,Trajectory Alignment,Year
+NVIDIA Cosmos,Text-Conditioned,0.6275,0.6778,0.6952,0.7170,0.4363,0.9907,0.4955,0.3738,0.6419,2024
+HunyuanVideo-1.5,Text-Conditioned,0.7188,0.7128,0.7027,0.7477,0.5545,0.9908,0.6844,0.6336,0.6449,2024
+WAN 2.2,Text-Conditioned,0.5731,0.5545,0.3886,0.3411,0.3428,0.9557,0.6514,0.4480,0.5703,2024
+CogVideoX-I2V,Text-Conditioned,0.6963,0.6521,0.8988,0.8129,0.7951,0.9938,0.5950,0.6010,0.4084,2024
+YUME 1.5,Text-Conditioned,0.6209,0.6232,0.3810,0.4165,0.4023,0.9765,0.7113,0.5276,0.5988,2024
+Matrix-game 2.0,One-hot,0.5663,0.4851,0.2963,0.2937,0.4149,0.9848,0.7008,0.3311,0.6362,2024
+HY-World 1.5,One-hot,0.7873,0.6675,0.8051,0.7819,0.6634,0.9921,0.7472,0.8481,0.6776,2024
+CameraCtrl,Intrinsics/Extrinsics,0.5762,0.4473,0.3717,0.2511,0.4545,0.9796,0.6778,0.4279,0.6097,2024
+MotionCtrl,Intrinsics/Extrinsics,0.5486,0.4562,0.3980,0.2012,0.4294,0.9735,0.6730,0.3098,0.5932,2024
+CamI2V,Intrinsics/Extrinsics,0.5765,0.5284,0.4343,0.3568,0.4297,0.9861,0.6314,0.3631,0.6038,2024
+RealCam-I2V,Intrinsics/Extrinsics,0.6865,0.6227,0.4130,0.5547,0.6269,0.9860,0.5630,0.7948,0.6668,2024
+videox-fun-Wan,Intrinsics/Extrinsics,0.7474,0.6410,0.5972,0.5473,0.5998,0.9858,0.7172,0.9009,0.6876,2024
+AC3D,Intrinsics/Extrinsics,0.7149,0.4573,0.7307,0.6524,0.5332,0.9919,0.5785,0.9068,0.6250,2024
+ASTRA,Intrinsics/Extrinsics,0.5980,0.5335,0.5091,0.4338,0.5488,0.9799,0.6115,0.4323,0.5518,2024

requirements.txt ADDED Viewed

	@@ -0,0 +1,6 @@

+gradio>=4.0.0
+huggingface-hub>=0.20.0
+pandas>=2.0.0
+matplotlib>=3.7.0
+numpy>=1.24.0
+plotly>=5.0.0

src/__init__.py ADDED Viewed

File without changes

src/data_loader.py ADDED Viewed

	@@ -0,0 +1,103 @@

+import pandas as pd
+import os
+from typing import Optional, List
+class DataLoader:
+    def __init__(self, results_dir: str = "./data"):
+        self.results_dir = results_dir
+        self.df_all: Optional[pd.DataFrame] = None
+        self.reload_data()
+    def reload_data(self) -> str:
+        csv_path = os.path.join(self.results_dir, "results.csv")
+        if not os.path.exists(csv_path):
+            self._create_sample_data(csv_path)
+        try:
+            self.df_all = pd.read_csv(csv_path)
+            # Ensure numeric columns are float
+            numeric_cols = self.df_all.select_dtypes(include=['float64', 'int64']).columns
+            for col in numeric_cols:
+                self.df_all[col] = pd.to_numeric(self.df_all[col], errors='coerce')
+            return f"✅ Loaded {len(self.df_all)} models from {csv_path}"
+        except Exception as e:
+            self.df_all = None
+            return f"❌ Error loading data: {str(e)}"
+    def _create_sample_data(self, path: str):
+        os.makedirs(os.path.dirname(path), exist_ok=True)
+        data = {
+            "Model": [
+                "NVIDIA Cosmos", "HunyuanVideo-1.5", "WAN 2.2", "CogVideoX-I2V", "YUME 1.5",
+                "Matrix-game 2.0", "HY-World 1.5",
+                "CameraCtrl", "MotionCtrl", "CamI2V", "RealCam-I2V", "videox-fun-Wan", "AC3D", "ASTRA"
+            ],
+            "Category": [
+                "Text-Conditioned", "Text-Conditioned", "Text-Conditioned", "Text-Conditioned", "Text-Conditioned",
+                "One-hot", "One-hot",
+                "Intrinsics/Extrinsics", "Intrinsics/Extrinsics", "Intrinsics/Extrinsics",
+                "Intrinsics/Extrinsics", "Intrinsics/Extrinsics", "Intrinsics/Extrinsics", "Intrinsics/Extrinsics"
+            ],
+            "Average": [0.6275, 0.7188, 0.5731, 0.6963, 0.6209, 0.5663, 0.7873,
+                        0.5762, 0.5486, 0.5765, 0.6865, 0.7474, 0.7149, 0.5980],
+            "Image Quality": [0.6778, 0.7128, 0.5545, 0.6521, 0.6232, 0.4851, 0.6675,
+                              0.4473, 0.4562, 0.5284, 0.6227, 0.6410, 0.4573, 0.5335],
+            "Brightness Consistency": [0.6952, 0.7027, 0.3886, 0.8988, 0.3810, 0.2963, 0.8051,
+                                        0.3717, 0.3980, 0.4343, 0.4130, 0.5972, 0.7307, 0.5091],
+            "Color Temperature": [0.7170, 0.7477, 0.3411, 0.8129, 0.4165, 0.2937, 0.7819,
+                                  0.2511, 0.2012, 0.3568, 0.5547, 0.5473, 0.6524, 0.4338],
+            "Sharpness Retention": [0.4363, 0.5545, 0.3428, 0.7951, 0.4023, 0.4149, 0.6634,
+                                    0.4545, 0.4294, 0.4297, 0.6269, 0.5998, 0.5332, 0.5488],
+            "Motion Smoothness": [0.9907, 0.9908, 0.9557, 0.9938, 0.9765, 0.9848, 0.9921,
+                                  0.9796, 0.9735, 0.9861, 0.9860, 0.9858, 0.9919, 0.9799],
+            "Trajectory Accuracy": [0.4955, 0.6844, 0.6514, 0.5950, 0.7113, 0.7008, 0.7472,
+                                    0.6778, 0.6730, 0.6314, 0.5630, 0.7172, 0.5785, 0.6115],
+            "Memory Symmetry": [0.3738, 0.6336, 0.4480, 0.6010, 0.5276, 0.3311, 0.8481,
+                                0.4279, 0.3098, 0.3631, 0.7948, 0.9009, 0.9068, 0.4323],
+            "Trajectory Alignment": [0.6419, 0.6449, 0.5703, 0.4084, 0.5988, 0.6362, 0.6776,
+                                     0.6097, 0.5932, 0.6038, 0.6668, 0.6876, 0.6250, 0.5518],
+            "Year": [2024] * 14
+        }
+        df = pd.DataFrame(data)
+        df.to_csv(path, index=False)
+        print(f"Created sample data at {path}")
+    def get_open_source_choices(self) -> List[str]:
+        if self.df_all is None:
+            return ["All"]
+        if "Open Source" not in self.df_all.columns:
+            return ["All"]
+        choices = ["All"] + sorted(self.df_all["Open Source"].dropna().unique().tolist())
+        return choices
+    def get_year_choices(self) -> List[str]:
+        if self.df_all is None:
+            return ["All"]
+        if "Year" not in self.df_all.columns:
+            return ["All"]
+        choices = ["All"] + sorted(self.df_all["Year"].dropna().unique().tolist(), reverse=True)
+        return choices
+    def get_category_choices(self) -> List[str]:
+        if self.df_all is None:
+            return ["All"]
+        if "Category" not in self.df_all.columns:
+            return ["All"]
+        choices = ["All"] + sorted(self.df_all["Category"].dropna().unique().tolist())
+        return choices
+    def filter_data(self, model_filter: str = "", open_source_filter: str = "All",
+                   year_filter: str = "All", category_filter: str = "All") -> pd.DataFrame:
+        if self.df_all is None:
+            return pd.DataFrame()
+        df = self.df_all.copy()
+        if model_filter:
+            df = df[df["Model"].str.contains(model_filter, case=False, na=False)]
+        if open_source_filter != "All" and "Open Source" in df.columns:
+            df = df[df["Open Source"] == open_source_filter]
+        if year_filter != "All" and "Year" in df.columns:
+            df = df[df["Year"] == int(year_filter)]
+        if category_filter != "All" and "Category" in df.columns:
+            df = df[df["Category"] == category_filter]
+        return df

src/leaderboard.py ADDED Viewed

	@@ -0,0 +1,68 @@

+import pandas as pd
+from typing import List, Optional
+from .data_loader import DataLoader
+class Leaderboard:
+    def __init__(self, data_loader: DataLoader):
+        self.data_loader = data_loader
+    def update_leaderboard(self, metric: str = "Average", top_k: int = 25,
+                          model_filter: str = "", open_source_filter: str = "All",
+                          year_filter: str = "All", category_filter: str = "All",
+                          sort_mode: str = "Auto",
+                          selected_metrics: Optional[List[str]] = None) -> pd.DataFrame:
+        df = self.data_loader.filter_data(model_filter, open_source_filter,
+                                         year_filter, category_filter)
+        if df.empty:
+            return pd.DataFrame()
+        if sort_mode == "Auto":
+            ascending = False
+        elif sort_mode == "Ascending (low → high)":
+            ascending = True
+        else:
+            ascending = False
+        if metric in df.columns:
+            df = df.sort_values(by=metric, ascending=ascending)
+        df = df.head(top_k).reset_index(drop=True)
+        df.insert(0, "Rank", range(1, len(df) + 1))
+        base_cols = ["Rank", "Model", "Category"]
+        if selected_metrics is None:
+            selected_metrics = ["Average"]
+        display_cols = base_cols.copy()
+        for m in selected_metrics:
+            if m in df.columns and m not in display_cols:
+                display_cols.append(m)
+        # Add optional link columns if they exist
+        link_cols = []
+        if "Paper" in df.columns:
+            link_cols.append("Paper")
+        if "Code" in df.columns:
+            link_cols.append("Code")
+        display_cols.extend(link_cols)
+        result_df = df[display_cols].copy()
+        # Format numeric values
+        for col in result_df.columns:
+            if col not in ["Rank", "Model", "Category", "Paper", "Code", "Open Source", "Year"]:
+                result_df[col] = result_df[col].apply(
+                    lambda x: f"{x:.4f}" if pd.notna(x) and isinstance(x, (int, float)) else "-"
+                )
+        # Create hyperlinks if columns exist
+        if "Paper" in result_df.columns:
+            result_df["Paper"] = result_df["Paper"].apply(
+                lambda x: f'<a href="{x}" target="_blank">📄</a>' if pd.notna(x) and x != "-" else "-"
+            )
+        if "Code" in result_df.columns:
+            result_df["Code"] = result_df["Code"].apply(
+                lambda x: f'<a href="{x}" target="_blank">💻</a>' if pd.notna(x) and x != "-" else "-"
+            )
+        return result_df

src/plotter.py ADDED Viewed

	@@ -0,0 +1,54 @@

+import matplotlib.pyplot as plt
+import pandas as pd
+from typing import Optional
+from .data_loader import DataLoader
+class Plotter:
+    def __init__(self, data_loader: DataLoader):
+        self.data_loader = data_loader
+    def create_comparison_plot(self, model_filter: str = "",
+                              open_source_filter: str = "All",
+                              year_filter: str = "All",
+                              category_filter: str = "All",
+                              metric: str = "Average",
+                              sort_mode: str = "Descending (high → low)") -> plt.Figure:
+        df = self.data_loader.filter_data(model_filter, open_source_filter,
+                                         year_filter, category_filter)
+        if df.empty or metric not in df.columns:
+            fig, ax = plt.subplots(figsize=(10, 6))
+            ax.text(0.5, 0.5, "No data available", ha="center", va="center", fontsize=14)
+            ax.axis("off")
+            return fig
+        ascending = sort_mode.startswith("Ascending")
+        df = df.sort_values(by=metric, ascending=ascending)
+        if len(df) > 20:
+            df = df.head(20)
+        fig, ax = plt.subplots(figsize=(12, max(6, len(df) * 0.4)))
+        colors = {
+            "Text-Conditioned": "#3b82f6",
+            "One-hot": "#10b981",
+            "Intrinsics/Extrinsics": "#f59e0b"
+        }
+        bar_colors = [colors.get(cat, "#6b7280") for cat in df["Category"]]
+        bars = ax.barh(df["Model"], df[metric], color=bar_colors, edgecolor="white", linewidth=0.5)
+        for bar, val in zip(bars, df[metric]):
+            width = bar.get_width()
+            ax.text(width + 0.01, bar.get_y() + bar.get_height()/2,
+                   f"{val:.4f}", ha="left", va="center", fontsize=9)
+        ax.set_xlabel(metric, fontsize=12, fontweight="bold")
+        ax.set_title(f"Model Comparison - {metric}", fontsize=14, fontweight="bold", pad=20)
+        ax.set_xlim(0, df[metric].max() * 1.15)
+        ax.grid(axis="x", alpha=0.3, linestyle="--")
+        from matplotlib.patches import Patch
+        legend_elements = [Patch(facecolor=color, label=cat)
+                          for cat, color in colors.items()
+                          if cat in df["Category"].values]
+        ax.legend(handles=legend_elements, loc="lower right", title="Category")
+        plt.tight_layout()
+        return fig

src/radar_plotter.py ADDED Viewed

	@@ -0,0 +1,52 @@

+import matplotlib.pyplot as plt
+import numpy as np
+import pandas as pd
+from typing import Optional, List
+from .data_loader import DataLoader
+from .utils import get_dimension_metrics
+class RadarPlotter:
+    def __init__(self, data_loader: DataLoader):
+        self.data_loader = data_loader
+        self.dimension_metrics = get_dimension_metrics()
+    def create_radar_chart(self, df: Optional[pd.DataFrame] = None,
+                          models: Optional[List[str]] = None) -> plt.Figure:
+        if df is None or df.empty:
+            df = self.data_loader.df_all.copy() if self.data_loader.df_all is not None else pd.DataFrame()
+        if df.empty:
+            fig, ax = plt.subplots(figsize=(8, 8), subplot_kw=dict(projection='polar'))
+            ax.text(0.5, 0.5, "No data available", ha="center", va="center", transform=ax.transAxes)
+            return fig
+        if len(df) > 8:
+            df = df.nlargest(8, "Average")
+        dimensions = list(self.dimension_metrics.keys())
+        for dim_name, metrics in self.dimension_metrics.items():
+            valid_metrics = [m for m in metrics if m in df.columns]
+            if valid_metrics:
+                df[dim_name] = df[valid_metrics].mean(axis=1)
+            else:
+                df[dim_name] = 0
+        angles = np.linspace(0, 2 * np.pi, len(dimensions), endpoint=False).tolist()
+        angles += angles[:1]
+        fig, ax = plt.subplots(figsize=(10, 10), subplot_kw=dict(projection='polar'))
+        colors = plt.cm.tab10(np.linspace(0, 1, len(df)))
+        for idx, (_, row) in enumerate(df.iterrows()):
+            values = [row.get(dim, 0) for dim in dimensions]
+            values += values[:1]
+            ax.plot(angles, values, 'o-', linewidth=2, label=row["Model"], color=colors[idx])
+            ax.fill(angles, values, alpha=0.1, color=colors[idx])
+        ax.set_xticks(angles[:-1])
+        ax.set_xticklabels(dimensions, fontsize=11)
+        ax.set_ylim(0, 1)
+        ax.set_title("Dimension Performance Radar", fontsize=14, fontweight="bold", pad=20)
+        ax.legend(loc='upper right', bbox_to_anchor=(1.3, 1.0), fontsize=9)
+        ax.grid(True, linestyle='--', alpha=0.5)
+        plt.tight_layout()
+        return fig

src/styling.py ADDED Viewed

	@@ -0,0 +1,128 @@

+import pandas as pd
+def get_academic_css() -> str:
+    """Return the page stylesheet as a single CSS string.
+    It covers the font import, colour variables, the ``.leaderboard-table``
+    layout, ``.rank-1`` to ``.rank-3`` row highlights, category tags, table links,
+    the primary button gradient and the status colours.
+    """
+    return """
+    @import url('https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700&display=swap');
+    :root {
+        --primary: #2563eb;
+        --primary-dark: #1d4ed8;
+        --accent: #06b6d4;
+        --text-dark: #1a1a1a;
+        --text-gray: #4a4a4a;
+        --border: #e5e5e5;
+        --bg-light: #f8f9fa;
+        --success: #10b981;
+    }
+    body {
+        font-family: 'Inter', -apple-system, BlinkMacSystemFont, sans-serif;
+    }
+    .gradio-container {
+        max-width: 1400px !important;
+    }
+    h1 {
+        color: var(--text-dark) !important;
+        font-weight: 700 !important;
+    }
+    .subtitle {
+        color: var(--text-gray);
+        font-size: 1.1rem;
+        margin-top: 0.5rem;
+    }
+    .emoji {
+        font-size: 1.5em;
+    }
+    .leaderboard-table {
+        width: 100%;
+        border-collapse: collapse;
+        font-size: 0.9rem;
+        margin-top: 1rem;
+    }
+    .leaderboard-table th {
+        background: linear-gradient(135deg, #64748b 0%, #94a3b8 100%);
+        color: white;
+        padding: 12px 8px;
+        text-align: center;
+        font-weight: 600;
+        position: sticky;
+        top: 0;
+    }
+    .leaderboard-table td {
+        padding: 10px 8px;
+        text-align: center;
+        border-bottom: 1px solid var(--border);
+    }
+    .leaderboard-table tr:nth-child(even) {
+        background-color: #f8fafc;
+    }
+    .leaderboard-table tr:hover {
+        background-color: #f1f5f9;
+    }
+    .rank-1 { background: linear-gradient(135deg, #ffd700 0%, #ffed4a 100%) !important; font-weight: bold; }
+    .rank-2 { background: linear-gradient(135deg, #c0c0c0 0%, #e5e7eb 100%) !important; font-weight: bold; }
+    .rank-3 { background: linear-gradient(135deg, #cd7f32 0%, #fdba74 100%) !important; font-weight: bold; }
+    .best-score {
+        color: #2c7a7b;
+        font-weight: 700;
+        background: #e6fffa;
+    }
+    .category-tag {
+        display: inline-block;
+        padding: 2px 8px;
+        border-radius: 12px;
+        font-size: 0.8rem;
+        font-weight: 500;
+    }
+    .cat-text { background: #dbeafe; color: #1e40af; }
+    .cat-onehot { background: #d1fae5; color: #065f46; }
+    .cat-camera { background: #fef3c7; color: #92400e; }
+    .leaderboard-table a {
+        color: var(--primary);
+        text-decoration: none;
+        font-size: 1.2rem;
+    }
+    .leaderboard-table a:hover {
+        opacity: 0.7;
+    }
+    button.primary {
+        background: linear-gradient(135deg, var(--primary) 0%, var(--primary-dark) 100%) !important;
+    }
+    .status-success { color: var(--success); }
+    .status-error { color: #ef4444; }
+    """
+def dataframe_to_html(df: pd.DataFrame) -> str:
+    if df.empty:
+        return "<div class='placeholder'>No data available</div>"
+    html = ['<table class="leaderboard-table">']
+    html.append("<thead><tr>")
+    for col in df.columns:
+        html.append(f"<th>{col}</th>")
+    html.append("</tr></thead>")
+    html.append("<tbody>")
+    for idx, row in df.iterrows():
+        rank_class = ""
+        if "Rank" in df.columns:
+            rank = row["Rank"]
+            if rank == 1:
+                rank_class = "rank-1"
+            elif rank == 2:
+                rank_class = "rank-2"
+            elif rank == 3:
+                rank_class = "rank-3"
+        html.append(f'<tr class="{rank_class}">')
+        for col in df.columns:
+            val = row[col]
+            if col == "Category":
+                cat_class = {
+                    "Text-Conditioned": "cat-text",
+                    "One-hot": "cat-onehot",
+                    "Intrinsics/Extrinsics": "cat-camera"
+                }.get(val, "")
+                val = f'<span class="category-tag {cat_class}">{val}</span>'
+            html.append(f"<td>{val}</td>")
+        html.append("</tr>")
+    html.append("</tbody></table>")
+    return "".join(html)

src/utils.py ADDED Viewed

	@@ -0,0 +1,31 @@

+import re
+def get_metric_choices():
+    """Return list of metric choices with emoji markers."""
+    return [
+        "Average ⭐",
+        "Image Quality 📊",
+        "Brightness Consistency 📊",
+        "Color Temperature 📊",
+        "Sharpness Retention 📊",
+        "Motion Smoothness 📊",
+        "Trajectory Accuracy 📊",
+        "Memory Symmetry 📊",
+        "Trajectory Alignment 📊",
+    ]
+def clean_metric_names(metrics):
+    """Remove emoji markers from metric names."""
+    cleaned = []
+    for m in metrics:
+        clean = m.replace(" ⭐", "").replace(" 📊", "").strip()
+        cleaned.append(clean)
+    return cleaned
+def get_dimension_metrics():
+    """Return mapping from dimension to list of metrics (for radar chart)."""
+    return {
+        "Generation Quality": ["Image Quality", "Brightness Consistency", "Color Temperature", "Sharpness Retention"],
+        "Trajectory Following": ["Motion Smoothness", "Trajectory Accuracy"],
+        "Memory Ability": ["Memory Symmetry", "Trajectory Alignment"]
+    }