# Source: Hugging Face Space kangshijia/NUPA-Performance
# Space status when captured: Sleeping
# File size: 35,806 Bytes

import os
import json
import math
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import dash
from dash import dcc, html, Input, Output, State, exceptions, no_update

# Use the module's real import name. The previous try/except probed an
# undefined ``name`` variable, so it always fell back to '__main__' regardless
# of how the module was loaded. ``name`` is kept as a module-level alias so
# any code that still reads it keeps working.
name = __name__

# Dash application instance shared by the layout and every callback below.
app = dash.Dash(name)

# ================== Data Preparation ==================
# Resolve the results directory relative to this script (falling back to the
# current directory when ``__file__`` is unavailable, e.g. in a REPL).
base_dir = os.path.dirname(os.path.abspath(__file__)) if '__file__' in globals() else '.'
results_dir = os.path.join(base_dir, "test_results_report")

# Statistics files that can be selected in the UI, one per model/setting.
file_names = [
    'GPT-4o_statistics.txt',
    'GPT-4o-mini_statistics.txt',
    'Llama-3.1-8B-ft_statistics.txt',
    'Llama-3.1-8B_statistics.txt',
    'Llama-3.1-70B_statistics.txt',
    'Mixtral-8x7B_statistics.txt',
    'Qwen2-72B_statistics.txt',
    'Qwen2-7B_statistics.txt',
    'Llama-2-7b-hf_statistics.txt',
    'Llama-2-7b-hf-5-shot_statistics.txt',
    'Llama-3.1-8B-5-shot_statistics.txt',
    'Llama-3.1-70B-5-shot_statistics.txt',
    'Mixtral-8x7B-v0.1-5-shot_statistics.txt',
    'Qwen2-7B-5-shot_statistics.txt',
    'Qwen2-72B-5-shot_statistics.txt',
]


def _load_task_keys(directory, candidates, metric="exact_match"):
    """Return the task keys listed under ``metric`` in the first candidate file.

    Every failure mode (missing directory, missing file, unreadable or
    malformed JSON, missing/non-dict metric section) is reported on stdout and
    yields an empty list, so the app can still start without data.

    Args:
        directory: Directory that holds the statistics files.
        candidates: File names to consider; only the first one is read.
        metric: Top-level JSON section whose keys are returned.

    Returns:
        A list of key strings, possibly empty.
    """
    if not os.path.exists(directory):
        print(f"Error: Results directory not found at {directory}")
        return []

    first_file_path = os.path.join(directory, candidates[0] if candidates else '')
    if not candidates or not os.path.exists(first_file_path):
        print("Warning: First statistics file not found or file_names list is empty.")
        return []

    try:
        # JSON is UTF-8 by specification; do not depend on the locale default.
        with open(first_file_path, "r", encoding="utf-8") as f:
            results = json.load(f)
    except (OSError, ValueError) as e:
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        print(f"Error loading keys from {first_file_path}: {e}")
        return []

    section = results.get(metric) if isinstance(results, dict) else None
    if not isinstance(section, dict):
        print(f"Warning: '{metric}' key not found or not a dictionary in {first_file_path}. Setting keys to empty.")
        return []
    return list(section)


# All task keys are taken from a single file (the first one); the other files
# are filtered against this list when they are loaded.
keys = _load_task_keys(results_dir, file_names)

def load_data(file_name, main_metric="exact_match", r=(0, len(keys))):
    """Load the per-task scores of one statistics file.

    Args:
        file_name: Name of a statistics file inside ``results_dir``.
        main_metric: Top-level section of the JSON document to read.
        r: ``(start, stop)`` bounds used to slice the module-level ``keys``
            list; only keys in that slice are considered.

    Returns:
        A 9-tuple of lists ``(tasks, well_learned_digit,
        has_performance_digit, in_domain, out_domain, short_range,
        medium_range, long_range, very_long_range)``. All lists have the same
        length and entry ``i`` of every metric list belongs to ``tasks[i]``.
        Each task label has the form ``"<Task Name><br /><domain>"``.
        When the file is missing, unreadable, or lacks a dictionary under
        ``main_metric``, a warning is printed and nine empty lists are
        returned.
    """
    metrics_to_extract = (
        "well_learned_digit", "has_performance_digit", "in_domain", "out_domain",
        "short_range", "medium_range", "long_range", "very_long_range"
    )

    def empty_result():
        # One list for the task labels plus one per metric.
        return tuple([] for _ in range(len(metrics_to_extract) + 1))

    file_path = os.path.join(results_dir, file_name)
    if not os.path.exists(file_path):
        print(f"Warning: File not found {file_path}, skipping.")
        return empty_result()

    try:
        # JSON is UTF-8 by specification; do not depend on the locale default.
        with open(file_path, "r", encoding="utf-8") as f:
            stats = json.load(f)
    except (OSError, ValueError) as e:
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        print(f"Error reading or parsing {file_path}: {e}")
        return empty_result()

    # A non-dict JSON root cannot contain the metric section either.
    if not isinstance(stats, dict) or main_metric not in stats:
        print(f"Warning: Metric '{main_metric}' not found in {file_name}, skipping.")
        return empty_result()

    stats_exm = stats[main_metric]
    if not isinstance(stats_exm, dict):
        print(f"Warning: Metric '{main_metric}' in {file_name} is not a dictionary. Skipping.")
        return empty_result()

    requested_keys = keys[r[0]:r[1]]
    valid_keys = [k for k in requested_keys if k in stats_exm]
    if len(valid_keys) < len(requested_keys):
        print(f"Warning: Some keys not found in {main_metric} of {file_name}")

    tasks = []
    columns = {metric: [] for metric in metrics_to_extract}

    for key in valid_keys:
        words = key.split("_")
        if len(words) < 4:  # Need at least one task word plus three trailing fields.
            print(f"Warning: Unexpected key format '{key}' in {file_name}, skipping.")
            continue

        # The last three underscore-separated fields are not part of the task
        # name; only the first of them is shown in the label.
        domain_1 = words[-3]
        task = " ".join(word.capitalize() for word in words[:-3])

        key_data = stats_exm[key]
        if isinstance(key_data, dict):
            # A metric missing for this key counts as 0.
            row = [key_data.get(metric, 0) for metric in metrics_to_extract]
        else:
            print(f"Warning: Data for key '{key}' in {file_name} is not a dictionary. Appending zeros.")
            row = [0] * len(metrics_to_extract)

        # Append the label and all metric values together, only after the
        # whole row has been computed, so the lists can never get out of step.
        tasks.append(f"{task}<br />{domain_1}")
        for metric, value in zip(metrics_to_extract, row):
            columns[metric].append(value)

    return (tasks, *(columns[metric] for metric in metrics_to_extract))


# Task names offered in the UI, one list per number representation.
# The checklist values are built from these names plus a domain suffix.
intTasks = [
    "Add",
    "Sub",
    "Max",
    "Max Hard",
    "Multiply Hard",
    "Multiply Easy",
    "Digit Max",
    "Digit Add",
    "Get Digit",
    "Length",
    "Truediv",
    "Floordiv",
    "Mod",
    "Mod Easy",
    "Count",
    "Sig",
    "To Scient",
]

floatTasks = [
    "Add",
    "Sub",
    "Max",
    "Max Hard",
    "Multiply Hard",
    "Multiply Easy",
    "Digit Max",
    "Digit Add",
    "Get Digit",
    "Length",
    "To Scient",
]

fractionTasks = [
    "Add",
    "Add Easy",
    "Sub",
    "Max",
    "Multiply Hard",
    "Multiply Easy",
    "Truediv",
    "To Float",
]

sciTasks = [
    "Add",
    "Sub",
    "Max",
    "Max Hard",
    "Multiply Hard",
    "Multiply Easy",
    "To Float",
]

# ================== Core Plotting Function ==================
_PLOT_STATS_SUFFIX = '_statistics.txt'

# Metric names in the order load_data() returns their lists (tuple slots 1..8).
_PLOT_METRIC_FIELDS = (
    "well_learned_digit", "has_performance_digit", "in_domain", "out_domain",
    "short_range", "medium_range", "long_range", "very_long_range"
)

# Short labels used on the second x-axis level.
_PLOT_METRIC_TICK_LABELS = {
    "short_range": "S",
    "medium_range": "M",
    "long_range": "L",
    "very_long_range": "XL",
    "well_learned_digit": "WLD",
    "has_performance_digit": "HPD",
    "in_domain": "ID",
    "out_domain": "OD"
}

# Above this many (model, task) pairs the figure is split into several rows.
_PLOT_MAX_ITEMS_PER_ROW = 32


def _model_label(file_name):
    """Return the legend label of a statistics file (its name minus the suffix)."""
    if file_name.endswith(_PLOT_STATS_SUFFIX):
        return file_name[:-len(_PLOT_STATS_SUFFIX)]
    return file_name


def _load_metric_table(file_name, main_metric, r):
    """Load one file via load_data() and index its metric lists by name.

    Returns ``(tasks, metric_vars)``, or ``None`` when load_data() returned
    fewer than the expected nine lists.
    """
    data_tuple = load_data(file_name, main_metric=main_metric, r=r)
    if len(data_tuple) < 1 + len(_PLOT_METRIC_FIELDS):
        return None
    return data_tuple[0], dict(zip(_PLOT_METRIC_FIELDS, data_tuple[1:]))


def _collect_bars(tasks, metric_vars, wanted_tasks, selected_metrics,
                  tick_labels, file_name, where=""):
    """Build the aligned x/y lists of one bar trace.

    For every task of the file that is in ``wanted_tasks`` one bar per
    selected metric is emitted. The three returned lists (task labels, metric
    tick labels, values) are filled in lockstep, so they always have equal
    length. A value that cannot be looked up becomes ``None`` and is reported.
    """
    x_tasks, x_ranges, scores = [], [], []
    for i, task in enumerate(tasks):
        if task not in wanted_tasks:
            continue
        for sel_m, label in zip(selected_metrics, tick_labels):
            values = metric_vars.get(sel_m)
            if values is not None and i < len(values):
                scores.append(values[i])
            else:
                scores.append(None)
                print(f"Warning: Missing data for metric '{sel_m}' at index {i} in file {file_name}{where}")
            x_tasks.append(task)
            x_ranges.append(label)
    return x_tasks, x_ranges, scores


def plot(main_metric, selected_files, selected_metrics, selected_tasks, r):
    """Build the grouped bar chart for the current selection.

    Args:
        main_metric: Section of the statistics files to plot (passed to
            load_data()); also used, capitalised, as the figure title.
        selected_files: Statistics file names, one bar trace per file.
        selected_metrics: Metric names to show for every task (one bar each).
        selected_tasks: Task labels to include; they are compared against the
            labels produced by load_data().
        r: Key range forwarded unchanged to load_data().

    Returns:
        A ``plotly.graph_objects.Figure``. With nothing selected a placeholder
        figure is returned. Up to 32 (model, task) pairs are drawn in a single
        row; larger selections are split into several subplot rows with the
        tasks distributed as evenly as possible.
    """
    colors = [
        "#2C6344","#5F9C61","#A4C97C","#61496D","#B092B6",
        "#CAC1D4","#308192","#E38D26","#F1CC74","#C74D26",
        "#5EA7B8","#AED2E2"
    ]
    colors.reverse()

    selected_files = list(selected_files or [])
    selected_tasks = list(selected_tasks or [])

    model_count = len(selected_files)
    task_count = len(selected_tasks)
    if model_count == 0 or task_count == 0 or not selected_metrics:
        fig = go.Figure()
        fig.update_layout(
            title="Please select models, tasks, and at least one range.",
            xaxis={'visible': False},
            yaxis={'visible': False},
            annotations=[{
                'text': "No data to display. Check selections.",
                'xref': "paper",
                'yref': "paper",
                'showarrow': False,
                'font': {'size': 16}
            }]
        )
        return fig

    tick_labels = [
        _PLOT_METRIC_TICK_LABELS.get(m, m.replace('_', ' ').title())
        for m in selected_metrics
    ]
    figure_title = " ".join(map(str.capitalize, main_metric.split("_")))

    # Read every selected file exactly once, even when the figure has several
    # rows; idx is kept so colours and groups stay tied to the selection order.
    tables = []
    for idx, file_name in enumerate(selected_files):
        table = _load_metric_table(file_name, main_metric, r)
        if table is not None:
            tables.append((idx, file_name, table))

    total_items = model_count * task_count

    # ------ Single-row mode ------
    if total_items <= _PLOT_MAX_ITEMS_PER_ROW:
        fig = go.Figure()
        wanted = set(selected_tasks)

        for idx, file_name, (tasks, metric_vars) in tables:
            x_tasks, x_ranges, scores = _collect_bars(
                tasks, metric_vars, wanted, selected_metrics, tick_labels, file_name
            )
            if not x_tasks:  # No selected task exists in this file.
                continue

            fig.add_trace(go.Bar(
                x=[x_tasks, x_ranges],
                y=scores,
                name=_model_label(file_name),
                marker_color=colors[idx % len(colors)],
                legendgroup=f"legend_{idx}",
                offsetgroup=f"group_{idx}"
            ))

        fig.update_layout(
            barmode='group',
            xaxis_tickangle=-45,
            template="ggplot2",
            autosize=True,
            height=450,
            xaxis=dict(showgrid=False, title='Task<br />Range', type='multicategory'),
            yaxis=dict(title='Performance'),
            title=figure_title,
            margin=dict(l=60, r=40, t=80, b=100),
            legend_title_text='Models'
        )
        return fig

    # ------ Multi-row mode ------
    row_count = math.ceil(total_items / _PLOT_MAX_ITEMS_PER_ROW)
    row_height = 200
    gap_px = 100

    margin_top = 80
    margin_bottom = 80
    margin_left = 60
    margin_right = 40

    # Spread the tasks over the rows; the first ``remainder`` rows get one extra.
    base_count, remainder = divmod(task_count, row_count)
    task_groups = []
    start_idx = 0
    for i in range(row_count):
        this_count = base_count + (1 if i < remainder else 0)
        task_groups.append(selected_tasks[start_idx:start_idx + this_count])
        start_idx += this_count

    # There may be more rows than tasks; drop the empty ones.
    task_groups = [group for group in task_groups if group]
    if not task_groups:
        fig = go.Figure()
        fig.update_layout(title="No tasks selected or data available for multi-row mode.")
        return fig
    row_count = len(task_groups)

    # make_subplots expects spacing and heights as fractions of the plot area.
    net_height = row_count * row_height + max(0, row_count - 1) * gap_px
    total_fig_height = net_height + margin_top + margin_bottom
    vertical_spacing = gap_px / net_height if net_height > 0 and row_count > 1 else 0
    row_fraction = row_height / net_height if net_height > 0 else 1
    row_heights = [row_fraction] * row_count

    fig = make_subplots(
        rows=row_count,
        cols=1,
        row_heights=row_heights if row_heights else None,
        vertical_spacing=vertical_spacing,
        subplot_titles=[f"Tasks Subset {i+1}" for i in range(row_count)],
        shared_xaxes=False
    )

    # Models that already own a legend entry. The entry is attached to the
    # first trace of each model, whichever row that trace lands in.
    legend_shown = set()

    for row_i, sub_tasks in enumerate(task_groups, start=1):
        wanted = set(sub_tasks)
        row_has_trace = False

        for idx, file_name, (tasks, metric_vars) in tables:
            x_tasks, x_ranges, scores = _collect_bars(
                tasks, metric_vars, wanted, selected_metrics, tick_labels,
                file_name, where=f" (Row {row_i})"
            )
            if not x_tasks:
                continue

            fig.add_trace(
                go.Bar(
                    x=[x_tasks, x_ranges],
                    y=scores,
                    name=_model_label(file_name),
                    marker_color=colors[idx % len(colors)],
                    showlegend=idx not in legend_shown,
                    legendgroup=f"legend_{idx}",
                    offsetgroup=f"group_{idx}"
                ),
                row=row_i,
                col=1
            )
            legend_shown.add(idx)
            row_has_trace = True

        if row_has_trace:
            fig.update_xaxes(type='multicategory', row=row_i, col=1)

    fig.update_layout(
        barmode='group',
        template="ggplot2",
        autosize=False,
        width=None,
        height=total_fig_height,
        title=figure_title,
        margin=dict(l=margin_left, r=margin_right, t=margin_top, b=margin_bottom),
        legend_title_text='Models'
    )

    for i in range(row_count):
        is_last_row = (i == row_count - 1)
        # Only the bottom row carries the x-axis title.
        fig.update_xaxes(
            showgrid=False,
            tickangle=-45 if is_last_row else -30,
            title_text='Task<br />Range' if is_last_row else '',
            row=i+1,
            col=1
        )
        fig.update_yaxes(
            title_text='Performance',
            row=i+1,
            col=1
        )

    return fig


# ================== Styles ==================

# Sidebar pinned to the left edge of the viewport (visible state).
SIDEBAR_STYLE = dict(
    position="fixed",
    top=0,
    left=0,
    bottom=0,
    width="24rem",
    padding="2rem 1rem",
    backgroundColor="#FFFFFF",
    boxShadow="2px 0px 5px rgba(0,0,0,0.1)",
    overflowY="auto",
    transition="all 0.3s",
    zIndex=1000,
)

# Same sidebar, shifted off-screen by its own width.
SIDEBAR_HIDDEN = dict(SIDEBAR_STYLE, left="-24rem", padding="2rem 0")

# Main content area, leaving room for the visible sidebar.
CONTENT_STYLE = dict(
    marginLeft="24rem",
    padding="2rem 2rem",
    transition="margin-left 0.3s",
    backgroundColor="#F7F7F7",
    minHeight="100vh",
)

# Main content area when the sidebar is hidden.
CONTENT_STYLE_FULL = dict(CONTENT_STYLE, marginLeft="0rem")

# Container of the toggle button; its zIndex places it above the sidebar.
TOGGLE_BUTTON_STYLE = dict(
    position="fixed",
    top="10px",
    left="10px",
    zIndex=1001,
    transition="left 0.3s",
)

# Optional variant that positions the button just right of the open sidebar.
TOGGLE_BUTTON_STYLE_SHIFTED = dict(TOGGLE_BUTTON_STYLE, left="calc(24rem + 10px)")

# ================== Dash Layout ==================
# The page consists of three siblings: a fixed toggle button, the sidebar
# holding all controls, and the content area holding the graph.

# Style shared by every control section of the sidebar.
_SECTION_STYLE = {"marginBottom": "20px", "borderTop": "1px solid #eee", "paddingTop": "15px"}


def _bold_label(text, bottom_margin="5px"):
    """Return the bold block-level label that heads a control section."""
    return html.Label(
        text,
        style={"fontWeight": "bold", "display": "block", "marginBottom": bottom_margin},
    )


def _task_group(heading, top_margin, selector_id, all_clear_id, task_names, suffix):
    """Return heading, All/Clear checklist and task checklist for one task family.

    Checklist values are ``task + suffix``; the family's "Add" task is
    pre-selected.
    """
    return [
        html.H5(heading, style={"fontWeight": "bold", "marginTop": top_margin}),
        dcc.Checklist(
            id=all_clear_id,
            options=[{'label': 'All', 'value': 'all'}, {'label': 'Clear', 'value': 'clear'}],
            value=[],
            inline=True,
            style={"marginBottom": "5px"},
        ),
        dcc.Checklist(
            id=selector_id,
            options=[{'label': task, 'value': task + suffix} for task in task_names],
            value=['Add' + suffix],
            labelStyle={'display': 'block'},
            style={
                "maxHeight": "150px",
                "overflowY": "auto",
                "border": "1px solid #ddd",
                "padding": "5px",
                "borderRadius": "4px",
                "marginBottom": "10px",
            },
        ),
    ]


# --- Fixed toggle button ---
_toggle_button = html.Div(
    id="toggle-button-container",
    style=TOGGLE_BUTTON_STYLE,
    children=[
        html.Button(
            "☰",
            id="sidebar-toggle",
            n_clicks=0,
            style={
                "padding": "8px 12px",
                "fontSize": "1.2em",
                "backgroundColor": "#e9ecef",
                "border": "1px solid #ccc",
                "borderRadius": "5px",
                "cursor": "pointer",
                "boxShadow": "1px 1px 3px rgba(0,0,0,0.2)",
            },
            title="Toggle Sidebar",
        )
    ],
)

# --- Sidebar section: metric (radio buttons) ---
_metric_section = html.Div(
    style=_SECTION_STYLE,
    children=[
        _bold_label("Select Metric:"),
        dcc.RadioItems(
            id='metric-selector',
            options=[
                {'label': 'Exact Match', 'value': 'exact_match'},
                {'label': 'Digit Match', 'value': 'digit_match'},
                {'label': 'Dlength', 'value': 'dlength'},
            ],
            value='exact_match',
            labelStyle={'display': 'block', 'marginBottom': '5px'},
            style={"marginBottom": "10px"},
        ),
    ],
)

# --- Sidebar section: models (one checklist entry per statistics file) ---
_model_section = html.Div(
    style=_SECTION_STYLE,
    children=[
        _bold_label("Select Models:"),
        dcc.Checklist(
            id='file-selector-all-clear',
            options=[
                {'label': 'Select All', 'value': 'all'},
                {'label': 'Clear', 'value': 'clear'},
            ],
            value=[],
            inline=True,
            style={"marginBottom": "5px"},
        ),
        dcc.Checklist(
            id='file-selector',
            options=[
                {
                    'label': file_name[:-15] if file_name.endswith('_statistics.txt') else file_name,
                    'value': file_name,
                }
                for file_name in file_names
            ],
            # Models selected when the page first loads.
            value=[
                'GPT-4o_statistics.txt',
                'Llama-3.1-8B-ft_statistics.txt',
                'Mixtral-8x7B_statistics.txt',
                'Qwen2-72B_statistics.txt',
            ],
            labelStyle={'display': 'block', 'marginBottom': '3px'},
            # Fixed maximum height with scrolling keeps the long list compact.
            style={
                "maxHeight": "200px",
                "overflowY": "auto",
                "border": "1px solid #ddd",
                "padding": "5px",
                "borderRadius": "4px",
            },
        ),
    ],
)

# --- Sidebar section: tasks (one group per number representation) ---
_task_section = html.Div(
    style=_SECTION_STYLE,
    children=[
        _bold_label("Select Tasks:", "10px"),
        *_task_group("Integer Tasks", "10px", 'int-task-selector',
                     'int-task-selector-all-clear', intTasks, '<br />Integer'),
        *_task_group("Float Tasks", "15px", 'float-task-selector',
                     'float-task-selector-all-clear', floatTasks, '<br />Float'),
        *_task_group("Fraction Tasks", "15px", 'fraction-task-selector',
                     'fraction-task-selector-all-clear', fractionTasks, '<br />Fraction'),
        *_task_group("Scientific Tasks", "15px", 'sci-task-selector',
                     'sci-task-selector-all-clear', sciTasks, '<br />ScientificNotation'),
    ],
)

# --- Sidebar section: ranges (the component id still says "metrics") ---
_range_section = html.Div(
    style=_SECTION_STYLE,
    children=[
        _bold_label("Select Ranges:"),
        dcc.Checklist(
            id='metrics-selector',
            options=[
                {'label': 'Short Range (S)', 'value': 'short_range'},
                {'label': 'Medium Range (M)', 'value': 'medium_range'},
                {'label': 'Long Range (L)', 'value': 'long_range'},
                {'label': 'Very Long Range (XL)', 'value': 'very_long_range'},
            ],
            value=['short_range', 'medium_range', 'long_range', 'very_long_range'],
            labelStyle={'display': 'block', 'marginBottom': '5px'},
            style={"marginBottom": "10px"},
        ),
    ],
)

# --- Sidebar (initially visible) ---
_sidebar = html.Div(
    id="sidebar",
    style=SIDEBAR_STYLE,
    children=[
        # The top margin keeps the heading clear of the fixed toggle button.
        html.H3(
            "Controls",
            style={"textAlign": "center", "marginBottom": "1.5rem", "marginTop": "2rem"},
        ),
        _metric_section,
        _model_section,
        _task_section,
        _range_section,
    ],
)

# --- Main content area (initially offset by the sidebar width) ---
_content = html.Div(
    id="content",
    style=CONTENT_STYLE,
    children=[
        # Top padding prevents overlap with the fixed toggle button.
        html.Div(
            style={"paddingTop": "50px"},
            children=[
                html.H1(
                    "NUPA Performance",
                    style={"textAlign": "center", "marginBottom": "30px", "color": "#333"},
                ),
                html.Div(
                    style={
                        "backgroundColor": "#FFFFFF",
                        "padding": "20px",
                        "borderRadius": "8px",
                        "boxShadow": "0 1px 4px rgba(0,0,0,0.1)",
                        "marginBottom": "20px",
                    },
                    children=[
                        dcc.Loading(
                            id="loading-graph",
                            type="circle",
                            children=dcc.Graph(
                                id='performance-plot',
                                style={"width": "100%", "height": "auto"},
                            ),
                        )
                    ],
                ),
            ],
        )
    ],
)

app.layout = html.Div(
    style={"margin": "0", "padding": "0"},
    children=[_toggle_button, _sidebar, _content],
)


# ================== Callbacks ==================

# --- Callback: redraw the figure whenever any selection changes ---
@app.callback(
    Output('performance-plot', 'figure'),
    Input('metric-selector', 'value'),
    Input('file-selector', 'value'),
    Input('int-task-selector', 'value'),
    Input('float-task-selector', 'value'),
    Input('fraction-task-selector', 'value'),
    Input('sci-task-selector', 'value'),
    Input('metrics-selector', 'value')  # Holds the selected ranges.
)
def update_figure(main_metric, selected_files, selected_int_tasks,
                  selected_float_tasks, selected_fraction_tasks,
                  selected_sci_tasks, selected_ranges):
    """Merge the four task checklists and delegate to plot().

    The task selections are concatenated in the order integer, float,
    fraction, scientific, and the full range of ``keys`` is always requested.
    """
    selected_tasks = [
        *selected_int_tasks,
        *selected_float_tasks,
        *selected_fraction_tasks,
        *selected_sci_tasks,
    ]
    full_key_range = (0, len(keys))
    return plot(main_metric, selected_files, selected_ranges, selected_tasks, full_key_range)


# --- Callback: show/hide the sidebar ---
@app.callback(
    [Output("sidebar", "style"), Output("content", "style")],
    [Input("sidebar-toggle", "n_clicks")],
    [State("sidebar", "style"), State("content", "style")]
)
def toggle_sidebar(n, current_sidebar_style, current_content_style):
    """Flip the sidebar between its visible and hidden styles.

    The current state is read from the content area's left margin: a margin
    of "24rem" means the sidebar is open. Nothing is updated before the first
    click.
    """
    if not n:  # None or 0: the button has not been clicked yet.
        return no_update, no_update

    sidebar_is_open = current_content_style.get("marginLeft") == "24rem"
    if sidebar_is_open:
        return SIDEBAR_HIDDEN, CONTENT_STYLE_FULL
    return SIDEBAR_STYLE, CONTENT_STYLE

# --- Callbacks: "All / Clear" helpers for the task checklists ---
def make_task_selector_callback(output_id, output_all_clear_id, task_list, task_suffix):
    """Register and return the All/Clear callback of one task checklist.

    Args:
        output_id: Id of the task checklist whose value is set.
        output_all_clear_id: Id of the companion All/Clear checklist; it
            triggers the callback and is reset to ``[]`` afterwards.
        task_list: Task names of this family.
        task_suffix: Suffix appended to each name to form the checklist value.

    Returns:
        The callback function as returned by the ``app.callback`` decorator.
    """
    def update_task_selector(all_clear_value):
        if not all_clear_value:
            raise exceptions.PreventUpdate

        # The most recently ticked box is the last element of the value list.
        latest = all_clear_value[-1]
        if latest == 'clear':
            return [], []
        if latest == 'all':
            return [task + task_suffix for task in task_list], []
        raise exceptions.PreventUpdate

    register = app.callback(
        [Output(output_id, 'value'),
         Output(output_all_clear_id, 'value')],
        Input(output_all_clear_id, 'value'),
    )
    return register(update_task_selector)

# One All/Clear callback per task family, registered in the order
# integer, float, fraction, scientific.
(
    update_int_selector,
    update_float_selector,
    update_fraction_selector,
    update_sci_selector,
) = [
    make_task_selector_callback(selector_id, all_clear_id, family_tasks, suffix)
    for selector_id, all_clear_id, family_tasks, suffix in (
        ('int-task-selector', 'int-task-selector-all-clear', intTasks, '<br />Integer'),
        ('float-task-selector', 'float-task-selector-all-clear', floatTasks, '<br />Float'),
        ('fraction-task-selector', 'fraction-task-selector-all-clear', fractionTasks, '<br />Fraction'),
        ('sci-task-selector', 'sci-task-selector-all-clear', sciTasks, '<br />ScientificNotation'),
    )
]

# --- Callback: "Select All / Clear" for the model checklist ---
@app.callback(
    [Output('file-selector', 'value'),
     Output('file-selector-all-clear', 'value')],
    Input('file-selector-all-clear', 'value'),
)
def update_file_selector(all_clear_value):
    """Select every model or none, then reset the All/Clear checklist.

    The last element of ``all_clear_value`` is the box that was ticked most
    recently. An empty value or any other entry leaves everything unchanged.
    """
    latest = all_clear_value[-1] if all_clear_value else None
    if latest == 'clear':
        return [], []
    if latest == 'all':
        return file_names, []
    raise exceptions.PreventUpdate


# ================== Run App ==================
# Guard on the real ``__name__`` so the server only starts when this file is
# executed as a script, not when the module is imported.
if __name__ == '__main__':
    # Report the state of the data directory before starting.
    if not os.path.isdir(results_dir):
        print(f"Error: The directory '{results_dir}' does not exist.")
        print("Please create it and place the necessary statistics.txt files inside.")
    elif not keys:
        print(f"Warning: Could not load keys. Functionality might be limited. Check data files in {results_dir}")
    else:
        print(f"Looking for data files in: {os.path.abspath(results_dir)}")

    # The server is started even when no keys could be loaded; the UI then
    # simply has no data to display.
    # NOTE(review): debug=True combined with host='0.0.0.0' exposes Dash's
    # development tooling to every client that can reach the port - confirm
    # this is intended for the deployed app.
    app.run(debug=True, host='0.0.0.0', port=7860)