import os
import json
import math
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import dash
from dash import dcc, html, Input, Output, State, exceptions, no_update
app = dash.Dash(__name__)
# ================== Data Preparation ==================
# Define the path relative to the script's location | |
# Use os.path.join for better cross-platform compatibility | |
base_dir = os.path.dirname(os.path.abspath(__file__)) if '__file__' in globals() else '.' | |
results_dir = os.path.join(base_dir, "test_results_report") | |
file_names = [ | |
'GPT-4o_statistics.txt', | |
'GPT-4o-mini_statistics.txt', | |
'Llama-3.1-8B-ft_statistics.txt', | |
'Llama-3.1-8B_statistics.txt', | |
'Llama-3.1-70B_statistics.txt', | |
'Mixtral-8x7B_statistics.txt', | |
'Qwen2-72B_statistics.txt', | |
'Qwen2-7B_statistics.txt', | |
'Llama-2-7b-hf_statistics.txt', | |
'Llama-2-7b-hf-5-shot_statistics.txt', | |
'Llama-3.1-8B-5-shot_statistics.txt', | |
'Llama-3.1-70B-5-shot_statistics.txt', | |
'Mixtral-8x7B-v0.1-5-shot_statistics.txt', | |
'Qwen2-7B-5-shot_statistics.txt', | |
'Qwen2-72B-5-shot_statistics.txt', | |
] | |
# Ensure the directory and at least one file exists for key loading | |
if not os.path.exists(results_dir): | |
print(f"Error: Results directory not found at {results_dir}") | |
# Consider exiting or providing default keys if essential | |
keys = [] # Provide empty keys as fallback | |
else: | |
first_file_path = os.path.join(results_dir, file_names[0] if file_names else '') | |
if not file_names or not os.path.exists(first_file_path): | |
print(f"Warning: First statistics file not found or file_names list is empty.") | |
keys = [] # Provide empty keys as fallback | |
else: | |
        # Get all keys from a single file (for demonstration only)
try: | |
with open(first_file_path, "r") as f: | |
results = json.load(f) | |
# Ensure 'exact_match' exists before accessing keys | |
if "exact_match" in results and isinstance(results["exact_match"], dict): | |
keys = list(results["exact_match"].keys()) | |
else: | |
print(f"Warning: 'exact_match' key not found or not a dictionary in {first_file_path}. Setting keys to empty.") | |
keys = [] | |
except Exception as e: | |
print(f"Error loading keys from {first_file_path}: {e}") | |
keys = [] # Provide empty keys on error | |
def load_data(file_name, main_metric="exact_match", r=(0, len(keys))): | |
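    """Load one *_statistics.txt JSON report and extract per-task metrics.

    Returns a 9-tuple of parallel lists:
    (tasks, well_learned_digit, has_performance_digit, in_domain, out_domain,
     short_range, medium_range, long_range, very_long_range).
    Only keys in the slice keys[r[0]:r[1]] of the globally loaded key list are
    considered; missing files, metrics, or malformed entries produce warnings
    and fall back to empty lists or zero placeholders so the lists stay aligned.
    """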
tasks = [] | |
well_learned_digit = [] | |
has_performance_digit = [] | |
in_domain = [] | |
out_domain = [] | |
short_range = [] | |
medium_range = [] | |
long_range = [] | |
very_long_range = [] | |
metrics_to_extract = [ | |
"well_learned_digit", "has_performance_digit", "in_domain", "out_domain", | |
"short_range", "medium_range", "long_range", "very_long_range" | |
] | |
file_path = os.path.join(results_dir, file_name) | |
if not os.path.exists(file_path): | |
print(f"Warning: File not found {file_path}, skipping.") | |
# Return empty lists of the correct structure | |
return tuple([[] for _ in range(len(metrics_to_extract) + 1)]) | |
try: | |
with open(file_path, "r") as f: | |
stats = json.load(f) | |
except Exception as e: | |
print(f"Error reading or parsing {file_path}: {e}") | |
# Return empty lists of the correct structure | |
return tuple([[] for _ in range(len(metrics_to_extract) + 1)]) | |
if main_metric not in stats: | |
print(f"Warning: Metric '{main_metric}' not found in {file_name}, skipping.") | |
return tuple([[] for _ in range(len(metrics_to_extract) + 1)]) | |
stats_exm = stats[main_metric] | |
# Ensure stats_exm is a dictionary before proceeding | |
if not isinstance(stats_exm, dict): | |
print(f"Warning: Metric '{main_metric}' in {file_name} is not a dictionary. Skipping.") | |
return tuple([[] for _ in range(len(metrics_to_extract) + 1)]) | |
# Check if keys exist before accessing | |
valid_keys = [k for k in keys[r[0]:r[1]] if k in stats_exm] | |
if len(valid_keys) < len(keys[r[0]:r[1]]): | |
print(f"Warning: Some keys not found in {main_metric} of {file_name}") | |
for key in valid_keys: # Use only valid keys | |
try: | |
words = key.split("_") | |
if len(words) < 4: # Basic check for expected format | |
print(f"Warning: Unexpected key format '{key}' in {file_name}, skipping.") | |
continue | |
domain_3 = words.pop() | |
domain_2 = words.pop() | |
domain_1 = words.pop() | |
task = " ".join(list(map(str.capitalize, words))) | |
tasks.append(f"{task}<br />{domain_1}") | |
key_data = stats_exm[key] | |
# Ensure key_data is a dictionary | |
if not isinstance(key_data, dict): | |
print(f"Warning: Data for key '{key}' in {file_name} is not a dictionary. Appending zeros.") | |
well_learned_digit.append(0) | |
has_performance_digit.append(0) | |
in_domain.append(0) | |
out_domain.append(0) | |
short_range.append(0) | |
medium_range.append(0) | |
long_range.append(0) | |
very_long_range.append(0) | |
continue # Skip to next key | |
# Safely append metrics, using None or 0 if a metric is missing for a specific key | |
well_learned_digit.append(key_data.get("well_learned_digit", 0)) | |
has_performance_digit.append(key_data.get("has_performance_digit", 0)) | |
in_domain.append(key_data.get("in_domain", 0)) | |
out_domain.append(key_data.get("out_domain", 0)) | |
short_range.append(key_data.get("short_range", 0)) | |
medium_range.append(key_data.get("medium_range", 0)) | |
long_range.append(key_data.get("long_range", 0)) | |
very_long_range.append(key_data.get("very_long_range", 0)) | |
except Exception as e: | |
print(f"Error processing key '{key}' in {file_name}: {e}") | |
# Attempt to keep lists aligned by appending a default value, or skip the key entirely | |
# For simplicity, let's skip if parsing fails badly | |
if len(tasks) > len(short_range): # Check if appending failed mid-key | |
try: | |
tasks.pop() # Remove the task name if metrics failed | |
except IndexError: | |
pass # Handle case where tasks might be empty | |
return ( | |
tasks, well_learned_digit, has_performance_digit, | |
in_domain, out_domain, | |
short_range, medium_range, long_range, very_long_range | |
) | |
# Example task lists
intTasks = [ | |
"Add","Sub","Max","Max Hard","Multiply Hard","Multiply Easy", | |
"Digit Max","Digit Add","Get Digit","Length","Truediv","Floordiv","Mod", | |
"Mod Easy","Count","Sig","To Scient" | |
] | |
floatTasks = [ | |
"Add","Sub","Max","Max Hard","Multiply Hard","Multiply Easy", | |
"Digit Max","Digit Add","Get Digit","Length","To Scient" | |
] | |
fractionTasks = [ | |
"Add","Add Easy","Sub","Max","Multiply Hard","Multiply Easy","Truediv","To Float" | |
] | |
sciTasks = [ | |
"Add","Sub","Max","Max Hard","Multiply Hard","Multiply Easy","To Float" | |
] | |
# ================== Core Plotting Function ==================
def plot(main_metric, selected_files, selected_metrics, selected_tasks, r): | |
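    """Build a grouped bar chart comparing the selected models on the selected tasks.

    `selected_metrics` holds the range keys (e.g. "short_range") drawn per task.
    With up to 32 model/task combinations a single multicategory bar chart is
    returned; larger selections are split across stacked subplot rows.
    Returns a plotly.graph_objects.Figure.
    """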
colors = [ | |
"#2C6344","#5F9C61","#A4C97C","#61496D","#B092B6", | |
"#CAC1D4","#308192","#E38D26","#F1CC74","#C74D26", | |
"#5EA7B8","#AED2E2" | |
] | |
colors.reverse() | |
M = len(selected_files) | |
T = len(selected_tasks) | |
if M == 0 or T == 0 or not selected_metrics: # Added check for selected_metrics | |
fig = go.Figure() | |
fig.update_layout( | |
title="Please select models, tasks, and at least one range.", | |
xaxis={'visible': False}, | |
yaxis={'visible': False}, | |
annotations=[{ | |
'text': "No data to display. Check selections.", | |
'xref': "paper", | |
'yref': "paper", | |
'showarrow': False, | |
'font': {'size': 16} | |
}] | |
) | |
return fig | |
total_items = M * T | |
    # Abbreviations used as second-level x-axis labels; independent of loaded data
metric_labels = { # For x-axis labels | |
"short_range": "S", | |
"medium_range": "M", | |
"long_range": "L", | |
"very_long_range": "XL", | |
# Add other potential metrics if they can be selected and need labels | |
"well_learned_digit": "WLD", | |
"has_performance_digit": "HPD", | |
"in_domain": "ID", | |
"out_domain": "OD" | |
} | |
# Filter labels based on *actually selected* metrics (ranges in this case) | |
selected_metric_labels = [metric_labels.get(m, m.replace('_',' ').title()) for m in selected_metrics] # Get labels for selected metrics | |
    # ------ Single Row Mode ------
if total_items <= 32: | |
fig = go.Figure() | |
# Keep track of unique task labels added for x-axis update | |
all_tasks_x_level1 = [] | |
for idx, file_name in enumerate(selected_files): | |
data_tuple = load_data(file_name, main_metric=main_metric, r=r) | |
# Unpack safely, ensuring enough elements exist | |
if len(data_tuple) < 9: continue # Skip if data loading failed badly | |
( | |
tasks, well_learned_digit, has_performance_digit, | |
in_domain, out_domain, | |
short_range, medium_range, long_range, very_long_range | |
) = data_tuple | |
            # Map each selectable metric/range name to its freshly loaded data list
metric_vars = { | |
"well_learned_digit": well_learned_digit, | |
"has_performance_digit": has_performance_digit, | |
"in_domain": in_domain, | |
"out_domain": out_domain, | |
"short_range": short_range, | |
"medium_range": medium_range, | |
"long_range": long_range, | |
"very_long_range": very_long_range | |
} | |
tasks_new_x_level1 = [] | |
tasks_old_indices = [] # Keep track of original indices for data lookup | |
performance = [] | |
for i, task in enumerate(tasks): | |
if task in selected_tasks: | |
tasks_new_x_level1.extend([task] * len(selected_metrics)) | |
tasks_old_indices.append(i) # Store index i | |
for sel_m in selected_metrics: | |
# Use the metric_vars map and the stored index i | |
metric_data_list = metric_vars.get(sel_m) | |
# Ensure metric_data_list is not None and index is valid | |
if metric_data_list is not None and i < len(metric_data_list): | |
performance.append(metric_data_list[i]) | |
else: | |
# Append None or 0 if metric data is missing for this index/metric | |
performance.append(None) | |
print(f"Warning: Missing data for metric '{sel_m}' at index {i} in file {file_name}") | |
if not tasks_new_x_level1: continue # Skip if no tasks matched for this file | |
# Store unique tasks added in this trace for later x-axis update | |
if idx == 0: # Only need tasks from the first file usually | |
all_tasks_x_level1 = tasks_new_x_level1 | |
# Ensure performance list length matches x-axis length | |
expected_len = len(tasks_new_x_level1) | |
if len(performance) != expected_len: | |
print(f"Warning: Length mismatch for {file_name}. X-axis: {expected_len}, Y-axis: {len(performance)}. Skipping trace.") | |
continue | |
# Ensure the second level of x-axis also matches | |
x_level2 = selected_metric_labels * (len(tasks_new_x_level1) // len(selected_metric_labels)) | |
if len(tasks_new_x_level1) != len(x_level2): | |
print(f"Warning: X-axis level length mismatch for {file_name}. Level 1: {len(tasks_new_x_level1)}, Level 2: {len(x_level2)}. Skipping trace.") | |
continue | |
fig.add_trace(go.Bar( | |
x=[tasks_new_x_level1, x_level2 ], | |
y=performance, | |
name=file_name[:-15] if file_name.endswith('_statistics.txt') else file_name, # Safer name slicing | |
marker_color=colors[idx % len(colors)], | |
legendgroup=f"legend_{idx}", # Group legend items | |
offsetgroup=f"group_{idx}" # Group bars | |
)) | |
# Single row layout updates | |
fig.update_layout( | |
barmode='group', | |
xaxis_tickangle=-45, | |
template="ggplot2", | |
autosize=True, | |
height=450, | |
xaxis=dict(showgrid=False, title='Task<br />Range', type='multicategory'), # Specify multicategory type | |
yaxis=dict(title='Performance'), | |
title=" ".join(list(map(str.capitalize, main_metric.split("_")))), | |
margin=dict(l=60, r=40, t=80, b=100), | |
legend_title_text='Models' | |
) | |
        # Rely on Plotly's default multicategory tick labeling; explicit tick
        # values are not set because the two-level x axis labels itself correctly.
return fig | |
else: | |
        # ------ Multi Row Mode ------
        # (metric_vars must be rebuilt inside the per-file loop, after load_data)
row_count = math.ceil(total_items / 32) | |
row_height = 200 | |
gap_px = 100 | |
margin_top = 80 | |
margin_bottom = 80 | |
margin_left = 60 | |
margin_right = 40 | |
base_count = T // row_count | |
remainder = T % row_count | |
task_groups = [] | |
start_idx = 0 | |
for i in range(row_count): | |
this_count = base_count + (1 if i < remainder else 0) | |
group = selected_tasks[start_idx : start_idx + this_count] | |
task_groups.append(group) | |
start_idx += this_count | |
task_groups = [group for group in task_groups if group] | |
if not task_groups: | |
fig = go.Figure() | |
fig.update_layout(title="No tasks selected or data available for multi-row mode.") | |
return fig | |
row_count = len(task_groups) | |
net_height = row_count * row_height + max(0, row_count - 1) * gap_px | |
total_fig_height = net_height + margin_top + margin_bottom | |
vertical_spacing = gap_px / net_height if net_height > 0 and row_count > 1 else 0 | |
row_fraction = row_height / net_height if net_height > 0 else 1 | |
row_heights = [row_fraction] * row_count | |
fig = make_subplots( | |
rows=row_count, | |
cols=1, | |
row_heights=row_heights if row_heights else None, | |
vertical_spacing=vertical_spacing, | |
subplot_titles=[f"Tasks Subset {i+1}" for i in range(row_count)], | |
shared_xaxes=False | |
) | |
added_traces_info = {} | |
for row_i, sub_tasks in enumerate(task_groups, start=1): | |
if not sub_tasks: continue | |
row_x_level1 = [] | |
row_x_level2 = [] | |
for idx, file_name in enumerate(selected_files): | |
data_tuple = load_data(file_name, main_metric=main_metric, r=r) | |
if len(data_tuple) < 9: continue | |
( | |
tasks, well_learned_digit, has_performance_digit, | |
in_domain, out_domain, | |
short_range, medium_range, long_range, very_long_range | |
) = data_tuple | |
                # Map each selectable metric/range name to its freshly loaded data list
metric_vars = { | |
"well_learned_digit": well_learned_digit, | |
"has_performance_digit": has_performance_digit, | |
"in_domain": in_domain, | |
"out_domain": out_domain, | |
"short_range": short_range, | |
"medium_range": medium_range, | |
"long_range": long_range, | |
"very_long_range": very_long_range | |
} | |
tasks_new_x_level1 = [] | |
tasks_old_indices = [] | |
performance = [] | |
for i, task in enumerate(tasks): | |
if task in sub_tasks: | |
tasks_new_x_level1.extend([task] * len(selected_metrics)) | |
tasks_old_indices.append(i) | |
for sel_m in selected_metrics: | |
metric_data_list = metric_vars.get(sel_m) | |
if metric_data_list is not None and i < len(metric_data_list): | |
performance.append(metric_data_list[i]) | |
else: | |
performance.append(None) | |
print(f"Warning: Missing data for metric '{sel_m}' at index {i} in file {file_name} (Row {row_i})") | |
if not tasks_new_x_level1: continue | |
expected_len = len(tasks_new_x_level1) | |
if len(performance) != expected_len: | |
print(f"Warning: Length mismatch for {file_name} (Row {row_i}). X: {expected_len}, Y: {len(performance)}. Skipping trace.") | |
continue | |
x_level2 = selected_metric_labels * (len(tasks_new_x_level1) // len(selected_metric_labels)) | |
if len(tasks_new_x_level1) != len(x_level2): | |
print(f"Warning: X-axis level length mismatch for {file_name} (Row {row_i}). L1: {len(tasks_new_x_level1)}, L2: {len(x_level2)}. Skipping trace.") | |
continue | |
if idx == 0: | |
row_x_level1 = tasks_new_x_level1 | |
row_x_level2 = x_level2 | |
offset_group_name = f"group_{idx}" | |
fig.add_trace( | |
go.Bar( | |
x=[tasks_new_x_level1, x_level2], | |
y=performance, | |
name=file_name[:-15] if file_name.endswith('_statistics.txt') else file_name, | |
marker_color=colors[idx % len(colors)], | |
showlegend=(row_i == 1), | |
legendgroup=f"legend_{idx}", | |
offsetgroup=offset_group_name | |
), | |
row=row_i, | |
col=1 | |
) | |
if row_i not in added_traces_info: added_traces_info[row_i] = set() | |
added_traces_info[row_i].add(offset_group_name) | |
if row_x_level1 and row_x_level2: | |
fig.update_xaxes(type='multicategory', row=row_i, col=1) | |
# Multi-row layout updates | |
fig.update_layout( | |
barmode='group', | |
template="ggplot2", | |
autosize=False, | |
width=None, | |
height=total_fig_height, | |
title=" ".join(list(map(str.capitalize, main_metric.split("_")))), | |
margin=dict(l=margin_left, r=margin_right, t=margin_top, b=margin_bottom), | |
legend_title_text='Models' | |
) | |
for i in range(row_count): | |
is_last_row = (i == row_count - 1) | |
fig.update_xaxes( | |
showgrid=False, | |
tickangle=-45 if is_last_row else -30, | |
title_text='Task<br />Range' if is_last_row else '', | |
row=i+1, | |
col=1 | |
) | |
fig.update_yaxes( | |
title_text='Performance', | |
row=i+1, | |
col=1 | |
) | |
return fig | |
# ================== Styles ================== | |
SIDEBAR_STYLE = { | |
"position": "fixed", | |
"top": 0, | |
"left": 0, | |
"bottom": 0, | |
"width": "24rem", | |
"padding": "2rem 1rem", | |
"backgroundColor": "#FFFFFF", | |
"boxShadow": "2px 0px 5px rgba(0,0,0,0.1)", | |
"overflowY": "auto", | |
"transition": "all 0.3s", | |
"zIndex": 1000 # Sidebar itself | |
} | |
SIDEBAR_HIDDEN = { | |
**SIDEBAR_STYLE, | |
"left": "-24rem", # Move off-screen | |
"padding": "2rem 0", | |
} | |
CONTENT_STYLE = { | |
"marginLeft": "24rem", | |
"padding": "2rem 2rem", | |
"transition": "margin-left 0.3s", | |
"backgroundColor": "#F7F7F7", | |
"minHeight": "100vh", | |
} | |
CONTENT_STYLE_FULL = { | |
**CONTENT_STYLE, | |
"marginLeft": "0rem", | |
} | |
# --- NEW: Style for the fixed toggle button container --- | |
TOGGLE_BUTTON_STYLE = { | |
"position": "fixed", | |
"top": "10px", # Position from top | |
"left": "10px", # Position from left | |
"zIndex": 1001, # Ensure it's above the sidebar | |
"transition": "left 0.3s" # Optional: Animate button position slightly if needed | |
} | |
TOGGLE_BUTTON_STYLE_SHIFTED = { # Optional: Style when sidebar is open | |
**TOGGLE_BUTTON_STYLE, | |
"left": "calc(24rem + 10px)" # Position relative to open sidebar edge | |
# Or keep it fixed at "10px" - simpler | |
} | |
# ================== Dash Layout ==================
app.layout = html.Div(style={"margin": "0", "padding": "0"}, children=[ | |
# --- NEW: Fixed Toggle Button Container --- | |
html.Div( | |
id="toggle-button-container", # Give it an ID if you want to style it dynamically | |
style=TOGGLE_BUTTON_STYLE, # Apply the fixed style | |
children=[ | |
html.Button( | |
"☰", # Use an icon/symbol for compactness | |
id="sidebar-toggle", | |
n_clicks=0, | |
style={ | |
"padding": "8px 12px", | |
"fontSize": "1.2em", | |
"backgroundColor": "#e9ecef", | |
"border": "1px solid #ccc", | |
"borderRadius": "5px", | |
"cursor": "pointer", | |
"boxShadow": "1px 1px 3px rgba(0,0,0,0.2)" | |
}, | |
title="Toggle Sidebar" # Tooltip | |
) | |
] | |
), | |
# --- Sidebar --- | |
html.Div( | |
id="sidebar", | |
style=SIDEBAR_STYLE, # Start open | |
children=[ | |
html.H3("Controls", style={"textAlign": "center", "marginBottom": "1.5rem", "marginTop": "2rem"}), # Add margin top to avoid button overlap | |
            # Metric selection (radio buttons)
html.Div( | |
style={"marginBottom": "20px", "borderTop": "1px solid #eee", "paddingTop": "15px"}, | |
children=[ | |
html.Label("Select Metric:", style={"fontWeight": "bold", "display": "block", "marginBottom": "5px"}), | |
dcc.RadioItems( | |
id='metric-selector', | |
options=[ | |
{'label': 'Exact Match', 'value': 'exact_match'}, | |
{'label': 'Digit Match', 'value': 'digit_match'}, | |
{'label': 'Dlength', 'value': 'dlength'} | |
], | |
value='exact_match', | |
# inline=True, # Better stacked in sidebar | |
labelStyle={'display': 'block', 'marginBottom': '5px'}, | |
style={"marginBottom": "10px"} | |
), | |
] | |
), | |
            # Model selection (file checklist)
html.Div( | |
style={"marginBottom": "20px", "borderTop": "1px solid #eee", "paddingTop": "15px"}, | |
children=[ | |
html.Label("Select Models:", style={"fontWeight": "bold", "display": "block", "marginBottom": "5px"}), | |
dcc.Checklist( | |
id='file-selector-all-clear', | |
options=[ | |
{'label': 'Select All', 'value': 'all'}, | |
{'label': 'Clear', 'value': 'clear'}, | |
], | |
value=[], | |
inline=True, | |
style={"marginBottom": "5px"} | |
), | |
dcc.Checklist( | |
id='file-selector', | |
options=[{'label': file_name[:-15] if file_name.endswith('_statistics.txt') else file_name, 'value': file_name} for file_name in file_names], | |
value=[ # Default selection | |
'GPT-4o_statistics.txt', | |
'Llama-3.1-8B-ft_statistics.txt', | |
'Mixtral-8x7B_statistics.txt', | |
'Qwen2-72B_statistics.txt' | |
], | |
# inline=True, # Stacked looks better in narrow sidebar | |
labelStyle={'display': 'block', 'marginBottom': '3px'}, | |
style={"maxHeight": "200px", "overflowY": "auto", "border": "1px solid #ddd", "padding": "5px", "borderRadius": "4px"} # Scrollable list | |
), | |
] | |
), | |
            # Task selection
html.Div( | |
style={"marginBottom": "20px", "borderTop": "1px solid #eee", "paddingTop": "15px"}, | |
children=[ | |
html.Label("Select Tasks:", style={"fontWeight": "bold", "display": "block", "marginBottom": "10px"}), | |
html.H5("Integer Tasks", style={"fontWeight": "bold", "marginTop": "10px"}), | |
dcc.Checklist( id='int-task-selector-all-clear', options=[ {'label': 'All', 'value': 'all'}, {'label': 'Clear', 'value': 'clear'}], value=[], inline=True, style={"marginBottom": "5px"}), | |
dcc.Checklist( id='int-task-selector', options=[{'label': task, 'value': task + '<br />Integer'} for task in intTasks], value=['Add<br />Integer'], labelStyle={'display': 'block'}, style={"maxHeight": "150px", "overflowY": "auto", "border": "1px solid #ddd", "padding": "5px", "borderRadius": "4px", "marginBottom": "10px"}), | |
html.H5("Float Tasks", style={"fontWeight": "bold", "marginTop": "15px"}), | |
dcc.Checklist(id='float-task-selector-all-clear', options=[{'label': 'All', 'value': 'all'}, {'label': 'Clear', 'value': 'clear'}], value=[], inline=True, style={"marginBottom": "5px"}), | |
dcc.Checklist( id='float-task-selector', options=[{'label': task, 'value': task + '<br />Float'} for task in floatTasks], value=['Add<br />Float'], labelStyle={'display': 'block'}, style={"maxHeight": "150px", "overflowY": "auto", "border": "1px solid #ddd", "padding": "5px", "borderRadius": "4px", "marginBottom": "10px"}), | |
html.H5("Fraction Tasks", style={"fontWeight": "bold", "marginTop": "15px"}), | |
dcc.Checklist(id='fraction-task-selector-all-clear', options=[{'label': 'All', 'value': 'all'}, {'label': 'Clear', 'value': 'clear'}], value=[], inline=True, style={"marginBottom": "5px"}), | |
dcc.Checklist( id='fraction-task-selector', options=[{'label': task, 'value': task + '<br />Fraction'} for task in fractionTasks], value=['Add<br />Fraction'], labelStyle={'display': 'block'}, style={"maxHeight": "150px", "overflowY": "auto", "border": "1px solid #ddd", "padding": "5px", "borderRadius": "4px", "marginBottom": "10px"}), | |
html.H5("Scientific Tasks", style={"fontWeight": "bold", "marginTop": "15px"}), | |
dcc.Checklist(id='sci-task-selector-all-clear', options=[{'label': 'All', 'value': 'all'}, {'label': 'Clear', 'value': 'clear'}], value=[], inline=True, style={"marginBottom": "5px"}), | |
dcc.Checklist( id='sci-task-selector', options=[{'label': task, 'value': task + '<br />ScientificNotation'} for task in sciTasks], value=['Add<br />ScientificNotation'], labelStyle={'display': 'block'}, style={"maxHeight": "150px", "overflowY": "auto", "border": "1px solid #ddd", "padding": "5px", "borderRadius": "4px", "marginBottom": "10px"}), | |
] | |
), | |
            # Range selection
html.Div( | |
style={"marginBottom": "20px", "borderTop": "1px solid #eee", "paddingTop": "15px"}, | |
children=[ | |
html.Label("Select Ranges:", style={"fontWeight": "bold", "display": "block", "marginBottom": "5px"}), | |
dcc.Checklist( | |
id='metrics-selector', # Keep ID, though it selects ranges now | |
options=[ | |
{'label': 'Short Range (S)', 'value': 'short_range'}, | |
{'label': 'Medium Range (M)', 'value': 'medium_range'}, | |
{'label': 'Long Range (L)', 'value': 'long_range'}, | |
{'label': 'Very Long Range (XL)', 'value': 'very_long_range'} | |
], | |
value=['short_range', 'medium_range', 'long_range', 'very_long_range'], # Default | |
# inline=True, # Stacked | |
labelStyle={'display': 'block', 'marginBottom': '5px'}, | |
style={"marginBottom": "10px"} | |
), | |
] | |
), | |
] | |
), # End Sidebar | |
# --- Main Content Area --- | |
html.Div( | |
id="content", | |
style=CONTENT_STYLE, # Start with margin for open sidebar | |
children=[ | |
# Add padding top to content to prevent overlap with fixed button | |
html.Div(style={"paddingTop": "50px"}, children=[ | |
html.H1( | |
"NUPA Performance", | |
style={"textAlign": "center", "marginBottom": "30px", "color": "#333"} | |
), | |
# Graph Area | |
html.Div( | |
style={ | |
"backgroundColor": "#FFFFFF", | |
"padding": "20px", | |
"borderRadius": "8px", | |
"boxShadow": "0 1px 4px rgba(0,0,0,0.1)", | |
"marginBottom": "20px", | |
}, | |
children=[ | |
dcc.Loading( | |
id="loading-graph", | |
type="circle", | |
children=dcc.Graph( | |
id='performance-plot', | |
style={"width": "100%", "height": "auto"} | |
) | |
) | |
] | |
) | |
]) # End padded content div | |
] | |
) # End Content Area | |
]) | |
# ================== Callbacks ================== | |
# --- Callback: update the performance plot ---
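# Register the plotting callback. The original snippet omitted the decorator;
# the wiring below is an assumption reconstructed from the component IDs in
# the layout above (one Input per control, in the order of the function args).
@app.callback(
    Output('performance-plot', 'figure'),
    [
        Input('metric-selector', 'value'),
        Input('file-selector', 'value'),
        Input('int-task-selector', 'value'),
        Input('float-task-selector', 'value'),
        Input('fraction-task-selector', 'value'),
        Input('sci-task-selector', 'value'),
        Input('metrics-selector', 'value'),
    ],
)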
def update_figure(main_metric, selected_files, selected_int_tasks, | |
selected_float_tasks, selected_fraction_tasks, | |
selected_sci_tasks, selected_ranges): # Renamed variable | |
selected_tasks = selected_int_tasks + selected_float_tasks + selected_fraction_tasks + selected_sci_tasks | |
r = (0, len(keys)) # Use all keys by default now, adjust if needed | |
# Pass selected_ranges instead of a fixed list | |
return plot(main_metric, selected_files, selected_ranges, selected_tasks, r) | |
# --- Callback: sidebar toggle ---
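# Register the sidebar toggle. Assumed wiring (the decorator was missing):
# the button's click count drives the callback, and the current styles of the
# 'sidebar' and 'content' containers are read as State.
@app.callback(
    [Output('sidebar', 'style'), Output('content', 'style')],
    [Input('sidebar-toggle', 'n_clicks')],
    [State('sidebar', 'style'), State('content', 'style')],
)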
def toggle_sidebar(n, current_sidebar_style, current_content_style): | |
if n is None or n == 0: | |
return no_update, no_update | |
# Check if sidebar is currently open based on content margin | |
if current_content_style.get("marginLeft") == "24rem": # If sidebar is currently open | |
new_sidebar_style = SIDEBAR_HIDDEN | |
new_content_style = CONTENT_STYLE_FULL | |
else: # If sidebar is currently hidden | |
new_sidebar_style = SIDEBAR_STYLE | |
new_content_style = CONTENT_STYLE | |
return new_sidebar_style, new_content_style | |
# --- Callbacks: "Select All / Clear" (Keep existing) --- | |
def make_task_selector_callback(output_id, output_all_clear_id, task_list, task_suffix): | |
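    # Register the generated function as a Dash callback. This registration is
    # an assumption (the original snippet returned the function unregistered):
    # the checklist `output_id` receives the new task selection, and the
    # All/Clear checklist `output_all_clear_id` is reset; reading and writing
    # the same All/Clear value is a standard Dash circular-callback pattern.
    @app.callback(
        [Output(output_id, 'value'), Output(output_all_clear_id, 'value')],
        [Input(output_all_clear_id, 'value')],
    )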
def update_task_selector(all_clear_value): | |
if not all_clear_value: | |
raise exceptions.PreventUpdate | |
trigger = all_clear_value[-1] | |
if 'clear' == trigger: | |
return [], [] | |
elif 'all' == trigger: | |
all_values = [task + task_suffix for task in task_list] | |
return all_values, [] | |
raise exceptions.PreventUpdate | |
return update_task_selector | |
update_int_selector = make_task_selector_callback('int-task-selector', 'int-task-selector-all-clear', intTasks, '<br />Integer') | |
update_float_selector = make_task_selector_callback('float-task-selector', 'float-task-selector-all-clear', floatTasks, '<br />Float') | |
update_fraction_selector = make_task_selector_callback('fraction-task-selector', 'fraction-task-selector-all-clear', fractionTasks, '<br />Fraction') | |
update_sci_selector = make_task_selector_callback('sci-task-selector', 'sci-task-selector-all-clear', sciTasks, '<br />ScientificNotation') | |
# --- File Selector All/Clear --- | |
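# Assumed callback registration for the model checklist's All/Clear control,
# mirroring the task-selector callbacks above; the decorator was missing in
# the original snippet.
@app.callback(
    [Output('file-selector', 'value'), Output('file-selector-all-clear', 'value')],
    [Input('file-selector-all-clear', 'value')],
)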
def update_file_selector(all_clear_value): | |
if not all_clear_value: | |
raise exceptions.PreventUpdate | |
trigger = all_clear_value[-1] | |
if 'clear' == trigger: | |
return [], [] | |
elif 'all' == trigger: | |
return file_names, [] | |
raise exceptions.PreventUpdate | |
# ================== Run App ================== | |
if __name__ == '__main__':
if not os.path.isdir(results_dir): | |
print(f"Error: The directory '{results_dir}' does not exist.") | |
print("Please create it and place the necessary statistics.txt files inside.") | |
elif not keys: | |
print(f"Warning: Could not load keys. Functionality might be limited. Check data files in {results_dir}") | |
else: | |
print(f"Looking for data files in: {os.path.abspath(results_dir)}") | |
    # The app starts even if keys failed to load; the plotting callback then
    # renders an empty "no data" figure instead of crashing.
    app.run(debug=True, host='0.0.0.0', port=7860)