Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -38,12 +38,12 @@ REPO_CONFIG = {
|
|
| 38 |
}
|
| 39 |
}
|
| 40 |
|
| 41 |
-
# Колонки, которые
|
| 42 |
NON_FEATURE_COLS = {
|
| 43 |
"sample_id", "sample_seed", "circuit_hash", "split", "circuit_qasm",
|
| 44 |
"qasm_raw", "qasm_transpiled", "circuit_type_resolved", "circuit_type_requested",
|
| 45 |
"noise_type", "noise_prob", "observable_bases", "observable_mode", "backend_device",
|
| 46 |
-
"precision_mode", "circuit_signature"
|
| 47 |
}
|
| 48 |
|
| 49 |
_ASSET_CACHE = {}
|
|
@@ -79,20 +79,23 @@ def get_methodology_content(ds_name: str):
|
|
| 79 |
"""
|
| 80 |
|
| 81 |
def sync_ml_metrics(ds_name: str):
|
| 82 |
-
"""Динамически находит все доступные числовые метрики
|
| 83 |
assets = load_all_assets(ds_name)
|
| 84 |
df = assets["df"]
|
| 85 |
|
| 86 |
-
#
|
| 87 |
numeric_cols = df.select_dtypes(include=[np.number]).columns.tolist()
|
|
|
|
|
|
|
| 88 |
valid_features = [
|
| 89 |
c for c in numeric_cols
|
| 90 |
if c not in NON_FEATURE_COLS
|
| 91 |
-
and not any(
|
| 92 |
]
|
| 93 |
|
| 94 |
-
#
|
| 95 |
-
|
|
|
|
| 96 |
|
| 97 |
return gr.update(choices=valid_features, value=defaults or valid_features[:5])
|
| 98 |
|
|
@@ -100,6 +103,8 @@ def train_model(ds_name: str, features: List[str]):
|
|
| 100 |
if not features: return None, "### ❌ Error: No metrics selected."
|
| 101 |
assets = load_all_assets(ds_name)
|
| 102 |
df = assets["df"]
|
|
|
|
|
|
|
| 103 |
target = "ideal_expval_Z_global"
|
| 104 |
|
| 105 |
train_df = df.dropna(subset=features + [target])
|
|
@@ -112,20 +117,26 @@ def train_model(ds_name: str, features: List[str]):
|
|
| 112 |
sns.set_theme(style="whitegrid", context="talk")
|
| 113 |
fig, axes = plt.subplots(1, 3, figsize=(24, 8))
|
| 114 |
|
|
|
|
| 115 |
axes[0].scatter(y_test, preds, alpha=0.3, color='#2c3e50')
|
| 116 |
axes[0].plot([y.min(), y.max()], [y.min(), y.max()], 'r--', lw=2)
|
| 117 |
axes[0].set_title(f"Accuracy (R²: {r2_score(y_test, preds):.3f})")
|
|
|
|
| 118 |
|
|
|
|
| 119 |
imp = model.feature_importances_
|
| 120 |
-
|
|
|
|
|
|
|
| 121 |
axes[1].barh([features[i] for i in idx], imp[idx], color='#27ae60')
|
| 122 |
-
axes[1].set_title("Top Metrics Importance")
|
| 123 |
|
|
|
|
| 124 |
sns.histplot(y_test - preds, kde=True, ax=axes[2], color='#d35400')
|
| 125 |
-
axes[2].set_title("Residuals")
|
| 126 |
|
| 127 |
plt.tight_layout(pad=3.0)
|
| 128 |
-
return fig, f"**MAE:** {mean_absolute_error(y_test, preds):.4f}"
|
| 129 |
|
| 130 |
def update_explorer(ds_name: str, split_name: str):
|
| 131 |
assets = load_all_assets(ds_name)
|
|
@@ -157,8 +168,8 @@ with gr.Blocks(theme=gr.themes.Soft(), title="QSBench Hub") as demo:
|
|
| 157 |
with gr.Row():
|
| 158 |
with gr.Column(scale=1):
|
| 159 |
ml_ds_sel = gr.Dropdown(list(REPO_CONFIG.keys()), value="Core (Clean)", label="Select Dataset")
|
| 160 |
-
# Динамический список метрик
|
| 161 |
-
ml_feat_sel = gr.CheckboxGroup(label="Available Metrics (
|
| 162 |
train_btn = gr.Button("Execute Baseline", variant="primary")
|
| 163 |
with gr.Column(scale=2):
|
| 164 |
p_out = gr.Plot()
|
|
@@ -178,7 +189,7 @@ with gr.Blocks(theme=gr.themes.Soft(), title="QSBench Hub") as demo:
|
|
| 178 |
# Explorer
|
| 179 |
ds_sel.change(update_explorer, [ds_sel, sp_sel], [sp_sel, data_view, c_raw, c_tr, meta_txt])
|
| 180 |
|
| 181 |
-
# ML Tab:
|
| 182 |
ml_ds_sel.change(sync_ml_metrics, [ml_ds_sel], [ml_feat_sel])
|
| 183 |
train_btn.click(train_model, [ml_ds_sel, ml_feat_sel], [p_out, t_out])
|
| 184 |
|
|
|
|
| 38 |
}
|
| 39 |
}
|
| 40 |
|
| 41 |
+
# Columns that are NOT features (system, categorical, or target columns)
|
| 42 |
NON_FEATURE_COLS = {
|
| 43 |
"sample_id", "sample_seed", "circuit_hash", "split", "circuit_qasm",
|
| 44 |
"qasm_raw", "qasm_transpiled", "circuit_type_resolved", "circuit_type_requested",
|
| 45 |
"noise_type", "noise_prob", "observable_bases", "observable_mode", "backend_device",
|
| 46 |
+
"precision_mode", "circuit_signature", "entanglement", "shots", "gpu_requested", "gpu_available"
|
| 47 |
}
|
| 48 |
|
| 49 |
_ASSET_CACHE = {}
|
|
|
|
| 79 |
"""
|
| 80 |
|
| 81 |
def sync_ml_metrics(ds_name: str):
|
| 82 |
+
"""Dynamically find all available numeric metrics (features) from the CSV/Dataset."""
|
| 83 |
assets = load_all_assets(ds_name)
|
| 84 |
df = assets["df"]
|
| 85 |
|
| 86 |
+
# Extract all numeric columns
|
| 87 |
numeric_cols = df.select_dtypes(include=[np.number]).columns.tolist()
|
| 88 |
+
|
| 89 |
+
# Filter: drop system IDs and targets (any column whose name CONTAINS ideal_/noisy_/error_/sign_ - the check below is a substring match, not a prefix match)
|
| 90 |
valid_features = [
|
| 91 |
c for c in numeric_cols
|
| 92 |
if c not in NON_FEATURE_COLS
|
| 93 |
+
and not any(prefix in c for prefix in ["ideal_", "noisy_", "error_", "sign_"])
|
| 94 |
]
|
| 95 |
|
| 96 |
+
# Priority metrics used as the default selection
|
| 97 |
+
top_tier = ["gate_entropy", "meyer_wallach", "adjacency", "depth", "total_gates", "cx_count"]
|
| 98 |
+
defaults = [f for f in top_tier if f in valid_features]
|
| 99 |
|
| 100 |
return gr.update(choices=valid_features, value=defaults or valid_features[:5])
|
| 101 |
|
|
|
|
| 103 |
if not features: return None, "### ❌ Error: No metrics selected."
|
| 104 |
assets = load_all_assets(ds_name)
|
| 105 |
df = assets["df"]
|
| 106 |
+
|
| 107 |
+
# Use the global Z value as the target
|
| 108 |
target = "ideal_expval_Z_global"
|
| 109 |
|
| 110 |
train_df = df.dropna(subset=features + [target])
|
|
|
|
| 117 |
sns.set_theme(style="whitegrid", context="talk")
|
| 118 |
fig, axes = plt.subplots(1, 3, figsize=(24, 8))
|
| 119 |
|
| 120 |
+
# 1. Prediction vs Reality
|
| 121 |
axes[0].scatter(y_test, preds, alpha=0.3, color='#2c3e50')
|
| 122 |
axes[0].plot([y.min(), y.max()], [y.min(), y.max()], 'r--', lw=2)
|
| 123 |
axes[0].set_title(f"Accuracy (R²: {r2_score(y_test, preds):.3f})")
|
| 124 |
+
axes[0].set_xlabel("Ideal ExpVal"); axes[0].set_ylabel("Predicted")
|
| 125 |
|
| 126 |
+
# 2. Feature Importance
|
| 127 |
imp = model.feature_importances_
|
| 128 |
+
# Take the top 10 features if there are many, or all of them if there are few
|
| 129 |
+
top_n = min(len(features), 10)
|
| 130 |
+
idx = np.argsort(imp)[-top_n:]
|
| 131 |
axes[1].barh([features[i] for i in idx], imp[idx], color='#27ae60')
|
| 132 |
+
axes[1].set_title(f"Top {top_n} Metrics Importance")
|
| 133 |
|
| 134 |
+
# 3. Residuals
|
| 135 |
sns.histplot(y_test - preds, kde=True, ax=axes[2], color='#d35400')
|
| 136 |
+
axes[2].set_title("Residuals (Error Distribution)")
|
| 137 |
|
| 138 |
plt.tight_layout(pad=3.0)
|
| 139 |
+
return fig, f"**Mean Absolute Error (MAE):** {mean_absolute_error(y_test, preds):.4f}"
|
| 140 |
|
| 141 |
def update_explorer(ds_name: str, split_name: str):
|
| 142 |
assets = load_all_assets(ds_name)
|
|
|
|
| 168 |
with gr.Row():
|
| 169 |
with gr.Column(scale=1):
|
| 170 |
ml_ds_sel = gr.Dropdown(list(REPO_CONFIG.keys()), value="Core (Clean)", label="Select Dataset")
|
| 171 |
+
# Dynamic list of metrics extracted from the CSV
|
| 172 |
+
ml_feat_sel = gr.CheckboxGroup(label="Available Metrics (extracted from CSV)", choices=[])
|
| 173 |
train_btn = gr.Button("Execute Baseline", variant="primary")
|
| 174 |
with gr.Column(scale=2):
|
| 175 |
p_out = gr.Plot()
|
|
|
|
| 189 |
# Explorer
|
| 190 |
ds_sel.change(update_explorer, [ds_sel, sp_sel], [sp_sel, data_view, c_raw, c_tr, meta_txt])
|
| 191 |
|
| 192 |
+
# ML Tab: dynamic update of the metric list
|
| 193 |
ml_ds_sel.change(sync_ml_metrics, [ml_ds_sel], [ml_feat_sel])
|
| 194 |
train_btn.click(train_model, [ml_ds_sel, ml_feat_sel], [p_out, t_out])
|
| 195 |
|