QSBench committed on
Commit
f9c67d8
·
verified ·
1 Parent(s): 581d034

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +73 -56
app.py CHANGED
@@ -11,7 +11,7 @@ from sklearn.ensemble import RandomForestRegressor
11
  from sklearn.metrics import mean_absolute_error, r2_score
12
  from sklearn.model_selection import train_test_split
13
 
14
- # --- CONFIG ---
15
  logging.basicConfig(level=logging.INFO)
16
  logger = logging.getLogger(__name__)
17
 
@@ -38,63 +38,66 @@ REPO_CONFIG = {
38
  }
39
  }
40
 
41
- NON_FEATURE_COLS = {"sample_id", "sample_seed", "circuit_hash", "split", "circuit_qasm", "qasm_raw", "qasm_transpiled"}
 
 
 
 
 
 
42
 
43
  _ASSET_CACHE = {}
44
 
45
  def load_all_assets(key: str) -> Dict:
46
  if key not in _ASSET_CACHE:
 
47
  ds = load_dataset(REPO_CONFIG[key]["repo"])
48
  meta = requests.get(REPO_CONFIG[key]["meta_url"]).json()
49
  report = requests.get(REPO_CONFIG[key]["report_url"]).json()
50
  _ASSET_CACHE[key] = {"df": pd.DataFrame(ds["train"]), "meta": meta, "report": report}
51
  return _ASSET_CACHE[key]
52
 
53
- # --- RENDER FUNCTIONS ---
 
54
  def get_methodology_content(ds_name: str):
55
  assets = load_all_assets(ds_name)
56
- meta = assets["meta"]
57
  params = meta.get("parameters", {})
58
- report = assets["report"]
59
 
60
  families = report.get("families", {})
61
- # Важно: Markdown таблицы требуют отсутствия пробелов в начале строки
62
  fam_table = "| Family | Samples | Description |\n|:---|:---|:---|\n"
63
  for f, count in families.items():
64
  fam_table += f"| {f.upper()} | {count} | Synthetic {f} circuits |\n"
65
 
66
  return f"""
67
- ## 📖 Methodology & Release Notes: {meta.get('dataset_version')}
68
-
69
- ### 1. Generation Profile
70
- Dataset produced via **QSBench v{meta.get('generator_version')}**.
71
- - **Hardware Profile:** {params.get('n_qubits')} Qubits | Depth: {params.get('depth')}
72
- - **Noise Configuration:** `{params.get('noise')}` (p={params.get('noise_prob')})
73
- - **Backend:** {meta.get('backend_device')} | {meta.get('precision_mode')} precision
74
 
75
- ### 2. Circuit Family Coverage
76
  {fam_table}
77
-
78
- ### 3. Structural Metric Definitions
79
- - **Gate Entropy:** Measures circuit complexity and gate distribution diversity.
80
- - **Meyer-Wallach:** Scalar measure of global entanglement.
81
- - **Adjacency:** Graph density of the qubit interaction map.
82
  """
83
 
84
- def update_explorer(ds_name: str, split_name: str):
 
85
  assets = load_all_assets(ds_name)
86
  df = assets["df"]
87
- splits = df["split"].unique().tolist() if "split" in df.columns else ["train"]
88
- display_df = df[df["split"] == split_name].head(10) if "split" in df.columns else df.head(10)
89
 
90
- raw = display_df["qasm_raw"].iloc[0] if "qasm_raw" in display_df.columns else "// N/A"
91
- tr = display_df["qasm_transpiled"].iloc[0] if "qasm_transpiled" in display_df.columns else "// N/A"
92
- meta_text = f"### 📋 {ds_name} | Version: {assets['meta'].get('dataset_version')}"
 
 
 
 
93
 
94
- return gr.update(choices=splits), display_df, raw, tr, meta_text
 
 
 
95
 
96
  def train_model(ds_name: str, features: List[str]):
97
- if not features: return None, "### ❌ Select features"
98
  assets = load_all_assets(ds_name)
99
  df = assets["df"]
100
  target = "ideal_expval_Z_global"
@@ -111,7 +114,7 @@ def train_model(ds_name: str, features: List[str]):
111
 
112
  axes[0].scatter(y_test, preds, alpha=0.3, color='#2c3e50')
113
  axes[0].plot([y.min(), y.max()], [y.min(), y.max()], 'r--', lw=2)
114
- axes[0].set_title(f"R² Score: {r2_score(y_test, preds):.3f}")
115
 
116
  imp = model.feature_importances_
117
  idx = np.argsort(imp)[-10:]
@@ -119,59 +122,73 @@ def train_model(ds_name: str, features: List[str]):
119
  axes[1].set_title("Top Metrics Importance")
120
 
121
  sns.histplot(y_test - preds, kde=True, ax=axes[2], color='#d35400')
122
- axes[2].set_title("Residuals Distribution")
123
 
124
  plt.tight_layout(pad=3.0)
125
  return fig, f"**MAE:** {mean_absolute_error(y_test, preds):.4f}"
126
 
127
- # --- UI INTERFACE ---
 
 
 
 
 
 
 
 
 
 
 
128
  with gr.Blocks(theme=gr.themes.Soft(), title="QSBench Hub") as demo:
129
  gr.Markdown("# 🌌 QSBench: Quantum Analytics Hub")
130
 
131
  with gr.Tabs():
132
- # TAB 1: EXPLORER
133
  with gr.TabItem("🔎 Explorer"):
134
- metadata_info = gr.Markdown("### Loading...")
135
  with gr.Row():
136
- ds_select = gr.Dropdown(list(REPO_CONFIG.keys()), value="Core (Clean)", label="Dataset Pack")
137
- split_select = gr.Dropdown(["train"], value="train", label="Subset")
138
- data_table = gr.Dataframe(interactive=False)
139
  with gr.Row():
140
- code_raw = gr.Code(label="Source QASM", language="python")
141
- code_tr = gr.Code(label="Transpiled QASM", language="python")
142
 
143
- # TAB 2: ML
144
  with gr.TabItem("🤖 ML Training"):
145
  with gr.Row():
146
  with gr.Column(scale=1):
147
- ml_ds = gr.Dropdown(list(REPO_CONFIG.keys()), value="Core (Clean)", label="Select Dataset")
148
- ml_feat = gr.CheckboxGroup(label="Metrics", choices=["gate_entropy", "meyer_wallach", "n_qubits", "depth", "total_gates"], value=["gate_entropy", "meyer_wallach"])
149
- btn = gr.Button("Run Training", variant="primary")
 
150
  with gr.Column(scale=2):
151
- plot_out = gr.Plot()
152
- txt_out = gr.Markdown()
153
 
154
- # TAB 3: METHODOLOGY (С ВЫБОРОМ)
155
  with gr.TabItem("📖 Methodology"):
156
- method_ds_select = gr.Dropdown(list(REPO_CONFIG.keys()), value="Core (Clean)", label="View methodology for:")
157
- guide_md = gr.Markdown()
158
 
159
- # LINKS FOOTER
160
  gr.Markdown(f"""
161
  ---
162
  ### 🔗 Project Links
163
- [**🤗 Hugging Face**](https://huggingface.co/QSBench) | [**💻 GitHub**](https://github.com/QSBench) | [**🌐 Website**](https://qsbench.github.io)
164
  """)
165
 
166
- # EVENT HANDLERS
167
- ds_select.change(update_explorer, [ds_select, split_select], [split_select, data_table, code_raw, code_tr, metadata_info])
168
- ml_ds.change(lambda x: gr.update(), [ml_ds], []) # Simple sync
169
- method_ds_select.change(get_methodology_content, [method_ds_select], [guide_md])
170
- btn.click(train_model, [ml_ds, ml_feat], [plot_out, txt_out])
 
 
 
 
 
171
 
172
- # INITIAL LOAD
173
- demo.load(update_explorer, [ds_select, split_select], [split_select, data_table, code_raw, code_tr, metadata_info])
174
- demo.load(get_methodology_content, [method_ds_select], [guide_md])
 
175
 
176
  if __name__ == "__main__":
177
  demo.launch()
 
11
  from sklearn.metrics import mean_absolute_error, r2_score
12
  from sklearn.model_selection import train_test_split
13
 
14
+ # --- CONFIG & LOGGING ---
15
  logging.basicConfig(level=logging.INFO)
16
  logger = logging.getLogger(__name__)
17
 
 
38
  }
39
  }
40
 
41
+ # Колонки, которые нельзя использовать как фичи для обучения
42
+ NON_FEATURE_COLS = {
43
+ "sample_id", "sample_seed", "circuit_hash", "split", "circuit_qasm",
44
+ "qasm_raw", "qasm_transpiled", "circuit_type_resolved", "circuit_type_requested",
45
+ "noise_type", "noise_prob", "observable_bases", "observable_mode", "backend_device",
46
+ "precision_mode", "circuit_signature"
47
+ }
48
 
49
  _ASSET_CACHE = {}
50
 
51
  def load_all_assets(key: str) -> Dict:
52
  if key not in _ASSET_CACHE:
53
+ logger.info(f"Fetching {key}...")
54
  ds = load_dataset(REPO_CONFIG[key]["repo"])
55
  meta = requests.get(REPO_CONFIG[key]["meta_url"]).json()
56
  report = requests.get(REPO_CONFIG[key]["report_url"]).json()
57
  _ASSET_CACHE[key] = {"df": pd.DataFrame(ds["train"]), "meta": meta, "report": report}
58
  return _ASSET_CACHE[key]
59
 
60
+ # --- UI LOGIC ---
61
+
62
  def get_methodology_content(ds_name: str):
63
  assets = load_all_assets(ds_name)
64
+ meta, report = assets["meta"], assets["report"]
65
  params = meta.get("parameters", {})
 
66
 
67
  families = report.get("families", {})
 
68
  fam_table = "| Family | Samples | Description |\n|:---|:---|:---|\n"
69
  for f, count in families.items():
70
  fam_table += f"| {f.upper()} | {count} | Synthetic {f} circuits |\n"
71
 
72
  return f"""
73
+ ## 📖 Methodology: {meta.get('dataset_version')}
74
+ **Generator:** QSBench v{meta.get('generator_version')}
75
+ **Config:** {params.get('n_qubits')} Qubits | Depth {params.get('depth')} | Noise `{params.get('noise')}` (p={params.get('noise_prob')})
 
 
 
 
76
 
77
+ ### Circuit Family Coverage
78
  {fam_table}
 
 
 
 
 
79
  """
80
 
81
+ def sync_ml_metrics(ds_name: str):
82
+ """Динамически находит все доступные числовые метрики для конкретного датасета"""
83
  assets = load_all_assets(ds_name)
84
  df = assets["df"]
 
 
85
 
86
+ # Берем только числа, исключая таргеты и служебные поля
87
+ numeric_cols = df.select_dtypes(include=[np.number]).columns.tolist()
88
+ valid_features = [
89
+ c for c in numeric_cols
90
+ if c not in NON_FEATURE_COLS
91
+ and not any(x in c for x in ["ideal_", "noisy_", "error_", "sign_"])
92
+ ]
93
 
94
+ # Выбираем "золотой стандарт" по умолчанию, если они есть
95
+ defaults = [f for f in ["gate_entropy", "meyer_wallach", "n_qubits", "depth", "total_gates"] if f in valid_features]
96
+
97
+ return gr.update(choices=valid_features, value=defaults or valid_features[:5])
98
 
99
  def train_model(ds_name: str, features: List[str]):
100
+ if not features: return None, "### ❌ Error: No metrics selected."
101
  assets = load_all_assets(ds_name)
102
  df = assets["df"]
103
  target = "ideal_expval_Z_global"
 
114
 
115
  axes[0].scatter(y_test, preds, alpha=0.3, color='#2c3e50')
116
  axes[0].plot([y.min(), y.max()], [y.min(), y.max()], 'r--', lw=2)
117
+ axes[0].set_title(f"Accuracy (R²: {r2_score(y_test, preds):.3f})")
118
 
119
  imp = model.feature_importances_
120
  idx = np.argsort(imp)[-10:]
 
122
  axes[1].set_title("Top Metrics Importance")
123
 
124
  sns.histplot(y_test - preds, kde=True, ax=axes[2], color='#d35400')
125
+ axes[2].set_title("Residuals")
126
 
127
  plt.tight_layout(pad=3.0)
128
  return fig, f"**MAE:** {mean_absolute_error(y_test, preds):.4f}"
129
 
130
+ def update_explorer(ds_name: str, split_name: str):
131
+ assets = load_all_assets(ds_name)
132
+ df = assets["df"]
133
+ splits = df["split"].unique().tolist() if "split" in df.columns else ["train"]
134
+ display_df = df[df["split"] == split_name].head(10) if "split" in df.columns else df.head(10)
135
+
136
+ raw = display_df["qasm_raw"].iloc[0] if "qasm_raw" in display_df.columns else "// N/A"
137
+ tr = display_df["qasm_transpiled"].iloc[0] if "qasm_transpiled" in display_df.columns else "// N/A"
138
+
139
+ return gr.update(choices=splits), display_df, raw, tr, f"### 📋 {ds_name} Explorer"
140
+
141
+ # --- INTERFACE ---
142
  with gr.Blocks(theme=gr.themes.Soft(), title="QSBench Hub") as demo:
143
  gr.Markdown("# 🌌 QSBench: Quantum Analytics Hub")
144
 
145
  with gr.Tabs():
 
146
  with gr.TabItem("🔎 Explorer"):
147
+ meta_txt = gr.Markdown("### Loading...")
148
  with gr.Row():
149
+ ds_sel = gr.Dropdown(list(REPO_CONFIG.keys()), value="Core (Clean)", label="Dataset")
150
+ sp_sel = gr.Dropdown(["train"], value="train", label="Split")
151
+ data_view = gr.Dataframe(interactive=False)
152
  with gr.Row():
153
+ c_raw = gr.Code(label="Source QASM", language="python")
154
+ c_tr = gr.Code(label="Transpiled QASM", language="python")
155
 
 
156
  with gr.TabItem("🤖 ML Training"):
157
  with gr.Row():
158
  with gr.Column(scale=1):
159
+ ml_ds_sel = gr.Dropdown(list(REPO_CONFIG.keys()), value="Core (Clean)", label="Select Dataset")
160
+ # Динамический список метрик
161
+ ml_feat_sel = gr.CheckboxGroup(label="Available Metrics (Auto-detected)", choices=[])
162
+ train_btn = gr.Button("Execute Baseline", variant="primary")
163
  with gr.Column(scale=2):
164
+ p_out = gr.Plot()
165
+ t_out = gr.Markdown()
166
 
 
167
  with gr.TabItem("📖 Methodology"):
168
+ meth_ds_sel = gr.Dropdown(list(REPO_CONFIG.keys()), value="Core (Clean)", label="Dataset Details")
169
+ meth_md = gr.Markdown()
170
 
 
171
  gr.Markdown(f"""
172
  ---
173
  ### 🔗 Project Links
174
+ [**🌐 Website**](https://qsbench.github.io) | [**🤗 Hugging Face**](https://huggingface.co/QSBench) | [**💻 GitHub**](https://github.com/QSBench)
175
  """)
176
 
177
+ # --- EVENTS ---
178
+ # Explorer
179
+ ds_sel.change(update_explorer, [ds_sel, sp_sel], [sp_sel, data_view, c_raw, c_tr, meta_txt])
180
+
181
+ # ML Tab: Обновление списка метрик при смене датасета
182
+ ml_ds_sel.change(sync_ml_metrics, [ml_ds_sel], [ml_feat_sel])
183
+ train_btn.click(train_model, [ml_ds_sel, ml_feat_sel], [p_out, t_out])
184
+
185
+ # Methodology
186
+ meth_ds_sel.change(get_methodology_content, [meth_ds_sel], [meth_md])
187
 
188
+ # Initial Load
189
+ demo.load(update_explorer, [ds_sel, sp_sel], [sp_sel, data_view, c_raw, c_tr, meta_txt])
190
+ demo.load(sync_ml_metrics, [ml_ds_sel], [ml_feat_sel])
191
+ demo.load(get_methodology_content, [meth_ds_sel], [meth_md])
192
 
193
  if __name__ == "__main__":
194
  demo.launch()