Yingtao-Zheng committed on
Commit
982620c
·
1 Parent(s): 3d761f3

Merge feature/clearml-thresholds

Browse files
Files changed (1) hide show
  1. evaluation/justify_thresholds.py +119 -537
evaluation/justify_thresholds.py CHANGED
@@ -1,4 +1,5 @@
1
  # LOPO threshold/weight analysis. Run: python -m evaluation.justify_thresholds
 
2
 
3
  import glob
4
  import os
@@ -8,19 +9,9 @@ import numpy as np
8
  import matplotlib
9
  matplotlib.use("Agg")
10
  import matplotlib.pyplot as plt
11
- import joblib
12
- from sklearn.linear_model import LogisticRegression
13
  from sklearn.neural_network import MLPClassifier
14
  from sklearn.preprocessing import StandardScaler
15
- from sklearn.metrics import (
16
- roc_curve,
17
- roc_auc_score,
18
- f1_score,
19
- precision_score,
20
- recall_score,
21
- accuracy_score,
22
- confusion_matrix,
23
- )
24
  from xgboost import XGBClassifier
25
 
26
  _PROJECT_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
@@ -32,6 +23,27 @@ PLOTS_DIR = os.path.join(os.path.dirname(__file__), "plots")
32
  REPORT_PATH = os.path.join(os.path.dirname(__file__), "THRESHOLD_JUSTIFICATION.md")
33
  SEED = 42
34
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
 
36
  def _youdens_j(y_true, y_prob):
37
  fpr, tpr, thresholds = roc_curve(y_true, y_prob)
@@ -45,7 +57,7 @@ def _f1_at_threshold(y_true, y_prob, threshold):
45
  return f1_score(y_true, (y_prob >= threshold).astype(int), zero_division=0)
46
 
47
 
48
- def _plot_roc(fpr, tpr, auc, opt_thresh, opt_idx, title, path):
49
  fig, ax = plt.subplots(figsize=(6, 5))
50
  ax.plot(fpr, tpr, lw=2, label=f"ROC (AUC = {auc:.4f})")
51
  ax.plot(fpr[opt_idx], tpr[opt_idx], "ro", markersize=10,
@@ -56,6 +68,13 @@ def _plot_roc(fpr, tpr, auc, opt_thresh, opt_idx, title, path):
56
  ax.set_title(title)
57
  ax.legend(loc="lower right")
58
  fig.tight_layout()
 
 
 
 
 
 
 
59
  fig.savefig(path, dpi=150)
60
  plt.close(fig)
61
  print(f" saved {path}")
@@ -66,8 +85,7 @@ def run_lopo_models():
66
  by_person, _, _ = load_per_person("face_orientation")
67
  persons = sorted(by_person.keys())
68
 
69
- results = {"mlp": {"y": [], "p": [], "y_folds": [], "p_folds": []},
70
- "xgb": {"y": [], "p": [], "y_folds": [], "p_folds": []}}
71
 
72
  for i, held_out in enumerate(persons):
73
  X_test, y_test = by_person[held_out]
@@ -88,28 +106,23 @@ def run_lopo_models():
88
  mlp_prob = mlp.predict_proba(X_te_sc)[:, 1]
89
  results["mlp"]["y"].append(y_test)
90
  results["mlp"]["p"].append(mlp_prob)
91
- results["mlp"]["y_folds"].append(y_test)
92
- results["mlp"]["p_folds"].append(mlp_prob)
93
 
94
  xgb = XGBClassifier(
95
  n_estimators=600, max_depth=8, learning_rate=0.05,
96
  subsample=0.8, colsample_bytree=0.8,
97
  reg_alpha=0.1, reg_lambda=1.0,
98
- eval_metric="logloss",
99
  random_state=SEED, verbosity=0,
100
  )
101
  xgb.fit(X_tr_sc, train_y)
102
  xgb_prob = xgb.predict_proba(X_te_sc)[:, 1]
103
  results["xgb"]["y"].append(y_test)
104
  results["xgb"]["p"].append(xgb_prob)
105
- results["xgb"]["y_folds"].append(y_test)
106
- results["xgb"]["p_folds"].append(xgb_prob)
107
 
108
  print(f" fold {i+1}/{len(persons)}: held out {held_out} "
109
  f"({X_test.shape[0]} samples)")
110
 
111
- results["persons"] = persons
112
- for key in ("mlp", "xgb"):
113
  results[key]["y"] = np.concatenate(results[key]["y"])
114
  results[key]["p"] = np.concatenate(results[key]["p"])
115
 
@@ -130,7 +143,8 @@ def analyse_model_thresholds(results):
130
 
131
  path = os.path.join(PLOTS_DIR, f"roc_{name}.png")
132
  _plot_roc(fpr, tpr, auc, opt_t, opt_idx,
133
- f"LOPO ROC — {label} (9 folds, 144k samples)", path)
 
134
 
135
  model_stats[name] = {
136
  "label": label, "auc": auc,
@@ -139,131 +153,14 @@ def analyse_model_thresholds(results):
139
  print(f" {label}: AUC={auc:.4f}, optimal threshold={opt_t:.3f} "
140
  f"(F1={f1_opt:.4f}), F1@0.50={f1_50:.4f}")
141
 
142
- return model_stats
143
-
144
-
145
- def _ci_95_t(n):
146
- """95% CI half-width multiplier (t-distribution, df=n-1). Approximate for small n."""
147
- if n <= 1:
148
- return 0.0
149
- df = n - 1
150
- t_975 = [0, 12.71, 4.30, 3.18, 2.78, 2.57, 2.45, 2.37, 2.31]
151
- if df < len(t_975):
152
- return float(t_975[df])
153
- if df <= 30:
154
- return 2.0 + (30 - df) / 100
155
- return 1.96
156
-
157
-
158
- def analyse_precision_recall_confusion(results, model_stats):
159
- """Precision/recall at optimal threshold, pooled confusion matrix, per-fold metrics, 95% CIs."""
160
- print("\n=== Precision, recall, confusion matrix, per-person variance ===")
161
- from sklearn.metrics import precision_recall_curve, average_precision_score
162
-
163
- extended = {}
164
- persons = results["persons"]
165
- n_folds = len(persons)
166
-
167
- for name, label in [("mlp", "MLP"), ("xgb", "XGBoost")]:
168
- y_all = results[name]["y"]
169
- p_all = results[name]["p"]
170
- y_folds = results[name]["y_folds"]
171
- p_folds = results[name]["p_folds"]
172
- opt_t = model_stats[name]["opt_threshold"]
173
-
174
- y_pred = (p_all >= opt_t).astype(int)
175
- prec_pooled = precision_score(y_all, y_pred, zero_division=0)
176
- rec_pooled = recall_score(y_all, y_pred, zero_division=0)
177
- acc_pooled = accuracy_score(y_all, y_pred)
178
- cm = confusion_matrix(y_all, y_pred)
179
- if cm.shape == (2, 2):
180
- tn, fp, fn, tp = cm.ravel()
181
- else:
182
- tn = fp = fn = tp = 0
183
-
184
- prec_folds = []
185
- rec_folds = []
186
- acc_folds = []
187
- f1_folds = []
188
- per_person = []
189
- for k, (y_f, p_f) in enumerate(zip(y_folds, p_folds)):
190
- pred_f = (p_f >= opt_t).astype(int)
191
- prec_f = precision_score(y_f, pred_f, zero_division=0)
192
- rec_f = recall_score(y_f, pred_f, zero_division=0)
193
- acc_f = accuracy_score(y_f, pred_f)
194
- f1_f = f1_score(y_f, pred_f, zero_division=0)
195
- prec_folds.append(prec_f)
196
- rec_folds.append(rec_f)
197
- acc_folds.append(acc_f)
198
- f1_folds.append(f1_f)
199
- per_person.append({
200
- "person": persons[k],
201
- "accuracy": acc_f,
202
- "f1": f1_f,
203
- "precision": prec_f,
204
- "recall": rec_f,
205
- })
206
-
207
- t_mult = _ci_95_t(n_folds)
208
- mean_acc = np.mean(acc_folds)
209
- std_acc = np.std(acc_folds, ddof=1) if n_folds > 1 else 0.0
210
- mean_f1 = np.mean(f1_folds)
211
- std_f1 = np.std(f1_folds, ddof=1) if n_folds > 1 else 0.0
212
- mean_prec = np.mean(prec_folds)
213
- std_prec = np.std(prec_folds, ddof=1) if n_folds > 1 else 0.0
214
- mean_rec = np.mean(rec_folds)
215
- std_rec = np.std(rec_folds, ddof=1) if n_folds > 1 else 0.0
216
-
217
- extended[name] = {
218
- "label": label,
219
- "opt_threshold": opt_t,
220
- "precision_pooled": prec_pooled,
221
- "recall_pooled": rec_pooled,
222
- "accuracy_pooled": acc_pooled,
223
- "confusion_matrix": cm,
224
- "tn": int(tn), "fp": int(fp), "fn": int(fn), "tp": int(tp),
225
- "per_person": per_person,
226
- "accuracy_mean": mean_acc, "accuracy_std": std_acc,
227
- "accuracy_ci_half": t_mult * (std_acc / np.sqrt(n_folds)) if n_folds > 1 else 0.0,
228
- "f1_mean": mean_f1, "f1_std": std_f1,
229
- "f1_ci_half": t_mult * (std_f1 / np.sqrt(n_folds)) if n_folds > 1 else 0.0,
230
- "precision_mean": mean_prec, "precision_std": std_prec,
231
- "precision_ci_half": t_mult * (std_prec / np.sqrt(n_folds)) if n_folds > 1 else 0.0,
232
- "recall_mean": mean_rec, "recall_std": std_rec,
233
- "recall_ci_half": t_mult * (std_rec / np.sqrt(n_folds)) if n_folds > 1 else 0.0,
234
- "n_folds": n_folds,
235
- }
236
-
237
- print(f" {label}: precision={prec_pooled:.4f}, recall={rec_pooled:.4f} | "
238
- f"per-fold F1 mean={mean_f1:.4f} ± {std_f1:.4f} "
239
- f"(95% CI [{mean_f1 - extended[name]['f1_ci_half']:.4f}, {mean_f1 + extended[name]['f1_ci_half']:.4f}])")
240
-
241
- return extended
242
-
243
-
244
- def plot_confusion_matrices(extended_stats):
245
- """Save confusion matrix heatmaps for MLP and XGBoost."""
246
- for name in ("mlp", "xgb"):
247
- s = extended_stats[name]
248
- cm = s["confusion_matrix"]
249
- fig, ax = plt.subplots(figsize=(4, 3))
250
- im = ax.imshow(cm, cmap="Blues")
251
- ax.set_xticks([0, 1])
252
- ax.set_yticks([0, 1])
253
- ax.set_xticklabels(["Pred 0", "Pred 1"])
254
- ax.set_yticklabels(["True 0", "True 1"])
255
- ax.set_ylabel("True label")
256
- ax.set_xlabel("Predicted label")
257
- for i in range(2):
258
- for j in range(2):
259
- ax.text(j, i, str(cm[i, j]), ha="center", va="center", color="white" if cm[i, j] > cm.max() / 2 else "black", fontweight="bold")
260
- ax.set_title(f"LOPO {s['label']} @ t={s['opt_threshold']:.3f}")
261
- fig.tight_layout()
262
- path = os.path.join(PLOTS_DIR, f"confusion_matrix_{name}.png")
263
- fig.savefig(path, dpi=150)
264
- plt.close(fig)
265
- print(f" saved {path}")
266
 
 
267
 
268
  def run_geo_weight_search():
269
  print("\n=== Geometric weight grid search ===")
@@ -309,6 +206,13 @@ def run_geo_weight_search():
309
  ax.text(i, mean_f1[a] + 0.003, f"{mean_f1[a]:.3f}",
310
  ha="center", va="bottom", fontsize=8)
311
  fig.tight_layout()
 
 
 
 
 
 
 
312
  path = os.path.join(PLOTS_DIR, "geo_weight_search.png")
313
  fig.savefig(path, dpi=150)
314
  plt.close(fig)
@@ -316,6 +220,16 @@ def run_geo_weight_search():
316
 
317
  print(f" Best alpha (face weight) = {best_alpha:.1f}, "
318
  f"mean LOPO F1 = {mean_f1[best_alpha]:.4f}")
 
 
 
 
 
 
 
 
 
 
319
  return dict(mean_f1), best_alpha
320
 
321
 
@@ -382,198 +296,30 @@ def run_hybrid_weight_search(lopo_results):
382
  ax.text(i, mean_f1[w] + 0.003, f"{mean_f1[w]:.3f}",
383
  ha="center", va="bottom", fontsize=8)
384
  fig.tight_layout()
385
- path = os.path.join(PLOTS_DIR, "hybrid_weight_search.png")
386
- fig.savefig(path, dpi=150)
387
- plt.close(fig)
388
- print(f" saved {path}")
389
-
390
- print(f" Best w_mlp = {best_w:.1f}, mean LOPO F1 = {mean_f1[best_w]:.4f}")
391
- return dict(mean_f1), best_w
392
-
393
-
394
- def run_hybrid_xgb_weight_search(lopo_results):
395
- """Grid search: XGBoost prob + geometric. Same structure as MLP hybrid."""
396
- print("\n=== Hybrid XGBoost weight grid search ===")
397
-
398
- by_person, _, _ = load_per_person("face_orientation")
399
- persons = sorted(by_person.keys())
400
- features = SELECTED_FEATURES["face_orientation"]
401
- sf_idx = features.index("s_face")
402
- se_idx = features.index("s_eye")
403
-
404
- GEO_FACE_W = 0.7
405
- GEO_EYE_W = 0.3
406
 
407
- w_xgbs = np.arange(0.3, 0.85, 0.1).round(1)
408
- wmf1 = {w: [] for w in w_xgbs}
409
- xgb_p = lopo_results["xgb"]["p"]
410
- offset = 0
411
- for held_out in persons:
412
- X_test, y_test = by_person[held_out]
413
- n = X_test.shape[0]
414
- xgb_prob_fold = xgb_p[offset : offset + n]
415
- offset += n
416
-
417
- sf = X_test[:, sf_idx]
418
- se = X_test[:, se_idx]
419
- geo_score = np.clip(GEO_FACE_W * sf + GEO_EYE_W * se, 0, 1)
420
-
421
- train_X = np.concatenate([by_person[p][0] for p in persons if p != held_out])
422
- train_y = np.concatenate([by_person[p][1] for p in persons if p != held_out])
423
- sf_tr = train_X[:, sf_idx]
424
- se_tr = train_X[:, se_idx]
425
- geo_tr = np.clip(GEO_FACE_W * sf_tr + GEO_EYE_W * se_tr, 0, 1)
426
-
427
- scaler = StandardScaler().fit(train_X)
428
- X_tr_sc = scaler.transform(train_X)
429
- xgb_tr = XGBClassifier(
430
- n_estimators=600, max_depth=8, learning_rate=0.05,
431
- subsample=0.8, colsample_bytree=0.8,
432
- reg_alpha=0.1, reg_lambda=1.0,
433
- eval_metric="logloss",
434
- random_state=SEED, verbosity=0,
435
  )
436
- xgb_tr.fit(X_tr_sc, train_y)
437
- xgb_prob_tr = xgb_tr.predict_proba(X_tr_sc)[:, 1]
438
-
439
- for w in w_xgbs:
440
- combo_tr = w * xgb_prob_tr + (1.0 - w) * geo_tr
441
- opt_t, *_ = _youdens_j(train_y, combo_tr)
442
-
443
- combo_te = w * xgb_prob_fold + (1.0 - w) * geo_score
444
- f1 = _f1_at_threshold(y_test, combo_te, opt_t)
445
- wmf1[w].append(f1)
446
 
447
- mean_f1 = {w: np.mean(f1s) for w, f1s in wmf1.items()}
448
- best_w = max(mean_f1, key=mean_f1.get)
449
-
450
- fig, ax = plt.subplots(figsize=(7, 4))
451
- ax.bar([f"{w:.1f}" for w in w_xgbs],
452
- [mean_f1[w] for w in w_xgbs], color="steelblue")
453
- ax.set_xlabel("XGBoost weight (w_xgb); geo weight = 1 - w_xgb")
454
- ax.set_ylabel("Mean LOPO F1")
455
- ax.set_title("Hybrid Pipeline: XGBoost vs Geometric Weight Search")
456
- ax.set_ylim(bottom=max(0, min(mean_f1.values()) - 0.05))
457
- for i, w in enumerate(w_xgbs):
458
- ax.text(i, mean_f1[w] + 0.003, f"{mean_f1[w]:.3f}",
459
- ha="center", va="bottom", fontsize=8)
460
- fig.tight_layout()
461
- path = os.path.join(PLOTS_DIR, "hybrid_xgb_weight_search.png")
462
  fig.savefig(path, dpi=150)
463
  plt.close(fig)
464
  print(f" saved {path}")
465
 
466
- print(f" Best w_xgb = {best_w:.1f}, mean LOPO F1 = {mean_f1[best_w]:.4f}")
467
- return dict(mean_f1), best_w
468
-
469
-
470
- def run_hybrid_lr_combiner(lopo_results, use_xgb=True):
471
- """LR combiner: meta-features = [model_prob, geo_score], learned weights instead of grid search."""
472
- print("\n=== Hybrid LR combiner (LOPO) ===")
473
- by_person, _, _ = load_per_person("face_orientation")
474
- persons = sorted(by_person.keys())
475
- features = SELECTED_FEATURES["face_orientation"]
476
- sf_idx = features.index("s_face")
477
- se_idx = features.index("s_eye")
478
- GEO_FACE_W = 0.7
479
- GEO_EYE_W = 0.3
480
-
481
- key = "xgb" if use_xgb else "mlp"
482
- model_p = lopo_results[key]["p"]
483
- offset = 0
484
- fold_f1s = []
485
- for held_out in persons:
486
- X_test, y_test = by_person[held_out]
487
- n = X_test.shape[0]
488
- prob_fold = model_p[offset : offset + n]
489
- offset += n
490
- sf = X_test[:, sf_idx]
491
- se = X_test[:, se_idx]
492
- geo_score = np.clip(GEO_FACE_W * sf + GEO_EYE_W * se, 0, 1)
493
- meta_te = np.column_stack([prob_fold, geo_score])
494
 
495
- train_X = np.concatenate([by_person[p][0] for p in persons if p != held_out])
496
- train_y = np.concatenate([by_person[p][1] for p in persons if p != held_out])
497
- sf_tr = train_X[:, sf_idx]
498
- se_tr = train_X[:, se_idx]
499
- geo_tr = np.clip(GEO_FACE_W * sf_tr + GEO_EYE_W * se_tr, 0, 1)
500
- scaler = StandardScaler().fit(train_X)
501
- X_tr_sc = scaler.transform(train_X)
502
- if use_xgb:
503
- xgb_tr = XGBClassifier(
504
- n_estimators=600, max_depth=8, learning_rate=0.05,
505
- subsample=0.8, colsample_bytree=0.8,
506
- reg_alpha=0.1, reg_lambda=1.0,
507
- eval_metric="logloss",
508
- random_state=SEED, verbosity=0,
509
- )
510
- xgb_tr.fit(X_tr_sc, train_y)
511
- prob_tr = xgb_tr.predict_proba(X_tr_sc)[:, 1]
512
- else:
513
- mlp_tr = MLPClassifier(
514
- hidden_layer_sizes=(64, 32), activation="relu",
515
- max_iter=200, early_stopping=True, validation_fraction=0.15,
516
- random_state=SEED, verbose=False,
517
  )
518
- mlp_tr.fit(X_tr_sc, train_y)
519
- prob_tr = mlp_tr.predict_proba(X_tr_sc)[:, 1]
520
- meta_tr = np.column_stack([prob_tr, geo_tr])
521
-
522
- lr = LogisticRegression(C=1.0, max_iter=500, random_state=SEED)
523
- lr.fit(meta_tr, train_y)
524
- p_tr = lr.predict_proba(meta_tr)[:, 1]
525
- opt_t, *_ = _youdens_j(train_y, p_tr)
526
- p_te = lr.predict_proba(meta_te)[:, 1]
527
- f1 = _f1_at_threshold(y_test, p_te, opt_t)
528
- fold_f1s.append(f1)
529
- print(f" fold {held_out}: F1 = {f1:.4f} (threshold = {opt_t:.3f})")
530
-
531
- mean_f1 = float(np.mean(fold_f1s))
532
- print(f" LR combiner mean LOPO F1 = {mean_f1:.4f}")
533
- return mean_f1
534
-
535
-
536
- def train_and_save_hybrid_combiner(lopo_results, use_xgb, geo_face_weight=0.7, geo_eye_weight=0.3,
537
- combiner_path=None):
538
- """Build OOS meta-dataset from LOPO predictions, train one LR, save joblib + optimal threshold."""
539
- by_person, _, _ = load_per_person("face_orientation")
540
- persons = sorted(by_person.keys())
541
- features = SELECTED_FEATURES["face_orientation"]
542
- sf_idx = features.index("s_face")
543
- se_idx = features.index("s_eye")
544
 
545
- key = "xgb" if use_xgb else "mlp"
546
- model_p = lopo_results[key]["p"]
547
- meta_y = lopo_results[key]["y"]
548
- geo_list = []
549
- offset = 0
550
- for p in persons:
551
- X, _ = by_person[p]
552
- n = X.shape[0]
553
- sf = X[:, sf_idx]
554
- se = X[:, se_idx]
555
- geo_list.append(np.clip(geo_face_weight * sf + geo_eye_weight * se, 0, 1))
556
- offset += n
557
- geo_all = np.concatenate(geo_list)
558
- meta_X = np.column_stack([model_p, geo_all])
559
-
560
- lr = LogisticRegression(C=1.0, max_iter=500, random_state=SEED)
561
- lr.fit(meta_X, meta_y)
562
- p = lr.predict_proba(meta_X)[:, 1]
563
- opt_threshold, *_ = _youdens_j(meta_y, p)
564
-
565
- if combiner_path is None:
566
- combiner_path = os.path.join(_PROJECT_ROOT, "checkpoints", "hybrid_combiner.joblib")
567
- os.makedirs(os.path.dirname(combiner_path), exist_ok=True)
568
- joblib.dump({
569
- "combiner": lr,
570
- "threshold": float(opt_threshold),
571
- "use_xgb": bool(use_xgb),
572
- "geo_face_weight": geo_face_weight,
573
- "geo_eye_weight": geo_eye_weight,
574
- }, combiner_path)
575
- print(f" Saved combiner to {combiner_path} (threshold={opt_threshold:.3f})")
576
- return opt_threshold, combiner_path
577
 
578
 
579
  def plot_distributions():
@@ -599,7 +345,8 @@ def plot_distributions():
599
  ear_plot = np.clip(ear_min, 0, 0.85)
600
  mar_plot = np.clip(mar, 0, 1.5)
601
 
602
- fig, ax = plt.subplots(figsize=(7, 4))
 
603
  ax.hist(ear_plot[labels == 1], bins=100, alpha=0.6, label="Focused (1)", density=True)
604
  ax.hist(ear_plot[labels == 0], bins=100, alpha=0.6, label="Unfocused (0)", density=True)
605
  for val, lbl, c in [
@@ -612,13 +359,21 @@ def plot_distributions():
612
  ax.set_ylabel("Density")
613
  ax.set_title("EAR Distribution by Class (144k samples)")
614
  ax.legend(fontsize=8)
615
- fig.tight_layout()
 
 
 
 
 
 
 
616
  path = os.path.join(PLOTS_DIR, "ear_distribution.png")
617
- fig.savefig(path, dpi=150)
618
- plt.close(fig)
619
  print(f" saved {path}")
620
 
621
- fig, ax = plt.subplots(figsize=(7, 4))
 
622
  ax.hist(mar_plot[labels == 1], bins=100, alpha=0.6, label="Focused (1)", density=True)
623
  ax.hist(mar_plot[labels == 0], bins=100, alpha=0.6, label="Unfocused (0)", density=True)
624
  ax.axvline(0.55, color="red", ls="--", lw=1.5, label="MAR_YAWN = 0.55")
@@ -626,10 +381,17 @@ def plot_distributions():
626
  ax.set_ylabel("Density")
627
  ax.set_title("MAR Distribution by Class (144k samples)")
628
  ax.legend(fontsize=8)
629
- fig.tight_layout()
 
 
 
 
 
 
 
630
  path = os.path.join(PLOTS_DIR, "mar_distribution.png")
631
- fig.savefig(path, dpi=150)
632
- plt.close(fig)
633
  print(f" saved {path}")
634
 
635
  closed_pct = np.mean(ear_min < 0.16) * 100
@@ -650,11 +412,7 @@ def plot_distributions():
650
  return stats
651
 
652
 
653
- def write_report(model_stats, extended_stats, geo_f1, best_alpha,
654
- hybrid_mlp_f1, best_w_mlp,
655
- hybrid_xgb_f1, best_w_xgb,
656
- use_xgb_for_hybrid, dist_stats,
657
- lr_combiner_f1=None):
658
  lines = []
659
  lines.append("# Threshold Justification Report")
660
  lines.append("")
@@ -679,91 +437,7 @@ def write_report(model_stats, extended_stats, geo_f1, best_alpha,
679
  lines.append("![XGBoost ROC](plots/roc_xgboost.png)")
680
  lines.append("")
681
 
682
- lines.append("## 2. Precision, Recall and Tradeoff")
683
- lines.append("")
684
- lines.append("At the optimal threshold (Youden's J), pooled over all LOPO held-out predictions:")
685
- lines.append("")
686
- lines.append("| Model | Threshold | Precision | Recall | F1 | Accuracy |")
687
- lines.append("|-------|----------:|----------:|-------:|---:|---------:|")
688
- for key in ("mlp", "xgb"):
689
- s = extended_stats[key]
690
- lines.append(f"| {s['label']} | {s['opt_threshold']:.3f} | {s['precision_pooled']:.4f} | "
691
- f"{s['recall_pooled']:.4f} | {model_stats[key]['f1_opt']:.4f} | {s['accuracy_pooled']:.4f} |")
692
- lines.append("")
693
- lines.append("Higher threshold → fewer positive predictions → higher precision, lower recall. "
694
- "Youden's J picks the threshold that balances sensitivity and specificity (recall for the positive class and true negative rate).")
695
- lines.append("")
696
-
697
- lines.append("## 3. Confusion Matrix (Pooled LOPO)")
698
- lines.append("")
699
- lines.append("At optimal threshold. Rows = true label, columns = predicted label (0 = unfocused, 1 = focused).")
700
- lines.append("")
701
- for key in ("mlp", "xgb"):
702
- s = extended_stats[key]
703
- lines.append(f"### {s['label']}")
704
- lines.append("")
705
- lines.append("| | Pred 0 | Pred 1 |")
706
- lines.append("|--|-------:|-------:|")
707
- cm = s["confusion_matrix"]
708
- if cm.shape == (2, 2):
709
- lines.append(f"| **True 0** | {cm[0,0]} (TN) | {cm[0,1]} (FP) |")
710
- lines.append(f"| **True 1** | {cm[1,0]} (FN) | {cm[1,1]} (TP) |")
711
- lines.append("")
712
- lines.append(f"TN={s['tn']}, FP={s['fp']}, FN={s['fn']}, TP={s['tp']}. ")
713
- lines.append("")
714
- lines.append("![Confusion MLP](plots/confusion_matrix_mlp.png)")
715
- lines.append("")
716
- lines.append("![Confusion XGBoost](plots/confusion_matrix_xgb.png)")
717
- lines.append("")
718
-
719
- lines.append("## 4. Per-Person Performance Variance (LOPO)")
720
- lines.append("")
721
- lines.append("One fold per left-out person; metrics at optimal threshold.")
722
- lines.append("")
723
- for key in ("mlp", "xgb"):
724
- s = extended_stats[key]
725
- lines.append(f"### {s['label']} — per held-out person")
726
- lines.append("")
727
- lines.append("| Person | Accuracy | F1 | Precision | Recall |")
728
- lines.append("|--------|---------:|---:|----------:|-------:|")
729
- for row in s["per_person"]:
730
- lines.append(f"| {row['person']} | {row['accuracy']:.4f} | {row['f1']:.4f} | {row['precision']:.4f} | {row['recall']:.4f} |")
731
- lines.append("")
732
- lines.append("### Summary across persons")
733
- lines.append("")
734
- lines.append("| Model | Accuracy mean ± std | F1 mean ± std | Precision mean ± std | Recall mean ± std |")
735
- lines.append("|-------|---------------------|---------------|----------------------|-------------------|")
736
- for key in ("mlp", "xgb"):
737
- s = extended_stats[key]
738
- lines.append(f"| {s['label']} | {s['accuracy_mean']:.4f} ± {s['accuracy_std']:.4f} | "
739
- f"{s['f1_mean']:.4f} ± {s['f1_std']:.4f} | "
740
- f"{s['precision_mean']:.4f} ± {s['precision_std']:.4f} | "
741
- f"{s['recall_mean']:.4f} ± {s['recall_std']:.4f} |")
742
- lines.append("")
743
-
744
- lines.append("## 5. Confidence Intervals (95%, LOPO over 9 persons)")
745
- lines.append("")
746
- lines.append("Mean ± half-width of 95% t-interval (df=8) for each metric across the 9 left-out persons.")
747
- lines.append("")
748
- lines.append("| Model | F1 | Accuracy | Precision | Recall |")
749
- lines.append("|-------|---:|--------:|----------:|-------:|")
750
- for key in ("mlp", "xgb"):
751
- s = extended_stats[key]
752
- f1_lo = s["f1_mean"] - s["f1_ci_half"]
753
- f1_hi = s["f1_mean"] + s["f1_ci_half"]
754
- acc_lo = s["accuracy_mean"] - s["accuracy_ci_half"]
755
- acc_hi = s["accuracy_mean"] + s["accuracy_ci_half"]
756
- prec_lo = s["precision_mean"] - s["precision_ci_half"]
757
- prec_hi = s["precision_mean"] + s["precision_ci_half"]
758
- rec_lo = s["recall_mean"] - s["recall_ci_half"]
759
- rec_hi = s["recall_mean"] + s["recall_ci_half"]
760
- lines.append(f"| {s['label']} | {s['f1_mean']:.4f} [{f1_lo:.4f}, {f1_hi:.4f}] | "
761
- f"{s['accuracy_mean']:.4f} [{acc_lo:.4f}, {acc_hi:.4f}] | "
762
- f"{s['precision_mean']:.4f} [{prec_lo:.4f}, {prec_hi:.4f}] | "
763
- f"{s['recall_mean']:.4f} [{rec_lo:.4f}, {rec_hi:.4f}] |")
764
- lines.append("")
765
-
766
- lines.append("## 6. Geometric Pipeline Weights (s_face vs s_eye)")
767
  lines.append("")
768
  lines.append("Grid search over face weight alpha in {0.2 ... 0.8}. "
769
  "Eye weight = 1 - alpha. Threshold per fold via Youden's J.")
@@ -780,68 +454,25 @@ def write_report(model_stats, extended_stats, geo_f1, best_alpha,
780
  lines.append("![Geometric weight search](plots/geo_weight_search.png)")
781
  lines.append("")
782
 
783
- lines.append("## 7. Hybrid Pipeline: MLP vs Geometric")
784
  lines.append("")
785
  lines.append("Grid search over w_mlp in {0.3 ... 0.8}. w_geo = 1 - w_mlp. "
786
- "Geometric sub-score uses same weights as geometric pipeline (face=0.7, eye=0.3).")
 
787
  lines.append("")
788
  lines.append("| MLP Weight (w_mlp) | Mean LOPO F1 |")
789
  lines.append("|-------------------:|-------------:|")
790
- for w in sorted(hybrid_mlp_f1.keys()):
791
- marker = " **<-- selected**" if w == best_w_mlp else ""
792
- lines.append(f"| {w:.1f} | {hybrid_mlp_f1[w]:.4f}{marker} |")
793
- lines.append("")
794
- lines.append(f"**Best:** w_mlp = {best_w_mlp:.1f} (MLP {best_w_mlp*100:.0f}%, "
795
- f"geometric {(1-best_w_mlp)*100:.0f}%) → mean LOPO F1 = {hybrid_mlp_f1[best_w_mlp]:.4f}")
796
- lines.append("")
797
- lines.append("![Hybrid MLP weight search](plots/hybrid_weight_search.png)")
798
- lines.append("")
799
-
800
- lines.append("## 8. Hybrid Pipeline: XGBoost vs Geometric")
801
- lines.append("")
802
- lines.append("Same grid over w_xgb in {0.3 ... 0.8}. w_geo = 1 - w_xgb.")
803
- lines.append("")
804
- lines.append("| XGBoost Weight (w_xgb) | Mean LOPO F1 |")
805
- lines.append("|-----------------------:|-------------:|")
806
- for w in sorted(hybrid_xgb_f1.keys()):
807
- marker = " **<-- selected**" if w == best_w_xgb else ""
808
- lines.append(f"| {w:.1f} | {hybrid_xgb_f1[w]:.4f}{marker} |")
809
  lines.append("")
810
- lines.append(f"**Best:** w_xgb = {best_w_xgb:.1f} mean LOPO F1 = {hybrid_xgb_f1[best_w_xgb]:.4f}")
 
811
  lines.append("")
812
- lines.append("![Hybrid XGBoost weight search](plots/hybrid_xgb_weight_search.png)")
813
  lines.append("")
814
 
815
- f1_mlp = hybrid_mlp_f1[best_w_mlp]
816
- f1_xgb = hybrid_xgb_f1[best_w_xgb]
817
- lines.append("### Which hybrid is used in the app?")
818
- lines.append("")
819
- if use_xgb_for_hybrid:
820
- lines.append(f"**XGBoost hybrid is better** (F1 = {f1_xgb:.4f} vs MLP hybrid F1 = {f1_mlp:.4f}).")
821
- else:
822
- lines.append(f"**MLP hybrid is better** (F1 = {f1_mlp:.4f} vs XGBoost hybrid F1 = {f1_xgb:.4f}).")
823
- lines.append("")
824
- if lr_combiner_f1 is not None:
825
- lines.append("### Logistic regression combiner (replaces heuristic weights)")
826
- lines.append("")
827
- lines.append("Instead of a fixed linear blend (e.g. 0.3·ML + 0.7·geo), a **logistic regression** "
828
- "combines model probability and geometric score: meta-features = [model_prob, geo_score], "
829
- "trained on the same LOPO splits. Threshold from Youden's J on combiner output.")
830
- lines.append("")
831
- lines.append(f"| Method | Mean LOPO F1 |")
832
- lines.append("|--------|-------------:|")
833
- lines.append(f"| Heuristic weight grid (best w) | {(f1_xgb if use_xgb_for_hybrid else f1_mlp):.4f} |")
834
- lines.append(f"| **LR combiner** | **{lr_combiner_f1:.4f}** |")
835
- lines.append("")
836
- lines.append("The app uses the saved LR combiner when `combiner_path` is set in `hybrid_focus_config.json`.")
837
- lines.append("")
838
- else:
839
- if use_xgb_for_hybrid:
840
- lines.append("The app uses **XGBoost + geometric** with the weights above.")
841
- else:
842
- lines.append("The app uses **MLP + geometric** with the weights above.")
843
- lines.append("")
844
- lines.append("## 5. Eye and Mouth Aspect Ratio Thresholds")
845
  lines.append("")
846
  lines.append("### EAR (Eye Aspect Ratio)")
847
  lines.append("")
@@ -874,7 +505,7 @@ def write_report(model_stats, extended_stats, geo_f1, best_alpha,
874
  lines.append("![MAR distribution](plots/mar_distribution.png)")
875
  lines.append("")
876
 
877
- lines.append("## 10. Other Constants")
878
  lines.append("")
879
  lines.append("| Constant | Value | Rationale |")
880
  lines.append("|----------|------:|-----------|")
@@ -901,71 +532,22 @@ def write_report(model_stats, extended_stats, geo_f1, best_alpha,
901
  print(f"\nReport written to {REPORT_PATH}")
902
 
903
 
904
- def write_hybrid_config(use_xgb, best_w_mlp, best_w_xgb, config_path,
905
- combiner_path=None, combiner_threshold=None):
906
- """Write hybrid_focus_config.json. If combiner_path set, app uses LR combiner instead of heuristic weights."""
907
- import json
908
- if use_xgb:
909
- w_xgb = round(float(best_w_xgb), 2)
910
- w_geo = round(1.0 - best_w_xgb, 2)
911
- w_mlp = 0.3
912
- else:
913
- w_mlp = round(float(best_w_mlp), 2)
914
- w_geo = round(1.0 - best_w_mlp, 2)
915
- w_xgb = 0.0
916
- cfg = {
917
- "use_xgb": bool(use_xgb),
918
- "w_mlp": w_mlp,
919
- "w_xgb": w_xgb,
920
- "w_geo": w_geo,
921
- "threshold": float(combiner_threshold) if combiner_threshold is not None else 0.35,
922
- "use_yawn_veto": True,
923
- "geo_face_weight": 0.7,
924
- "geo_eye_weight": 0.3,
925
- "mar_yawn_threshold": 0.55,
926
- "metric": "f1",
927
- }
928
- if combiner_path:
929
- cfg["combiner"] = "logistic"
930
- cfg["combiner_path"] = os.path.basename(combiner_path)
931
- with open(config_path, "w", encoding="utf-8") as f:
932
- json.dump(cfg, f, indent=2)
933
- print(f" Written {config_path} (use_xgb={cfg['use_xgb']}, combiner={cfg.get('combiner', 'heuristic')})")
934
-
935
-
936
  def main():
937
  os.makedirs(PLOTS_DIR, exist_ok=True)
938
 
939
  lopo_results = run_lopo_models()
940
  model_stats = analyse_model_thresholds(lopo_results)
941
- extended_stats = analyse_precision_recall_confusion(lopo_results, model_stats)
942
- plot_confusion_matrices(extended_stats)
943
  geo_f1, best_alpha = run_geo_weight_search()
944
- hybrid_mlp_f1, best_w_mlp = run_hybrid_weight_search(lopo_results)
945
- hybrid_xgb_f1, best_w_xgb = run_hybrid_xgb_weight_search(lopo_results)
946
  dist_stats = plot_distributions()
947
 
948
- f1_mlp = hybrid_mlp_f1[best_w_mlp]
949
- f1_xgb = hybrid_xgb_f1[best_w_xgb]
950
- use_xgb_for_hybrid = f1_xgb > f1_mlp
951
- print(f"\n Hybrid comparison: MLP F1 = {f1_mlp:.4f}, XGBoost F1 = {f1_xgb:.4f} → "
952
- f"use {'XGBoost' if use_xgb_for_hybrid else 'MLP'}")
953
-
954
- lr_combiner_f1 = run_hybrid_lr_combiner(lopo_results, use_xgb=use_xgb_for_hybrid)
955
- combiner_threshold, combiner_path = train_and_save_hybrid_combiner(
956
- lopo_results, use_xgb_for_hybrid,
957
- combiner_path=os.path.join(_PROJECT_ROOT, "checkpoints", "hybrid_combiner.joblib"),
958
- )
959
-
960
- config_path = os.path.join(_PROJECT_ROOT, "checkpoints", "hybrid_focus_config.json")
961
- write_hybrid_config(use_xgb_for_hybrid, best_w_mlp, best_w_xgb, config_path,
962
- combiner_path=combiner_path, combiner_threshold=combiner_threshold)
963
-
964
- write_report(model_stats, extended_stats, geo_f1, best_alpha,
965
- hybrid_mlp_f1, best_w_mlp,
966
- hybrid_xgb_f1, best_w_xgb,
967
- use_xgb_for_hybrid, dist_stats,
968
- lr_combiner_f1=lr_combiner_f1)
969
  print("\nDone.")
970
 
971
 
 
1
  # LOPO threshold/weight analysis. Run: python -m evaluation.justify_thresholds
2
+ # ClearML logging: set USE_CLEARML=1 env var or pass --clearml flag
3
 
4
import glob
import os
import sys
 
9
  import matplotlib
10
  matplotlib.use("Agg")
11
  import matplotlib.pyplot as plt
 
 
12
  from sklearn.neural_network import MLPClassifier
13
  from sklearn.preprocessing import StandardScaler
14
+ from sklearn.metrics import roc_curve, roc_auc_score, f1_score
 
 
 
 
 
 
 
 
15
  from xgboost import XGBClassifier
16
 
17
  _PROJECT_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
 
23
  REPORT_PATH = os.path.join(os.path.dirname(__file__), "THRESHOLD_JUSTIFICATION.md")
24
  SEED = 42
25
 
26
+ # ClearML
27
+ # start logging with: USE_CLEARML=1 python -m evaluation.justify_thresholds or: python -m evaluation.justify_thresholds --clearml
28
+ _USE_CLEARML = os.environ.get("USE_CLEARML", "0") == "1" or "--clearml" in sys.argv
29
+
30
+ _task = None
31
+ _logger = None
32
+
33
+ if _USE_CLEARML:
34
+ try:
35
+ from clearml import Task
36
+ _task = Task.init(
37
+ project_name="Focus Guard",
38
+ task_name="Threshold Justification",
39
+ tags=["evaluation", "thresholds"],
40
+ )
41
+ _task.connect({"SEED": SEED, "n_participants": 9})
42
+ _logger = _task.get_logger()
43
+ print("ClearML enabled — logging to project 'Focus Guard'")
44
+ except ImportError:
45
+ print("WARNING: ClearML not installed. Continuing without logging.")
46
+ _USE_CLEARML = False
47
 
48
  def _youdens_j(y_true, y_prob):
49
  fpr, tpr, thresholds = roc_curve(y_true, y_prob)
 
57
  return f1_score(y_true, (y_prob >= threshold).astype(int), zero_division=0)
58
 
59
 
60
+ def _plot_roc(fpr, tpr, auc, opt_thresh, opt_idx, title, path, clearml_title=None):
61
  fig, ax = plt.subplots(figsize=(6, 5))
62
  ax.plot(fpr, tpr, lw=2, label=f"ROC (AUC = {auc:.4f})")
63
  ax.plot(fpr[opt_idx], tpr[opt_idx], "ro", markersize=10,
 
68
  ax.set_title(title)
69
  ax.legend(loc="lower right")
70
  fig.tight_layout()
71
+
72
+ # Log to ClearML before closing the figure
73
+ if _logger and clearml_title:
74
+ _logger.report_matplotlib_figure(
75
+ title=clearml_title, series="ROC", figure=fig, iteration=0
76
+ )
77
+
78
  fig.savefig(path, dpi=150)
79
  plt.close(fig)
80
  print(f" saved {path}")
 
85
  by_person, _, _ = load_per_person("face_orientation")
86
  persons = sorted(by_person.keys())
87
 
88
+ results = {"mlp": {"y": [], "p": []}, "xgb": {"y": [], "p": []}}
 
89
 
90
  for i, held_out in enumerate(persons):
91
  X_test, y_test = by_person[held_out]
 
106
  mlp_prob = mlp.predict_proba(X_te_sc)[:, 1]
107
  results["mlp"]["y"].append(y_test)
108
  results["mlp"]["p"].append(mlp_prob)
 
 
109
 
110
  xgb = XGBClassifier(
111
  n_estimators=600, max_depth=8, learning_rate=0.05,
112
  subsample=0.8, colsample_bytree=0.8,
113
  reg_alpha=0.1, reg_lambda=1.0,
114
+ use_label_encoder=False, eval_metric="logloss",
115
  random_state=SEED, verbosity=0,
116
  )
117
  xgb.fit(X_tr_sc, train_y)
118
  xgb_prob = xgb.predict_proba(X_te_sc)[:, 1]
119
  results["xgb"]["y"].append(y_test)
120
  results["xgb"]["p"].append(xgb_prob)
 
 
121
 
122
  print(f" fold {i+1}/{len(persons)}: held out {held_out} "
123
  f"({X_test.shape[0]} samples)")
124
 
125
+ for key in results:
 
126
  results[key]["y"] = np.concatenate(results[key]["y"])
127
  results[key]["p"] = np.concatenate(results[key]["p"])
128
 
 
143
 
144
  path = os.path.join(PLOTS_DIR, f"roc_{name}.png")
145
  _plot_roc(fpr, tpr, auc, opt_t, opt_idx,
146
+ f"LOPO ROC — {label} (9 folds, 144k samples)", path,
147
+ clearml_title=f"ROC_{label}")
148
 
149
  model_stats[name] = {
150
  "label": label, "auc": auc,
 
153
  print(f" {label}: AUC={auc:.4f}, optimal threshold={opt_t:.3f} "
154
  f"(F1={f1_opt:.4f}), F1@0.50={f1_50:.4f}")
155
 
156
+ # Log scalars to ClearML
157
+ if _logger:
158
+ _logger.report_single_value(f"{label} Optimal Threshold", opt_t)
159
+ _logger.report_single_value(f"{label} AUC", auc)
160
+ _logger.report_single_value(f"{label} F1 @ Optimal", f1_opt)
161
+ _logger.report_single_value(f"{label} F1 @ 0.5", f1_50)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
162
 
163
+ return model_stats
164
 
165
  def run_geo_weight_search():
166
  print("\n=== Geometric weight grid search ===")
 
206
  ax.text(i, mean_f1[a] + 0.003, f"{mean_f1[a]:.3f}",
207
  ha="center", va="bottom", fontsize=8)
208
  fig.tight_layout()
209
+
210
+ # Log to ClearML before closing
211
+ if _logger:
212
+ _logger.report_matplotlib_figure(
213
+ title="Geo Weight Search", series="F1 vs Alpha", figure=fig, iteration=0
214
+ )
215
+
216
  path = os.path.join(PLOTS_DIR, "geo_weight_search.png")
217
  fig.savefig(path, dpi=150)
218
  plt.close(fig)
 
220
 
221
  print(f" Best alpha (face weight) = {best_alpha:.1f}, "
222
  f"mean LOPO F1 = {mean_f1[best_alpha]:.4f}")
223
+
224
+ # Log scalars to ClearML
225
+ if _logger:
226
+ _logger.report_single_value("Geo Best Alpha", best_alpha)
227
+ for i, a in enumerate(sorted(alphas)):
228
+ _logger.report_scalar(
229
+ "Geo Weight Search", "Mean LOPO F1",
230
+ iteration=i, value=mean_f1[a]
231
+ )
232
+
233
  return dict(mean_f1), best_alpha
234
 
235
 
 
296
  ax.text(i, mean_f1[w] + 0.003, f"{mean_f1[w]:.3f}",
297
  ha="center", va="bottom", fontsize=8)
298
  fig.tight_layout()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
299
 
300
+ # Log to ClearML before closing
301
+ if _logger:
302
+ _logger.report_matplotlib_figure(
303
+ title="Hybrid Weight Search", series="F1 vs w_mlp", figure=fig, iteration=0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
304
  )
 
 
 
 
 
 
 
 
 
 
305
 
306
+ path = os.path.join(PLOTS_DIR, "hybrid_weight_search.png")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
307
  fig.savefig(path, dpi=150)
308
  plt.close(fig)
309
  print(f" saved {path}")
310
 
311
+ print(f" Best w_mlp = {best_w:.1f}, mean LOPO F1 = {mean_f1[best_w]:.4f}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
312
 
313
+ # Log scalars to ClearML
314
+ if _logger:
315
+ _logger.report_single_value("Hybrid Best w_mlp", best_w)
316
+ for i, w in enumerate(sorted(w_mlps)):
317
+ _logger.report_scalar(
318
+ "Hybrid Weight Search", "Mean LOPO F1",
319
+ iteration=i, value=mean_f1[w]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
320
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
321
 
322
+ return dict(mean_f1), best_w
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
323
 
324
 
325
  def plot_distributions():
 
345
  ear_plot = np.clip(ear_min, 0, 0.85)
346
  mar_plot = np.clip(mar, 0, 1.5)
347
 
348
+ # EAR distribution plot
349
+ fig_ear, ax = plt.subplots(figsize=(7, 4))
350
  ax.hist(ear_plot[labels == 1], bins=100, alpha=0.6, label="Focused (1)", density=True)
351
  ax.hist(ear_plot[labels == 0], bins=100, alpha=0.6, label="Unfocused (0)", density=True)
352
  for val, lbl, c in [
 
359
  ax.set_ylabel("Density")
360
  ax.set_title("EAR Distribution by Class (144k samples)")
361
  ax.legend(fontsize=8)
362
+ fig_ear.tight_layout()
363
+
364
+ # Log to ClearML before closing
365
+ if _logger:
366
+ _logger.report_matplotlib_figure(
367
+ title="EAR Distribution", series="by class", figure=fig_ear, iteration=0
368
+ )
369
+
370
  path = os.path.join(PLOTS_DIR, "ear_distribution.png")
371
+ fig_ear.savefig(path, dpi=150)
372
+ plt.close(fig_ear)
373
  print(f" saved {path}")
374
 
375
+ # MAR distribution plot
376
+ fig_mar, ax = plt.subplots(figsize=(7, 4))
377
  ax.hist(mar_plot[labels == 1], bins=100, alpha=0.6, label="Focused (1)", density=True)
378
  ax.hist(mar_plot[labels == 0], bins=100, alpha=0.6, label="Unfocused (0)", density=True)
379
  ax.axvline(0.55, color="red", ls="--", lw=1.5, label="MAR_YAWN = 0.55")
 
381
  ax.set_ylabel("Density")
382
  ax.set_title("MAR Distribution by Class (144k samples)")
383
  ax.legend(fontsize=8)
384
+ fig_mar.tight_layout()
385
+
386
+ # Log to ClearML before closing
387
+ if _logger:
388
+ _logger.report_matplotlib_figure(
389
+ title="MAR Distribution", series="by class", figure=fig_mar, iteration=0
390
+ )
391
+
392
  path = os.path.join(PLOTS_DIR, "mar_distribution.png")
393
+ fig_mar.savefig(path, dpi=150)
394
+ plt.close(fig_mar)
395
  print(f" saved {path}")
396
 
397
  closed_pct = np.mean(ear_min < 0.16) * 100
 
412
  return stats
413
 
414
 
415
+ def write_report(model_stats, geo_f1, best_alpha, hybrid_f1, best_w, dist_stats):
 
 
 
 
416
  lines = []
417
  lines.append("# Threshold Justification Report")
418
  lines.append("")
 
437
  lines.append("![XGBoost ROC](plots/roc_xgboost.png)")
438
  lines.append("")
439
 
440
+ lines.append("## 2. Geometric Pipeline Weights (s_face vs s_eye)")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
441
  lines.append("")
442
  lines.append("Grid search over face weight alpha in {0.2 ... 0.8}. "
443
  "Eye weight = 1 - alpha. Threshold per fold via Youden's J.")
 
454
  lines.append("![Geometric weight search](plots/geo_weight_search.png)")
455
  lines.append("")
456
 
457
+ lines.append("## 3. Hybrid Pipeline Weights (MLP vs Geometric)")
458
  lines.append("")
459
  lines.append("Grid search over w_mlp in {0.3 ... 0.8}. w_geo = 1 - w_mlp. "
460
+ "Geometric sub-score uses same weights as geometric pipeline (face=0.7, eye=0.3). "
461
+ "If you change geometric weights, re-run this script — optimal w_mlp can shift.")
462
  lines.append("")
463
  lines.append("| MLP Weight (w_mlp) | Mean LOPO F1 |")
464
  lines.append("|-------------------:|-------------:|")
465
+ for w in sorted(hybrid_f1.keys()):
466
+ marker = " **<-- selected**" if w == best_w else ""
467
+ lines.append(f"| {w:.1f} | {hybrid_f1[w]:.4f}{marker} |")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
468
  lines.append("")
469
+ lines.append(f"**Best:** w_mlp = {best_w:.1f} (MLP {best_w*100:.0f}%, "
470
+ f"geometric {(1-best_w)*100:.0f}%)")
471
  lines.append("")
472
+ lines.append("![Hybrid weight search](plots/hybrid_weight_search.png)")
473
  lines.append("")
474
 
475
+ lines.append("## 4. Eye and Mouth Aspect Ratio Thresholds")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
476
  lines.append("")
477
  lines.append("### EAR (Eye Aspect Ratio)")
478
  lines.append("")
 
505
  lines.append("![MAR distribution](plots/mar_distribution.png)")
506
  lines.append("")
507
 
508
+ lines.append("## 5. Other Constants")
509
  lines.append("")
510
  lines.append("| Constant | Value | Rationale |")
511
  lines.append("|----------|------:|-----------|")
 
532
  print(f"\nReport written to {REPORT_PATH}")
533
 
534
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
535
  def main():
536
  os.makedirs(PLOTS_DIR, exist_ok=True)
537
 
538
  lopo_results = run_lopo_models()
539
  model_stats = analyse_model_thresholds(lopo_results)
 
 
540
  geo_f1, best_alpha = run_geo_weight_search()
541
+ hybrid_f1, best_w = run_hybrid_weight_search(lopo_results)
 
542
  dist_stats = plot_distributions()
543
 
544
+ write_report(model_stats, geo_f1, best_alpha, hybrid_f1, best_w, dist_stats)
545
+
546
+ # Close ClearML task
547
+ if _task:
548
+ _task.close()
549
+ print("ClearML task closed.")
550
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
551
  print("\nDone.")
552
 
553