Spaces:
Sleeping
Sleeping
Merge remote-tracking branch 'origin/feature/integration2.0' into hugging_face_final
Browse files- config/__init__.py +3 -0
- config/clearml_enrich.py +87 -0
- evaluation/justify_thresholds.py +18 -3
- models/mlp/train.py +84 -7
- models/xgboost/sweep.py +9 -1
- models/xgboost/sweep_local.py +3 -1
- models/xgboost/train.py +101 -11
config/__init__.py
CHANGED
|
@@ -3,6 +3,9 @@
|
|
| 3 |
from __future__ import annotations
|
| 4 |
|
| 5 |
import os
|
|
|
|
|
|
|
|
|
|
| 6 |
from pathlib import Path
|
| 7 |
from typing import Any
|
| 8 |
|
|
|
|
| 3 |
from __future__ import annotations
|
| 4 |
|
| 5 |
import os
|
| 6 |
+
|
| 7 |
+
# ClearML UI project name (must match the project in your ClearML workspace).
|
| 8 |
+
CLEARML_PROJECT_NAME = "FocusGuards Large Group Project"
|
| 9 |
from pathlib import Path
|
| 10 |
from typing import Any
|
| 11 |
|
config/clearml_enrich.py
ADDED
|
@@ -0,0 +1,87 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Extra ClearML polish: env tags, config snapshot, output model metadata."""
|
| 2 |
+
|
| 3 |
+
from __future__ import annotations
|
| 4 |
+
|
| 5 |
+
import os
|
| 6 |
+
import subprocess
|
| 7 |
+
import sys
|
| 8 |
+
from pathlib import Path
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
def project_root() -> Path:
    """Return the repository root (two directory levels above this module)."""
    here = Path(__file__).resolve()
    return here.parent.parent
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
def active_config_path() -> Path:
    """Return the YAML config file in use.

    Honours the ``FOCUSGUARD_CONFIG`` environment variable when it is set
    (with ``~`` expansion); otherwise falls back to the bundled
    ``default.yaml`` that sits next to this module.
    """
    override = os.environ.get("FOCUSGUARD_CONFIG")
    if not override:
        return Path(__file__).resolve().parent / "default.yaml"
    return Path(override).expanduser()
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
def enrich_task(task, *, role: str) -> None:
    """Attach filter tags to *task*: role, Python version, OS, torch device, git rev.

    The tags make runs easy to slice in the ClearML UI. torch is probed
    lazily so environments without it still get tagged (``no_torch``).
    """
    vi = sys.version_info
    labels = [
        role,
        f"py{vi.major}{vi.minor}",
        sys.platform.replace(" ", "_"),
    ]
    try:
        import torch

        # Strip any local build suffix (e.g. "2.1.0+cu118" -> "2.1.0").
        base_version = torch.__version__.split("+")[0]
        labels.append("torch_" + base_version.replace(".", "_"))
        labels.append("cuda" if torch.cuda.is_available() else "cpu")
    except ImportError:
        labels.append("no_torch")
    short_rev = _git_short_rev()
    if short_rev:
        labels.append(f"git_{short_rev}")
    task.add_tags(labels)
|
| 41 |
+
|
| 42 |
+
|
| 43 |
+
def _git_short_rev() -> str | None:
    """Return the abbreviated HEAD commit hash, or ``None`` when unavailable.

    Best effort: a missing git binary, a non-repo working directory, or a
    hung subprocess (6 s timeout) all degrade to ``None`` rather than raising.
    """
    try:
        result = subprocess.run(
            ["git", "rev-parse", "--short", "HEAD"],
            cwd=str(project_root()),
            capture_output=True,
            text=True,
            timeout=6,
            check=False,
        )
    except (OSError, subprocess.TimeoutExpired):
        return None
    if result.returncode == 0 and result.stdout:
        return result.stdout.strip()
    return None
|
| 59 |
+
|
| 60 |
+
|
| 61 |
+
def upload_repro_artifacts(task) -> None:
    """Pin the exact YAML config and requirements.txt used for this run.

    Each file is uploaded as a named artifact only if it exists on disk,
    so partially-configured environments do not fail the run.
    """
    candidates = (
        ("config_yaml", active_config_path()),
        ("requirements_txt", project_root() / "requirements.txt"),
    )
    for artifact_name, path in candidates:
        if path.is_file():
            task.upload_artifact(name=artifact_name, artifact_object=str(path))
|
| 69 |
+
|
| 70 |
+
|
| 71 |
+
def attach_output_metrics(output_model, metrics: dict[str, float | str]) -> None:
    """Surface headline metrics as metadata on the registered model card.

    Keys have ``/`` replaced by ``_`` (slashes are reserved in metadata
    names) and values are stringified. Failures are swallowed on purpose:
    model-card polish must never abort a training run.
    """
    for name, value in metrics.items():
        safe_name = str(name).replace("/", "_")
        try:
            output_model.set_metadata(safe_name, str(value))
        except Exception:
            # Deliberate best-effort: metadata is cosmetic.
            pass
|
| 79 |
+
|
| 80 |
+
|
| 81 |
+
def task_done_summary(task, summary: str) -> None:
    """Record *summary* as the task comment when the SDK supports it.

    Older ClearML clients may lack ``set_comment``; in that case, or if the
    call itself fails, this is silently a no-op (best effort).
    """
    set_comment = getattr(task, "set_comment", None)
    if not callable(set_comment):
        return
    try:
        set_comment(summary)
    except Exception:
        pass
|
evaluation/justify_thresholds.py
CHANGED
|
@@ -32,22 +32,26 @@ _logger = None
|
|
| 32 |
if _USE_CLEARML:
|
| 33 |
try:
|
| 34 |
from clearml import Task
|
| 35 |
-
from config import flatten_for_clearml
|
| 36 |
_task = Task.init(
|
| 37 |
-
project_name=
|
| 38 |
task_name="Threshold Justification",
|
| 39 |
tags=["evaluation", "thresholds"],
|
| 40 |
)
|
|
|
|
|
|
|
|
|
|
| 41 |
flat = flatten_for_clearml()
|
| 42 |
flat["evaluation/SEED"] = SEED
|
| 43 |
flat["evaluation/n_participants"] = 9
|
| 44 |
_task.connect(flat)
|
|
|
|
| 45 |
_logger = _task.get_logger()
|
| 46 |
if _CLEARML_QUEUE:
|
| 47 |
print(f"[ClearML] Enqueuing to queue '{_CLEARML_QUEUE}'.")
|
| 48 |
_task.execute_remotely(queue_name=_CLEARML_QUEUE)
|
| 49 |
sys.exit(0)
|
| 50 |
-
print("ClearML enabled β logging to project '
|
| 51 |
except ImportError:
|
| 52 |
print("WARNING: ClearML not installed. Continuing without logging.")
|
| 53 |
_USE_CLEARML = False
|
|
@@ -548,6 +552,17 @@ def main():
|
|
| 548 |
|
| 549 |
# Close ClearML task
|
| 550 |
if _task:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 551 |
_task.close()
|
| 552 |
print("ClearML task closed.")
|
| 553 |
|
|
|
|
| 32 |
if _USE_CLEARML:
|
| 33 |
try:
|
| 34 |
from clearml import Task
|
| 35 |
+
from config import CLEARML_PROJECT_NAME, flatten_for_clearml
|
| 36 |
_task = Task.init(
|
| 37 |
+
project_name=CLEARML_PROJECT_NAME,
|
| 38 |
task_name="Threshold Justification",
|
| 39 |
tags=["evaluation", "thresholds"],
|
| 40 |
)
|
| 41 |
+
from config.clearml_enrich import enrich_task, upload_repro_artifacts
|
| 42 |
+
|
| 43 |
+
enrich_task(_task, role="eval_thresholds")
|
| 44 |
flat = flatten_for_clearml()
|
| 45 |
flat["evaluation/SEED"] = SEED
|
| 46 |
flat["evaluation/n_participants"] = 9
|
| 47 |
_task.connect(flat)
|
| 48 |
+
upload_repro_artifacts(_task)
|
| 49 |
_logger = _task.get_logger()
|
| 50 |
if _CLEARML_QUEUE:
|
| 51 |
print(f"[ClearML] Enqueuing to queue '{_CLEARML_QUEUE}'.")
|
| 52 |
_task.execute_remotely(queue_name=_CLEARML_QUEUE)
|
| 53 |
sys.exit(0)
|
| 54 |
+
print(f"ClearML enabled β logging to project '{CLEARML_PROJECT_NAME}'")
|
| 55 |
except ImportError:
|
| 56 |
print("WARNING: ClearML not installed. Continuing without logging.")
|
| 57 |
_USE_CLEARML = False
|
|
|
|
| 552 |
|
| 553 |
# Close ClearML task
|
| 554 |
if _task:
|
| 555 |
+
from config.clearml_enrich import task_done_summary
|
| 556 |
+
|
| 557 |
+
if os.path.isfile(REPORT_PATH):
|
| 558 |
+
_task.upload_artifact(
|
| 559 |
+
name="threshold_justification_report",
|
| 560 |
+
artifact_object=REPORT_PATH,
|
| 561 |
+
)
|
| 562 |
+
task_done_summary(
|
| 563 |
+
_task,
|
| 564 |
+
"LOPO threshold / weight analysis; see artifact threshold_justification_report and plots in Debug samples.",
|
| 565 |
+
)
|
| 566 |
_task.close()
|
| 567 |
print("ClearML task closed.")
|
| 568 |
|
models/mlp/train.py
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
|
|
| 1 |
import json
|
| 2 |
import os
|
| 3 |
import random
|
|
@@ -9,10 +10,12 @@ import torch
|
|
| 9 |
import torch.nn as nn
|
| 10 |
import torch.optim as optim
|
| 11 |
from sklearn.metrics import (
|
|
|
|
| 12 |
confusion_matrix,
|
| 13 |
f1_score,
|
| 14 |
precision_recall_fscore_support,
|
| 15 |
roc_auc_score,
|
|
|
|
| 16 |
)
|
| 17 |
|
| 18 |
from data_preparation.prepare_dataset import get_dataloaders, SELECTED_FEATURES
|
|
@@ -62,12 +65,15 @@ task = None
|
|
| 62 |
if USE_CLEARML:
|
| 63 |
try:
|
| 64 |
from clearml import Task
|
| 65 |
-
from config import flatten_for_clearml
|
| 66 |
task = Task.init(
|
| 67 |
-
project_name=
|
| 68 |
task_name="MLP Model Training",
|
| 69 |
tags=["training", "mlp_model"],
|
| 70 |
)
|
|
|
|
|
|
|
|
|
|
| 71 |
flat = flatten_for_clearml()
|
| 72 |
flat["mlp/model_name"] = CFG.get("model_name", "face_orientation")
|
| 73 |
flat["mlp/epochs"] = CFG.get("epochs", 30)
|
|
@@ -77,6 +83,7 @@ if USE_CLEARML:
|
|
| 77 |
flat["mlp/hidden_sizes"] = str(CFG.get("hidden_sizes", [64, 32]))
|
| 78 |
flat["mlp/split_ratios"] = str(CFG.get("split_ratios", (0.7, 0.15, 0.15)))
|
| 79 |
task.connect(flat)
|
|
|
|
| 80 |
if CLEARML_QUEUE:
|
| 81 |
print(f"[ClearML] Enqueuing to queue '{CLEARML_QUEUE}'. Agent will run training.")
|
| 82 |
task.execute_remotely(queue_name=CLEARML_QUEUE)
|
|
@@ -313,18 +320,75 @@ def main() -> None:
|
|
| 313 |
np.savez(meta_path, feature_names=np.array(SELECTED_FEATURES["face_orientation"]))
|
| 314 |
print(f"[LOG] Scaler and meta saved to {ckpt_dir}")
|
| 315 |
|
| 316 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 317 |
if task is not None:
|
| 318 |
-
|
|
|
|
|
|
|
|
|
|
| 319 |
task.upload_artifact(name="training_log", artifact_object=log_path)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 320 |
task.logger.report_single_value("test/accuracy", test_acc)
|
| 321 |
task.logger.report_single_value("test/f1_weighted", test_f1)
|
| 322 |
task.logger.report_single_value("test/roc_auc", test_auc)
|
| 323 |
for key, val in dataset_stats.items():
|
| 324 |
if isinstance(val, list):
|
| 325 |
-
|
|
|
|
| 326 |
else:
|
| 327 |
-
task.logger.report_single_value(f"dataset/{key}", val)
|
| 328 |
prec, rec, f1_per_class, _ = precision_recall_fscore_support(
|
| 329 |
test_labels_np, test_preds_np, average=None, zero_division=0
|
| 330 |
)
|
|
@@ -332,7 +396,6 @@ def main() -> None:
|
|
| 332 |
task.logger.report_single_value(f"test/class_{c}_precision", float(prec[c]))
|
| 333 |
task.logger.report_single_value(f"test/class_{c}_recall", float(rec[c]))
|
| 334 |
task.logger.report_single_value(f"test/class_{c}_f1", float(f1_per_class[c]))
|
| 335 |
-
cm = confusion_matrix(test_labels_np, test_preds_np)
|
| 336 |
import matplotlib
|
| 337 |
matplotlib.use("Agg")
|
| 338 |
import matplotlib.pyplot as plt
|
|
@@ -351,6 +414,20 @@ def main() -> None:
|
|
| 351 |
fig.tight_layout()
|
| 352 |
task.logger.report_matplotlib_figure(title="Confusion Matrix", series="test", figure=fig, iteration=0)
|
| 353 |
plt.close(fig)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 354 |
task.logger.flush()
|
| 355 |
|
| 356 |
|
|
|
|
| 1 |
+
import csv
|
| 2 |
import json
|
| 3 |
import os
|
| 4 |
import random
|
|
|
|
| 10 |
import torch.nn as nn
|
| 11 |
import torch.optim as optim
|
| 12 |
from sklearn.metrics import (
|
| 13 |
+
classification_report,
|
| 14 |
confusion_matrix,
|
| 15 |
f1_score,
|
| 16 |
precision_recall_fscore_support,
|
| 17 |
roc_auc_score,
|
| 18 |
+
roc_curve,
|
| 19 |
)
|
| 20 |
|
| 21 |
from data_preparation.prepare_dataset import get_dataloaders, SELECTED_FEATURES
|
|
|
|
| 65 |
if USE_CLEARML:
|
| 66 |
try:
|
| 67 |
from clearml import Task
|
| 68 |
+
from config import CLEARML_PROJECT_NAME, flatten_for_clearml
|
| 69 |
task = Task.init(
|
| 70 |
+
project_name=CLEARML_PROJECT_NAME,
|
| 71 |
task_name="MLP Model Training",
|
| 72 |
tags=["training", "mlp_model"],
|
| 73 |
)
|
| 74 |
+
from config.clearml_enrich import enrich_task, upload_repro_artifacts
|
| 75 |
+
|
| 76 |
+
enrich_task(task, role="train_mlp")
|
| 77 |
flat = flatten_for_clearml()
|
| 78 |
flat["mlp/model_name"] = CFG.get("model_name", "face_orientation")
|
| 79 |
flat["mlp/epochs"] = CFG.get("epochs", 30)
|
|
|
|
| 83 |
flat["mlp/hidden_sizes"] = str(CFG.get("hidden_sizes", [64, 32]))
|
| 84 |
flat["mlp/split_ratios"] = str(CFG.get("split_ratios", (0.7, 0.15, 0.15)))
|
| 85 |
task.connect(flat)
|
| 86 |
+
upload_repro_artifacts(task)
|
| 87 |
if CLEARML_QUEUE:
|
| 88 |
print(f"[ClearML] Enqueuing to queue '{CLEARML_QUEUE}'. Agent will run training.")
|
| 89 |
task.execute_remotely(queue_name=CLEARML_QUEUE)
|
|
|
|
| 320 |
np.savez(meta_path, feature_names=np.array(SELECTED_FEATURES["face_orientation"]))
|
| 321 |
print(f"[LOG] Scaler and meta saved to {ckpt_dir}")
|
| 322 |
|
| 323 |
+
cm = confusion_matrix(test_labels_np, test_preds_np)
|
| 324 |
+
pred_csv = os.path.join(logs_dir, f"{CFG['model_name']}_test_predictions.csv")
|
| 325 |
+
with open(pred_csv, "w", newline="") as f:
|
| 326 |
+
w = csv.writer(f)
|
| 327 |
+
w.writerow(["y_true", "y_pred"] + [f"prob_{j}" for j in range(num_classes)])
|
| 328 |
+
for i in range(len(test_labels_np)):
|
| 329 |
+
w.writerow(
|
| 330 |
+
[int(test_labels_np[i]), int(test_preds_np[i])]
|
| 331 |
+
+ [float(x) for x in test_probs[i]]
|
| 332 |
+
)
|
| 333 |
+
summary_path = os.path.join(logs_dir, f"{CFG['model_name']}_test_metrics_summary.json")
|
| 334 |
+
with open(summary_path, "w", encoding="utf-8") as f:
|
| 335 |
+
json.dump(
|
| 336 |
+
{
|
| 337 |
+
"model": "mlp",
|
| 338 |
+
"model_name": CFG["model_name"],
|
| 339 |
+
"checkpoint": os.path.basename(best_ckpt_path),
|
| 340 |
+
"test_loss": history["test_loss"],
|
| 341 |
+
"test_accuracy": history["test_acc"],
|
| 342 |
+
"test_f1_weighted": history["test_f1"],
|
| 343 |
+
"test_roc_auc": history["test_auc"],
|
| 344 |
+
"confusion_matrix": cm.tolist(),
|
| 345 |
+
"classification_report": classification_report(
|
| 346 |
+
test_labels_np, test_preds_np, digits=4
|
| 347 |
+
),
|
| 348 |
+
},
|
| 349 |
+
f,
|
| 350 |
+
indent=2,
|
| 351 |
+
)
|
| 352 |
+
print(f"[LOG] Test predictions β {pred_csv}")
|
| 353 |
+
|
| 354 |
+
# ClearML: artifacts, confusion matrix, per-class metrics, registered model
|
| 355 |
if task is not None:
|
| 356 |
+
from clearml import OutputModel
|
| 357 |
+
from config.clearml_enrich import attach_output_metrics, task_done_summary
|
| 358 |
+
|
| 359 |
+
task.upload_artifact(name="mlp_checkpoint", artifact_object=best_ckpt_path)
|
| 360 |
task.upload_artifact(name="training_log", artifact_object=log_path)
|
| 361 |
+
task.upload_artifact(name="test_predictions", artifact_object=pred_csv)
|
| 362 |
+
task.upload_artifact(name="test_metrics_summary", artifact_object=summary_path)
|
| 363 |
+
task.upload_artifact(name="scaler_mlp", artifact_object=scaler_path)
|
| 364 |
+
task.upload_artifact(name="meta_mlp", artifact_object=meta_path)
|
| 365 |
+
out_model = OutputModel(
|
| 366 |
+
task=task, name=f"MLP_{CFG['model_name']}", framework="PyTorch"
|
| 367 |
+
)
|
| 368 |
+
out_model.update_weights(
|
| 369 |
+
weights_filename=best_ckpt_path, auto_delete_file=False
|
| 370 |
+
)
|
| 371 |
+
attach_output_metrics(
|
| 372 |
+
out_model,
|
| 373 |
+
{
|
| 374 |
+
"test_accuracy": round(float(test_acc), 6),
|
| 375 |
+
"test_f1_weighted": round(float(test_f1), 6),
|
| 376 |
+
"test_roc_auc": round(float(test_auc), 6),
|
| 377 |
+
},
|
| 378 |
+
)
|
| 379 |
+
task_done_summary(
|
| 380 |
+
task,
|
| 381 |
+
f"MLP {CFG['model_name']}: test acc={test_acc:.4f}, F1={test_f1:.4f}, ROC-AUC={test_auc:.4f}",
|
| 382 |
+
)
|
| 383 |
task.logger.report_single_value("test/accuracy", test_acc)
|
| 384 |
task.logger.report_single_value("test/f1_weighted", test_f1)
|
| 385 |
task.logger.report_single_value("test/roc_auc", test_auc)
|
| 386 |
for key, val in dataset_stats.items():
|
| 387 |
if isinstance(val, list):
|
| 388 |
+
for i, v in enumerate(val):
|
| 389 |
+
task.logger.report_single_value(f"dataset/{key}/{i}", float(v))
|
| 390 |
else:
|
| 391 |
+
task.logger.report_single_value(f"dataset/{key}", float(val))
|
| 392 |
prec, rec, f1_per_class, _ = precision_recall_fscore_support(
|
| 393 |
test_labels_np, test_preds_np, average=None, zero_division=0
|
| 394 |
)
|
|
|
|
| 396 |
task.logger.report_single_value(f"test/class_{c}_precision", float(prec[c]))
|
| 397 |
task.logger.report_single_value(f"test/class_{c}_recall", float(rec[c]))
|
| 398 |
task.logger.report_single_value(f"test/class_{c}_f1", float(f1_per_class[c]))
|
|
|
|
| 399 |
import matplotlib
|
| 400 |
matplotlib.use("Agg")
|
| 401 |
import matplotlib.pyplot as plt
|
|
|
|
| 414 |
fig.tight_layout()
|
| 415 |
task.logger.report_matplotlib_figure(title="Confusion Matrix", series="test", figure=fig, iteration=0)
|
| 416 |
plt.close(fig)
|
| 417 |
+
if num_classes == 2:
|
| 418 |
+
fpr, tpr, _ = roc_curve(test_labels_np, test_probs[:, 1])
|
| 419 |
+
fig_r, ax_r = plt.subplots(figsize=(6, 5))
|
| 420 |
+
ax_r.plot(fpr, tpr, label=f"ROC-AUC = {test_auc:.4f}")
|
| 421 |
+
ax_r.plot([0, 1], [0, 1], "k--", lw=1)
|
| 422 |
+
ax_r.set_xlabel("False positive rate")
|
| 423 |
+
ax_r.set_ylabel("True positive rate")
|
| 424 |
+
ax_r.set_title("Test ROC (MLP)")
|
| 425 |
+
ax_r.legend(loc="lower right")
|
| 426 |
+
fig_r.tight_layout()
|
| 427 |
+
task.logger.report_matplotlib_figure(
|
| 428 |
+
title="ROC", series="test", figure=fig_r, iteration=0
|
| 429 |
+
)
|
| 430 |
+
plt.close(fig_r)
|
| 431 |
task.logger.flush()
|
| 432 |
|
| 433 |
|
models/xgboost/sweep.py
CHANGED
|
@@ -18,7 +18,15 @@ Usage
|
|
| 18 |
python models/xgboost/sweep.py
|
| 19 |
"""
|
| 20 |
|
|
|
|
|
|
|
| 21 |
import time
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22 |
from clearml import Task
|
| 23 |
from clearml.automation import (
|
| 24 |
HyperParameterOptimizer,
|
|
@@ -73,7 +81,7 @@ OBJECTIVE_SIGN = "min" # minimize logloss
|
|
| 73 |
def main():
|
| 74 |
# Register this controller as its own ClearML Task so it is tracked too.
|
| 75 |
controller_task = Task.init(
|
| 76 |
-
project_name=
|
| 77 |
task_name="XGBoost HPO Sweep Controller",
|
| 78 |
task_type=Task.TaskTypes.optimizer,
|
| 79 |
tags=["sweep", "xgboost", "hpo"],
|
|
|
|
| 18 |
python models/xgboost/sweep.py
|
| 19 |
"""
|
| 20 |
|
| 21 |
+
import os
|
| 22 |
+
import sys
|
| 23 |
import time
|
| 24 |
+
|
| 25 |
+
_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", ".."))
|
| 26 |
+
if _ROOT not in sys.path:
|
| 27 |
+
sys.path.insert(0, _ROOT)
|
| 28 |
+
|
| 29 |
+
from config import CLEARML_PROJECT_NAME
|
| 30 |
from clearml import Task
|
| 31 |
from clearml.automation import (
|
| 32 |
HyperParameterOptimizer,
|
|
|
|
| 81 |
def main():
|
| 82 |
# Register this controller as its own ClearML Task so it is tracked too.
|
| 83 |
controller_task = Task.init(
|
| 84 |
+
project_name=CLEARML_PROJECT_NAME,
|
| 85 |
task_name="XGBoost HPO Sweep Controller",
|
| 86 |
task_type=Task.TaskTypes.optimizer,
|
| 87 |
tags=["sweep", "xgboost", "hpo"],
|
models/xgboost/sweep_local.py
CHANGED
|
@@ -13,11 +13,13 @@ import numpy as np
|
|
| 13 |
from xgboost import XGBClassifier
|
| 14 |
from sklearn.metrics import f1_score, roc_auc_score, accuracy_score
|
| 15 |
|
|
|
|
|
|
|
| 16 |
# Import your own dataset loading logic
|
| 17 |
from data_preparation.prepare_dataset import get_default_split_config, get_numpy_splits
|
| 18 |
|
| 19 |
# ββ General Settings ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 20 |
-
PROJECT_NAME =
|
| 21 |
BASE_TASK_NAME = "XGBoost Sweep Trial"
|
| 22 |
DATA_SPLITS, SEED = get_default_split_config()
|
| 23 |
|
|
|
|
| 13 |
from xgboost import XGBClassifier
|
| 14 |
from sklearn.metrics import f1_score, roc_auc_score, accuracy_score
|
| 15 |
|
| 16 |
+
from config import CLEARML_PROJECT_NAME
|
| 17 |
+
|
| 18 |
# Import your own dataset loading logic
|
| 19 |
from data_preparation.prepare_dataset import get_default_split_config, get_numpy_splits
|
| 20 |
|
| 21 |
# ββ General Settings ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 22 |
+
PROJECT_NAME = CLEARML_PROJECT_NAME
|
| 23 |
BASE_TASK_NAME = "XGBoost Sweep Trial"
|
| 24 |
DATA_SPLITS, SEED = get_default_split_config()
|
| 25 |
|
models/xgboost/train.py
CHANGED
|
@@ -1,12 +1,20 @@
|
|
|
|
|
| 1 |
import json
|
| 2 |
import os
|
| 3 |
import random
|
| 4 |
import sys
|
| 5 |
|
| 6 |
import numpy as np
|
| 7 |
-
from sklearn.metrics import
|
| 8 |
-
|
| 9 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
from models.xgboost.config import XGB_BASE_PARAMS, build_xgb_classifier
|
| 11 |
|
| 12 |
_PROJECT_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", ".."))
|
|
@@ -48,12 +56,15 @@ task = None
|
|
| 48 |
if USE_CLEARML:
|
| 49 |
try:
|
| 50 |
from clearml import Task
|
| 51 |
-
from config import flatten_for_clearml
|
| 52 |
task = Task.init(
|
| 53 |
-
project_name=
|
| 54 |
task_name="XGBoost Model Training",
|
| 55 |
tags=["training", "xgboost"],
|
| 56 |
)
|
|
|
|
|
|
|
|
|
|
| 57 |
flat = flatten_for_clearml()
|
| 58 |
for k, v in CFG.get("xgb_params", {}).items():
|
| 59 |
flat[f"xgb_params/{k}"] = v
|
|
@@ -61,6 +72,7 @@ if USE_CLEARML:
|
|
| 61 |
flat["seed"] = CFG["seed"]
|
| 62 |
flat["split_ratios"] = str(CFG["split_ratios"])
|
| 63 |
task.connect(flat)
|
|
|
|
| 64 |
if CLEARML_QUEUE:
|
| 65 |
print(f"[ClearML] Enqueuing to queue '{CLEARML_QUEUE}'.")
|
| 66 |
task.execute_remotely(queue_name=CLEARML_QUEUE)
|
|
@@ -134,6 +146,49 @@ def main():
|
|
| 134 |
"test_class_counts": np.bincount(y_test.astype(int), minlength=num_classes).tolist(),
|
| 135 |
}
|
| 136 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 137 |
if task is not None:
|
| 138 |
for i, (tl, vl) in enumerate(zip(train_losses, val_losses)):
|
| 139 |
task.logger.report_scalar("Loss", "Train", tl, iteration=i + 1)
|
|
@@ -142,9 +197,11 @@ def main():
|
|
| 142 |
task.logger.report_single_value("test/f1_weighted", test_f1)
|
| 143 |
task.logger.report_single_value("test/roc_auc", test_auc)
|
| 144 |
for key, val in dataset_stats.items():
|
| 145 |
-
|
| 146 |
-
|
| 147 |
-
|
|
|
|
|
|
|
| 148 |
prec, rec, f1_per_class, _ = precision_recall_fscore_support(
|
| 149 |
y_test, test_preds, average=None, zero_division=0
|
| 150 |
)
|
|
@@ -152,7 +209,6 @@ def main():
|
|
| 152 |
task.logger.report_single_value(f"test/class_{c}_precision", float(prec[c]))
|
| 153 |
task.logger.report_single_value(f"test/class_{c}_recall", float(rec[c]))
|
| 154 |
task.logger.report_single_value(f"test/class_{c}_f1", float(f1_per_class[c]))
|
| 155 |
-
cm = confusion_matrix(y_test, test_preds)
|
| 156 |
import matplotlib
|
| 157 |
matplotlib.use("Agg")
|
| 158 |
import matplotlib.pyplot as plt
|
|
@@ -171,6 +227,20 @@ def main():
|
|
| 171 |
fig.tight_layout()
|
| 172 |
task.logger.report_matplotlib_figure(title="Confusion Matrix", series="test", figure=fig, iteration=0)
|
| 173 |
plt.close(fig)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 174 |
task.logger.flush()
|
| 175 |
|
| 176 |
# ββ Save checkpoint βββββββββββββββββββββββββββββββββββββββββββ
|
|
@@ -200,8 +270,6 @@ def main():
|
|
| 200 |
"dataset_stats": dataset_stats,
|
| 201 |
}
|
| 202 |
|
| 203 |
-
logs_dir = CFG["logs_dir"]
|
| 204 |
-
os.makedirs(logs_dir, exist_ok=True)
|
| 205 |
log_path = os.path.join(logs_dir, f"xgboost_{CFG['model_name']}_training_log.json")
|
| 206 |
|
| 207 |
with open(log_path, "w") as f:
|
|
@@ -210,8 +278,30 @@ def main():
|
|
| 210 |
print(f"[LOG] Training history saved to: {log_path}")
|
| 211 |
|
| 212 |
if task is not None:
|
|
|
|
|
|
|
|
|
|
| 213 |
task.upload_artifact(name="xgboost_model", artifact_object=model_path)
|
| 214 |
task.upload_artifact(name="training_log", artifact_object=log_path)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 215 |
|
| 216 |
|
| 217 |
if __name__ == "__main__":
|
|
|
|
| 1 |
+
import csv
|
| 2 |
import json
|
| 3 |
import os
|
| 4 |
import random
|
| 5 |
import sys
|
| 6 |
|
| 7 |
import numpy as np
|
| 8 |
+
from sklearn.metrics import (
|
| 9 |
+
classification_report,
|
| 10 |
+
confusion_matrix,
|
| 11 |
+
f1_score,
|
| 12 |
+
precision_recall_fscore_support,
|
| 13 |
+
roc_auc_score,
|
| 14 |
+
roc_curve,
|
| 15 |
+
)
|
| 16 |
+
|
| 17 |
+
from data_preparation.prepare_dataset import get_numpy_splits, SELECTED_FEATURES
|
| 18 |
from models.xgboost.config import XGB_BASE_PARAMS, build_xgb_classifier
|
| 19 |
|
| 20 |
_PROJECT_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", ".."))
|
|
|
|
| 56 |
if USE_CLEARML:
|
| 57 |
try:
|
| 58 |
from clearml import Task
|
| 59 |
+
from config import CLEARML_PROJECT_NAME, flatten_for_clearml
|
| 60 |
task = Task.init(
|
| 61 |
+
project_name=CLEARML_PROJECT_NAME,
|
| 62 |
task_name="XGBoost Model Training",
|
| 63 |
tags=["training", "xgboost"],
|
| 64 |
)
|
| 65 |
+
from config.clearml_enrich import enrich_task, upload_repro_artifacts
|
| 66 |
+
|
| 67 |
+
enrich_task(task, role="train_xgboost")
|
| 68 |
flat = flatten_for_clearml()
|
| 69 |
for k, v in CFG.get("xgb_params", {}).items():
|
| 70 |
flat[f"xgb_params/{k}"] = v
|
|
|
|
| 72 |
flat["seed"] = CFG["seed"]
|
| 73 |
flat["split_ratios"] = str(CFG["split_ratios"])
|
| 74 |
task.connect(flat)
|
| 75 |
+
upload_repro_artifacts(task)
|
| 76 |
if CLEARML_QUEUE:
|
| 77 |
print(f"[ClearML] Enqueuing to queue '{CLEARML_QUEUE}'.")
|
| 78 |
task.execute_remotely(queue_name=CLEARML_QUEUE)
|
|
|
|
| 146 |
"test_class_counts": np.bincount(y_test.astype(int), minlength=num_classes).tolist(),
|
| 147 |
}
|
| 148 |
|
| 149 |
+
logs_dir = CFG["logs_dir"]
|
| 150 |
+
os.makedirs(logs_dir, exist_ok=True)
|
| 151 |
+
cm = confusion_matrix(y_test, test_preds)
|
| 152 |
+
y_test_i = y_test.astype(int)
|
| 153 |
+
pred_path = os.path.join(logs_dir, f"xgboost_{CFG['model_name']}_test_predictions.csv")
|
| 154 |
+
with open(pred_path, "w", newline="") as f:
|
| 155 |
+
w = csv.writer(f)
|
| 156 |
+
w.writerow(["y_true", "y_pred"] + [f"prob_{j}" for j in range(num_classes)])
|
| 157 |
+
for i in range(len(y_test_i)):
|
| 158 |
+
w.writerow(
|
| 159 |
+
[int(y_test_i[i]), int(test_preds[i])]
|
| 160 |
+
+ [float(x) for x in test_probs[i]]
|
| 161 |
+
)
|
| 162 |
+
summary_path = os.path.join(logs_dir, f"xgboost_{CFG['model_name']}_test_metrics_summary.json")
|
| 163 |
+
with open(summary_path, "w", encoding="utf-8") as f:
|
| 164 |
+
json.dump(
|
| 165 |
+
{
|
| 166 |
+
"model": "xgboost",
|
| 167 |
+
"model_name": CFG["model_name"],
|
| 168 |
+
"test_accuracy": round(test_acc, 6),
|
| 169 |
+
"test_f1_weighted": round(test_f1, 6),
|
| 170 |
+
"test_roc_auc": round(test_auc, 6),
|
| 171 |
+
"confusion_matrix": cm.tolist(),
|
| 172 |
+
"classification_report": classification_report(
|
| 173 |
+
y_test, test_preds, digits=4
|
| 174 |
+
),
|
| 175 |
+
},
|
| 176 |
+
f,
|
| 177 |
+
indent=2,
|
| 178 |
+
)
|
| 179 |
+
feat_names = list(
|
| 180 |
+
SELECTED_FEATURES.get(CFG["model_name"], SELECTED_FEATURES["face_orientation"])
|
| 181 |
+
)
|
| 182 |
+
imp_vals = model.feature_importances_
|
| 183 |
+
imp_rows = [
|
| 184 |
+
{"feature": feat_names[i], "importance": float(imp_vals[i])}
|
| 185 |
+
for i in range(min(len(feat_names), len(imp_vals)))
|
| 186 |
+
]
|
| 187 |
+
imp_path = os.path.join(logs_dir, f"xgboost_{CFG['model_name']}_feature_importance.json")
|
| 188 |
+
with open(imp_path, "w", encoding="utf-8") as f:
|
| 189 |
+
json.dump(imp_rows, f, indent=2)
|
| 190 |
+
print(f"[LOG] Test predictions β {pred_path}")
|
| 191 |
+
|
| 192 |
if task is not None:
|
| 193 |
for i, (tl, vl) in enumerate(zip(train_losses, val_losses)):
|
| 194 |
task.logger.report_scalar("Loss", "Train", tl, iteration=i + 1)
|
|
|
|
| 197 |
task.logger.report_single_value("test/f1_weighted", test_f1)
|
| 198 |
task.logger.report_single_value("test/roc_auc", test_auc)
|
| 199 |
for key, val in dataset_stats.items():
|
| 200 |
+
if isinstance(val, list):
|
| 201 |
+
for i, v in enumerate(val):
|
| 202 |
+
task.logger.report_single_value(f"dataset/{key}/{i}", float(v))
|
| 203 |
+
else:
|
| 204 |
+
task.logger.report_single_value(f"dataset/{key}", float(val))
|
| 205 |
prec, rec, f1_per_class, _ = precision_recall_fscore_support(
|
| 206 |
y_test, test_preds, average=None, zero_division=0
|
| 207 |
)
|
|
|
|
| 209 |
task.logger.report_single_value(f"test/class_{c}_precision", float(prec[c]))
|
| 210 |
task.logger.report_single_value(f"test/class_{c}_recall", float(rec[c]))
|
| 211 |
task.logger.report_single_value(f"test/class_{c}_f1", float(f1_per_class[c]))
|
|
|
|
| 212 |
import matplotlib
|
| 213 |
matplotlib.use("Agg")
|
| 214 |
import matplotlib.pyplot as plt
|
|
|
|
| 227 |
fig.tight_layout()
|
| 228 |
task.logger.report_matplotlib_figure(title="Confusion Matrix", series="test", figure=fig, iteration=0)
|
| 229 |
plt.close(fig)
|
| 230 |
+
if num_classes == 2:
|
| 231 |
+
fpr, tpr, _ = roc_curve(y_test, test_probs[:, 1])
|
| 232 |
+
fig_r, ax_r = plt.subplots(figsize=(6, 5))
|
| 233 |
+
ax_r.plot(fpr, tpr, label=f"ROC-AUC = {test_auc:.4f}")
|
| 234 |
+
ax_r.plot([0, 1], [0, 1], "k--", lw=1)
|
| 235 |
+
ax_r.set_xlabel("False positive rate")
|
| 236 |
+
ax_r.set_ylabel("True positive rate")
|
| 237 |
+
ax_r.set_title("Test ROC (XGBoost)")
|
| 238 |
+
ax_r.legend(loc="lower right")
|
| 239 |
+
fig_r.tight_layout()
|
| 240 |
+
task.logger.report_matplotlib_figure(
|
| 241 |
+
title="ROC", series="test", figure=fig_r, iteration=0
|
| 242 |
+
)
|
| 243 |
+
plt.close(fig_r)
|
| 244 |
task.logger.flush()
|
| 245 |
|
| 246 |
# ββ Save checkpoint βββββββββββββββββββββββββββββββββββββββββββ
|
|
|
|
| 270 |
"dataset_stats": dataset_stats,
|
| 271 |
}
|
| 272 |
|
|
|
|
|
|
|
| 273 |
log_path = os.path.join(logs_dir, f"xgboost_{CFG['model_name']}_training_log.json")
|
| 274 |
|
| 275 |
with open(log_path, "w") as f:
|
|
|
|
| 278 |
print(f"[LOG] Training history saved to: {log_path}")
|
| 279 |
|
| 280 |
if task is not None:
|
| 281 |
+
from clearml import OutputModel
|
| 282 |
+
from config.clearml_enrich import attach_output_metrics, task_done_summary
|
| 283 |
+
|
| 284 |
task.upload_artifact(name="xgboost_model", artifact_object=model_path)
|
| 285 |
task.upload_artifact(name="training_log", artifact_object=log_path)
|
| 286 |
+
task.upload_artifact(name="test_predictions", artifact_object=pred_path)
|
| 287 |
+
task.upload_artifact(name="test_metrics_summary", artifact_object=summary_path)
|
| 288 |
+
task.upload_artifact(name="feature_importance", artifact_object=imp_path)
|
| 289 |
+
out_model = OutputModel(
|
| 290 |
+
task=task, name=f"XGBoost_{CFG['model_name']}", framework="XGBoost"
|
| 291 |
+
)
|
| 292 |
+
out_model.update_weights(weights_filename=model_path, auto_delete_file=False)
|
| 293 |
+
attach_output_metrics(
|
| 294 |
+
out_model,
|
| 295 |
+
{
|
| 296 |
+
"test_accuracy": round(test_acc, 6),
|
| 297 |
+
"test_f1_weighted": round(test_f1, 6),
|
| 298 |
+
"test_roc_auc": round(test_auc, 6),
|
| 299 |
+
},
|
| 300 |
+
)
|
| 301 |
+
task_done_summary(
|
| 302 |
+
task,
|
| 303 |
+
f"XGBoost {CFG['model_name']}: test acc={test_acc:.4f}, F1={test_f1:.4f}, ROC-AUC={test_auc:.4f}",
|
| 304 |
+
)
|
| 305 |
|
| 306 |
|
| 307 |
if __name__ == "__main__":
|