GoverningDocs
/

ccr-binary-logreg

Text Classification

sklearn-logistic-regression

document-classification

binary-classification

legal-documents

property-management

declaration-of-covenants

logistic-regression

Eval Results (legacy)

Model card Files Files and versions

hslee1981 committed on 10 days ago

Commit

3ed6290

·

verified ·

1 Parent(s): 76932f1

T18 Phase 1 Tier 1: config snapshot

Files changed (1) hide show

config.json +77 -0

config.json ADDED Viewed

	@@ -0,0 +1,77 @@

+{
+  "model_type": "sklearn-logistic-regression",
+  "embedding_model": "openai-text-embedding-3-small",
+  "embedding_dim": 1536,
+  "max_pages_per_doc": 20,
+  "skip_boilerplate": true,
+  "operating_threshold": 0.436,
+  "decision_band": {
+    "reject_below": 0.3,
+    "fast_pass_at_or_above": 0.85,
+    "escalate_between": "[0.30, 0.85)"
+  },
+  "training_data": {
+    "source": "setfit_experiments PostgreSQL DB + multi-signal Phase 0 relabeling",
+    "n_pages": 7129,
+    "n_documents": 465,
+    "binary_class_balance": {
+      "positive": 3014,
+      "negative": 4115
+    },
+    "split": {
+      "train": 298,
+      "val": 64,
+      "test": 65,
+      "train_pos": 201,
+      "val_pos": 39,
+      "test_pos": 47
+    }
+  },
+  "test_metrics": {
+    "name": "logreg_tuned (TEST set)",
+    "threshold": 0.4359872072086175,
+    "accuracy": 0.9076923076923077,
+    "f1": 0.94,
+    "roc_auc": 0.9550827423167849,
+    "brier_score": 0.13433461274707392,
+    "ece": 0.27835753511850964,
+    "confusion_matrix": [
+      [
+        12,
+        6
+      ],
+      [
+        0,
+        47
+      ]
+    ]
+  },
+  "validation_metrics": {
+    "name": "LogReg @ best-threshold",
+    "threshold": 0.4359872072086175,
+    "accuracy": 0.859375,
+    "f1": 0.8941176470588236,
+    "roc_auc": 0.8748717948717949,
+    "brier_score": 0.15576505514468417,
+    "ece": 0.19068488965598734,
+    "confusion_matrix": [
+      [
+        17,
+        8
+      ],
+      [
+        1,
+        38
+      ]
+    ]
+  },
+  "candidates_compared": [
+    "logreg_05",
+    "logreg_tuned",
+    "logreg_platt_05",
+    "logreg_platt_tuned",
+    "mlp_05",
+    "mlp_tuned"
+  ],
+  "winner_selection_rule": "max F1 across LogReg, LogReg+Platt, MLP at best threshold; simplicity tiebreak to LogReg"
+}