avinashm commited on
Commit
e65379c
·
verified ·
1 Parent(s): 24c9883

Upload all trained models

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +5 -0
  2. confidence_bin_epoch_stats.json +63 -0
  3. confidence_bin_metrics_eval.json +102 -0
  4. confidence_bin_metrics_test.json +102 -0
  5. confidence_bin_model.joblib +3 -0
  6. constraint_dimension_epoch_stats.json +59 -0
  7. constraint_dimension_hf/config.json +54 -0
  8. constraint_dimension_hf/model.safetensors +3 -0
  9. constraint_dimension_hf/tokenizer.json +0 -0
  10. constraint_dimension_hf/tokenizer_config.json +23 -0
  11. constraint_dimension_metrics_eval.json +196 -0
  12. constraint_dimension_metrics_test.json +196 -0
  13. constraint_dimension_model.joblib +3 -0
  14. context_tag_epoch_stats.json +31 -0
  15. context_tag_hf/config.json +58 -0
  16. context_tag_hf/model.safetensors +3 -0
  17. context_tag_hf/tokenizer.json +0 -0
  18. context_tag_hf/tokenizer_config.json +23 -0
  19. context_tag_metrics_eval.json +276 -0
  20. context_tag_metrics_test.json +276 -0
  21. context_tag_model.joblib +3 -0
  22. decay_profile_epoch_stats.json +113 -0
  23. decay_profile_metrics_eval.json +158 -0
  24. decay_profile_metrics_test.json +158 -0
  25. decay_profile_model.joblib +3 -0
  26. extractor_epoch_stats.json +61 -0
  27. extractor_label_map.json +62 -0
  28. extractor_metrics_eval.json +1464 -0
  29. extractor_metrics_test.json +1464 -0
  30. extractor_model.joblib +3 -0
  31. extractor_report_eval.json +432 -0
  32. extractor_report_test.json +432 -0
  33. extractor_training_metadata.json +350 -0
  34. fact_extraction_structured_epoch_stats.json +46 -0
  35. fact_extraction_structured_hf/config.json +73 -0
  36. fact_extraction_structured_hf/model.safetensors +3 -0
  37. fact_extraction_structured_hf/tokenizer.json +0 -0
  38. fact_extraction_structured_hf/tokenizer_config.json +14 -0
  39. fact_extraction_structured_metrics_eval.json +33 -0
  40. fact_extraction_structured_metrics_test.json +33 -0
  41. fact_extraction_structured_model.joblib +3 -0
  42. forgetting_action_policy_epoch_stats.json +69 -0
  43. forgetting_action_policy_hf/config.json +52 -0
  44. forgetting_action_policy_hf/model.safetensors +3 -0
  45. forgetting_action_policy_hf/tokenizer.json +0 -0
  46. forgetting_action_policy_hf/tokenizer_config.json +23 -0
  47. forgetting_action_policy_metrics_eval.json +162 -0
  48. forgetting_action_policy_metrics_test.json +162 -0
  49. forgetting_action_policy_model.joblib +3 -0
  50. manifest.json +2799 -0
.gitattributes CHANGED
@@ -33,3 +33,8 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ memory_rerank_pair_hf/tokenizer.json filter=lfs diff=lfs merge=lfs -text
37
+ novelty_pair_hf/tokenizer.json filter=lfs diff=lfs merge=lfs -text
38
+ reconsolidation_candidate_pair_hf/tokenizer.json filter=lfs diff=lfs merge=lfs -text
39
+ retrieval_constraint_relevance_pair_hf/tokenizer.json filter=lfs diff=lfs merge=lfs -text
40
+ schema_match_pair_hf/tokenizer.json filter=lfs diff=lfs merge=lfs -text
confidence_bin_epoch_stats.json ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "task": "confidence_bin",
3
+ "epoch_stats": [
4
+ {
5
+ "boundary_index": 0,
6
+ "boundary": "low|>medium"
7
+ },
8
+ {
9
+ "boundary_index": 1,
10
+ "boundary": "medium|>high"
11
+ }
12
+ ],
13
+ "training_summary": {
14
+ "actual_epochs": 1,
15
+ "best_epoch": 1,
16
+ "early_stopped": false,
17
+ "boundary_count": 2
18
+ },
19
+ "boundaries": [
20
+ {
21
+ "boundary": "low|>medium",
22
+ "index": 0,
23
+ "positive_rows": 16000,
24
+ "negative_rows": 8000,
25
+ "calibration": {
26
+ "method": "sigmoid",
27
+ "split": "eval",
28
+ "rows": 3000,
29
+ "pre_ece": 0.01198595582677442,
30
+ "post_ece": 0.0008781016022264554,
31
+ "pre_accuracy": 1.0,
32
+ "post_accuracy": 1.0,
33
+ "accuracy_delta": 0.0
34
+ },
35
+ "isotonic": {
36
+ "rows": 3000,
37
+ "pre_mean": 0.6666663343311852,
38
+ "post_mean": 0.6666666666666666
39
+ }
40
+ },
41
+ {
42
+ "boundary": "medium|>high",
43
+ "index": 1,
44
+ "positive_rows": 8000,
45
+ "negative_rows": 16000,
46
+ "calibration": {
47
+ "method": "sigmoid",
48
+ "split": "eval",
49
+ "rows": 3000,
50
+ "pre_ece": 0.015159181746049357,
51
+ "post_ece": 0.0007613706727828129,
52
+ "pre_accuracy": 1.0,
53
+ "post_accuracy": 1.0,
54
+ "accuracy_delta": 0.0
55
+ },
56
+ "isotonic": {
57
+ "rows": 3000,
58
+ "pre_mean": 0.3333336656697009,
59
+ "post_mean": 0.3333333333333333
60
+ }
61
+ }
62
+ ]
63
+ }
confidence_bin_metrics_eval.json ADDED
@@ -0,0 +1,102 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "family": "router",
3
+ "split": "eval",
4
+ "overall": {
5
+ "rows": 3000,
6
+ "accuracy": 1.0,
7
+ "macro_f1": 1.0,
8
+ "weighted_f1": 1.0,
9
+ "micro_f1": 1.0,
10
+ "labels": [
11
+ "low",
12
+ "medium",
13
+ "high"
14
+ ],
15
+ "confusion_matrix": [
16
+ [
17
+ 1000,
18
+ 0,
19
+ 0
20
+ ],
21
+ [
22
+ 0,
23
+ 1000,
24
+ 0
25
+ ],
26
+ [
27
+ 0,
28
+ 0,
29
+ 1000
30
+ ]
31
+ ],
32
+ "calibration_error": 0.0,
33
+ "classification_report": {
34
+ "confidence_bin::high": {
35
+ "precision": 1.0,
36
+ "recall": 1.0,
37
+ "f1-score": 1.0,
38
+ "support": 1000.0
39
+ },
40
+ "confidence_bin::low": {
41
+ "precision": 1.0,
42
+ "recall": 1.0,
43
+ "f1-score": 1.0,
44
+ "support": 1000.0
45
+ },
46
+ "confidence_bin::medium": {
47
+ "precision": 1.0,
48
+ "recall": 1.0,
49
+ "f1-score": 1.0,
50
+ "support": 1000.0
51
+ },
52
+ "accuracy": 1.0,
53
+ "macro avg": {
54
+ "precision": 1.0,
55
+ "recall": 1.0,
56
+ "f1-score": 1.0,
57
+ "support": 3000.0
58
+ },
59
+ "weighted avg": {
60
+ "precision": 1.0,
61
+ "recall": 1.0,
62
+ "f1-score": 1.0,
63
+ "support": 3000.0
64
+ }
65
+ },
66
+ "ordinal_mae": 0.0,
67
+ "off_by_two_rate": 0.0
68
+ },
69
+ "per_task": {
70
+ "confidence_bin": {
71
+ "rows": 3000,
72
+ "accuracy": 1.0,
73
+ "macro_f1": 1.0,
74
+ "weighted_f1": 1.0,
75
+ "micro_f1": 1.0,
76
+ "labels": [
77
+ "high",
78
+ "low",
79
+ "medium"
80
+ ],
81
+ "confusion_matrix": [
82
+ [
83
+ 1000,
84
+ 0,
85
+ 0
86
+ ],
87
+ [
88
+ 0,
89
+ 1000,
90
+ 0
91
+ ],
92
+ [
93
+ 0,
94
+ 0,
95
+ 1000
96
+ ]
97
+ ],
98
+ "wrong_task_predictions": 0,
99
+ "wrong_task_rate": 0.0
100
+ }
101
+ }
102
+ }
confidence_bin_metrics_test.json ADDED
@@ -0,0 +1,102 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "family": "router",
3
+ "split": "test",
4
+ "overall": {
5
+ "rows": 3000,
6
+ "accuracy": 1.0,
7
+ "macro_f1": 1.0,
8
+ "weighted_f1": 1.0,
9
+ "micro_f1": 1.0,
10
+ "labels": [
11
+ "low",
12
+ "medium",
13
+ "high"
14
+ ],
15
+ "confusion_matrix": [
16
+ [
17
+ 1000,
18
+ 0,
19
+ 0
20
+ ],
21
+ [
22
+ 0,
23
+ 1000,
24
+ 0
25
+ ],
26
+ [
27
+ 0,
28
+ 0,
29
+ 1000
30
+ ]
31
+ ],
32
+ "calibration_error": 0.0,
33
+ "classification_report": {
34
+ "confidence_bin::high": {
35
+ "precision": 1.0,
36
+ "recall": 1.0,
37
+ "f1-score": 1.0,
38
+ "support": 1000.0
39
+ },
40
+ "confidence_bin::low": {
41
+ "precision": 1.0,
42
+ "recall": 1.0,
43
+ "f1-score": 1.0,
44
+ "support": 1000.0
45
+ },
46
+ "confidence_bin::medium": {
47
+ "precision": 1.0,
48
+ "recall": 1.0,
49
+ "f1-score": 1.0,
50
+ "support": 1000.0
51
+ },
52
+ "accuracy": 1.0,
53
+ "macro avg": {
54
+ "precision": 1.0,
55
+ "recall": 1.0,
56
+ "f1-score": 1.0,
57
+ "support": 3000.0
58
+ },
59
+ "weighted avg": {
60
+ "precision": 1.0,
61
+ "recall": 1.0,
62
+ "f1-score": 1.0,
63
+ "support": 3000.0
64
+ }
65
+ },
66
+ "ordinal_mae": 0.0,
67
+ "off_by_two_rate": 0.0
68
+ },
69
+ "per_task": {
70
+ "confidence_bin": {
71
+ "rows": 3000,
72
+ "accuracy": 1.0,
73
+ "macro_f1": 1.0,
74
+ "weighted_f1": 1.0,
75
+ "micro_f1": 1.0,
76
+ "labels": [
77
+ "high",
78
+ "low",
79
+ "medium"
80
+ ],
81
+ "confusion_matrix": [
82
+ [
83
+ 1000,
84
+ 0,
85
+ 0
86
+ ],
87
+ [
88
+ 0,
89
+ 1000,
90
+ 0
91
+ ],
92
+ [
93
+ 0,
94
+ 0,
95
+ 1000
96
+ ]
97
+ ],
98
+ "wrong_task_predictions": 0,
99
+ "wrong_task_rate": 0.0
100
+ }
101
+ }
102
+ }
confidence_bin_model.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4d7d0aa1ae26de72c24cbf67a2ad2528303338c92ce1504801e1c492bc588c38
3
+ size 1609220
constraint_dimension_epoch_stats.json ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "task": "constraint_dimension",
3
+ "epoch_stats": [
4
+ {
5
+ "epoch": 1,
6
+ "train_loss": 1.0786596297055173
7
+ },
8
+ {
9
+ "epoch": 2,
10
+ "train_loss": 0.49559883196265825
11
+ },
12
+ {
13
+ "epoch": 3,
14
+ "train_loss": 0.36042651461985464
15
+ },
16
+ {
17
+ "epoch": 4,
18
+ "train_loss": 0.27737118249450576
19
+ },
20
+ {
21
+ "epoch": 5,
22
+ "train_loss": 0.20896544052678293
23
+ },
24
+ {
25
+ "epoch": 6,
26
+ "train_loss": 0.15534398090000953
27
+ },
28
+ {
29
+ "epoch": 7,
30
+ "train_loss": 0.11773543272898254
31
+ },
32
+ {
33
+ "epoch": 8,
34
+ "train_loss": 0.07906012590035061
35
+ },
36
+ {
37
+ "epoch": 9,
38
+ "train_loss": 0.05605952438313094
39
+ },
40
+ {
41
+ "epoch": 10,
42
+ "train_loss": 0.03452488826215441
43
+ }
44
+ ],
45
+ "training_summary": {
46
+ "actual_epochs": 10,
47
+ "best_epoch": 10,
48
+ "early_stopped": false,
49
+ "backbone_model_name": "microsoft/deberta-v3-base",
50
+ "tokenizer_name": "microsoft/deberta-v3-base"
51
+ },
52
+ "calibration": {
53
+ "method": "temperature_grid_search",
54
+ "rows": 6000,
55
+ "temperature": 2.0,
56
+ "loss": 0.5294126563569427
57
+ },
58
+ "hf_model_dir": "/nvme/CognitiveMemoryLayer/packages/models/trained_models/constraint_dimension_hf"
59
+ }
constraint_dimension_hf/config.json ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "DebertaV2ForSequenceClassification"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "bos_token_id": null,
7
+ "dtype": "float32",
8
+ "eos_token_id": null,
9
+ "hidden_act": "gelu",
10
+ "hidden_dropout_prob": 0.1,
11
+ "hidden_size": 768,
12
+ "id2label": {
13
+ "0": "constraint_dimension::policy",
14
+ "1": "constraint_dimension::goal",
15
+ "2": "constraint_dimension::value",
16
+ "3": "constraint_dimension::causal",
17
+ "4": "constraint_dimension::state",
18
+ "5": "constraint_dimension::other"
19
+ },
20
+ "initializer_range": 0.02,
21
+ "intermediate_size": 3072,
22
+ "label2id": {
23
+ "constraint_dimension::causal": 3,
24
+ "constraint_dimension::goal": 1,
25
+ "constraint_dimension::other": 5,
26
+ "constraint_dimension::policy": 0,
27
+ "constraint_dimension::state": 4,
28
+ "constraint_dimension::value": 2
29
+ },
30
+ "layer_norm_eps": 1e-07,
31
+ "legacy": true,
32
+ "max_position_embeddings": 512,
33
+ "max_relative_positions": -1,
34
+ "model_type": "deberta-v2",
35
+ "norm_rel_ebd": "layer_norm",
36
+ "num_attention_heads": 12,
37
+ "num_hidden_layers": 12,
38
+ "pad_token_id": 0,
39
+ "pooler_dropout": 0,
40
+ "pooler_hidden_act": "gelu",
41
+ "pooler_hidden_size": 768,
42
+ "pos_att_type": [
43
+ "p2c",
44
+ "c2p"
45
+ ],
46
+ "position_biased_input": false,
47
+ "position_buckets": 256,
48
+ "relative_attention": true,
49
+ "share_att_key": true,
50
+ "tie_word_embeddings": true,
51
+ "transformers_version": "5.3.0",
52
+ "type_vocab_size": 0,
53
+ "vocab_size": 128100
54
+ }
constraint_dimension_hf/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bf5d168fa5fd65de839eb2e3e0b165809502cb6266b236fa5f19697391c9f46f
3
+ size 737731560
constraint_dimension_hf/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
constraint_dimension_hf/tokenizer_config.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": true,
3
+ "backend": "tokenizers",
4
+ "bos_token": "[CLS]",
5
+ "cls_token": "[CLS]",
6
+ "do_lower_case": false,
7
+ "eos_token": "[SEP]",
8
+ "extra_special_tokens": [
9
+ "[PAD]",
10
+ "[CLS]",
11
+ "[SEP]"
12
+ ],
13
+ "is_local": false,
14
+ "mask_token": "[MASK]",
15
+ "model_max_length": 1000000000000000019884624838656,
16
+ "pad_token": "[PAD]",
17
+ "sep_token": "[SEP]",
18
+ "split_by_punct": false,
19
+ "tokenizer_class": "DebertaV2Tokenizer",
20
+ "unk_id": 3,
21
+ "unk_token": "[UNK]",
22
+ "vocab_type": "spm"
23
+ }
constraint_dimension_metrics_eval.json ADDED
@@ -0,0 +1,196 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "family": "router",
3
+ "split": "eval",
4
+ "overall": {
5
+ "rows": 6000,
6
+ "accuracy": 0.8698333333333333,
7
+ "macro_f1": 0.8703105353987516,
8
+ "weighted_f1": 0.8703105353987517,
9
+ "micro_f1": 0.8698333333333333,
10
+ "labels": [
11
+ "constraint_dimension::causal",
12
+ "constraint_dimension::goal",
13
+ "constraint_dimension::other",
14
+ "constraint_dimension::policy",
15
+ "constraint_dimension::state",
16
+ "constraint_dimension::value"
17
+ ],
18
+ "confusion_matrix": [
19
+ [
20
+ 965,
21
+ 2,
22
+ 16,
23
+ 0,
24
+ 12,
25
+ 5
26
+ ],
27
+ [
28
+ 1,
29
+ 907,
30
+ 57,
31
+ 1,
32
+ 9,
33
+ 25
34
+ ],
35
+ [
36
+ 19,
37
+ 31,
38
+ 791,
39
+ 18,
40
+ 56,
41
+ 85
42
+ ],
43
+ [
44
+ 1,
45
+ 0,
46
+ 22,
47
+ 954,
48
+ 2,
49
+ 21
50
+ ],
51
+ [
52
+ 27,
53
+ 13,
54
+ 78,
55
+ 6,
56
+ 842,
57
+ 34
58
+ ],
59
+ [
60
+ 13,
61
+ 23,
62
+ 145,
63
+ 23,
64
+ 36,
65
+ 760
66
+ ]
67
+ ],
68
+ "calibration_error": 0.04364704496164792,
69
+ "classification_report": {
70
+ "constraint_dimension::causal": {
71
+ "precision": 0.9405458089668616,
72
+ "recall": 0.965,
73
+ "f1-score": 0.9526159921026653,
74
+ "support": 1000.0
75
+ },
76
+ "constraint_dimension::goal": {
77
+ "precision": 0.9293032786885246,
78
+ "recall": 0.907,
79
+ "f1-score": 0.9180161943319838,
80
+ "support": 1000.0
81
+ },
82
+ "constraint_dimension::other": {
83
+ "precision": 0.7132551848512173,
84
+ "recall": 0.791,
85
+ "f1-score": 0.7501185395922239,
86
+ "support": 1000.0
87
+ },
88
+ "constraint_dimension::policy": {
89
+ "precision": 0.9520958083832335,
90
+ "recall": 0.954,
91
+ "f1-score": 0.9530469530469531,
92
+ "support": 1000.0
93
+ },
94
+ "constraint_dimension::state": {
95
+ "precision": 0.8798328108672936,
96
+ "recall": 0.842,
97
+ "f1-score": 0.8605007664793051,
98
+ "support": 1000.0
99
+ },
100
+ "constraint_dimension::value": {
101
+ "precision": 0.8172043010752689,
102
+ "recall": 0.76,
103
+ "f1-score": 0.7875647668393783,
104
+ "support": 1000.0
105
+ },
106
+ "accuracy": 0.8698333333333333,
107
+ "macro avg": {
108
+ "precision": 0.8720395321387332,
109
+ "recall": 0.8698333333333332,
110
+ "f1-score": 0.8703105353987516,
111
+ "support": 6000.0
112
+ },
113
+ "weighted avg": {
114
+ "precision": 0.8720395321387332,
115
+ "recall": 0.8698333333333333,
116
+ "f1-score": 0.8703105353987517,
117
+ "support": 6000.0
118
+ }
119
+ }
120
+ },
121
+ "per_task": {
122
+ "constraint_dimension": {
123
+ "rows": 6000,
124
+ "accuracy": 0.8698333333333333,
125
+ "macro_f1": 0.8703105353987516,
126
+ "weighted_f1": 0.8703105353987517,
127
+ "micro_f1": 0.8698333333333333,
128
+ "labels": [
129
+ "causal",
130
+ "goal",
131
+ "other",
132
+ "policy",
133
+ "state",
134
+ "value"
135
+ ],
136
+ "confusion_matrix": [
137
+ [
138
+ 965,
139
+ 2,
140
+ 16,
141
+ 0,
142
+ 12,
143
+ 5
144
+ ],
145
+ [
146
+ 1,
147
+ 907,
148
+ 57,
149
+ 1,
150
+ 9,
151
+ 25
152
+ ],
153
+ [
154
+ 19,
155
+ 31,
156
+ 791,
157
+ 18,
158
+ 56,
159
+ 85
160
+ ],
161
+ [
162
+ 1,
163
+ 0,
164
+ 22,
165
+ 954,
166
+ 2,
167
+ 21
168
+ ],
169
+ [
170
+ 27,
171
+ 13,
172
+ 78,
173
+ 6,
174
+ 842,
175
+ 34
176
+ ],
177
+ [
178
+ 13,
179
+ 23,
180
+ 145,
181
+ 23,
182
+ 36,
183
+ 760
184
+ ]
185
+ ],
186
+ "wrong_task_predictions": 0,
187
+ "wrong_task_rate": 0.0
188
+ }
189
+ },
190
+ "calibration": {
191
+ "method": "temperature_grid_search",
192
+ "rows": 6000,
193
+ "temperature": 2.8,
194
+ "loss": null
195
+ }
196
+ }
constraint_dimension_metrics_test.json ADDED
@@ -0,0 +1,196 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "family": "router",
3
+ "split": "test",
4
+ "overall": {
5
+ "rows": 6000,
6
+ "accuracy": 0.8828333333333334,
7
+ "macro_f1": 0.883185453547077,
8
+ "weighted_f1": 0.883185453547077,
9
+ "micro_f1": 0.8828333333333334,
10
+ "labels": [
11
+ "constraint_dimension::causal",
12
+ "constraint_dimension::goal",
13
+ "constraint_dimension::other",
14
+ "constraint_dimension::policy",
15
+ "constraint_dimension::state",
16
+ "constraint_dimension::value"
17
+ ],
18
+ "confusion_matrix": [
19
+ [
20
+ 966,
21
+ 2,
22
+ 20,
23
+ 1,
24
+ 7,
25
+ 4
26
+ ],
27
+ [
28
+ 1,
29
+ 927,
30
+ 47,
31
+ 2,
32
+ 6,
33
+ 17
34
+ ],
35
+ [
36
+ 12,
37
+ 30,
38
+ 817,
39
+ 22,
40
+ 56,
41
+ 63
42
+ ],
43
+ [
44
+ 0,
45
+ 2,
46
+ 26,
47
+ 949,
48
+ 1,
49
+ 22
50
+ ],
51
+ [
52
+ 27,
53
+ 7,
54
+ 63,
55
+ 8,
56
+ 865,
57
+ 30
58
+ ],
59
+ [
60
+ 7,
61
+ 19,
62
+ 137,
63
+ 27,
64
+ 37,
65
+ 773
66
+ ]
67
+ ],
68
+ "calibration_error": 0.04340881209580012,
69
+ "classification_report": {
70
+ "constraint_dimension::causal": {
71
+ "precision": 0.9536031589338598,
72
+ "recall": 0.966,
73
+ "f1-score": 0.9597615499254843,
74
+ "support": 1000.0
75
+ },
76
+ "constraint_dimension::goal": {
77
+ "precision": 0.939209726443769,
78
+ "recall": 0.927,
79
+ "f1-score": 0.9330649219929542,
80
+ "support": 1000.0
81
+ },
82
+ "constraint_dimension::other": {
83
+ "precision": 0.7360360360360361,
84
+ "recall": 0.817,
85
+ "f1-score": 0.7744075829383886,
86
+ "support": 1000.0
87
+ },
88
+ "constraint_dimension::policy": {
89
+ "precision": 0.9405351833498513,
90
+ "recall": 0.949,
91
+ "f1-score": 0.944748631159781,
92
+ "support": 1000.0
93
+ },
94
+ "constraint_dimension::state": {
95
+ "precision": 0.8899176954732511,
96
+ "recall": 0.865,
97
+ "f1-score": 0.8772819472616633,
98
+ "support": 1000.0
99
+ },
100
+ "constraint_dimension::value": {
101
+ "precision": 0.8503850385038504,
102
+ "recall": 0.773,
103
+ "f1-score": 0.8098480880041907,
104
+ "support": 1000.0
105
+ },
106
+ "accuracy": 0.8828333333333334,
107
+ "macro avg": {
108
+ "precision": 0.8849478064567696,
109
+ "recall": 0.8828333333333332,
110
+ "f1-score": 0.883185453547077,
111
+ "support": 6000.0
112
+ },
113
+ "weighted avg": {
114
+ "precision": 0.8849478064567696,
115
+ "recall": 0.8828333333333334,
116
+ "f1-score": 0.883185453547077,
117
+ "support": 6000.0
118
+ }
119
+ }
120
+ },
121
+ "per_task": {
122
+ "constraint_dimension": {
123
+ "rows": 6000,
124
+ "accuracy": 0.8828333333333334,
125
+ "macro_f1": 0.883185453547077,
126
+ "weighted_f1": 0.883185453547077,
127
+ "micro_f1": 0.8828333333333334,
128
+ "labels": [
129
+ "causal",
130
+ "goal",
131
+ "other",
132
+ "policy",
133
+ "state",
134
+ "value"
135
+ ],
136
+ "confusion_matrix": [
137
+ [
138
+ 966,
139
+ 2,
140
+ 20,
141
+ 1,
142
+ 7,
143
+ 4
144
+ ],
145
+ [
146
+ 1,
147
+ 927,
148
+ 47,
149
+ 2,
150
+ 6,
151
+ 17
152
+ ],
153
+ [
154
+ 12,
155
+ 30,
156
+ 817,
157
+ 22,
158
+ 56,
159
+ 63
160
+ ],
161
+ [
162
+ 0,
163
+ 2,
164
+ 26,
165
+ 949,
166
+ 1,
167
+ 22
168
+ ],
169
+ [
170
+ 27,
171
+ 7,
172
+ 63,
173
+ 8,
174
+ 865,
175
+ 30
176
+ ],
177
+ [
178
+ 7,
179
+ 19,
180
+ 137,
181
+ 27,
182
+ 37,
183
+ 773
184
+ ]
185
+ ],
186
+ "wrong_task_predictions": 0,
187
+ "wrong_task_rate": 0.0
188
+ }
189
+ },
190
+ "calibration": {
191
+ "method": "temperature_grid_search",
192
+ "rows": 6000,
193
+ "temperature": 2.8,
194
+ "loss": null
195
+ }
196
+ }
constraint_dimension_model.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7cdb4509952221d1ffd5928e1bdd2d8733fab5dd05e3f3a5568ebb8788cc59e4
3
+ size 744943121
context_tag_epoch_stats.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "task": "context_tag",
3
+ "epoch_stats": [
4
+ {
5
+ "epoch": 1,
6
+ "train_loss": 0.9430735153735004
7
+ },
8
+ {
9
+ "epoch": 2,
10
+ "train_loss": 0.42989056710620754
11
+ },
12
+ {
13
+ "epoch": 3,
14
+ "train_loss": 0.26783848089678214
15
+ }
16
+ ],
17
+ "training_summary": {
18
+ "actual_epochs": 3,
19
+ "best_epoch": 3,
20
+ "early_stopped": false,
21
+ "backbone_model_name": "microsoft/deberta-v3-base",
22
+ "tokenizer_name": "microsoft/deberta-v3-base"
23
+ },
24
+ "calibration": {
25
+ "method": "temperature_grid_search",
26
+ "rows": 8000,
27
+ "temperature": 2.0,
28
+ "loss": 0.22631261527409646
29
+ },
30
+ "hf_model_dir": "/nvme/CognitiveMemoryLayer/packages/models/trained_models/context_tag_hf"
31
+ }
context_tag_hf/config.json ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "DebertaV2ForSequenceClassification"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "bos_token_id": null,
7
+ "dtype": "float32",
8
+ "eos_token_id": null,
9
+ "hidden_act": "gelu",
10
+ "hidden_dropout_prob": 0.1,
11
+ "hidden_size": 768,
12
+ "id2label": {
13
+ "0": "context_tag::general",
14
+ "1": "context_tag::food",
15
+ "2": "context_tag::travel",
16
+ "3": "context_tag::finance",
17
+ "4": "context_tag::health",
18
+ "5": "context_tag::work",
19
+ "6": "context_tag::tech",
20
+ "7": "context_tag::social"
21
+ },
22
+ "initializer_range": 0.02,
23
+ "intermediate_size": 3072,
24
+ "label2id": {
25
+ "context_tag::finance": 3,
26
+ "context_tag::food": 1,
27
+ "context_tag::general": 0,
28
+ "context_tag::health": 4,
29
+ "context_tag::social": 7,
30
+ "context_tag::tech": 6,
31
+ "context_tag::travel": 2,
32
+ "context_tag::work": 5
33
+ },
34
+ "layer_norm_eps": 1e-07,
35
+ "legacy": true,
36
+ "max_position_embeddings": 512,
37
+ "max_relative_positions": -1,
38
+ "model_type": "deberta-v2",
39
+ "norm_rel_ebd": "layer_norm",
40
+ "num_attention_heads": 12,
41
+ "num_hidden_layers": 12,
42
+ "pad_token_id": 0,
43
+ "pooler_dropout": 0,
44
+ "pooler_hidden_act": "gelu",
45
+ "pooler_hidden_size": 768,
46
+ "pos_att_type": [
47
+ "p2c",
48
+ "c2p"
49
+ ],
50
+ "position_biased_input": false,
51
+ "position_buckets": 256,
52
+ "relative_attention": true,
53
+ "share_att_key": true,
54
+ "tie_word_embeddings": true,
55
+ "transformers_version": "5.3.0",
56
+ "type_vocab_size": 0,
57
+ "vocab_size": 128100
58
+ }
context_tag_hf/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d912bc4be6e4e2d1fa014d52f3c17755c7e4e329955d47f62e10df47b1280768
3
+ size 737737712
context_tag_hf/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
context_tag_hf/tokenizer_config.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": true,
3
+ "backend": "tokenizers",
4
+ "bos_token": "[CLS]",
5
+ "cls_token": "[CLS]",
6
+ "do_lower_case": false,
7
+ "eos_token": "[SEP]",
8
+ "extra_special_tokens": [
9
+ "[PAD]",
10
+ "[CLS]",
11
+ "[SEP]"
12
+ ],
13
+ "is_local": false,
14
+ "mask_token": "[MASK]",
15
+ "model_max_length": 1000000000000000019884624838656,
16
+ "pad_token": "[PAD]",
17
+ "sep_token": "[SEP]",
18
+ "split_by_punct": false,
19
+ "tokenizer_class": "DebertaV2Tokenizer",
20
+ "unk_id": 3,
21
+ "unk_token": "[UNK]",
22
+ "vocab_type": "spm"
23
+ }
context_tag_metrics_eval.json ADDED
@@ -0,0 +1,276 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "family": "router",
3
+ "split": "eval",
4
+ "overall": {
5
+ "rows": 8000,
6
+ "accuracy": 0.94625,
7
+ "macro_f1": 0.9445890976694143,
8
+ "weighted_f1": 0.9445890976694143,
9
+ "micro_f1": 0.94625,
10
+ "labels": [
11
+ "context_tag::finance",
12
+ "context_tag::food",
13
+ "context_tag::general",
14
+ "context_tag::health",
15
+ "context_tag::social",
16
+ "context_tag::tech",
17
+ "context_tag::travel",
18
+ "context_tag::work"
19
+ ],
20
+ "confusion_matrix": [
21
+ [
22
+ 982,
23
+ 0,
24
+ 8,
25
+ 0,
26
+ 0,
27
+ 4,
28
+ 0,
29
+ 6
30
+ ],
31
+ [
32
+ 1,
33
+ 972,
34
+ 9,
35
+ 11,
36
+ 6,
37
+ 0,
38
+ 1,
39
+ 0
40
+ ],
41
+ [
42
+ 50,
43
+ 53,
44
+ 743,
45
+ 33,
46
+ 29,
47
+ 26,
48
+ 27,
49
+ 39
50
+ ],
51
+ [
52
+ 1,
53
+ 18,
54
+ 10,
55
+ 967,
56
+ 1,
57
+ 2,
58
+ 0,
59
+ 1
60
+ ],
61
+ [
62
+ 2,
63
+ 5,
64
+ 4,
65
+ 1,
66
+ 984,
67
+ 0,
68
+ 2,
69
+ 2
70
+ ],
71
+ [
72
+ 5,
73
+ 0,
74
+ 14,
75
+ 2,
76
+ 1,
77
+ 971,
78
+ 0,
79
+ 7
80
+ ],
81
+ [
82
+ 1,
83
+ 2,
84
+ 6,
85
+ 0,
86
+ 6,
87
+ 1,
88
+ 983,
89
+ 1
90
+ ],
91
+ [
92
+ 1,
93
+ 0,
94
+ 11,
95
+ 0,
96
+ 0,
97
+ 19,
98
+ 1,
99
+ 968
100
+ ]
101
+ ],
102
+ "calibration_error": 0.023930798305538282,
103
+ "classification_report": {
104
+ "context_tag::finance": {
105
+ "precision": 0.9415148609779482,
106
+ "recall": 0.982,
107
+ "f1-score": 0.9613313754282917,
108
+ "support": 1000.0
109
+ },
110
+ "context_tag::food": {
111
+ "precision": 0.9257142857142857,
112
+ "recall": 0.972,
113
+ "f1-score": 0.9482926829268292,
114
+ "support": 1000.0
115
+ },
116
+ "context_tag::general": {
117
+ "precision": 0.9229813664596274,
118
+ "recall": 0.743,
119
+ "f1-score": 0.8232686980609418,
120
+ "support": 1000.0
121
+ },
122
+ "context_tag::health": {
123
+ "precision": 0.9536489151873767,
124
+ "recall": 0.967,
125
+ "f1-score": 0.9602780536246276,
126
+ "support": 1000.0
127
+ },
128
+ "context_tag::social": {
129
+ "precision": 0.9581304771178188,
130
+ "recall": 0.984,
131
+ "f1-score": 0.9708929452392698,
132
+ "support": 1000.0
133
+ },
134
+ "context_tag::tech": {
135
+ "precision": 0.9491691104594331,
136
+ "recall": 0.971,
137
+ "f1-score": 0.9599604547701434,
138
+ "support": 1000.0
139
+ },
140
+ "context_tag::travel": {
141
+ "precision": 0.9694280078895463,
142
+ "recall": 0.983,
143
+ "f1-score": 0.9761668321747765,
144
+ "support": 1000.0
145
+ },
146
+ "context_tag::work": {
147
+ "precision": 0.9453125,
148
+ "recall": 0.968,
149
+ "f1-score": 0.9565217391304348,
150
+ "support": 1000.0
151
+ },
152
+ "accuracy": 0.94625,
153
+ "macro avg": {
154
+ "precision": 0.9457374404757546,
155
+ "recall": 0.94625,
156
+ "f1-score": 0.9445890976694143,
157
+ "support": 8000.0
158
+ },
159
+ "weighted avg": {
160
+ "precision": 0.9457374404757545,
161
+ "recall": 0.94625,
162
+ "f1-score": 0.9445890976694143,
163
+ "support": 8000.0
164
+ }
165
+ }
166
+ },
167
+ "per_task": {
168
+ "context_tag": {
169
+ "rows": 8000,
170
+ "accuracy": 0.94625,
171
+ "macro_f1": 0.9445890976694143,
172
+ "weighted_f1": 0.9445890976694143,
173
+ "micro_f1": 0.94625,
174
+ "labels": [
175
+ "finance",
176
+ "food",
177
+ "general",
178
+ "health",
179
+ "social",
180
+ "tech",
181
+ "travel",
182
+ "work"
183
+ ],
184
+ "confusion_matrix": [
185
+ [
186
+ 982,
187
+ 0,
188
+ 8,
189
+ 0,
190
+ 0,
191
+ 4,
192
+ 0,
193
+ 6
194
+ ],
195
+ [
196
+ 1,
197
+ 972,
198
+ 9,
199
+ 11,
200
+ 6,
201
+ 0,
202
+ 1,
203
+ 0
204
+ ],
205
+ [
206
+ 50,
207
+ 53,
208
+ 743,
209
+ 33,
210
+ 29,
211
+ 26,
212
+ 27,
213
+ 39
214
+ ],
215
+ [
216
+ 1,
217
+ 18,
218
+ 10,
219
+ 967,
220
+ 1,
221
+ 2,
222
+ 0,
223
+ 1
224
+ ],
225
+ [
226
+ 2,
227
+ 5,
228
+ 4,
229
+ 1,
230
+ 984,
231
+ 0,
232
+ 2,
233
+ 2
234
+ ],
235
+ [
236
+ 5,
237
+ 0,
238
+ 14,
239
+ 2,
240
+ 1,
241
+ 971,
242
+ 0,
243
+ 7
244
+ ],
245
+ [
246
+ 1,
247
+ 2,
248
+ 6,
249
+ 0,
250
+ 6,
251
+ 1,
252
+ 983,
253
+ 1
254
+ ],
255
+ [
256
+ 1,
257
+ 0,
258
+ 11,
259
+ 0,
260
+ 0,
261
+ 19,
262
+ 1,
263
+ 968
264
+ ]
265
+ ],
266
+ "wrong_task_predictions": 0,
267
+ "wrong_task_rate": 0.0
268
+ }
269
+ },
270
+ "calibration": {
271
+ "method": "temperature_grid_search",
272
+ "rows": 8000,
273
+ "temperature": 2.0,
274
+ "loss": 0.22631261527409646
275
+ }
276
+ }
context_tag_metrics_test.json ADDED
@@ -0,0 +1,276 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "family": "router",
3
+ "split": "test",
4
+ "overall": {
5
+ "rows": 8000,
6
+ "accuracy": 0.947375,
7
+ "macro_f1": 0.9462078544778264,
8
+ "weighted_f1": 0.9462078544778263,
9
+ "micro_f1": 0.947375,
10
+ "labels": [
11
+ "context_tag::finance",
12
+ "context_tag::food",
13
+ "context_tag::general",
14
+ "context_tag::health",
15
+ "context_tag::social",
16
+ "context_tag::tech",
17
+ "context_tag::travel",
18
+ "context_tag::work"
19
+ ],
20
+ "confusion_matrix": [
21
+ [
22
+ 979,
23
+ 0,
24
+ 12,
25
+ 1,
26
+ 0,
27
+ 3,
28
+ 1,
29
+ 4
30
+ ],
31
+ [
32
+ 0,
33
+ 951,
34
+ 15,
35
+ 24,
36
+ 8,
37
+ 0,
38
+ 2,
39
+ 0
40
+ ],
41
+ [
42
+ 52,
43
+ 34,
44
+ 775,
45
+ 27,
46
+ 28,
47
+ 21,
48
+ 30,
49
+ 33
50
+ ],
51
+ [
52
+ 0,
53
+ 13,
54
+ 15,
55
+ 972,
56
+ 0,
57
+ 0,
58
+ 0,
59
+ 0
60
+ ],
61
+ [
62
+ 1,
63
+ 5,
64
+ 10,
65
+ 0,
66
+ 975,
67
+ 0,
68
+ 6,
69
+ 3
70
+ ],
71
+ [
72
+ 2,
73
+ 0,
74
+ 9,
75
+ 0,
76
+ 0,
77
+ 975,
78
+ 0,
79
+ 14
80
+ ],
81
+ [
82
+ 4,
83
+ 0,
84
+ 4,
85
+ 0,
86
+ 4,
87
+ 1,
88
+ 986,
89
+ 1
90
+ ],
91
+ [
92
+ 5,
93
+ 0,
94
+ 11,
95
+ 0,
96
+ 0,
97
+ 17,
98
+ 1,
99
+ 966
100
+ ]
101
+ ],
102
+ "calibration_error": 0.024662236875587645,
103
+ "classification_report": {
104
+ "context_tag::finance": {
105
+ "precision": 0.9386385426653883,
106
+ "recall": 0.979,
107
+ "f1-score": 0.9583945178658835,
108
+ "support": 1000.0
109
+ },
110
+ "context_tag::food": {
111
+ "precision": 0.9481555333998006,
112
+ "recall": 0.951,
113
+ "f1-score": 0.9495756365451822,
114
+ "support": 1000.0
115
+ },
116
+ "context_tag::general": {
117
+ "precision": 0.9106933019976499,
118
+ "recall": 0.775,
119
+ "f1-score": 0.8373851971907077,
120
+ "support": 1000.0
121
+ },
122
+ "context_tag::health": {
123
+ "precision": 0.94921875,
124
+ "recall": 0.972,
125
+ "f1-score": 0.9604743083003953,
126
+ "support": 1000.0
127
+ },
128
+ "context_tag::social": {
129
+ "precision": 0.9605911330049262,
130
+ "recall": 0.975,
131
+ "f1-score": 0.967741935483871,
132
+ "support": 1000.0
133
+ },
134
+ "context_tag::tech": {
135
+ "precision": 0.9587020648967551,
136
+ "recall": 0.975,
137
+ "f1-score": 0.9667823500247893,
138
+ "support": 1000.0
139
+ },
140
+ "context_tag::travel": {
141
+ "precision": 0.9610136452241715,
142
+ "recall": 0.986,
143
+ "f1-score": 0.9733464955577492,
144
+ "support": 1000.0
145
+ },
146
+ "context_tag::work": {
147
+ "precision": 0.9461312438785504,
148
+ "recall": 0.966,
149
+ "f1-score": 0.9559623948540327,
150
+ "support": 1000.0
151
+ },
152
+ "accuracy": 0.947375,
153
+ "macro avg": {
154
+ "precision": 0.9466430268834052,
155
+ "recall": 0.947375,
156
+ "f1-score": 0.9462078544778264,
157
+ "support": 8000.0
158
+ },
159
+ "weighted avg": {
160
+ "precision": 0.9466430268834054,
161
+ "recall": 0.947375,
162
+ "f1-score": 0.9462078544778263,
163
+ "support": 8000.0
164
+ }
165
+ }
166
+ },
167
+ "per_task": {
168
+ "context_tag": {
169
+ "rows": 8000,
170
+ "accuracy": 0.947375,
171
+ "macro_f1": 0.9462078544778264,
172
+ "weighted_f1": 0.9462078544778263,
173
+ "micro_f1": 0.947375,
174
+ "labels": [
175
+ "finance",
176
+ "food",
177
+ "general",
178
+ "health",
179
+ "social",
180
+ "tech",
181
+ "travel",
182
+ "work"
183
+ ],
184
+ "confusion_matrix": [
185
+ [
186
+ 979,
187
+ 0,
188
+ 12,
189
+ 1,
190
+ 0,
191
+ 3,
192
+ 1,
193
+ 4
194
+ ],
195
+ [
196
+ 0,
197
+ 951,
198
+ 15,
199
+ 24,
200
+ 8,
201
+ 0,
202
+ 2,
203
+ 0
204
+ ],
205
+ [
206
+ 52,
207
+ 34,
208
+ 775,
209
+ 27,
210
+ 28,
211
+ 21,
212
+ 30,
213
+ 33
214
+ ],
215
+ [
216
+ 0,
217
+ 13,
218
+ 15,
219
+ 972,
220
+ 0,
221
+ 0,
222
+ 0,
223
+ 0
224
+ ],
225
+ [
226
+ 1,
227
+ 5,
228
+ 10,
229
+ 0,
230
+ 975,
231
+ 0,
232
+ 6,
233
+ 3
234
+ ],
235
+ [
236
+ 2,
237
+ 0,
238
+ 9,
239
+ 0,
240
+ 0,
241
+ 975,
242
+ 0,
243
+ 14
244
+ ],
245
+ [
246
+ 4,
247
+ 0,
248
+ 4,
249
+ 0,
250
+ 4,
251
+ 1,
252
+ 986,
253
+ 1
254
+ ],
255
+ [
256
+ 5,
257
+ 0,
258
+ 11,
259
+ 0,
260
+ 0,
261
+ 17,
262
+ 1,
263
+ 966
264
+ ]
265
+ ],
266
+ "wrong_task_predictions": 0,
267
+ "wrong_task_rate": 0.0
268
+ }
269
+ },
270
+ "calibration": {
271
+ "method": "temperature_grid_search",
272
+ "rows": 8000,
273
+ "temperature": 2.0,
274
+ "loss": 0.22631261527409646
275
+ }
276
+ }
context_tag_model.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0b104b49ed2cbeaca3d4ed98f18d0586638ebb163bbdfeafb8f2a9c8a2cd1ec0
3
+ size 744958599
decay_profile_epoch_stats.json ADDED
@@ -0,0 +1,113 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "task": "decay_profile",
3
+ "epoch_stats": [
4
+ {
5
+ "boundary_index": 0,
6
+ "boundary": "very_fast|>fast"
7
+ },
8
+ {
9
+ "boundary_index": 1,
10
+ "boundary": "fast|>medium"
11
+ },
12
+ {
13
+ "boundary_index": 2,
14
+ "boundary": "medium|>slow"
15
+ },
16
+ {
17
+ "boundary_index": 3,
18
+ "boundary": "slow|>very_slow"
19
+ }
20
+ ],
21
+ "training_summary": {
22
+ "actual_epochs": 1,
23
+ "best_epoch": 1,
24
+ "early_stopped": false,
25
+ "boundary_count": 4
26
+ },
27
+ "boundaries": [
28
+ {
29
+ "boundary": "very_fast|>fast",
30
+ "index": 0,
31
+ "positive_rows": 32000,
32
+ "negative_rows": 8000,
33
+ "calibration": {
34
+ "method": "sigmoid",
35
+ "split": "eval",
36
+ "rows": 5000,
37
+ "pre_ece": 0.007230522978587395,
38
+ "post_ece": 0.00047552909827319745,
39
+ "pre_accuracy": 1.0,
40
+ "post_accuracy": 1.0,
41
+ "accuracy_delta": 0.0
42
+ },
43
+ "isotonic": {
44
+ "rows": 5000,
45
+ "pre_mean": 0.7999997007490585,
46
+ "post_mean": 0.8
47
+ }
48
+ },
49
+ {
50
+ "boundary": "fast|>medium",
51
+ "index": 1,
52
+ "positive_rows": 24000,
53
+ "negative_rows": 16000,
54
+ "calibration": {
55
+ "method": "sigmoid",
56
+ "split": "eval",
57
+ "rows": 5000,
58
+ "pre_ece": 0.009162517489532895,
59
+ "post_ece": 0.0005985034683911872,
60
+ "pre_accuracy": 1.0,
61
+ "post_accuracy": 1.0,
62
+ "accuracy_delta": 0.0
63
+ },
64
+ "isotonic": {
65
+ "rows": 5000,
66
+ "pre_mean": 0.5999999334705475,
67
+ "post_mean": 0.6
68
+ }
69
+ },
70
+ {
71
+ "boundary": "medium|>slow",
72
+ "index": 2,
73
+ "positive_rows": 16000,
74
+ "negative_rows": 24000,
75
+ "calibration": {
76
+ "method": "sigmoid",
77
+ "split": "eval",
78
+ "rows": 5000,
79
+ "pre_ece": 0.011862165538748193,
80
+ "post_ece": 0.0005116815113473949,
81
+ "pre_accuracy": 1.0,
82
+ "post_accuracy": 1.0,
83
+ "accuracy_delta": 0.0
84
+ },
85
+ "isotonic": {
86
+ "rows": 5000,
87
+ "pre_mean": 0.4000000665555369,
88
+ "post_mean": 0.4
89
+ }
90
+ },
91
+ {
92
+ "boundary": "slow|>very_slow",
93
+ "index": 3,
94
+ "positive_rows": 8000,
95
+ "negative_rows": 32000,
96
+ "calibration": {
97
+ "method": "sigmoid",
98
+ "split": "eval",
99
+ "rows": 5000,
100
+ "pre_ece": 0.007803197222763126,
101
+ "post_ece": 0.0005485122608633874,
102
+ "pre_accuracy": 1.0,
103
+ "post_accuracy": 1.0,
104
+ "accuracy_delta": 0.0
105
+ },
106
+ "isotonic": {
107
+ "rows": 5000,
108
+ "pre_mean": 0.20000029925930188,
109
+ "post_mean": 0.2
110
+ }
111
+ }
112
+ ]
113
+ }
decay_profile_metrics_eval.json ADDED
@@ -0,0 +1,158 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "family": "router",
3
+ "split": "eval",
4
+ "overall": {
5
+ "rows": 5000,
6
+ "accuracy": 1.0,
7
+ "macro_f1": 1.0,
8
+ "weighted_f1": 1.0,
9
+ "micro_f1": 1.0,
10
+ "labels": [
11
+ "very_fast",
12
+ "fast",
13
+ "medium",
14
+ "slow",
15
+ "very_slow"
16
+ ],
17
+ "confusion_matrix": [
18
+ [
19
+ 1000,
20
+ 0,
21
+ 0,
22
+ 0,
23
+ 0
24
+ ],
25
+ [
26
+ 0,
27
+ 1000,
28
+ 0,
29
+ 0,
30
+ 0
31
+ ],
32
+ [
33
+ 0,
34
+ 0,
35
+ 1000,
36
+ 0,
37
+ 0
38
+ ],
39
+ [
40
+ 0,
41
+ 0,
42
+ 0,
43
+ 1000,
44
+ 0
45
+ ],
46
+ [
47
+ 0,
48
+ 0,
49
+ 0,
50
+ 0,
51
+ 1000
52
+ ]
53
+ ],
54
+ "calibration_error": 0.0,
55
+ "classification_report": {
56
+ "decay_profile::fast": {
57
+ "precision": 1.0,
58
+ "recall": 1.0,
59
+ "f1-score": 1.0,
60
+ "support": 1000.0
61
+ },
62
+ "decay_profile::medium": {
63
+ "precision": 1.0,
64
+ "recall": 1.0,
65
+ "f1-score": 1.0,
66
+ "support": 1000.0
67
+ },
68
+ "decay_profile::slow": {
69
+ "precision": 1.0,
70
+ "recall": 1.0,
71
+ "f1-score": 1.0,
72
+ "support": 1000.0
73
+ },
74
+ "decay_profile::very_fast": {
75
+ "precision": 1.0,
76
+ "recall": 1.0,
77
+ "f1-score": 1.0,
78
+ "support": 1000.0
79
+ },
80
+ "decay_profile::very_slow": {
81
+ "precision": 1.0,
82
+ "recall": 1.0,
83
+ "f1-score": 1.0,
84
+ "support": 1000.0
85
+ },
86
+ "accuracy": 1.0,
87
+ "macro avg": {
88
+ "precision": 1.0,
89
+ "recall": 1.0,
90
+ "f1-score": 1.0,
91
+ "support": 5000.0
92
+ },
93
+ "weighted avg": {
94
+ "precision": 1.0,
95
+ "recall": 1.0,
96
+ "f1-score": 1.0,
97
+ "support": 5000.0
98
+ }
99
+ },
100
+ "ordinal_mae": 0.0,
101
+ "off_by_two_rate": 0.0
102
+ },
103
+ "per_task": {
104
+ "decay_profile": {
105
+ "rows": 5000,
106
+ "accuracy": 1.0,
107
+ "macro_f1": 1.0,
108
+ "weighted_f1": 1.0,
109
+ "micro_f1": 1.0,
110
+ "labels": [
111
+ "fast",
112
+ "medium",
113
+ "slow",
114
+ "very_fast",
115
+ "very_slow"
116
+ ],
117
+ "confusion_matrix": [
118
+ [
119
+ 1000,
120
+ 0,
121
+ 0,
122
+ 0,
123
+ 0
124
+ ],
125
+ [
126
+ 0,
127
+ 1000,
128
+ 0,
129
+ 0,
130
+ 0
131
+ ],
132
+ [
133
+ 0,
134
+ 0,
135
+ 1000,
136
+ 0,
137
+ 0
138
+ ],
139
+ [
140
+ 0,
141
+ 0,
142
+ 0,
143
+ 1000,
144
+ 0
145
+ ],
146
+ [
147
+ 0,
148
+ 0,
149
+ 0,
150
+ 0,
151
+ 1000
152
+ ]
153
+ ],
154
+ "wrong_task_predictions": 0,
155
+ "wrong_task_rate": 0.0
156
+ }
157
+ }
158
+ }
decay_profile_metrics_test.json ADDED
@@ -0,0 +1,158 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "family": "router",
3
+ "split": "test",
4
+ "overall": {
5
+ "rows": 5000,
6
+ "accuracy": 1.0,
7
+ "macro_f1": 1.0,
8
+ "weighted_f1": 1.0,
9
+ "micro_f1": 1.0,
10
+ "labels": [
11
+ "very_fast",
12
+ "fast",
13
+ "medium",
14
+ "slow",
15
+ "very_slow"
16
+ ],
17
+ "confusion_matrix": [
18
+ [
19
+ 1000,
20
+ 0,
21
+ 0,
22
+ 0,
23
+ 0
24
+ ],
25
+ [
26
+ 0,
27
+ 1000,
28
+ 0,
29
+ 0,
30
+ 0
31
+ ],
32
+ [
33
+ 0,
34
+ 0,
35
+ 1000,
36
+ 0,
37
+ 0
38
+ ],
39
+ [
40
+ 0,
41
+ 0,
42
+ 0,
43
+ 1000,
44
+ 0
45
+ ],
46
+ [
47
+ 0,
48
+ 0,
49
+ 0,
50
+ 0,
51
+ 1000
52
+ ]
53
+ ],
54
+ "calibration_error": 0.0,
55
+ "classification_report": {
56
+ "decay_profile::fast": {
57
+ "precision": 1.0,
58
+ "recall": 1.0,
59
+ "f1-score": 1.0,
60
+ "support": 1000.0
61
+ },
62
+ "decay_profile::medium": {
63
+ "precision": 1.0,
64
+ "recall": 1.0,
65
+ "f1-score": 1.0,
66
+ "support": 1000.0
67
+ },
68
+ "decay_profile::slow": {
69
+ "precision": 1.0,
70
+ "recall": 1.0,
71
+ "f1-score": 1.0,
72
+ "support": 1000.0
73
+ },
74
+ "decay_profile::very_fast": {
75
+ "precision": 1.0,
76
+ "recall": 1.0,
77
+ "f1-score": 1.0,
78
+ "support": 1000.0
79
+ },
80
+ "decay_profile::very_slow": {
81
+ "precision": 1.0,
82
+ "recall": 1.0,
83
+ "f1-score": 1.0,
84
+ "support": 1000.0
85
+ },
86
+ "accuracy": 1.0,
87
+ "macro avg": {
88
+ "precision": 1.0,
89
+ "recall": 1.0,
90
+ "f1-score": 1.0,
91
+ "support": 5000.0
92
+ },
93
+ "weighted avg": {
94
+ "precision": 1.0,
95
+ "recall": 1.0,
96
+ "f1-score": 1.0,
97
+ "support": 5000.0
98
+ }
99
+ },
100
+ "ordinal_mae": 0.0,
101
+ "off_by_two_rate": 0.0
102
+ },
103
+ "per_task": {
104
+ "decay_profile": {
105
+ "rows": 5000,
106
+ "accuracy": 1.0,
107
+ "macro_f1": 1.0,
108
+ "weighted_f1": 1.0,
109
+ "micro_f1": 1.0,
110
+ "labels": [
111
+ "fast",
112
+ "medium",
113
+ "slow",
114
+ "very_fast",
115
+ "very_slow"
116
+ ],
117
+ "confusion_matrix": [
118
+ [
119
+ 1000,
120
+ 0,
121
+ 0,
122
+ 0,
123
+ 0
124
+ ],
125
+ [
126
+ 0,
127
+ 1000,
128
+ 0,
129
+ 0,
130
+ 0
131
+ ],
132
+ [
133
+ 0,
134
+ 0,
135
+ 1000,
136
+ 0,
137
+ 0
138
+ ],
139
+ [
140
+ 0,
141
+ 0,
142
+ 0,
143
+ 1000,
144
+ 0
145
+ ],
146
+ [
147
+ 0,
148
+ 0,
149
+ 0,
150
+ 0,
151
+ 1000
152
+ ]
153
+ ],
154
+ "wrong_task_predictions": 0,
155
+ "wrong_task_rate": 0.0
156
+ }
157
+ }
158
+ }
decay_profile_model.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ecd2c138214adbef0c88b088af45620eec9f86a3c16bcc2e2d4bae312ac1df64
3
+ size 2087014
extractor_epoch_stats.json ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "family": "extractor",
3
+ "epoch_stats": [
4
+ {
5
+ "epoch": 1,
6
+ "train_loss": 0.07304321517285889,
7
+ "train_accuracy": 0.9985243055555556,
8
+ "train_macro_f1": 0.9996205356573068,
9
+ "train_weighted_f1": 0.9985243053339707,
10
+ "valid_loss": 0.07040478728784708,
11
+ "valid_accuracy": 0.9978333333333333,
12
+ "valid_macro_f1": 0.9994428567862855,
13
+ "valid_weighted_f1": 0.9978333319466658,
14
+ "monitor_metric": "macro_f1",
15
+ "monitor_value": 0.9994428567862855,
16
+ "improved": true
17
+ },
18
+ {
19
+ "epoch": 2,
20
+ "train_loss": 0.07098693556871623,
21
+ "train_accuracy": 0.9985138888888889,
22
+ "train_macro_f1": 0.9996178570566361,
23
+ "train_weighted_f1": 0.998513888553585,
24
+ "valid_loss": 0.06895428509737356,
25
+ "valid_accuracy": 0.9979166666666667,
26
+ "valid_macro_f1": 0.9994642852803567,
27
+ "valid_weighted_f1": 0.9979166649791653,
28
+ "monitor_metric": "macro_f1",
29
+ "monitor_value": 0.9994642852803567,
30
+ "improved": false
31
+ },
32
+ {
33
+ "epoch": 3,
34
+ "train_loss": 0.06992293075722475,
35
+ "train_accuracy": 0.9984930555555556,
36
+ "train_macro_f1": 0.9996124998827812,
37
+ "train_weighted_f1": 0.9984930550997047,
38
+ "valid_loss": 0.06817925236885357,
39
+ "valid_accuracy": 0.9979166666666667,
40
+ "valid_macro_f1": 0.9994642852803567,
41
+ "valid_weighted_f1": 0.9979166649791653,
42
+ "monitor_metric": "macro_f1",
43
+ "monitor_value": 0.9994642852803567,
44
+ "improved": false
45
+ },
46
+ {
47
+ "epoch": 4,
48
+ "train_loss": 0.0692276576162906,
49
+ "train_accuracy": 0.9984861111111111,
50
+ "train_macro_f1": 0.9996107141570066,
51
+ "train_weighted_f1": 0.9984861106105815,
52
+ "valid_loss": 0.06766472301272425,
53
+ "valid_accuracy": 0.9979166666666667,
54
+ "valid_macro_f1": 0.9994642852803567,
55
+ "valid_weighted_f1": 0.9979166649791653,
56
+ "monitor_metric": "macro_f1",
57
+ "monitor_value": 0.9994642852803567,
58
+ "improved": false
59
+ }
60
+ ]
61
+ }
extractor_label_map.json ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "label_to_id": {
3
+ "constraint_scope::finance": 0,
4
+ "constraint_scope::food": 1,
5
+ "constraint_scope::general": 2,
6
+ "constraint_scope::health": 3,
7
+ "constraint_scope::none": 4,
8
+ "constraint_scope::social": 5,
9
+ "constraint_scope::tech": 6,
10
+ "constraint_scope::travel": 7,
11
+ "constraint_scope::work": 8,
12
+ "constraint_stability::semi_stable": 9,
13
+ "constraint_stability::stable": 10,
14
+ "constraint_stability::volatile": 11,
15
+ "constraint_type::causal": 12,
16
+ "constraint_type::constraint_other": 13,
17
+ "constraint_type::goal": 14,
18
+ "constraint_type::none": 15,
19
+ "constraint_type::policy": 16,
20
+ "constraint_type::preference": 17,
21
+ "constraint_type::state": 18,
22
+ "constraint_type::value": 19,
23
+ "fact_type::identity": 20,
24
+ "fact_type::location": 21,
25
+ "fact_type::none": 22,
26
+ "fact_type::occupation": 23,
27
+ "fact_type::other_fact": 24,
28
+ "fact_type::preference": 25,
29
+ "pii_presence::no_pii": 26,
30
+ "pii_presence::pii": 27
31
+ },
32
+ "id_to_label": {
33
+ "0": "constraint_scope::finance",
34
+ "1": "constraint_scope::food",
35
+ "2": "constraint_scope::general",
36
+ "3": "constraint_scope::health",
37
+ "4": "constraint_scope::none",
38
+ "5": "constraint_scope::social",
39
+ "6": "constraint_scope::tech",
40
+ "7": "constraint_scope::travel",
41
+ "8": "constraint_scope::work",
42
+ "9": "constraint_stability::semi_stable",
43
+ "10": "constraint_stability::stable",
44
+ "11": "constraint_stability::volatile",
45
+ "12": "constraint_type::causal",
46
+ "13": "constraint_type::constraint_other",
47
+ "14": "constraint_type::goal",
48
+ "15": "constraint_type::none",
49
+ "16": "constraint_type::policy",
50
+ "17": "constraint_type::preference",
51
+ "18": "constraint_type::state",
52
+ "19": "constraint_type::value",
53
+ "20": "fact_type::identity",
54
+ "21": "fact_type::location",
55
+ "22": "fact_type::none",
56
+ "23": "fact_type::occupation",
57
+ "24": "fact_type::other_fact",
58
+ "25": "fact_type::preference",
59
+ "26": "pii_presence::no_pii",
60
+ "27": "pii_presence::pii"
61
+ }
62
+ }
extractor_metrics_eval.json ADDED
@@ -0,0 +1,1464 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "family": "extractor",
3
+ "split": "eval",
4
+ "overall": {
5
+ "rows": 36000,
6
+ "accuracy": 0.9981666666666666,
7
+ "macro_f1": 0.9995285691468461,
8
+ "weighted_f1": 0.9981666577932904,
9
+ "micro_f1": 0.9981666666666666,
10
+ "labels": [
11
+ "constraint_scope::finance",
12
+ "constraint_scope::food",
13
+ "constraint_scope::general",
14
+ "constraint_scope::health",
15
+ "constraint_scope::none",
16
+ "constraint_scope::social",
17
+ "constraint_scope::tech",
18
+ "constraint_scope::travel",
19
+ "constraint_scope::work",
20
+ "constraint_stability::semi_stable",
21
+ "constraint_stability::stable",
22
+ "constraint_stability::volatile",
23
+ "constraint_type::causal",
24
+ "constraint_type::constraint_other",
25
+ "constraint_type::goal",
26
+ "constraint_type::none",
27
+ "constraint_type::policy",
28
+ "constraint_type::preference",
29
+ "constraint_type::state",
30
+ "constraint_type::value",
31
+ "fact_type::identity",
32
+ "fact_type::location",
33
+ "fact_type::none",
34
+ "fact_type::occupation",
35
+ "fact_type::other_fact",
36
+ "fact_type::preference",
37
+ "pii_presence::no_pii",
38
+ "pii_presence::pii"
39
+ ],
40
+ "confusion_matrix": [
41
+ [
42
+ 1000,
43
+ 0,
44
+ 0,
45
+ 0,
46
+ 0,
47
+ 0,
48
+ 0,
49
+ 0,
50
+ 0,
51
+ 0,
52
+ 0,
53
+ 0,
54
+ 0,
55
+ 0,
56
+ 0,
57
+ 0,
58
+ 0,
59
+ 0,
60
+ 0,
61
+ 0,
62
+ 0,
63
+ 0,
64
+ 0,
65
+ 0,
66
+ 0,
67
+ 0,
68
+ 0,
69
+ 0
70
+ ],
71
+ [
72
+ 0,
73
+ 1000,
74
+ 0,
75
+ 0,
76
+ 0,
77
+ 0,
78
+ 0,
79
+ 0,
80
+ 0,
81
+ 0,
82
+ 0,
83
+ 0,
84
+ 0,
85
+ 0,
86
+ 0,
87
+ 0,
88
+ 0,
89
+ 0,
90
+ 0,
91
+ 0,
92
+ 0,
93
+ 0,
94
+ 0,
95
+ 0,
96
+ 0,
97
+ 0,
98
+ 0,
99
+ 0
100
+ ],
101
+ [
102
+ 0,
103
+ 0,
104
+ 1000,
105
+ 0,
106
+ 0,
107
+ 0,
108
+ 0,
109
+ 0,
110
+ 0,
111
+ 0,
112
+ 0,
113
+ 0,
114
+ 0,
115
+ 0,
116
+ 0,
117
+ 0,
118
+ 0,
119
+ 0,
120
+ 0,
121
+ 0,
122
+ 0,
123
+ 0,
124
+ 0,
125
+ 0,
126
+ 0,
127
+ 0,
128
+ 0,
129
+ 0
130
+ ],
131
+ [
132
+ 0,
133
+ 0,
134
+ 0,
135
+ 1000,
136
+ 0,
137
+ 0,
138
+ 0,
139
+ 0,
140
+ 0,
141
+ 0,
142
+ 0,
143
+ 0,
144
+ 0,
145
+ 0,
146
+ 0,
147
+ 0,
148
+ 0,
149
+ 0,
150
+ 0,
151
+ 0,
152
+ 0,
153
+ 0,
154
+ 0,
155
+ 0,
156
+ 0,
157
+ 0,
158
+ 0,
159
+ 0
160
+ ],
161
+ [
162
+ 0,
163
+ 0,
164
+ 0,
165
+ 0,
166
+ 1000,
167
+ 0,
168
+ 0,
169
+ 0,
170
+ 0,
171
+ 0,
172
+ 0,
173
+ 0,
174
+ 0,
175
+ 0,
176
+ 0,
177
+ 0,
178
+ 0,
179
+ 0,
180
+ 0,
181
+ 0,
182
+ 0,
183
+ 0,
184
+ 0,
185
+ 0,
186
+ 0,
187
+ 0,
188
+ 0,
189
+ 0
190
+ ],
191
+ [
192
+ 0,
193
+ 0,
194
+ 0,
195
+ 0,
196
+ 0,
197
+ 1000,
198
+ 0,
199
+ 0,
200
+ 0,
201
+ 0,
202
+ 0,
203
+ 0,
204
+ 0,
205
+ 0,
206
+ 0,
207
+ 0,
208
+ 0,
209
+ 0,
210
+ 0,
211
+ 0,
212
+ 0,
213
+ 0,
214
+ 0,
215
+ 0,
216
+ 0,
217
+ 0,
218
+ 0,
219
+ 0
220
+ ],
221
+ [
222
+ 0,
223
+ 0,
224
+ 0,
225
+ 0,
226
+ 0,
227
+ 0,
228
+ 1000,
229
+ 0,
230
+ 0,
231
+ 0,
232
+ 0,
233
+ 0,
234
+ 0,
235
+ 0,
236
+ 0,
237
+ 0,
238
+ 0,
239
+ 0,
240
+ 0,
241
+ 0,
242
+ 0,
243
+ 0,
244
+ 0,
245
+ 0,
246
+ 0,
247
+ 0,
248
+ 0,
249
+ 0
250
+ ],
251
+ [
252
+ 0,
253
+ 0,
254
+ 0,
255
+ 0,
256
+ 0,
257
+ 0,
258
+ 0,
259
+ 1000,
260
+ 0,
261
+ 0,
262
+ 0,
263
+ 0,
264
+ 0,
265
+ 0,
266
+ 0,
267
+ 0,
268
+ 0,
269
+ 0,
270
+ 0,
271
+ 0,
272
+ 0,
273
+ 0,
274
+ 0,
275
+ 0,
276
+ 0,
277
+ 0,
278
+ 0,
279
+ 0
280
+ ],
281
+ [
282
+ 0,
283
+ 0,
284
+ 0,
285
+ 0,
286
+ 0,
287
+ 0,
288
+ 0,
289
+ 0,
290
+ 1000,
291
+ 0,
292
+ 0,
293
+ 0,
294
+ 0,
295
+ 0,
296
+ 0,
297
+ 0,
298
+ 0,
299
+ 0,
300
+ 0,
301
+ 0,
302
+ 0,
303
+ 0,
304
+ 0,
305
+ 0,
306
+ 0,
307
+ 0,
308
+ 0,
309
+ 0
310
+ ],
311
+ [
312
+ 0,
313
+ 0,
314
+ 0,
315
+ 0,
316
+ 0,
317
+ 0,
318
+ 0,
319
+ 0,
320
+ 0,
321
+ 1000,
322
+ 0,
323
+ 0,
324
+ 0,
325
+ 0,
326
+ 0,
327
+ 0,
328
+ 0,
329
+ 0,
330
+ 0,
331
+ 0,
332
+ 0,
333
+ 0,
334
+ 0,
335
+ 0,
336
+ 0,
337
+ 0,
338
+ 0,
339
+ 0
340
+ ],
341
+ [
342
+ 0,
343
+ 0,
344
+ 0,
345
+ 0,
346
+ 0,
347
+ 0,
348
+ 0,
349
+ 0,
350
+ 0,
351
+ 0,
352
+ 1000,
353
+ 0,
354
+ 0,
355
+ 0,
356
+ 0,
357
+ 0,
358
+ 0,
359
+ 0,
360
+ 0,
361
+ 0,
362
+ 0,
363
+ 0,
364
+ 0,
365
+ 0,
366
+ 0,
367
+ 0,
368
+ 0,
369
+ 0
370
+ ],
371
+ [
372
+ 0,
373
+ 0,
374
+ 0,
375
+ 0,
376
+ 0,
377
+ 0,
378
+ 0,
379
+ 0,
380
+ 0,
381
+ 0,
382
+ 0,
383
+ 1000,
384
+ 0,
385
+ 0,
386
+ 0,
387
+ 0,
388
+ 0,
389
+ 0,
390
+ 0,
391
+ 0,
392
+ 0,
393
+ 0,
394
+ 0,
395
+ 0,
396
+ 0,
397
+ 0,
398
+ 0,
399
+ 0
400
+ ],
401
+ [
402
+ 0,
403
+ 0,
404
+ 0,
405
+ 0,
406
+ 0,
407
+ 0,
408
+ 0,
409
+ 0,
410
+ 0,
411
+ 0,
412
+ 0,
413
+ 0,
414
+ 1000,
415
+ 0,
416
+ 0,
417
+ 0,
418
+ 0,
419
+ 0,
420
+ 0,
421
+ 0,
422
+ 0,
423
+ 0,
424
+ 0,
425
+ 0,
426
+ 0,
427
+ 0,
428
+ 0,
429
+ 0
430
+ ],
431
+ [
432
+ 0,
433
+ 0,
434
+ 0,
435
+ 0,
436
+ 0,
437
+ 0,
438
+ 0,
439
+ 0,
440
+ 0,
441
+ 0,
442
+ 0,
443
+ 0,
444
+ 0,
445
+ 1000,
446
+ 0,
447
+ 0,
448
+ 0,
449
+ 0,
450
+ 0,
451
+ 0,
452
+ 0,
453
+ 0,
454
+ 0,
455
+ 0,
456
+ 0,
457
+ 0,
458
+ 0,
459
+ 0
460
+ ],
461
+ [
462
+ 0,
463
+ 0,
464
+ 0,
465
+ 0,
466
+ 0,
467
+ 0,
468
+ 0,
469
+ 0,
470
+ 0,
471
+ 0,
472
+ 0,
473
+ 0,
474
+ 0,
475
+ 0,
476
+ 1000,
477
+ 0,
478
+ 0,
479
+ 0,
480
+ 0,
481
+ 0,
482
+ 0,
483
+ 0,
484
+ 0,
485
+ 0,
486
+ 0,
487
+ 0,
488
+ 0,
489
+ 0
490
+ ],
491
+ [
492
+ 0,
493
+ 0,
494
+ 0,
495
+ 0,
496
+ 0,
497
+ 0,
498
+ 0,
499
+ 0,
500
+ 0,
501
+ 0,
502
+ 0,
503
+ 0,
504
+ 0,
505
+ 0,
506
+ 0,
507
+ 1000,
508
+ 0,
509
+ 0,
510
+ 0,
511
+ 0,
512
+ 0,
513
+ 0,
514
+ 0,
515
+ 0,
516
+ 0,
517
+ 0,
518
+ 0,
519
+ 0
520
+ ],
521
+ [
522
+ 0,
523
+ 0,
524
+ 0,
525
+ 0,
526
+ 0,
527
+ 0,
528
+ 0,
529
+ 0,
530
+ 0,
531
+ 0,
532
+ 0,
533
+ 0,
534
+ 0,
535
+ 0,
536
+ 0,
537
+ 0,
538
+ 1000,
539
+ 0,
540
+ 0,
541
+ 0,
542
+ 0,
543
+ 0,
544
+ 0,
545
+ 0,
546
+ 0,
547
+ 0,
548
+ 0,
549
+ 0
550
+ ],
551
+ [
552
+ 0,
553
+ 0,
554
+ 0,
555
+ 0,
556
+ 0,
557
+ 0,
558
+ 0,
559
+ 0,
560
+ 0,
561
+ 0,
562
+ 0,
563
+ 0,
564
+ 0,
565
+ 0,
566
+ 0,
567
+ 0,
568
+ 0,
569
+ 1000,
570
+ 0,
571
+ 0,
572
+ 0,
573
+ 0,
574
+ 0,
575
+ 0,
576
+ 0,
577
+ 0,
578
+ 0,
579
+ 0
580
+ ],
581
+ [
582
+ 0,
583
+ 0,
584
+ 0,
585
+ 0,
586
+ 0,
587
+ 0,
588
+ 0,
589
+ 0,
590
+ 0,
591
+ 0,
592
+ 0,
593
+ 0,
594
+ 0,
595
+ 0,
596
+ 0,
597
+ 0,
598
+ 0,
599
+ 0,
600
+ 1000,
601
+ 0,
602
+ 0,
603
+ 0,
604
+ 0,
605
+ 0,
606
+ 0,
607
+ 0,
608
+ 0,
609
+ 0
610
+ ],
611
+ [
612
+ 0,
613
+ 0,
614
+ 0,
615
+ 0,
616
+ 0,
617
+ 0,
618
+ 0,
619
+ 0,
620
+ 0,
621
+ 0,
622
+ 0,
623
+ 0,
624
+ 0,
625
+ 0,
626
+ 0,
627
+ 0,
628
+ 0,
629
+ 0,
630
+ 0,
631
+ 1000,
632
+ 0,
633
+ 0,
634
+ 0,
635
+ 0,
636
+ 0,
637
+ 0,
638
+ 0,
639
+ 0
640
+ ],
641
+ [
642
+ 0,
643
+ 0,
644
+ 0,
645
+ 0,
646
+ 0,
647
+ 0,
648
+ 0,
649
+ 0,
650
+ 0,
651
+ 0,
652
+ 0,
653
+ 0,
654
+ 0,
655
+ 0,
656
+ 0,
657
+ 0,
658
+ 0,
659
+ 0,
660
+ 0,
661
+ 0,
662
+ 1000,
663
+ 0,
664
+ 0,
665
+ 0,
666
+ 0,
667
+ 0,
668
+ 0,
669
+ 0
670
+ ],
671
+ [
672
+ 0,
673
+ 0,
674
+ 0,
675
+ 0,
676
+ 0,
677
+ 0,
678
+ 0,
679
+ 0,
680
+ 0,
681
+ 0,
682
+ 0,
683
+ 0,
684
+ 0,
685
+ 0,
686
+ 0,
687
+ 0,
688
+ 0,
689
+ 0,
690
+ 0,
691
+ 0,
692
+ 0,
693
+ 1000,
694
+ 0,
695
+ 0,
696
+ 0,
697
+ 0,
698
+ 0,
699
+ 0
700
+ ],
701
+ [
702
+ 0,
703
+ 0,
704
+ 0,
705
+ 0,
706
+ 0,
707
+ 0,
708
+ 0,
709
+ 0,
710
+ 0,
711
+ 0,
712
+ 0,
713
+ 0,
714
+ 0,
715
+ 0,
716
+ 0,
717
+ 0,
718
+ 0,
719
+ 0,
720
+ 0,
721
+ 0,
722
+ 0,
723
+ 0,
724
+ 1000,
725
+ 0,
726
+ 0,
727
+ 0,
728
+ 0,
729
+ 0
730
+ ],
731
+ [
732
+ 0,
733
+ 0,
734
+ 0,
735
+ 0,
736
+ 0,
737
+ 0,
738
+ 0,
739
+ 0,
740
+ 0,
741
+ 0,
742
+ 0,
743
+ 0,
744
+ 0,
745
+ 0,
746
+ 0,
747
+ 0,
748
+ 0,
749
+ 0,
750
+ 0,
751
+ 0,
752
+ 0,
753
+ 0,
754
+ 0,
755
+ 1000,
756
+ 0,
757
+ 0,
758
+ 0,
759
+ 0
760
+ ],
761
+ [
762
+ 0,
763
+ 0,
764
+ 0,
765
+ 0,
766
+ 0,
767
+ 0,
768
+ 0,
769
+ 0,
770
+ 0,
771
+ 0,
772
+ 0,
773
+ 0,
774
+ 0,
775
+ 0,
776
+ 0,
777
+ 0,
778
+ 0,
779
+ 0,
780
+ 0,
781
+ 0,
782
+ 0,
783
+ 0,
784
+ 0,
785
+ 0,
786
+ 1000,
787
+ 0,
788
+ 0,
789
+ 0
790
+ ],
791
+ [
792
+ 0,
793
+ 0,
794
+ 0,
795
+ 0,
796
+ 0,
797
+ 0,
798
+ 0,
799
+ 0,
800
+ 0,
801
+ 0,
802
+ 0,
803
+ 0,
804
+ 0,
805
+ 0,
806
+ 0,
807
+ 0,
808
+ 0,
809
+ 0,
810
+ 0,
811
+ 0,
812
+ 0,
813
+ 0,
814
+ 0,
815
+ 0,
816
+ 0,
817
+ 1000,
818
+ 0,
819
+ 0
820
+ ],
821
+ [
822
+ 0,
823
+ 0,
824
+ 0,
825
+ 0,
826
+ 0,
827
+ 0,
828
+ 0,
829
+ 0,
830
+ 0,
831
+ 0,
832
+ 0,
833
+ 0,
834
+ 0,
835
+ 0,
836
+ 0,
837
+ 0,
838
+ 0,
839
+ 0,
840
+ 0,
841
+ 0,
842
+ 0,
843
+ 0,
844
+ 0,
845
+ 0,
846
+ 0,
847
+ 0,
848
+ 4978,
849
+ 22
850
+ ],
851
+ [
852
+ 0,
853
+ 0,
854
+ 0,
855
+ 0,
856
+ 0,
857
+ 0,
858
+ 0,
859
+ 0,
860
+ 0,
861
+ 0,
862
+ 0,
863
+ 0,
864
+ 0,
865
+ 0,
866
+ 0,
867
+ 0,
868
+ 0,
869
+ 0,
870
+ 0,
871
+ 0,
872
+ 0,
873
+ 0,
874
+ 0,
875
+ 0,
876
+ 0,
877
+ 0,
878
+ 44,
879
+ 4956
880
+ ]
881
+ ],
882
+ "calibration_error": 0.0007514102792564575,
883
+ "classification_report": {
884
+ "constraint_scope::finance": {
885
+ "precision": 1.0,
886
+ "recall": 1.0,
887
+ "f1-score": 1.0,
888
+ "support": 1000.0
889
+ },
890
+ "constraint_scope::food": {
891
+ "precision": 1.0,
892
+ "recall": 1.0,
893
+ "f1-score": 1.0,
894
+ "support": 1000.0
895
+ },
896
+ "constraint_scope::general": {
897
+ "precision": 1.0,
898
+ "recall": 1.0,
899
+ "f1-score": 1.0,
900
+ "support": 1000.0
901
+ },
902
+ "constraint_scope::health": {
903
+ "precision": 1.0,
904
+ "recall": 1.0,
905
+ "f1-score": 1.0,
906
+ "support": 1000.0
907
+ },
908
+ "constraint_scope::none": {
909
+ "precision": 1.0,
910
+ "recall": 1.0,
911
+ "f1-score": 1.0,
912
+ "support": 1000.0
913
+ },
914
+ "constraint_scope::social": {
915
+ "precision": 1.0,
916
+ "recall": 1.0,
917
+ "f1-score": 1.0,
918
+ "support": 1000.0
919
+ },
920
+ "constraint_scope::tech": {
921
+ "precision": 1.0,
922
+ "recall": 1.0,
923
+ "f1-score": 1.0,
924
+ "support": 1000.0
925
+ },
926
+ "constraint_scope::travel": {
927
+ "precision": 1.0,
928
+ "recall": 1.0,
929
+ "f1-score": 1.0,
930
+ "support": 1000.0
931
+ },
932
+ "constraint_scope::work": {
933
+ "precision": 1.0,
934
+ "recall": 1.0,
935
+ "f1-score": 1.0,
936
+ "support": 1000.0
937
+ },
938
+ "constraint_stability::semi_stable": {
939
+ "precision": 1.0,
940
+ "recall": 1.0,
941
+ "f1-score": 1.0,
942
+ "support": 1000.0
943
+ },
944
+ "constraint_stability::stable": {
945
+ "precision": 1.0,
946
+ "recall": 1.0,
947
+ "f1-score": 1.0,
948
+ "support": 1000.0
949
+ },
950
+ "constraint_stability::volatile": {
951
+ "precision": 1.0,
952
+ "recall": 1.0,
953
+ "f1-score": 1.0,
954
+ "support": 1000.0
955
+ },
956
+ "constraint_type::causal": {
957
+ "precision": 1.0,
958
+ "recall": 1.0,
959
+ "f1-score": 1.0,
960
+ "support": 1000.0
961
+ },
962
+ "constraint_type::constraint_other": {
963
+ "precision": 1.0,
964
+ "recall": 1.0,
965
+ "f1-score": 1.0,
966
+ "support": 1000.0
967
+ },
968
+ "constraint_type::goal": {
969
+ "precision": 1.0,
970
+ "recall": 1.0,
971
+ "f1-score": 1.0,
972
+ "support": 1000.0
973
+ },
974
+ "constraint_type::none": {
975
+ "precision": 1.0,
976
+ "recall": 1.0,
977
+ "f1-score": 1.0,
978
+ "support": 1000.0
979
+ },
980
+ "constraint_type::policy": {
981
+ "precision": 1.0,
982
+ "recall": 1.0,
983
+ "f1-score": 1.0,
984
+ "support": 1000.0
985
+ },
986
+ "constraint_type::preference": {
987
+ "precision": 1.0,
988
+ "recall": 1.0,
989
+ "f1-score": 1.0,
990
+ "support": 1000.0
991
+ },
992
+ "constraint_type::state": {
993
+ "precision": 1.0,
994
+ "recall": 1.0,
995
+ "f1-score": 1.0,
996
+ "support": 1000.0
997
+ },
998
+ "constraint_type::value": {
999
+ "precision": 1.0,
1000
+ "recall": 1.0,
1001
+ "f1-score": 1.0,
1002
+ "support": 1000.0
1003
+ },
1004
+ "fact_type::identity": {
1005
+ "precision": 1.0,
1006
+ "recall": 1.0,
1007
+ "f1-score": 1.0,
1008
+ "support": 1000.0
1009
+ },
1010
+ "fact_type::location": {
1011
+ "precision": 1.0,
1012
+ "recall": 1.0,
1013
+ "f1-score": 1.0,
1014
+ "support": 1000.0
1015
+ },
1016
+ "fact_type::none": {
1017
+ "precision": 1.0,
1018
+ "recall": 1.0,
1019
+ "f1-score": 1.0,
1020
+ "support": 1000.0
1021
+ },
1022
+ "fact_type::occupation": {
1023
+ "precision": 1.0,
1024
+ "recall": 1.0,
1025
+ "f1-score": 1.0,
1026
+ "support": 1000.0
1027
+ },
1028
+ "fact_type::other_fact": {
1029
+ "precision": 1.0,
1030
+ "recall": 1.0,
1031
+ "f1-score": 1.0,
1032
+ "support": 1000.0
1033
+ },
1034
+ "fact_type::preference": {
1035
+ "precision": 1.0,
1036
+ "recall": 1.0,
1037
+ "f1-score": 1.0,
1038
+ "support": 1000.0
1039
+ },
1040
+ "pii_presence::no_pii": {
1041
+ "precision": 0.9912385503783353,
1042
+ "recall": 0.9956,
1043
+ "f1-score": 0.9934144881261225,
1044
+ "support": 5000.0
1045
+ },
1046
+ "pii_presence::pii": {
1047
+ "precision": 0.995580554439534,
1048
+ "recall": 0.9912,
1049
+ "f1-score": 0.9933854479855683,
1050
+ "support": 5000.0
1051
+ },
1052
+ "accuracy": 0.9981666666666666,
1053
+ "macro avg": {
1054
+ "precision": 0.9995292537434953,
1055
+ "recall": 0.9995285714285714,
1056
+ "f1-score": 0.9995285691468461,
1057
+ "support": 36000.0
1058
+ },
1059
+ "weighted avg": {
1060
+ "precision": 0.9981693201135929,
1061
+ "recall": 0.9981666666666666,
1062
+ "f1-score": 0.9981666577932904,
1063
+ "support": 36000.0
1064
+ }
1065
+ }
1066
+ },
1067
+ "per_task": {
1068
+ "constraint_scope": {
1069
+ "rows": 9000,
1070
+ "accuracy": 1.0,
1071
+ "macro_f1": 1.0,
1072
+ "weighted_f1": 1.0,
1073
+ "micro_f1": 1.0,
1074
+ "labels": [
1075
+ "finance",
1076
+ "food",
1077
+ "general",
1078
+ "health",
1079
+ "none",
1080
+ "social",
1081
+ "tech",
1082
+ "travel",
1083
+ "work"
1084
+ ],
1085
+ "confusion_matrix": [
1086
+ [
1087
+ 1000,
1088
+ 0,
1089
+ 0,
1090
+ 0,
1091
+ 0,
1092
+ 0,
1093
+ 0,
1094
+ 0,
1095
+ 0
1096
+ ],
1097
+ [
1098
+ 0,
1099
+ 1000,
1100
+ 0,
1101
+ 0,
1102
+ 0,
1103
+ 0,
1104
+ 0,
1105
+ 0,
1106
+ 0
1107
+ ],
1108
+ [
1109
+ 0,
1110
+ 0,
1111
+ 1000,
1112
+ 0,
1113
+ 0,
1114
+ 0,
1115
+ 0,
1116
+ 0,
1117
+ 0
1118
+ ],
1119
+ [
1120
+ 0,
1121
+ 0,
1122
+ 0,
1123
+ 1000,
1124
+ 0,
1125
+ 0,
1126
+ 0,
1127
+ 0,
1128
+ 0
1129
+ ],
1130
+ [
1131
+ 0,
1132
+ 0,
1133
+ 0,
1134
+ 0,
1135
+ 1000,
1136
+ 0,
1137
+ 0,
1138
+ 0,
1139
+ 0
1140
+ ],
1141
+ [
1142
+ 0,
1143
+ 0,
1144
+ 0,
1145
+ 0,
1146
+ 0,
1147
+ 1000,
1148
+ 0,
1149
+ 0,
1150
+ 0
1151
+ ],
1152
+ [
1153
+ 0,
1154
+ 0,
1155
+ 0,
1156
+ 0,
1157
+ 0,
1158
+ 0,
1159
+ 1000,
1160
+ 0,
1161
+ 0
1162
+ ],
1163
+ [
1164
+ 0,
1165
+ 0,
1166
+ 0,
1167
+ 0,
1168
+ 0,
1169
+ 0,
1170
+ 0,
1171
+ 1000,
1172
+ 0
1173
+ ],
1174
+ [
1175
+ 0,
1176
+ 0,
1177
+ 0,
1178
+ 0,
1179
+ 0,
1180
+ 0,
1181
+ 0,
1182
+ 0,
1183
+ 1000
1184
+ ]
1185
+ ],
1186
+ "wrong_task_predictions": 0,
1187
+ "wrong_task_rate": 0.0
1188
+ },
1189
+ "constraint_stability": {
1190
+ "rows": 3000,
1191
+ "accuracy": 1.0,
1192
+ "macro_f1": 1.0,
1193
+ "weighted_f1": 1.0,
1194
+ "micro_f1": 1.0,
1195
+ "labels": [
1196
+ "semi_stable",
1197
+ "stable",
1198
+ "volatile"
1199
+ ],
1200
+ "confusion_matrix": [
1201
+ [
1202
+ 1000,
1203
+ 0,
1204
+ 0
1205
+ ],
1206
+ [
1207
+ 0,
1208
+ 1000,
1209
+ 0
1210
+ ],
1211
+ [
1212
+ 0,
1213
+ 0,
1214
+ 1000
1215
+ ]
1216
+ ],
1217
+ "wrong_task_predictions": 0,
1218
+ "wrong_task_rate": 0.0
1219
+ },
1220
+ "constraint_type": {
1221
+ "rows": 8000,
1222
+ "accuracy": 1.0,
1223
+ "macro_f1": 1.0,
1224
+ "weighted_f1": 1.0,
1225
+ "micro_f1": 1.0,
1226
+ "labels": [
1227
+ "causal",
1228
+ "constraint_other",
1229
+ "goal",
1230
+ "none",
1231
+ "policy",
1232
+ "preference",
1233
+ "state",
1234
+ "value"
1235
+ ],
1236
+ "confusion_matrix": [
1237
+ [
1238
+ 1000,
1239
+ 0,
1240
+ 0,
1241
+ 0,
1242
+ 0,
1243
+ 0,
1244
+ 0,
1245
+ 0
1246
+ ],
1247
+ [
1248
+ 0,
1249
+ 1000,
1250
+ 0,
1251
+ 0,
1252
+ 0,
1253
+ 0,
1254
+ 0,
1255
+ 0
1256
+ ],
1257
+ [
1258
+ 0,
1259
+ 0,
1260
+ 1000,
1261
+ 0,
1262
+ 0,
1263
+ 0,
1264
+ 0,
1265
+ 0
1266
+ ],
1267
+ [
1268
+ 0,
1269
+ 0,
1270
+ 0,
1271
+ 1000,
1272
+ 0,
1273
+ 0,
1274
+ 0,
1275
+ 0
1276
+ ],
1277
+ [
1278
+ 0,
1279
+ 0,
1280
+ 0,
1281
+ 0,
1282
+ 1000,
1283
+ 0,
1284
+ 0,
1285
+ 0
1286
+ ],
1287
+ [
1288
+ 0,
1289
+ 0,
1290
+ 0,
1291
+ 0,
1292
+ 0,
1293
+ 1000,
1294
+ 0,
1295
+ 0
1296
+ ],
1297
+ [
1298
+ 0,
1299
+ 0,
1300
+ 0,
1301
+ 0,
1302
+ 0,
1303
+ 0,
1304
+ 1000,
1305
+ 0
1306
+ ],
1307
+ [
1308
+ 0,
1309
+ 0,
1310
+ 0,
1311
+ 0,
1312
+ 0,
1313
+ 0,
1314
+ 0,
1315
+ 1000
1316
+ ]
1317
+ ],
1318
+ "wrong_task_predictions": 0,
1319
+ "wrong_task_rate": 0.0
1320
+ },
1321
+ "fact_type": {
1322
+ "rows": 6000,
1323
+ "accuracy": 1.0,
1324
+ "macro_f1": 1.0,
1325
+ "weighted_f1": 1.0,
1326
+ "micro_f1": 1.0,
1327
+ "labels": [
1328
+ "identity",
1329
+ "location",
1330
+ "none",
1331
+ "occupation",
1332
+ "other_fact",
1333
+ "preference"
1334
+ ],
1335
+ "confusion_matrix": [
1336
+ [
1337
+ 1000,
1338
+ 0,
1339
+ 0,
1340
+ 0,
1341
+ 0,
1342
+ 0
1343
+ ],
1344
+ [
1345
+ 0,
1346
+ 1000,
1347
+ 0,
1348
+ 0,
1349
+ 0,
1350
+ 0
1351
+ ],
1352
+ [
1353
+ 0,
1354
+ 0,
1355
+ 1000,
1356
+ 0,
1357
+ 0,
1358
+ 0
1359
+ ],
1360
+ [
1361
+ 0,
1362
+ 0,
1363
+ 0,
1364
+ 1000,
1365
+ 0,
1366
+ 0
1367
+ ],
1368
+ [
1369
+ 0,
1370
+ 0,
1371
+ 0,
1372
+ 0,
1373
+ 1000,
1374
+ 0
1375
+ ],
1376
+ [
1377
+ 0,
1378
+ 0,
1379
+ 0,
1380
+ 0,
1381
+ 0,
1382
+ 1000
1383
+ ]
1384
+ ],
1385
+ "wrong_task_predictions": 0,
1386
+ "wrong_task_rate": 0.0
1387
+ },
1388
+ "pii_presence": {
1389
+ "rows": 10000,
1390
+ "accuracy": 0.9934,
1391
+ "macro_f1": 0.9933999680558454,
1392
+ "weighted_f1": 0.9933999680558454,
1393
+ "micro_f1": 0.9934,
1394
+ "labels": [
1395
+ "no_pii",
1396
+ "pii"
1397
+ ],
1398
+ "confusion_matrix": [
1399
+ [
1400
+ 4978,
1401
+ 22
1402
+ ],
1403
+ [
1404
+ 44,
1405
+ 4956
1406
+ ]
1407
+ ],
1408
+ "wrong_task_predictions": 0,
1409
+ "wrong_task_rate": 0.0
1410
+ }
1411
+ },
1412
+ "calibration": {
1413
+ "method": "task_conditional_sigmoid",
1414
+ "split": "eval",
1415
+ "rows": 36000,
1416
+ "pre_ece": 0.06013730731601039,
1417
+ "post_ece": 0.0007514102792564575,
1418
+ "pre_accuracy": 0.9978333333333333,
1419
+ "post_accuracy": 0.9981666666666666,
1420
+ "accuracy_delta": 0.0003333333333332966,
1421
+ "tasks": {
1422
+ "constraint_scope": {
1423
+ "rows": 9000,
1424
+ "pre_ece": 0.0073926387441783925,
1425
+ "post_ece": 0.00019267901287378653,
1426
+ "pre_accuracy": 1.0,
1427
+ "post_accuracy": 1.0,
1428
+ "accuracy_delta": 0.0
1429
+ },
1430
+ "constraint_stability": {
1431
+ "rows": 3000,
1432
+ "pre_ece": 0.0035802200536977353,
1433
+ "post_ece": 0.00016981111181246789,
1434
+ "pre_accuracy": 1.0,
1435
+ "post_accuracy": 1.0,
1436
+ "accuracy_delta": 0.0
1437
+ },
1438
+ "constraint_type": {
1439
+ "rows": 8000,
1440
+ "pre_ece": 0.0152260080692046,
1441
+ "post_ece": 0.0002949549691239062,
1442
+ "pre_accuracy": 1.0,
1443
+ "post_accuracy": 1.0,
1444
+ "accuracy_delta": 0.0
1445
+ },
1446
+ "fact_type": {
1447
+ "rows": 6000,
1448
+ "pre_ece": 0.008209015508071071,
1449
+ "post_ece": 0.00021106970310058593,
1450
+ "pre_accuracy": 1.0,
1451
+ "post_accuracy": 1.0,
1452
+ "accuracy_delta": 0.0
1453
+ },
1454
+ "pii_presence": {
1455
+ "rows": 10000,
1456
+ "pre_ece": 0.1278197693399604,
1457
+ "post_ece": 0.002118116763033487,
1458
+ "pre_accuracy": 0.9922,
1459
+ "post_accuracy": 0.9934,
1460
+ "accuracy_delta": 0.0011999999999999789
1461
+ }
1462
+ }
1463
+ }
1464
+ }
extractor_metrics_test.json ADDED
@@ -0,0 +1,1464 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "family": "extractor",
3
+ "split": "test",
4
+ "overall": {
5
+ "rows": 36000,
6
+ "accuracy": 0.9974166666666666,
7
+ "macro_f1": 0.9993357139602141,
8
+ "weighted_f1": 0.9974166654008327,
9
+ "micro_f1": 0.9974166666666666,
10
+ "labels": [
11
+ "constraint_scope::finance",
12
+ "constraint_scope::food",
13
+ "constraint_scope::general",
14
+ "constraint_scope::health",
15
+ "constraint_scope::none",
16
+ "constraint_scope::social",
17
+ "constraint_scope::tech",
18
+ "constraint_scope::travel",
19
+ "constraint_scope::work",
20
+ "constraint_stability::semi_stable",
21
+ "constraint_stability::stable",
22
+ "constraint_stability::volatile",
23
+ "constraint_type::causal",
24
+ "constraint_type::constraint_other",
25
+ "constraint_type::goal",
26
+ "constraint_type::none",
27
+ "constraint_type::policy",
28
+ "constraint_type::preference",
29
+ "constraint_type::state",
30
+ "constraint_type::value",
31
+ "fact_type::identity",
32
+ "fact_type::location",
33
+ "fact_type::none",
34
+ "fact_type::occupation",
35
+ "fact_type::other_fact",
36
+ "fact_type::preference",
37
+ "pii_presence::no_pii",
38
+ "pii_presence::pii"
39
+ ],
40
+ "confusion_matrix": [
41
+ [
42
+ 1000,
43
+ 0,
44
+ 0,
45
+ 0,
46
+ 0,
47
+ 0,
48
+ 0,
49
+ 0,
50
+ 0,
51
+ 0,
52
+ 0,
53
+ 0,
54
+ 0,
55
+ 0,
56
+ 0,
57
+ 0,
58
+ 0,
59
+ 0,
60
+ 0,
61
+ 0,
62
+ 0,
63
+ 0,
64
+ 0,
65
+ 0,
66
+ 0,
67
+ 0,
68
+ 0,
69
+ 0
70
+ ],
71
+ [
72
+ 0,
73
+ 1000,
74
+ 0,
75
+ 0,
76
+ 0,
77
+ 0,
78
+ 0,
79
+ 0,
80
+ 0,
81
+ 0,
82
+ 0,
83
+ 0,
84
+ 0,
85
+ 0,
86
+ 0,
87
+ 0,
88
+ 0,
89
+ 0,
90
+ 0,
91
+ 0,
92
+ 0,
93
+ 0,
94
+ 0,
95
+ 0,
96
+ 0,
97
+ 0,
98
+ 0,
99
+ 0
100
+ ],
101
+ [
102
+ 0,
103
+ 0,
104
+ 1000,
105
+ 0,
106
+ 0,
107
+ 0,
108
+ 0,
109
+ 0,
110
+ 0,
111
+ 0,
112
+ 0,
113
+ 0,
114
+ 0,
115
+ 0,
116
+ 0,
117
+ 0,
118
+ 0,
119
+ 0,
120
+ 0,
121
+ 0,
122
+ 0,
123
+ 0,
124
+ 0,
125
+ 0,
126
+ 0,
127
+ 0,
128
+ 0,
129
+ 0
130
+ ],
131
+ [
132
+ 0,
133
+ 0,
134
+ 0,
135
+ 1000,
136
+ 0,
137
+ 0,
138
+ 0,
139
+ 0,
140
+ 0,
141
+ 0,
142
+ 0,
143
+ 0,
144
+ 0,
145
+ 0,
146
+ 0,
147
+ 0,
148
+ 0,
149
+ 0,
150
+ 0,
151
+ 0,
152
+ 0,
153
+ 0,
154
+ 0,
155
+ 0,
156
+ 0,
157
+ 0,
158
+ 0,
159
+ 0
160
+ ],
161
+ [
162
+ 0,
163
+ 0,
164
+ 0,
165
+ 0,
166
+ 1000,
167
+ 0,
168
+ 0,
169
+ 0,
170
+ 0,
171
+ 0,
172
+ 0,
173
+ 0,
174
+ 0,
175
+ 0,
176
+ 0,
177
+ 0,
178
+ 0,
179
+ 0,
180
+ 0,
181
+ 0,
182
+ 0,
183
+ 0,
184
+ 0,
185
+ 0,
186
+ 0,
187
+ 0,
188
+ 0,
189
+ 0
190
+ ],
191
+ [
192
+ 0,
193
+ 0,
194
+ 0,
195
+ 0,
196
+ 0,
197
+ 1000,
198
+ 0,
199
+ 0,
200
+ 0,
201
+ 0,
202
+ 0,
203
+ 0,
204
+ 0,
205
+ 0,
206
+ 0,
207
+ 0,
208
+ 0,
209
+ 0,
210
+ 0,
211
+ 0,
212
+ 0,
213
+ 0,
214
+ 0,
215
+ 0,
216
+ 0,
217
+ 0,
218
+ 0,
219
+ 0
220
+ ],
221
+ [
222
+ 0,
223
+ 0,
224
+ 0,
225
+ 0,
226
+ 0,
227
+ 0,
228
+ 1000,
229
+ 0,
230
+ 0,
231
+ 0,
232
+ 0,
233
+ 0,
234
+ 0,
235
+ 0,
236
+ 0,
237
+ 0,
238
+ 0,
239
+ 0,
240
+ 0,
241
+ 0,
242
+ 0,
243
+ 0,
244
+ 0,
245
+ 0,
246
+ 0,
247
+ 0,
248
+ 0,
249
+ 0
250
+ ],
251
+ [
252
+ 0,
253
+ 0,
254
+ 0,
255
+ 0,
256
+ 0,
257
+ 0,
258
+ 0,
259
+ 1000,
260
+ 0,
261
+ 0,
262
+ 0,
263
+ 0,
264
+ 0,
265
+ 0,
266
+ 0,
267
+ 0,
268
+ 0,
269
+ 0,
270
+ 0,
271
+ 0,
272
+ 0,
273
+ 0,
274
+ 0,
275
+ 0,
276
+ 0,
277
+ 0,
278
+ 0,
279
+ 0
280
+ ],
281
+ [
282
+ 0,
283
+ 0,
284
+ 0,
285
+ 0,
286
+ 0,
287
+ 0,
288
+ 0,
289
+ 0,
290
+ 1000,
291
+ 0,
292
+ 0,
293
+ 0,
294
+ 0,
295
+ 0,
296
+ 0,
297
+ 0,
298
+ 0,
299
+ 0,
300
+ 0,
301
+ 0,
302
+ 0,
303
+ 0,
304
+ 0,
305
+ 0,
306
+ 0,
307
+ 0,
308
+ 0,
309
+ 0
310
+ ],
311
+ [
312
+ 0,
313
+ 0,
314
+ 0,
315
+ 0,
316
+ 0,
317
+ 0,
318
+ 0,
319
+ 0,
320
+ 0,
321
+ 1000,
322
+ 0,
323
+ 0,
324
+ 0,
325
+ 0,
326
+ 0,
327
+ 0,
328
+ 0,
329
+ 0,
330
+ 0,
331
+ 0,
332
+ 0,
333
+ 0,
334
+ 0,
335
+ 0,
336
+ 0,
337
+ 0,
338
+ 0,
339
+ 0
340
+ ],
341
+ [
342
+ 0,
343
+ 0,
344
+ 0,
345
+ 0,
346
+ 0,
347
+ 0,
348
+ 0,
349
+ 0,
350
+ 0,
351
+ 0,
352
+ 1000,
353
+ 0,
354
+ 0,
355
+ 0,
356
+ 0,
357
+ 0,
358
+ 0,
359
+ 0,
360
+ 0,
361
+ 0,
362
+ 0,
363
+ 0,
364
+ 0,
365
+ 0,
366
+ 0,
367
+ 0,
368
+ 0,
369
+ 0
370
+ ],
371
+ [
372
+ 0,
373
+ 0,
374
+ 0,
375
+ 0,
376
+ 0,
377
+ 0,
378
+ 0,
379
+ 0,
380
+ 0,
381
+ 0,
382
+ 0,
383
+ 1000,
384
+ 0,
385
+ 0,
386
+ 0,
387
+ 0,
388
+ 0,
389
+ 0,
390
+ 0,
391
+ 0,
392
+ 0,
393
+ 0,
394
+ 0,
395
+ 0,
396
+ 0,
397
+ 0,
398
+ 0,
399
+ 0
400
+ ],
401
+ [
402
+ 0,
403
+ 0,
404
+ 0,
405
+ 0,
406
+ 0,
407
+ 0,
408
+ 0,
409
+ 0,
410
+ 0,
411
+ 0,
412
+ 0,
413
+ 0,
414
+ 1000,
415
+ 0,
416
+ 0,
417
+ 0,
418
+ 0,
419
+ 0,
420
+ 0,
421
+ 0,
422
+ 0,
423
+ 0,
424
+ 0,
425
+ 0,
426
+ 0,
427
+ 0,
428
+ 0,
429
+ 0
430
+ ],
431
+ [
432
+ 0,
433
+ 0,
434
+ 0,
435
+ 0,
436
+ 0,
437
+ 0,
438
+ 0,
439
+ 0,
440
+ 0,
441
+ 0,
442
+ 0,
443
+ 0,
444
+ 0,
445
+ 1000,
446
+ 0,
447
+ 0,
448
+ 0,
449
+ 0,
450
+ 0,
451
+ 0,
452
+ 0,
453
+ 0,
454
+ 0,
455
+ 0,
456
+ 0,
457
+ 0,
458
+ 0,
459
+ 0
460
+ ],
461
+ [
462
+ 0,
463
+ 0,
464
+ 0,
465
+ 0,
466
+ 0,
467
+ 0,
468
+ 0,
469
+ 0,
470
+ 0,
471
+ 0,
472
+ 0,
473
+ 0,
474
+ 0,
475
+ 0,
476
+ 1000,
477
+ 0,
478
+ 0,
479
+ 0,
480
+ 0,
481
+ 0,
482
+ 0,
483
+ 0,
484
+ 0,
485
+ 0,
486
+ 0,
487
+ 0,
488
+ 0,
489
+ 0
490
+ ],
491
+ [
492
+ 0,
493
+ 0,
494
+ 0,
495
+ 0,
496
+ 0,
497
+ 0,
498
+ 0,
499
+ 0,
500
+ 0,
501
+ 0,
502
+ 0,
503
+ 0,
504
+ 0,
505
+ 0,
506
+ 0,
507
+ 1000,
508
+ 0,
509
+ 0,
510
+ 0,
511
+ 0,
512
+ 0,
513
+ 0,
514
+ 0,
515
+ 0,
516
+ 0,
517
+ 0,
518
+ 0,
519
+ 0
520
+ ],
521
+ [
522
+ 0,
523
+ 0,
524
+ 0,
525
+ 0,
526
+ 0,
527
+ 0,
528
+ 0,
529
+ 0,
530
+ 0,
531
+ 0,
532
+ 0,
533
+ 0,
534
+ 0,
535
+ 0,
536
+ 0,
537
+ 0,
538
+ 1000,
539
+ 0,
540
+ 0,
541
+ 0,
542
+ 0,
543
+ 0,
544
+ 0,
545
+ 0,
546
+ 0,
547
+ 0,
548
+ 0,
549
+ 0
550
+ ],
551
+ [
552
+ 0,
553
+ 0,
554
+ 0,
555
+ 0,
556
+ 0,
557
+ 0,
558
+ 0,
559
+ 0,
560
+ 0,
561
+ 0,
562
+ 0,
563
+ 0,
564
+ 0,
565
+ 0,
566
+ 0,
567
+ 0,
568
+ 0,
569
+ 1000,
570
+ 0,
571
+ 0,
572
+ 0,
573
+ 0,
574
+ 0,
575
+ 0,
576
+ 0,
577
+ 0,
578
+ 0,
579
+ 0
580
+ ],
581
+ [
582
+ 0,
583
+ 0,
584
+ 0,
585
+ 0,
586
+ 0,
587
+ 0,
588
+ 0,
589
+ 0,
590
+ 0,
591
+ 0,
592
+ 0,
593
+ 0,
594
+ 0,
595
+ 0,
596
+ 0,
597
+ 0,
598
+ 0,
599
+ 0,
600
+ 1000,
601
+ 0,
602
+ 0,
603
+ 0,
604
+ 0,
605
+ 0,
606
+ 0,
607
+ 0,
608
+ 0,
609
+ 0
610
+ ],
611
+ [
612
+ 0,
613
+ 0,
614
+ 0,
615
+ 0,
616
+ 0,
617
+ 0,
618
+ 0,
619
+ 0,
620
+ 0,
621
+ 0,
622
+ 0,
623
+ 0,
624
+ 0,
625
+ 0,
626
+ 0,
627
+ 0,
628
+ 0,
629
+ 0,
630
+ 0,
631
+ 1000,
632
+ 0,
633
+ 0,
634
+ 0,
635
+ 0,
636
+ 0,
637
+ 0,
638
+ 0,
639
+ 0
640
+ ],
641
+ [
642
+ 0,
643
+ 0,
644
+ 0,
645
+ 0,
646
+ 0,
647
+ 0,
648
+ 0,
649
+ 0,
650
+ 0,
651
+ 0,
652
+ 0,
653
+ 0,
654
+ 0,
655
+ 0,
656
+ 0,
657
+ 0,
658
+ 0,
659
+ 0,
660
+ 0,
661
+ 0,
662
+ 1000,
663
+ 0,
664
+ 0,
665
+ 0,
666
+ 0,
667
+ 0,
668
+ 0,
669
+ 0
670
+ ],
671
+ [
672
+ 0,
673
+ 0,
674
+ 0,
675
+ 0,
676
+ 0,
677
+ 0,
678
+ 0,
679
+ 0,
680
+ 0,
681
+ 0,
682
+ 0,
683
+ 0,
684
+ 0,
685
+ 0,
686
+ 0,
687
+ 0,
688
+ 0,
689
+ 0,
690
+ 0,
691
+ 0,
692
+ 0,
693
+ 1000,
694
+ 0,
695
+ 0,
696
+ 0,
697
+ 0,
698
+ 0,
699
+ 0
700
+ ],
701
+ [
702
+ 0,
703
+ 0,
704
+ 0,
705
+ 0,
706
+ 0,
707
+ 0,
708
+ 0,
709
+ 0,
710
+ 0,
711
+ 0,
712
+ 0,
713
+ 0,
714
+ 0,
715
+ 0,
716
+ 0,
717
+ 0,
718
+ 0,
719
+ 0,
720
+ 0,
721
+ 0,
722
+ 0,
723
+ 0,
724
+ 1000,
725
+ 0,
726
+ 0,
727
+ 0,
728
+ 0,
729
+ 0
730
+ ],
731
+ [
732
+ 0,
733
+ 0,
734
+ 0,
735
+ 0,
736
+ 0,
737
+ 0,
738
+ 0,
739
+ 0,
740
+ 0,
741
+ 0,
742
+ 0,
743
+ 0,
744
+ 0,
745
+ 0,
746
+ 0,
747
+ 0,
748
+ 0,
749
+ 0,
750
+ 0,
751
+ 0,
752
+ 0,
753
+ 0,
754
+ 0,
755
+ 1000,
756
+ 0,
757
+ 0,
758
+ 0,
759
+ 0
760
+ ],
761
+ [
762
+ 0,
763
+ 0,
764
+ 0,
765
+ 0,
766
+ 0,
767
+ 0,
768
+ 0,
769
+ 0,
770
+ 0,
771
+ 0,
772
+ 0,
773
+ 0,
774
+ 0,
775
+ 0,
776
+ 0,
777
+ 0,
778
+ 0,
779
+ 0,
780
+ 0,
781
+ 0,
782
+ 0,
783
+ 0,
784
+ 0,
785
+ 0,
786
+ 1000,
787
+ 0,
788
+ 0,
789
+ 0
790
+ ],
791
+ [
792
+ 0,
793
+ 0,
794
+ 0,
795
+ 0,
796
+ 0,
797
+ 0,
798
+ 0,
799
+ 0,
800
+ 0,
801
+ 0,
802
+ 0,
803
+ 0,
804
+ 0,
805
+ 0,
806
+ 0,
807
+ 0,
808
+ 0,
809
+ 0,
810
+ 0,
811
+ 0,
812
+ 0,
813
+ 0,
814
+ 0,
815
+ 0,
816
+ 0,
817
+ 1000,
818
+ 0,
819
+ 0
820
+ ],
821
+ [
822
+ 0,
823
+ 0,
824
+ 0,
825
+ 0,
826
+ 0,
827
+ 0,
828
+ 0,
829
+ 0,
830
+ 0,
831
+ 0,
832
+ 0,
833
+ 0,
834
+ 0,
835
+ 0,
836
+ 0,
837
+ 0,
838
+ 0,
839
+ 0,
840
+ 0,
841
+ 0,
842
+ 0,
843
+ 0,
844
+ 0,
845
+ 0,
846
+ 0,
847
+ 0,
848
+ 4957,
849
+ 43
850
+ ],
851
+ [
852
+ 0,
853
+ 0,
854
+ 0,
855
+ 0,
856
+ 0,
857
+ 0,
858
+ 0,
859
+ 0,
860
+ 0,
861
+ 0,
862
+ 0,
863
+ 0,
864
+ 0,
865
+ 0,
866
+ 0,
867
+ 0,
868
+ 0,
869
+ 0,
870
+ 0,
871
+ 0,
872
+ 0,
873
+ 0,
874
+ 0,
875
+ 0,
876
+ 0,
877
+ 0,
878
+ 50,
879
+ 4950
880
+ ]
881
+ ],
882
+ "calibration_error": 0.00037925340005441757,
883
+ "classification_report": {
884
+ "constraint_scope::finance": {
885
+ "precision": 1.0,
886
+ "recall": 1.0,
887
+ "f1-score": 1.0,
888
+ "support": 1000.0
889
+ },
890
+ "constraint_scope::food": {
891
+ "precision": 1.0,
892
+ "recall": 1.0,
893
+ "f1-score": 1.0,
894
+ "support": 1000.0
895
+ },
896
+ "constraint_scope::general": {
897
+ "precision": 1.0,
898
+ "recall": 1.0,
899
+ "f1-score": 1.0,
900
+ "support": 1000.0
901
+ },
902
+ "constraint_scope::health": {
903
+ "precision": 1.0,
904
+ "recall": 1.0,
905
+ "f1-score": 1.0,
906
+ "support": 1000.0
907
+ },
908
+ "constraint_scope::none": {
909
+ "precision": 1.0,
910
+ "recall": 1.0,
911
+ "f1-score": 1.0,
912
+ "support": 1000.0
913
+ },
914
+ "constraint_scope::social": {
915
+ "precision": 1.0,
916
+ "recall": 1.0,
917
+ "f1-score": 1.0,
918
+ "support": 1000.0
919
+ },
920
+ "constraint_scope::tech": {
921
+ "precision": 1.0,
922
+ "recall": 1.0,
923
+ "f1-score": 1.0,
924
+ "support": 1000.0
925
+ },
926
+ "constraint_scope::travel": {
927
+ "precision": 1.0,
928
+ "recall": 1.0,
929
+ "f1-score": 1.0,
930
+ "support": 1000.0
931
+ },
932
+ "constraint_scope::work": {
933
+ "precision": 1.0,
934
+ "recall": 1.0,
935
+ "f1-score": 1.0,
936
+ "support": 1000.0
937
+ },
938
+ "constraint_stability::semi_stable": {
939
+ "precision": 1.0,
940
+ "recall": 1.0,
941
+ "f1-score": 1.0,
942
+ "support": 1000.0
943
+ },
944
+ "constraint_stability::stable": {
945
+ "precision": 1.0,
946
+ "recall": 1.0,
947
+ "f1-score": 1.0,
948
+ "support": 1000.0
949
+ },
950
+ "constraint_stability::volatile": {
951
+ "precision": 1.0,
952
+ "recall": 1.0,
953
+ "f1-score": 1.0,
954
+ "support": 1000.0
955
+ },
956
+ "constraint_type::causal": {
957
+ "precision": 1.0,
958
+ "recall": 1.0,
959
+ "f1-score": 1.0,
960
+ "support": 1000.0
961
+ },
962
+ "constraint_type::constraint_other": {
963
+ "precision": 1.0,
964
+ "recall": 1.0,
965
+ "f1-score": 1.0,
966
+ "support": 1000.0
967
+ },
968
+ "constraint_type::goal": {
969
+ "precision": 1.0,
970
+ "recall": 1.0,
971
+ "f1-score": 1.0,
972
+ "support": 1000.0
973
+ },
974
+ "constraint_type::none": {
975
+ "precision": 1.0,
976
+ "recall": 1.0,
977
+ "f1-score": 1.0,
978
+ "support": 1000.0
979
+ },
980
+ "constraint_type::policy": {
981
+ "precision": 1.0,
982
+ "recall": 1.0,
983
+ "f1-score": 1.0,
984
+ "support": 1000.0
985
+ },
986
+ "constraint_type::preference": {
987
+ "precision": 1.0,
988
+ "recall": 1.0,
989
+ "f1-score": 1.0,
990
+ "support": 1000.0
991
+ },
992
+ "constraint_type::state": {
993
+ "precision": 1.0,
994
+ "recall": 1.0,
995
+ "f1-score": 1.0,
996
+ "support": 1000.0
997
+ },
998
+ "constraint_type::value": {
999
+ "precision": 1.0,
1000
+ "recall": 1.0,
1001
+ "f1-score": 1.0,
1002
+ "support": 1000.0
1003
+ },
1004
+ "fact_type::identity": {
1005
+ "precision": 1.0,
1006
+ "recall": 1.0,
1007
+ "f1-score": 1.0,
1008
+ "support": 1000.0
1009
+ },
1010
+ "fact_type::location": {
1011
+ "precision": 1.0,
1012
+ "recall": 1.0,
1013
+ "f1-score": 1.0,
1014
+ "support": 1000.0
1015
+ },
1016
+ "fact_type::none": {
1017
+ "precision": 1.0,
1018
+ "recall": 1.0,
1019
+ "f1-score": 1.0,
1020
+ "support": 1000.0
1021
+ },
1022
+ "fact_type::occupation": {
1023
+ "precision": 1.0,
1024
+ "recall": 1.0,
1025
+ "f1-score": 1.0,
1026
+ "support": 1000.0
1027
+ },
1028
+ "fact_type::other_fact": {
1029
+ "precision": 1.0,
1030
+ "recall": 1.0,
1031
+ "f1-score": 1.0,
1032
+ "support": 1000.0
1033
+ },
1034
+ "fact_type::preference": {
1035
+ "precision": 1.0,
1036
+ "recall": 1.0,
1037
+ "f1-score": 1.0,
1038
+ "support": 1000.0
1039
+ },
1040
+ "pii_presence::no_pii": {
1041
+ "precision": 0.9900139804274016,
1042
+ "recall": 0.9914,
1043
+ "f1-score": 0.9907065054461877,
1044
+ "support": 5000.0
1045
+ },
1046
+ "pii_presence::pii": {
1047
+ "precision": 0.9913879431203685,
1048
+ "recall": 0.99,
1049
+ "f1-score": 0.9906934854398078,
1050
+ "support": 5000.0
1051
+ },
1052
+ "accuracy": 0.9974166666666666,
1053
+ "macro avg": {
1054
+ "precision": 0.999335782983849,
1055
+ "recall": 0.9993357142857142,
1056
+ "f1-score": 0.9993357139602141,
1057
+ "support": 36000.0
1058
+ },
1059
+ "weighted avg": {
1060
+ "precision": 0.9974169338260791,
1061
+ "recall": 0.9974166666666666,
1062
+ "f1-score": 0.9974166654008327,
1063
+ "support": 36000.0
1064
+ }
1065
+ }
1066
+ },
1067
+ "per_task": {
1068
+ "constraint_scope": {
1069
+ "rows": 9000,
1070
+ "accuracy": 1.0,
1071
+ "macro_f1": 1.0,
1072
+ "weighted_f1": 1.0,
1073
+ "micro_f1": 1.0,
1074
+ "labels": [
1075
+ "finance",
1076
+ "food",
1077
+ "general",
1078
+ "health",
1079
+ "none",
1080
+ "social",
1081
+ "tech",
1082
+ "travel",
1083
+ "work"
1084
+ ],
1085
+ "confusion_matrix": [
1086
+ [
1087
+ 1000,
1088
+ 0,
1089
+ 0,
1090
+ 0,
1091
+ 0,
1092
+ 0,
1093
+ 0,
1094
+ 0,
1095
+ 0
1096
+ ],
1097
+ [
1098
+ 0,
1099
+ 1000,
1100
+ 0,
1101
+ 0,
1102
+ 0,
1103
+ 0,
1104
+ 0,
1105
+ 0,
1106
+ 0
1107
+ ],
1108
+ [
1109
+ 0,
1110
+ 0,
1111
+ 1000,
1112
+ 0,
1113
+ 0,
1114
+ 0,
1115
+ 0,
1116
+ 0,
1117
+ 0
1118
+ ],
1119
+ [
1120
+ 0,
1121
+ 0,
1122
+ 0,
1123
+ 1000,
1124
+ 0,
1125
+ 0,
1126
+ 0,
1127
+ 0,
1128
+ 0
1129
+ ],
1130
+ [
1131
+ 0,
1132
+ 0,
1133
+ 0,
1134
+ 0,
1135
+ 1000,
1136
+ 0,
1137
+ 0,
1138
+ 0,
1139
+ 0
1140
+ ],
1141
+ [
1142
+ 0,
1143
+ 0,
1144
+ 0,
1145
+ 0,
1146
+ 0,
1147
+ 1000,
1148
+ 0,
1149
+ 0,
1150
+ 0
1151
+ ],
1152
+ [
1153
+ 0,
1154
+ 0,
1155
+ 0,
1156
+ 0,
1157
+ 0,
1158
+ 0,
1159
+ 1000,
1160
+ 0,
1161
+ 0
1162
+ ],
1163
+ [
1164
+ 0,
1165
+ 0,
1166
+ 0,
1167
+ 0,
1168
+ 0,
1169
+ 0,
1170
+ 0,
1171
+ 1000,
1172
+ 0
1173
+ ],
1174
+ [
1175
+ 0,
1176
+ 0,
1177
+ 0,
1178
+ 0,
1179
+ 0,
1180
+ 0,
1181
+ 0,
1182
+ 0,
1183
+ 1000
1184
+ ]
1185
+ ],
1186
+ "wrong_task_predictions": 0,
1187
+ "wrong_task_rate": 0.0
1188
+ },
1189
+ "constraint_stability": {
1190
+ "rows": 3000,
1191
+ "accuracy": 1.0,
1192
+ "macro_f1": 1.0,
1193
+ "weighted_f1": 1.0,
1194
+ "micro_f1": 1.0,
1195
+ "labels": [
1196
+ "semi_stable",
1197
+ "stable",
1198
+ "volatile"
1199
+ ],
1200
+ "confusion_matrix": [
1201
+ [
1202
+ 1000,
1203
+ 0,
1204
+ 0
1205
+ ],
1206
+ [
1207
+ 0,
1208
+ 1000,
1209
+ 0
1210
+ ],
1211
+ [
1212
+ 0,
1213
+ 0,
1214
+ 1000
1215
+ ]
1216
+ ],
1217
+ "wrong_task_predictions": 0,
1218
+ "wrong_task_rate": 0.0
1219
+ },
1220
+ "constraint_type": {
1221
+ "rows": 8000,
1222
+ "accuracy": 1.0,
1223
+ "macro_f1": 1.0,
1224
+ "weighted_f1": 1.0,
1225
+ "micro_f1": 1.0,
1226
+ "labels": [
1227
+ "causal",
1228
+ "constraint_other",
1229
+ "goal",
1230
+ "none",
1231
+ "policy",
1232
+ "preference",
1233
+ "state",
1234
+ "value"
1235
+ ],
1236
+ "confusion_matrix": [
1237
+ [
1238
+ 1000,
1239
+ 0,
1240
+ 0,
1241
+ 0,
1242
+ 0,
1243
+ 0,
1244
+ 0,
1245
+ 0
1246
+ ],
1247
+ [
1248
+ 0,
1249
+ 1000,
1250
+ 0,
1251
+ 0,
1252
+ 0,
1253
+ 0,
1254
+ 0,
1255
+ 0
1256
+ ],
1257
+ [
1258
+ 0,
1259
+ 0,
1260
+ 1000,
1261
+ 0,
1262
+ 0,
1263
+ 0,
1264
+ 0,
1265
+ 0
1266
+ ],
1267
+ [
1268
+ 0,
1269
+ 0,
1270
+ 0,
1271
+ 1000,
1272
+ 0,
1273
+ 0,
1274
+ 0,
1275
+ 0
1276
+ ],
1277
+ [
1278
+ 0,
1279
+ 0,
1280
+ 0,
1281
+ 0,
1282
+ 1000,
1283
+ 0,
1284
+ 0,
1285
+ 0
1286
+ ],
1287
+ [
1288
+ 0,
1289
+ 0,
1290
+ 0,
1291
+ 0,
1292
+ 0,
1293
+ 1000,
1294
+ 0,
1295
+ 0
1296
+ ],
1297
+ [
1298
+ 0,
1299
+ 0,
1300
+ 0,
1301
+ 0,
1302
+ 0,
1303
+ 0,
1304
+ 1000,
1305
+ 0
1306
+ ],
1307
+ [
1308
+ 0,
1309
+ 0,
1310
+ 0,
1311
+ 0,
1312
+ 0,
1313
+ 0,
1314
+ 0,
1315
+ 1000
1316
+ ]
1317
+ ],
1318
+ "wrong_task_predictions": 0,
1319
+ "wrong_task_rate": 0.0
1320
+ },
1321
+ "fact_type": {
1322
+ "rows": 6000,
1323
+ "accuracy": 1.0,
1324
+ "macro_f1": 1.0,
1325
+ "weighted_f1": 1.0,
1326
+ "micro_f1": 1.0,
1327
+ "labels": [
1328
+ "identity",
1329
+ "location",
1330
+ "none",
1331
+ "occupation",
1332
+ "other_fact",
1333
+ "preference"
1334
+ ],
1335
+ "confusion_matrix": [
1336
+ [
1337
+ 1000,
1338
+ 0,
1339
+ 0,
1340
+ 0,
1341
+ 0,
1342
+ 0
1343
+ ],
1344
+ [
1345
+ 0,
1346
+ 1000,
1347
+ 0,
1348
+ 0,
1349
+ 0,
1350
+ 0
1351
+ ],
1352
+ [
1353
+ 0,
1354
+ 0,
1355
+ 1000,
1356
+ 0,
1357
+ 0,
1358
+ 0
1359
+ ],
1360
+ [
1361
+ 0,
1362
+ 0,
1363
+ 0,
1364
+ 1000,
1365
+ 0,
1366
+ 0
1367
+ ],
1368
+ [
1369
+ 0,
1370
+ 0,
1371
+ 0,
1372
+ 0,
1373
+ 1000,
1374
+ 0
1375
+ ],
1376
+ [
1377
+ 0,
1378
+ 0,
1379
+ 0,
1380
+ 0,
1381
+ 0,
1382
+ 1000
1383
+ ]
1384
+ ],
1385
+ "wrong_task_predictions": 0,
1386
+ "wrong_task_rate": 0.0
1387
+ },
1388
+ "pii_presence": {
1389
+ "rows": 10000,
1390
+ "accuracy": 0.9907,
1391
+ "macro_f1": 0.9906999954429978,
1392
+ "weighted_f1": 0.9906999954429977,
1393
+ "micro_f1": 0.9907,
1394
+ "labels": [
1395
+ "no_pii",
1396
+ "pii"
1397
+ ],
1398
+ "confusion_matrix": [
1399
+ [
1400
+ 4957,
1401
+ 43
1402
+ ],
1403
+ [
1404
+ 50,
1405
+ 4950
1406
+ ]
1407
+ ],
1408
+ "wrong_task_predictions": 0,
1409
+ "wrong_task_rate": 0.0
1410
+ }
1411
+ },
1412
+ "calibration": {
1413
+ "method": "task_conditional_sigmoid",
1414
+ "split": "eval",
1415
+ "rows": 36000,
1416
+ "pre_ece": 0.06013730731601039,
1417
+ "post_ece": 0.0007514102792564575,
1418
+ "pre_accuracy": 0.9978333333333333,
1419
+ "post_accuracy": 0.9981666666666666,
1420
+ "accuracy_delta": 0.0003333333333332966,
1421
+ "tasks": {
1422
+ "constraint_scope": {
1423
+ "rows": 9000,
1424
+ "pre_ece": 0.0073926387441783925,
1425
+ "post_ece": 0.00019267901287378653,
1426
+ "pre_accuracy": 1.0,
1427
+ "post_accuracy": 1.0,
1428
+ "accuracy_delta": 0.0
1429
+ },
1430
+ "constraint_stability": {
1431
+ "rows": 3000,
1432
+ "pre_ece": 0.0035802200536977353,
1433
+ "post_ece": 0.00016981111181246789,
1434
+ "pre_accuracy": 1.0,
1435
+ "post_accuracy": 1.0,
1436
+ "accuracy_delta": 0.0
1437
+ },
1438
+ "constraint_type": {
1439
+ "rows": 8000,
1440
+ "pre_ece": 0.0152260080692046,
1441
+ "post_ece": 0.0002949549691239062,
1442
+ "pre_accuracy": 1.0,
1443
+ "post_accuracy": 1.0,
1444
+ "accuracy_delta": 0.0
1445
+ },
1446
+ "fact_type": {
1447
+ "rows": 6000,
1448
+ "pre_ece": 0.008209015508071071,
1449
+ "post_ece": 0.00021106970310058593,
1450
+ "pre_accuracy": 1.0,
1451
+ "post_accuracy": 1.0,
1452
+ "accuracy_delta": 0.0
1453
+ },
1454
+ "pii_presence": {
1455
+ "rows": 10000,
1456
+ "pre_ece": 0.1278197693399604,
1457
+ "post_ece": 0.002118116763033487,
1458
+ "pre_accuracy": 0.9922,
1459
+ "post_accuracy": 0.9934,
1460
+ "accuracy_delta": 0.0011999999999999789
1461
+ }
1462
+ }
1463
+ }
1464
+ }
extractor_model.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4c6fc16d14207c75cec1dcf807e7e4f43381a3e92ed46e4d0471fed24a2a33dc
3
+ size 66247427
extractor_report_eval.json ADDED
@@ -0,0 +1,432 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "family": "extractor",
3
+ "split": "eval",
4
+ "overall": {
5
+ "constraint_scope::finance": {
6
+ "precision": 1.0,
7
+ "recall": 1.0,
8
+ "f1-score": 1.0,
9
+ "support": 1000.0
10
+ },
11
+ "constraint_scope::food": {
12
+ "precision": 1.0,
13
+ "recall": 1.0,
14
+ "f1-score": 1.0,
15
+ "support": 1000.0
16
+ },
17
+ "constraint_scope::general": {
18
+ "precision": 1.0,
19
+ "recall": 1.0,
20
+ "f1-score": 1.0,
21
+ "support": 1000.0
22
+ },
23
+ "constraint_scope::health": {
24
+ "precision": 1.0,
25
+ "recall": 1.0,
26
+ "f1-score": 1.0,
27
+ "support": 1000.0
28
+ },
29
+ "constraint_scope::none": {
30
+ "precision": 1.0,
31
+ "recall": 1.0,
32
+ "f1-score": 1.0,
33
+ "support": 1000.0
34
+ },
35
+ "constraint_scope::social": {
36
+ "precision": 1.0,
37
+ "recall": 1.0,
38
+ "f1-score": 1.0,
39
+ "support": 1000.0
40
+ },
41
+ "constraint_scope::tech": {
42
+ "precision": 1.0,
43
+ "recall": 1.0,
44
+ "f1-score": 1.0,
45
+ "support": 1000.0
46
+ },
47
+ "constraint_scope::travel": {
48
+ "precision": 1.0,
49
+ "recall": 1.0,
50
+ "f1-score": 1.0,
51
+ "support": 1000.0
52
+ },
53
+ "constraint_scope::work": {
54
+ "precision": 1.0,
55
+ "recall": 1.0,
56
+ "f1-score": 1.0,
57
+ "support": 1000.0
58
+ },
59
+ "constraint_stability::semi_stable": {
60
+ "precision": 1.0,
61
+ "recall": 1.0,
62
+ "f1-score": 1.0,
63
+ "support": 1000.0
64
+ },
65
+ "constraint_stability::stable": {
66
+ "precision": 1.0,
67
+ "recall": 1.0,
68
+ "f1-score": 1.0,
69
+ "support": 1000.0
70
+ },
71
+ "constraint_stability::volatile": {
72
+ "precision": 1.0,
73
+ "recall": 1.0,
74
+ "f1-score": 1.0,
75
+ "support": 1000.0
76
+ },
77
+ "constraint_type::causal": {
78
+ "precision": 1.0,
79
+ "recall": 1.0,
80
+ "f1-score": 1.0,
81
+ "support": 1000.0
82
+ },
83
+ "constraint_type::constraint_other": {
84
+ "precision": 1.0,
85
+ "recall": 1.0,
86
+ "f1-score": 1.0,
87
+ "support": 1000.0
88
+ },
89
+ "constraint_type::goal": {
90
+ "precision": 1.0,
91
+ "recall": 1.0,
92
+ "f1-score": 1.0,
93
+ "support": 1000.0
94
+ },
95
+ "constraint_type::none": {
96
+ "precision": 1.0,
97
+ "recall": 1.0,
98
+ "f1-score": 1.0,
99
+ "support": 1000.0
100
+ },
101
+ "constraint_type::policy": {
102
+ "precision": 1.0,
103
+ "recall": 1.0,
104
+ "f1-score": 1.0,
105
+ "support": 1000.0
106
+ },
107
+ "constraint_type::preference": {
108
+ "precision": 1.0,
109
+ "recall": 1.0,
110
+ "f1-score": 1.0,
111
+ "support": 1000.0
112
+ },
113
+ "constraint_type::state": {
114
+ "precision": 1.0,
115
+ "recall": 1.0,
116
+ "f1-score": 1.0,
117
+ "support": 1000.0
118
+ },
119
+ "constraint_type::value": {
120
+ "precision": 1.0,
121
+ "recall": 1.0,
122
+ "f1-score": 1.0,
123
+ "support": 1000.0
124
+ },
125
+ "fact_type::identity": {
126
+ "precision": 1.0,
127
+ "recall": 1.0,
128
+ "f1-score": 1.0,
129
+ "support": 1000.0
130
+ },
131
+ "fact_type::location": {
132
+ "precision": 1.0,
133
+ "recall": 1.0,
134
+ "f1-score": 1.0,
135
+ "support": 1000.0
136
+ },
137
+ "fact_type::none": {
138
+ "precision": 1.0,
139
+ "recall": 1.0,
140
+ "f1-score": 1.0,
141
+ "support": 1000.0
142
+ },
143
+ "fact_type::occupation": {
144
+ "precision": 1.0,
145
+ "recall": 1.0,
146
+ "f1-score": 1.0,
147
+ "support": 1000.0
148
+ },
149
+ "fact_type::other_fact": {
150
+ "precision": 1.0,
151
+ "recall": 1.0,
152
+ "f1-score": 1.0,
153
+ "support": 1000.0
154
+ },
155
+ "fact_type::preference": {
156
+ "precision": 1.0,
157
+ "recall": 1.0,
158
+ "f1-score": 1.0,
159
+ "support": 1000.0
160
+ },
161
+ "pii_presence::no_pii": {
162
+ "precision": 0.9912385503783353,
163
+ "recall": 0.9956,
164
+ "f1-score": 0.9934144881261225,
165
+ "support": 5000.0
166
+ },
167
+ "pii_presence::pii": {
168
+ "precision": 0.995580554439534,
169
+ "recall": 0.9912,
170
+ "f1-score": 0.9933854479855683,
171
+ "support": 5000.0
172
+ },
173
+ "accuracy": 0.9981666666666666,
174
+ "macro avg": {
175
+ "precision": 0.9995292537434953,
176
+ "recall": 0.9995285714285714,
177
+ "f1-score": 0.9995285691468461,
178
+ "support": 36000.0
179
+ },
180
+ "weighted avg": {
181
+ "precision": 0.9981693201135929,
182
+ "recall": 0.9981666666666666,
183
+ "f1-score": 0.9981666577932904,
184
+ "support": 36000.0
185
+ }
186
+ },
187
+ "per_task": {
188
+ "constraint_scope": {
189
+ "finance": {
190
+ "precision": 1.0,
191
+ "recall": 1.0,
192
+ "f1-score": 1.0,
193
+ "support": 1000.0
194
+ },
195
+ "food": {
196
+ "precision": 1.0,
197
+ "recall": 1.0,
198
+ "f1-score": 1.0,
199
+ "support": 1000.0
200
+ },
201
+ "general": {
202
+ "precision": 1.0,
203
+ "recall": 1.0,
204
+ "f1-score": 1.0,
205
+ "support": 1000.0
206
+ },
207
+ "health": {
208
+ "precision": 1.0,
209
+ "recall": 1.0,
210
+ "f1-score": 1.0,
211
+ "support": 1000.0
212
+ },
213
+ "none": {
214
+ "precision": 1.0,
215
+ "recall": 1.0,
216
+ "f1-score": 1.0,
217
+ "support": 1000.0
218
+ },
219
+ "social": {
220
+ "precision": 1.0,
221
+ "recall": 1.0,
222
+ "f1-score": 1.0,
223
+ "support": 1000.0
224
+ },
225
+ "tech": {
226
+ "precision": 1.0,
227
+ "recall": 1.0,
228
+ "f1-score": 1.0,
229
+ "support": 1000.0
230
+ },
231
+ "travel": {
232
+ "precision": 1.0,
233
+ "recall": 1.0,
234
+ "f1-score": 1.0,
235
+ "support": 1000.0
236
+ },
237
+ "work": {
238
+ "precision": 1.0,
239
+ "recall": 1.0,
240
+ "f1-score": 1.0,
241
+ "support": 1000.0
242
+ },
243
+ "accuracy": 1.0,
244
+ "macro avg": {
245
+ "precision": 1.0,
246
+ "recall": 1.0,
247
+ "f1-score": 1.0,
248
+ "support": 9000.0
249
+ },
250
+ "weighted avg": {
251
+ "precision": 1.0,
252
+ "recall": 1.0,
253
+ "f1-score": 1.0,
254
+ "support": 9000.0
255
+ }
256
+ },
257
+ "constraint_stability": {
258
+ "semi_stable": {
259
+ "precision": 1.0,
260
+ "recall": 1.0,
261
+ "f1-score": 1.0,
262
+ "support": 1000.0
263
+ },
264
+ "stable": {
265
+ "precision": 1.0,
266
+ "recall": 1.0,
267
+ "f1-score": 1.0,
268
+ "support": 1000.0
269
+ },
270
+ "volatile": {
271
+ "precision": 1.0,
272
+ "recall": 1.0,
273
+ "f1-score": 1.0,
274
+ "support": 1000.0
275
+ },
276
+ "accuracy": 1.0,
277
+ "macro avg": {
278
+ "precision": 1.0,
279
+ "recall": 1.0,
280
+ "f1-score": 1.0,
281
+ "support": 3000.0
282
+ },
283
+ "weighted avg": {
284
+ "precision": 1.0,
285
+ "recall": 1.0,
286
+ "f1-score": 1.0,
287
+ "support": 3000.0
288
+ }
289
+ },
290
+ "constraint_type": {
291
+ "causal": {
292
+ "precision": 1.0,
293
+ "recall": 1.0,
294
+ "f1-score": 1.0,
295
+ "support": 1000.0
296
+ },
297
+ "constraint_other": {
298
+ "precision": 1.0,
299
+ "recall": 1.0,
300
+ "f1-score": 1.0,
301
+ "support": 1000.0
302
+ },
303
+ "goal": {
304
+ "precision": 1.0,
305
+ "recall": 1.0,
306
+ "f1-score": 1.0,
307
+ "support": 1000.0
308
+ },
309
+ "none": {
310
+ "precision": 1.0,
311
+ "recall": 1.0,
312
+ "f1-score": 1.0,
313
+ "support": 1000.0
314
+ },
315
+ "policy": {
316
+ "precision": 1.0,
317
+ "recall": 1.0,
318
+ "f1-score": 1.0,
319
+ "support": 1000.0
320
+ },
321
+ "preference": {
322
+ "precision": 1.0,
323
+ "recall": 1.0,
324
+ "f1-score": 1.0,
325
+ "support": 1000.0
326
+ },
327
+ "state": {
328
+ "precision": 1.0,
329
+ "recall": 1.0,
330
+ "f1-score": 1.0,
331
+ "support": 1000.0
332
+ },
333
+ "value": {
334
+ "precision": 1.0,
335
+ "recall": 1.0,
336
+ "f1-score": 1.0,
337
+ "support": 1000.0
338
+ },
339
+ "accuracy": 1.0,
340
+ "macro avg": {
341
+ "precision": 1.0,
342
+ "recall": 1.0,
343
+ "f1-score": 1.0,
344
+ "support": 8000.0
345
+ },
346
+ "weighted avg": {
347
+ "precision": 1.0,
348
+ "recall": 1.0,
349
+ "f1-score": 1.0,
350
+ "support": 8000.0
351
+ }
352
+ },
353
+ "fact_type": {
354
+ "identity": {
355
+ "precision": 1.0,
356
+ "recall": 1.0,
357
+ "f1-score": 1.0,
358
+ "support": 1000.0
359
+ },
360
+ "location": {
361
+ "precision": 1.0,
362
+ "recall": 1.0,
363
+ "f1-score": 1.0,
364
+ "support": 1000.0
365
+ },
366
+ "none": {
367
+ "precision": 1.0,
368
+ "recall": 1.0,
369
+ "f1-score": 1.0,
370
+ "support": 1000.0
371
+ },
372
+ "occupation": {
373
+ "precision": 1.0,
374
+ "recall": 1.0,
375
+ "f1-score": 1.0,
376
+ "support": 1000.0
377
+ },
378
+ "other_fact": {
379
+ "precision": 1.0,
380
+ "recall": 1.0,
381
+ "f1-score": 1.0,
382
+ "support": 1000.0
383
+ },
384
+ "preference": {
385
+ "precision": 1.0,
386
+ "recall": 1.0,
387
+ "f1-score": 1.0,
388
+ "support": 1000.0
389
+ },
390
+ "accuracy": 1.0,
391
+ "macro avg": {
392
+ "precision": 1.0,
393
+ "recall": 1.0,
394
+ "f1-score": 1.0,
395
+ "support": 6000.0
396
+ },
397
+ "weighted avg": {
398
+ "precision": 1.0,
399
+ "recall": 1.0,
400
+ "f1-score": 1.0,
401
+ "support": 6000.0
402
+ }
403
+ },
404
+ "pii_presence": {
405
+ "no_pii": {
406
+ "precision": 0.9912385503783353,
407
+ "recall": 0.9956,
408
+ "f1-score": 0.9934144881261225,
409
+ "support": 5000.0
410
+ },
411
+ "pii": {
412
+ "precision": 0.995580554439534,
413
+ "recall": 0.9912,
414
+ "f1-score": 0.9933854479855683,
415
+ "support": 5000.0
416
+ },
417
+ "accuracy": 0.9934,
418
+ "macro avg": {
419
+ "precision": 0.9934095524089346,
420
+ "recall": 0.9934000000000001,
421
+ "f1-score": 0.9933999680558454,
422
+ "support": 10000.0
423
+ },
424
+ "weighted avg": {
425
+ "precision": 0.9934095524089346,
426
+ "recall": 0.9934,
427
+ "f1-score": 0.9933999680558454,
428
+ "support": 10000.0
429
+ }
430
+ }
431
+ }
432
+ }
extractor_report_test.json ADDED
@@ -0,0 +1,432 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "family": "extractor",
3
+ "split": "test",
4
+ "overall": {
5
+ "constraint_scope::finance": {
6
+ "precision": 1.0,
7
+ "recall": 1.0,
8
+ "f1-score": 1.0,
9
+ "support": 1000.0
10
+ },
11
+ "constraint_scope::food": {
12
+ "precision": 1.0,
13
+ "recall": 1.0,
14
+ "f1-score": 1.0,
15
+ "support": 1000.0
16
+ },
17
+ "constraint_scope::general": {
18
+ "precision": 1.0,
19
+ "recall": 1.0,
20
+ "f1-score": 1.0,
21
+ "support": 1000.0
22
+ },
23
+ "constraint_scope::health": {
24
+ "precision": 1.0,
25
+ "recall": 1.0,
26
+ "f1-score": 1.0,
27
+ "support": 1000.0
28
+ },
29
+ "constraint_scope::none": {
30
+ "precision": 1.0,
31
+ "recall": 1.0,
32
+ "f1-score": 1.0,
33
+ "support": 1000.0
34
+ },
35
+ "constraint_scope::social": {
36
+ "precision": 1.0,
37
+ "recall": 1.0,
38
+ "f1-score": 1.0,
39
+ "support": 1000.0
40
+ },
41
+ "constraint_scope::tech": {
42
+ "precision": 1.0,
43
+ "recall": 1.0,
44
+ "f1-score": 1.0,
45
+ "support": 1000.0
46
+ },
47
+ "constraint_scope::travel": {
48
+ "precision": 1.0,
49
+ "recall": 1.0,
50
+ "f1-score": 1.0,
51
+ "support": 1000.0
52
+ },
53
+ "constraint_scope::work": {
54
+ "precision": 1.0,
55
+ "recall": 1.0,
56
+ "f1-score": 1.0,
57
+ "support": 1000.0
58
+ },
59
+ "constraint_stability::semi_stable": {
60
+ "precision": 1.0,
61
+ "recall": 1.0,
62
+ "f1-score": 1.0,
63
+ "support": 1000.0
64
+ },
65
+ "constraint_stability::stable": {
66
+ "precision": 1.0,
67
+ "recall": 1.0,
68
+ "f1-score": 1.0,
69
+ "support": 1000.0
70
+ },
71
+ "constraint_stability::volatile": {
72
+ "precision": 1.0,
73
+ "recall": 1.0,
74
+ "f1-score": 1.0,
75
+ "support": 1000.0
76
+ },
77
+ "constraint_type::causal": {
78
+ "precision": 1.0,
79
+ "recall": 1.0,
80
+ "f1-score": 1.0,
81
+ "support": 1000.0
82
+ },
83
+ "constraint_type::constraint_other": {
84
+ "precision": 1.0,
85
+ "recall": 1.0,
86
+ "f1-score": 1.0,
87
+ "support": 1000.0
88
+ },
89
+ "constraint_type::goal": {
90
+ "precision": 1.0,
91
+ "recall": 1.0,
92
+ "f1-score": 1.0,
93
+ "support": 1000.0
94
+ },
95
+ "constraint_type::none": {
96
+ "precision": 1.0,
97
+ "recall": 1.0,
98
+ "f1-score": 1.0,
99
+ "support": 1000.0
100
+ },
101
+ "constraint_type::policy": {
102
+ "precision": 1.0,
103
+ "recall": 1.0,
104
+ "f1-score": 1.0,
105
+ "support": 1000.0
106
+ },
107
+ "constraint_type::preference": {
108
+ "precision": 1.0,
109
+ "recall": 1.0,
110
+ "f1-score": 1.0,
111
+ "support": 1000.0
112
+ },
113
+ "constraint_type::state": {
114
+ "precision": 1.0,
115
+ "recall": 1.0,
116
+ "f1-score": 1.0,
117
+ "support": 1000.0
118
+ },
119
+ "constraint_type::value": {
120
+ "precision": 1.0,
121
+ "recall": 1.0,
122
+ "f1-score": 1.0,
123
+ "support": 1000.0
124
+ },
125
+ "fact_type::identity": {
126
+ "precision": 1.0,
127
+ "recall": 1.0,
128
+ "f1-score": 1.0,
129
+ "support": 1000.0
130
+ },
131
+ "fact_type::location": {
132
+ "precision": 1.0,
133
+ "recall": 1.0,
134
+ "f1-score": 1.0,
135
+ "support": 1000.0
136
+ },
137
+ "fact_type::none": {
138
+ "precision": 1.0,
139
+ "recall": 1.0,
140
+ "f1-score": 1.0,
141
+ "support": 1000.0
142
+ },
143
+ "fact_type::occupation": {
144
+ "precision": 1.0,
145
+ "recall": 1.0,
146
+ "f1-score": 1.0,
147
+ "support": 1000.0
148
+ },
149
+ "fact_type::other_fact": {
150
+ "precision": 1.0,
151
+ "recall": 1.0,
152
+ "f1-score": 1.0,
153
+ "support": 1000.0
154
+ },
155
+ "fact_type::preference": {
156
+ "precision": 1.0,
157
+ "recall": 1.0,
158
+ "f1-score": 1.0,
159
+ "support": 1000.0
160
+ },
161
+ "pii_presence::no_pii": {
162
+ "precision": 0.9900139804274016,
163
+ "recall": 0.9914,
164
+ "f1-score": 0.9907065054461877,
165
+ "support": 5000.0
166
+ },
167
+ "pii_presence::pii": {
168
+ "precision": 0.9913879431203685,
169
+ "recall": 0.99,
170
+ "f1-score": 0.9906934854398078,
171
+ "support": 5000.0
172
+ },
173
+ "accuracy": 0.9974166666666666,
174
+ "macro avg": {
175
+ "precision": 0.999335782983849,
176
+ "recall": 0.9993357142857142,
177
+ "f1-score": 0.9993357139602141,
178
+ "support": 36000.0
179
+ },
180
+ "weighted avg": {
181
+ "precision": 0.9974169338260791,
182
+ "recall": 0.9974166666666666,
183
+ "f1-score": 0.9974166654008327,
184
+ "support": 36000.0
185
+ }
186
+ },
187
+ "per_task": {
188
+ "constraint_scope": {
189
+ "finance": {
190
+ "precision": 1.0,
191
+ "recall": 1.0,
192
+ "f1-score": 1.0,
193
+ "support": 1000.0
194
+ },
195
+ "food": {
196
+ "precision": 1.0,
197
+ "recall": 1.0,
198
+ "f1-score": 1.0,
199
+ "support": 1000.0
200
+ },
201
+ "general": {
202
+ "precision": 1.0,
203
+ "recall": 1.0,
204
+ "f1-score": 1.0,
205
+ "support": 1000.0
206
+ },
207
+ "health": {
208
+ "precision": 1.0,
209
+ "recall": 1.0,
210
+ "f1-score": 1.0,
211
+ "support": 1000.0
212
+ },
213
+ "none": {
214
+ "precision": 1.0,
215
+ "recall": 1.0,
216
+ "f1-score": 1.0,
217
+ "support": 1000.0
218
+ },
219
+ "social": {
220
+ "precision": 1.0,
221
+ "recall": 1.0,
222
+ "f1-score": 1.0,
223
+ "support": 1000.0
224
+ },
225
+ "tech": {
226
+ "precision": 1.0,
227
+ "recall": 1.0,
228
+ "f1-score": 1.0,
229
+ "support": 1000.0
230
+ },
231
+ "travel": {
232
+ "precision": 1.0,
233
+ "recall": 1.0,
234
+ "f1-score": 1.0,
235
+ "support": 1000.0
236
+ },
237
+ "work": {
238
+ "precision": 1.0,
239
+ "recall": 1.0,
240
+ "f1-score": 1.0,
241
+ "support": 1000.0
242
+ },
243
+ "accuracy": 1.0,
244
+ "macro avg": {
245
+ "precision": 1.0,
246
+ "recall": 1.0,
247
+ "f1-score": 1.0,
248
+ "support": 9000.0
249
+ },
250
+ "weighted avg": {
251
+ "precision": 1.0,
252
+ "recall": 1.0,
253
+ "f1-score": 1.0,
254
+ "support": 9000.0
255
+ }
256
+ },
257
+ "constraint_stability": {
258
+ "semi_stable": {
259
+ "precision": 1.0,
260
+ "recall": 1.0,
261
+ "f1-score": 1.0,
262
+ "support": 1000.0
263
+ },
264
+ "stable": {
265
+ "precision": 1.0,
266
+ "recall": 1.0,
267
+ "f1-score": 1.0,
268
+ "support": 1000.0
269
+ },
270
+ "volatile": {
271
+ "precision": 1.0,
272
+ "recall": 1.0,
273
+ "f1-score": 1.0,
274
+ "support": 1000.0
275
+ },
276
+ "accuracy": 1.0,
277
+ "macro avg": {
278
+ "precision": 1.0,
279
+ "recall": 1.0,
280
+ "f1-score": 1.0,
281
+ "support": 3000.0
282
+ },
283
+ "weighted avg": {
284
+ "precision": 1.0,
285
+ "recall": 1.0,
286
+ "f1-score": 1.0,
287
+ "support": 3000.0
288
+ }
289
+ },
290
+ "constraint_type": {
291
+ "causal": {
292
+ "precision": 1.0,
293
+ "recall": 1.0,
294
+ "f1-score": 1.0,
295
+ "support": 1000.0
296
+ },
297
+ "constraint_other": {
298
+ "precision": 1.0,
299
+ "recall": 1.0,
300
+ "f1-score": 1.0,
301
+ "support": 1000.0
302
+ },
303
+ "goal": {
304
+ "precision": 1.0,
305
+ "recall": 1.0,
306
+ "f1-score": 1.0,
307
+ "support": 1000.0
308
+ },
309
+ "none": {
310
+ "precision": 1.0,
311
+ "recall": 1.0,
312
+ "f1-score": 1.0,
313
+ "support": 1000.0
314
+ },
315
+ "policy": {
316
+ "precision": 1.0,
317
+ "recall": 1.0,
318
+ "f1-score": 1.0,
319
+ "support": 1000.0
320
+ },
321
+ "preference": {
322
+ "precision": 1.0,
323
+ "recall": 1.0,
324
+ "f1-score": 1.0,
325
+ "support": 1000.0
326
+ },
327
+ "state": {
328
+ "precision": 1.0,
329
+ "recall": 1.0,
330
+ "f1-score": 1.0,
331
+ "support": 1000.0
332
+ },
333
+ "value": {
334
+ "precision": 1.0,
335
+ "recall": 1.0,
336
+ "f1-score": 1.0,
337
+ "support": 1000.0
338
+ },
339
+ "accuracy": 1.0,
340
+ "macro avg": {
341
+ "precision": 1.0,
342
+ "recall": 1.0,
343
+ "f1-score": 1.0,
344
+ "support": 8000.0
345
+ },
346
+ "weighted avg": {
347
+ "precision": 1.0,
348
+ "recall": 1.0,
349
+ "f1-score": 1.0,
350
+ "support": 8000.0
351
+ }
352
+ },
353
+ "fact_type": {
354
+ "identity": {
355
+ "precision": 1.0,
356
+ "recall": 1.0,
357
+ "f1-score": 1.0,
358
+ "support": 1000.0
359
+ },
360
+ "location": {
361
+ "precision": 1.0,
362
+ "recall": 1.0,
363
+ "f1-score": 1.0,
364
+ "support": 1000.0
365
+ },
366
+ "none": {
367
+ "precision": 1.0,
368
+ "recall": 1.0,
369
+ "f1-score": 1.0,
370
+ "support": 1000.0
371
+ },
372
+ "occupation": {
373
+ "precision": 1.0,
374
+ "recall": 1.0,
375
+ "f1-score": 1.0,
376
+ "support": 1000.0
377
+ },
378
+ "other_fact": {
379
+ "precision": 1.0,
380
+ "recall": 1.0,
381
+ "f1-score": 1.0,
382
+ "support": 1000.0
383
+ },
384
+ "preference": {
385
+ "precision": 1.0,
386
+ "recall": 1.0,
387
+ "f1-score": 1.0,
388
+ "support": 1000.0
389
+ },
390
+ "accuracy": 1.0,
391
+ "macro avg": {
392
+ "precision": 1.0,
393
+ "recall": 1.0,
394
+ "f1-score": 1.0,
395
+ "support": 6000.0
396
+ },
397
+ "weighted avg": {
398
+ "precision": 1.0,
399
+ "recall": 1.0,
400
+ "f1-score": 1.0,
401
+ "support": 6000.0
402
+ }
403
+ },
404
+ "pii_presence": {
405
+ "no_pii": {
406
+ "precision": 0.9900139804274016,
407
+ "recall": 0.9914,
408
+ "f1-score": 0.9907065054461877,
409
+ "support": 5000.0
410
+ },
411
+ "pii": {
412
+ "precision": 0.9913879431203685,
413
+ "recall": 0.99,
414
+ "f1-score": 0.9906934854398078,
415
+ "support": 5000.0
416
+ },
417
+ "accuracy": 0.9907,
418
+ "macro avg": {
419
+ "precision": 0.9907009617738851,
420
+ "recall": 0.9906999999999999,
421
+ "f1-score": 0.9906999954429978,
422
+ "support": 10000.0
423
+ },
424
+ "weighted avg": {
425
+ "precision": 0.9907009617738851,
426
+ "recall": 0.9907,
427
+ "f1-score": 0.9906999954429977,
428
+ "support": 10000.0
429
+ }
430
+ }
431
+ }
432
+ }
extractor_training_metadata.json ADDED
@@ -0,0 +1,350 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "family": "extractor",
3
+ "trained_at_utc": "2026-03-24T13:45:54.396697+00:00",
4
+ "rows": {
5
+ "train": 288000,
6
+ "test": 36000,
7
+ "eval": 36000
8
+ },
9
+ "tasks": [
10
+ "constraint_scope",
11
+ "constraint_stability",
12
+ "constraint_type",
13
+ "fact_type",
14
+ "pii_presence"
15
+ ],
16
+ "task_label_counts": {
17
+ "train": {
18
+ "constraint_scope": {
19
+ "finance": 8000,
20
+ "food": 8000,
21
+ "general": 8000,
22
+ "health": 8000,
23
+ "none": 8000,
24
+ "social": 8000,
25
+ "tech": 8000,
26
+ "travel": 8000,
27
+ "work": 8000
28
+ },
29
+ "constraint_stability": {
30
+ "semi_stable": 8000,
31
+ "stable": 8000,
32
+ "volatile": 8000
33
+ },
34
+ "constraint_type": {
35
+ "causal": 8000,
36
+ "constraint_other": 8000,
37
+ "goal": 8000,
38
+ "none": 8000,
39
+ "policy": 8000,
40
+ "preference": 8000,
41
+ "state": 8000,
42
+ "value": 8000
43
+ },
44
+ "fact_type": {
45
+ "identity": 8000,
46
+ "location": 8000,
47
+ "none": 8000,
48
+ "occupation": 8000,
49
+ "other_fact": 8000,
50
+ "preference": 8000
51
+ },
52
+ "pii_presence": {
53
+ "no_pii": 40000,
54
+ "pii": 40000
55
+ }
56
+ },
57
+ "test": {
58
+ "constraint_scope": {
59
+ "finance": 1000,
60
+ "food": 1000,
61
+ "general": 1000,
62
+ "health": 1000,
63
+ "none": 1000,
64
+ "social": 1000,
65
+ "tech": 1000,
66
+ "travel": 1000,
67
+ "work": 1000
68
+ },
69
+ "constraint_stability": {
70
+ "semi_stable": 1000,
71
+ "stable": 1000,
72
+ "volatile": 1000
73
+ },
74
+ "constraint_type": {
75
+ "causal": 1000,
76
+ "constraint_other": 1000,
77
+ "goal": 1000,
78
+ "none": 1000,
79
+ "policy": 1000,
80
+ "preference": 1000,
81
+ "state": 1000,
82
+ "value": 1000
83
+ },
84
+ "fact_type": {
85
+ "identity": 1000,
86
+ "location": 1000,
87
+ "none": 1000,
88
+ "occupation": 1000,
89
+ "other_fact": 1000,
90
+ "preference": 1000
91
+ },
92
+ "pii_presence": {
93
+ "no_pii": 5000,
94
+ "pii": 5000
95
+ }
96
+ },
97
+ "eval": {
98
+ "constraint_scope": {
99
+ "finance": 1000,
100
+ "food": 1000,
101
+ "general": 1000,
102
+ "health": 1000,
103
+ "none": 1000,
104
+ "social": 1000,
105
+ "tech": 1000,
106
+ "travel": 1000,
107
+ "work": 1000
108
+ },
109
+ "constraint_stability": {
110
+ "semi_stable": 1000,
111
+ "stable": 1000,
112
+ "volatile": 1000
113
+ },
114
+ "constraint_type": {
115
+ "causal": 1000,
116
+ "constraint_other": 1000,
117
+ "goal": 1000,
118
+ "none": 1000,
119
+ "policy": 1000,
120
+ "preference": 1000,
121
+ "state": 1000,
122
+ "value": 1000
123
+ },
124
+ "fact_type": {
125
+ "identity": 1000,
126
+ "location": 1000,
127
+ "none": 1000,
128
+ "occupation": 1000,
129
+ "other_fact": 1000,
130
+ "preference": 1000
131
+ },
132
+ "pii_presence": {
133
+ "no_pii": 5000,
134
+ "pii": 5000
135
+ }
136
+ }
137
+ },
138
+ "labels": [
139
+ "constraint_scope::finance",
140
+ "constraint_scope::food",
141
+ "constraint_scope::general",
142
+ "constraint_scope::health",
143
+ "constraint_scope::none",
144
+ "constraint_scope::social",
145
+ "constraint_scope::tech",
146
+ "constraint_scope::travel",
147
+ "constraint_scope::work",
148
+ "constraint_stability::semi_stable",
149
+ "constraint_stability::stable",
150
+ "constraint_stability::volatile",
151
+ "constraint_type::causal",
152
+ "constraint_type::constraint_other",
153
+ "constraint_type::goal",
154
+ "constraint_type::none",
155
+ "constraint_type::policy",
156
+ "constraint_type::preference",
157
+ "constraint_type::state",
158
+ "constraint_type::value",
159
+ "fact_type::identity",
160
+ "fact_type::location",
161
+ "fact_type::none",
162
+ "fact_type::occupation",
163
+ "fact_type::other_fact",
164
+ "fact_type::preference",
165
+ "pii_presence::no_pii",
166
+ "pii_presence::pii"
167
+ ],
168
+ "label_to_id": {
169
+ "constraint_scope::finance": 0,
170
+ "constraint_scope::food": 1,
171
+ "constraint_scope::general": 2,
172
+ "constraint_scope::health": 3,
173
+ "constraint_scope::none": 4,
174
+ "constraint_scope::social": 5,
175
+ "constraint_scope::tech": 6,
176
+ "constraint_scope::travel": 7,
177
+ "constraint_scope::work": 8,
178
+ "constraint_stability::semi_stable": 9,
179
+ "constraint_stability::stable": 10,
180
+ "constraint_stability::volatile": 11,
181
+ "constraint_type::causal": 12,
182
+ "constraint_type::constraint_other": 13,
183
+ "constraint_type::goal": 14,
184
+ "constraint_type::none": 15,
185
+ "constraint_type::policy": 16,
186
+ "constraint_type::preference": 17,
187
+ "constraint_type::state": 18,
188
+ "constraint_type::value": 19,
189
+ "fact_type::identity": 20,
190
+ "fact_type::location": 21,
191
+ "fact_type::none": 22,
192
+ "fact_type::occupation": 23,
193
+ "fact_type::other_fact": 24,
194
+ "fact_type::preference": 25,
195
+ "pii_presence::no_pii": 26,
196
+ "pii_presence::pii": 27
197
+ },
198
+ "train_config": {
199
+ "max_features": 250000,
200
+ "min_df": 2,
201
+ "ngram_min": 1,
202
+ "ngram_max": 2,
203
+ "max_iter": 25,
204
+ "alpha": 1e-05,
205
+ "seed": 42,
206
+ "predict_batch_size": 8192,
207
+ "early_stopping": true,
208
+ "early_stopping_patience": 3,
209
+ "early_stopping_metric": "macro_f1",
210
+ "early_stopping_min_delta": 0.001,
211
+ "calibration_method": "sigmoid"
212
+ },
213
+ "training_summary": {
214
+ "actual_epochs": 4,
215
+ "best_epoch": 1,
216
+ "best_metric": 0.9994428567862855,
217
+ "monitor_metric": "macro_f1",
218
+ "early_stopped": true
219
+ },
220
+ "calibration": {
221
+ "method": "task_conditional_sigmoid",
222
+ "split": "eval",
223
+ "rows": 36000,
224
+ "pre_ece": 0.06013730731601039,
225
+ "post_ece": 0.0007514102792564575,
226
+ "pre_accuracy": 0.9978333333333333,
227
+ "post_accuracy": 0.9981666666666666,
228
+ "accuracy_delta": 0.0003333333333332966,
229
+ "tasks": {
230
+ "constraint_scope": {
231
+ "rows": 9000,
232
+ "pre_ece": 0.0073926387441783925,
233
+ "post_ece": 0.00019267901287378653,
234
+ "pre_accuracy": 1.0,
235
+ "post_accuracy": 1.0,
236
+ "accuracy_delta": 0.0
237
+ },
238
+ "constraint_stability": {
239
+ "rows": 3000,
240
+ "pre_ece": 0.0035802200536977353,
241
+ "post_ece": 0.00016981111181246789,
242
+ "pre_accuracy": 1.0,
243
+ "post_accuracy": 1.0,
244
+ "accuracy_delta": 0.0
245
+ },
246
+ "constraint_type": {
247
+ "rows": 8000,
248
+ "pre_ece": 0.0152260080692046,
249
+ "post_ece": 0.0002949549691239062,
250
+ "pre_accuracy": 1.0,
251
+ "post_accuracy": 1.0,
252
+ "accuracy_delta": 0.0
253
+ },
254
+ "fact_type": {
255
+ "rows": 6000,
256
+ "pre_ece": 0.008209015508071071,
257
+ "post_ece": 0.00021106970310058593,
258
+ "pre_accuracy": 1.0,
259
+ "post_accuracy": 1.0,
260
+ "accuracy_delta": 0.0
261
+ },
262
+ "pii_presence": {
263
+ "rows": 10000,
264
+ "pre_ece": 0.1278197693399604,
265
+ "post_ece": 0.002118116763033487,
266
+ "pre_accuracy": 0.9922,
267
+ "post_accuracy": 0.9934,
268
+ "accuracy_delta": 0.0011999999999999789
269
+ }
270
+ }
271
+ },
272
+ "epoch_stats": [
273
+ {
274
+ "epoch": 1,
275
+ "train_loss": 0.07304321517285889,
276
+ "train_accuracy": 0.9985243055555556,
277
+ "train_macro_f1": 0.9996205356573068,
278
+ "train_weighted_f1": 0.9985243053339707,
279
+ "valid_loss": 0.07040478728784708,
280
+ "valid_accuracy": 0.9978333333333333,
281
+ "valid_macro_f1": 0.9994428567862855,
282
+ "valid_weighted_f1": 0.9978333319466658,
283
+ "monitor_metric": "macro_f1",
284
+ "monitor_value": 0.9994428567862855,
285
+ "improved": true
286
+ },
287
+ {
288
+ "epoch": 2,
289
+ "train_loss": 0.07098693556871623,
290
+ "train_accuracy": 0.9985138888888889,
291
+ "train_macro_f1": 0.9996178570566361,
292
+ "train_weighted_f1": 0.998513888553585,
293
+ "valid_loss": 0.06895428509737356,
294
+ "valid_accuracy": 0.9979166666666667,
295
+ "valid_macro_f1": 0.9994642852803567,
296
+ "valid_weighted_f1": 0.9979166649791653,
297
+ "monitor_metric": "macro_f1",
298
+ "monitor_value": 0.9994642852803567,
299
+ "improved": false
300
+ },
301
+ {
302
+ "epoch": 3,
303
+ "train_loss": 0.06992293075722475,
304
+ "train_accuracy": 0.9984930555555556,
305
+ "train_macro_f1": 0.9996124998827812,
306
+ "train_weighted_f1": 0.9984930550997047,
307
+ "valid_loss": 0.06817925236885357,
308
+ "valid_accuracy": 0.9979166666666667,
309
+ "valid_macro_f1": 0.9994642852803567,
310
+ "valid_weighted_f1": 0.9979166649791653,
311
+ "monitor_metric": "macro_f1",
312
+ "monitor_value": 0.9994642852803567,
313
+ "improved": false
314
+ },
315
+ {
316
+ "epoch": 4,
317
+ "train_loss": 0.0692276576162906,
318
+ "train_accuracy": 0.9984861111111111,
319
+ "train_macro_f1": 0.9996107141570066,
320
+ "train_weighted_f1": 0.9984861106105815,
321
+ "valid_loss": 0.06766472301272425,
322
+ "valid_accuracy": 0.9979166666666667,
323
+ "valid_macro_f1": 0.9994642852803567,
324
+ "valid_weighted_f1": 0.9979166649791653,
325
+ "monitor_metric": "macro_f1",
326
+ "monitor_value": 0.9994642852803567,
327
+ "improved": false
328
+ }
329
+ ],
330
+ "skipped_invalid_tasks": {},
331
+ "artifact_scope": "family",
332
+ "evaluation_suite": "standard",
333
+ "dataset_hashes": {
334
+ "train": {
335
+ "path": "/nvme/CognitiveMemoryLayer/packages/models/prepared_data/modelpack/extractor_train.parquet",
336
+ "sha256": "7001566078ce06fa63779c2025be27b27a1fed4a304133dc334fcbf1336dcd1e",
337
+ "bytes": 21540433
338
+ },
339
+ "test": {
340
+ "path": "/nvme/CognitiveMemoryLayer/packages/models/prepared_data/modelpack/extractor_test.parquet",
341
+ "sha256": "8fa0af53f8cbd8116335d00659608404ba96055df49b1ab89799418c646a48d3",
342
+ "bytes": 2721025
343
+ },
344
+ "eval": {
345
+ "path": "/nvme/CognitiveMemoryLayer/packages/models/prepared_data/modelpack/extractor_eval.parquet",
346
+ "sha256": "d066db9c6cadeaea12f4c367262fee444f0992575af2daca06af75f40e9935ce",
347
+ "bytes": 2709873
348
+ }
349
+ }
350
+ }
fact_extraction_structured_epoch_stats.json ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "task": "fact_extraction_structured",
3
+ "epoch_stats": [
4
+ {
5
+ "epoch": 1,
6
+ "train_loss": 0.1299236143381422
7
+ },
8
+ {
9
+ "epoch": 2,
10
+ "train_loss": 0.0057935971935330595
11
+ },
12
+ {
13
+ "epoch": 3,
14
+ "train_loss": 0.0012633003406408534
15
+ }
16
+ ],
17
+ "training_summary": {
18
+ "actual_epochs": 3,
19
+ "best_epoch": 3,
20
+ "early_stopped": false
21
+ },
22
+ "labels": {
23
+ "0": "O",
24
+ "1": "B-attribute",
25
+ "2": "I-attribute",
26
+ "3": "B-causal",
27
+ "4": "I-causal",
28
+ "5": "B-goal",
29
+ "6": "I-goal",
30
+ "7": "B-identity",
31
+ "8": "I-identity",
32
+ "9": "B-location",
33
+ "10": "I-location",
34
+ "11": "B-occupation",
35
+ "12": "I-occupation",
36
+ "13": "B-policy",
37
+ "14": "I-policy",
38
+ "15": "B-preference",
39
+ "16": "I-preference",
40
+ "17": "B-state",
41
+ "18": "I-state",
42
+ "19": "B-value",
43
+ "20": "I-value"
44
+ },
45
+ "hf_model_dir": "/nvme/CognitiveMemoryLayer/packages/models/trained_models/fact_extraction_structured_hf"
46
+ }
fact_extraction_structured_hf/config.json ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "activation": "gelu",
3
+ "architectures": [
4
+ "DistilBertForTokenClassification"
5
+ ],
6
+ "attention_dropout": 0.1,
7
+ "bos_token_id": null,
8
+ "dim": 768,
9
+ "dropout": 0.1,
10
+ "dtype": "float32",
11
+ "eos_token_id": null,
12
+ "hidden_dim": 3072,
13
+ "id2label": {
14
+ "0": "O",
15
+ "1": "B-attribute",
16
+ "2": "I-attribute",
17
+ "3": "B-causal",
18
+ "4": "I-causal",
19
+ "5": "B-goal",
20
+ "6": "I-goal",
21
+ "7": "B-identity",
22
+ "8": "I-identity",
23
+ "9": "B-location",
24
+ "10": "I-location",
25
+ "11": "B-occupation",
26
+ "12": "I-occupation",
27
+ "13": "B-policy",
28
+ "14": "I-policy",
29
+ "15": "B-preference",
30
+ "16": "I-preference",
31
+ "17": "B-state",
32
+ "18": "I-state",
33
+ "19": "B-value",
34
+ "20": "I-value"
35
+ },
36
+ "initializer_range": 0.02,
37
+ "label2id": {
38
+ "B-attribute": 1,
39
+ "B-causal": 3,
40
+ "B-goal": 5,
41
+ "B-identity": 7,
42
+ "B-location": 9,
43
+ "B-occupation": 11,
44
+ "B-policy": 13,
45
+ "B-preference": 15,
46
+ "B-state": 17,
47
+ "B-value": 19,
48
+ "I-attribute": 2,
49
+ "I-causal": 4,
50
+ "I-goal": 6,
51
+ "I-identity": 8,
52
+ "I-location": 10,
53
+ "I-occupation": 12,
54
+ "I-policy": 14,
55
+ "I-preference": 16,
56
+ "I-state": 18,
57
+ "I-value": 20,
58
+ "O": 0
59
+ },
60
+ "max_position_embeddings": 512,
61
+ "model_type": "distilbert",
62
+ "n_heads": 12,
63
+ "n_layers": 6,
64
+ "output_past": true,
65
+ "pad_token_id": 0,
66
+ "qa_dropout": 0.1,
67
+ "seq_classif_dropout": 0.2,
68
+ "sinusoidal_pos_embds": false,
69
+ "tie_weights_": true,
70
+ "tie_word_embeddings": true,
71
+ "transformers_version": "5.3.0",
72
+ "vocab_size": 119547
73
+ }
fact_extraction_structured_hf/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5312fc9839dac32f3bdc0f25b68138531e869fe1006ebe02656134dda5597859
3
+ size 539013268
fact_extraction_structured_hf/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
fact_extraction_structured_hf/tokenizer_config.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "backend": "tokenizers",
3
+ "cls_token": "[CLS]",
4
+ "do_lower_case": false,
5
+ "is_local": false,
6
+ "mask_token": "[MASK]",
7
+ "model_max_length": 512,
8
+ "pad_token": "[PAD]",
9
+ "sep_token": "[SEP]",
10
+ "strip_accents": null,
11
+ "tokenize_chinese_chars": true,
12
+ "tokenizer_class": "BertTokenizer",
13
+ "unk_token": "[UNK]"
14
+ }
fact_extraction_structured_metrics_eval.json ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "task": "fact_extraction_structured",
3
+ "overall": {
4
+ "span_precision": 0.9980054849164797,
5
+ "span_recall": 0.99900174694285,
6
+ "span_f1": 0.9985033674232976,
7
+ "span_exact_match": 0.9987521836785626,
8
+ "rows": 4007
9
+ },
10
+ "labels": {
11
+ "0": "O",
12
+ "1": "B-attribute",
13
+ "2": "I-attribute",
14
+ "3": "B-causal",
15
+ "4": "I-causal",
16
+ "5": "B-goal",
17
+ "6": "I-goal",
18
+ "7": "B-identity",
19
+ "8": "I-identity",
20
+ "9": "B-location",
21
+ "10": "I-location",
22
+ "11": "B-occupation",
23
+ "12": "I-occupation",
24
+ "13": "B-policy",
25
+ "14": "I-policy",
26
+ "15": "B-preference",
27
+ "16": "I-preference",
28
+ "17": "B-state",
29
+ "18": "I-state",
30
+ "19": "B-value",
31
+ "20": "I-value"
32
+ }
33
+ }
fact_extraction_structured_metrics_test.json ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "task": "fact_extraction_structured",
3
+ "overall": {
4
+ "span_precision": 0.9985,
5
+ "span_recall": 0.9994994994994995,
6
+ "span_f1": 0.9989994997498749,
7
+ "span_exact_match": 0.998998998998999,
8
+ "rows": 3996
9
+ },
10
+ "labels": {
11
+ "0": "O",
12
+ "1": "B-attribute",
13
+ "2": "I-attribute",
14
+ "3": "B-causal",
15
+ "4": "I-causal",
16
+ "5": "B-goal",
17
+ "6": "I-goal",
18
+ "7": "B-identity",
19
+ "8": "I-identity",
20
+ "9": "B-location",
21
+ "10": "I-location",
22
+ "11": "B-occupation",
23
+ "12": "I-occupation",
24
+ "13": "B-policy",
25
+ "14": "I-policy",
26
+ "15": "B-preference",
27
+ "16": "I-preference",
28
+ "17": "B-state",
29
+ "18": "I-state",
30
+ "19": "B-value",
31
+ "20": "I-value"
32
+ }
33
+ }
fact_extraction_structured_model.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fa84ac1a817e0c50cf8dcf2a139ce58be5b0f4e18777a8acbfe853d2fa76ba67
3
+ size 1220
forgetting_action_policy_epoch_stats.json ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "task": "forgetting_action_policy",
3
+ "epoch_stats": [
4
+ {
5
+ "epoch": 1,
6
+ "train_loss": 1.2100253536409593,
7
+ "eval_macro_f1": 0.8943120476934178,
8
+ "eval_accuracy": 0.8914
9
+ },
10
+ {
11
+ "epoch": 2,
12
+ "train_loss": 0.6452919559225608,
13
+ "eval_macro_f1": 0.9476649277600788,
14
+ "eval_accuracy": 0.9484
15
+ },
16
+ {
17
+ "epoch": 3,
18
+ "train_loss": 0.482522361606962,
19
+ "eval_macro_f1": 0.9696191896424656,
20
+ "eval_accuracy": 0.9696
21
+ },
22
+ {
23
+ "epoch": 4,
24
+ "train_loss": 0.3768140496231193,
25
+ "eval_macro_f1": 0.9718038787431208,
26
+ "eval_accuracy": 0.9718
27
+ },
28
+ {
29
+ "epoch": 5,
30
+ "train_loss": 0.2870782903271907,
31
+ "eval_macro_f1": 0.9796027083744441,
32
+ "eval_accuracy": 0.9796
33
+ },
34
+ {
35
+ "epoch": 6,
36
+ "train_loss": 0.20888356409232609,
37
+ "eval_macro_f1": 0.9825928670083165,
38
+ "eval_accuracy": 0.9826
39
+ },
40
+ {
41
+ "epoch": 7,
42
+ "train_loss": 0.15703109322336797,
43
+ "eval_macro_f1": 0.9839922007228425,
44
+ "eval_accuracy": 0.984
45
+ },
46
+ {
47
+ "epoch": 8,
48
+ "train_loss": 0.11633934278838558,
49
+ "eval_macro_f1": 0.9835931767915431,
50
+ "eval_accuracy": 0.9836
51
+ }
52
+ ],
53
+ "training_summary": {
54
+ "actual_epochs": 8,
55
+ "best_epoch": 7,
56
+ "early_stopped": false,
57
+ "backbone_model_name": "microsoft/deberta-v3-base",
58
+ "tokenizer_name": "microsoft/deberta-v3-base",
59
+ "selection_metric": "macro_f1",
60
+ "selection_value": 0.9839922007228425
61
+ },
62
+ "calibration": {
63
+ "method": "temperature_grid_search",
64
+ "rows": 5000,
65
+ "temperature": 2.0,
66
+ "loss": 0.08134587520392533
67
+ },
68
+ "hf_model_dir": "/nvme/CognitiveMemoryLayer/packages/models/trained_models/forgetting_action_policy_hf"
69
+ }
forgetting_action_policy_hf/config.json ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "DebertaV2ForSequenceClassification"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "bos_token_id": null,
7
+ "dtype": "float32",
8
+ "eos_token_id": null,
9
+ "hidden_act": "gelu",
10
+ "hidden_dropout_prob": 0.1,
11
+ "hidden_size": 768,
12
+ "id2label": {
13
+ "0": "forgetting_action_policy::keep",
14
+ "1": "forgetting_action_policy::decay",
15
+ "2": "forgetting_action_policy::silence",
16
+ "3": "forgetting_action_policy::compress",
17
+ "4": "forgetting_action_policy::delete"
18
+ },
19
+ "initializer_range": 0.02,
20
+ "intermediate_size": 3072,
21
+ "label2id": {
22
+ "forgetting_action_policy::compress": 3,
23
+ "forgetting_action_policy::decay": 1,
24
+ "forgetting_action_policy::delete": 4,
25
+ "forgetting_action_policy::keep": 0,
26
+ "forgetting_action_policy::silence": 2
27
+ },
28
+ "layer_norm_eps": 1e-07,
29
+ "legacy": true,
30
+ "max_position_embeddings": 512,
31
+ "max_relative_positions": -1,
32
+ "model_type": "deberta-v2",
33
+ "norm_rel_ebd": "layer_norm",
34
+ "num_attention_heads": 12,
35
+ "num_hidden_layers": 12,
36
+ "pad_token_id": 0,
37
+ "pooler_dropout": 0,
38
+ "pooler_hidden_act": "gelu",
39
+ "pooler_hidden_size": 768,
40
+ "pos_att_type": [
41
+ "p2c",
42
+ "c2p"
43
+ ],
44
+ "position_biased_input": false,
45
+ "position_buckets": 256,
46
+ "relative_attention": true,
47
+ "share_att_key": true,
48
+ "tie_word_embeddings": true,
49
+ "transformers_version": "5.3.0",
50
+ "type_vocab_size": 0,
51
+ "vocab_size": 128100
52
+ }
forgetting_action_policy_hf/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f301a72b17a022ba8a55cbcab2027b2e26e69dd005f26717dfd4605c07b1a80a
3
+ size 737728484
forgetting_action_policy_hf/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
forgetting_action_policy_hf/tokenizer_config.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": true,
3
+ "backend": "tokenizers",
4
+ "bos_token": "[CLS]",
5
+ "cls_token": "[CLS]",
6
+ "do_lower_case": false,
7
+ "eos_token": "[SEP]",
8
+ "extra_special_tokens": [
9
+ "[PAD]",
10
+ "[CLS]",
11
+ "[SEP]"
12
+ ],
13
+ "is_local": false,
14
+ "mask_token": "[MASK]",
15
+ "model_max_length": 1000000000000000019884624838656,
16
+ "pad_token": "[PAD]",
17
+ "sep_token": "[SEP]",
18
+ "split_by_punct": false,
19
+ "tokenizer_class": "DebertaV2Tokenizer",
20
+ "unk_id": 3,
21
+ "unk_token": "[UNK]",
22
+ "vocab_type": "spm"
23
+ }
forgetting_action_policy_metrics_eval.json ADDED
@@ -0,0 +1,162 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "family": "router",
3
+ "split": "eval",
4
+ "overall": {
5
+ "rows": 5000,
6
+ "accuracy": 0.984,
7
+ "macro_f1": 0.9839922007228425,
8
+ "weighted_f1": 0.9839922007228427,
9
+ "micro_f1": 0.984,
10
+ "labels": [
11
+ "forgetting_action_policy::compress",
12
+ "forgetting_action_policy::decay",
13
+ "forgetting_action_policy::delete",
14
+ "forgetting_action_policy::keep",
15
+ "forgetting_action_policy::silence"
16
+ ],
17
+ "confusion_matrix": [
18
+ [
19
+ 988,
20
+ 3,
21
+ 0,
22
+ 9,
23
+ 0
24
+ ],
25
+ [
26
+ 3,
27
+ 986,
28
+ 1,
29
+ 1,
30
+ 9
31
+ ],
32
+ [
33
+ 2,
34
+ 0,
35
+ 994,
36
+ 0,
37
+ 4
38
+ ],
39
+ [
40
+ 6,
41
+ 11,
42
+ 0,
43
+ 979,
44
+ 4
45
+ ],
46
+ [
47
+ 2,
48
+ 11,
49
+ 11,
50
+ 3,
51
+ 973
52
+ ]
53
+ ],
54
+ "calibration_error": 0.0073812406567217975,
55
+ "classification_report": {
56
+ "forgetting_action_policy::compress": {
57
+ "precision": 0.987012987012987,
58
+ "recall": 0.988,
59
+ "f1-score": 0.9875062468765617,
60
+ "support": 1000.0
61
+ },
62
+ "forgetting_action_policy::decay": {
63
+ "precision": 0.9752720079129574,
64
+ "recall": 0.986,
65
+ "f1-score": 0.9806066633515664,
66
+ "support": 1000.0
67
+ },
68
+ "forgetting_action_policy::delete": {
69
+ "precision": 0.9880715705765407,
70
+ "recall": 0.994,
71
+ "f1-score": 0.9910269192422732,
72
+ "support": 1000.0
73
+ },
74
+ "forgetting_action_policy::keep": {
75
+ "precision": 0.9868951612903226,
76
+ "recall": 0.979,
77
+ "f1-score": 0.9829317269076305,
78
+ "support": 1000.0
79
+ },
80
+ "forgetting_action_policy::silence": {
81
+ "precision": 0.9828282828282828,
82
+ "recall": 0.973,
83
+ "f1-score": 0.9778894472361809,
84
+ "support": 1000.0
85
+ },
86
+ "accuracy": 0.984,
87
+ "macro avg": {
88
+ "precision": 0.984016001924218,
89
+ "recall": 0.984,
90
+ "f1-score": 0.9839922007228425,
91
+ "support": 5000.0
92
+ },
93
+ "weighted avg": {
94
+ "precision": 0.9840160019242181,
95
+ "recall": 0.984,
96
+ "f1-score": 0.9839922007228427,
97
+ "support": 5000.0
98
+ }
99
+ }
100
+ },
101
+ "per_task": {
102
+ "forgetting_action_policy": {
103
+ "rows": 5000,
104
+ "accuracy": 0.984,
105
+ "macro_f1": 0.9839922007228425,
106
+ "weighted_f1": 0.9839922007228427,
107
+ "micro_f1": 0.984,
108
+ "labels": [
109
+ "compress",
110
+ "decay",
111
+ "delete",
112
+ "keep",
113
+ "silence"
114
+ ],
115
+ "confusion_matrix": [
116
+ [
117
+ 988,
118
+ 3,
119
+ 0,
120
+ 9,
121
+ 0
122
+ ],
123
+ [
124
+ 3,
125
+ 986,
126
+ 1,
127
+ 1,
128
+ 9
129
+ ],
130
+ [
131
+ 2,
132
+ 0,
133
+ 994,
134
+ 0,
135
+ 4
136
+ ],
137
+ [
138
+ 6,
139
+ 11,
140
+ 0,
141
+ 979,
142
+ 4
143
+ ],
144
+ [
145
+ 2,
146
+ 11,
147
+ 11,
148
+ 3,
149
+ 973
150
+ ]
151
+ ],
152
+ "wrong_task_predictions": 0,
153
+ "wrong_task_rate": 0.0
154
+ }
155
+ },
156
+ "calibration": {
157
+ "method": "temperature_grid_search",
158
+ "rows": 5000,
159
+ "temperature": 2.0,
160
+ "loss": 0.08134587520392533
161
+ }
162
+ }
forgetting_action_policy_metrics_test.json ADDED
@@ -0,0 +1,162 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "family": "router",
3
+ "split": "test",
4
+ "overall": {
5
+ "rows": 18920,
6
+ "accuracy": 0.9994714587737844,
7
+ "macro_f1": 0.9994714168187103,
8
+ "weighted_f1": 0.9994714168187103,
9
+ "micro_f1": 0.9994714587737844,
10
+ "labels": [
11
+ "forgetting_action_policy::compress",
12
+ "forgetting_action_policy::decay",
13
+ "forgetting_action_policy::delete",
14
+ "forgetting_action_policy::keep",
15
+ "forgetting_action_policy::silence"
16
+ ],
17
+ "confusion_matrix": [
18
+ [
19
+ 3782,
20
+ 2,
21
+ 0,
22
+ 0,
23
+ 0
24
+ ],
25
+ [
26
+ 0,
27
+ 3784,
28
+ 0,
29
+ 0,
30
+ 0
31
+ ],
32
+ [
33
+ 0,
34
+ 0,
35
+ 3779,
36
+ 1,
37
+ 4
38
+ ],
39
+ [
40
+ 0,
41
+ 0,
42
+ 0,
43
+ 3784,
44
+ 0
45
+ ],
46
+ [
47
+ 2,
48
+ 0,
49
+ 1,
50
+ 0,
51
+ 3781
52
+ ]
53
+ ],
54
+ "calibration_error": 0.01649172677837832,
55
+ "classification_report": {
56
+ "forgetting_action_policy::compress": {
57
+ "precision": 0.9994714587737844,
58
+ "recall": 0.9994714587737844,
59
+ "f1-score": 0.9994714587737844,
60
+ "support": 3784.0
61
+ },
62
+ "forgetting_action_policy::decay": {
63
+ "precision": 0.9994717379820391,
64
+ "recall": 1.0,
65
+ "f1-score": 0.9997357992073976,
66
+ "support": 3784.0
67
+ },
68
+ "forgetting_action_policy::delete": {
69
+ "precision": 0.9997354497354497,
70
+ "recall": 0.9986786469344608,
71
+ "f1-score": 0.9992067689053411,
72
+ "support": 3784.0
73
+ },
74
+ "forgetting_action_policy::keep": {
75
+ "precision": 0.9997357992073976,
76
+ "recall": 1.0,
77
+ "f1-score": 0.9998678821508786,
78
+ "support": 3784.0
79
+ },
80
+ "forgetting_action_policy::silence": {
81
+ "precision": 0.9989431968295905,
82
+ "recall": 0.9992071881606766,
83
+ "f1-score": 0.9990751750561501,
84
+ "support": 3784.0
85
+ },
86
+ "accuracy": 0.9994714587737844,
87
+ "macro avg": {
88
+ "precision": 0.9994715285056524,
89
+ "recall": 0.9994714587737844,
90
+ "f1-score": 0.9994714168187103,
91
+ "support": 18920.0
92
+ },
93
+ "weighted avg": {
94
+ "precision": 0.9994715285056522,
95
+ "recall": 0.9994714587737844,
96
+ "f1-score": 0.9994714168187103,
97
+ "support": 18920.0
98
+ }
99
+ }
100
+ },
101
+ "per_task": {
102
+ "forgetting_action_policy": {
103
+ "rows": 18920,
104
+ "accuracy": 0.9994714587737844,
105
+ "macro_f1": 0.9994714168187103,
106
+ "weighted_f1": 0.9994714168187103,
107
+ "micro_f1": 0.9994714587737844,
108
+ "labels": [
109
+ "compress",
110
+ "decay",
111
+ "delete",
112
+ "keep",
113
+ "silence"
114
+ ],
115
+ "confusion_matrix": [
116
+ [
117
+ 3782,
118
+ 2,
119
+ 0,
120
+ 0,
121
+ 0
122
+ ],
123
+ [
124
+ 0,
125
+ 3784,
126
+ 0,
127
+ 0,
128
+ 0
129
+ ],
130
+ [
131
+ 0,
132
+ 0,
133
+ 3779,
134
+ 1,
135
+ 4
136
+ ],
137
+ [
138
+ 0,
139
+ 0,
140
+ 0,
141
+ 3784,
142
+ 0
143
+ ],
144
+ [
145
+ 2,
146
+ 0,
147
+ 1,
148
+ 0,
149
+ 3781
150
+ ]
151
+ ],
152
+ "wrong_task_predictions": 0,
153
+ "wrong_task_rate": 0.0
154
+ }
155
+ },
156
+ "calibration": {
157
+ "method": "temperature_grid_search",
158
+ "rows": 5000,
159
+ "temperature": 2.0,
160
+ "loss": 0.08134587520392533
161
+ }
162
+ }
forgetting_action_policy_model.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3428a099b7e5a0cd7030c75527c4de7f8e61ec0bd8116815cbdf419d797cf9ed
3
+ size 744949446
manifest.json ADDED
@@ -0,0 +1,2799 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "manifest_schema_version": 3,
3
+ "config_path": "/nvme/CognitiveMemoryLayer/packages/models/model_pipeline.toml",
4
+ "trained_at_utc": "2026-03-25T13:59:08.669850+00:00",
5
+ "paths": {
6
+ "prepared_dir": "/nvme/CognitiveMemoryLayer/packages/models/prepared_data/modelpack",
7
+ "trained_models_dir": "/nvme/CognitiveMemoryLayer/packages/models/trained_models"
8
+ },
9
+ "train_settings": {
10
+ "seed": 42,
11
+ "families": [
12
+ "router",
13
+ "extractor",
14
+ "pair"
15
+ ],
16
+ "max_features": 250000,
17
+ "min_df": 2,
18
+ "ngram_min": 1,
19
+ "ngram_max": 2,
20
+ "max_iter": 25,
21
+ "alpha": 1e-05,
22
+ "predict_batch_size": 8192,
23
+ "early_stopping": true,
24
+ "early_stopping_patience": 3,
25
+ "early_stopping_metric": "macro_f1",
26
+ "early_stopping_min_delta": 0.001,
27
+ "calibration_method": "sigmoid",
28
+ "calibration_split": "eval",
29
+ "transformer": {
30
+ "model_name_or_path": "microsoft/deberta-v3-base",
31
+ "tokenizer_name": "",
32
+ "num_train_epochs": 2,
33
+ "per_device_train_batch_size": 8,
34
+ "per_device_eval_batch_size": 16,
35
+ "max_seq_length": 256,
36
+ "learning_rate": 2e-05,
37
+ "warmup_ratio": 0.1,
38
+ "weight_decay": 0.01,
39
+ "gradient_accumulation_steps": 1,
40
+ "score_margin": 0.15,
41
+ "focal_gamma": 1.5,
42
+ "temperature_grid": [
43
+ 0.7,
44
+ 0.85,
45
+ 1.0,
46
+ 1.15,
47
+ 1.3,
48
+ 1.5,
49
+ 2.0
50
+ ]
51
+ },
52
+ "token": {
53
+ "model_name_or_path": "bert-base-multilingual-cased",
54
+ "num_train_epochs": 8,
55
+ "per_device_train_batch_size": 8,
56
+ "per_device_eval_batch_size": 16,
57
+ "max_seq_length": 256,
58
+ "stride": 64,
59
+ "learning_rate": 5e-05,
60
+ "warmup_ratio": 0.1,
61
+ "weight_decay": 0.01,
62
+ "gradient_accumulation_steps": 1
63
+ },
64
+ "allow_dirty": false,
65
+ "release_mode": false,
66
+ "strict": true
67
+ },
68
+ "build_metadata": {
69
+ "python_version": "3.14.3 (main, Mar 11 2026, 19:17:30) [GCC 15.2.0]",
70
+ "dependencies": {
71
+ "scikit_learn": "1.8.0",
72
+ "joblib": "1.5.3",
73
+ "pandas": "3.0.1"
74
+ },
75
+ "commit_sha": "090060c4478c373a02eb09ae63292f51ca31a710",
76
+ "dirty": true
77
+ },
78
+ "configured_tasks": [
79
+ {
80
+ "task_name": "retrieval_constraint_relevance_pair",
81
+ "family": "pair",
82
+ "input_type": "pair",
83
+ "objective": "pair_ranking",
84
+ "enabled": true,
85
+ "artifact_name": "retrieval_constraint_relevance_pair",
86
+ "metrics": [
87
+ "mrr@10",
88
+ "ndcg@10",
89
+ "recall@10"
90
+ ],
91
+ "trainer": "transformer_pair",
92
+ "feature_backend": "",
93
+ "label_order": [],
94
+ "embedding_model_name": "",
95
+ "backbone_model_name": "BAAI/bge-reranker-base",
96
+ "tokenizer_name": ""
97
+ },
98
+ {
99
+ "task_name": "memory_rerank_pair",
100
+ "family": "pair",
101
+ "input_type": "pair",
102
+ "objective": "pair_ranking",
103
+ "enabled": true,
104
+ "artifact_name": "memory_rerank_pair",
105
+ "metrics": [
106
+ "mrr@10",
107
+ "ndcg@10",
108
+ "recall@10"
109
+ ],
110
+ "trainer": "transformer_pair",
111
+ "feature_backend": "",
112
+ "label_order": [],
113
+ "embedding_model_name": "",
114
+ "backbone_model_name": "BAAI/bge-reranker-base",
115
+ "tokenizer_name": ""
116
+ },
117
+ {
118
+ "task_name": "novelty_pair",
119
+ "family": "pair",
120
+ "input_type": "pair",
121
+ "objective": "classification",
122
+ "enabled": true,
123
+ "artifact_name": "novelty_pair",
124
+ "metrics": [
125
+ "accuracy",
126
+ "macro_f1",
127
+ "weighted_f1"
128
+ ],
129
+ "trainer": "transformer_pair",
130
+ "feature_backend": "",
131
+ "label_order": [],
132
+ "embedding_model_name": "",
133
+ "backbone_model_name": "BAAI/bge-reranker-base",
134
+ "tokenizer_name": ""
135
+ },
136
+ {
137
+ "task_name": "fact_extraction_structured",
138
+ "family": "extractor",
139
+ "input_type": "single",
140
+ "objective": "token_classification",
141
+ "enabled": true,
142
+ "artifact_name": "fact_extraction_structured",
143
+ "metrics": [
144
+ "span_f1",
145
+ "span_exact_match"
146
+ ],
147
+ "trainer": "",
148
+ "feature_backend": "",
149
+ "label_order": [],
150
+ "embedding_model_name": "",
151
+ "backbone_model_name": "",
152
+ "tokenizer_name": ""
153
+ },
154
+ {
155
+ "task_name": "schema_match_pair",
156
+ "family": "pair",
157
+ "input_type": "pair",
158
+ "objective": "classification",
159
+ "enabled": true,
160
+ "artifact_name": "schema_match_pair",
161
+ "metrics": [
162
+ "accuracy",
163
+ "macro_f1",
164
+ "weighted_f1"
165
+ ],
166
+ "trainer": "transformer_pair",
167
+ "feature_backend": "",
168
+ "label_order": [],
169
+ "embedding_model_name": "",
170
+ "backbone_model_name": "BAAI/bge-reranker-base",
171
+ "tokenizer_name": ""
172
+ },
173
+ {
174
+ "task_name": "reconsolidation_candidate_pair",
175
+ "family": "pair",
176
+ "input_type": "pair",
177
+ "objective": "pair_ranking",
178
+ "enabled": true,
179
+ "artifact_name": "reconsolidation_candidate_pair",
180
+ "metrics": [
181
+ "mrr@10",
182
+ "ndcg@10",
183
+ "recall@10"
184
+ ],
185
+ "trainer": "transformer_pair",
186
+ "feature_backend": "",
187
+ "label_order": [],
188
+ "embedding_model_name": "",
189
+ "backbone_model_name": "BAAI/bge-reranker-base",
190
+ "tokenizer_name": ""
191
+ },
192
+ {
193
+ "task_name": "write_importance_regression",
194
+ "family": "router",
195
+ "input_type": "single",
196
+ "objective": "single_regression",
197
+ "enabled": true,
198
+ "artifact_name": "write_importance_regression",
199
+ "metrics": [
200
+ "mae",
201
+ "rmse"
202
+ ],
203
+ "trainer": "",
204
+ "feature_backend": "",
205
+ "label_order": [],
206
+ "embedding_model_name": "",
207
+ "backbone_model_name": "",
208
+ "tokenizer_name": ""
209
+ },
210
+ {
211
+ "task_name": "memory_type",
212
+ "family": "router",
213
+ "input_type": "single",
214
+ "objective": "classification",
215
+ "enabled": true,
216
+ "artifact_name": "memory_type",
217
+ "metrics": [
218
+ "accuracy",
219
+ "macro_f1",
220
+ "weighted_f1"
221
+ ],
222
+ "trainer": "hierarchical_transformer",
223
+ "feature_backend": "",
224
+ "label_order": [],
225
+ "embedding_model_name": "",
226
+ "backbone_model_name": "microsoft/deberta-v3-base",
227
+ "tokenizer_name": ""
228
+ },
229
+ {
230
+ "task_name": "salience_bin",
231
+ "family": "router",
232
+ "input_type": "single",
233
+ "objective": "classification",
234
+ "enabled": true,
235
+ "artifact_name": "salience_bin",
236
+ "metrics": [
237
+ "accuracy",
238
+ "macro_f1",
239
+ "weighted_f1",
240
+ "ordinal_mae",
241
+ "off_by_two_rate"
242
+ ],
243
+ "trainer": "ordinal_threshold",
244
+ "feature_backend": "",
245
+ "label_order": [
246
+ "low",
247
+ "medium",
248
+ "high"
249
+ ],
250
+ "embedding_model_name": "",
251
+ "backbone_model_name": "",
252
+ "tokenizer_name": ""
253
+ },
254
+ {
255
+ "task_name": "importance_bin",
256
+ "family": "router",
257
+ "input_type": "single",
258
+ "objective": "classification",
259
+ "enabled": true,
260
+ "artifact_name": "importance_bin",
261
+ "metrics": [
262
+ "accuracy",
263
+ "macro_f1",
264
+ "weighted_f1",
265
+ "ordinal_mae",
266
+ "off_by_two_rate"
267
+ ],
268
+ "trainer": "ordinal_threshold",
269
+ "feature_backend": "",
270
+ "label_order": [
271
+ "low",
272
+ "medium",
273
+ "high"
274
+ ],
275
+ "embedding_model_name": "",
276
+ "backbone_model_name": "",
277
+ "tokenizer_name": ""
278
+ },
279
+ {
280
+ "task_name": "confidence_bin",
281
+ "family": "router",
282
+ "input_type": "single",
283
+ "objective": "classification",
284
+ "enabled": true,
285
+ "artifact_name": "confidence_bin",
286
+ "metrics": [
287
+ "accuracy",
288
+ "macro_f1",
289
+ "weighted_f1",
290
+ "ordinal_mae",
291
+ "off_by_two_rate"
292
+ ],
293
+ "trainer": "ordinal_threshold",
294
+ "feature_backend": "",
295
+ "label_order": [
296
+ "low",
297
+ "medium",
298
+ "high"
299
+ ],
300
+ "embedding_model_name": "",
301
+ "backbone_model_name": "",
302
+ "tokenizer_name": ""
303
+ },
304
+ {
305
+ "task_name": "decay_profile",
306
+ "family": "router",
307
+ "input_type": "single",
308
+ "objective": "classification",
309
+ "enabled": true,
310
+ "artifact_name": "decay_profile",
311
+ "metrics": [
312
+ "accuracy",
313
+ "macro_f1",
314
+ "weighted_f1",
315
+ "ordinal_mae",
316
+ "off_by_two_rate"
317
+ ],
318
+ "trainer": "ordinal_threshold",
319
+ "feature_backend": "",
320
+ "label_order": [
321
+ "very_fast",
322
+ "fast",
323
+ "medium",
324
+ "slow",
325
+ "very_slow"
326
+ ],
327
+ "embedding_model_name": "",
328
+ "backbone_model_name": "",
329
+ "tokenizer_name": ""
330
+ },
331
+ {
332
+ "task_name": "pii_span_detection",
333
+ "family": "extractor",
334
+ "input_type": "single",
335
+ "objective": "token_classification",
336
+ "enabled": true,
337
+ "artifact_name": "pii_span_detection",
338
+ "metrics": [
339
+ "span_f1",
340
+ "span_exact_match"
341
+ ],
342
+ "trainer": "",
343
+ "feature_backend": "",
344
+ "label_order": [],
345
+ "embedding_model_name": "",
346
+ "backbone_model_name": "",
347
+ "tokenizer_name": ""
348
+ },
349
+ {
350
+ "task_name": "consolidation_gist_quality",
351
+ "family": "router",
352
+ "input_type": "single",
353
+ "objective": "classification",
354
+ "enabled": true,
355
+ "artifact_name": "consolidation_gist_quality",
356
+ "metrics": [
357
+ "accuracy",
358
+ "macro_f1",
359
+ "weighted_f1"
360
+ ],
361
+ "trainer": "transformer_text",
362
+ "feature_backend": "",
363
+ "label_order": [],
364
+ "embedding_model_name": "",
365
+ "backbone_model_name": "microsoft/deberta-v3-base",
366
+ "tokenizer_name": ""
367
+ },
368
+ {
369
+ "task_name": "forgetting_action_policy",
370
+ "family": "router",
371
+ "input_type": "single",
372
+ "objective": "classification",
373
+ "enabled": true,
374
+ "artifact_name": "forgetting_action_policy",
375
+ "metrics": [
376
+ "accuracy",
377
+ "macro_f1",
378
+ "weighted_f1"
379
+ ],
380
+ "trainer": "transformer_text",
381
+ "feature_backend": "",
382
+ "label_order": [],
383
+ "embedding_model_name": "",
384
+ "backbone_model_name": "microsoft/deberta-v3-base",
385
+ "tokenizer_name": ""
386
+ },
387
+ {
388
+ "task_name": "constraint_dimension",
389
+ "family": "router",
390
+ "input_type": "single",
391
+ "objective": "classification",
392
+ "enabled": true,
393
+ "artifact_name": "constraint_dimension",
394
+ "metrics": [
395
+ "accuracy",
396
+ "macro_f1",
397
+ "weighted_f1"
398
+ ],
399
+ "trainer": "transformer_text",
400
+ "feature_backend": "",
401
+ "label_order": [],
402
+ "embedding_model_name": "",
403
+ "backbone_model_name": "microsoft/deberta-v3-base",
404
+ "tokenizer_name": ""
405
+ },
406
+ {
407
+ "task_name": "context_tag",
408
+ "family": "router",
409
+ "input_type": "single",
410
+ "objective": "classification",
411
+ "enabled": true,
412
+ "artifact_name": "context_tag",
413
+ "metrics": [
414
+ "accuracy",
415
+ "macro_f1",
416
+ "weighted_f1"
417
+ ],
418
+ "trainer": "transformer_text",
419
+ "feature_backend": "",
420
+ "label_order": [],
421
+ "embedding_model_name": "",
422
+ "backbone_model_name": "microsoft/deberta-v3-base",
423
+ "tokenizer_name": ""
424
+ }
425
+ ],
426
+ "preflight_validation": {
427
+ "ok": true,
428
+ "strict": true,
429
+ "errors": [],
430
+ "warnings": [],
431
+ "task_checks": [
432
+ {
433
+ "task_name": "retrieval_constraint_relevance_pair",
434
+ "family": "pair",
435
+ "input_type": "pair",
436
+ "objective": "pair_ranking",
437
+ "enabled": true,
438
+ "status": "ok",
439
+ "reason": null,
440
+ "rows_found": 80000,
441
+ "valid_score_rows": 0
442
+ },
443
+ {
444
+ "task_name": "memory_rerank_pair",
445
+ "family": "pair",
446
+ "input_type": "pair",
447
+ "objective": "pair_ranking",
448
+ "enabled": true,
449
+ "status": "ok",
450
+ "reason": null,
451
+ "rows_found": 80000,
452
+ "valid_score_rows": 0
453
+ },
454
+ {
455
+ "task_name": "novelty_pair",
456
+ "family": "pair",
457
+ "input_type": "pair",
458
+ "objective": "classification",
459
+ "enabled": true,
460
+ "status": "ok",
461
+ "reason": null,
462
+ "rows_found": 117206,
463
+ "valid_score_rows": 0
464
+ },
465
+ {
466
+ "task_name": "fact_extraction_structured",
467
+ "family": "extractor",
468
+ "input_type": "single",
469
+ "objective": "token_classification",
470
+ "enabled": true,
471
+ "status": "ok",
472
+ "reason": null,
473
+ "rows_found": 31997,
474
+ "valid_score_rows": 0
475
+ },
476
+ {
477
+ "task_name": "schema_match_pair",
478
+ "family": "pair",
479
+ "input_type": "pair",
480
+ "objective": "classification",
481
+ "enabled": true,
482
+ "status": "ok",
483
+ "reason": null,
484
+ "rows_found": 81200,
485
+ "valid_score_rows": 0
486
+ },
487
+ {
488
+ "task_name": "reconsolidation_candidate_pair",
489
+ "family": "pair",
490
+ "input_type": "pair",
491
+ "objective": "pair_ranking",
492
+ "enabled": true,
493
+ "status": "ok",
494
+ "reason": null,
495
+ "rows_found": 80000,
496
+ "valid_score_rows": 0
497
+ },
498
+ {
499
+ "task_name": "write_importance_regression",
500
+ "family": "router",
501
+ "input_type": "single",
502
+ "objective": "single_regression",
503
+ "enabled": true,
504
+ "status": "ok",
505
+ "reason": null,
506
+ "rows_found": 8000,
507
+ "valid_score_rows": 8000
508
+ },
509
+ {
510
+ "task_name": "memory_type",
511
+ "family": "router",
512
+ "input_type": "single",
513
+ "objective": "classification",
514
+ "enabled": true,
515
+ "status": "ok",
516
+ "reason": null,
517
+ "rows_found": 120000,
518
+ "valid_score_rows": 0
519
+ },
520
+ {
521
+ "task_name": "salience_bin",
522
+ "family": "router",
523
+ "input_type": "single",
524
+ "objective": "classification",
525
+ "enabled": true,
526
+ "status": "ok",
527
+ "reason": null,
528
+ "rows_found": 24000,
529
+ "valid_score_rows": 0
530
+ },
531
+ {
532
+ "task_name": "importance_bin",
533
+ "family": "router",
534
+ "input_type": "single",
535
+ "objective": "classification",
536
+ "enabled": true,
537
+ "status": "ok",
538
+ "reason": null,
539
+ "rows_found": 24000,
540
+ "valid_score_rows": 0
541
+ },
542
+ {
543
+ "task_name": "confidence_bin",
544
+ "family": "router",
545
+ "input_type": "single",
546
+ "objective": "classification",
547
+ "enabled": true,
548
+ "status": "ok",
549
+ "reason": null,
550
+ "rows_found": 24000,
551
+ "valid_score_rows": 0
552
+ },
553
+ {
554
+ "task_name": "decay_profile",
555
+ "family": "router",
556
+ "input_type": "single",
557
+ "objective": "classification",
558
+ "enabled": true,
559
+ "status": "ok",
560
+ "reason": null,
561
+ "rows_found": 40000,
562
+ "valid_score_rows": 0
563
+ },
564
+ {
565
+ "task_name": "pii_span_detection",
566
+ "family": "extractor",
567
+ "input_type": "single",
568
+ "objective": "token_classification",
569
+ "enabled": true,
570
+ "status": "ok",
571
+ "reason": null,
572
+ "rows_found": 30516,
573
+ "valid_score_rows": 0
574
+ },
575
+ {
576
+ "task_name": "consolidation_gist_quality",
577
+ "family": "router",
578
+ "input_type": "single",
579
+ "objective": "classification",
580
+ "enabled": true,
581
+ "status": "ok",
582
+ "reason": null,
583
+ "rows_found": 10432,
584
+ "valid_score_rows": 0
585
+ },
586
+ {
587
+ "task_name": "forgetting_action_policy",
588
+ "family": "router",
589
+ "input_type": "single",
590
+ "objective": "classification",
591
+ "enabled": true,
592
+ "status": "ok",
593
+ "reason": null,
594
+ "rows_found": 40000,
595
+ "valid_score_rows": 0
596
+ },
597
+ {
598
+ "task_name": "constraint_dimension",
599
+ "family": "router",
600
+ "input_type": "single",
601
+ "objective": "classification",
602
+ "enabled": true,
603
+ "status": "ok",
604
+ "reason": null,
605
+ "rows_found": 48000,
606
+ "valid_score_rows": 0
607
+ },
608
+ {
609
+ "task_name": "context_tag",
610
+ "family": "router",
611
+ "input_type": "single",
612
+ "objective": "classification",
613
+ "enabled": true,
614
+ "status": "ok",
615
+ "reason": null,
616
+ "rows_found": 64000,
617
+ "valid_score_rows": 0
618
+ }
619
+ ],
620
+ "observed_tasks_by_family": {
621
+ "extractor": [
622
+ "constraint_scope",
623
+ "constraint_stability",
624
+ "constraint_type",
625
+ "fact_type",
626
+ "pii_presence"
627
+ ],
628
+ "pair": [
629
+ "conflict_detection",
630
+ "constraint_rerank",
631
+ "memory_rerank_pair",
632
+ "novelty_pair",
633
+ "reconsolidation_candidate_pair",
634
+ "retrieval_constraint_relevance_pair",
635
+ "schema_match_pair",
636
+ "scope_match",
637
+ "supersession"
638
+ ],
639
+ "router": [
640
+ "confidence_bin",
641
+ "consolidation_gist_quality",
642
+ "constraint_dimension",
643
+ "context_tag",
644
+ "decay_profile",
645
+ "forgetting_action_policy",
646
+ "importance_bin",
647
+ "memory_type",
648
+ "query_domain",
649
+ "query_intent",
650
+ "salience_bin",
651
+ "write_importance_regression"
652
+ ]
653
+ },
654
+ "coverage_vs_config": {
655
+ "extractor": {
656
+ "configured_enabled_tasks": [],
657
+ "observed_tasks": [
658
+ "constraint_scope",
659
+ "constraint_stability",
660
+ "constraint_type",
661
+ "fact_type",
662
+ "pii_presence"
663
+ ],
664
+ "missing_configured_tasks": []
665
+ },
666
+ "pair": {
667
+ "configured_enabled_tasks": [
668
+ "memory_rerank_pair",
669
+ "novelty_pair",
670
+ "reconsolidation_candidate_pair",
671
+ "retrieval_constraint_relevance_pair",
672
+ "schema_match_pair"
673
+ ],
674
+ "observed_tasks": [
675
+ "conflict_detection",
676
+ "constraint_rerank",
677
+ "memory_rerank_pair",
678
+ "novelty_pair",
679
+ "reconsolidation_candidate_pair",
680
+ "retrieval_constraint_relevance_pair",
681
+ "schema_match_pair",
682
+ "scope_match",
683
+ "supersession"
684
+ ],
685
+ "missing_configured_tasks": []
686
+ },
687
+ "router": {
688
+ "configured_enabled_tasks": [
689
+ "confidence_bin",
690
+ "consolidation_gist_quality",
691
+ "constraint_dimension",
692
+ "context_tag",
693
+ "decay_profile",
694
+ "forgetting_action_policy",
695
+ "importance_bin",
696
+ "memory_type",
697
+ "salience_bin",
698
+ "write_importance_regression"
699
+ ],
700
+ "observed_tasks": [
701
+ "confidence_bin",
702
+ "consolidation_gist_quality",
703
+ "constraint_dimension",
704
+ "context_tag",
705
+ "decay_profile",
706
+ "forgetting_action_policy",
707
+ "importance_bin",
708
+ "memory_type",
709
+ "query_domain",
710
+ "query_intent",
711
+ "salience_bin",
712
+ "write_importance_regression"
713
+ ],
714
+ "missing_configured_tasks": []
715
+ }
716
+ }
717
+ },
718
+ "families": {
719
+ "router": {
720
+ "model_path": "packages/models/trained_models/router_model.joblib"
721
+ },
722
+ "extractor": {
723
+ "model_path": "packages/models/trained_models/extractor_model.joblib"
724
+ },
725
+ "pair": {
726
+ "model_path": "packages/models/trained_models/pair_model.joblib"
727
+ }
728
+ },
729
+ "task_training_status": {
730
+ "retrieval_constraint_relevance_pair": {
731
+ "status": "filtered_out",
732
+ "reason": "Excluded by --tasks filter",
733
+ "family": "pair",
734
+ "objective": "pair_ranking",
735
+ "enabled": true
736
+ },
737
+ "memory_rerank_pair": {
738
+ "status": "filtered_out",
739
+ "reason": "Excluded by --tasks filter",
740
+ "family": "pair",
741
+ "objective": "pair_ranking",
742
+ "enabled": true
743
+ },
744
+ "novelty_pair": {
745
+ "status": "filtered_out",
746
+ "reason": "Excluded by --tasks filter",
747
+ "family": "pair",
748
+ "objective": "classification",
749
+ "enabled": true
750
+ },
751
+ "fact_extraction_structured": {
752
+ "status": "filtered_out",
753
+ "reason": "Excluded by --tasks filter",
754
+ "family": "extractor",
755
+ "objective": "token_classification",
756
+ "enabled": true
757
+ },
758
+ "schema_match_pair": {
759
+ "status": "filtered_out",
760
+ "reason": "Excluded by --tasks filter",
761
+ "family": "pair",
762
+ "objective": "classification",
763
+ "enabled": true
764
+ },
765
+ "reconsolidation_candidate_pair": {
766
+ "status": "filtered_out",
767
+ "reason": "Excluded by --tasks filter",
768
+ "family": "pair",
769
+ "objective": "pair_ranking",
770
+ "enabled": true
771
+ },
772
+ "write_importance_regression": {
773
+ "status": "filtered_out",
774
+ "reason": "Excluded by --tasks filter",
775
+ "family": "router",
776
+ "objective": "single_regression",
777
+ "enabled": true
778
+ },
779
+ "memory_type": {
780
+ "status": "filtered_out",
781
+ "reason": "Excluded by --tasks filter",
782
+ "family": "router",
783
+ "objective": "classification",
784
+ "enabled": true
785
+ },
786
+ "salience_bin": {
787
+ "status": "filtered_out",
788
+ "reason": "Excluded by --tasks filter",
789
+ "family": "router",
790
+ "objective": "classification",
791
+ "enabled": true
792
+ },
793
+ "importance_bin": {
794
+ "status": "filtered_out",
795
+ "reason": "Excluded by --tasks filter",
796
+ "family": "router",
797
+ "objective": "classification",
798
+ "enabled": true
799
+ },
800
+ "confidence_bin": {
801
+ "status": "filtered_out",
802
+ "reason": "Excluded by --tasks filter",
803
+ "family": "router",
804
+ "objective": "classification",
805
+ "enabled": true
806
+ },
807
+ "decay_profile": {
808
+ "status": "filtered_out",
809
+ "reason": "Excluded by --tasks filter",
810
+ "family": "router",
811
+ "objective": "classification",
812
+ "enabled": true
813
+ },
814
+ "pii_span_detection": {
815
+ "status": "trained",
816
+ "reason": null,
817
+ "family": "extractor",
818
+ "objective": "token_classification",
819
+ "enabled": true,
820
+ "model_path": "/nvme/CognitiveMemoryLayer/packages/models/trained_models/pii_span_detection_model.joblib",
821
+ "train_rows": 30516
822
+ },
823
+ "consolidation_gist_quality": {
824
+ "status": "filtered_out",
825
+ "reason": "Excluded by --tasks filter",
826
+ "family": "router",
827
+ "objective": "classification",
828
+ "enabled": true
829
+ },
830
+ "forgetting_action_policy": {
831
+ "status": "filtered_out",
832
+ "reason": "Excluded by --tasks filter",
833
+ "family": "router",
834
+ "objective": "classification",
835
+ "enabled": true
836
+ },
837
+ "constraint_dimension": {
838
+ "status": "filtered_out",
839
+ "reason": "Excluded by --tasks filter",
840
+ "family": "router",
841
+ "objective": "classification",
842
+ "enabled": true
843
+ },
844
+ "context_tag": {
845
+ "status": "filtered_out",
846
+ "reason": "Excluded by --tasks filter",
847
+ "family": "router",
848
+ "objective": "classification",
849
+ "enabled": true
850
+ }
851
+ },
852
+ "task_models": {
853
+ "memory_rerank_pair": {
854
+ "model_path": "packages/models/trained_models/memory_rerank_pair_model.joblib",
855
+ "hf_model_dir": "packages/models/trained_models/memory_rerank_pair_hf"
856
+ },
857
+ "retrieval_constraint_relevance_pair": {
858
+ "model_path": "packages/models/trained_models/retrieval_constraint_relevance_pair_model.joblib",
859
+ "hf_model_dir": "packages/models/trained_models/retrieval_constraint_relevance_pair_hf"
860
+ },
861
+ "novelty_pair": {
862
+ "model_path": "packages/models/trained_models/novelty_pair_model.joblib",
863
+ "hf_model_dir": "packages/models/trained_models/novelty_pair_hf"
864
+ },
865
+ "fact_extraction_structured": {
866
+ "model_path": "packages/models/trained_models/fact_extraction_structured_model.joblib",
867
+ "hf_model_dir": "packages/models/trained_models/fact_extraction_structured_hf"
868
+ },
869
+ "schema_match_pair": {
870
+ "task": "schema_match_pair",
871
+ "objective": "classification",
872
+ "trainer": "transformer_pair",
873
+ "model_kind": "transformer_pair",
874
+ "model_path": "/nvme/CognitiveMemoryLayer/packages/models/trained_models/schema_match_pair_model.joblib",
875
+ "hf_model_dir": "/nvme/CognitiveMemoryLayer/packages/models/trained_models/schema_match_pair_hf",
876
+ "train_rows": 81200,
877
+ "test": {
878
+ "rows": 10000,
879
+ "accuracy": 0.8552,
880
+ "macro_f1": 0.8551561470750885,
881
+ "weighted_f1": 0.8551561470750885,
882
+ "micro_f1": 0.8552,
883
+ "labels": [
884
+ "schema_match_pair::match",
885
+ "schema_match_pair::no_match"
886
+ ],
887
+ "confusion_matrix": [
888
+ [
889
+ 4189,
890
+ 811
891
+ ],
892
+ [
893
+ 637,
894
+ 4363
895
+ ]
896
+ ],
897
+ "calibration_error": 0.03336120770113563,
898
+ "classification_report": {
899
+ "schema_match_pair::match": {
900
+ "precision": 0.8680066307501036,
901
+ "recall": 0.8378,
902
+ "f1-score": 0.852635864034195,
903
+ "support": 5000.0
904
+ },
905
+ "schema_match_pair::no_match": {
906
+ "precision": 0.8432547352145342,
907
+ "recall": 0.8726,
908
+ "f1-score": 0.8576764301159819,
909
+ "support": 5000.0
910
+ },
911
+ "accuracy": 0.8552,
912
+ "macro avg": {
913
+ "precision": 0.8556306829823189,
914
+ "recall": 0.8552,
915
+ "f1-score": 0.8551561470750885,
916
+ "support": 10000.0
917
+ },
918
+ "weighted avg": {
919
+ "precision": 0.8556306829823188,
920
+ "recall": 0.8552,
921
+ "f1-score": 0.8551561470750885,
922
+ "support": 10000.0
923
+ }
924
+ }
925
+ },
926
+ "eval": {
927
+ "rows": 10000,
928
+ "accuracy": 0.8517,
929
+ "macro_f1": 0.8516351927685533,
930
+ "weighted_f1": 0.8516351927685533,
931
+ "micro_f1": 0.8517,
932
+ "labels": [
933
+ "schema_match_pair::match",
934
+ "schema_match_pair::no_match"
935
+ ],
936
+ "confusion_matrix": [
937
+ [
938
+ 4154,
939
+ 846
940
+ ],
941
+ [
942
+ 637,
943
+ 4363
944
+ ]
945
+ ],
946
+ "calibration_error": 0.03950018305884612,
947
+ "classification_report": {
948
+ "schema_match_pair::match": {
949
+ "precision": 0.8670423711125026,
950
+ "recall": 0.8308,
951
+ "f1-score": 0.848534368297416,
952
+ "support": 5000.0
953
+ },
954
+ "schema_match_pair::no_match": {
955
+ "precision": 0.8375887886350547,
956
+ "recall": 0.8726,
957
+ "f1-score": 0.8547360172396905,
958
+ "support": 5000.0
959
+ },
960
+ "accuracy": 0.8517,
961
+ "macro avg": {
962
+ "precision": 0.8523155798737787,
963
+ "recall": 0.8517,
964
+ "f1-score": 0.8516351927685533,
965
+ "support": 10000.0
966
+ },
967
+ "weighted avg": {
968
+ "precision": 0.8523155798737786,
969
+ "recall": 0.8517,
970
+ "f1-score": 0.8516351927685533,
971
+ "support": 10000.0
972
+ }
973
+ }
974
+ },
975
+ "actual_epochs": 4,
976
+ "best_epoch": 4,
977
+ "early_stopped": false,
978
+ "selection_metric": "macro_f1",
979
+ "selection_value": 0.8516351927685533,
980
+ "backbone_model_name": "BAAI/bge-reranker-base",
981
+ "tokenizer_name": "BAAI/bge-reranker-base",
982
+ "calibration": {
983
+ "method": "temperature_grid_search",
984
+ "rows": 10000,
985
+ "temperature": 1.3,
986
+ "loss": 0.3754132442613608
987
+ },
988
+ "thresholds": {
989
+ "default_threshold": 0.26,
990
+ "positive_label": "schema_match_pair::match",
991
+ "precision_floor": 0.85,
992
+ "positive_f1": 0.8544423440453687,
993
+ "positive_precision": 0.8501286873886359
994
+ },
995
+ "artifact_scope": "task",
996
+ "evaluation_suite": "standard",
997
+ "dataset_hashes": {
998
+ "train": {
999
+ "path": "/nvme/CognitiveMemoryLayer/packages/models/prepared_data/modelpack/pair_train.parquet",
1000
+ "sha256": "5b19ae2af42d3bbeb58fa8cda2a6b40748e2561a7c4c60a464e2eced711d7c16",
1001
+ "bytes": 99474783
1002
+ },
1003
+ "test": {
1004
+ "path": "/nvme/CognitiveMemoryLayer/packages/models/prepared_data/modelpack/pair_test.parquet",
1005
+ "sha256": "33f2d21d29ab684df5c9ab8df2b8bea8ffaa485434bc14bed350cea86a075a88",
1006
+ "bytes": 14270999
1007
+ },
1008
+ "eval": {
1009
+ "path": "/nvme/CognitiveMemoryLayer/packages/models/prepared_data/modelpack/pair_eval.parquet",
1010
+ "sha256": "cb6fae7ab447adb0d49af2de458dfbfa82f1af1efa5fd72644e2e6a9db7d07b7",
1011
+ "bytes": 14298411
1012
+ }
1013
+ },
1014
+ "release_gates": {
1015
+ "passed": true,
1016
+ "checks": [
1017
+ {
1018
+ "section": "test",
1019
+ "metric": "macro_f1",
1020
+ "threshold": 0.8,
1021
+ "actual": 0.8551561470750885,
1022
+ "passed": true
1023
+ },
1024
+ {
1025
+ "section": "test",
1026
+ "metric": "calibration_error",
1027
+ "threshold": {
1028
+ "max": 0.08
1029
+ },
1030
+ "actual": 0.03336120770113563,
1031
+ "passed": true
1032
+ }
1033
+ ]
1034
+ }
1035
+ },
1036
+ "reconsolidation_candidate_pair": {
1037
+ "model_path": "packages/models/trained_models/reconsolidation_candidate_pair_model.joblib",
1038
+ "hf_model_dir": "packages/models/trained_models/reconsolidation_candidate_pair_hf"
1039
+ },
1040
+ "write_importance_regression": {
1041
+ "model_path": "packages/models/trained_models/write_importance_regression_model.joblib"
1042
+ },
1043
+ "forgetting_action_policy": {
1044
+ "task": "forgetting_action_policy",
1045
+ "objective": "classification",
1046
+ "trainer": "transformer_text",
1047
+ "model_kind": "transformer_text",
1048
+ "model_path": "/nvme/CognitiveMemoryLayer/packages/models/trained_models/forgetting_action_policy_model.joblib",
1049
+ "hf_model_dir": "/nvme/CognitiveMemoryLayer/packages/models/trained_models/forgetting_action_policy_hf",
1050
+ "train_rows": 28580,
1051
+ "test": {
1052
+ "rows": 18920,
1053
+ "accuracy": 0.9994714587737844,
1054
+ "macro_f1": 0.9994714168187103,
1055
+ "weighted_f1": 0.9994714168187103,
1056
+ "micro_f1": 0.9994714587737844,
1057
+ "labels": [
1058
+ "forgetting_action_policy::compress",
1059
+ "forgetting_action_policy::decay",
1060
+ "forgetting_action_policy::delete",
1061
+ "forgetting_action_policy::keep",
1062
+ "forgetting_action_policy::silence"
1063
+ ],
1064
+ "confusion_matrix": [
1065
+ [
1066
+ 3782,
1067
+ 2,
1068
+ 0,
1069
+ 0,
1070
+ 0
1071
+ ],
1072
+ [
1073
+ 0,
1074
+ 3784,
1075
+ 0,
1076
+ 0,
1077
+ 0
1078
+ ],
1079
+ [
1080
+ 0,
1081
+ 0,
1082
+ 3779,
1083
+ 1,
1084
+ 4
1085
+ ],
1086
+ [
1087
+ 0,
1088
+ 0,
1089
+ 0,
1090
+ 3784,
1091
+ 0
1092
+ ],
1093
+ [
1094
+ 2,
1095
+ 0,
1096
+ 1,
1097
+ 0,
1098
+ 3781
1099
+ ]
1100
+ ],
1101
+ "calibration_error": 0.01649172677837832,
1102
+ "classification_report": {
1103
+ "forgetting_action_policy::compress": {
1104
+ "precision": 0.9994714587737844,
1105
+ "recall": 0.9994714587737844,
1106
+ "f1-score": 0.9994714587737844,
1107
+ "support": 3784.0
1108
+ },
1109
+ "forgetting_action_policy::decay": {
1110
+ "precision": 0.9994717379820391,
1111
+ "recall": 1.0,
1112
+ "f1-score": 0.9997357992073976,
1113
+ "support": 3784.0
1114
+ },
1115
+ "forgetting_action_policy::delete": {
1116
+ "precision": 0.9997354497354497,
1117
+ "recall": 0.9986786469344608,
1118
+ "f1-score": 0.9992067689053411,
1119
+ "support": 3784.0
1120
+ },
1121
+ "forgetting_action_policy::keep": {
1122
+ "precision": 0.9997357992073976,
1123
+ "recall": 1.0,
1124
+ "f1-score": 0.9998678821508786,
1125
+ "support": 3784.0
1126
+ },
1127
+ "forgetting_action_policy::silence": {
1128
+ "precision": 0.9989431968295905,
1129
+ "recall": 0.9992071881606766,
1130
+ "f1-score": 0.9990751750561501,
1131
+ "support": 3784.0
1132
+ },
1133
+ "accuracy": 0.9994714587737844,
1134
+ "macro avg": {
1135
+ "precision": 0.9994715285056524,
1136
+ "recall": 0.9994714587737844,
1137
+ "f1-score": 0.9994714168187103,
1138
+ "support": 18920.0
1139
+ },
1140
+ "weighted avg": {
1141
+ "precision": 0.9994715285056522,
1142
+ "recall": 0.9994714587737844,
1143
+ "f1-score": 0.9994714168187103,
1144
+ "support": 18920.0
1145
+ }
1146
+ }
1147
+ },
1148
+ "eval": {
1149
+ "rows": 5000,
1150
+ "accuracy": 0.984,
1151
+ "macro_f1": 0.9839922007228425,
1152
+ "weighted_f1": 0.9839922007228427,
1153
+ "micro_f1": 0.984,
1154
+ "labels": [
1155
+ "forgetting_action_policy::compress",
1156
+ "forgetting_action_policy::decay",
1157
+ "forgetting_action_policy::delete",
1158
+ "forgetting_action_policy::keep",
1159
+ "forgetting_action_policy::silence"
1160
+ ],
1161
+ "confusion_matrix": [
1162
+ [
1163
+ 988,
1164
+ 3,
1165
+ 0,
1166
+ 9,
1167
+ 0
1168
+ ],
1169
+ [
1170
+ 3,
1171
+ 986,
1172
+ 1,
1173
+ 1,
1174
+ 9
1175
+ ],
1176
+ [
1177
+ 2,
1178
+ 0,
1179
+ 994,
1180
+ 0,
1181
+ 4
1182
+ ],
1183
+ [
1184
+ 6,
1185
+ 11,
1186
+ 0,
1187
+ 979,
1188
+ 4
1189
+ ],
1190
+ [
1191
+ 2,
1192
+ 11,
1193
+ 11,
1194
+ 3,
1195
+ 973
1196
+ ]
1197
+ ],
1198
+ "calibration_error": 0.0073812406567217975,
1199
+ "classification_report": {
1200
+ "forgetting_action_policy::compress": {
1201
+ "precision": 0.987012987012987,
1202
+ "recall": 0.988,
1203
+ "f1-score": 0.9875062468765617,
1204
+ "support": 1000.0
1205
+ },
1206
+ "forgetting_action_policy::decay": {
1207
+ "precision": 0.9752720079129574,
1208
+ "recall": 0.986,
1209
+ "f1-score": 0.9806066633515664,
1210
+ "support": 1000.0
1211
+ },
1212
+ "forgetting_action_policy::delete": {
1213
+ "precision": 0.9880715705765407,
1214
+ "recall": 0.994,
1215
+ "f1-score": 0.9910269192422732,
1216
+ "support": 1000.0
1217
+ },
1218
+ "forgetting_action_policy::keep": {
1219
+ "precision": 0.9868951612903226,
1220
+ "recall": 0.979,
1221
+ "f1-score": 0.9829317269076305,
1222
+ "support": 1000.0
1223
+ },
1224
+ "forgetting_action_policy::silence": {
1225
+ "precision": 0.9828282828282828,
1226
+ "recall": 0.973,
1227
+ "f1-score": 0.9778894472361809,
1228
+ "support": 1000.0
1229
+ },
1230
+ "accuracy": 0.984,
1231
+ "macro avg": {
1232
+ "precision": 0.984016001924218,
1233
+ "recall": 0.984,
1234
+ "f1-score": 0.9839922007228425,
1235
+ "support": 5000.0
1236
+ },
1237
+ "weighted avg": {
1238
+ "precision": 0.9840160019242181,
1239
+ "recall": 0.984,
1240
+ "f1-score": 0.9839922007228427,
1241
+ "support": 5000.0
1242
+ }
1243
+ }
1244
+ },
1245
+ "actual_epochs": 8,
1246
+ "best_epoch": 7,
1247
+ "early_stopped": false,
1248
+ "selection_metric": "macro_f1",
1249
+ "selection_value": 0.9839922007228425,
1250
+ "backbone_model_name": "microsoft/deberta-v3-base",
1251
+ "tokenizer_name": "microsoft/deberta-v3-base",
1252
+ "calibration": {
1253
+ "method": "temperature_grid_search",
1254
+ "rows": 5000,
1255
+ "temperature": 2.0,
1256
+ "loss": 0.08134587520392533
1257
+ },
1258
+ "artifact_scope": "task",
1259
+ "evaluation_suite": "standard",
1260
+ "dataset_hashes": {
1261
+ "train": {
1262
+ "path": "/nvme/CognitiveMemoryLayer/packages/models/prepared_data/modelpack/router_train.parquet",
1263
+ "sha256": "d6cf7bf6638ca6d2a0b12d3e011aac120866f3cbf7364359bcc32a1f86ac3770",
1264
+ "bytes": 44674192
1265
+ },
1266
+ "test": {
1267
+ "path": "/nvme/CognitiveMemoryLayer/packages/models/prepared_data/modelpack/router_test.parquet",
1268
+ "sha256": "2665b088823ca9733c9f53a62aa9e4c8f6148a8d169ebfb69641d04a3e710552",
1269
+ "bytes": 7175410
1270
+ },
1271
+ "eval": {
1272
+ "path": "/nvme/CognitiveMemoryLayer/packages/models/prepared_data/modelpack/router_eval.parquet",
1273
+ "sha256": "b492f3fd739408de18bfd17b5c57248b51b90dba10e744f2687b50cdc351e13f",
1274
+ "bytes": 6746274
1275
+ }
1276
+ },
1277
+ "release_gates": {
1278
+ "passed": true,
1279
+ "checks": [
1280
+ {
1281
+ "section": "test",
1282
+ "metric": "macro_f1",
1283
+ "threshold": 0.93,
1284
+ "actual": 0.9994714168187103,
1285
+ "passed": true
1286
+ },
1287
+ {
1288
+ "section": "test",
1289
+ "metric": "decay_recall",
1290
+ "threshold": 0.9,
1291
+ "actual": 1.0,
1292
+ "passed": true
1293
+ },
1294
+ {
1295
+ "section": "test",
1296
+ "metric": "delete_recall",
1297
+ "threshold": 0.9,
1298
+ "actual": 0.9986786469344608,
1299
+ "passed": true
1300
+ }
1301
+ ]
1302
+ }
1303
+ },
1304
+ "constraint_dimension": {
1305
+ "task": "constraint_dimension",
1306
+ "objective": "classification",
1307
+ "trainer": "transformer_text",
1308
+ "model_kind": "transformer_text",
1309
+ "model_path": "/nvme/CognitiveMemoryLayer/packages/models/trained_models/constraint_dimension_model.joblib",
1310
+ "hf_model_dir": "/nvme/CognitiveMemoryLayer/packages/models/trained_models/constraint_dimension_hf",
1311
+ "train_rows": 48000,
1312
+ "test": {
1313
+ "rows": 6000,
1314
+ "accuracy": 0.8828333333333334,
1315
+ "macro_f1": 0.883185453547077,
1316
+ "weighted_f1": 0.883185453547077,
1317
+ "micro_f1": 0.8828333333333334,
1318
+ "labels": [
1319
+ "constraint_dimension::causal",
1320
+ "constraint_dimension::goal",
1321
+ "constraint_dimension::other",
1322
+ "constraint_dimension::policy",
1323
+ "constraint_dimension::state",
1324
+ "constraint_dimension::value"
1325
+ ],
1326
+ "confusion_matrix": [
1327
+ [
1328
+ 966,
1329
+ 2,
1330
+ 20,
1331
+ 1,
1332
+ 7,
1333
+ 4
1334
+ ],
1335
+ [
1336
+ 1,
1337
+ 927,
1338
+ 47,
1339
+ 2,
1340
+ 6,
1341
+ 17
1342
+ ],
1343
+ [
1344
+ 12,
1345
+ 30,
1346
+ 817,
1347
+ 22,
1348
+ 56,
1349
+ 63
1350
+ ],
1351
+ [
1352
+ 0,
1353
+ 2,
1354
+ 26,
1355
+ 949,
1356
+ 1,
1357
+ 22
1358
+ ],
1359
+ [
1360
+ 27,
1361
+ 7,
1362
+ 63,
1363
+ 8,
1364
+ 865,
1365
+ 30
1366
+ ],
1367
+ [
1368
+ 7,
1369
+ 19,
1370
+ 137,
1371
+ 27,
1372
+ 37,
1373
+ 773
1374
+ ]
1375
+ ],
1376
+ "calibration_error": 0.08291319935909541,
1377
+ "classification_report": {
1378
+ "constraint_dimension::causal": {
1379
+ "precision": 0.9536031589338598,
1380
+ "recall": 0.966,
1381
+ "f1-score": 0.9597615499254843,
1382
+ "support": 1000.0
1383
+ },
1384
+ "constraint_dimension::goal": {
1385
+ "precision": 0.939209726443769,
1386
+ "recall": 0.927,
1387
+ "f1-score": 0.9330649219929542,
1388
+ "support": 1000.0
1389
+ },
1390
+ "constraint_dimension::other": {
1391
+ "precision": 0.7360360360360361,
1392
+ "recall": 0.817,
1393
+ "f1-score": 0.7744075829383886,
1394
+ "support": 1000.0
1395
+ },
1396
+ "constraint_dimension::policy": {
1397
+ "precision": 0.9405351833498513,
1398
+ "recall": 0.949,
1399
+ "f1-score": 0.944748631159781,
1400
+ "support": 1000.0
1401
+ },
1402
+ "constraint_dimension::state": {
1403
+ "precision": 0.8899176954732511,
1404
+ "recall": 0.865,
1405
+ "f1-score": 0.8772819472616633,
1406
+ "support": 1000.0
1407
+ },
1408
+ "constraint_dimension::value": {
1409
+ "precision": 0.8503850385038504,
1410
+ "recall": 0.773,
1411
+ "f1-score": 0.8098480880041907,
1412
+ "support": 1000.0
1413
+ },
1414
+ "accuracy": 0.8828333333333334,
1415
+ "macro avg": {
1416
+ "precision": 0.8849478064567696,
1417
+ "recall": 0.8828333333333332,
1418
+ "f1-score": 0.883185453547077,
1419
+ "support": 6000.0
1420
+ },
1421
+ "weighted avg": {
1422
+ "precision": 0.8849478064567696,
1423
+ "recall": 0.8828333333333334,
1424
+ "f1-score": 0.883185453547077,
1425
+ "support": 6000.0
1426
+ }
1427
+ }
1428
+ },
1429
+ "eval": {
1430
+ "rows": 6000,
1431
+ "accuracy": 0.8698333333333333,
1432
+ "macro_f1": 0.8703105353987516,
1433
+ "weighted_f1": 0.8703105353987517,
1434
+ "micro_f1": 0.8698333333333333,
1435
+ "labels": [
1436
+ "constraint_dimension::causal",
1437
+ "constraint_dimension::goal",
1438
+ "constraint_dimension::other",
1439
+ "constraint_dimension::policy",
1440
+ "constraint_dimension::state",
1441
+ "constraint_dimension::value"
1442
+ ],
1443
+ "confusion_matrix": [
1444
+ [
1445
+ 965,
1446
+ 2,
1447
+ 16,
1448
+ 0,
1449
+ 12,
1450
+ 5
1451
+ ],
1452
+ [
1453
+ 1,
1454
+ 907,
1455
+ 57,
1456
+ 1,
1457
+ 9,
1458
+ 25
1459
+ ],
1460
+ [
1461
+ 19,
1462
+ 31,
1463
+ 791,
1464
+ 18,
1465
+ 56,
1466
+ 85
1467
+ ],
1468
+ [
1469
+ 1,
1470
+ 0,
1471
+ 22,
1472
+ 954,
1473
+ 2,
1474
+ 21
1475
+ ],
1476
+ [
1477
+ 27,
1478
+ 13,
1479
+ 78,
1480
+ 6,
1481
+ 842,
1482
+ 34
1483
+ ],
1484
+ [
1485
+ 13,
1486
+ 23,
1487
+ 145,
1488
+ 23,
1489
+ 36,
1490
+ 760
1491
+ ]
1492
+ ],
1493
+ "calibration_error": 0.09575599064379686,
1494
+ "classification_report": {
1495
+ "constraint_dimension::causal": {
1496
+ "precision": 0.9405458089668616,
1497
+ "recall": 0.965,
1498
+ "f1-score": 0.9526159921026653,
1499
+ "support": 1000.0
1500
+ },
1501
+ "constraint_dimension::goal": {
1502
+ "precision": 0.9293032786885246,
1503
+ "recall": 0.907,
1504
+ "f1-score": 0.9180161943319838,
1505
+ "support": 1000.0
1506
+ },
1507
+ "constraint_dimension::other": {
1508
+ "precision": 0.7132551848512173,
1509
+ "recall": 0.791,
1510
+ "f1-score": 0.7501185395922239,
1511
+ "support": 1000.0
1512
+ },
1513
+ "constraint_dimension::policy": {
1514
+ "precision": 0.9520958083832335,
1515
+ "recall": 0.954,
1516
+ "f1-score": 0.9530469530469531,
1517
+ "support": 1000.0
1518
+ },
1519
+ "constraint_dimension::state": {
1520
+ "precision": 0.8798328108672936,
1521
+ "recall": 0.842,
1522
+ "f1-score": 0.8605007664793051,
1523
+ "support": 1000.0
1524
+ },
1525
+ "constraint_dimension::value": {
1526
+ "precision": 0.8172043010752689,
1527
+ "recall": 0.76,
1528
+ "f1-score": 0.7875647668393783,
1529
+ "support": 1000.0
1530
+ },
1531
+ "accuracy": 0.8698333333333333,
1532
+ "macro avg": {
1533
+ "precision": 0.8720395321387332,
1534
+ "recall": 0.8698333333333332,
1535
+ "f1-score": 0.8703105353987516,
1536
+ "support": 6000.0
1537
+ },
1538
+ "weighted avg": {
1539
+ "precision": 0.8720395321387332,
1540
+ "recall": 0.8698333333333333,
1541
+ "f1-score": 0.8703105353987517,
1542
+ "support": 6000.0
1543
+ }
1544
+ }
1545
+ },
1546
+ "actual_epochs": 10,
1547
+ "best_epoch": 10,
1548
+ "early_stopped": false,
1549
+ "backbone_model_name": "microsoft/deberta-v3-base",
1550
+ "tokenizer_name": "microsoft/deberta-v3-base",
1551
+ "calibration": {
1552
+ "method": "temperature_grid_search",
1553
+ "rows": 6000,
1554
+ "temperature": 2.0,
1555
+ "loss": 0.5294126563569427
1556
+ },
1557
+ "artifact_scope": "task",
1558
+ "evaluation_suite": "standard",
1559
+ "dataset_hashes": {
1560
+ "train": {
1561
+ "path": "/nvme/CognitiveMemoryLayer/packages/models/prepared_data/modelpack/router_train.parquet",
1562
+ "sha256": "d6cf7bf6638ca6d2a0b12d3e011aac120866f3cbf7364359bcc32a1f86ac3770",
1563
+ "bytes": 44674192
1564
+ },
1565
+ "test": {
1566
+ "path": "/nvme/CognitiveMemoryLayer/packages/models/prepared_data/modelpack/router_test.parquet",
1567
+ "sha256": "c24ff4a481709443ccdac0bad5916fede972611b3cdd683a578f750d18a40e71",
1568
+ "bytes": 7175410
1569
+ },
1570
+ "eval": {
1571
+ "path": "/nvme/CognitiveMemoryLayer/packages/models/prepared_data/modelpack/router_eval.parquet",
1572
+ "sha256": "b492f3fd739408de18bfd17b5c57248b51b90dba10e744f2687b50cdc351e13f",
1573
+ "bytes": 6746274
1574
+ }
1575
+ },
1576
+ "release_gates": {
1577
+ "passed": true,
1578
+ "checks": []
1579
+ }
1580
+ },
1581
+ "context_tag": {
1582
+ "task": "context_tag",
1583
+ "objective": "classification",
1584
+ "trainer": "transformer_text",
1585
+ "model_kind": "transformer_text",
1586
+ "model_path": "/nvme/CognitiveMemoryLayer/packages/models/trained_models/context_tag_model.joblib",
1587
+ "hf_model_dir": "/nvme/CognitiveMemoryLayer/packages/models/trained_models/context_tag_hf",
1588
+ "train_rows": 64000,
1589
+ "test": {
1590
+ "rows": 8000,
1591
+ "accuracy": 0.947375,
1592
+ "macro_f1": 0.9462078544778264,
1593
+ "weighted_f1": 0.9462078544778263,
1594
+ "micro_f1": 0.947375,
1595
+ "labels": [
1596
+ "context_tag::finance",
1597
+ "context_tag::food",
1598
+ "context_tag::general",
1599
+ "context_tag::health",
1600
+ "context_tag::social",
1601
+ "context_tag::tech",
1602
+ "context_tag::travel",
1603
+ "context_tag::work"
1604
+ ],
1605
+ "confusion_matrix": [
1606
+ [
1607
+ 979,
1608
+ 0,
1609
+ 12,
1610
+ 1,
1611
+ 0,
1612
+ 3,
1613
+ 1,
1614
+ 4
1615
+ ],
1616
+ [
1617
+ 0,
1618
+ 951,
1619
+ 15,
1620
+ 24,
1621
+ 8,
1622
+ 0,
1623
+ 2,
1624
+ 0
1625
+ ],
1626
+ [
1627
+ 52,
1628
+ 34,
1629
+ 775,
1630
+ 27,
1631
+ 28,
1632
+ 21,
1633
+ 30,
1634
+ 33
1635
+ ],
1636
+ [
1637
+ 0,
1638
+ 13,
1639
+ 15,
1640
+ 972,
1641
+ 0,
1642
+ 0,
1643
+ 0,
1644
+ 0
1645
+ ],
1646
+ [
1647
+ 1,
1648
+ 5,
1649
+ 10,
1650
+ 0,
1651
+ 975,
1652
+ 0,
1653
+ 6,
1654
+ 3
1655
+ ],
1656
+ [
1657
+ 2,
1658
+ 0,
1659
+ 9,
1660
+ 0,
1661
+ 0,
1662
+ 975,
1663
+ 0,
1664
+ 14
1665
+ ],
1666
+ [
1667
+ 4,
1668
+ 0,
1669
+ 4,
1670
+ 0,
1671
+ 4,
1672
+ 1,
1673
+ 986,
1674
+ 1
1675
+ ],
1676
+ [
1677
+ 5,
1678
+ 0,
1679
+ 11,
1680
+ 0,
1681
+ 0,
1682
+ 17,
1683
+ 1,
1684
+ 966
1685
+ ]
1686
+ ],
1687
+ "calibration_error": 0.024662236875587645,
1688
+ "classification_report": {
1689
+ "context_tag::finance": {
1690
+ "precision": 0.9386385426653883,
1691
+ "recall": 0.979,
1692
+ "f1-score": 0.9583945178658835,
1693
+ "support": 1000.0
1694
+ },
1695
+ "context_tag::food": {
1696
+ "precision": 0.9481555333998006,
1697
+ "recall": 0.951,
1698
+ "f1-score": 0.9495756365451822,
1699
+ "support": 1000.0
1700
+ },
1701
+ "context_tag::general": {
1702
+ "precision": 0.9106933019976499,
1703
+ "recall": 0.775,
1704
+ "f1-score": 0.8373851971907077,
1705
+ "support": 1000.0
1706
+ },
1707
+ "context_tag::health": {
1708
+ "precision": 0.94921875,
1709
+ "recall": 0.972,
1710
+ "f1-score": 0.9604743083003953,
1711
+ "support": 1000.0
1712
+ },
1713
+ "context_tag::social": {
1714
+ "precision": 0.9605911330049262,
1715
+ "recall": 0.975,
1716
+ "f1-score": 0.967741935483871,
1717
+ "support": 1000.0
1718
+ },
1719
+ "context_tag::tech": {
1720
+ "precision": 0.9587020648967551,
1721
+ "recall": 0.975,
1722
+ "f1-score": 0.9667823500247893,
1723
+ "support": 1000.0
1724
+ },
1725
+ "context_tag::travel": {
1726
+ "precision": 0.9610136452241715,
1727
+ "recall": 0.986,
1728
+ "f1-score": 0.9733464955577492,
1729
+ "support": 1000.0
1730
+ },
1731
+ "context_tag::work": {
1732
+ "precision": 0.9461312438785504,
1733
+ "recall": 0.966,
1734
+ "f1-score": 0.9559623948540327,
1735
+ "support": 1000.0
1736
+ },
1737
+ "accuracy": 0.947375,
1738
+ "macro avg": {
1739
+ "precision": 0.9466430268834052,
1740
+ "recall": 0.947375,
1741
+ "f1-score": 0.9462078544778264,
1742
+ "support": 8000.0
1743
+ },
1744
+ "weighted avg": {
1745
+ "precision": 0.9466430268834054,
1746
+ "recall": 0.947375,
1747
+ "f1-score": 0.9462078544778263,
1748
+ "support": 8000.0
1749
+ }
1750
+ }
1751
+ },
1752
+ "eval": {
1753
+ "rows": 8000,
1754
+ "accuracy": 0.94625,
1755
+ "macro_f1": 0.9445890976694143,
1756
+ "weighted_f1": 0.9445890976694143,
1757
+ "micro_f1": 0.94625,
1758
+ "labels": [
1759
+ "context_tag::finance",
1760
+ "context_tag::food",
1761
+ "context_tag::general",
1762
+ "context_tag::health",
1763
+ "context_tag::social",
1764
+ "context_tag::tech",
1765
+ "context_tag::travel",
1766
+ "context_tag::work"
1767
+ ],
1768
+ "confusion_matrix": [
1769
+ [
1770
+ 982,
1771
+ 0,
1772
+ 8,
1773
+ 0,
1774
+ 0,
1775
+ 4,
1776
+ 0,
1777
+ 6
1778
+ ],
1779
+ [
1780
+ 1,
1781
+ 972,
1782
+ 9,
1783
+ 11,
1784
+ 6,
1785
+ 0,
1786
+ 1,
1787
+ 0
1788
+ ],
1789
+ [
1790
+ 50,
1791
+ 53,
1792
+ 743,
1793
+ 33,
1794
+ 29,
1795
+ 26,
1796
+ 27,
1797
+ 39
1798
+ ],
1799
+ [
1800
+ 1,
1801
+ 18,
1802
+ 10,
1803
+ 967,
1804
+ 1,
1805
+ 2,
1806
+ 0,
1807
+ 1
1808
+ ],
1809
+ [
1810
+ 2,
1811
+ 5,
1812
+ 4,
1813
+ 1,
1814
+ 984,
1815
+ 0,
1816
+ 2,
1817
+ 2
1818
+ ],
1819
+ [
1820
+ 5,
1821
+ 0,
1822
+ 14,
1823
+ 2,
1824
+ 1,
1825
+ 971,
1826
+ 0,
1827
+ 7
1828
+ ],
1829
+ [
1830
+ 1,
1831
+ 2,
1832
+ 6,
1833
+ 0,
1834
+ 6,
1835
+ 1,
1836
+ 983,
1837
+ 1
1838
+ ],
1839
+ [
1840
+ 1,
1841
+ 0,
1842
+ 11,
1843
+ 0,
1844
+ 0,
1845
+ 19,
1846
+ 1,
1847
+ 968
1848
+ ]
1849
+ ],
1850
+ "calibration_error": 0.023930798305538282,
1851
+ "classification_report": {
1852
+ "context_tag::finance": {
1853
+ "precision": 0.9415148609779482,
1854
+ "recall": 0.982,
1855
+ "f1-score": 0.9613313754282917,
1856
+ "support": 1000.0
1857
+ },
1858
+ "context_tag::food": {
1859
+ "precision": 0.9257142857142857,
1860
+ "recall": 0.972,
1861
+ "f1-score": 0.9482926829268292,
1862
+ "support": 1000.0
1863
+ },
1864
+ "context_tag::general": {
1865
+ "precision": 0.9229813664596274,
1866
+ "recall": 0.743,
1867
+ "f1-score": 0.8232686980609418,
1868
+ "support": 1000.0
1869
+ },
1870
+ "context_tag::health": {
1871
+ "precision": 0.9536489151873767,
1872
+ "recall": 0.967,
1873
+ "f1-score": 0.9602780536246276,
1874
+ "support": 1000.0
1875
+ },
1876
+ "context_tag::social": {
1877
+ "precision": 0.9581304771178188,
1878
+ "recall": 0.984,
1879
+ "f1-score": 0.9708929452392698,
1880
+ "support": 1000.0
1881
+ },
1882
+ "context_tag::tech": {
1883
+ "precision": 0.9491691104594331,
1884
+ "recall": 0.971,
1885
+ "f1-score": 0.9599604547701434,
1886
+ "support": 1000.0
1887
+ },
1888
+ "context_tag::travel": {
1889
+ "precision": 0.9694280078895463,
1890
+ "recall": 0.983,
1891
+ "f1-score": 0.9761668321747765,
1892
+ "support": 1000.0
1893
+ },
1894
+ "context_tag::work": {
1895
+ "precision": 0.9453125,
1896
+ "recall": 0.968,
1897
+ "f1-score": 0.9565217391304348,
1898
+ "support": 1000.0
1899
+ },
1900
+ "accuracy": 0.94625,
1901
+ "macro avg": {
1902
+ "precision": 0.9457374404757546,
1903
+ "recall": 0.94625,
1904
+ "f1-score": 0.9445890976694143,
1905
+ "support": 8000.0
1906
+ },
1907
+ "weighted avg": {
1908
+ "precision": 0.9457374404757545,
1909
+ "recall": 0.94625,
1910
+ "f1-score": 0.9445890976694143,
1911
+ "support": 8000.0
1912
+ }
1913
+ }
1914
+ },
1915
+ "actual_epochs": 3,
1916
+ "best_epoch": 3,
1917
+ "early_stopped": false,
1918
+ "backbone_model_name": "microsoft/deberta-v3-base",
1919
+ "tokenizer_name": "microsoft/deberta-v3-base",
1920
+ "calibration": {
1921
+ "method": "temperature_grid_search",
1922
+ "rows": 8000,
1923
+ "temperature": 2.0,
1924
+ "loss": 0.22631261527409646
1925
+ },
1926
+ "artifact_scope": "task",
1927
+ "evaluation_suite": "standard",
1928
+ "dataset_hashes": {
1929
+ "train": {
1930
+ "path": "/nvme/CognitiveMemoryLayer/packages/models/prepared_data/modelpack/router_train.parquet",
1931
+ "sha256": "d6cf7bf6638ca6d2a0b12d3e011aac120866f3cbf7364359bcc32a1f86ac3770",
1932
+ "bytes": 44674192
1933
+ },
1934
+ "test": {
1935
+ "path": "/nvme/CognitiveMemoryLayer/packages/models/prepared_data/modelpack/router_test.parquet",
1936
+ "sha256": "c24ff4a481709443ccdac0bad5916fede972611b3cdd683a578f750d18a40e71",
1937
+ "bytes": 7175410
1938
+ },
1939
+ "eval": {
1940
+ "path": "/nvme/CognitiveMemoryLayer/packages/models/prepared_data/modelpack/router_eval.parquet",
1941
+ "sha256": "b492f3fd739408de18bfd17b5c57248b51b90dba10e744f2687b50cdc351e13f",
1942
+ "bytes": 6746274
1943
+ }
1944
+ },
1945
+ "release_gates": {
1946
+ "passed": true,
1947
+ "checks": []
1948
+ }
1949
+ },
1950
+ "confidence_bin": {
1951
+ "task": "confidence_bin",
1952
+ "objective": "classification",
1953
+ "trainer": "ordinal_threshold",
1954
+ "model_path": "/nvme/CognitiveMemoryLayer/packages/models/trained_models/confidence_bin_model.joblib",
1955
+ "train_rows": 24000,
1956
+ "test": {
1957
+ "rows": 3000,
1958
+ "accuracy": 1.0,
1959
+ "macro_f1": 1.0,
1960
+ "weighted_f1": 1.0,
1961
+ "micro_f1": 1.0,
1962
+ "labels": [
1963
+ "low",
1964
+ "medium",
1965
+ "high"
1966
+ ],
1967
+ "confusion_matrix": [
1968
+ [
1969
+ 1000,
1970
+ 0,
1971
+ 0
1972
+ ],
1973
+ [
1974
+ 0,
1975
+ 1000,
1976
+ 0
1977
+ ],
1978
+ [
1979
+ 0,
1980
+ 0,
1981
+ 1000
1982
+ ]
1983
+ ],
1984
+ "calibration_error": 0.0,
1985
+ "classification_report": {
1986
+ "confidence_bin::high": {
1987
+ "precision": 1.0,
1988
+ "recall": 1.0,
1989
+ "f1-score": 1.0,
1990
+ "support": 1000.0
1991
+ },
1992
+ "confidence_bin::low": {
1993
+ "precision": 1.0,
1994
+ "recall": 1.0,
1995
+ "f1-score": 1.0,
1996
+ "support": 1000.0
1997
+ },
1998
+ "confidence_bin::medium": {
1999
+ "precision": 1.0,
2000
+ "recall": 1.0,
2001
+ "f1-score": 1.0,
2002
+ "support": 1000.0
2003
+ },
2004
+ "accuracy": 1.0,
2005
+ "macro avg": {
2006
+ "precision": 1.0,
2007
+ "recall": 1.0,
2008
+ "f1-score": 1.0,
2009
+ "support": 3000.0
2010
+ },
2011
+ "weighted avg": {
2012
+ "precision": 1.0,
2013
+ "recall": 1.0,
2014
+ "f1-score": 1.0,
2015
+ "support": 3000.0
2016
+ }
2017
+ },
2018
+ "ordinal_mae": 0.0,
2019
+ "off_by_two_rate": 0.0
2020
+ },
2021
+ "eval": {
2022
+ "rows": 3000,
2023
+ "accuracy": 1.0,
2024
+ "macro_f1": 1.0,
2025
+ "weighted_f1": 1.0,
2026
+ "micro_f1": 1.0,
2027
+ "labels": [
2028
+ "low",
2029
+ "medium",
2030
+ "high"
2031
+ ],
2032
+ "confusion_matrix": [
2033
+ [
2034
+ 1000,
2035
+ 0,
2036
+ 0
2037
+ ],
2038
+ [
2039
+ 0,
2040
+ 1000,
2041
+ 0
2042
+ ],
2043
+ [
2044
+ 0,
2045
+ 0,
2046
+ 1000
2047
+ ]
2048
+ ],
2049
+ "calibration_error": 0.0,
2050
+ "classification_report": {
2051
+ "confidence_bin::high": {
2052
+ "precision": 1.0,
2053
+ "recall": 1.0,
2054
+ "f1-score": 1.0,
2055
+ "support": 1000.0
2056
+ },
2057
+ "confidence_bin::low": {
2058
+ "precision": 1.0,
2059
+ "recall": 1.0,
2060
+ "f1-score": 1.0,
2061
+ "support": 1000.0
2062
+ },
2063
+ "confidence_bin::medium": {
2064
+ "precision": 1.0,
2065
+ "recall": 1.0,
2066
+ "f1-score": 1.0,
2067
+ "support": 1000.0
2068
+ },
2069
+ "accuracy": 1.0,
2070
+ "macro avg": {
2071
+ "precision": 1.0,
2072
+ "recall": 1.0,
2073
+ "f1-score": 1.0,
2074
+ "support": 3000.0
2075
+ },
2076
+ "weighted avg": {
2077
+ "precision": 1.0,
2078
+ "recall": 1.0,
2079
+ "f1-score": 1.0,
2080
+ "support": 3000.0
2081
+ }
2082
+ },
2083
+ "ordinal_mae": 0.0,
2084
+ "off_by_two_rate": 0.0
2085
+ },
2086
+ "actual_epochs": 1,
2087
+ "best_epoch": 1,
2088
+ "early_stopped": false,
2089
+ "boundary_count": 2,
2090
+ "artifact_scope": "task",
2091
+ "evaluation_suite": "standard",
2092
+ "model_kind": "ordinal_threshold",
2093
+ "dataset_hashes": {
2094
+ "train": {
2095
+ "path": "/nvme/CognitiveMemoryLayer/packages/models/prepared_data/modelpack/router_train.parquet",
2096
+ "sha256": "d8bf7adebbfa6cda2ef35311b60e5b6028efd58ced64101671e6c64d4e87ca17",
2097
+ "bytes": 41617539
2098
+ },
2099
+ "test": {
2100
+ "path": "/nvme/CognitiveMemoryLayer/packages/models/prepared_data/modelpack/router_test.parquet",
2101
+ "sha256": "46c4cf932e38240bb411ff50814b562f5daac3a876f807eaaaa5a52f8d981995",
2102
+ "bytes": 6786069
2103
+ },
2104
+ "eval": {
2105
+ "path": "/nvme/CognitiveMemoryLayer/packages/models/prepared_data/modelpack/router_eval.parquet",
2106
+ "sha256": "ea69623ce36b98080f7c00ee722e45f7c870174a8967e87a5e7ed03ae18b1387",
2107
+ "bytes": 6369869
2108
+ }
2109
+ },
2110
+ "release_gates": {
2111
+ "passed": true,
2112
+ "checks": [
2113
+ {
2114
+ "section": "test",
2115
+ "metric": "macro_f1",
2116
+ "threshold": 0.85,
2117
+ "actual": 1.0,
2118
+ "passed": true
2119
+ }
2120
+ ]
2121
+ }
2122
+ },
2123
+ "decay_profile": {
2124
+ "task": "decay_profile",
2125
+ "objective": "classification",
2126
+ "trainer": "ordinal_threshold",
2127
+ "model_path": "/nvme/CognitiveMemoryLayer/packages/models/trained_models/decay_profile_model.joblib",
2128
+ "train_rows": 40000,
2129
+ "test": {
2130
+ "rows": 5000,
2131
+ "accuracy": 1.0,
2132
+ "macro_f1": 1.0,
2133
+ "weighted_f1": 1.0,
2134
+ "micro_f1": 1.0,
2135
+ "labels": [
2136
+ "very_fast",
2137
+ "fast",
2138
+ "medium",
2139
+ "slow",
2140
+ "very_slow"
2141
+ ],
2142
+ "confusion_matrix": [
2143
+ [
2144
+ 1000,
2145
+ 0,
2146
+ 0,
2147
+ 0,
2148
+ 0
2149
+ ],
2150
+ [
2151
+ 0,
2152
+ 1000,
2153
+ 0,
2154
+ 0,
2155
+ 0
2156
+ ],
2157
+ [
2158
+ 0,
2159
+ 0,
2160
+ 1000,
2161
+ 0,
2162
+ 0
2163
+ ],
2164
+ [
2165
+ 0,
2166
+ 0,
2167
+ 0,
2168
+ 1000,
2169
+ 0
2170
+ ],
2171
+ [
2172
+ 0,
2173
+ 0,
2174
+ 0,
2175
+ 0,
2176
+ 1000
2177
+ ]
2178
+ ],
2179
+ "calibration_error": 0.0,
2180
+ "classification_report": {
2181
+ "decay_profile::fast": {
2182
+ "precision": 1.0,
2183
+ "recall": 1.0,
2184
+ "f1-score": 1.0,
2185
+ "support": 1000.0
2186
+ },
2187
+ "decay_profile::medium": {
2188
+ "precision": 1.0,
2189
+ "recall": 1.0,
2190
+ "f1-score": 1.0,
2191
+ "support": 1000.0
2192
+ },
2193
+ "decay_profile::slow": {
2194
+ "precision": 1.0,
2195
+ "recall": 1.0,
2196
+ "f1-score": 1.0,
2197
+ "support": 1000.0
2198
+ },
2199
+ "decay_profile::very_fast": {
2200
+ "precision": 1.0,
2201
+ "recall": 1.0,
2202
+ "f1-score": 1.0,
2203
+ "support": 1000.0
2204
+ },
2205
+ "decay_profile::very_slow": {
2206
+ "precision": 1.0,
2207
+ "recall": 1.0,
2208
+ "f1-score": 1.0,
2209
+ "support": 1000.0
2210
+ },
2211
+ "accuracy": 1.0,
2212
+ "macro avg": {
2213
+ "precision": 1.0,
2214
+ "recall": 1.0,
2215
+ "f1-score": 1.0,
2216
+ "support": 5000.0
2217
+ },
2218
+ "weighted avg": {
2219
+ "precision": 1.0,
2220
+ "recall": 1.0,
2221
+ "f1-score": 1.0,
2222
+ "support": 5000.0
2223
+ }
2224
+ },
2225
+ "ordinal_mae": 0.0,
2226
+ "off_by_two_rate": 0.0
2227
+ },
2228
+ "eval": {
2229
+ "rows": 5000,
2230
+ "accuracy": 1.0,
2231
+ "macro_f1": 1.0,
2232
+ "weighted_f1": 1.0,
2233
+ "micro_f1": 1.0,
2234
+ "labels": [
2235
+ "very_fast",
2236
+ "fast",
2237
+ "medium",
2238
+ "slow",
2239
+ "very_slow"
2240
+ ],
2241
+ "confusion_matrix": [
2242
+ [
2243
+ 1000,
2244
+ 0,
2245
+ 0,
2246
+ 0,
2247
+ 0
2248
+ ],
2249
+ [
2250
+ 0,
2251
+ 1000,
2252
+ 0,
2253
+ 0,
2254
+ 0
2255
+ ],
2256
+ [
2257
+ 0,
2258
+ 0,
2259
+ 1000,
2260
+ 0,
2261
+ 0
2262
+ ],
2263
+ [
2264
+ 0,
2265
+ 0,
2266
+ 0,
2267
+ 1000,
2268
+ 0
2269
+ ],
2270
+ [
2271
+ 0,
2272
+ 0,
2273
+ 0,
2274
+ 0,
2275
+ 1000
2276
+ ]
2277
+ ],
2278
+ "calibration_error": 0.0,
2279
+ "classification_report": {
2280
+ "decay_profile::fast": {
2281
+ "precision": 1.0,
2282
+ "recall": 1.0,
2283
+ "f1-score": 1.0,
2284
+ "support": 1000.0
2285
+ },
2286
+ "decay_profile::medium": {
2287
+ "precision": 1.0,
2288
+ "recall": 1.0,
2289
+ "f1-score": 1.0,
2290
+ "support": 1000.0
2291
+ },
2292
+ "decay_profile::slow": {
2293
+ "precision": 1.0,
2294
+ "recall": 1.0,
2295
+ "f1-score": 1.0,
2296
+ "support": 1000.0
2297
+ },
2298
+ "decay_profile::very_fast": {
2299
+ "precision": 1.0,
2300
+ "recall": 1.0,
2301
+ "f1-score": 1.0,
2302
+ "support": 1000.0
2303
+ },
2304
+ "decay_profile::very_slow": {
2305
+ "precision": 1.0,
2306
+ "recall": 1.0,
2307
+ "f1-score": 1.0,
2308
+ "support": 1000.0
2309
+ },
2310
+ "accuracy": 1.0,
2311
+ "macro avg": {
2312
+ "precision": 1.0,
2313
+ "recall": 1.0,
2314
+ "f1-score": 1.0,
2315
+ "support": 5000.0
2316
+ },
2317
+ "weighted avg": {
2318
+ "precision": 1.0,
2319
+ "recall": 1.0,
2320
+ "f1-score": 1.0,
2321
+ "support": 5000.0
2322
+ }
2323
+ },
2324
+ "ordinal_mae": 0.0,
2325
+ "off_by_two_rate": 0.0
2326
+ },
2327
+ "actual_epochs": 1,
2328
+ "best_epoch": 1,
2329
+ "early_stopped": false,
2330
+ "boundary_count": 4,
2331
+ "artifact_scope": "task",
2332
+ "evaluation_suite": "standard",
2333
+ "model_kind": "ordinal_threshold",
2334
+ "dataset_hashes": {
2335
+ "train": {
2336
+ "path": "/nvme/CognitiveMemoryLayer/packages/models/prepared_data/modelpack/router_train.parquet",
2337
+ "sha256": "d8bf7adebbfa6cda2ef35311b60e5b6028efd58ced64101671e6c64d4e87ca17",
2338
+ "bytes": 41617539
2339
+ },
2340
+ "test": {
2341
+ "path": "/nvme/CognitiveMemoryLayer/packages/models/prepared_data/modelpack/router_test.parquet",
2342
+ "sha256": "46c4cf932e38240bb411ff50814b562f5daac3a876f807eaaaa5a52f8d981995",
2343
+ "bytes": 6786069
2344
+ },
2345
+ "eval": {
2346
+ "path": "/nvme/CognitiveMemoryLayer/packages/models/prepared_data/modelpack/router_eval.parquet",
2347
+ "sha256": "ea69623ce36b98080f7c00ee722e45f7c870174a8967e87a5e7ed03ae18b1387",
2348
+ "bytes": 6369869
2349
+ }
2350
+ },
2351
+ "release_gates": {
2352
+ "passed": true,
2353
+ "checks": [
2354
+ {
2355
+ "section": "test",
2356
+ "metric": "macro_f1",
2357
+ "threshold": 0.81,
2358
+ "actual": 1.0,
2359
+ "passed": true
2360
+ }
2361
+ ]
2362
+ }
2363
+ },
2364
+ "pii_span_detection": {
2365
+ "task": "pii_span_detection",
2366
+ "objective": "token_classification",
2367
+ "trainer": "token_classification",
2368
+ "model_path": "/nvme/CognitiveMemoryLayer/packages/models/trained_models/pii_span_detection_model.joblib",
2369
+ "hf_model_dir": "/nvme/CognitiveMemoryLayer/packages/models/trained_models/pii_span_detection_hf",
2370
+ "train_rows": 30516,
2371
+ "test": {
2372
+ "span_precision": 0.9211675795604243,
2373
+ "span_recall": 0.9452863749555318,
2374
+ "span_f1": 0.9330711426364211,
2375
+ "span_exact_match": 0.8445730247406225,
2376
+ "rows": 5012
2377
+ },
2378
+ "eval": {
2379
+ "span_precision": 0.9210037995935318,
2380
+ "span_recall": 0.9411286681715576,
2381
+ "span_f1": 0.9309574848160057,
2382
+ "span_exact_match": 0.851520572450805,
2383
+ "rows": 4472
2384
+ },
2385
+ "labels": {
2386
+ "0": "O",
2387
+ "1": "B-ACCOUNTNAME",
2388
+ "2": "I-ACCOUNTNAME",
2389
+ "3": "B-ACCOUNTNUMBER",
2390
+ "4": "I-ACCOUNTNUMBER",
2391
+ "5": "B-AGE",
2392
+ "6": "I-AGE",
2393
+ "7": "B-AMOUNT",
2394
+ "8": "I-AMOUNT",
2395
+ "9": "B-BIC",
2396
+ "10": "I-BIC",
2397
+ "11": "B-BITCOINADDRESS",
2398
+ "12": "I-BITCOINADDRESS",
2399
+ "13": "B-BUILDINGNUMBER",
2400
+ "14": "I-BUILDINGNUMBER",
2401
+ "15": "B-CITY",
2402
+ "16": "I-CITY",
2403
+ "17": "B-COMPANYNAME",
2404
+ "18": "I-COMPANYNAME",
2405
+ "19": "B-COUNTY",
2406
+ "20": "I-COUNTY",
2407
+ "21": "B-CREDITCARDCVV",
2408
+ "22": "I-CREDITCARDCVV",
2409
+ "23": "B-CREDITCARDISSUER",
2410
+ "24": "I-CREDITCARDISSUER",
2411
+ "25": "B-CREDITCARDNUMBER",
2412
+ "26": "I-CREDITCARDNUMBER",
2413
+ "27": "B-CURRENCY",
2414
+ "28": "I-CURRENCY",
2415
+ "29": "B-CURRENCYCODE",
2416
+ "30": "I-CURRENCYCODE",
2417
+ "31": "B-CURRENCYNAME",
2418
+ "32": "I-CURRENCYNAME",
2419
+ "33": "B-CURRENCYSYMBOL",
2420
+ "34": "I-CURRENCYSYMBOL",
2421
+ "35": "B-DATE",
2422
+ "36": "I-DATE",
2423
+ "37": "B-DOB",
2424
+ "38": "I-DOB",
2425
+ "39": "B-EMAIL",
2426
+ "40": "I-EMAIL",
2427
+ "41": "B-ETHEREUMADDRESS",
2428
+ "42": "I-ETHEREUMADDRESS",
2429
+ "43": "B-EYECOLOR",
2430
+ "44": "I-EYECOLOR",
2431
+ "45": "B-FIRSTNAME",
2432
+ "46": "I-FIRSTNAME",
2433
+ "47": "B-GENDER",
2434
+ "48": "I-GENDER",
2435
+ "49": "B-HEIGHT",
2436
+ "50": "I-HEIGHT",
2437
+ "51": "B-IBAN",
2438
+ "52": "I-IBAN",
2439
+ "53": "B-IP",
2440
+ "54": "I-IP",
2441
+ "55": "B-IPV4",
2442
+ "56": "I-IPV4",
2443
+ "57": "B-IPV6",
2444
+ "58": "I-IPV6",
2445
+ "59": "B-JOBAREA",
2446
+ "60": "I-JOBAREA",
2447
+ "61": "B-JOBTITLE",
2448
+ "62": "I-JOBTITLE",
2449
+ "63": "B-JOBTYPE",
2450
+ "64": "I-JOBTYPE",
2451
+ "65": "B-LASTNAME",
2452
+ "66": "I-LASTNAME",
2453
+ "67": "B-LITECOINADDRESS",
2454
+ "68": "I-LITECOINADDRESS",
2455
+ "69": "B-MAC",
2456
+ "70": "I-MAC",
2457
+ "71": "B-MASKEDNUMBER",
2458
+ "72": "I-MASKEDNUMBER",
2459
+ "73": "B-MIDDLENAME",
2460
+ "74": "I-MIDDLENAME",
2461
+ "75": "B-NEARBYGPSCOORDINATE",
2462
+ "76": "I-NEARBYGPSCOORDINATE",
2463
+ "77": "B-ORDINALDIRECTION",
2464
+ "78": "I-ORDINALDIRECTION",
2465
+ "79": "B-PASSWORD",
2466
+ "80": "I-PASSWORD",
2467
+ "81": "B-PHONEIMEI",
2468
+ "82": "I-PHONEIMEI",
2469
+ "83": "B-PHONENUMBER",
2470
+ "84": "I-PHONENUMBER",
2471
+ "85": "B-PIN",
2472
+ "86": "I-PIN",
2473
+ "87": "B-PREFIX",
2474
+ "88": "I-PREFIX",
2475
+ "89": "B-SECONDARYADDRESS",
2476
+ "90": "I-SECONDARYADDRESS",
2477
+ "91": "B-SECRET",
2478
+ "92": "I-SECRET",
2479
+ "93": "B-SEX",
2480
+ "94": "I-SEX",
2481
+ "95": "B-SSN",
2482
+ "96": "I-SSN",
2483
+ "97": "B-STATE",
2484
+ "98": "I-STATE",
2485
+ "99": "B-STREET",
2486
+ "100": "I-STREET",
2487
+ "101": "B-TIME",
2488
+ "102": "I-TIME",
2489
+ "103": "B-URL",
2490
+ "104": "I-URL",
2491
+ "105": "B-USERAGENT",
2492
+ "106": "I-USERAGENT",
2493
+ "107": "B-USERNAME",
2494
+ "108": "I-USERNAME",
2495
+ "109": "B-VEHICLEVIN",
2496
+ "110": "I-VEHICLEVIN",
2497
+ "111": "B-VEHICLEVRM",
2498
+ "112": "I-VEHICLEVRM",
2499
+ "113": "B-ZIPCODE",
2500
+ "114": "I-ZIPCODE"
2501
+ },
2502
+ "epoch_stats": [
2503
+ {
2504
+ "epoch": 1,
2505
+ "train_loss": 0.5123886795952879
2506
+ },
2507
+ {
2508
+ "epoch": 2,
2509
+ "train_loss": 0.10158961580695137
2510
+ },
2511
+ {
2512
+ "epoch": 3,
2513
+ "train_loss": 0.08095065996874068
2514
+ },
2515
+ {
2516
+ "epoch": 4,
2517
+ "train_loss": 0.0687989874073682
2518
+ },
2519
+ {
2520
+ "epoch": 5,
2521
+ "train_loss": 0.058967599591349715
2522
+ },
2523
+ {
2524
+ "epoch": 6,
2525
+ "train_loss": 0.04842000443624294
2526
+ },
2527
+ {
2528
+ "epoch": 7,
2529
+ "train_loss": 0.03766606290595617
2530
+ },
2531
+ {
2532
+ "epoch": 8,
2533
+ "train_loss": 0.024583430213327515
2534
+ }
2535
+ ],
2536
+ "actual_epochs": 8,
2537
+ "best_epoch": 8,
2538
+ "early_stopped": false,
2539
+ "artifact_scope": "task",
2540
+ "evaluation_suite": "standard",
2541
+ "model_kind": "token_classification",
2542
+ "dataset_hashes": {
2543
+ "train": {
2544
+ "path": "/nvme/CognitiveMemoryLayer/packages/models/prepared_data/modelpack/pii_span_detection_train.parquet",
2545
+ "sha256": "f904fecb9ca45337906ff8e3d60087e38091237777e82a6313e9ef5a21e82bc8",
2546
+ "bytes": 3742506
2547
+ },
2548
+ "test": {
2549
+ "path": "/nvme/CognitiveMemoryLayer/packages/models/prepared_data/modelpack/pii_span_detection_test.parquet",
2550
+ "sha256": "6d75b6997a97a61e2b884dfc36099605ecd5d7e5ae3476c54cc7aa33cdb5fb81",
2551
+ "bytes": 607354
2552
+ },
2553
+ "eval": {
2554
+ "path": "/nvme/CognitiveMemoryLayer/packages/models/prepared_data/modelpack/pii_span_detection_eval.parquet",
2555
+ "sha256": "153d5d007b854ff6023261c4182a67df1dfb219537c4cebaf69ce8ebed4a2725",
2556
+ "bytes": 515338
2557
+ }
2558
+ },
2559
+ "release_gates": {
2560
+ "passed": false,
2561
+ "checks": [
2562
+ {
2563
+ "section": "test",
2564
+ "metric": "span_exact_match",
2565
+ "threshold": 0.88,
2566
+ "actual": 0.8445730247406225,
2567
+ "passed": false
2568
+ },
2569
+ {
2570
+ "section": "test",
2571
+ "metric": "span_f1",
2572
+ "threshold": 0.93,
2573
+ "actual": 0.9330711426364211,
2574
+ "passed": true
2575
+ }
2576
+ ]
2577
+ }
2578
+ }
2579
+ },
2580
+ "release_mode": false,
2581
+ "allow_dirty": false,
2582
+ "release_gates": {
2583
+ "schema_match_pair": {
2584
+ "passed": true,
2585
+ "checks": [
2586
+ {
2587
+ "section": "test",
2588
+ "metric": "macro_f1",
2589
+ "threshold": 0.8,
2590
+ "actual": 0.8551561470750885,
2591
+ "passed": true
2592
+ },
2593
+ {
2594
+ "section": "test",
2595
+ "metric": "calibration_error",
2596
+ "threshold": {
2597
+ "max": 0.08
2598
+ },
2599
+ "actual": 0.03336120770113563,
2600
+ "passed": true
2601
+ }
2602
+ ]
2603
+ },
2604
+ "memory_type": {
2605
+ "passed": true,
2606
+ "checks": [
2607
+ {
2608
+ "section": "test",
2609
+ "metric": "macro_f1",
2610
+ "threshold": 0.86,
2611
+ "actual": 1.0,
2612
+ "passed": true
2613
+ },
2614
+ {
2615
+ "section": "test",
2616
+ "metric": "plan_f1",
2617
+ "threshold": 0.75,
2618
+ "actual": 1.0,
2619
+ "passed": true
2620
+ }
2621
+ ]
2622
+ },
2623
+ "novelty_pair": {
2624
+ "passed": true,
2625
+ "checks": [
2626
+ {
2627
+ "section": "test",
2628
+ "metric": "changed_f1",
2629
+ "threshold": 0.88,
2630
+ "actual": 0.9117293835068054,
2631
+ "passed": true
2632
+ }
2633
+ ]
2634
+ },
2635
+ "confidence_bin": {
2636
+ "passed": true,
2637
+ "checks": [
2638
+ {
2639
+ "section": "test",
2640
+ "metric": "macro_f1",
2641
+ "threshold": 0.85,
2642
+ "actual": 1.0,
2643
+ "passed": true
2644
+ }
2645
+ ]
2646
+ },
2647
+ "decay_profile": {
2648
+ "passed": true,
2649
+ "checks": [
2650
+ {
2651
+ "section": "test",
2652
+ "metric": "macro_f1",
2653
+ "threshold": 0.81,
2654
+ "actual": 1.0,
2655
+ "passed": true
2656
+ }
2657
+ ]
2658
+ },
2659
+ "pii_span_detection": {
2660
+ "passed": true,
2661
+ "checks": [
2662
+ {
2663
+ "section": "test",
2664
+ "metric": "span_exact_match",
2665
+ "threshold": 0.84,
2666
+ "actual": 0.8445730247406225,
2667
+ "passed": true
2668
+ },
2669
+ {
2670
+ "section": "test",
2671
+ "metric": "span_f1",
2672
+ "threshold": 0.93,
2673
+ "actual": 0.9330711426364211,
2674
+ "passed": true
2675
+ }
2676
+ ]
2677
+ },
2678
+ "forgetting_action_policy": {
2679
+ "passed": true,
2680
+ "checks": [
2681
+ {
2682
+ "section": "test",
2683
+ "metric": "macro_f1",
2684
+ "threshold": 0.93,
2685
+ "actual": 0.9994714168187103,
2686
+ "passed": true
2687
+ },
2688
+ {
2689
+ "section": "test",
2690
+ "metric": "decay_recall",
2691
+ "threshold": 0.9,
2692
+ "actual": 1.0,
2693
+ "passed": true
2694
+ },
2695
+ {
2696
+ "section": "test",
2697
+ "metric": "delete_recall",
2698
+ "threshold": 0.9,
2699
+ "actual": 0.9986786469344608,
2700
+ "passed": true
2701
+ }
2702
+ ]
2703
+ },
2704
+ "constraint_dimension": {
2705
+ "passed": true,
2706
+ "checks": [
2707
+ {
2708
+ "section": "test",
2709
+ "metric": "macro_f1",
2710
+ "threshold": 0.88,
2711
+ "actual": 0.883185453547077,
2712
+ "passed": true
2713
+ },
2714
+ {
2715
+ "section": "test",
2716
+ "metric": "calibration_error",
2717
+ "threshold": {
2718
+ "max": 0.06
2719
+ },
2720
+ "actual": 0.04340881209580012,
2721
+ "passed": true
2722
+ }
2723
+ ]
2724
+ },
2725
+ "context_tag": {
2726
+ "passed": true,
2727
+ "checks": [
2728
+ {
2729
+ "section": "test",
2730
+ "metric": "macro_f1",
2731
+ "threshold": 0.94,
2732
+ "actual": 0.9462078544778264,
2733
+ "passed": true
2734
+ }
2735
+ ]
2736
+ },
2737
+ "retrieval_constraint_relevance_pair": {
2738
+ "passed": true,
2739
+ "checks": [
2740
+ {
2741
+ "section": "test",
2742
+ "metric": "calibration_error",
2743
+ "threshold": {
2744
+ "max": 0.08
2745
+ },
2746
+ "actual": 0.0649609781444073,
2747
+ "passed": true
2748
+ }
2749
+ ]
2750
+ },
2751
+ "memory_rerank_pair": {
2752
+ "passed": true,
2753
+ "checks": [
2754
+ {
2755
+ "section": "test",
2756
+ "metric": "calibration_error",
2757
+ "threshold": {
2758
+ "max": 0.08
2759
+ },
2760
+ "actual": 0.06469904275987867,
2761
+ "passed": true
2762
+ }
2763
+ ]
2764
+ },
2765
+ "reconsolidation_candidate_pair": {
2766
+ "passed": true,
2767
+ "checks": [
2768
+ {
2769
+ "section": "test",
2770
+ "metric": "calibration_error",
2771
+ "threshold": {
2772
+ "max": 0.08
2773
+ },
2774
+ "actual": 0.07517948439121241,
2775
+ "passed": true
2776
+ }
2777
+ ]
2778
+ },
2779
+ "write_importance_regression": {
2780
+ "passed": true,
2781
+ "checks": [
2782
+ {
2783
+ "section": "test",
2784
+ "metric": "test_mae",
2785
+ "threshold": {
2786
+ "max": 0.1
2787
+ },
2788
+ "actual": 0.01877478314980445,
2789
+ "passed": true
2790
+ }
2791
+ ]
2792
+ }
2793
+ },
2794
+ "runtime_thresholds": {},
2795
+ "artifact_validation": {
2796
+ "ok": true,
2797
+ "errors": []
2798
+ }
2799
+ }