Marcos12886 committed on
Commit
38513ec
1 Parent(s): e956955

Subir modelo definitivo

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. README.md +72 -3
  2. checkpoint-100/config.json +85 -0
  3. checkpoint-100/model.safetensors +3 -0
  4. checkpoint-100/optimizer.pt +3 -0
  5. checkpoint-100/rng_state.pth +3 -0
  6. checkpoint-100/scheduler.pt +3 -0
  7. checkpoint-100/trainer_state.json +132 -0
  8. checkpoint-100/training_args.bin +3 -0
  9. checkpoint-103/config.json +85 -0
  10. checkpoint-103/model.safetensors +3 -0
  11. checkpoint-103/optimizer.pt +3 -0
  12. checkpoint-103/rng_state.pth +3 -0
  13. checkpoint-103/scheduler.pt +3 -0
  14. checkpoint-103/trainer_state.json +907 -0
  15. checkpoint-103/training_args.bin +3 -0
  16. checkpoint-108/config.json +85 -0
  17. checkpoint-108/model.safetensors +3 -0
  18. checkpoint-108/optimizer.pt +3 -0
  19. checkpoint-108/rng_state.pth +3 -0
  20. checkpoint-108/scheduler.pt +3 -0
  21. checkpoint-108/trainer_state.json +945 -0
  22. checkpoint-108/training_args.bin +3 -0
  23. checkpoint-112/config.json +85 -0
  24. checkpoint-112/model.safetensors +3 -0
  25. checkpoint-112/optimizer.pt +3 -0
  26. checkpoint-112/rng_state.pth +3 -0
  27. checkpoint-112/scheduler.pt +3 -0
  28. checkpoint-112/trainer_state.json +983 -0
  29. checkpoint-112/training_args.bin +3 -0
  30. checkpoint-117/config.json +85 -0
  31. checkpoint-117/model.safetensors +3 -0
  32. checkpoint-117/optimizer.pt +3 -0
  33. checkpoint-117/rng_state.pth +3 -0
  34. checkpoint-117/scheduler.pt +3 -0
  35. checkpoint-117/trainer_state.json +1021 -0
  36. checkpoint-117/training_args.bin +3 -0
  37. checkpoint-121/config.json +85 -0
  38. checkpoint-121/model.safetensors +3 -0
  39. checkpoint-121/optimizer.pt +3 -0
  40. checkpoint-121/rng_state.pth +3 -0
  41. checkpoint-121/scheduler.pt +3 -0
  42. checkpoint-121/trainer_state.json +1059 -0
  43. checkpoint-121/training_args.bin +3 -0
  44. checkpoint-126/config.json +85 -0
  45. checkpoint-126/model.safetensors +3 -0
  46. checkpoint-126/optimizer.pt +3 -0
  47. checkpoint-126/rng_state.pth +3 -0
  48. checkpoint-126/scheduler.pt +3 -0
  49. checkpoint-126/trainer_state.json +1097 -0
  50. checkpoint-126/training_args.bin +3 -0
README.md CHANGED
@@ -1,3 +1,72 @@
1
- ---
2
- license: apache-2.0
3
- ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: transformers
3
+ license: apache-2.0
4
+ base_model: ntu-spml/distilhubert
5
+ tags:
6
+ - generated_from_trainer
7
+ metrics:
8
+ - accuracy
9
+ - f1
10
+ - precision
11
+ - recall
12
+ model-index:
13
+ - name: distilhubert-finetuned-mixed-data
14
+ results: []
15
+ ---
16
+
17
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
18
+ should probably proofread and complete it, then remove this comment. -->
19
+
20
+ # distilhubert-finetuned-mixed-data
21
+
22
+ This model is a fine-tuned version of [ntu-spml/distilhubert](https://huggingface.co/ntu-spml/distilhubert) on an unknown dataset.
23
+ It achieves the following results on the evaluation set:
24
+ - Loss: 0.8806
25
+ - Accuracy: 0.7912
26
+ - F1: 0.7772
27
+ - Precision: 0.8022
28
+ - Recall: 0.7912
29
+ - Confusion Matrix: [[59, 1, 1, 2], [20, 35, 22, 0], [2, 7, 68, 0], [2, 0, 0, 54]]
30
+
31
+ ## Model description
32
+
33
+ More information needed
34
+
35
+ ## Intended uses & limitations
36
+
37
+ More information needed
38
+
39
+ ## Training and evaluation data
40
+
41
+ More information needed
42
+
43
+ ## Training procedure
44
+
45
+ ### Training hyperparameters
46
+
47
+ The following hyperparameters were used during training:
48
+ - learning_rate: 0.0005
49
+ - train_batch_size: 128
50
+ - eval_batch_size: 128
51
+ - seed: 123
52
+ - gradient_accumulation_steps: 2
53
+ - total_train_batch_size: 256
54
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
55
+ - lr_scheduler_type: cosine_with_restarts
56
+ - lr_scheduler_warmup_ratio: 0.1
57
+ - num_epochs: 40
58
+ - mixed_precision_training: Native AMP
59
+ - label_smoothing_factor: 0.1
60
+
61
+ ### Training results
62
+
63
+ | Training Loss | Epoch | Step | Validation Loss | Accuracy | F1 | Precision | Recall | Confusion Matrix |
64
+ |:-------------:|:-------:|:----:|:---------------:|:--------:|:------:|:---------:|:------:|:--------------------------------------------------------------:|
65
+ | 0.4221 | 22.2222 | 100 | 0.8806 | 0.7912 | 0.7772 | 0.8022 | 0.7912 | [[59, 1, 1, 2], [20, 35, 22, 0], [2, 7, 68, 0], [2, 0, 0, 54]] |
66
+
67
+
68
+ ### Framework versions
69
+
70
+ - Transformers 4.44.2
71
+ - PyTorch 2.4.1+cu121
72
+ - Tokenizers 0.19.1
checkpoint-100/config.json ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "ntu-spml/distilhubert",
3
+ "activation_dropout": 0.1,
4
+ "apply_spec_augment": false,
5
+ "architectures": [
6
+ "HubertForSequenceClassification"
7
+ ],
8
+ "attention_dropout": 0.1,
9
+ "bos_token_id": 1,
10
+ "classifier_proj_size": 256,
11
+ "conv_bias": false,
12
+ "conv_dim": [
13
+ 512,
14
+ 512,
15
+ 512,
16
+ 512,
17
+ 512,
18
+ 512,
19
+ 512
20
+ ],
21
+ "conv_kernel": [
22
+ 10,
23
+ 3,
24
+ 3,
25
+ 3,
26
+ 3,
27
+ 2,
28
+ 2
29
+ ],
30
+ "conv_stride": [
31
+ 5,
32
+ 2,
33
+ 2,
34
+ 2,
35
+ 2,
36
+ 2,
37
+ 2
38
+ ],
39
+ "ctc_loss_reduction": "sum",
40
+ "ctc_zero_infinity": false,
41
+ "do_stable_layer_norm": false,
42
+ "eos_token_id": 2,
43
+ "feat_extract_activation": "gelu",
44
+ "feat_extract_norm": "group",
45
+ "feat_proj_dropout": 0.0,
46
+ "feat_proj_layer_norm": false,
47
+ "final_dropout": 0.0,
48
+ "finetuning_task": "audio-classification",
49
+ "hidden_act": "gelu",
50
+ "hidden_dropout": 0.1,
51
+ "hidden_size": 768,
52
+ "id2label": {
53
+ "0": "1s_normal",
54
+ "1": "1s_pain",
55
+ "2": "1s_hunger",
56
+ "3": "1s_asphyxia"
57
+ },
58
+ "initializer_range": 0.02,
59
+ "intermediate_size": 3072,
60
+ "label2id": {
61
+ "1s_normal": 0,
62
+ "1s_pain": 1,
63
+ "1s_hunger": 2,
64
+ "1s_asphyxia": 3
65
+ },
66
+ "layer_norm_eps": 1e-05,
67
+ "layerdrop": 0.0,
68
+ "mask_feature_length": 10,
69
+ "mask_feature_min_masks": 0,
70
+ "mask_feature_prob": 0.0,
71
+ "mask_time_length": 10,
72
+ "mask_time_min_masks": 2,
73
+ "mask_time_prob": 0.05,
74
+ "model_type": "hubert",
75
+ "num_attention_heads": 12,
76
+ "num_conv_pos_embedding_groups": 16,
77
+ "num_conv_pos_embeddings": 128,
78
+ "num_feat_extract_layers": 7,
79
+ "num_hidden_layers": 2,
80
+ "pad_token_id": 0,
81
+ "torch_dtype": "float32",
82
+ "transformers_version": "4.44.2",
83
+ "use_weighted_layer_sum": false,
84
+ "vocab_size": 32
85
+ }
checkpoint-100/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c2fd7ed3d85f5c5508fdba113cb218b717922e4e2b452800b8b2f7e86ce40400
3
+ size 94765560
checkpoint-100/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2d52014472918afa1cd037f4d14642147157140d55641d5cd90d54a9b6fd5639
3
+ size 189556666
checkpoint-100/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1571753b8b87733ddd5f0d1f9dee2fdc0442f683f5ae2636dd01c78bb407f35a
3
+ size 14308
checkpoint-100/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1d2860514b4d60fa49e8171cee670c64927b3bd5b60ab32b25ae055e67c087f8
3
+ size 1064
checkpoint-100/trainer_state.json ADDED
@@ -0,0 +1,132 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.8240470006184293,
3
+ "best_model_checkpoint": "distilhubert-finetuned-mixed-data/checkpoint-100",
4
+ "epoch": 22.22222222222222,
5
+ "eval_steps": 50,
6
+ "global_step": 100,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 11.11111111111111,
13
+ "grad_norm": 3.250781297683716,
14
+ "learning_rate": 0.00027532317171194046,
15
+ "loss": 0.8288,
16
+ "step": 50
17
+ },
18
+ {
19
+ "epoch": 11.11111111111111,
20
+ "eval_accuracy": 0.8131868131868132,
21
+ "eval_confusion_matrix": [
22
+ [
23
+ 60,
24
+ 11,
25
+ 1,
26
+ 7
27
+ ],
28
+ [
29
+ 9,
30
+ 40,
31
+ 17,
32
+ 0
33
+ ],
34
+ [
35
+ 0,
36
+ 6,
37
+ 63,
38
+ 0
39
+ ],
40
+ [
41
+ 0,
42
+ 0,
43
+ 0,
44
+ 59
45
+ ]
46
+ ],
47
+ "eval_f1": 0.8081937118034679,
48
+ "eval_loss": 0.7091822624206543,
49
+ "eval_precision": 0.8110642218193705,
50
+ "eval_recall": 0.8131868131868132,
51
+ "eval_runtime": 3.0071,
52
+ "eval_samples_per_second": 90.786,
53
+ "eval_steps_per_second": 0.998,
54
+ "step": 50
55
+ },
56
+ {
57
+ "epoch": 22.22222222222222,
58
+ "grad_norm": 2.9194233417510986,
59
+ "learning_rate": 0.00015290869976577364,
60
+ "loss": 0.4125,
61
+ "step": 100
62
+ },
63
+ {
64
+ "epoch": 22.22222222222222,
65
+ "eval_accuracy": 0.8278388278388278,
66
+ "eval_confusion_matrix": [
67
+ [
68
+ 63,
69
+ 7,
70
+ 2,
71
+ 7
72
+ ],
73
+ [
74
+ 5,
75
+ 41,
76
+ 20,
77
+ 0
78
+ ],
79
+ [
80
+ 0,
81
+ 6,
82
+ 63,
83
+ 0
84
+ ],
85
+ [
86
+ 0,
87
+ 0,
88
+ 0,
89
+ 59
90
+ ]
91
+ ],
92
+ "eval_f1": 0.8240470006184293,
93
+ "eval_loss": 0.8097973465919495,
94
+ "eval_precision": 0.8321827410062705,
95
+ "eval_recall": 0.8278388278388278,
96
+ "eval_runtime": 3.0011,
97
+ "eval_samples_per_second": 90.968,
98
+ "eval_steps_per_second": 1.0,
99
+ "step": 100
100
+ }
101
+ ],
102
+ "logging_steps": 50,
103
+ "max_steps": 180,
104
+ "num_input_tokens_seen": 0,
105
+ "num_train_epochs": 45,
106
+ "save_steps": 50,
107
+ "stateful_callbacks": {
108
+ "EarlyStoppingCallback": {
109
+ "args": {
110
+ "early_stopping_patience": 5,
111
+ "early_stopping_threshold": 0.001
112
+ },
113
+ "attributes": {
114
+ "early_stopping_patience_counter": 0
115
+ }
116
+ },
117
+ "TrainerControl": {
118
+ "args": {
119
+ "should_epoch_stop": false,
120
+ "should_evaluate": false,
121
+ "should_log": false,
122
+ "should_save": true,
123
+ "should_training_stop": false
124
+ },
125
+ "attributes": {}
126
+ }
127
+ },
128
+ "total_flos": 5.511815490816e+16,
129
+ "train_batch_size": 128,
130
+ "trial_name": null,
131
+ "trial_params": null
132
+ }
checkpoint-100/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b1a597595ff76a9a22ed372d7da43b8f98eac449e4a9a4f3f55925bca85cb26c
3
+ size 5240
checkpoint-103/config.json ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "ntu-spml/distilhubert",
3
+ "activation_dropout": 0.1,
4
+ "apply_spec_augment": false,
5
+ "architectures": [
6
+ "HubertForSequenceClassification"
7
+ ],
8
+ "attention_dropout": 0.1,
9
+ "bos_token_id": 1,
10
+ "classifier_proj_size": 256,
11
+ "conv_bias": false,
12
+ "conv_dim": [
13
+ 512,
14
+ 512,
15
+ 512,
16
+ 512,
17
+ 512,
18
+ 512,
19
+ 512
20
+ ],
21
+ "conv_kernel": [
22
+ 10,
23
+ 3,
24
+ 3,
25
+ 3,
26
+ 3,
27
+ 2,
28
+ 2
29
+ ],
30
+ "conv_stride": [
31
+ 5,
32
+ 2,
33
+ 2,
34
+ 2,
35
+ 2,
36
+ 2,
37
+ 2
38
+ ],
39
+ "ctc_loss_reduction": "sum",
40
+ "ctc_zero_infinity": false,
41
+ "do_stable_layer_norm": false,
42
+ "eos_token_id": 2,
43
+ "feat_extract_activation": "gelu",
44
+ "feat_extract_norm": "group",
45
+ "feat_proj_dropout": 0.0,
46
+ "feat_proj_layer_norm": false,
47
+ "final_dropout": 0.0,
48
+ "finetuning_task": "audio-classification",
49
+ "hidden_act": "gelu",
50
+ "hidden_dropout": 0.1,
51
+ "hidden_size": 768,
52
+ "id2label": {
53
+ "0": "1s_normal",
54
+ "1": "1s_pain",
55
+ "2": "1s_hunger",
56
+ "3": "1s_asphyxia"
57
+ },
58
+ "initializer_range": 0.02,
59
+ "intermediate_size": 3072,
60
+ "label2id": {
61
+ "1s_normal": 0,
62
+ "1s_pain": 1,
63
+ "1s_hunger": 2,
64
+ "1s_asphyxia": 3
65
+ },
66
+ "layer_norm_eps": 1e-05,
67
+ "layerdrop": 0.0,
68
+ "mask_feature_length": 10,
69
+ "mask_feature_min_masks": 0,
70
+ "mask_feature_prob": 0.0,
71
+ "mask_time_length": 10,
72
+ "mask_time_min_masks": 2,
73
+ "mask_time_prob": 0.05,
74
+ "model_type": "hubert",
75
+ "num_attention_heads": 12,
76
+ "num_conv_pos_embedding_groups": 16,
77
+ "num_conv_pos_embeddings": 128,
78
+ "num_feat_extract_layers": 7,
79
+ "num_hidden_layers": 2,
80
+ "pad_token_id": 0,
81
+ "torch_dtype": "float32",
82
+ "transformers_version": "4.44.2",
83
+ "use_weighted_layer_sum": false,
84
+ "vocab_size": 32
85
+ }
checkpoint-103/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4f7d6ad497eba26b3750aa84d3f2f51a6c75d441de28d6e5dffc2ba791ef85cb
3
+ size 94765560
checkpoint-103/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2e42ad9a6b8f81c14e7bf08d7fcd25f21c4525b5b243c2ac5dd8007a8ef28d51
3
+ size 189556666
checkpoint-103/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:758935c8432355b6f68b49d4629c80a8b5bf68f490598468627160be3b03f9d5
3
+ size 14308
checkpoint-103/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:76735059535a9df7daab319ea81a27335a904e3b4dea492d74215e6e4524b5f3
3
+ size 1064
checkpoint-103/trainer_state.json ADDED
@@ -0,0 +1,907 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.8627769756077204,
3
+ "best_model_checkpoint": "distilhubert-finetuned-mixed-data/checkpoint-81",
4
+ "epoch": 22.88888888888889,
5
+ "eval_steps": 500,
6
+ "global_step": 103,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.8888888888888888,
13
+ "eval_accuracy": 0.63003663003663,
14
+ "eval_confusion_matrix": [
15
+ [
16
+ 53,
17
+ 0,
18
+ 16,
19
+ 3
20
+ ],
21
+ [
22
+ 28,
23
+ 1,
24
+ 31,
25
+ 0
26
+ ],
27
+ [
28
+ 13,
29
+ 0,
30
+ 62,
31
+ 1
32
+ ],
33
+ [
34
+ 3,
35
+ 0,
36
+ 6,
37
+ 56
38
+ ]
39
+ ],
40
+ "eval_f1": 0.566693372031096,
41
+ "eval_loss": 1.31327486038208,
42
+ "eval_precision": 0.7361933549293478,
43
+ "eval_recall": 0.63003663003663,
44
+ "eval_runtime": 2.8436,
45
+ "eval_samples_per_second": 96.005,
46
+ "eval_steps_per_second": 1.055,
47
+ "step": 4
48
+ },
49
+ {
50
+ "epoch": 2.0,
51
+ "eval_accuracy": 0.6593406593406593,
52
+ "eval_confusion_matrix": [
53
+ [
54
+ 41,
55
+ 10,
56
+ 16,
57
+ 5
58
+ ],
59
+ [
60
+ 21,
61
+ 1,
62
+ 38,
63
+ 0
64
+ ],
65
+ [
66
+ 1,
67
+ 2,
68
+ 73,
69
+ 0
70
+ ],
71
+ [
72
+ 0,
73
+ 0,
74
+ 0,
75
+ 65
76
+ ]
77
+ ],
78
+ "eval_f1": 0.595713773130153,
79
+ "eval_loss": 0.9242589473724365,
80
+ "eval_precision": 0.5696509512811321,
81
+ "eval_recall": 0.6593406593406593,
82
+ "eval_runtime": 2.8668,
83
+ "eval_samples_per_second": 95.227,
84
+ "eval_steps_per_second": 1.046,
85
+ "step": 9
86
+ },
87
+ {
88
+ "epoch": 2.888888888888889,
89
+ "eval_accuracy": 0.717948717948718,
90
+ "eval_confusion_matrix": [
91
+ [
92
+ 52,
93
+ 8,
94
+ 7,
95
+ 5
96
+ ],
97
+ [
98
+ 18,
99
+ 5,
100
+ 37,
101
+ 0
102
+ ],
103
+ [
104
+ 1,
105
+ 0,
106
+ 75,
107
+ 0
108
+ ],
109
+ [
110
+ 1,
111
+ 0,
112
+ 0,
113
+ 64
114
+ ]
115
+ ],
116
+ "eval_f1": 0.6621621567125869,
117
+ "eval_loss": 0.8232662081718445,
118
+ "eval_precision": 0.671303801513745,
119
+ "eval_recall": 0.717948717948718,
120
+ "eval_runtime": 2.8711,
121
+ "eval_samples_per_second": 95.084,
122
+ "eval_steps_per_second": 1.045,
123
+ "step": 13
124
+ },
125
+ {
126
+ "epoch": 4.0,
127
+ "eval_accuracy": 0.6959706959706959,
128
+ "eval_confusion_matrix": [
129
+ [
130
+ 45,
131
+ 9,
132
+ 13,
133
+ 5
134
+ ],
135
+ [
136
+ 13,
137
+ 5,
138
+ 42,
139
+ 0
140
+ ],
141
+ [
142
+ 1,
143
+ 0,
144
+ 75,
145
+ 0
146
+ ],
147
+ [
148
+ 0,
149
+ 0,
150
+ 0,
151
+ 65
152
+ ]
153
+ ],
154
+ "eval_f1": 0.6428796900399459,
155
+ "eval_loss": 0.951453447341919,
156
+ "eval_precision": 0.6613447705829899,
157
+ "eval_recall": 0.6959706959706959,
158
+ "eval_runtime": 2.8813,
159
+ "eval_samples_per_second": 94.75,
160
+ "eval_steps_per_second": 1.041,
161
+ "step": 18
162
+ },
163
+ {
164
+ "epoch": 4.888888888888889,
165
+ "eval_accuracy": 0.6666666666666666,
166
+ "eval_confusion_matrix": [
167
+ [
168
+ 16,
169
+ 49,
170
+ 3,
171
+ 4
172
+ ],
173
+ [
174
+ 1,
175
+ 26,
176
+ 33,
177
+ 0
178
+ ],
179
+ [
180
+ 0,
181
+ 1,
182
+ 75,
183
+ 0
184
+ ],
185
+ [
186
+ 0,
187
+ 0,
188
+ 0,
189
+ 65
190
+ ]
191
+ ],
192
+ "eval_f1": 0.6331541584523206,
193
+ "eval_loss": 1.0080645084381104,
194
+ "eval_precision": 0.7358031394913882,
195
+ "eval_recall": 0.6666666666666666,
196
+ "eval_runtime": 2.9108,
197
+ "eval_samples_per_second": 93.789,
198
+ "eval_steps_per_second": 1.031,
199
+ "step": 22
200
+ },
201
+ {
202
+ "epoch": 6.0,
203
+ "eval_accuracy": 0.652014652014652,
204
+ "eval_confusion_matrix": [
205
+ [
206
+ 66,
207
+ 0,
208
+ 0,
209
+ 6
210
+ ],
211
+ [
212
+ 36,
213
+ 16,
214
+ 8,
215
+ 0
216
+ ],
217
+ [
218
+ 10,
219
+ 35,
220
+ 31,
221
+ 0
222
+ ],
223
+ [
224
+ 0,
225
+ 0,
226
+ 0,
227
+ 65
228
+ ]
229
+ ],
230
+ "eval_f1": 0.6302407955860642,
231
+ "eval_loss": 0.9598046541213989,
232
+ "eval_precision": 0.6636241740077424,
233
+ "eval_recall": 0.652014652014652,
234
+ "eval_runtime": 2.9773,
235
+ "eval_samples_per_second": 91.693,
236
+ "eval_steps_per_second": 1.008,
237
+ "step": 27
238
+ },
239
+ {
240
+ "epoch": 6.888888888888889,
241
+ "eval_accuracy": 0.7692307692307693,
242
+ "eval_confusion_matrix": [
243
+ [
244
+ 66,
245
+ 0,
246
+ 0,
247
+ 6
248
+ ],
249
+ [
250
+ 29,
251
+ 21,
252
+ 10,
253
+ 0
254
+ ],
255
+ [
256
+ 3,
257
+ 15,
258
+ 58,
259
+ 0
260
+ ],
261
+ [
262
+ 0,
263
+ 0,
264
+ 0,
265
+ 65
266
+ ]
267
+ ],
268
+ "eval_f1": 0.7527855586679116,
269
+ "eval_loss": 0.7898163795471191,
270
+ "eval_precision": 0.7612467677056892,
271
+ "eval_recall": 0.7692307692307693,
272
+ "eval_runtime": 2.8918,
273
+ "eval_samples_per_second": 94.404,
274
+ "eval_steps_per_second": 1.037,
275
+ "step": 31
276
+ },
277
+ {
278
+ "epoch": 8.0,
279
+ "eval_accuracy": 0.7802197802197802,
280
+ "eval_confusion_matrix": [
281
+ [
282
+ 66,
283
+ 0,
284
+ 0,
285
+ 6
286
+ ],
287
+ [
288
+ 25,
289
+ 15,
290
+ 19,
291
+ 1
292
+ ],
293
+ [
294
+ 1,
295
+ 6,
296
+ 68,
297
+ 1
298
+ ],
299
+ [
300
+ 1,
301
+ 0,
302
+ 0,
303
+ 64
304
+ ]
305
+ ],
306
+ "eval_f1": 0.7471179200524057,
307
+ "eval_loss": 0.7337484359741211,
308
+ "eval_precision": 0.7733845922309294,
309
+ "eval_recall": 0.7802197802197802,
310
+ "eval_runtime": 2.9006,
311
+ "eval_samples_per_second": 94.119,
312
+ "eval_steps_per_second": 1.034,
313
+ "step": 36
314
+ },
315
+ {
316
+ "epoch": 8.88888888888889,
317
+ "eval_accuracy": 0.7912087912087912,
318
+ "eval_confusion_matrix": [
319
+ [
320
+ 57,
321
+ 8,
322
+ 3,
323
+ 4
324
+ ],
325
+ [
326
+ 13,
327
+ 24,
328
+ 23,
329
+ 0
330
+ ],
331
+ [
332
+ 0,
333
+ 5,
334
+ 71,
335
+ 0
336
+ ],
337
+ [
338
+ 1,
339
+ 0,
340
+ 0,
341
+ 64
342
+ ]
343
+ ],
344
+ "eval_f1": 0.7766564722186922,
345
+ "eval_loss": 0.7148727178573608,
346
+ "eval_precision": 0.7821504483074875,
347
+ "eval_recall": 0.7912087912087912,
348
+ "eval_runtime": 2.8866,
349
+ "eval_samples_per_second": 94.576,
350
+ "eval_steps_per_second": 1.039,
351
+ "step": 40
352
+ },
353
+ {
354
+ "epoch": 10.0,
355
+ "eval_accuracy": 0.8168498168498168,
356
+ "eval_confusion_matrix": [
357
+ [
358
+ 51,
359
+ 8,
360
+ 1,
361
+ 12
362
+ ],
363
+ [
364
+ 10,
365
+ 36,
366
+ 14,
367
+ 0
368
+ ],
369
+ [
370
+ 0,
371
+ 5,
372
+ 71,
373
+ 0
374
+ ],
375
+ [
376
+ 0,
377
+ 0,
378
+ 0,
379
+ 65
380
+ ]
381
+ ],
382
+ "eval_f1": 0.8094336953840884,
383
+ "eval_loss": 0.7574812173843384,
384
+ "eval_precision": 0.8127936625684181,
385
+ "eval_recall": 0.8168498168498168,
386
+ "eval_runtime": 2.8967,
387
+ "eval_samples_per_second": 94.245,
388
+ "eval_steps_per_second": 1.036,
389
+ "step": 45
390
+ },
391
+ {
392
+ "epoch": 10.88888888888889,
393
+ "eval_accuracy": 0.8021978021978022,
394
+ "eval_confusion_matrix": [
395
+ [
396
+ 48,
397
+ 18,
398
+ 2,
399
+ 4
400
+ ],
401
+ [
402
+ 4,
403
+ 41,
404
+ 15,
405
+ 0
406
+ ],
407
+ [
408
+ 0,
409
+ 7,
410
+ 69,
411
+ 0
412
+ ],
413
+ [
414
+ 3,
415
+ 0,
416
+ 1,
417
+ 61
418
+ ]
419
+ ],
420
+ "eval_f1": 0.801525180147331,
421
+ "eval_loss": 0.7140281200408936,
422
+ "eval_precision": 0.8109338936925145,
423
+ "eval_recall": 0.8021978021978022,
424
+ "eval_runtime": 2.917,
425
+ "eval_samples_per_second": 93.589,
426
+ "eval_steps_per_second": 1.028,
427
+ "step": 49
428
+ },
429
+ {
430
+ "epoch": 12.0,
431
+ "eval_accuracy": 0.8534798534798534,
432
+ "eval_confusion_matrix": [
433
+ [
434
+ 56,
435
+ 12,
436
+ 0,
437
+ 4
438
+ ],
439
+ [
440
+ 6,
441
+ 43,
442
+ 11,
443
+ 0
444
+ ],
445
+ [
446
+ 0,
447
+ 6,
448
+ 70,
449
+ 0
450
+ ],
451
+ [
452
+ 1,
453
+ 0,
454
+ 0,
455
+ 64
456
+ ]
457
+ ],
458
+ "eval_f1": 0.8523994617102314,
459
+ "eval_loss": 0.6672152280807495,
460
+ "eval_precision": 0.8540313732642031,
461
+ "eval_recall": 0.8534798534798534,
462
+ "eval_runtime": 2.8969,
463
+ "eval_samples_per_second": 94.238,
464
+ "eval_steps_per_second": 1.036,
465
+ "step": 54
466
+ },
467
+ {
468
+ "epoch": 12.88888888888889,
469
+ "eval_accuracy": 0.8498168498168498,
470
+ "eval_confusion_matrix": [
471
+ [
472
+ 60,
473
+ 8,
474
+ 0,
475
+ 4
476
+ ],
477
+ [
478
+ 8,
479
+ 38,
480
+ 14,
481
+ 0
482
+ ],
483
+ [
484
+ 0,
485
+ 6,
486
+ 70,
487
+ 0
488
+ ],
489
+ [
490
+ 1,
491
+ 0,
492
+ 0,
493
+ 64
494
+ ]
495
+ ],
496
+ "eval_f1": 0.8463270052615757,
497
+ "eval_loss": 0.6432910561561584,
498
+ "eval_precision": 0.8460243715014519,
499
+ "eval_recall": 0.8498168498168498,
500
+ "eval_runtime": 3.0281,
501
+ "eval_samples_per_second": 90.156,
502
+ "eval_steps_per_second": 0.991,
503
+ "step": 58
504
+ },
505
+ {
506
+ "epoch": 14.0,
507
+ "eval_accuracy": 0.8278388278388278,
508
+ "eval_confusion_matrix": [
509
+ [
510
+ 54,
511
+ 13,
512
+ 1,
513
+ 4
514
+ ],
515
+ [
516
+ 7,
517
+ 44,
518
+ 9,
519
+ 0
520
+ ],
521
+ [
522
+ 0,
523
+ 11,
524
+ 65,
525
+ 0
526
+ ],
527
+ [
528
+ 1,
529
+ 1,
530
+ 0,
531
+ 63
532
+ ]
533
+ ],
534
+ "eval_f1": 0.82943590265942,
535
+ "eval_loss": 0.7395206093788147,
536
+ "eval_precision": 0.8350059217447382,
537
+ "eval_recall": 0.8278388278388278,
538
+ "eval_runtime": 2.9869,
539
+ "eval_samples_per_second": 91.399,
540
+ "eval_steps_per_second": 1.004,
541
+ "step": 63
542
+ },
543
+ {
544
+ "epoch": 14.88888888888889,
545
+ "eval_accuracy": 0.8315018315018315,
546
+ "eval_confusion_matrix": [
547
+ [
548
+ 54,
549
+ 13,
550
+ 1,
551
+ 4
552
+ ],
553
+ [
554
+ 10,
555
+ 39,
556
+ 11,
557
+ 0
558
+ ],
559
+ [
560
+ 0,
561
+ 7,
562
+ 69,
563
+ 0
564
+ ],
565
+ [
566
+ 0,
567
+ 0,
568
+ 0,
569
+ 65
570
+ ]
571
+ ],
572
+ "eval_f1": 0.8291811389886823,
573
+ "eval_loss": 0.7115849852561951,
574
+ "eval_precision": 0.829244108966536,
575
+ "eval_recall": 0.8315018315018315,
576
+ "eval_runtime": 2.9616,
577
+ "eval_samples_per_second": 92.181,
578
+ "eval_steps_per_second": 1.013,
579
+ "step": 67
580
+ },
581
+ {
582
+ "epoch": 16.0,
583
+ "eval_accuracy": 0.8315018315018315,
584
+ "eval_confusion_matrix": [
585
+ [
586
+ 60,
587
+ 7,
588
+ 1,
589
+ 4
590
+ ],
591
+ [
592
+ 10,
593
+ 39,
594
+ 11,
595
+ 0
596
+ ],
597
+ [
598
+ 0,
599
+ 11,
600
+ 65,
601
+ 0
602
+ ],
603
+ [
604
+ 1,
605
+ 1,
606
+ 0,
607
+ 63
608
+ ]
609
+ ],
610
+ "eval_f1": 0.8304073820984628,
611
+ "eval_loss": 0.7295921444892883,
612
+ "eval_precision": 0.8295426562258641,
613
+ "eval_recall": 0.8315018315018315,
614
+ "eval_runtime": 2.8758,
615
+ "eval_samples_per_second": 94.932,
616
+ "eval_steps_per_second": 1.043,
617
+ "step": 72
618
+ },
619
+ {
620
+ "epoch": 16.88888888888889,
621
+ "eval_accuracy": 0.8644688644688645,
622
+ "eval_confusion_matrix": [
623
+ [
624
+ 62,
625
+ 5,
626
+ 1,
627
+ 4
628
+ ],
629
+ [
630
+ 9,
631
+ 37,
632
+ 14,
633
+ 0
634
+ ],
635
+ [
636
+ 0,
637
+ 3,
638
+ 73,
639
+ 0
640
+ ],
641
+ [
642
+ 1,
643
+ 0,
644
+ 0,
645
+ 64
646
+ ]
647
+ ],
648
+ "eval_f1": 0.8589767100678526,
649
+ "eval_loss": 0.7055637240409851,
650
+ "eval_precision": 0.8628397746044805,
651
+ "eval_recall": 0.8644688644688645,
652
+ "eval_runtime": 3.074,
653
+ "eval_samples_per_second": 88.809,
654
+ "eval_steps_per_second": 0.976,
655
+ "step": 76
656
+ },
657
+ {
658
+ "epoch": 18.0,
659
+ "eval_accuracy": 0.8644688644688645,
660
+ "eval_confusion_matrix": [
661
+ [
662
+ 65,
663
+ 2,
664
+ 1,
665
+ 4
666
+ ],
667
+ [
668
+ 13,
669
+ 42,
670
+ 5,
671
+ 0
672
+ ],
673
+ [
674
+ 0,
675
+ 12,
676
+ 64,
677
+ 0
678
+ ],
679
+ [
680
+ 0,
681
+ 0,
682
+ 0,
683
+ 65
684
+ ]
685
+ ],
686
+ "eval_f1": 0.8627769756077204,
687
+ "eval_loss": 0.7563945651054382,
688
+ "eval_precision": 0.8634344261673453,
689
+ "eval_recall": 0.8644688644688645,
690
+ "eval_runtime": 3.0072,
691
+ "eval_samples_per_second": 90.783,
692
+ "eval_steps_per_second": 0.998,
693
+ "step": 81
694
+ },
695
+ {
696
+ "epoch": 18.88888888888889,
697
+ "eval_accuracy": 0.8424908424908425,
698
+ "eval_confusion_matrix": [
699
+ [
700
+ 64,
701
+ 4,
702
+ 0,
703
+ 4
704
+ ],
705
+ [
706
+ 11,
707
+ 41,
708
+ 8,
709
+ 0
710
+ ],
711
+ [
712
+ 0,
713
+ 10,
714
+ 66,
715
+ 0
716
+ ],
717
+ [
718
+ 6,
719
+ 0,
720
+ 0,
721
+ 59
722
+ ]
723
+ ],
724
+ "eval_f1": 0.8418306879608031,
725
+ "eval_loss": 0.7825365662574768,
726
+ "eval_precision": 0.8434907006335578,
727
+ "eval_recall": 0.8424908424908425,
728
+ "eval_runtime": 3.0343,
729
+ "eval_samples_per_second": 89.972,
730
+ "eval_steps_per_second": 0.989,
731
+ "step": 85
732
+ },
733
+ {
734
+ "epoch": 20.0,
735
+ "eval_accuracy": 0.8058608058608059,
736
+ "eval_confusion_matrix": [
737
+ [
738
+ 40,
739
+ 28,
740
+ 1,
741
+ 3
742
+ ],
743
+ [
744
+ 2,
745
+ 50,
746
+ 8,
747
+ 0
748
+ ],
749
+ [
750
+ 0,
751
+ 11,
752
+ 65,
753
+ 0
754
+ ],
755
+ [
756
+ 0,
757
+ 0,
758
+ 0,
759
+ 65
760
+ ]
761
+ ],
762
+ "eval_f1": 0.8065760931078588,
763
+ "eval_loss": 0.8426868915557861,
764
+ "eval_precision": 0.8467707085637404,
765
+ "eval_recall": 0.8058608058608059,
766
+ "eval_runtime": 3.0783,
767
+ "eval_samples_per_second": 88.686,
768
+ "eval_steps_per_second": 0.975,
769
+ "step": 90
770
+ },
771
+ {
772
+ "epoch": 20.88888888888889,
773
+ "eval_accuracy": 0.8498168498168498,
774
+ "eval_confusion_matrix": [
775
+ [
776
+ 63,
777
+ 4,
778
+ 0,
779
+ 5
780
+ ],
781
+ [
782
+ 13,
783
+ 40,
784
+ 7,
785
+ 0
786
+ ],
787
+ [
788
+ 0,
789
+ 12,
790
+ 64,
791
+ 0
792
+ ],
793
+ [
794
+ 0,
795
+ 0,
796
+ 0,
797
+ 65
798
+ ]
799
+ ],
800
+ "eval_f1": 0.8477878057985963,
801
+ "eval_loss": 0.7440442442893982,
802
+ "eval_precision": 0.8476393351433065,
803
+ "eval_recall": 0.8498168498168498,
804
+ "eval_runtime": 2.9875,
805
+ "eval_samples_per_second": 91.38,
806
+ "eval_steps_per_second": 1.004,
807
+ "step": 94
808
+ },
809
+ {
810
+ "epoch": 22.0,
811
+ "eval_accuracy": 0.8608058608058609,
812
+ "eval_confusion_matrix": [
813
+ [
814
+ 66,
815
+ 1,
816
+ 1,
817
+ 4
818
+ ],
819
+ [
820
+ 13,
821
+ 36,
822
+ 11,
823
+ 0
824
+ ],
825
+ [
826
+ 0,
827
+ 5,
828
+ 71,
829
+ 0
830
+ ],
831
+ [
832
+ 3,
833
+ 0,
834
+ 0,
835
+ 62
836
+ ]
837
+ ],
838
+ "eval_f1": 0.855194718990792,
839
+ "eval_loss": 0.7338178753852844,
840
+ "eval_precision": 0.8624631692093176,
841
+ "eval_recall": 0.8608058608058609,
842
+ "eval_runtime": 2.98,
843
+ "eval_samples_per_second": 91.611,
844
+ "eval_steps_per_second": 1.007,
845
+ "step": 99
846
+ },
847
+ {
848
+ "epoch": 22.88888888888889,
849
+ "eval_accuracy": 0.8498168498168498,
850
+ "eval_confusion_matrix": [
851
+ [
852
+ 58,
853
+ 10,
854
+ 0,
855
+ 4
856
+ ],
857
+ [
858
+ 10,
859
+ 43,
860
+ 7,
861
+ 0
862
+ ],
863
+ [
864
+ 0,
865
+ 7,
866
+ 69,
867
+ 0
868
+ ],
869
+ [
870
+ 2,
871
+ 1,
872
+ 0,
873
+ 62
874
+ ]
875
+ ],
876
+ "eval_f1": 0.8497737987724402,
877
+ "eval_loss": 0.7231407761573792,
878
+ "eval_precision": 0.8498638493954653,
879
+ "eval_recall": 0.8498168498168498,
880
+ "eval_runtime": 2.9272,
881
+ "eval_samples_per_second": 93.262,
882
+ "eval_steps_per_second": 1.025,
883
+ "step": 103
884
+ }
885
+ ],
886
+ "logging_steps": 500,
887
+ "max_steps": 180,
888
+ "num_input_tokens_seen": 0,
889
+ "num_train_epochs": 45,
890
+ "save_steps": 500,
891
+ "stateful_callbacks": {
892
+ "TrainerControl": {
893
+ "args": {
894
+ "should_epoch_stop": false,
895
+ "should_evaluate": false,
896
+ "should_log": false,
897
+ "should_save": true,
898
+ "should_training_stop": false
899
+ },
900
+ "attributes": {}
901
+ }
902
+ },
903
+ "total_flos": 5.70148598592e+16,
904
+ "train_batch_size": 128,
905
+ "trial_name": null,
906
+ "trial_params": null
907
+ }
checkpoint-103/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:09e217cad0464b642c54f8b7691100b62a93dceb32a1869f55e3f0eb3a54a79e
3
+ size 5240
checkpoint-108/config.json ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "ntu-spml/distilhubert",
3
+ "activation_dropout": 0.1,
4
+ "apply_spec_augment": false,
5
+ "architectures": [
6
+ "HubertForSequenceClassification"
7
+ ],
8
+ "attention_dropout": 0.1,
9
+ "bos_token_id": 1,
10
+ "classifier_proj_size": 256,
11
+ "conv_bias": false,
12
+ "conv_dim": [
13
+ 512,
14
+ 512,
15
+ 512,
16
+ 512,
17
+ 512,
18
+ 512,
19
+ 512
20
+ ],
21
+ "conv_kernel": [
22
+ 10,
23
+ 3,
24
+ 3,
25
+ 3,
26
+ 3,
27
+ 2,
28
+ 2
29
+ ],
30
+ "conv_stride": [
31
+ 5,
32
+ 2,
33
+ 2,
34
+ 2,
35
+ 2,
36
+ 2,
37
+ 2
38
+ ],
39
+ "ctc_loss_reduction": "sum",
40
+ "ctc_zero_infinity": false,
41
+ "do_stable_layer_norm": false,
42
+ "eos_token_id": 2,
43
+ "feat_extract_activation": "gelu",
44
+ "feat_extract_norm": "group",
45
+ "feat_proj_dropout": 0.0,
46
+ "feat_proj_layer_norm": false,
47
+ "final_dropout": 0.0,
48
+ "finetuning_task": "audio-classification",
49
+ "hidden_act": "gelu",
50
+ "hidden_dropout": 0.1,
51
+ "hidden_size": 768,
52
+ "id2label": {
53
+ "0": "1s_normal",
54
+ "1": "1s_pain",
55
+ "2": "1s_hunger",
56
+ "3": "1s_asphyxia"
57
+ },
58
+ "initializer_range": 0.02,
59
+ "intermediate_size": 3072,
60
+ "label2id": {
61
+ "1s_normal": 0,
62
+ "1s_pain": 1,
63
+ "1s_hunger": 2,
64
+ "1s_asphyxia": 3
65
+ },
66
+ "layer_norm_eps": 1e-05,
67
+ "layerdrop": 0.0,
68
+ "mask_feature_length": 10,
69
+ "mask_feature_min_masks": 0,
70
+ "mask_feature_prob": 0.0,
71
+ "mask_time_length": 10,
72
+ "mask_time_min_masks": 2,
73
+ "mask_time_prob": 0.05,
74
+ "model_type": "hubert",
75
+ "num_attention_heads": 12,
76
+ "num_conv_pos_embedding_groups": 16,
77
+ "num_conv_pos_embeddings": 128,
78
+ "num_feat_extract_layers": 7,
79
+ "num_hidden_layers": 2,
80
+ "pad_token_id": 0,
81
+ "torch_dtype": "float32",
82
+ "transformers_version": "4.44.2",
83
+ "use_weighted_layer_sum": false,
84
+ "vocab_size": 32
85
+ }
checkpoint-108/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5f193969e204945d23169464066234ad545dcdd00180959259c3b0d93230ee00
3
+ size 94765560
checkpoint-108/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7134ec55f60b7455d0f9bbdbb4efc5a3208c1fb45b53f2caa01c4a850e0af82c
3
+ size 189556666
checkpoint-108/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7910a20ae0ec73ddb102bf8728f5fbb2c63ac595644645b40b7095c74e739644
3
+ size 14308
checkpoint-108/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:14a0f217e1cd7a6ee64cdcab8f02f2a72c157844aab523fbebe978c66b9af419
3
+ size 1064
checkpoint-108/trainer_state.json ADDED
@@ -0,0 +1,945 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.8627769756077204,
3
+ "best_model_checkpoint": "distilhubert-finetuned-mixed-data/checkpoint-81",
4
+ "epoch": 24.0,
5
+ "eval_steps": 500,
6
+ "global_step": 108,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.8888888888888888,
13
+ "eval_accuracy": 0.63003663003663,
14
+ "eval_confusion_matrix": [
15
+ [
16
+ 53,
17
+ 0,
18
+ 16,
19
+ 3
20
+ ],
21
+ [
22
+ 28,
23
+ 1,
24
+ 31,
25
+ 0
26
+ ],
27
+ [
28
+ 13,
29
+ 0,
30
+ 62,
31
+ 1
32
+ ],
33
+ [
34
+ 3,
35
+ 0,
36
+ 6,
37
+ 56
38
+ ]
39
+ ],
40
+ "eval_f1": 0.566693372031096,
41
+ "eval_loss": 1.31327486038208,
42
+ "eval_precision": 0.7361933549293478,
43
+ "eval_recall": 0.63003663003663,
44
+ "eval_runtime": 2.8436,
45
+ "eval_samples_per_second": 96.005,
46
+ "eval_steps_per_second": 1.055,
47
+ "step": 4
48
+ },
49
+ {
50
+ "epoch": 2.0,
51
+ "eval_accuracy": 0.6593406593406593,
52
+ "eval_confusion_matrix": [
53
+ [
54
+ 41,
55
+ 10,
56
+ 16,
57
+ 5
58
+ ],
59
+ [
60
+ 21,
61
+ 1,
62
+ 38,
63
+ 0
64
+ ],
65
+ [
66
+ 1,
67
+ 2,
68
+ 73,
69
+ 0
70
+ ],
71
+ [
72
+ 0,
73
+ 0,
74
+ 0,
75
+ 65
76
+ ]
77
+ ],
78
+ "eval_f1": 0.595713773130153,
79
+ "eval_loss": 0.9242589473724365,
80
+ "eval_precision": 0.5696509512811321,
81
+ "eval_recall": 0.6593406593406593,
82
+ "eval_runtime": 2.8668,
83
+ "eval_samples_per_second": 95.227,
84
+ "eval_steps_per_second": 1.046,
85
+ "step": 9
86
+ },
87
+ {
88
+ "epoch": 2.888888888888889,
89
+ "eval_accuracy": 0.717948717948718,
90
+ "eval_confusion_matrix": [
91
+ [
92
+ 52,
93
+ 8,
94
+ 7,
95
+ 5
96
+ ],
97
+ [
98
+ 18,
99
+ 5,
100
+ 37,
101
+ 0
102
+ ],
103
+ [
104
+ 1,
105
+ 0,
106
+ 75,
107
+ 0
108
+ ],
109
+ [
110
+ 1,
111
+ 0,
112
+ 0,
113
+ 64
114
+ ]
115
+ ],
116
+ "eval_f1": 0.6621621567125869,
117
+ "eval_loss": 0.8232662081718445,
118
+ "eval_precision": 0.671303801513745,
119
+ "eval_recall": 0.717948717948718,
120
+ "eval_runtime": 2.8711,
121
+ "eval_samples_per_second": 95.084,
122
+ "eval_steps_per_second": 1.045,
123
+ "step": 13
124
+ },
125
+ {
126
+ "epoch": 4.0,
127
+ "eval_accuracy": 0.6959706959706959,
128
+ "eval_confusion_matrix": [
129
+ [
130
+ 45,
131
+ 9,
132
+ 13,
133
+ 5
134
+ ],
135
+ [
136
+ 13,
137
+ 5,
138
+ 42,
139
+ 0
140
+ ],
141
+ [
142
+ 1,
143
+ 0,
144
+ 75,
145
+ 0
146
+ ],
147
+ [
148
+ 0,
149
+ 0,
150
+ 0,
151
+ 65
152
+ ]
153
+ ],
154
+ "eval_f1": 0.6428796900399459,
155
+ "eval_loss": 0.951453447341919,
156
+ "eval_precision": 0.6613447705829899,
157
+ "eval_recall": 0.6959706959706959,
158
+ "eval_runtime": 2.8813,
159
+ "eval_samples_per_second": 94.75,
160
+ "eval_steps_per_second": 1.041,
161
+ "step": 18
162
+ },
163
+ {
164
+ "epoch": 4.888888888888889,
165
+ "eval_accuracy": 0.6666666666666666,
166
+ "eval_confusion_matrix": [
167
+ [
168
+ 16,
169
+ 49,
170
+ 3,
171
+ 4
172
+ ],
173
+ [
174
+ 1,
175
+ 26,
176
+ 33,
177
+ 0
178
+ ],
179
+ [
180
+ 0,
181
+ 1,
182
+ 75,
183
+ 0
184
+ ],
185
+ [
186
+ 0,
187
+ 0,
188
+ 0,
189
+ 65
190
+ ]
191
+ ],
192
+ "eval_f1": 0.6331541584523206,
193
+ "eval_loss": 1.0080645084381104,
194
+ "eval_precision": 0.7358031394913882,
195
+ "eval_recall": 0.6666666666666666,
196
+ "eval_runtime": 2.9108,
197
+ "eval_samples_per_second": 93.789,
198
+ "eval_steps_per_second": 1.031,
199
+ "step": 22
200
+ },
201
+ {
202
+ "epoch": 6.0,
203
+ "eval_accuracy": 0.652014652014652,
204
+ "eval_confusion_matrix": [
205
+ [
206
+ 66,
207
+ 0,
208
+ 0,
209
+ 6
210
+ ],
211
+ [
212
+ 36,
213
+ 16,
214
+ 8,
215
+ 0
216
+ ],
217
+ [
218
+ 10,
219
+ 35,
220
+ 31,
221
+ 0
222
+ ],
223
+ [
224
+ 0,
225
+ 0,
226
+ 0,
227
+ 65
228
+ ]
229
+ ],
230
+ "eval_f1": 0.6302407955860642,
231
+ "eval_loss": 0.9598046541213989,
232
+ "eval_precision": 0.6636241740077424,
233
+ "eval_recall": 0.652014652014652,
234
+ "eval_runtime": 2.9773,
235
+ "eval_samples_per_second": 91.693,
236
+ "eval_steps_per_second": 1.008,
237
+ "step": 27
238
+ },
239
+ {
240
+ "epoch": 6.888888888888889,
241
+ "eval_accuracy": 0.7692307692307693,
242
+ "eval_confusion_matrix": [
243
+ [
244
+ 66,
245
+ 0,
246
+ 0,
247
+ 6
248
+ ],
249
+ [
250
+ 29,
251
+ 21,
252
+ 10,
253
+ 0
254
+ ],
255
+ [
256
+ 3,
257
+ 15,
258
+ 58,
259
+ 0
260
+ ],
261
+ [
262
+ 0,
263
+ 0,
264
+ 0,
265
+ 65
266
+ ]
267
+ ],
268
+ "eval_f1": 0.7527855586679116,
269
+ "eval_loss": 0.7898163795471191,
270
+ "eval_precision": 0.7612467677056892,
271
+ "eval_recall": 0.7692307692307693,
272
+ "eval_runtime": 2.8918,
273
+ "eval_samples_per_second": 94.404,
274
+ "eval_steps_per_second": 1.037,
275
+ "step": 31
276
+ },
277
+ {
278
+ "epoch": 8.0,
279
+ "eval_accuracy": 0.7802197802197802,
280
+ "eval_confusion_matrix": [
281
+ [
282
+ 66,
283
+ 0,
284
+ 0,
285
+ 6
286
+ ],
287
+ [
288
+ 25,
289
+ 15,
290
+ 19,
291
+ 1
292
+ ],
293
+ [
294
+ 1,
295
+ 6,
296
+ 68,
297
+ 1
298
+ ],
299
+ [
300
+ 1,
301
+ 0,
302
+ 0,
303
+ 64
304
+ ]
305
+ ],
306
+ "eval_f1": 0.7471179200524057,
307
+ "eval_loss": 0.7337484359741211,
308
+ "eval_precision": 0.7733845922309294,
309
+ "eval_recall": 0.7802197802197802,
310
+ "eval_runtime": 2.9006,
311
+ "eval_samples_per_second": 94.119,
312
+ "eval_steps_per_second": 1.034,
313
+ "step": 36
314
+ },
315
+ {
316
+ "epoch": 8.88888888888889,
317
+ "eval_accuracy": 0.7912087912087912,
318
+ "eval_confusion_matrix": [
319
+ [
320
+ 57,
321
+ 8,
322
+ 3,
323
+ 4
324
+ ],
325
+ [
326
+ 13,
327
+ 24,
328
+ 23,
329
+ 0
330
+ ],
331
+ [
332
+ 0,
333
+ 5,
334
+ 71,
335
+ 0
336
+ ],
337
+ [
338
+ 1,
339
+ 0,
340
+ 0,
341
+ 64
342
+ ]
343
+ ],
344
+ "eval_f1": 0.7766564722186922,
345
+ "eval_loss": 0.7148727178573608,
346
+ "eval_precision": 0.7821504483074875,
347
+ "eval_recall": 0.7912087912087912,
348
+ "eval_runtime": 2.8866,
349
+ "eval_samples_per_second": 94.576,
350
+ "eval_steps_per_second": 1.039,
351
+ "step": 40
352
+ },
353
+ {
354
+ "epoch": 10.0,
355
+ "eval_accuracy": 0.8168498168498168,
356
+ "eval_confusion_matrix": [
357
+ [
358
+ 51,
359
+ 8,
360
+ 1,
361
+ 12
362
+ ],
363
+ [
364
+ 10,
365
+ 36,
366
+ 14,
367
+ 0
368
+ ],
369
+ [
370
+ 0,
371
+ 5,
372
+ 71,
373
+ 0
374
+ ],
375
+ [
376
+ 0,
377
+ 0,
378
+ 0,
379
+ 65
380
+ ]
381
+ ],
382
+ "eval_f1": 0.8094336953840884,
383
+ "eval_loss": 0.7574812173843384,
384
+ "eval_precision": 0.8127936625684181,
385
+ "eval_recall": 0.8168498168498168,
386
+ "eval_runtime": 2.8967,
387
+ "eval_samples_per_second": 94.245,
388
+ "eval_steps_per_second": 1.036,
389
+ "step": 45
390
+ },
391
+ {
392
+ "epoch": 10.88888888888889,
393
+ "eval_accuracy": 0.8021978021978022,
394
+ "eval_confusion_matrix": [
395
+ [
396
+ 48,
397
+ 18,
398
+ 2,
399
+ 4
400
+ ],
401
+ [
402
+ 4,
403
+ 41,
404
+ 15,
405
+ 0
406
+ ],
407
+ [
408
+ 0,
409
+ 7,
410
+ 69,
411
+ 0
412
+ ],
413
+ [
414
+ 3,
415
+ 0,
416
+ 1,
417
+ 61
418
+ ]
419
+ ],
420
+ "eval_f1": 0.801525180147331,
421
+ "eval_loss": 0.7140281200408936,
422
+ "eval_precision": 0.8109338936925145,
423
+ "eval_recall": 0.8021978021978022,
424
+ "eval_runtime": 2.917,
425
+ "eval_samples_per_second": 93.589,
426
+ "eval_steps_per_second": 1.028,
427
+ "step": 49
428
+ },
429
+ {
430
+ "epoch": 12.0,
431
+ "eval_accuracy": 0.8534798534798534,
432
+ "eval_confusion_matrix": [
433
+ [
434
+ 56,
435
+ 12,
436
+ 0,
437
+ 4
438
+ ],
439
+ [
440
+ 6,
441
+ 43,
442
+ 11,
443
+ 0
444
+ ],
445
+ [
446
+ 0,
447
+ 6,
448
+ 70,
449
+ 0
450
+ ],
451
+ [
452
+ 1,
453
+ 0,
454
+ 0,
455
+ 64
456
+ ]
457
+ ],
458
+ "eval_f1": 0.8523994617102314,
459
+ "eval_loss": 0.6672152280807495,
460
+ "eval_precision": 0.8540313732642031,
461
+ "eval_recall": 0.8534798534798534,
462
+ "eval_runtime": 2.8969,
463
+ "eval_samples_per_second": 94.238,
464
+ "eval_steps_per_second": 1.036,
465
+ "step": 54
466
+ },
467
+ {
468
+ "epoch": 12.88888888888889,
469
+ "eval_accuracy": 0.8498168498168498,
470
+ "eval_confusion_matrix": [
471
+ [
472
+ 60,
473
+ 8,
474
+ 0,
475
+ 4
476
+ ],
477
+ [
478
+ 8,
479
+ 38,
480
+ 14,
481
+ 0
482
+ ],
483
+ [
484
+ 0,
485
+ 6,
486
+ 70,
487
+ 0
488
+ ],
489
+ [
490
+ 1,
491
+ 0,
492
+ 0,
493
+ 64
494
+ ]
495
+ ],
496
+ "eval_f1": 0.8463270052615757,
497
+ "eval_loss": 0.6432910561561584,
498
+ "eval_precision": 0.8460243715014519,
499
+ "eval_recall": 0.8498168498168498,
500
+ "eval_runtime": 3.0281,
501
+ "eval_samples_per_second": 90.156,
502
+ "eval_steps_per_second": 0.991,
503
+ "step": 58
504
+ },
505
+ {
506
+ "epoch": 14.0,
507
+ "eval_accuracy": 0.8278388278388278,
508
+ "eval_confusion_matrix": [
509
+ [
510
+ 54,
511
+ 13,
512
+ 1,
513
+ 4
514
+ ],
515
+ [
516
+ 7,
517
+ 44,
518
+ 9,
519
+ 0
520
+ ],
521
+ [
522
+ 0,
523
+ 11,
524
+ 65,
525
+ 0
526
+ ],
527
+ [
528
+ 1,
529
+ 1,
530
+ 0,
531
+ 63
532
+ ]
533
+ ],
534
+ "eval_f1": 0.82943590265942,
535
+ "eval_loss": 0.7395206093788147,
536
+ "eval_precision": 0.8350059217447382,
537
+ "eval_recall": 0.8278388278388278,
538
+ "eval_runtime": 2.9869,
539
+ "eval_samples_per_second": 91.399,
540
+ "eval_steps_per_second": 1.004,
541
+ "step": 63
542
+ },
543
+ {
544
+ "epoch": 14.88888888888889,
545
+ "eval_accuracy": 0.8315018315018315,
546
+ "eval_confusion_matrix": [
547
+ [
548
+ 54,
549
+ 13,
550
+ 1,
551
+ 4
552
+ ],
553
+ [
554
+ 10,
555
+ 39,
556
+ 11,
557
+ 0
558
+ ],
559
+ [
560
+ 0,
561
+ 7,
562
+ 69,
563
+ 0
564
+ ],
565
+ [
566
+ 0,
567
+ 0,
568
+ 0,
569
+ 65
570
+ ]
571
+ ],
572
+ "eval_f1": 0.8291811389886823,
573
+ "eval_loss": 0.7115849852561951,
574
+ "eval_precision": 0.829244108966536,
575
+ "eval_recall": 0.8315018315018315,
576
+ "eval_runtime": 2.9616,
577
+ "eval_samples_per_second": 92.181,
578
+ "eval_steps_per_second": 1.013,
579
+ "step": 67
580
+ },
581
+ {
582
+ "epoch": 16.0,
583
+ "eval_accuracy": 0.8315018315018315,
584
+ "eval_confusion_matrix": [
585
+ [
586
+ 60,
587
+ 7,
588
+ 1,
589
+ 4
590
+ ],
591
+ [
592
+ 10,
593
+ 39,
594
+ 11,
595
+ 0
596
+ ],
597
+ [
598
+ 0,
599
+ 11,
600
+ 65,
601
+ 0
602
+ ],
603
+ [
604
+ 1,
605
+ 1,
606
+ 0,
607
+ 63
608
+ ]
609
+ ],
610
+ "eval_f1": 0.8304073820984628,
611
+ "eval_loss": 0.7295921444892883,
612
+ "eval_precision": 0.8295426562258641,
613
+ "eval_recall": 0.8315018315018315,
614
+ "eval_runtime": 2.8758,
615
+ "eval_samples_per_second": 94.932,
616
+ "eval_steps_per_second": 1.043,
617
+ "step": 72
618
+ },
619
+ {
620
+ "epoch": 16.88888888888889,
621
+ "eval_accuracy": 0.8644688644688645,
622
+ "eval_confusion_matrix": [
623
+ [
624
+ 62,
625
+ 5,
626
+ 1,
627
+ 4
628
+ ],
629
+ [
630
+ 9,
631
+ 37,
632
+ 14,
633
+ 0
634
+ ],
635
+ [
636
+ 0,
637
+ 3,
638
+ 73,
639
+ 0
640
+ ],
641
+ [
642
+ 1,
643
+ 0,
644
+ 0,
645
+ 64
646
+ ]
647
+ ],
648
+ "eval_f1": 0.8589767100678526,
649
+ "eval_loss": 0.7055637240409851,
650
+ "eval_precision": 0.8628397746044805,
651
+ "eval_recall": 0.8644688644688645,
652
+ "eval_runtime": 3.074,
653
+ "eval_samples_per_second": 88.809,
654
+ "eval_steps_per_second": 0.976,
655
+ "step": 76
656
+ },
657
+ {
658
+ "epoch": 18.0,
659
+ "eval_accuracy": 0.8644688644688645,
660
+ "eval_confusion_matrix": [
661
+ [
662
+ 65,
663
+ 2,
664
+ 1,
665
+ 4
666
+ ],
667
+ [
668
+ 13,
669
+ 42,
670
+ 5,
671
+ 0
672
+ ],
673
+ [
674
+ 0,
675
+ 12,
676
+ 64,
677
+ 0
678
+ ],
679
+ [
680
+ 0,
681
+ 0,
682
+ 0,
683
+ 65
684
+ ]
685
+ ],
686
+ "eval_f1": 0.8627769756077204,
687
+ "eval_loss": 0.7563945651054382,
688
+ "eval_precision": 0.8634344261673453,
689
+ "eval_recall": 0.8644688644688645,
690
+ "eval_runtime": 3.0072,
691
+ "eval_samples_per_second": 90.783,
692
+ "eval_steps_per_second": 0.998,
693
+ "step": 81
694
+ },
695
+ {
696
+ "epoch": 18.88888888888889,
697
+ "eval_accuracy": 0.8424908424908425,
698
+ "eval_confusion_matrix": [
699
+ [
700
+ 64,
701
+ 4,
702
+ 0,
703
+ 4
704
+ ],
705
+ [
706
+ 11,
707
+ 41,
708
+ 8,
709
+ 0
710
+ ],
711
+ [
712
+ 0,
713
+ 10,
714
+ 66,
715
+ 0
716
+ ],
717
+ [
718
+ 6,
719
+ 0,
720
+ 0,
721
+ 59
722
+ ]
723
+ ],
724
+ "eval_f1": 0.8418306879608031,
725
+ "eval_loss": 0.7825365662574768,
726
+ "eval_precision": 0.8434907006335578,
727
+ "eval_recall": 0.8424908424908425,
728
+ "eval_runtime": 3.0343,
729
+ "eval_samples_per_second": 89.972,
730
+ "eval_steps_per_second": 0.989,
731
+ "step": 85
732
+ },
733
+ {
734
+ "epoch": 20.0,
735
+ "eval_accuracy": 0.8058608058608059,
736
+ "eval_confusion_matrix": [
737
+ [
738
+ 40,
739
+ 28,
740
+ 1,
741
+ 3
742
+ ],
743
+ [
744
+ 2,
745
+ 50,
746
+ 8,
747
+ 0
748
+ ],
749
+ [
750
+ 0,
751
+ 11,
752
+ 65,
753
+ 0
754
+ ],
755
+ [
756
+ 0,
757
+ 0,
758
+ 0,
759
+ 65
760
+ ]
761
+ ],
762
+ "eval_f1": 0.8065760931078588,
763
+ "eval_loss": 0.8426868915557861,
764
+ "eval_precision": 0.8467707085637404,
765
+ "eval_recall": 0.8058608058608059,
766
+ "eval_runtime": 3.0783,
767
+ "eval_samples_per_second": 88.686,
768
+ "eval_steps_per_second": 0.975,
769
+ "step": 90
770
+ },
771
+ {
772
+ "epoch": 20.88888888888889,
773
+ "eval_accuracy": 0.8498168498168498,
774
+ "eval_confusion_matrix": [
775
+ [
776
+ 63,
777
+ 4,
778
+ 0,
779
+ 5
780
+ ],
781
+ [
782
+ 13,
783
+ 40,
784
+ 7,
785
+ 0
786
+ ],
787
+ [
788
+ 0,
789
+ 12,
790
+ 64,
791
+ 0
792
+ ],
793
+ [
794
+ 0,
795
+ 0,
796
+ 0,
797
+ 65
798
+ ]
799
+ ],
800
+ "eval_f1": 0.8477878057985963,
801
+ "eval_loss": 0.7440442442893982,
802
+ "eval_precision": 0.8476393351433065,
803
+ "eval_recall": 0.8498168498168498,
804
+ "eval_runtime": 2.9875,
805
+ "eval_samples_per_second": 91.38,
806
+ "eval_steps_per_second": 1.004,
807
+ "step": 94
808
+ },
809
+ {
810
+ "epoch": 22.0,
811
+ "eval_accuracy": 0.8608058608058609,
812
+ "eval_confusion_matrix": [
813
+ [
814
+ 66,
815
+ 1,
816
+ 1,
817
+ 4
818
+ ],
819
+ [
820
+ 13,
821
+ 36,
822
+ 11,
823
+ 0
824
+ ],
825
+ [
826
+ 0,
827
+ 5,
828
+ 71,
829
+ 0
830
+ ],
831
+ [
832
+ 3,
833
+ 0,
834
+ 0,
835
+ 62
836
+ ]
837
+ ],
838
+ "eval_f1": 0.855194718990792,
839
+ "eval_loss": 0.7338178753852844,
840
+ "eval_precision": 0.8624631692093176,
841
+ "eval_recall": 0.8608058608058609,
842
+ "eval_runtime": 2.98,
843
+ "eval_samples_per_second": 91.611,
844
+ "eval_steps_per_second": 1.007,
845
+ "step": 99
846
+ },
847
+ {
848
+ "epoch": 22.88888888888889,
849
+ "eval_accuracy": 0.8498168498168498,
850
+ "eval_confusion_matrix": [
851
+ [
852
+ 58,
853
+ 10,
854
+ 0,
855
+ 4
856
+ ],
857
+ [
858
+ 10,
859
+ 43,
860
+ 7,
861
+ 0
862
+ ],
863
+ [
864
+ 0,
865
+ 7,
866
+ 69,
867
+ 0
868
+ ],
869
+ [
870
+ 2,
871
+ 1,
872
+ 0,
873
+ 62
874
+ ]
875
+ ],
876
+ "eval_f1": 0.8497737987724402,
877
+ "eval_loss": 0.7231407761573792,
878
+ "eval_precision": 0.8498638493954653,
879
+ "eval_recall": 0.8498168498168498,
880
+ "eval_runtime": 2.9272,
881
+ "eval_samples_per_second": 93.262,
882
+ "eval_steps_per_second": 1.025,
883
+ "step": 103
884
+ },
885
+ {
886
+ "epoch": 24.0,
887
+ "eval_accuracy": 0.8424908424908425,
888
+ "eval_confusion_matrix": [
889
+ [
890
+ 67,
891
+ 1,
892
+ 0,
893
+ 4
894
+ ],
895
+ [
896
+ 12,
897
+ 44,
898
+ 4,
899
+ 0
900
+ ],
901
+ [
902
+ 0,
903
+ 18,
904
+ 58,
905
+ 0
906
+ ],
907
+ [
908
+ 2,
909
+ 2,
910
+ 0,
911
+ 61
912
+ ]
913
+ ],
914
+ "eval_f1": 0.8431607380967995,
915
+ "eval_loss": 0.752363920211792,
916
+ "eval_precision": 0.8507974885146101,
917
+ "eval_recall": 0.8424908424908425,
918
+ "eval_runtime": 3.0257,
919
+ "eval_samples_per_second": 90.228,
920
+ "eval_steps_per_second": 0.992,
921
+ "step": 108
922
+ }
923
+ ],
924
+ "logging_steps": 500,
925
+ "max_steps": 180,
926
+ "num_input_tokens_seen": 0,
927
+ "num_train_epochs": 45,
928
+ "save_steps": 500,
929
+ "stateful_callbacks": {
930
+ "TrainerControl": {
931
+ "args": {
932
+ "should_epoch_stop": false,
933
+ "should_evaluate": false,
934
+ "should_log": false,
935
+ "should_save": true,
936
+ "should_training_stop": false
937
+ },
938
+ "attributes": {}
939
+ }
940
+ },
941
+ "total_flos": 5.94937668096e+16,
942
+ "train_batch_size": 128,
943
+ "trial_name": null,
944
+ "trial_params": null
945
+ }
checkpoint-108/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:09e217cad0464b642c54f8b7691100b62a93dceb32a1869f55e3f0eb3a54a79e
3
+ size 5240
checkpoint-112/config.json ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "ntu-spml/distilhubert",
3
+ "activation_dropout": 0.1,
4
+ "apply_spec_augment": false,
5
+ "architectures": [
6
+ "HubertForSequenceClassification"
7
+ ],
8
+ "attention_dropout": 0.1,
9
+ "bos_token_id": 1,
10
+ "classifier_proj_size": 256,
11
+ "conv_bias": false,
12
+ "conv_dim": [
13
+ 512,
14
+ 512,
15
+ 512,
16
+ 512,
17
+ 512,
18
+ 512,
19
+ 512
20
+ ],
21
+ "conv_kernel": [
22
+ 10,
23
+ 3,
24
+ 3,
25
+ 3,
26
+ 3,
27
+ 2,
28
+ 2
29
+ ],
30
+ "conv_stride": [
31
+ 5,
32
+ 2,
33
+ 2,
34
+ 2,
35
+ 2,
36
+ 2,
37
+ 2
38
+ ],
39
+ "ctc_loss_reduction": "sum",
40
+ "ctc_zero_infinity": false,
41
+ "do_stable_layer_norm": false,
42
+ "eos_token_id": 2,
43
+ "feat_extract_activation": "gelu",
44
+ "feat_extract_norm": "group",
45
+ "feat_proj_dropout": 0.0,
46
+ "feat_proj_layer_norm": false,
47
+ "final_dropout": 0.0,
48
+ "finetuning_task": "audio-classification",
49
+ "hidden_act": "gelu",
50
+ "hidden_dropout": 0.1,
51
+ "hidden_size": 768,
52
+ "id2label": {
53
+ "0": "1s_normal",
54
+ "1": "1s_pain",
55
+ "2": "1s_hunger",
56
+ "3": "1s_asphyxia"
57
+ },
58
+ "initializer_range": 0.02,
59
+ "intermediate_size": 3072,
60
+ "label2id": {
61
+ "LABEL_0": 0,
62
+ "LABEL_1": 1,
63
+ "LABEL_2": 2,
64
+ "LABEL_3": 3
65
+ },
66
+ "layer_norm_eps": 1e-05,
67
+ "layerdrop": 0.0,
68
+ "mask_feature_length": 10,
69
+ "mask_feature_min_masks": 0,
70
+ "mask_feature_prob": 0.0,
71
+ "mask_time_length": 10,
72
+ "mask_time_min_masks": 2,
73
+ "mask_time_prob": 0.05,
74
+ "model_type": "hubert",
75
+ "num_attention_heads": 12,
76
+ "num_conv_pos_embedding_groups": 16,
77
+ "num_conv_pos_embeddings": 128,
78
+ "num_feat_extract_layers": 7,
79
+ "num_hidden_layers": 2,
80
+ "pad_token_id": 0,
81
+ "torch_dtype": "float32",
82
+ "transformers_version": "4.44.2",
83
+ "use_weighted_layer_sum": false,
84
+ "vocab_size": 32
85
+ }
checkpoint-112/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:55f1a1837a7c7f7a1f7cace5f5cae81f177ce53ed0076b7d4272fc34489ac246
3
+ size 94765560
checkpoint-112/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:52cbe97bfd5315f6a8903cf00166e6c8d6bf8f36344f973060f64a4bc98ceeaa
3
+ size 189556666
checkpoint-112/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3933a3c2e5fe0278cc211a89b2796b02c68fc53683a62ae85982e3d2c0703c13
3
+ size 14308
checkpoint-112/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2b6a6538f7897eef4a81ad512c8b5f7c4391b82687409a25f8f49aad850597e4
3
+ size 1064
checkpoint-112/trainer_state.json ADDED
@@ -0,0 +1,983 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.8627769756077204,
3
+ "best_model_checkpoint": "distilhubert-finetuned-mixed-data/checkpoint-81",
4
+ "epoch": 24.88888888888889,
5
+ "eval_steps": 500,
6
+ "global_step": 112,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.8888888888888888,
13
+ "eval_accuracy": 0.63003663003663,
14
+ "eval_confusion_matrix": [
15
+ [
16
+ 53,
17
+ 0,
18
+ 16,
19
+ 3
20
+ ],
21
+ [
22
+ 28,
23
+ 1,
24
+ 31,
25
+ 0
26
+ ],
27
+ [
28
+ 13,
29
+ 0,
30
+ 62,
31
+ 1
32
+ ],
33
+ [
34
+ 3,
35
+ 0,
36
+ 6,
37
+ 56
38
+ ]
39
+ ],
40
+ "eval_f1": 0.566693372031096,
41
+ "eval_loss": 1.31327486038208,
42
+ "eval_precision": 0.7361933549293478,
43
+ "eval_recall": 0.63003663003663,
44
+ "eval_runtime": 2.8436,
45
+ "eval_samples_per_second": 96.005,
46
+ "eval_steps_per_second": 1.055,
47
+ "step": 4
48
+ },
49
+ {
50
+ "epoch": 2.0,
51
+ "eval_accuracy": 0.6593406593406593,
52
+ "eval_confusion_matrix": [
53
+ [
54
+ 41,
55
+ 10,
56
+ 16,
57
+ 5
58
+ ],
59
+ [
60
+ 21,
61
+ 1,
62
+ 38,
63
+ 0
64
+ ],
65
+ [
66
+ 1,
67
+ 2,
68
+ 73,
69
+ 0
70
+ ],
71
+ [
72
+ 0,
73
+ 0,
74
+ 0,
75
+ 65
76
+ ]
77
+ ],
78
+ "eval_f1": 0.595713773130153,
79
+ "eval_loss": 0.9242589473724365,
80
+ "eval_precision": 0.5696509512811321,
81
+ "eval_recall": 0.6593406593406593,
82
+ "eval_runtime": 2.8668,
83
+ "eval_samples_per_second": 95.227,
84
+ "eval_steps_per_second": 1.046,
85
+ "step": 9
86
+ },
87
+ {
88
+ "epoch": 2.888888888888889,
89
+ "eval_accuracy": 0.717948717948718,
90
+ "eval_confusion_matrix": [
91
+ [
92
+ 52,
93
+ 8,
94
+ 7,
95
+ 5
96
+ ],
97
+ [
98
+ 18,
99
+ 5,
100
+ 37,
101
+ 0
102
+ ],
103
+ [
104
+ 1,
105
+ 0,
106
+ 75,
107
+ 0
108
+ ],
109
+ [
110
+ 1,
111
+ 0,
112
+ 0,
113
+ 64
114
+ ]
115
+ ],
116
+ "eval_f1": 0.6621621567125869,
117
+ "eval_loss": 0.8232662081718445,
118
+ "eval_precision": 0.671303801513745,
119
+ "eval_recall": 0.717948717948718,
120
+ "eval_runtime": 2.8711,
121
+ "eval_samples_per_second": 95.084,
122
+ "eval_steps_per_second": 1.045,
123
+ "step": 13
124
+ },
125
+ {
126
+ "epoch": 4.0,
127
+ "eval_accuracy": 0.6959706959706959,
128
+ "eval_confusion_matrix": [
129
+ [
130
+ 45,
131
+ 9,
132
+ 13,
133
+ 5
134
+ ],
135
+ [
136
+ 13,
137
+ 5,
138
+ 42,
139
+ 0
140
+ ],
141
+ [
142
+ 1,
143
+ 0,
144
+ 75,
145
+ 0
146
+ ],
147
+ [
148
+ 0,
149
+ 0,
150
+ 0,
151
+ 65
152
+ ]
153
+ ],
154
+ "eval_f1": 0.6428796900399459,
155
+ "eval_loss": 0.951453447341919,
156
+ "eval_precision": 0.6613447705829899,
157
+ "eval_recall": 0.6959706959706959,
158
+ "eval_runtime": 2.8813,
159
+ "eval_samples_per_second": 94.75,
160
+ "eval_steps_per_second": 1.041,
161
+ "step": 18
162
+ },
163
+ {
164
+ "epoch": 4.888888888888889,
165
+ "eval_accuracy": 0.6666666666666666,
166
+ "eval_confusion_matrix": [
167
+ [
168
+ 16,
169
+ 49,
170
+ 3,
171
+ 4
172
+ ],
173
+ [
174
+ 1,
175
+ 26,
176
+ 33,
177
+ 0
178
+ ],
179
+ [
180
+ 0,
181
+ 1,
182
+ 75,
183
+ 0
184
+ ],
185
+ [
186
+ 0,
187
+ 0,
188
+ 0,
189
+ 65
190
+ ]
191
+ ],
192
+ "eval_f1": 0.6331541584523206,
193
+ "eval_loss": 1.0080645084381104,
194
+ "eval_precision": 0.7358031394913882,
195
+ "eval_recall": 0.6666666666666666,
196
+ "eval_runtime": 2.9108,
197
+ "eval_samples_per_second": 93.789,
198
+ "eval_steps_per_second": 1.031,
199
+ "step": 22
200
+ },
201
+ {
202
+ "epoch": 6.0,
203
+ "eval_accuracy": 0.652014652014652,
204
+ "eval_confusion_matrix": [
205
+ [
206
+ 66,
207
+ 0,
208
+ 0,
209
+ 6
210
+ ],
211
+ [
212
+ 36,
213
+ 16,
214
+ 8,
215
+ 0
216
+ ],
217
+ [
218
+ 10,
219
+ 35,
220
+ 31,
221
+ 0
222
+ ],
223
+ [
224
+ 0,
225
+ 0,
226
+ 0,
227
+ 65
228
+ ]
229
+ ],
230
+ "eval_f1": 0.6302407955860642,
231
+ "eval_loss": 0.9598046541213989,
232
+ "eval_precision": 0.6636241740077424,
233
+ "eval_recall": 0.652014652014652,
234
+ "eval_runtime": 2.9773,
235
+ "eval_samples_per_second": 91.693,
236
+ "eval_steps_per_second": 1.008,
237
+ "step": 27
238
+ },
239
+ {
240
+ "epoch": 6.888888888888889,
241
+ "eval_accuracy": 0.7692307692307693,
242
+ "eval_confusion_matrix": [
243
+ [
244
+ 66,
245
+ 0,
246
+ 0,
247
+ 6
248
+ ],
249
+ [
250
+ 29,
251
+ 21,
252
+ 10,
253
+ 0
254
+ ],
255
+ [
256
+ 3,
257
+ 15,
258
+ 58,
259
+ 0
260
+ ],
261
+ [
262
+ 0,
263
+ 0,
264
+ 0,
265
+ 65
266
+ ]
267
+ ],
268
+ "eval_f1": 0.7527855586679116,
269
+ "eval_loss": 0.7898163795471191,
270
+ "eval_precision": 0.7612467677056892,
271
+ "eval_recall": 0.7692307692307693,
272
+ "eval_runtime": 2.8918,
273
+ "eval_samples_per_second": 94.404,
274
+ "eval_steps_per_second": 1.037,
275
+ "step": 31
276
+ },
277
+ {
278
+ "epoch": 8.0,
279
+ "eval_accuracy": 0.7802197802197802,
280
+ "eval_confusion_matrix": [
281
+ [
282
+ 66,
283
+ 0,
284
+ 0,
285
+ 6
286
+ ],
287
+ [
288
+ 25,
289
+ 15,
290
+ 19,
291
+ 1
292
+ ],
293
+ [
294
+ 1,
295
+ 6,
296
+ 68,
297
+ 1
298
+ ],
299
+ [
300
+ 1,
301
+ 0,
302
+ 0,
303
+ 64
304
+ ]
305
+ ],
306
+ "eval_f1": 0.7471179200524057,
307
+ "eval_loss": 0.7337484359741211,
308
+ "eval_precision": 0.7733845922309294,
309
+ "eval_recall": 0.7802197802197802,
310
+ "eval_runtime": 2.9006,
311
+ "eval_samples_per_second": 94.119,
312
+ "eval_steps_per_second": 1.034,
313
+ "step": 36
314
+ },
315
+ {
316
+ "epoch": 8.88888888888889,
317
+ "eval_accuracy": 0.7912087912087912,
318
+ "eval_confusion_matrix": [
319
+ [
320
+ 57,
321
+ 8,
322
+ 3,
323
+ 4
324
+ ],
325
+ [
326
+ 13,
327
+ 24,
328
+ 23,
329
+ 0
330
+ ],
331
+ [
332
+ 0,
333
+ 5,
334
+ 71,
335
+ 0
336
+ ],
337
+ [
338
+ 1,
339
+ 0,
340
+ 0,
341
+ 64
342
+ ]
343
+ ],
344
+ "eval_f1": 0.7766564722186922,
345
+ "eval_loss": 0.7148727178573608,
346
+ "eval_precision": 0.7821504483074875,
347
+ "eval_recall": 0.7912087912087912,
348
+ "eval_runtime": 2.8866,
349
+ "eval_samples_per_second": 94.576,
350
+ "eval_steps_per_second": 1.039,
351
+ "step": 40
352
+ },
353
+ {
354
+ "epoch": 10.0,
355
+ "eval_accuracy": 0.8168498168498168,
356
+ "eval_confusion_matrix": [
357
+ [
358
+ 51,
359
+ 8,
360
+ 1,
361
+ 12
362
+ ],
363
+ [
364
+ 10,
365
+ 36,
366
+ 14,
367
+ 0
368
+ ],
369
+ [
370
+ 0,
371
+ 5,
372
+ 71,
373
+ 0
374
+ ],
375
+ [
376
+ 0,
377
+ 0,
378
+ 0,
379
+ 65
380
+ ]
381
+ ],
382
+ "eval_f1": 0.8094336953840884,
383
+ "eval_loss": 0.7574812173843384,
384
+ "eval_precision": 0.8127936625684181,
385
+ "eval_recall": 0.8168498168498168,
386
+ "eval_runtime": 2.8967,
387
+ "eval_samples_per_second": 94.245,
388
+ "eval_steps_per_second": 1.036,
389
+ "step": 45
390
+ },
391
+ {
392
+ "epoch": 10.88888888888889,
393
+ "eval_accuracy": 0.8021978021978022,
394
+ "eval_confusion_matrix": [
395
+ [
396
+ 48,
397
+ 18,
398
+ 2,
399
+ 4
400
+ ],
401
+ [
402
+ 4,
403
+ 41,
404
+ 15,
405
+ 0
406
+ ],
407
+ [
408
+ 0,
409
+ 7,
410
+ 69,
411
+ 0
412
+ ],
413
+ [
414
+ 3,
415
+ 0,
416
+ 1,
417
+ 61
418
+ ]
419
+ ],
420
+ "eval_f1": 0.801525180147331,
421
+ "eval_loss": 0.7140281200408936,
422
+ "eval_precision": 0.8109338936925145,
423
+ "eval_recall": 0.8021978021978022,
424
+ "eval_runtime": 2.917,
425
+ "eval_samples_per_second": 93.589,
426
+ "eval_steps_per_second": 1.028,
427
+ "step": 49
428
+ },
429
+ {
430
+ "epoch": 12.0,
431
+ "eval_accuracy": 0.8534798534798534,
432
+ "eval_confusion_matrix": [
433
+ [
434
+ 56,
435
+ 12,
436
+ 0,
437
+ 4
438
+ ],
439
+ [
440
+ 6,
441
+ 43,
442
+ 11,
443
+ 0
444
+ ],
445
+ [
446
+ 0,
447
+ 6,
448
+ 70,
449
+ 0
450
+ ],
451
+ [
452
+ 1,
453
+ 0,
454
+ 0,
455
+ 64
456
+ ]
457
+ ],
458
+ "eval_f1": 0.8523994617102314,
459
+ "eval_loss": 0.6672152280807495,
460
+ "eval_precision": 0.8540313732642031,
461
+ "eval_recall": 0.8534798534798534,
462
+ "eval_runtime": 2.8969,
463
+ "eval_samples_per_second": 94.238,
464
+ "eval_steps_per_second": 1.036,
465
+ "step": 54
466
+ },
467
+ {
468
+ "epoch": 12.88888888888889,
469
+ "eval_accuracy": 0.8498168498168498,
470
+ "eval_confusion_matrix": [
471
+ [
472
+ 60,
473
+ 8,
474
+ 0,
475
+ 4
476
+ ],
477
+ [
478
+ 8,
479
+ 38,
480
+ 14,
481
+ 0
482
+ ],
483
+ [
484
+ 0,
485
+ 6,
486
+ 70,
487
+ 0
488
+ ],
489
+ [
490
+ 1,
491
+ 0,
492
+ 0,
493
+ 64
494
+ ]
495
+ ],
496
+ "eval_f1": 0.8463270052615757,
497
+ "eval_loss": 0.6432910561561584,
498
+ "eval_precision": 0.8460243715014519,
499
+ "eval_recall": 0.8498168498168498,
500
+ "eval_runtime": 3.0281,
501
+ "eval_samples_per_second": 90.156,
502
+ "eval_steps_per_second": 0.991,
503
+ "step": 58
504
+ },
505
+ {
506
+ "epoch": 14.0,
507
+ "eval_accuracy": 0.8278388278388278,
508
+ "eval_confusion_matrix": [
509
+ [
510
+ 54,
511
+ 13,
512
+ 1,
513
+ 4
514
+ ],
515
+ [
516
+ 7,
517
+ 44,
518
+ 9,
519
+ 0
520
+ ],
521
+ [
522
+ 0,
523
+ 11,
524
+ 65,
525
+ 0
526
+ ],
527
+ [
528
+ 1,
529
+ 1,
530
+ 0,
531
+ 63
532
+ ]
533
+ ],
534
+ "eval_f1": 0.82943590265942,
535
+ "eval_loss": 0.7395206093788147,
536
+ "eval_precision": 0.8350059217447382,
537
+ "eval_recall": 0.8278388278388278,
538
+ "eval_runtime": 2.9869,
539
+ "eval_samples_per_second": 91.399,
540
+ "eval_steps_per_second": 1.004,
541
+ "step": 63
542
+ },
543
+ {
544
+ "epoch": 14.88888888888889,
545
+ "eval_accuracy": 0.8315018315018315,
546
+ "eval_confusion_matrix": [
547
+ [
548
+ 54,
549
+ 13,
550
+ 1,
551
+ 4
552
+ ],
553
+ [
554
+ 10,
555
+ 39,
556
+ 11,
557
+ 0
558
+ ],
559
+ [
560
+ 0,
561
+ 7,
562
+ 69,
563
+ 0
564
+ ],
565
+ [
566
+ 0,
567
+ 0,
568
+ 0,
569
+ 65
570
+ ]
571
+ ],
572
+ "eval_f1": 0.8291811389886823,
573
+ "eval_loss": 0.7115849852561951,
574
+ "eval_precision": 0.829244108966536,
575
+ "eval_recall": 0.8315018315018315,
576
+ "eval_runtime": 2.9616,
577
+ "eval_samples_per_second": 92.181,
578
+ "eval_steps_per_second": 1.013,
579
+ "step": 67
580
+ },
581
+ {
582
+ "epoch": 16.0,
583
+ "eval_accuracy": 0.8315018315018315,
584
+ "eval_confusion_matrix": [
585
+ [
586
+ 60,
587
+ 7,
588
+ 1,
589
+ 4
590
+ ],
591
+ [
592
+ 10,
593
+ 39,
594
+ 11,
595
+ 0
596
+ ],
597
+ [
598
+ 0,
599
+ 11,
600
+ 65,
601
+ 0
602
+ ],
603
+ [
604
+ 1,
605
+ 1,
606
+ 0,
607
+ 63
608
+ ]
609
+ ],
610
+ "eval_f1": 0.8304073820984628,
611
+ "eval_loss": 0.7295921444892883,
612
+ "eval_precision": 0.8295426562258641,
613
+ "eval_recall": 0.8315018315018315,
614
+ "eval_runtime": 2.8758,
615
+ "eval_samples_per_second": 94.932,
616
+ "eval_steps_per_second": 1.043,
617
+ "step": 72
618
+ },
619
+ {
620
+ "epoch": 16.88888888888889,
621
+ "eval_accuracy": 0.8644688644688645,
622
+ "eval_confusion_matrix": [
623
+ [
624
+ 62,
625
+ 5,
626
+ 1,
627
+ 4
628
+ ],
629
+ [
630
+ 9,
631
+ 37,
632
+ 14,
633
+ 0
634
+ ],
635
+ [
636
+ 0,
637
+ 3,
638
+ 73,
639
+ 0
640
+ ],
641
+ [
642
+ 1,
643
+ 0,
644
+ 0,
645
+ 64
646
+ ]
647
+ ],
648
+ "eval_f1": 0.8589767100678526,
649
+ "eval_loss": 0.7055637240409851,
650
+ "eval_precision": 0.8628397746044805,
651
+ "eval_recall": 0.8644688644688645,
652
+ "eval_runtime": 3.074,
653
+ "eval_samples_per_second": 88.809,
654
+ "eval_steps_per_second": 0.976,
655
+ "step": 76
656
+ },
657
+ {
658
+ "epoch": 18.0,
659
+ "eval_accuracy": 0.8644688644688645,
660
+ "eval_confusion_matrix": [
661
+ [
662
+ 65,
663
+ 2,
664
+ 1,
665
+ 4
666
+ ],
667
+ [
668
+ 13,
669
+ 42,
670
+ 5,
671
+ 0
672
+ ],
673
+ [
674
+ 0,
675
+ 12,
676
+ 64,
677
+ 0
678
+ ],
679
+ [
680
+ 0,
681
+ 0,
682
+ 0,
683
+ 65
684
+ ]
685
+ ],
686
+ "eval_f1": 0.8627769756077204,
687
+ "eval_loss": 0.7563945651054382,
688
+ "eval_precision": 0.8634344261673453,
689
+ "eval_recall": 0.8644688644688645,
690
+ "eval_runtime": 3.0072,
691
+ "eval_samples_per_second": 90.783,
692
+ "eval_steps_per_second": 0.998,
693
+ "step": 81
694
+ },
695
+ {
696
+ "epoch": 18.88888888888889,
697
+ "eval_accuracy": 0.8424908424908425,
698
+ "eval_confusion_matrix": [
699
+ [
700
+ 64,
701
+ 4,
702
+ 0,
703
+ 4
704
+ ],
705
+ [
706
+ 11,
707
+ 41,
708
+ 8,
709
+ 0
710
+ ],
711
+ [
712
+ 0,
713
+ 10,
714
+ 66,
715
+ 0
716
+ ],
717
+ [
718
+ 6,
719
+ 0,
720
+ 0,
721
+ 59
722
+ ]
723
+ ],
724
+ "eval_f1": 0.8418306879608031,
725
+ "eval_loss": 0.7825365662574768,
726
+ "eval_precision": 0.8434907006335578,
727
+ "eval_recall": 0.8424908424908425,
728
+ "eval_runtime": 3.0343,
729
+ "eval_samples_per_second": 89.972,
730
+ "eval_steps_per_second": 0.989,
731
+ "step": 85
732
+ },
733
+ {
734
+ "epoch": 20.0,
735
+ "eval_accuracy": 0.8058608058608059,
736
+ "eval_confusion_matrix": [
737
+ [
738
+ 40,
739
+ 28,
740
+ 1,
741
+ 3
742
+ ],
743
+ [
744
+ 2,
745
+ 50,
746
+ 8,
747
+ 0
748
+ ],
749
+ [
750
+ 0,
751
+ 11,
752
+ 65,
753
+ 0
754
+ ],
755
+ [
756
+ 0,
757
+ 0,
758
+ 0,
759
+ 65
760
+ ]
761
+ ],
762
+ "eval_f1": 0.8065760931078588,
763
+ "eval_loss": 0.8426868915557861,
764
+ "eval_precision": 0.8467707085637404,
765
+ "eval_recall": 0.8058608058608059,
766
+ "eval_runtime": 3.0783,
767
+ "eval_samples_per_second": 88.686,
768
+ "eval_steps_per_second": 0.975,
769
+ "step": 90
770
+ },
771
+ {
772
+ "epoch": 20.88888888888889,
773
+ "eval_accuracy": 0.8498168498168498,
774
+ "eval_confusion_matrix": [
775
+ [
776
+ 63,
777
+ 4,
778
+ 0,
779
+ 5
780
+ ],
781
+ [
782
+ 13,
783
+ 40,
784
+ 7,
785
+ 0
786
+ ],
787
+ [
788
+ 0,
789
+ 12,
790
+ 64,
791
+ 0
792
+ ],
793
+ [
794
+ 0,
795
+ 0,
796
+ 0,
797
+ 65
798
+ ]
799
+ ],
800
+ "eval_f1": 0.8477878057985963,
801
+ "eval_loss": 0.7440442442893982,
802
+ "eval_precision": 0.8476393351433065,
803
+ "eval_recall": 0.8498168498168498,
804
+ "eval_runtime": 2.9875,
805
+ "eval_samples_per_second": 91.38,
806
+ "eval_steps_per_second": 1.004,
807
+ "step": 94
808
+ },
809
+ {
810
+ "epoch": 22.0,
811
+ "eval_accuracy": 0.8608058608058609,
812
+ "eval_confusion_matrix": [
813
+ [
814
+ 66,
815
+ 1,
816
+ 1,
817
+ 4
818
+ ],
819
+ [
820
+ 13,
821
+ 36,
822
+ 11,
823
+ 0
824
+ ],
825
+ [
826
+ 0,
827
+ 5,
828
+ 71,
829
+ 0
830
+ ],
831
+ [
832
+ 3,
833
+ 0,
834
+ 0,
835
+ 62
836
+ ]
837
+ ],
838
+ "eval_f1": 0.855194718990792,
839
+ "eval_loss": 0.7338178753852844,
840
+ "eval_precision": 0.8624631692093176,
841
+ "eval_recall": 0.8608058608058609,
842
+ "eval_runtime": 2.98,
843
+ "eval_samples_per_second": 91.611,
844
+ "eval_steps_per_second": 1.007,
845
+ "step": 99
846
+ },
847
+ {
848
+ "epoch": 22.88888888888889,
849
+ "eval_accuracy": 0.8498168498168498,
850
+ "eval_confusion_matrix": [
851
+ [
852
+ 58,
853
+ 10,
854
+ 0,
855
+ 4
856
+ ],
857
+ [
858
+ 10,
859
+ 43,
860
+ 7,
861
+ 0
862
+ ],
863
+ [
864
+ 0,
865
+ 7,
866
+ 69,
867
+ 0
868
+ ],
869
+ [
870
+ 2,
871
+ 1,
872
+ 0,
873
+ 62
874
+ ]
875
+ ],
876
+ "eval_f1": 0.8497737987724402,
877
+ "eval_loss": 0.7231407761573792,
878
+ "eval_precision": 0.8498638493954653,
879
+ "eval_recall": 0.8498168498168498,
880
+ "eval_runtime": 2.9272,
881
+ "eval_samples_per_second": 93.262,
882
+ "eval_steps_per_second": 1.025,
883
+ "step": 103
884
+ },
885
+ {
886
+ "epoch": 24.0,
887
+ "eval_accuracy": 0.8424908424908425,
888
+ "eval_confusion_matrix": [
889
+ [
890
+ 67,
891
+ 1,
892
+ 0,
893
+ 4
894
+ ],
895
+ [
896
+ 12,
897
+ 44,
898
+ 4,
899
+ 0
900
+ ],
901
+ [
902
+ 0,
903
+ 18,
904
+ 58,
905
+ 0
906
+ ],
907
+ [
908
+ 2,
909
+ 2,
910
+ 0,
911
+ 61
912
+ ]
913
+ ],
914
+ "eval_f1": 0.8431607380967995,
915
+ "eval_loss": 0.752363920211792,
916
+ "eval_precision": 0.8507974885146101,
917
+ "eval_recall": 0.8424908424908425,
918
+ "eval_runtime": 3.0257,
919
+ "eval_samples_per_second": 90.228,
920
+ "eval_steps_per_second": 0.992,
921
+ "step": 108
922
+ },
923
+ {
924
+ "epoch": 24.88888888888889,
925
+ "eval_accuracy": 0.8498168498168498,
926
+ "eval_confusion_matrix": [
927
+ [
928
+ 66,
929
+ 0,
930
+ 1,
931
+ 5
932
+ ],
933
+ [
934
+ 15,
935
+ 28,
936
+ 17,
937
+ 0
938
+ ],
939
+ [
940
+ 0,
941
+ 2,
942
+ 74,
943
+ 0
944
+ ],
945
+ [
946
+ 1,
947
+ 0,
948
+ 0,
949
+ 64
950
+ ]
951
+ ],
952
+ "eval_f1": 0.8354928653436116,
953
+ "eval_loss": 0.7849779725074768,
954
+ "eval_precision": 0.8621667009790022,
955
+ "eval_recall": 0.8498168498168498,
956
+ "eval_runtime": 2.8819,
957
+ "eval_samples_per_second": 94.73,
958
+ "eval_steps_per_second": 1.041,
959
+ "step": 112
960
+ }
961
+ ],
962
+ "logging_steps": 500,
963
+ "max_steps": 180,
964
+ "num_input_tokens_seen": 0,
965
+ "num_train_epochs": 45,
966
+ "save_steps": 500,
967
+ "stateful_callbacks": {
968
+ "TrainerControl": {
969
+ "args": {
970
+ "should_epoch_stop": false,
971
+ "should_evaluate": false,
972
+ "should_log": false,
973
+ "should_save": true,
974
+ "should_training_stop": false
975
+ },
976
+ "attributes": {}
977
+ }
978
+ },
979
+ "total_flos": 6.197267376e+16,
980
+ "train_batch_size": 128,
981
+ "trial_name": null,
982
+ "trial_params": null
983
+ }
checkpoint-112/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:09e217cad0464b642c54f8b7691100b62a93dceb32a1869f55e3f0eb3a54a79e
3
+ size 5240
checkpoint-117/config.json ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "ntu-spml/distilhubert",
3
+ "activation_dropout": 0.1,
4
+ "apply_spec_augment": false,
5
+ "architectures": [
6
+ "HubertForSequenceClassification"
7
+ ],
8
+ "attention_dropout": 0.1,
9
+ "bos_token_id": 1,
10
+ "classifier_proj_size": 256,
11
+ "conv_bias": false,
12
+ "conv_dim": [
13
+ 512,
14
+ 512,
15
+ 512,
16
+ 512,
17
+ 512,
18
+ 512,
19
+ 512
20
+ ],
21
+ "conv_kernel": [
22
+ 10,
23
+ 3,
24
+ 3,
25
+ 3,
26
+ 3,
27
+ 2,
28
+ 2
29
+ ],
30
+ "conv_stride": [
31
+ 5,
32
+ 2,
33
+ 2,
34
+ 2,
35
+ 2,
36
+ 2,
37
+ 2
38
+ ],
39
+ "ctc_loss_reduction": "sum",
40
+ "ctc_zero_infinity": false,
41
+ "do_stable_layer_norm": false,
42
+ "eos_token_id": 2,
43
+ "feat_extract_activation": "gelu",
44
+ "feat_extract_norm": "group",
45
+ "feat_proj_dropout": 0.0,
46
+ "feat_proj_layer_norm": false,
47
+ "final_dropout": 0.0,
48
+ "finetuning_task": "audio-classification",
49
+ "hidden_act": "gelu",
50
+ "hidden_dropout": 0.1,
51
+ "hidden_size": 768,
52
+ "id2label": {
53
+ "0": "1s_normal",
54
+ "1": "1s_pain",
55
+ "2": "1s_hunger",
56
+ "3": "1s_asphyxia"
57
+ },
58
+ "initializer_range": 0.02,
59
+ "intermediate_size": 3072,
60
+ "label2id": {
61
+ "LABEL_0": 0,
62
+ "LABEL_1": 1,
63
+ "LABEL_2": 2,
64
+ "LABEL_3": 3
65
+ },
66
+ "layer_norm_eps": 1e-05,
67
+ "layerdrop": 0.0,
68
+ "mask_feature_length": 10,
69
+ "mask_feature_min_masks": 0,
70
+ "mask_feature_prob": 0.0,
71
+ "mask_time_length": 10,
72
+ "mask_time_min_masks": 2,
73
+ "mask_time_prob": 0.05,
74
+ "model_type": "hubert",
75
+ "num_attention_heads": 12,
76
+ "num_conv_pos_embedding_groups": 16,
77
+ "num_conv_pos_embeddings": 128,
78
+ "num_feat_extract_layers": 7,
79
+ "num_hidden_layers": 2,
80
+ "pad_token_id": 0,
81
+ "torch_dtype": "float32",
82
+ "transformers_version": "4.44.2",
83
+ "use_weighted_layer_sum": false,
84
+ "vocab_size": 32
85
+ }
checkpoint-117/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0050367da6fb440bd4b3a6a79626d4b674131390bf26c99429c28c09bcbd87aa
3
+ size 94765560
checkpoint-117/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1028db5ae3b0c3f80cdc19fbb2aacb98fac1fb05527149144068fa098cb632f2
3
+ size 189556666
checkpoint-117/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d855f52eaa4bfddfdd3009c65f5ca86f8146a821538e891832fe491d205d67d9
3
+ size 14308
checkpoint-117/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:82bcc86f54463eaec1c2a094b8d233f145e4d8b50ce541ffdcb92e8149a94060
3
+ size 1064
checkpoint-117/trainer_state.json ADDED
@@ -0,0 +1,1021 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.8627769756077204,
3
+ "best_model_checkpoint": "distilhubert-finetuned-mixed-data/checkpoint-81",
4
+ "epoch": 26.0,
5
+ "eval_steps": 500,
6
+ "global_step": 117,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.8888888888888888,
13
+ "eval_accuracy": 0.63003663003663,
14
+ "eval_confusion_matrix": [
15
+ [
16
+ 53,
17
+ 0,
18
+ 16,
19
+ 3
20
+ ],
21
+ [
22
+ 28,
23
+ 1,
24
+ 31,
25
+ 0
26
+ ],
27
+ [
28
+ 13,
29
+ 0,
30
+ 62,
31
+ 1
32
+ ],
33
+ [
34
+ 3,
35
+ 0,
36
+ 6,
37
+ 56
38
+ ]
39
+ ],
40
+ "eval_f1": 0.566693372031096,
41
+ "eval_loss": 1.31327486038208,
42
+ "eval_precision": 0.7361933549293478,
43
+ "eval_recall": 0.63003663003663,
44
+ "eval_runtime": 2.8436,
45
+ "eval_samples_per_second": 96.005,
46
+ "eval_steps_per_second": 1.055,
47
+ "step": 4
48
+ },
49
+ {
50
+ "epoch": 2.0,
51
+ "eval_accuracy": 0.6593406593406593,
52
+ "eval_confusion_matrix": [
53
+ [
54
+ 41,
55
+ 10,
56
+ 16,
57
+ 5
58
+ ],
59
+ [
60
+ 21,
61
+ 1,
62
+ 38,
63
+ 0
64
+ ],
65
+ [
66
+ 1,
67
+ 2,
68
+ 73,
69
+ 0
70
+ ],
71
+ [
72
+ 0,
73
+ 0,
74
+ 0,
75
+ 65
76
+ ]
77
+ ],
78
+ "eval_f1": 0.595713773130153,
79
+ "eval_loss": 0.9242589473724365,
80
+ "eval_precision": 0.5696509512811321,
81
+ "eval_recall": 0.6593406593406593,
82
+ "eval_runtime": 2.8668,
83
+ "eval_samples_per_second": 95.227,
84
+ "eval_steps_per_second": 1.046,
85
+ "step": 9
86
+ },
87
+ {
88
+ "epoch": 2.888888888888889,
89
+ "eval_accuracy": 0.717948717948718,
90
+ "eval_confusion_matrix": [
91
+ [
92
+ 52,
93
+ 8,
94
+ 7,
95
+ 5
96
+ ],
97
+ [
98
+ 18,
99
+ 5,
100
+ 37,
101
+ 0
102
+ ],
103
+ [
104
+ 1,
105
+ 0,
106
+ 75,
107
+ 0
108
+ ],
109
+ [
110
+ 1,
111
+ 0,
112
+ 0,
113
+ 64
114
+ ]
115
+ ],
116
+ "eval_f1": 0.6621621567125869,
117
+ "eval_loss": 0.8232662081718445,
118
+ "eval_precision": 0.671303801513745,
119
+ "eval_recall": 0.717948717948718,
120
+ "eval_runtime": 2.8711,
121
+ "eval_samples_per_second": 95.084,
122
+ "eval_steps_per_second": 1.045,
123
+ "step": 13
124
+ },
125
+ {
126
+ "epoch": 4.0,
127
+ "eval_accuracy": 0.6959706959706959,
128
+ "eval_confusion_matrix": [
129
+ [
130
+ 45,
131
+ 9,
132
+ 13,
133
+ 5
134
+ ],
135
+ [
136
+ 13,
137
+ 5,
138
+ 42,
139
+ 0
140
+ ],
141
+ [
142
+ 1,
143
+ 0,
144
+ 75,
145
+ 0
146
+ ],
147
+ [
148
+ 0,
149
+ 0,
150
+ 0,
151
+ 65
152
+ ]
153
+ ],
154
+ "eval_f1": 0.6428796900399459,
155
+ "eval_loss": 0.951453447341919,
156
+ "eval_precision": 0.6613447705829899,
157
+ "eval_recall": 0.6959706959706959,
158
+ "eval_runtime": 2.8813,
159
+ "eval_samples_per_second": 94.75,
160
+ "eval_steps_per_second": 1.041,
161
+ "step": 18
162
+ },
163
+ {
164
+ "epoch": 4.888888888888889,
165
+ "eval_accuracy": 0.6666666666666666,
166
+ "eval_confusion_matrix": [
167
+ [
168
+ 16,
169
+ 49,
170
+ 3,
171
+ 4
172
+ ],
173
+ [
174
+ 1,
175
+ 26,
176
+ 33,
177
+ 0
178
+ ],
179
+ [
180
+ 0,
181
+ 1,
182
+ 75,
183
+ 0
184
+ ],
185
+ [
186
+ 0,
187
+ 0,
188
+ 0,
189
+ 65
190
+ ]
191
+ ],
192
+ "eval_f1": 0.6331541584523206,
193
+ "eval_loss": 1.0080645084381104,
194
+ "eval_precision": 0.7358031394913882,
195
+ "eval_recall": 0.6666666666666666,
196
+ "eval_runtime": 2.9108,
197
+ "eval_samples_per_second": 93.789,
198
+ "eval_steps_per_second": 1.031,
199
+ "step": 22
200
+ },
201
+ {
202
+ "epoch": 6.0,
203
+ "eval_accuracy": 0.652014652014652,
204
+ "eval_confusion_matrix": [
205
+ [
206
+ 66,
207
+ 0,
208
+ 0,
209
+ 6
210
+ ],
211
+ [
212
+ 36,
213
+ 16,
214
+ 8,
215
+ 0
216
+ ],
217
+ [
218
+ 10,
219
+ 35,
220
+ 31,
221
+ 0
222
+ ],
223
+ [
224
+ 0,
225
+ 0,
226
+ 0,
227
+ 65
228
+ ]
229
+ ],
230
+ "eval_f1": 0.6302407955860642,
231
+ "eval_loss": 0.9598046541213989,
232
+ "eval_precision": 0.6636241740077424,
233
+ "eval_recall": 0.652014652014652,
234
+ "eval_runtime": 2.9773,
235
+ "eval_samples_per_second": 91.693,
236
+ "eval_steps_per_second": 1.008,
237
+ "step": 27
238
+ },
239
+ {
240
+ "epoch": 6.888888888888889,
241
+ "eval_accuracy": 0.7692307692307693,
242
+ "eval_confusion_matrix": [
243
+ [
244
+ 66,
245
+ 0,
246
+ 0,
247
+ 6
248
+ ],
249
+ [
250
+ 29,
251
+ 21,
252
+ 10,
253
+ 0
254
+ ],
255
+ [
256
+ 3,
257
+ 15,
258
+ 58,
259
+ 0
260
+ ],
261
+ [
262
+ 0,
263
+ 0,
264
+ 0,
265
+ 65
266
+ ]
267
+ ],
268
+ "eval_f1": 0.7527855586679116,
269
+ "eval_loss": 0.7898163795471191,
270
+ "eval_precision": 0.7612467677056892,
271
+ "eval_recall": 0.7692307692307693,
272
+ "eval_runtime": 2.8918,
273
+ "eval_samples_per_second": 94.404,
274
+ "eval_steps_per_second": 1.037,
275
+ "step": 31
276
+ },
277
+ {
278
+ "epoch": 8.0,
279
+ "eval_accuracy": 0.7802197802197802,
280
+ "eval_confusion_matrix": [
281
+ [
282
+ 66,
283
+ 0,
284
+ 0,
285
+ 6
286
+ ],
287
+ [
288
+ 25,
289
+ 15,
290
+ 19,
291
+ 1
292
+ ],
293
+ [
294
+ 1,
295
+ 6,
296
+ 68,
297
+ 1
298
+ ],
299
+ [
300
+ 1,
301
+ 0,
302
+ 0,
303
+ 64
304
+ ]
305
+ ],
306
+ "eval_f1": 0.7471179200524057,
307
+ "eval_loss": 0.7337484359741211,
308
+ "eval_precision": 0.7733845922309294,
309
+ "eval_recall": 0.7802197802197802,
310
+ "eval_runtime": 2.9006,
311
+ "eval_samples_per_second": 94.119,
312
+ "eval_steps_per_second": 1.034,
313
+ "step": 36
314
+ },
315
+ {
316
+ "epoch": 8.88888888888889,
317
+ "eval_accuracy": 0.7912087912087912,
318
+ "eval_confusion_matrix": [
319
+ [
320
+ 57,
321
+ 8,
322
+ 3,
323
+ 4
324
+ ],
325
+ [
326
+ 13,
327
+ 24,
328
+ 23,
329
+ 0
330
+ ],
331
+ [
332
+ 0,
333
+ 5,
334
+ 71,
335
+ 0
336
+ ],
337
+ [
338
+ 1,
339
+ 0,
340
+ 0,
341
+ 64
342
+ ]
343
+ ],
344
+ "eval_f1": 0.7766564722186922,
345
+ "eval_loss": 0.7148727178573608,
346
+ "eval_precision": 0.7821504483074875,
347
+ "eval_recall": 0.7912087912087912,
348
+ "eval_runtime": 2.8866,
349
+ "eval_samples_per_second": 94.576,
350
+ "eval_steps_per_second": 1.039,
351
+ "step": 40
352
+ },
353
+ {
354
+ "epoch": 10.0,
355
+ "eval_accuracy": 0.8168498168498168,
356
+ "eval_confusion_matrix": [
357
+ [
358
+ 51,
359
+ 8,
360
+ 1,
361
+ 12
362
+ ],
363
+ [
364
+ 10,
365
+ 36,
366
+ 14,
367
+ 0
368
+ ],
369
+ [
370
+ 0,
371
+ 5,
372
+ 71,
373
+ 0
374
+ ],
375
+ [
376
+ 0,
377
+ 0,
378
+ 0,
379
+ 65
380
+ ]
381
+ ],
382
+ "eval_f1": 0.8094336953840884,
383
+ "eval_loss": 0.7574812173843384,
384
+ "eval_precision": 0.8127936625684181,
385
+ "eval_recall": 0.8168498168498168,
386
+ "eval_runtime": 2.8967,
387
+ "eval_samples_per_second": 94.245,
388
+ "eval_steps_per_second": 1.036,
389
+ "step": 45
390
+ },
391
+ {
392
+ "epoch": 10.88888888888889,
393
+ "eval_accuracy": 0.8021978021978022,
394
+ "eval_confusion_matrix": [
395
+ [
396
+ 48,
397
+ 18,
398
+ 2,
399
+ 4
400
+ ],
401
+ [
402
+ 4,
403
+ 41,
404
+ 15,
405
+ 0
406
+ ],
407
+ [
408
+ 0,
409
+ 7,
410
+ 69,
411
+ 0
412
+ ],
413
+ [
414
+ 3,
415
+ 0,
416
+ 1,
417
+ 61
418
+ ]
419
+ ],
420
+ "eval_f1": 0.801525180147331,
421
+ "eval_loss": 0.7140281200408936,
422
+ "eval_precision": 0.8109338936925145,
423
+ "eval_recall": 0.8021978021978022,
424
+ "eval_runtime": 2.917,
425
+ "eval_samples_per_second": 93.589,
426
+ "eval_steps_per_second": 1.028,
427
+ "step": 49
428
+ },
429
+ {
430
+ "epoch": 12.0,
431
+ "eval_accuracy": 0.8534798534798534,
432
+ "eval_confusion_matrix": [
433
+ [
434
+ 56,
435
+ 12,
436
+ 0,
437
+ 4
438
+ ],
439
+ [
440
+ 6,
441
+ 43,
442
+ 11,
443
+ 0
444
+ ],
445
+ [
446
+ 0,
447
+ 6,
448
+ 70,
449
+ 0
450
+ ],
451
+ [
452
+ 1,
453
+ 0,
454
+ 0,
455
+ 64
456
+ ]
457
+ ],
458
+ "eval_f1": 0.8523994617102314,
459
+ "eval_loss": 0.6672152280807495,
460
+ "eval_precision": 0.8540313732642031,
461
+ "eval_recall": 0.8534798534798534,
462
+ "eval_runtime": 2.8969,
463
+ "eval_samples_per_second": 94.238,
464
+ "eval_steps_per_second": 1.036,
465
+ "step": 54
466
+ },
467
+ {
468
+ "epoch": 12.88888888888889,
469
+ "eval_accuracy": 0.8498168498168498,
470
+ "eval_confusion_matrix": [
471
+ [
472
+ 60,
473
+ 8,
474
+ 0,
475
+ 4
476
+ ],
477
+ [
478
+ 8,
479
+ 38,
480
+ 14,
481
+ 0
482
+ ],
483
+ [
484
+ 0,
485
+ 6,
486
+ 70,
487
+ 0
488
+ ],
489
+ [
490
+ 1,
491
+ 0,
492
+ 0,
493
+ 64
494
+ ]
495
+ ],
496
+ "eval_f1": 0.8463270052615757,
497
+ "eval_loss": 0.6432910561561584,
498
+ "eval_precision": 0.8460243715014519,
499
+ "eval_recall": 0.8498168498168498,
500
+ "eval_runtime": 3.0281,
501
+ "eval_samples_per_second": 90.156,
502
+ "eval_steps_per_second": 0.991,
503
+ "step": 58
504
+ },
505
+ {
506
+ "epoch": 14.0,
507
+ "eval_accuracy": 0.8278388278388278,
508
+ "eval_confusion_matrix": [
509
+ [
510
+ 54,
511
+ 13,
512
+ 1,
513
+ 4
514
+ ],
515
+ [
516
+ 7,
517
+ 44,
518
+ 9,
519
+ 0
520
+ ],
521
+ [
522
+ 0,
523
+ 11,
524
+ 65,
525
+ 0
526
+ ],
527
+ [
528
+ 1,
529
+ 1,
530
+ 0,
531
+ 63
532
+ ]
533
+ ],
534
+ "eval_f1": 0.82943590265942,
535
+ "eval_loss": 0.7395206093788147,
536
+ "eval_precision": 0.8350059217447382,
537
+ "eval_recall": 0.8278388278388278,
538
+ "eval_runtime": 2.9869,
539
+ "eval_samples_per_second": 91.399,
540
+ "eval_steps_per_second": 1.004,
541
+ "step": 63
542
+ },
543
+ {
544
+ "epoch": 14.88888888888889,
545
+ "eval_accuracy": 0.8315018315018315,
546
+ "eval_confusion_matrix": [
547
+ [
548
+ 54,
549
+ 13,
550
+ 1,
551
+ 4
552
+ ],
553
+ [
554
+ 10,
555
+ 39,
556
+ 11,
557
+ 0
558
+ ],
559
+ [
560
+ 0,
561
+ 7,
562
+ 69,
563
+ 0
564
+ ],
565
+ [
566
+ 0,
567
+ 0,
568
+ 0,
569
+ 65
570
+ ]
571
+ ],
572
+ "eval_f1": 0.8291811389886823,
573
+ "eval_loss": 0.7115849852561951,
574
+ "eval_precision": 0.829244108966536,
575
+ "eval_recall": 0.8315018315018315,
576
+ "eval_runtime": 2.9616,
577
+ "eval_samples_per_second": 92.181,
578
+ "eval_steps_per_second": 1.013,
579
+ "step": 67
580
+ },
581
+ {
582
+ "epoch": 16.0,
583
+ "eval_accuracy": 0.8315018315018315,
584
+ "eval_confusion_matrix": [
585
+ [
586
+ 60,
587
+ 7,
588
+ 1,
589
+ 4
590
+ ],
591
+ [
592
+ 10,
593
+ 39,
594
+ 11,
595
+ 0
596
+ ],
597
+ [
598
+ 0,
599
+ 11,
600
+ 65,
601
+ 0
602
+ ],
603
+ [
604
+ 1,
605
+ 1,
606
+ 0,
607
+ 63
608
+ ]
609
+ ],
610
+ "eval_f1": 0.8304073820984628,
611
+ "eval_loss": 0.7295921444892883,
612
+ "eval_precision": 0.8295426562258641,
613
+ "eval_recall": 0.8315018315018315,
614
+ "eval_runtime": 2.8758,
615
+ "eval_samples_per_second": 94.932,
616
+ "eval_steps_per_second": 1.043,
617
+ "step": 72
618
+ },
619
+ {
620
+ "epoch": 16.88888888888889,
621
+ "eval_accuracy": 0.8644688644688645,
622
+ "eval_confusion_matrix": [
623
+ [
624
+ 62,
625
+ 5,
626
+ 1,
627
+ 4
628
+ ],
629
+ [
630
+ 9,
631
+ 37,
632
+ 14,
633
+ 0
634
+ ],
635
+ [
636
+ 0,
637
+ 3,
638
+ 73,
639
+ 0
640
+ ],
641
+ [
642
+ 1,
643
+ 0,
644
+ 0,
645
+ 64
646
+ ]
647
+ ],
648
+ "eval_f1": 0.8589767100678526,
649
+ "eval_loss": 0.7055637240409851,
650
+ "eval_precision": 0.8628397746044805,
651
+ "eval_recall": 0.8644688644688645,
652
+ "eval_runtime": 3.074,
653
+ "eval_samples_per_second": 88.809,
654
+ "eval_steps_per_second": 0.976,
655
+ "step": 76
656
+ },
657
+ {
658
+ "epoch": 18.0,
659
+ "eval_accuracy": 0.8644688644688645,
660
+ "eval_confusion_matrix": [
661
+ [
662
+ 65,
663
+ 2,
664
+ 1,
665
+ 4
666
+ ],
667
+ [
668
+ 13,
669
+ 42,
670
+ 5,
671
+ 0
672
+ ],
673
+ [
674
+ 0,
675
+ 12,
676
+ 64,
677
+ 0
678
+ ],
679
+ [
680
+ 0,
681
+ 0,
682
+ 0,
683
+ 65
684
+ ]
685
+ ],
686
+ "eval_f1": 0.8627769756077204,
687
+ "eval_loss": 0.7563945651054382,
688
+ "eval_precision": 0.8634344261673453,
689
+ "eval_recall": 0.8644688644688645,
690
+ "eval_runtime": 3.0072,
691
+ "eval_samples_per_second": 90.783,
692
+ "eval_steps_per_second": 0.998,
693
+ "step": 81
694
+ },
695
+ {
696
+ "epoch": 18.88888888888889,
697
+ "eval_accuracy": 0.8424908424908425,
698
+ "eval_confusion_matrix": [
699
+ [
700
+ 64,
701
+ 4,
702
+ 0,
703
+ 4
704
+ ],
705
+ [
706
+ 11,
707
+ 41,
708
+ 8,
709
+ 0
710
+ ],
711
+ [
712
+ 0,
713
+ 10,
714
+ 66,
715
+ 0
716
+ ],
717
+ [
718
+ 6,
719
+ 0,
720
+ 0,
721
+ 59
722
+ ]
723
+ ],
724
+ "eval_f1": 0.8418306879608031,
725
+ "eval_loss": 0.7825365662574768,
726
+ "eval_precision": 0.8434907006335578,
727
+ "eval_recall": 0.8424908424908425,
728
+ "eval_runtime": 3.0343,
729
+ "eval_samples_per_second": 89.972,
730
+ "eval_steps_per_second": 0.989,
731
+ "step": 85
732
+ },
733
+ {
734
+ "epoch": 20.0,
735
+ "eval_accuracy": 0.8058608058608059,
736
+ "eval_confusion_matrix": [
737
+ [
738
+ 40,
739
+ 28,
740
+ 1,
741
+ 3
742
+ ],
743
+ [
744
+ 2,
745
+ 50,
746
+ 8,
747
+ 0
748
+ ],
749
+ [
750
+ 0,
751
+ 11,
752
+ 65,
753
+ 0
754
+ ],
755
+ [
756
+ 0,
757
+ 0,
758
+ 0,
759
+ 65
760
+ ]
761
+ ],
762
+ "eval_f1": 0.8065760931078588,
763
+ "eval_loss": 0.8426868915557861,
764
+ "eval_precision": 0.8467707085637404,
765
+ "eval_recall": 0.8058608058608059,
766
+ "eval_runtime": 3.0783,
767
+ "eval_samples_per_second": 88.686,
768
+ "eval_steps_per_second": 0.975,
769
+ "step": 90
770
+ },
771
+ {
772
+ "epoch": 20.88888888888889,
773
+ "eval_accuracy": 0.8498168498168498,
774
+ "eval_confusion_matrix": [
775
+ [
776
+ 63,
777
+ 4,
778
+ 0,
779
+ 5
780
+ ],
781
+ [
782
+ 13,
783
+ 40,
784
+ 7,
785
+ 0
786
+ ],
787
+ [
788
+ 0,
789
+ 12,
790
+ 64,
791
+ 0
792
+ ],
793
+ [
794
+ 0,
795
+ 0,
796
+ 0,
797
+ 65
798
+ ]
799
+ ],
800
+ "eval_f1": 0.8477878057985963,
801
+ "eval_loss": 0.7440442442893982,
802
+ "eval_precision": 0.8476393351433065,
803
+ "eval_recall": 0.8498168498168498,
804
+ "eval_runtime": 2.9875,
805
+ "eval_samples_per_second": 91.38,
806
+ "eval_steps_per_second": 1.004,
807
+ "step": 94
808
+ },
809
+ {
810
+ "epoch": 22.0,
811
+ "eval_accuracy": 0.8608058608058609,
812
+ "eval_confusion_matrix": [
813
+ [
814
+ 66,
815
+ 1,
816
+ 1,
817
+ 4
818
+ ],
819
+ [
820
+ 13,
821
+ 36,
822
+ 11,
823
+ 0
824
+ ],
825
+ [
826
+ 0,
827
+ 5,
828
+ 71,
829
+ 0
830
+ ],
831
+ [
832
+ 3,
833
+ 0,
834
+ 0,
835
+ 62
836
+ ]
837
+ ],
838
+ "eval_f1": 0.855194718990792,
839
+ "eval_loss": 0.7338178753852844,
840
+ "eval_precision": 0.8624631692093176,
841
+ "eval_recall": 0.8608058608058609,
842
+ "eval_runtime": 2.98,
843
+ "eval_samples_per_second": 91.611,
844
+ "eval_steps_per_second": 1.007,
845
+ "step": 99
846
+ },
847
+ {
848
+ "epoch": 22.88888888888889,
849
+ "eval_accuracy": 0.8498168498168498,
850
+ "eval_confusion_matrix": [
851
+ [
852
+ 58,
853
+ 10,
854
+ 0,
855
+ 4
856
+ ],
857
+ [
858
+ 10,
859
+ 43,
860
+ 7,
861
+ 0
862
+ ],
863
+ [
864
+ 0,
865
+ 7,
866
+ 69,
867
+ 0
868
+ ],
869
+ [
870
+ 2,
871
+ 1,
872
+ 0,
873
+ 62
874
+ ]
875
+ ],
876
+ "eval_f1": 0.8497737987724402,
877
+ "eval_loss": 0.7231407761573792,
878
+ "eval_precision": 0.8498638493954653,
879
+ "eval_recall": 0.8498168498168498,
880
+ "eval_runtime": 2.9272,
881
+ "eval_samples_per_second": 93.262,
882
+ "eval_steps_per_second": 1.025,
883
+ "step": 103
884
+ },
885
+ {
886
+ "epoch": 24.0,
887
+ "eval_accuracy": 0.8424908424908425,
888
+ "eval_confusion_matrix": [
889
+ [
890
+ 67,
891
+ 1,
892
+ 0,
893
+ 4
894
+ ],
895
+ [
896
+ 12,
897
+ 44,
898
+ 4,
899
+ 0
900
+ ],
901
+ [
902
+ 0,
903
+ 18,
904
+ 58,
905
+ 0
906
+ ],
907
+ [
908
+ 2,
909
+ 2,
910
+ 0,
911
+ 61
912
+ ]
913
+ ],
914
+ "eval_f1": 0.8431607380967995,
915
+ "eval_loss": 0.752363920211792,
916
+ "eval_precision": 0.8507974885146101,
917
+ "eval_recall": 0.8424908424908425,
918
+ "eval_runtime": 3.0257,
919
+ "eval_samples_per_second": 90.228,
920
+ "eval_steps_per_second": 0.992,
921
+ "step": 108
922
+ },
923
+ {
924
+ "epoch": 24.88888888888889,
925
+ "eval_accuracy": 0.8498168498168498,
926
+ "eval_confusion_matrix": [
927
+ [
928
+ 66,
929
+ 0,
930
+ 1,
931
+ 5
932
+ ],
933
+ [
934
+ 15,
935
+ 28,
936
+ 17,
937
+ 0
938
+ ],
939
+ [
940
+ 0,
941
+ 2,
942
+ 74,
943
+ 0
944
+ ],
945
+ [
946
+ 1,
947
+ 0,
948
+ 0,
949
+ 64
950
+ ]
951
+ ],
952
+ "eval_f1": 0.8354928653436116,
953
+ "eval_loss": 0.7849779725074768,
954
+ "eval_precision": 0.8621667009790022,
955
+ "eval_recall": 0.8498168498168498,
956
+ "eval_runtime": 2.8819,
957
+ "eval_samples_per_second": 94.73,
958
+ "eval_steps_per_second": 1.041,
959
+ "step": 112
960
+ },
961
+ {
962
+ "epoch": 26.0,
963
+ "eval_accuracy": 0.8131868131868132,
964
+ "eval_confusion_matrix": [
965
+ [
966
+ 62,
967
+ 6,
968
+ 0,
969
+ 4
970
+ ],
971
+ [
972
+ 11,
973
+ 44,
974
+ 5,
975
+ 0
976
+ ],
977
+ [
978
+ 0,
979
+ 24,
980
+ 52,
981
+ 0
982
+ ],
983
+ [
984
+ 1,
985
+ 0,
986
+ 0,
987
+ 64
988
+ ]
989
+ ],
990
+ "eval_f1": 0.8151598256855266,
991
+ "eval_loss": 0.7896661162376404,
992
+ "eval_precision": 0.8297062414709473,
993
+ "eval_recall": 0.8131868131868132,
994
+ "eval_runtime": 2.8953,
995
+ "eval_samples_per_second": 94.291,
996
+ "eval_steps_per_second": 1.036,
997
+ "step": 117
998
+ }
999
+ ],
1000
+ "logging_steps": 500,
1001
+ "max_steps": 180,
1002
+ "num_input_tokens_seen": 0,
1003
+ "num_train_epochs": 45,
1004
+ "save_steps": 500,
1005
+ "stateful_callbacks": {
1006
+ "TrainerControl": {
1007
+ "args": {
1008
+ "should_epoch_stop": false,
1009
+ "should_evaluate": false,
1010
+ "should_log": false,
1011
+ "should_save": true,
1012
+ "should_training_stop": false
1013
+ },
1014
+ "attributes": {}
1015
+ }
1016
+ },
1017
+ "total_flos": 6.44515807104e+16,
1018
+ "train_batch_size": 128,
1019
+ "trial_name": null,
1020
+ "trial_params": null
1021
+ }
checkpoint-117/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:09e217cad0464b642c54f8b7691100b62a93dceb32a1869f55e3f0eb3a54a79e
3
+ size 5240
checkpoint-121/config.json ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "ntu-spml/distilhubert",
3
+ "activation_dropout": 0.1,
4
+ "apply_spec_augment": false,
5
+ "architectures": [
6
+ "HubertForSequenceClassification"
7
+ ],
8
+ "attention_dropout": 0.1,
9
+ "bos_token_id": 1,
10
+ "classifier_proj_size": 256,
11
+ "conv_bias": false,
12
+ "conv_dim": [
13
+ 512,
14
+ 512,
15
+ 512,
16
+ 512,
17
+ 512,
18
+ 512,
19
+ 512
20
+ ],
21
+ "conv_kernel": [
22
+ 10,
23
+ 3,
24
+ 3,
25
+ 3,
26
+ 3,
27
+ 2,
28
+ 2
29
+ ],
30
+ "conv_stride": [
31
+ 5,
32
+ 2,
33
+ 2,
34
+ 2,
35
+ 2,
36
+ 2,
37
+ 2
38
+ ],
39
+ "ctc_loss_reduction": "sum",
40
+ "ctc_zero_infinity": false,
41
+ "do_stable_layer_norm": false,
42
+ "eos_token_id": 2,
43
+ "feat_extract_activation": "gelu",
44
+ "feat_extract_norm": "group",
45
+ "feat_proj_dropout": 0.0,
46
+ "feat_proj_layer_norm": false,
47
+ "final_dropout": 0.0,
48
+ "finetuning_task": "audio-classification",
49
+ "hidden_act": "gelu",
50
+ "hidden_dropout": 0.1,
51
+ "hidden_size": 768,
52
+ "id2label": {
53
+ "0": "1s_normal",
54
+ "1": "1s_pain",
55
+ "2": "1s_hunger",
56
+ "3": "1s_asphyxia"
57
+ },
58
+ "initializer_range": 0.02,
59
+ "intermediate_size": 3072,
60
+ "label2id": {
61
+ "LABEL_0": 0,
62
+ "LABEL_1": 1,
63
+ "LABEL_2": 2,
64
+ "LABEL_3": 3
65
+ },
66
+ "layer_norm_eps": 1e-05,
67
+ "layerdrop": 0.0,
68
+ "mask_feature_length": 10,
69
+ "mask_feature_min_masks": 0,
70
+ "mask_feature_prob": 0.0,
71
+ "mask_time_length": 10,
72
+ "mask_time_min_masks": 2,
73
+ "mask_time_prob": 0.05,
74
+ "model_type": "hubert",
75
+ "num_attention_heads": 12,
76
+ "num_conv_pos_embedding_groups": 16,
77
+ "num_conv_pos_embeddings": 128,
78
+ "num_feat_extract_layers": 7,
79
+ "num_hidden_layers": 2,
80
+ "pad_token_id": 0,
81
+ "torch_dtype": "float32",
82
+ "transformers_version": "4.44.2",
83
+ "use_weighted_layer_sum": false,
84
+ "vocab_size": 32
85
+ }
checkpoint-121/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:00956f5c7a7b4b257696dd508120e7f8c293d6eae2868350246f64214485eec0
3
+ size 94765560
checkpoint-121/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3bf7362c0ebbcddc9516c4702d04e42283fe14ca75340a0e003469d351fa1c2b
3
+ size 189556666
checkpoint-121/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3d46bac788524b95c2eb6a503c884354aedaaf5845a246dad40a99742be55b9f
3
+ size 14308
checkpoint-121/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:baac56bdbea76b1ed3b409eadae05b655886a9f39f011f0f648a4eb3fa804071
3
+ size 1064
checkpoint-121/trainer_state.json ADDED
@@ -0,0 +1,1059 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.8754390108936493,
3
+ "best_model_checkpoint": "distilhubert-finetuned-mixed-data/checkpoint-121",
4
+ "epoch": 26.88888888888889,
5
+ "eval_steps": 500,
6
+ "global_step": 121,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.8888888888888888,
13
+ "eval_accuracy": 0.63003663003663,
14
+ "eval_confusion_matrix": [
15
+ [
16
+ 53,
17
+ 0,
18
+ 16,
19
+ 3
20
+ ],
21
+ [
22
+ 28,
23
+ 1,
24
+ 31,
25
+ 0
26
+ ],
27
+ [
28
+ 13,
29
+ 0,
30
+ 62,
31
+ 1
32
+ ],
33
+ [
34
+ 3,
35
+ 0,
36
+ 6,
37
+ 56
38
+ ]
39
+ ],
40
+ "eval_f1": 0.566693372031096,
41
+ "eval_loss": 1.31327486038208,
42
+ "eval_precision": 0.7361933549293478,
43
+ "eval_recall": 0.63003663003663,
44
+ "eval_runtime": 2.8436,
45
+ "eval_samples_per_second": 96.005,
46
+ "eval_steps_per_second": 1.055,
47
+ "step": 4
48
+ },
49
+ {
50
+ "epoch": 2.0,
51
+ "eval_accuracy": 0.6593406593406593,
52
+ "eval_confusion_matrix": [
53
+ [
54
+ 41,
55
+ 10,
56
+ 16,
57
+ 5
58
+ ],
59
+ [
60
+ 21,
61
+ 1,
62
+ 38,
63
+ 0
64
+ ],
65
+ [
66
+ 1,
67
+ 2,
68
+ 73,
69
+ 0
70
+ ],
71
+ [
72
+ 0,
73
+ 0,
74
+ 0,
75
+ 65
76
+ ]
77
+ ],
78
+ "eval_f1": 0.595713773130153,
79
+ "eval_loss": 0.9242589473724365,
80
+ "eval_precision": 0.5696509512811321,
81
+ "eval_recall": 0.6593406593406593,
82
+ "eval_runtime": 2.8668,
83
+ "eval_samples_per_second": 95.227,
84
+ "eval_steps_per_second": 1.046,
85
+ "step": 9
86
+ },
87
+ {
88
+ "epoch": 2.888888888888889,
89
+ "eval_accuracy": 0.717948717948718,
90
+ "eval_confusion_matrix": [
91
+ [
92
+ 52,
93
+ 8,
94
+ 7,
95
+ 5
96
+ ],
97
+ [
98
+ 18,
99
+ 5,
100
+ 37,
101
+ 0
102
+ ],
103
+ [
104
+ 1,
105
+ 0,
106
+ 75,
107
+ 0
108
+ ],
109
+ [
110
+ 1,
111
+ 0,
112
+ 0,
113
+ 64
114
+ ]
115
+ ],
116
+ "eval_f1": 0.6621621567125869,
117
+ "eval_loss": 0.8232662081718445,
118
+ "eval_precision": 0.671303801513745,
119
+ "eval_recall": 0.717948717948718,
120
+ "eval_runtime": 2.8711,
121
+ "eval_samples_per_second": 95.084,
122
+ "eval_steps_per_second": 1.045,
123
+ "step": 13
124
+ },
125
+ {
126
+ "epoch": 4.0,
127
+ "eval_accuracy": 0.6959706959706959,
128
+ "eval_confusion_matrix": [
129
+ [
130
+ 45,
131
+ 9,
132
+ 13,
133
+ 5
134
+ ],
135
+ [
136
+ 13,
137
+ 5,
138
+ 42,
139
+ 0
140
+ ],
141
+ [
142
+ 1,
143
+ 0,
144
+ 75,
145
+ 0
146
+ ],
147
+ [
148
+ 0,
149
+ 0,
150
+ 0,
151
+ 65
152
+ ]
153
+ ],
154
+ "eval_f1": 0.6428796900399459,
155
+ "eval_loss": 0.951453447341919,
156
+ "eval_precision": 0.6613447705829899,
157
+ "eval_recall": 0.6959706959706959,
158
+ "eval_runtime": 2.8813,
159
+ "eval_samples_per_second": 94.75,
160
+ "eval_steps_per_second": 1.041,
161
+ "step": 18
162
+ },
163
+ {
164
+ "epoch": 4.888888888888889,
165
+ "eval_accuracy": 0.6666666666666666,
166
+ "eval_confusion_matrix": [
167
+ [
168
+ 16,
169
+ 49,
170
+ 3,
171
+ 4
172
+ ],
173
+ [
174
+ 1,
175
+ 26,
176
+ 33,
177
+ 0
178
+ ],
179
+ [
180
+ 0,
181
+ 1,
182
+ 75,
183
+ 0
184
+ ],
185
+ [
186
+ 0,
187
+ 0,
188
+ 0,
189
+ 65
190
+ ]
191
+ ],
192
+ "eval_f1": 0.6331541584523206,
193
+ "eval_loss": 1.0080645084381104,
194
+ "eval_precision": 0.7358031394913882,
195
+ "eval_recall": 0.6666666666666666,
196
+ "eval_runtime": 2.9108,
197
+ "eval_samples_per_second": 93.789,
198
+ "eval_steps_per_second": 1.031,
199
+ "step": 22
200
+ },
201
+ {
202
+ "epoch": 6.0,
203
+ "eval_accuracy": 0.652014652014652,
204
+ "eval_confusion_matrix": [
205
+ [
206
+ 66,
207
+ 0,
208
+ 0,
209
+ 6
210
+ ],
211
+ [
212
+ 36,
213
+ 16,
214
+ 8,
215
+ 0
216
+ ],
217
+ [
218
+ 10,
219
+ 35,
220
+ 31,
221
+ 0
222
+ ],
223
+ [
224
+ 0,
225
+ 0,
226
+ 0,
227
+ 65
228
+ ]
229
+ ],
230
+ "eval_f1": 0.6302407955860642,
231
+ "eval_loss": 0.9598046541213989,
232
+ "eval_precision": 0.6636241740077424,
233
+ "eval_recall": 0.652014652014652,
234
+ "eval_runtime": 2.9773,
235
+ "eval_samples_per_second": 91.693,
236
+ "eval_steps_per_second": 1.008,
237
+ "step": 27
238
+ },
239
+ {
240
+ "epoch": 6.888888888888889,
241
+ "eval_accuracy": 0.7692307692307693,
242
+ "eval_confusion_matrix": [
243
+ [
244
+ 66,
245
+ 0,
246
+ 0,
247
+ 6
248
+ ],
249
+ [
250
+ 29,
251
+ 21,
252
+ 10,
253
+ 0
254
+ ],
255
+ [
256
+ 3,
257
+ 15,
258
+ 58,
259
+ 0
260
+ ],
261
+ [
262
+ 0,
263
+ 0,
264
+ 0,
265
+ 65
266
+ ]
267
+ ],
268
+ "eval_f1": 0.7527855586679116,
269
+ "eval_loss": 0.7898163795471191,
270
+ "eval_precision": 0.7612467677056892,
271
+ "eval_recall": 0.7692307692307693,
272
+ "eval_runtime": 2.8918,
273
+ "eval_samples_per_second": 94.404,
274
+ "eval_steps_per_second": 1.037,
275
+ "step": 31
276
+ },
277
+ {
278
+ "epoch": 8.0,
279
+ "eval_accuracy": 0.7802197802197802,
280
+ "eval_confusion_matrix": [
281
+ [
282
+ 66,
283
+ 0,
284
+ 0,
285
+ 6
286
+ ],
287
+ [
288
+ 25,
289
+ 15,
290
+ 19,
291
+ 1
292
+ ],
293
+ [
294
+ 1,
295
+ 6,
296
+ 68,
297
+ 1
298
+ ],
299
+ [
300
+ 1,
301
+ 0,
302
+ 0,
303
+ 64
304
+ ]
305
+ ],
306
+ "eval_f1": 0.7471179200524057,
307
+ "eval_loss": 0.7337484359741211,
308
+ "eval_precision": 0.7733845922309294,
309
+ "eval_recall": 0.7802197802197802,
310
+ "eval_runtime": 2.9006,
311
+ "eval_samples_per_second": 94.119,
312
+ "eval_steps_per_second": 1.034,
313
+ "step": 36
314
+ },
315
+ {
316
+ "epoch": 8.88888888888889,
317
+ "eval_accuracy": 0.7912087912087912,
318
+ "eval_confusion_matrix": [
319
+ [
320
+ 57,
321
+ 8,
322
+ 3,
323
+ 4
324
+ ],
325
+ [
326
+ 13,
327
+ 24,
328
+ 23,
329
+ 0
330
+ ],
331
+ [
332
+ 0,
333
+ 5,
334
+ 71,
335
+ 0
336
+ ],
337
+ [
338
+ 1,
339
+ 0,
340
+ 0,
341
+ 64
342
+ ]
343
+ ],
344
+ "eval_f1": 0.7766564722186922,
345
+ "eval_loss": 0.7148727178573608,
346
+ "eval_precision": 0.7821504483074875,
347
+ "eval_recall": 0.7912087912087912,
348
+ "eval_runtime": 2.8866,
349
+ "eval_samples_per_second": 94.576,
350
+ "eval_steps_per_second": 1.039,
351
+ "step": 40
352
+ },
353
+ {
354
+ "epoch": 10.0,
355
+ "eval_accuracy": 0.8168498168498168,
356
+ "eval_confusion_matrix": [
357
+ [
358
+ 51,
359
+ 8,
360
+ 1,
361
+ 12
362
+ ],
363
+ [
364
+ 10,
365
+ 36,
366
+ 14,
367
+ 0
368
+ ],
369
+ [
370
+ 0,
371
+ 5,
372
+ 71,
373
+ 0
374
+ ],
375
+ [
376
+ 0,
377
+ 0,
378
+ 0,
379
+ 65
380
+ ]
381
+ ],
382
+ "eval_f1": 0.8094336953840884,
383
+ "eval_loss": 0.7574812173843384,
384
+ "eval_precision": 0.8127936625684181,
385
+ "eval_recall": 0.8168498168498168,
386
+ "eval_runtime": 2.8967,
387
+ "eval_samples_per_second": 94.245,
388
+ "eval_steps_per_second": 1.036,
389
+ "step": 45
390
+ },
391
+ {
392
+ "epoch": 10.88888888888889,
393
+ "eval_accuracy": 0.8021978021978022,
394
+ "eval_confusion_matrix": [
395
+ [
396
+ 48,
397
+ 18,
398
+ 2,
399
+ 4
400
+ ],
401
+ [
402
+ 4,
403
+ 41,
404
+ 15,
405
+ 0
406
+ ],
407
+ [
408
+ 0,
409
+ 7,
410
+ 69,
411
+ 0
412
+ ],
413
+ [
414
+ 3,
415
+ 0,
416
+ 1,
417
+ 61
418
+ ]
419
+ ],
420
+ "eval_f1": 0.801525180147331,
421
+ "eval_loss": 0.7140281200408936,
422
+ "eval_precision": 0.8109338936925145,
423
+ "eval_recall": 0.8021978021978022,
424
+ "eval_runtime": 2.917,
425
+ "eval_samples_per_second": 93.589,
426
+ "eval_steps_per_second": 1.028,
427
+ "step": 49
428
+ },
429
+ {
430
+ "epoch": 12.0,
431
+ "eval_accuracy": 0.8534798534798534,
432
+ "eval_confusion_matrix": [
433
+ [
434
+ 56,
435
+ 12,
436
+ 0,
437
+ 4
438
+ ],
439
+ [
440
+ 6,
441
+ 43,
442
+ 11,
443
+ 0
444
+ ],
445
+ [
446
+ 0,
447
+ 6,
448
+ 70,
449
+ 0
450
+ ],
451
+ [
452
+ 1,
453
+ 0,
454
+ 0,
455
+ 64
456
+ ]
457
+ ],
458
+ "eval_f1": 0.8523994617102314,
459
+ "eval_loss": 0.6672152280807495,
460
+ "eval_precision": 0.8540313732642031,
461
+ "eval_recall": 0.8534798534798534,
462
+ "eval_runtime": 2.8969,
463
+ "eval_samples_per_second": 94.238,
464
+ "eval_steps_per_second": 1.036,
465
+ "step": 54
466
+ },
467
+ {
468
+ "epoch": 12.88888888888889,
469
+ "eval_accuracy": 0.8498168498168498,
470
+ "eval_confusion_matrix": [
471
+ [
472
+ 60,
473
+ 8,
474
+ 0,
475
+ 4
476
+ ],
477
+ [
478
+ 8,
479
+ 38,
480
+ 14,
481
+ 0
482
+ ],
483
+ [
484
+ 0,
485
+ 6,
486
+ 70,
487
+ 0
488
+ ],
489
+ [
490
+ 1,
491
+ 0,
492
+ 0,
493
+ 64
494
+ ]
495
+ ],
496
+ "eval_f1": 0.8463270052615757,
497
+ "eval_loss": 0.6432910561561584,
498
+ "eval_precision": 0.8460243715014519,
499
+ "eval_recall": 0.8498168498168498,
500
+ "eval_runtime": 3.0281,
501
+ "eval_samples_per_second": 90.156,
502
+ "eval_steps_per_second": 0.991,
503
+ "step": 58
504
+ },
505
+ {
506
+ "epoch": 14.0,
507
+ "eval_accuracy": 0.8278388278388278,
508
+ "eval_confusion_matrix": [
509
+ [
510
+ 54,
511
+ 13,
512
+ 1,
513
+ 4
514
+ ],
515
+ [
516
+ 7,
517
+ 44,
518
+ 9,
519
+ 0
520
+ ],
521
+ [
522
+ 0,
523
+ 11,
524
+ 65,
525
+ 0
526
+ ],
527
+ [
528
+ 1,
529
+ 1,
530
+ 0,
531
+ 63
532
+ ]
533
+ ],
534
+ "eval_f1": 0.82943590265942,
535
+ "eval_loss": 0.7395206093788147,
536
+ "eval_precision": 0.8350059217447382,
537
+ "eval_recall": 0.8278388278388278,
538
+ "eval_runtime": 2.9869,
539
+ "eval_samples_per_second": 91.399,
540
+ "eval_steps_per_second": 1.004,
541
+ "step": 63
542
+ },
543
+ {
544
+ "epoch": 14.88888888888889,
545
+ "eval_accuracy": 0.8315018315018315,
546
+ "eval_confusion_matrix": [
547
+ [
548
+ 54,
549
+ 13,
550
+ 1,
551
+ 4
552
+ ],
553
+ [
554
+ 10,
555
+ 39,
556
+ 11,
557
+ 0
558
+ ],
559
+ [
560
+ 0,
561
+ 7,
562
+ 69,
563
+ 0
564
+ ],
565
+ [
566
+ 0,
567
+ 0,
568
+ 0,
569
+ 65
570
+ ]
571
+ ],
572
+ "eval_f1": 0.8291811389886823,
573
+ "eval_loss": 0.7115849852561951,
574
+ "eval_precision": 0.829244108966536,
575
+ "eval_recall": 0.8315018315018315,
576
+ "eval_runtime": 2.9616,
577
+ "eval_samples_per_second": 92.181,
578
+ "eval_steps_per_second": 1.013,
579
+ "step": 67
580
+ },
581
+ {
582
+ "epoch": 16.0,
583
+ "eval_accuracy": 0.8315018315018315,
584
+ "eval_confusion_matrix": [
585
+ [
586
+ 60,
587
+ 7,
588
+ 1,
589
+ 4
590
+ ],
591
+ [
592
+ 10,
593
+ 39,
594
+ 11,
595
+ 0
596
+ ],
597
+ [
598
+ 0,
599
+ 11,
600
+ 65,
601
+ 0
602
+ ],
603
+ [
604
+ 1,
605
+ 1,
606
+ 0,
607
+ 63
608
+ ]
609
+ ],
610
+ "eval_f1": 0.8304073820984628,
611
+ "eval_loss": 0.7295921444892883,
612
+ "eval_precision": 0.8295426562258641,
613
+ "eval_recall": 0.8315018315018315,
614
+ "eval_runtime": 2.8758,
615
+ "eval_samples_per_second": 94.932,
616
+ "eval_steps_per_second": 1.043,
617
+ "step": 72
618
+ },
619
+ {
620
+ "epoch": 16.88888888888889,
621
+ "eval_accuracy": 0.8644688644688645,
622
+ "eval_confusion_matrix": [
623
+ [
624
+ 62,
625
+ 5,
626
+ 1,
627
+ 4
628
+ ],
629
+ [
630
+ 9,
631
+ 37,
632
+ 14,
633
+ 0
634
+ ],
635
+ [
636
+ 0,
637
+ 3,
638
+ 73,
639
+ 0
640
+ ],
641
+ [
642
+ 1,
643
+ 0,
644
+ 0,
645
+ 64
646
+ ]
647
+ ],
648
+ "eval_f1": 0.8589767100678526,
649
+ "eval_loss": 0.7055637240409851,
650
+ "eval_precision": 0.8628397746044805,
651
+ "eval_recall": 0.8644688644688645,
652
+ "eval_runtime": 3.074,
653
+ "eval_samples_per_second": 88.809,
654
+ "eval_steps_per_second": 0.976,
655
+ "step": 76
656
+ },
657
+ {
658
+ "epoch": 18.0,
659
+ "eval_accuracy": 0.8644688644688645,
660
+ "eval_confusion_matrix": [
661
+ [
662
+ 65,
663
+ 2,
664
+ 1,
665
+ 4
666
+ ],
667
+ [
668
+ 13,
669
+ 42,
670
+ 5,
671
+ 0
672
+ ],
673
+ [
674
+ 0,
675
+ 12,
676
+ 64,
677
+ 0
678
+ ],
679
+ [
680
+ 0,
681
+ 0,
682
+ 0,
683
+ 65
684
+ ]
685
+ ],
686
+ "eval_f1": 0.8627769756077204,
687
+ "eval_loss": 0.7563945651054382,
688
+ "eval_precision": 0.8634344261673453,
689
+ "eval_recall": 0.8644688644688645,
690
+ "eval_runtime": 3.0072,
691
+ "eval_samples_per_second": 90.783,
692
+ "eval_steps_per_second": 0.998,
693
+ "step": 81
694
+ },
695
+ {
696
+ "epoch": 18.88888888888889,
697
+ "eval_accuracy": 0.8424908424908425,
698
+ "eval_confusion_matrix": [
699
+ [
700
+ 64,
701
+ 4,
702
+ 0,
703
+ 4
704
+ ],
705
+ [
706
+ 11,
707
+ 41,
708
+ 8,
709
+ 0
710
+ ],
711
+ [
712
+ 0,
713
+ 10,
714
+ 66,
715
+ 0
716
+ ],
717
+ [
718
+ 6,
719
+ 0,
720
+ 0,
721
+ 59
722
+ ]
723
+ ],
724
+ "eval_f1": 0.8418306879608031,
725
+ "eval_loss": 0.7825365662574768,
726
+ "eval_precision": 0.8434907006335578,
727
+ "eval_recall": 0.8424908424908425,
728
+ "eval_runtime": 3.0343,
729
+ "eval_samples_per_second": 89.972,
730
+ "eval_steps_per_second": 0.989,
731
+ "step": 85
732
+ },
733
+ {
734
+ "epoch": 20.0,
735
+ "eval_accuracy": 0.8058608058608059,
736
+ "eval_confusion_matrix": [
737
+ [
738
+ 40,
739
+ 28,
740
+ 1,
741
+ 3
742
+ ],
743
+ [
744
+ 2,
745
+ 50,
746
+ 8,
747
+ 0
748
+ ],
749
+ [
750
+ 0,
751
+ 11,
752
+ 65,
753
+ 0
754
+ ],
755
+ [
756
+ 0,
757
+ 0,
758
+ 0,
759
+ 65
760
+ ]
761
+ ],
762
+ "eval_f1": 0.8065760931078588,
763
+ "eval_loss": 0.8426868915557861,
764
+ "eval_precision": 0.8467707085637404,
765
+ "eval_recall": 0.8058608058608059,
766
+ "eval_runtime": 3.0783,
767
+ "eval_samples_per_second": 88.686,
768
+ "eval_steps_per_second": 0.975,
769
+ "step": 90
770
+ },
771
+ {
772
+ "epoch": 20.88888888888889,
773
+ "eval_accuracy": 0.8498168498168498,
774
+ "eval_confusion_matrix": [
775
+ [
776
+ 63,
777
+ 4,
778
+ 0,
779
+ 5
780
+ ],
781
+ [
782
+ 13,
783
+ 40,
784
+ 7,
785
+ 0
786
+ ],
787
+ [
788
+ 0,
789
+ 12,
790
+ 64,
791
+ 0
792
+ ],
793
+ [
794
+ 0,
795
+ 0,
796
+ 0,
797
+ 65
798
+ ]
799
+ ],
800
+ "eval_f1": 0.8477878057985963,
801
+ "eval_loss": 0.7440442442893982,
802
+ "eval_precision": 0.8476393351433065,
803
+ "eval_recall": 0.8498168498168498,
804
+ "eval_runtime": 2.9875,
805
+ "eval_samples_per_second": 91.38,
806
+ "eval_steps_per_second": 1.004,
807
+ "step": 94
808
+ },
809
+ {
810
+ "epoch": 22.0,
811
+ "eval_accuracy": 0.8608058608058609,
812
+ "eval_confusion_matrix": [
813
+ [
814
+ 66,
815
+ 1,
816
+ 1,
817
+ 4
818
+ ],
819
+ [
820
+ 13,
821
+ 36,
822
+ 11,
823
+ 0
824
+ ],
825
+ [
826
+ 0,
827
+ 5,
828
+ 71,
829
+ 0
830
+ ],
831
+ [
832
+ 3,
833
+ 0,
834
+ 0,
835
+ 62
836
+ ]
837
+ ],
838
+ "eval_f1": 0.855194718990792,
839
+ "eval_loss": 0.7338178753852844,
840
+ "eval_precision": 0.8624631692093176,
841
+ "eval_recall": 0.8608058608058609,
842
+ "eval_runtime": 2.98,
843
+ "eval_samples_per_second": 91.611,
844
+ "eval_steps_per_second": 1.007,
845
+ "step": 99
846
+ },
847
+ {
848
+ "epoch": 22.88888888888889,
849
+ "eval_accuracy": 0.8498168498168498,
850
+ "eval_confusion_matrix": [
851
+ [
852
+ 58,
853
+ 10,
854
+ 0,
855
+ 4
856
+ ],
857
+ [
858
+ 10,
859
+ 43,
860
+ 7,
861
+ 0
862
+ ],
863
+ [
864
+ 0,
865
+ 7,
866
+ 69,
867
+ 0
868
+ ],
869
+ [
870
+ 2,
871
+ 1,
872
+ 0,
873
+ 62
874
+ ]
875
+ ],
876
+ "eval_f1": 0.8497737987724402,
877
+ "eval_loss": 0.7231407761573792,
878
+ "eval_precision": 0.8498638493954653,
879
+ "eval_recall": 0.8498168498168498,
880
+ "eval_runtime": 2.9272,
881
+ "eval_samples_per_second": 93.262,
882
+ "eval_steps_per_second": 1.025,
883
+ "step": 103
884
+ },
885
+ {
886
+ "epoch": 24.0,
887
+ "eval_accuracy": 0.8424908424908425,
888
+ "eval_confusion_matrix": [
889
+ [
890
+ 67,
891
+ 1,
892
+ 0,
893
+ 4
894
+ ],
895
+ [
896
+ 12,
897
+ 44,
898
+ 4,
899
+ 0
900
+ ],
901
+ [
902
+ 0,
903
+ 18,
904
+ 58,
905
+ 0
906
+ ],
907
+ [
908
+ 2,
909
+ 2,
910
+ 0,
911
+ 61
912
+ ]
913
+ ],
914
+ "eval_f1": 0.8431607380967995,
915
+ "eval_loss": 0.752363920211792,
916
+ "eval_precision": 0.8507974885146101,
917
+ "eval_recall": 0.8424908424908425,
918
+ "eval_runtime": 3.0257,
919
+ "eval_samples_per_second": 90.228,
920
+ "eval_steps_per_second": 0.992,
921
+ "step": 108
922
+ },
923
+ {
924
+ "epoch": 24.88888888888889,
925
+ "eval_accuracy": 0.8498168498168498,
926
+ "eval_confusion_matrix": [
927
+ [
928
+ 66,
929
+ 0,
930
+ 1,
931
+ 5
932
+ ],
933
+ [
934
+ 15,
935
+ 28,
936
+ 17,
937
+ 0
938
+ ],
939
+ [
940
+ 0,
941
+ 2,
942
+ 74,
943
+ 0
944
+ ],
945
+ [
946
+ 1,
947
+ 0,
948
+ 0,
949
+ 64
950
+ ]
951
+ ],
952
+ "eval_f1": 0.8354928653436116,
953
+ "eval_loss": 0.7849779725074768,
954
+ "eval_precision": 0.8621667009790022,
955
+ "eval_recall": 0.8498168498168498,
956
+ "eval_runtime": 2.8819,
957
+ "eval_samples_per_second": 94.73,
958
+ "eval_steps_per_second": 1.041,
959
+ "step": 112
960
+ },
961
+ {
962
+ "epoch": 26.0,
963
+ "eval_accuracy": 0.8131868131868132,
964
+ "eval_confusion_matrix": [
965
+ [
966
+ 62,
967
+ 6,
968
+ 0,
969
+ 4
970
+ ],
971
+ [
972
+ 11,
973
+ 44,
974
+ 5,
975
+ 0
976
+ ],
977
+ [
978
+ 0,
979
+ 24,
980
+ 52,
981
+ 0
982
+ ],
983
+ [
984
+ 1,
985
+ 0,
986
+ 0,
987
+ 64
988
+ ]
989
+ ],
990
+ "eval_f1": 0.8151598256855266,
991
+ "eval_loss": 0.7896661162376404,
992
+ "eval_precision": 0.8297062414709473,
993
+ "eval_recall": 0.8131868131868132,
994
+ "eval_runtime": 2.8953,
995
+ "eval_samples_per_second": 94.291,
996
+ "eval_steps_per_second": 1.036,
997
+ "step": 117
998
+ },
999
+ {
1000
+ "epoch": 26.88888888888889,
1001
+ "eval_accuracy": 0.8791208791208791,
1002
+ "eval_confusion_matrix": [
1003
+ [
1004
+ 66,
1005
+ 1,
1006
+ 1,
1007
+ 4
1008
+ ],
1009
+ [
1010
+ 13,
1011
+ 40,
1012
+ 7,
1013
+ 0
1014
+ ],
1015
+ [
1016
+ 1,
1017
+ 6,
1018
+ 69,
1019
+ 0
1020
+ ],
1021
+ [
1022
+ 0,
1023
+ 0,
1024
+ 0,
1025
+ 65
1026
+ ]
1027
+ ],
1028
+ "eval_f1": 0.8754390108936493,
1029
+ "eval_loss": 0.7321063876152039,
1030
+ "eval_precision": 0.878386849630407,
1031
+ "eval_recall": 0.8791208791208791,
1032
+ "eval_runtime": 2.8941,
1033
+ "eval_samples_per_second": 94.329,
1034
+ "eval_steps_per_second": 1.037,
1035
+ "step": 121
1036
+ }
1037
+ ],
1038
+ "logging_steps": 500,
1039
+ "max_steps": 180,
1040
+ "num_input_tokens_seen": 0,
1041
+ "num_train_epochs": 45,
1042
+ "save_steps": 500,
1043
+ "stateful_callbacks": {
1044
+ "TrainerControl": {
1045
+ "args": {
1046
+ "should_epoch_stop": false,
1047
+ "should_evaluate": false,
1048
+ "should_log": false,
1049
+ "should_save": true,
1050
+ "should_training_stop": false
1051
+ },
1052
+ "attributes": {}
1053
+ }
1054
+ },
1055
+ "total_flos": 6.69304876608e+16,
1056
+ "train_batch_size": 128,
1057
+ "trial_name": null,
1058
+ "trial_params": null
1059
+ }
checkpoint-121/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:09e217cad0464b642c54f8b7691100b62a93dceb32a1869f55e3f0eb3a54a79e
3
+ size 5240
checkpoint-126/config.json ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "ntu-spml/distilhubert",
3
+ "activation_dropout": 0.1,
4
+ "apply_spec_augment": false,
5
+ "architectures": [
6
+ "HubertForSequenceClassification"
7
+ ],
8
+ "attention_dropout": 0.1,
9
+ "bos_token_id": 1,
10
+ "classifier_proj_size": 256,
11
+ "conv_bias": false,
12
+ "conv_dim": [
13
+ 512,
14
+ 512,
15
+ 512,
16
+ 512,
17
+ 512,
18
+ 512,
19
+ 512
20
+ ],
21
+ "conv_kernel": [
22
+ 10,
23
+ 3,
24
+ 3,
25
+ 3,
26
+ 3,
27
+ 2,
28
+ 2
29
+ ],
30
+ "conv_stride": [
31
+ 5,
32
+ 2,
33
+ 2,
34
+ 2,
35
+ 2,
36
+ 2,
37
+ 2
38
+ ],
39
+ "ctc_loss_reduction": "sum",
40
+ "ctc_zero_infinity": false,
41
+ "do_stable_layer_norm": false,
42
+ "eos_token_id": 2,
43
+ "feat_extract_activation": "gelu",
44
+ "feat_extract_norm": "group",
45
+ "feat_proj_dropout": 0.0,
46
+ "feat_proj_layer_norm": false,
47
+ "final_dropout": 0.0,
48
+ "finetuning_task": "audio-classification",
49
+ "hidden_act": "gelu",
50
+ "hidden_dropout": 0.1,
51
+ "hidden_size": 768,
52
+ "id2label": {
53
+ "0": "1s_normal",
54
+ "1": "1s_pain",
55
+ "2": "1s_hunger",
56
+ "3": "1s_asphyxia"
57
+ },
58
+ "initializer_range": 0.02,
59
+ "intermediate_size": 3072,
60
+ "label2id": {
61
+ "LABEL_0": 0,
62
+ "LABEL_1": 1,
63
+ "LABEL_2": 2,
64
+ "LABEL_3": 3
65
+ },
66
+ "layer_norm_eps": 1e-05,
67
+ "layerdrop": 0.0,
68
+ "mask_feature_length": 10,
69
+ "mask_feature_min_masks": 0,
70
+ "mask_feature_prob": 0.0,
71
+ "mask_time_length": 10,
72
+ "mask_time_min_masks": 2,
73
+ "mask_time_prob": 0.05,
74
+ "model_type": "hubert",
75
+ "num_attention_heads": 12,
76
+ "num_conv_pos_embedding_groups": 16,
77
+ "num_conv_pos_embeddings": 128,
78
+ "num_feat_extract_layers": 7,
79
+ "num_hidden_layers": 2,
80
+ "pad_token_id": 0,
81
+ "torch_dtype": "float32",
82
+ "transformers_version": "4.44.2",
83
+ "use_weighted_layer_sum": false,
84
+ "vocab_size": 32
85
+ }
checkpoint-126/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:06f0d1b8ea136d0eeebfeb45abb3a0bd540dc213dbae5c03e5bf2ebfe926bc7b
3
+ size 94765560
checkpoint-126/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b3390789d74ae0285ec7c077478bca5275fd78b948bd51a8210c4e4547eba1ae
3
+ size 189556666
checkpoint-126/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:63288390848aaf8c678d1ad2c771d062d656c8caedcb8079034b6b7b2b3c0359
3
+ size 14308
checkpoint-126/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:00fa2c49ac4b95860fb0f8bfb07178e641fa9e4de37635dd229a5022d1dcfb6e
3
+ size 1064
checkpoint-126/trainer_state.json ADDED
@@ -0,0 +1,1097 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.8754390108936493,
3
+ "best_model_checkpoint": "distilhubert-finetuned-mixed-data/checkpoint-121",
4
+ "epoch": 28.0,
5
+ "eval_steps": 500,
6
+ "global_step": 126,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.8888888888888888,
13
+ "eval_accuracy": 0.63003663003663,
14
+ "eval_confusion_matrix": [
15
+ [
16
+ 53,
17
+ 0,
18
+ 16,
19
+ 3
20
+ ],
21
+ [
22
+ 28,
23
+ 1,
24
+ 31,
25
+ 0
26
+ ],
27
+ [
28
+ 13,
29
+ 0,
30
+ 62,
31
+ 1
32
+ ],
33
+ [
34
+ 3,
35
+ 0,
36
+ 6,
37
+ 56
38
+ ]
39
+ ],
40
+ "eval_f1": 0.566693372031096,
41
+ "eval_loss": 1.31327486038208,
42
+ "eval_precision": 0.7361933549293478,
43
+ "eval_recall": 0.63003663003663,
44
+ "eval_runtime": 2.8436,
45
+ "eval_samples_per_second": 96.005,
46
+ "eval_steps_per_second": 1.055,
47
+ "step": 4
48
+ },
49
+ {
50
+ "epoch": 2.0,
51
+ "eval_accuracy": 0.6593406593406593,
52
+ "eval_confusion_matrix": [
53
+ [
54
+ 41,
55
+ 10,
56
+ 16,
57
+ 5
58
+ ],
59
+ [
60
+ 21,
61
+ 1,
62
+ 38,
63
+ 0
64
+ ],
65
+ [
66
+ 1,
67
+ 2,
68
+ 73,
69
+ 0
70
+ ],
71
+ [
72
+ 0,
73
+ 0,
74
+ 0,
75
+ 65
76
+ ]
77
+ ],
78
+ "eval_f1": 0.595713773130153,
79
+ "eval_loss": 0.9242589473724365,
80
+ "eval_precision": 0.5696509512811321,
81
+ "eval_recall": 0.6593406593406593,
82
+ "eval_runtime": 2.8668,
83
+ "eval_samples_per_second": 95.227,
84
+ "eval_steps_per_second": 1.046,
85
+ "step": 9
86
+ },
87
+ {
88
+ "epoch": 2.888888888888889,
89
+ "eval_accuracy": 0.717948717948718,
90
+ "eval_confusion_matrix": [
91
+ [
92
+ 52,
93
+ 8,
94
+ 7,
95
+ 5
96
+ ],
97
+ [
98
+ 18,
99
+ 5,
100
+ 37,
101
+ 0
102
+ ],
103
+ [
104
+ 1,
105
+ 0,
106
+ 75,
107
+ 0
108
+ ],
109
+ [
110
+ 1,
111
+ 0,
112
+ 0,
113
+ 64
114
+ ]
115
+ ],
116
+ "eval_f1": 0.6621621567125869,
117
+ "eval_loss": 0.8232662081718445,
118
+ "eval_precision": 0.671303801513745,
119
+ "eval_recall": 0.717948717948718,
120
+ "eval_runtime": 2.8711,
121
+ "eval_samples_per_second": 95.084,
122
+ "eval_steps_per_second": 1.045,
123
+ "step": 13
124
+ },
125
+ {
126
+ "epoch": 4.0,
127
+ "eval_accuracy": 0.6959706959706959,
128
+ "eval_confusion_matrix": [
129
+ [
130
+ 45,
131
+ 9,
132
+ 13,
133
+ 5
134
+ ],
135
+ [
136
+ 13,
137
+ 5,
138
+ 42,
139
+ 0
140
+ ],
141
+ [
142
+ 1,
143
+ 0,
144
+ 75,
145
+ 0
146
+ ],
147
+ [
148
+ 0,
149
+ 0,
150
+ 0,
151
+ 65
152
+ ]
153
+ ],
154
+ "eval_f1": 0.6428796900399459,
155
+ "eval_loss": 0.951453447341919,
156
+ "eval_precision": 0.6613447705829899,
157
+ "eval_recall": 0.6959706959706959,
158
+ "eval_runtime": 2.8813,
159
+ "eval_samples_per_second": 94.75,
160
+ "eval_steps_per_second": 1.041,
161
+ "step": 18
162
+ },
163
+ {
164
+ "epoch": 4.888888888888889,
165
+ "eval_accuracy": 0.6666666666666666,
166
+ "eval_confusion_matrix": [
167
+ [
168
+ 16,
169
+ 49,
170
+ 3,
171
+ 4
172
+ ],
173
+ [
174
+ 1,
175
+ 26,
176
+ 33,
177
+ 0
178
+ ],
179
+ [
180
+ 0,
181
+ 1,
182
+ 75,
183
+ 0
184
+ ],
185
+ [
186
+ 0,
187
+ 0,
188
+ 0,
189
+ 65
190
+ ]
191
+ ],
192
+ "eval_f1": 0.6331541584523206,
193
+ "eval_loss": 1.0080645084381104,
194
+ "eval_precision": 0.7358031394913882,
195
+ "eval_recall": 0.6666666666666666,
196
+ "eval_runtime": 2.9108,
197
+ "eval_samples_per_second": 93.789,
198
+ "eval_steps_per_second": 1.031,
199
+ "step": 22
200
+ },
201
+ {
202
+ "epoch": 6.0,
203
+ "eval_accuracy": 0.652014652014652,
204
+ "eval_confusion_matrix": [
205
+ [
206
+ 66,
207
+ 0,
208
+ 0,
209
+ 6
210
+ ],
211
+ [
212
+ 36,
213
+ 16,
214
+ 8,
215
+ 0
216
+ ],
217
+ [
218
+ 10,
219
+ 35,
220
+ 31,
221
+ 0
222
+ ],
223
+ [
224
+ 0,
225
+ 0,
226
+ 0,
227
+ 65
228
+ ]
229
+ ],
230
+ "eval_f1": 0.6302407955860642,
231
+ "eval_loss": 0.9598046541213989,
232
+ "eval_precision": 0.6636241740077424,
233
+ "eval_recall": 0.652014652014652,
234
+ "eval_runtime": 2.9773,
235
+ "eval_samples_per_second": 91.693,
236
+ "eval_steps_per_second": 1.008,
237
+ "step": 27
238
+ },
239
+ {
240
+ "epoch": 6.888888888888889,
241
+ "eval_accuracy": 0.7692307692307693,
242
+ "eval_confusion_matrix": [
243
+ [
244
+ 66,
245
+ 0,
246
+ 0,
247
+ 6
248
+ ],
249
+ [
250
+ 29,
251
+ 21,
252
+ 10,
253
+ 0
254
+ ],
255
+ [
256
+ 3,
257
+ 15,
258
+ 58,
259
+ 0
260
+ ],
261
+ [
262
+ 0,
263
+ 0,
264
+ 0,
265
+ 65
266
+ ]
267
+ ],
268
+ "eval_f1": 0.7527855586679116,
269
+ "eval_loss": 0.7898163795471191,
270
+ "eval_precision": 0.7612467677056892,
271
+ "eval_recall": 0.7692307692307693,
272
+ "eval_runtime": 2.8918,
273
+ "eval_samples_per_second": 94.404,
274
+ "eval_steps_per_second": 1.037,
275
+ "step": 31
276
+ },
277
+ {
278
+ "epoch": 8.0,
279
+ "eval_accuracy": 0.7802197802197802,
280
+ "eval_confusion_matrix": [
281
+ [
282
+ 66,
283
+ 0,
284
+ 0,
285
+ 6
286
+ ],
287
+ [
288
+ 25,
289
+ 15,
290
+ 19,
291
+ 1
292
+ ],
293
+ [
294
+ 1,
295
+ 6,
296
+ 68,
297
+ 1
298
+ ],
299
+ [
300
+ 1,
301
+ 0,
302
+ 0,
303
+ 64
304
+ ]
305
+ ],
306
+ "eval_f1": 0.7471179200524057,
307
+ "eval_loss": 0.7337484359741211,
308
+ "eval_precision": 0.7733845922309294,
309
+ "eval_recall": 0.7802197802197802,
310
+ "eval_runtime": 2.9006,
311
+ "eval_samples_per_second": 94.119,
312
+ "eval_steps_per_second": 1.034,
313
+ "step": 36
314
+ },
315
+ {
316
+ "epoch": 8.88888888888889,
317
+ "eval_accuracy": 0.7912087912087912,
318
+ "eval_confusion_matrix": [
319
+ [
320
+ 57,
321
+ 8,
322
+ 3,
323
+ 4
324
+ ],
325
+ [
326
+ 13,
327
+ 24,
328
+ 23,
329
+ 0
330
+ ],
331
+ [
332
+ 0,
333
+ 5,
334
+ 71,
335
+ 0
336
+ ],
337
+ [
338
+ 1,
339
+ 0,
340
+ 0,
341
+ 64
342
+ ]
343
+ ],
344
+ "eval_f1": 0.7766564722186922,
345
+ "eval_loss": 0.7148727178573608,
346
+ "eval_precision": 0.7821504483074875,
347
+ "eval_recall": 0.7912087912087912,
348
+ "eval_runtime": 2.8866,
349
+ "eval_samples_per_second": 94.576,
350
+ "eval_steps_per_second": 1.039,
351
+ "step": 40
352
+ },
353
+ {
354
+ "epoch": 10.0,
355
+ "eval_accuracy": 0.8168498168498168,
356
+ "eval_confusion_matrix": [
357
+ [
358
+ 51,
359
+ 8,
360
+ 1,
361
+ 12
362
+ ],
363
+ [
364
+ 10,
365
+ 36,
366
+ 14,
367
+ 0
368
+ ],
369
+ [
370
+ 0,
371
+ 5,
372
+ 71,
373
+ 0
374
+ ],
375
+ [
376
+ 0,
377
+ 0,
378
+ 0,
379
+ 65
380
+ ]
381
+ ],
382
+ "eval_f1": 0.8094336953840884,
383
+ "eval_loss": 0.7574812173843384,
384
+ "eval_precision": 0.8127936625684181,
385
+ "eval_recall": 0.8168498168498168,
386
+ "eval_runtime": 2.8967,
387
+ "eval_samples_per_second": 94.245,
388
+ "eval_steps_per_second": 1.036,
389
+ "step": 45
390
+ },
391
+ {
392
+ "epoch": 10.88888888888889,
393
+ "eval_accuracy": 0.8021978021978022,
394
+ "eval_confusion_matrix": [
395
+ [
396
+ 48,
397
+ 18,
398
+ 2,
399
+ 4
400
+ ],
401
+ [
402
+ 4,
403
+ 41,
404
+ 15,
405
+ 0
406
+ ],
407
+ [
408
+ 0,
409
+ 7,
410
+ 69,
411
+ 0
412
+ ],
413
+ [
414
+ 3,
415
+ 0,
416
+ 1,
417
+ 61
418
+ ]
419
+ ],
420
+ "eval_f1": 0.801525180147331,
421
+ "eval_loss": 0.7140281200408936,
422
+ "eval_precision": 0.8109338936925145,
423
+ "eval_recall": 0.8021978021978022,
424
+ "eval_runtime": 2.917,
425
+ "eval_samples_per_second": 93.589,
426
+ "eval_steps_per_second": 1.028,
427
+ "step": 49
428
+ },
429
+ {
430
+ "epoch": 12.0,
431
+ "eval_accuracy": 0.8534798534798534,
432
+ "eval_confusion_matrix": [
433
+ [
434
+ 56,
435
+ 12,
436
+ 0,
437
+ 4
438
+ ],
439
+ [
440
+ 6,
441
+ 43,
442
+ 11,
443
+ 0
444
+ ],
445
+ [
446
+ 0,
447
+ 6,
448
+ 70,
449
+ 0
450
+ ],
451
+ [
452
+ 1,
453
+ 0,
454
+ 0,
455
+ 64
456
+ ]
457
+ ],
458
+ "eval_f1": 0.8523994617102314,
459
+ "eval_loss": 0.6672152280807495,
460
+ "eval_precision": 0.8540313732642031,
461
+ "eval_recall": 0.8534798534798534,
462
+ "eval_runtime": 2.8969,
463
+ "eval_samples_per_second": 94.238,
464
+ "eval_steps_per_second": 1.036,
465
+ "step": 54
466
+ },
467
+ {
468
+ "epoch": 12.88888888888889,
469
+ "eval_accuracy": 0.8498168498168498,
470
+ "eval_confusion_matrix": [
471
+ [
472
+ 60,
473
+ 8,
474
+ 0,
475
+ 4
476
+ ],
477
+ [
478
+ 8,
479
+ 38,
480
+ 14,
481
+ 0
482
+ ],
483
+ [
484
+ 0,
485
+ 6,
486
+ 70,
487
+ 0
488
+ ],
489
+ [
490
+ 1,
491
+ 0,
492
+ 0,
493
+ 64
494
+ ]
495
+ ],
496
+ "eval_f1": 0.8463270052615757,
497
+ "eval_loss": 0.6432910561561584,
498
+ "eval_precision": 0.8460243715014519,
499
+ "eval_recall": 0.8498168498168498,
500
+ "eval_runtime": 3.0281,
501
+ "eval_samples_per_second": 90.156,
502
+ "eval_steps_per_second": 0.991,
503
+ "step": 58
504
+ },
505
+ {
506
+ "epoch": 14.0,
507
+ "eval_accuracy": 0.8278388278388278,
508
+ "eval_confusion_matrix": [
509
+ [
510
+ 54,
511
+ 13,
512
+ 1,
513
+ 4
514
+ ],
515
+ [
516
+ 7,
517
+ 44,
518
+ 9,
519
+ 0
520
+ ],
521
+ [
522
+ 0,
523
+ 11,
524
+ 65,
525
+ 0
526
+ ],
527
+ [
528
+ 1,
529
+ 1,
530
+ 0,
531
+ 63
532
+ ]
533
+ ],
534
+ "eval_f1": 0.82943590265942,
535
+ "eval_loss": 0.7395206093788147,
536
+ "eval_precision": 0.8350059217447382,
537
+ "eval_recall": 0.8278388278388278,
538
+ "eval_runtime": 2.9869,
539
+ "eval_samples_per_second": 91.399,
540
+ "eval_steps_per_second": 1.004,
541
+ "step": 63
542
+ },
543
+ {
544
+ "epoch": 14.88888888888889,
545
+ "eval_accuracy": 0.8315018315018315,
546
+ "eval_confusion_matrix": [
547
+ [
548
+ 54,
549
+ 13,
550
+ 1,
551
+ 4
552
+ ],
553
+ [
554
+ 10,
555
+ 39,
556
+ 11,
557
+ 0
558
+ ],
559
+ [
560
+ 0,
561
+ 7,
562
+ 69,
563
+ 0
564
+ ],
565
+ [
566
+ 0,
567
+ 0,
568
+ 0,
569
+ 65
570
+ ]
571
+ ],
572
+ "eval_f1": 0.8291811389886823,
573
+ "eval_loss": 0.7115849852561951,
574
+ "eval_precision": 0.829244108966536,
575
+ "eval_recall": 0.8315018315018315,
576
+ "eval_runtime": 2.9616,
577
+ "eval_samples_per_second": 92.181,
578
+ "eval_steps_per_second": 1.013,
579
+ "step": 67
580
+ },
581
+ {
582
+ "epoch": 16.0,
583
+ "eval_accuracy": 0.8315018315018315,
584
+ "eval_confusion_matrix": [
585
+ [
586
+ 60,
587
+ 7,
588
+ 1,
589
+ 4
590
+ ],
591
+ [
592
+ 10,
593
+ 39,
594
+ 11,
595
+ 0
596
+ ],
597
+ [
598
+ 0,
599
+ 11,
600
+ 65,
601
+ 0
602
+ ],
603
+ [
604
+ 1,
605
+ 1,
606
+ 0,
607
+ 63
608
+ ]
609
+ ],
610
+ "eval_f1": 0.8304073820984628,
611
+ "eval_loss": 0.7295921444892883,
612
+ "eval_precision": 0.8295426562258641,
613
+ "eval_recall": 0.8315018315018315,
614
+ "eval_runtime": 2.8758,
615
+ "eval_samples_per_second": 94.932,
616
+ "eval_steps_per_second": 1.043,
617
+ "step": 72
618
+ },
619
+ {
620
+ "epoch": 16.88888888888889,
621
+ "eval_accuracy": 0.8644688644688645,
622
+ "eval_confusion_matrix": [
623
+ [
624
+ 62,
625
+ 5,
626
+ 1,
627
+ 4
628
+ ],
629
+ [
630
+ 9,
631
+ 37,
632
+ 14,
633
+ 0
634
+ ],
635
+ [
636
+ 0,
637
+ 3,
638
+ 73,
639
+ 0
640
+ ],
641
+ [
642
+ 1,
643
+ 0,
644
+ 0,
645
+ 64
646
+ ]
647
+ ],
648
+ "eval_f1": 0.8589767100678526,
649
+ "eval_loss": 0.7055637240409851,
650
+ "eval_precision": 0.8628397746044805,
651
+ "eval_recall": 0.8644688644688645,
652
+ "eval_runtime": 3.074,
653
+ "eval_samples_per_second": 88.809,
654
+ "eval_steps_per_second": 0.976,
655
+ "step": 76
656
+ },
657
+ {
658
+ "epoch": 18.0,
659
+ "eval_accuracy": 0.8644688644688645,
660
+ "eval_confusion_matrix": [
661
+ [
662
+ 65,
663
+ 2,
664
+ 1,
665
+ 4
666
+ ],
667
+ [
668
+ 13,
669
+ 42,
670
+ 5,
671
+ 0
672
+ ],
673
+ [
674
+ 0,
675
+ 12,
676
+ 64,
677
+ 0
678
+ ],
679
+ [
680
+ 0,
681
+ 0,
682
+ 0,
683
+ 65
684
+ ]
685
+ ],
686
+ "eval_f1": 0.8627769756077204,
687
+ "eval_loss": 0.7563945651054382,
688
+ "eval_precision": 0.8634344261673453,
689
+ "eval_recall": 0.8644688644688645,
690
+ "eval_runtime": 3.0072,
691
+ "eval_samples_per_second": 90.783,
692
+ "eval_steps_per_second": 0.998,
693
+ "step": 81
694
+ },
695
+ {
696
+ "epoch": 18.88888888888889,
697
+ "eval_accuracy": 0.8424908424908425,
698
+ "eval_confusion_matrix": [
699
+ [
700
+ 64,
701
+ 4,
702
+ 0,
703
+ 4
704
+ ],
705
+ [
706
+ 11,
707
+ 41,
708
+ 8,
709
+ 0
710
+ ],
711
+ [
712
+ 0,
713
+ 10,
714
+ 66,
715
+ 0
716
+ ],
717
+ [
718
+ 6,
719
+ 0,
720
+ 0,
721
+ 59
722
+ ]
723
+ ],
724
+ "eval_f1": 0.8418306879608031,
725
+ "eval_loss": 0.7825365662574768,
726
+ "eval_precision": 0.8434907006335578,
727
+ "eval_recall": 0.8424908424908425,
728
+ "eval_runtime": 3.0343,
729
+ "eval_samples_per_second": 89.972,
730
+ "eval_steps_per_second": 0.989,
731
+ "step": 85
732
+ },
733
+ {
734
+ "epoch": 20.0,
735
+ "eval_accuracy": 0.8058608058608059,
736
+ "eval_confusion_matrix": [
737
+ [
738
+ 40,
739
+ 28,
740
+ 1,
741
+ 3
742
+ ],
743
+ [
744
+ 2,
745
+ 50,
746
+ 8,
747
+ 0
748
+ ],
749
+ [
750
+ 0,
751
+ 11,
752
+ 65,
753
+ 0
754
+ ],
755
+ [
756
+ 0,
757
+ 0,
758
+ 0,
759
+ 65
760
+ ]
761
+ ],
762
+ "eval_f1": 0.8065760931078588,
763
+ "eval_loss": 0.8426868915557861,
764
+ "eval_precision": 0.8467707085637404,
765
+ "eval_recall": 0.8058608058608059,
766
+ "eval_runtime": 3.0783,
767
+ "eval_samples_per_second": 88.686,
768
+ "eval_steps_per_second": 0.975,
769
+ "step": 90
770
+ },
771
+ {
772
+ "epoch": 20.88888888888889,
773
+ "eval_accuracy": 0.8498168498168498,
774
+ "eval_confusion_matrix": [
775
+ [
776
+ 63,
777
+ 4,
778
+ 0,
779
+ 5
780
+ ],
781
+ [
782
+ 13,
783
+ 40,
784
+ 7,
785
+ 0
786
+ ],
787
+ [
788
+ 0,
789
+ 12,
790
+ 64,
791
+ 0
792
+ ],
793
+ [
794
+ 0,
795
+ 0,
796
+ 0,
797
+ 65
798
+ ]
799
+ ],
800
+ "eval_f1": 0.8477878057985963,
801
+ "eval_loss": 0.7440442442893982,
802
+ "eval_precision": 0.8476393351433065,
803
+ "eval_recall": 0.8498168498168498,
804
+ "eval_runtime": 2.9875,
805
+ "eval_samples_per_second": 91.38,
806
+ "eval_steps_per_second": 1.004,
807
+ "step": 94
808
+ },
809
+ {
810
+ "epoch": 22.0,
811
+ "eval_accuracy": 0.8608058608058609,
812
+ "eval_confusion_matrix": [
813
+ [
814
+ 66,
815
+ 1,
816
+ 1,
817
+ 4
818
+ ],
819
+ [
820
+ 13,
821
+ 36,
822
+ 11,
823
+ 0
824
+ ],
825
+ [
826
+ 0,
827
+ 5,
828
+ 71,
829
+ 0
830
+ ],
831
+ [
832
+ 3,
833
+ 0,
834
+ 0,
835
+ 62
836
+ ]
837
+ ],
838
+ "eval_f1": 0.855194718990792,
839
+ "eval_loss": 0.7338178753852844,
840
+ "eval_precision": 0.8624631692093176,
841
+ "eval_recall": 0.8608058608058609,
842
+ "eval_runtime": 2.98,
843
+ "eval_samples_per_second": 91.611,
844
+ "eval_steps_per_second": 1.007,
845
+ "step": 99
846
+ },
847
+ {
848
+ "epoch": 22.88888888888889,
849
+ "eval_accuracy": 0.8498168498168498,
850
+ "eval_confusion_matrix": [
851
+ [
852
+ 58,
853
+ 10,
854
+ 0,
855
+ 4
856
+ ],
857
+ [
858
+ 10,
859
+ 43,
860
+ 7,
861
+ 0
862
+ ],
863
+ [
864
+ 0,
865
+ 7,
866
+ 69,
867
+ 0
868
+ ],
869
+ [
870
+ 2,
871
+ 1,
872
+ 0,
873
+ 62
874
+ ]
875
+ ],
876
+ "eval_f1": 0.8497737987724402,
877
+ "eval_loss": 0.7231407761573792,
878
+ "eval_precision": 0.8498638493954653,
879
+ "eval_recall": 0.8498168498168498,
880
+ "eval_runtime": 2.9272,
881
+ "eval_samples_per_second": 93.262,
882
+ "eval_steps_per_second": 1.025,
883
+ "step": 103
884
+ },
885
+ {
886
+ "epoch": 24.0,
887
+ "eval_accuracy": 0.8424908424908425,
888
+ "eval_confusion_matrix": [
889
+ [
890
+ 67,
891
+ 1,
892
+ 0,
893
+ 4
894
+ ],
895
+ [
896
+ 12,
897
+ 44,
898
+ 4,
899
+ 0
900
+ ],
901
+ [
902
+ 0,
903
+ 18,
904
+ 58,
905
+ 0
906
+ ],
907
+ [
908
+ 2,
909
+ 2,
910
+ 0,
911
+ 61
912
+ ]
913
+ ],
914
+ "eval_f1": 0.8431607380967995,
915
+ "eval_loss": 0.752363920211792,
916
+ "eval_precision": 0.8507974885146101,
917
+ "eval_recall": 0.8424908424908425,
918
+ "eval_runtime": 3.0257,
919
+ "eval_samples_per_second": 90.228,
920
+ "eval_steps_per_second": 0.992,
921
+ "step": 108
922
+ },
923
+ {
924
+ "epoch": 24.88888888888889,
925
+ "eval_accuracy": 0.8498168498168498,
926
+ "eval_confusion_matrix": [
927
+ [
928
+ 66,
929
+ 0,
930
+ 1,
931
+ 5
932
+ ],
933
+ [
934
+ 15,
935
+ 28,
936
+ 17,
937
+ 0
938
+ ],
939
+ [
940
+ 0,
941
+ 2,
942
+ 74,
943
+ 0
944
+ ],
945
+ [
946
+ 1,
947
+ 0,
948
+ 0,
949
+ 64
950
+ ]
951
+ ],
952
+ "eval_f1": 0.8354928653436116,
953
+ "eval_loss": 0.7849779725074768,
954
+ "eval_precision": 0.8621667009790022,
955
+ "eval_recall": 0.8498168498168498,
956
+ "eval_runtime": 2.8819,
957
+ "eval_samples_per_second": 94.73,
958
+ "eval_steps_per_second": 1.041,
959
+ "step": 112
960
+ },
961
+ {
962
+ "epoch": 26.0,
963
+ "eval_accuracy": 0.8131868131868132,
964
+ "eval_confusion_matrix": [
965
+ [
966
+ 62,
967
+ 6,
968
+ 0,
969
+ 4
970
+ ],
971
+ [
972
+ 11,
973
+ 44,
974
+ 5,
975
+ 0
976
+ ],
977
+ [
978
+ 0,
979
+ 24,
980
+ 52,
981
+ 0
982
+ ],
983
+ [
984
+ 1,
985
+ 0,
986
+ 0,
987
+ 64
988
+ ]
989
+ ],
990
+ "eval_f1": 0.8151598256855266,
991
+ "eval_loss": 0.7896661162376404,
992
+ "eval_precision": 0.8297062414709473,
993
+ "eval_recall": 0.8131868131868132,
994
+ "eval_runtime": 2.8953,
995
+ "eval_samples_per_second": 94.291,
996
+ "eval_steps_per_second": 1.036,
997
+ "step": 117
998
+ },
999
+ {
1000
+ "epoch": 26.88888888888889,
1001
+ "eval_accuracy": 0.8791208791208791,
1002
+ "eval_confusion_matrix": [
1003
+ [
1004
+ 66,
1005
+ 1,
1006
+ 1,
1007
+ 4
1008
+ ],
1009
+ [
1010
+ 13,
1011
+ 40,
1012
+ 7,
1013
+ 0
1014
+ ],
1015
+ [
1016
+ 1,
1017
+ 6,
1018
+ 69,
1019
+ 0
1020
+ ],
1021
+ [
1022
+ 0,
1023
+ 0,
1024
+ 0,
1025
+ 65
1026
+ ]
1027
+ ],
1028
+ "eval_f1": 0.8754390108936493,
1029
+ "eval_loss": 0.7321063876152039,
1030
+ "eval_precision": 0.878386849630407,
1031
+ "eval_recall": 0.8791208791208791,
1032
+ "eval_runtime": 2.8941,
1033
+ "eval_samples_per_second": 94.329,
1034
+ "eval_steps_per_second": 1.037,
1035
+ "step": 121
1036
+ },
1037
+ {
1038
+ "epoch": 28.0,
1039
+ "eval_accuracy": 0.8608058608058609,
1040
+ "eval_confusion_matrix": [
1041
+ [
1042
+ 66,
1043
+ 2,
1044
+ 0,
1045
+ 4
1046
+ ],
1047
+ [
1048
+ 15,
1049
+ 40,
1050
+ 5,
1051
+ 0
1052
+ ],
1053
+ [
1054
+ 1,
1055
+ 11,
1056
+ 64,
1057
+ 0
1058
+ ],
1059
+ [
1060
+ 0,
1061
+ 0,
1062
+ 0,
1063
+ 65
1064
+ ]
1065
+ ],
1066
+ "eval_f1": 0.8583938406059761,
1067
+ "eval_loss": 0.7691925168037415,
1068
+ "eval_precision": 0.8606552236676889,
1069
+ "eval_recall": 0.8608058608058609,
1070
+ "eval_runtime": 2.9043,
1071
+ "eval_samples_per_second": 93.997,
1072
+ "eval_steps_per_second": 1.033,
1073
+ "step": 126
1074
+ }
1075
+ ],
1076
+ "logging_steps": 500,
1077
+ "max_steps": 180,
1078
+ "num_input_tokens_seen": 0,
1079
+ "num_train_epochs": 45,
1080
+ "save_steps": 500,
1081
+ "stateful_callbacks": {
1082
+ "TrainerControl": {
1083
+ "args": {
1084
+ "should_epoch_stop": false,
1085
+ "should_evaluate": false,
1086
+ "should_log": false,
1087
+ "should_save": true,
1088
+ "should_training_stop": false
1089
+ },
1090
+ "attributes": {}
1091
+ }
1092
+ },
1093
+ "total_flos": 6.94093946112e+16,
1094
+ "train_batch_size": 128,
1095
+ "trial_name": null,
1096
+ "trial_params": null
1097
+ }
checkpoint-126/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:09e217cad0464b642c54f8b7691100b62a93dceb32a1869f55e3f0eb3a54a79e
3
+ size 5240