AntoineBlanot committed on
Commit
e3f67a1
1 Parent(s): 64e0ff0

Upload 13 files

Browse files
README.md ADDED
@@ -0,0 +1,106 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: mit
3
+ base_model: roberta-base
4
+ tags:
5
+ - generated_from_trainer
6
+ metrics:
7
+ - accuracy
8
+ - recall
9
+ - f1
10
+ model-index:
11
+ - name: train
12
+ results: []
13
+ ---
14
+
15
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
16
+ should probably proofread and complete it, then remove this comment. -->
17
+
18
+ # train
19
+
20
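+ This model is a fine-tuned version of [roberta-base](https://huggingface.co/roberta-base) for seven-class, single-label emotion classification (anger, disgust, fear, joy, neutral, sadness, surprise); the training dataset is not identified in the training metadata.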
21
+ It achieves the following results on the evaluation set:
22
+ - Loss: 0.6648
23
+ - Accuracy: 0.7617
24
+ - B Acc (balanced accuracy, i.e. the mean of the per-class recalls): 0.6394
25
+ - Prec: 0.7595
26
+ - Recall: 0.7617
27
+ - F1: 0.7602
28
+ - Prec Joy: 0.7315
29
+ - Recall Joy: 0.7793
30
+ - F1 Joy: 0.7547
31
+ - Prec Anger: 0.6467
32
+ - Recall Anger: 0.6507
33
+ - F1 Anger: 0.6487
34
+ - Prec Disgust: 0.4710
35
+ - Recall Disgust: 0.4500
36
+ - F1 Disgust: 0.4603
37
+ - Prec Fear: 0.6963
38
+ - Recall Fear: 0.6409
39
+ - F1 Fear: 0.6675
40
+ - Prec Neutral: 0.8457
41
+ - Recall Neutral: 0.8490
42
+ - F1 Neutral: 0.8474
43
+ - Prec Sadness: 0.7094
44
+ - Recall Sadness: 0.6738
45
+ - F1 Sadness: 0.6911
46
+ - Prec Surprise: 0.5228
47
+ - Recall Surprise: 0.4323
48
+ - F1 Surprise: 0.4732
49
+
50
+ ## Model description
51
+
52
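+ A `RobertaForSequenceClassification` model initialized from `roberta-base` and configured for single-label classification over seven emotion labels: anger, disgust, fear, joy, neutral, sadness, and surprise. More information needed on the model's provenance and design choices.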
53
+
54
+ ## Intended uses & limitations
55
+
56
+ More information needed
57
+
58
+ ## Training and evaluation data
59
+
60
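+ The dataset is not named in the training metadata. The run used 197,800 training samples and 22,114 evaluation samples. More information needed on the data source, splits, and preprocessing.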
61
+
62
+ ## Training procedure
63
+
64
+ ### Training hyperparameters
65
+
66
+ The following hyperparameters were used during training:
67
+ - learning_rate: 5e-05
68
+ - train_batch_size: 128
69
+ - eval_batch_size: 128
70
+ - seed: 42
71
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
72
+ - lr_scheduler_type: linear
73
+ - lr_scheduler_warmup_ratio: 0.1
74
+ - num_epochs: 3.0
75
+
76
+ ### Training results
77
+
78
+ | Training Loss | Epoch | Step | Validation Loss | Accuracy | B Acc | Prec | Recall | F1 | Prec Joy | Recall Joy | F1 Joy | Prec Anger | Recall Anger | F1 Anger | Prec Disgust | Recall Disgust | F1 Disgust | Prec Fear | Recall Fear | F1 Fear | Prec Neutral | Recall Neutral | F1 Neutral | Prec Sadness | Recall Sadness | F1 Sadness | Prec Surprise | Recall Surprise | F1 Surprise |
79
+ |:-------------:|:-----:|:----:|:---------------:|:--------:|:------:|:------:|:------:|:------:|:--------:|:----------:|:------:|:----------:|:------------:|:--------:|:------------:|:--------------:|:----------:|:---------:|:-----------:|:-------:|:------------:|:--------------:|:----------:|:------------:|:--------------:|:----------:|:-------------:|:---------------:|:-----------:|
80
+ | 0.9538 | 0.15 | 232 | 0.8701 | 0.6961 | 0.4790 | 0.6837 | 0.6961 | 0.6837 | 0.7401 | 0.6381 | 0.6853 | 0.4622 | 0.5391 | 0.4977 | 0.25 | 0.0018 | 0.0035 | 0.5527 | 0.4292 | 0.4832 | 0.7965 | 0.8618 | 0.8279 | 0.5281 | 0.6431 | 0.5800 | 0.3562 | 0.2398 | 0.2866 |
81
+ | 0.7952 | 0.3 | 464 | 0.8010 | 0.7168 | 0.5242 | 0.7098 | 0.7168 | 0.7025 | 0.8084 | 0.5948 | 0.6853 | 0.5732 | 0.4710 | 0.5171 | 0.4713 | 0.2643 | 0.3387 | 0.6156 | 0.5263 | 0.5675 | 0.7405 | 0.9250 | 0.8226 | 0.6858 | 0.5676 | 0.6211 | 0.4448 | 0.3204 | 0.3725 |
82
+ | 0.7528 | 0.45 | 696 | 0.7560 | 0.7261 | 0.5878 | 0.7309 | 0.7261 | 0.7256 | 0.6969 | 0.7646 | 0.7292 | 0.5550 | 0.5534 | 0.5542 | 0.3409 | 0.4821 | 0.3994 | 0.7225 | 0.4842 | 0.5798 | 0.8476 | 0.8159 | 0.8314 | 0.6118 | 0.7027 | 0.6541 | 0.4957 | 0.3118 | 0.3828 |
83
+ | 0.7334 | 0.6 | 928 | 0.7310 | 0.7370 | 0.5868 | 0.7345 | 0.7370 | 0.7283 | 0.7170 | 0.7458 | 0.7311 | 0.7129 | 0.4116 | 0.5219 | 0.3727 | 0.5696 | 0.4506 | 0.6671 | 0.5626 | 0.6104 | 0.7898 | 0.8859 | 0.8351 | 0.7318 | 0.5844 | 0.6499 | 0.5252 | 0.3473 | 0.4181 |
84
+ | 0.7216 | 0.75 | 1160 | 0.7043 | 0.7448 | 0.6009 | 0.7403 | 0.7448 | 0.7389 | 0.7767 | 0.6826 | 0.7266 | 0.6159 | 0.5386 | 0.5746 | 0.5302 | 0.4393 | 0.4805 | 0.8023 | 0.5602 | 0.6598 | 0.7854 | 0.8926 | 0.8356 | 0.7005 | 0.632 | 0.6645 | 0.4815 | 0.4613 | 0.4712 |
85
+ | 0.7259 | 0.9 | 1392 | 0.6962 | 0.7475 | 0.6082 | 0.7433 | 0.7475 | 0.7412 | 0.7355 | 0.7586 | 0.7469 | 0.6758 | 0.4504 | 0.5405 | 0.3908 | 0.5589 | 0.4600 | 0.6939 | 0.6070 | 0.6475 | 0.8122 | 0.8744 | 0.8421 | 0.6830 | 0.6676 | 0.6752 | 0.5494 | 0.3409 | 0.4207 |
86
+ | 0.6362 | 1.05 | 1624 | 0.6771 | 0.7526 | 0.6055 | 0.7472 | 0.7526 | 0.7484 | 0.7392 | 0.7483 | 0.7437 | 0.5873 | 0.6191 | 0.6028 | 0.5302 | 0.3768 | 0.4405 | 0.7388 | 0.5789 | 0.6492 | 0.8213 | 0.8670 | 0.8435 | 0.7090 | 0.6507 | 0.6786 | 0.5301 | 0.3978 | 0.4545 |
87
+ | 0.621 | 1.2 | 1856 | 0.6779 | 0.7528 | 0.6120 | 0.7494 | 0.7528 | 0.7487 | 0.7107 | 0.7828 | 0.7450 | 0.6508 | 0.5913 | 0.6196 | 0.4980 | 0.4518 | 0.4738 | 0.7963 | 0.5532 | 0.6529 | 0.8165 | 0.8590 | 0.8372 | 0.7499 | 0.6236 | 0.6809 | 0.5078 | 0.4226 | 0.4613 |
88
+ | 0.6241 | 1.35 | 2088 | 0.6849 | 0.7513 | 0.6367 | 0.7526 | 0.7513 | 0.7514 | 0.7429 | 0.7592 | 0.7510 | 0.5795 | 0.6531 | 0.6141 | 0.4372 | 0.4661 | 0.4512 | 0.6462 | 0.6515 | 0.6488 | 0.8492 | 0.8372 | 0.8432 | 0.6887 | 0.6609 | 0.6745 | 0.5271 | 0.4290 | 0.4730 |
89
+ | 0.6188 | 1.5 | 2320 | 0.6713 | 0.7579 | 0.6159 | 0.7539 | 0.7579 | 0.7534 | 0.7071 | 0.7971 | 0.7494 | 0.6343 | 0.6267 | 0.6305 | 0.5877 | 0.3768 | 0.4592 | 0.7247 | 0.6281 | 0.6729 | 0.8361 | 0.8496 | 0.8428 | 0.6943 | 0.6693 | 0.6816 | 0.5919 | 0.3634 | 0.4504 |
90
+ | 0.6182 | 1.65 | 2552 | 0.6608 | 0.7601 | 0.6199 | 0.7567 | 0.7601 | 0.7566 | 0.7143 | 0.7891 | 0.7498 | 0.6163 | 0.6358 | 0.6259 | 0.5607 | 0.3875 | 0.4583 | 0.7591 | 0.6082 | 0.6753 | 0.8375 | 0.8578 | 0.8475 | 0.7324 | 0.6436 | 0.6851 | 0.5381 | 0.4172 | 0.4700 |
91
+ | 0.6392 | 1.8 | 2784 | 0.6542 | 0.7624 | 0.6261 | 0.7593 | 0.7624 | 0.7596 | 0.7513 | 0.7584 | 0.7548 | 0.5970 | 0.6708 | 0.6318 | 0.5711 | 0.3875 | 0.4617 | 0.7482 | 0.6152 | 0.6752 | 0.8379 | 0.8635 | 0.8505 | 0.7076 | 0.668 | 0.6872 | 0.5132 | 0.4194 | 0.4615 |
92
+ | 0.6158 | 1.95 | 3016 | 0.6456 | 0.7649 | 0.6279 | 0.7599 | 0.7649 | 0.7614 | 0.7490 | 0.7548 | 0.7519 | 0.6402 | 0.6378 | 0.6390 | 0.5314 | 0.4232 | 0.4712 | 0.7569 | 0.6117 | 0.6766 | 0.8310 | 0.8753 | 0.8526 | 0.7199 | 0.6627 | 0.6901 | 0.5063 | 0.4301 | 0.4651 |
93
+ | 0.554 | 2.1 | 3248 | 0.6742 | 0.7584 | 0.6346 | 0.7555 | 0.7584 | 0.7564 | 0.7293 | 0.7732 | 0.7506 | 0.6433 | 0.6430 | 0.6432 | 0.5031 | 0.4393 | 0.4690 | 0.7292 | 0.6363 | 0.6796 | 0.8347 | 0.8496 | 0.8421 | 0.7163 | 0.6587 | 0.6863 | 0.5049 | 0.4419 | 0.4713 |
94
+ | 0.5537 | 2.25 | 3480 | 0.6708 | 0.7633 | 0.6283 | 0.7604 | 0.7633 | 0.7605 | 0.7263 | 0.7801 | 0.7523 | 0.6304 | 0.6612 | 0.6455 | 0.5806 | 0.3732 | 0.4543 | 0.7486 | 0.6094 | 0.6718 | 0.8442 | 0.8528 | 0.8485 | 0.6982 | 0.692 | 0.6951 | 0.5356 | 0.4290 | 0.4764 |
95
+ | 0.5375 | 2.4 | 3712 | 0.6712 | 0.7606 | 0.6402 | 0.7592 | 0.7606 | 0.7595 | 0.7373 | 0.7709 | 0.7537 | 0.6245 | 0.6608 | 0.6421 | 0.4827 | 0.4482 | 0.4648 | 0.7319 | 0.6257 | 0.6747 | 0.8454 | 0.8474 | 0.8464 | 0.7006 | 0.6769 | 0.6885 | 0.5204 | 0.4516 | 0.4836 |
96
+ | 0.5175 | 2.55 | 3944 | 0.6625 | 0.7625 | 0.6369 | 0.7600 | 0.7625 | 0.7604 | 0.7422 | 0.7642 | 0.7530 | 0.6335 | 0.6526 | 0.6429 | 0.4481 | 0.4929 | 0.4694 | 0.7482 | 0.6187 | 0.6773 | 0.8374 | 0.8604 | 0.8488 | 0.7252 | 0.6684 | 0.6957 | 0.5321 | 0.4011 | 0.4574 |
97
+ | 0.5182 | 2.7 | 4176 | 0.6621 | 0.7631 | 0.6404 | 0.7602 | 0.7631 | 0.7612 | 0.7343 | 0.7766 | 0.7549 | 0.6491 | 0.6392 | 0.6441 | 0.4739 | 0.4536 | 0.4635 | 0.6784 | 0.6538 | 0.6659 | 0.8444 | 0.8529 | 0.8486 | 0.7109 | 0.684 | 0.6972 | 0.5458 | 0.4226 | 0.4764 |
98
+ | 0.5148 | 2.85 | 4408 | 0.6638 | 0.7637 | 0.6383 | 0.7598 | 0.7637 | 0.7612 | 0.7394 | 0.7741 | 0.7563 | 0.6741 | 0.6205 | 0.6462 | 0.5 | 0.4375 | 0.4667 | 0.6813 | 0.6550 | 0.6679 | 0.8400 | 0.8572 | 0.8485 | 0.6922 | 0.6916 | 0.6919 | 0.5296 | 0.4323 | 0.4760 |
99
+
100
+
101
+ ### Framework versions
102
+
103
+ - Transformers 4.31.0
104
+ - Pytorch 2.0.1+cu117
105
+ - Datasets 2.14.2
106
+ - Tokenizers 0.13.3
all_results.json ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 3.0,
3
+ "eval_accuracy": 0.7617346477344669,
4
+ "eval_b_acc": 0.6394282040393414,
5
+ "eval_f1": 0.7601905030141421,
6
+ "eval_f1_anger": 0.6486744685932649,
7
+ "eval_f1_disgust": 0.46027397260273967,
8
+ "eval_f1_fear": 0.6674786845310596,
9
+ "eval_f1_joy": 0.7546596434359805,
10
+ "eval_f1_neutral": 0.8473718729504358,
11
+ "eval_f1_sadness": 0.6911328926373376,
12
+ "eval_f1_surprise": 0.47321954090641555,
13
+ "eval_loss": 0.6647528409957886,
14
+ "eval_prec": 0.7595317468114701,
15
+ "eval_prec_anger": 0.6466666666666666,
16
+ "eval_prec_disgust": 0.47102803738317756,
17
+ "eval_prec_fear": 0.6963151207115629,
18
+ "eval_prec_joy": 0.7315396700706991,
19
+ "eval_prec_neutral": 0.8457078735739667,
20
+ "eval_prec_sadness": 0.7094057089377632,
21
+ "eval_prec_surprise": 0.5227568270481144,
22
+ "eval_recall": 0.7617346477344669,
23
+ "eval_recall_anger": 0.6506947771921419,
24
+ "eval_recall_disgust": 0.45,
25
+ "eval_recall_fear": 0.6409356725146199,
26
+ "eval_recall_joy": 0.7792887029288703,
27
+ "eval_recall_neutral": 0.8490424333458505,
28
+ "eval_recall_sadness": 0.6737777777777778,
29
+ "eval_recall_surprise": 0.432258064516129,
30
+ "eval_runtime": 14.5728,
31
+ "eval_samples": 22114,
32
+ "eval_samples_per_second": 1517.489,
33
+ "eval_steps_per_second": 11.871,
34
+ "train_loss": 0.6570020180521673,
35
+ "train_runtime": 1377.6596,
36
+ "train_samples": 197800,
37
+ "train_samples_per_second": 430.73,
38
+ "train_steps_per_second": 3.367
39
+ }
config.json ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "roberta-base",
3
+ "architectures": [
4
+ "RobertaForSequenceClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "bos_token_id": 0,
8
+ "classifier_dropout": null,
9
+ "eos_token_id": 2,
10
+ "hidden_act": "gelu",
11
+ "hidden_dropout_prob": 0.1,
12
+ "hidden_size": 768,
13
+ "id2label": {
14
+ "0": "anger",
15
+ "1": "disgust",
16
+ "2": "fear",
17
+ "3": "joy",
18
+ "4": "neutral",
19
+ "5": "sadness",
20
+ "6": "surprise"
21
+ },
22
+ "initializer_range": 0.02,
23
+ "intermediate_size": 3072,
24
+ "label2id": {
25
+ "anger": 0,
26
+ "disgust": 1,
27
+ "fear": 2,
28
+ "joy": 3,
29
+ "neutral": 4,
30
+ "sadness": 5,
31
+ "surprise": 6
32
+ },
33
+ "layer_norm_eps": 1e-05,
34
+ "max_position_embeddings": 514,
35
+ "model_type": "roberta",
36
+ "num_attention_heads": 12,
37
+ "num_hidden_layers": 12,
38
+ "pad_token_id": 1,
39
+ "position_embedding_type": "absolute",
40
+ "problem_type": "single_label_classification",
41
+ "torch_dtype": "float32",
42
+ "transformers_version": "4.31.0",
43
+ "type_vocab_size": 1,
44
+ "use_cache": true,
45
+ "vocab_size": 50265
46
+ }
eval_results.json ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 3.0,
3
+ "eval_accuracy": 0.7617346477344669,
4
+ "eval_b_acc": 0.6394282040393414,
5
+ "eval_f1": 0.7601905030141421,
6
+ "eval_f1_anger": 0.6486744685932649,
7
+ "eval_f1_disgust": 0.46027397260273967,
8
+ "eval_f1_fear": 0.6674786845310596,
9
+ "eval_f1_joy": 0.7546596434359805,
10
+ "eval_f1_neutral": 0.8473718729504358,
11
+ "eval_f1_sadness": 0.6911328926373376,
12
+ "eval_f1_surprise": 0.47321954090641555,
13
+ "eval_loss": 0.6647528409957886,
14
+ "eval_prec": 0.7595317468114701,
15
+ "eval_prec_anger": 0.6466666666666666,
16
+ "eval_prec_disgust": 0.47102803738317756,
17
+ "eval_prec_fear": 0.6963151207115629,
18
+ "eval_prec_joy": 0.7315396700706991,
19
+ "eval_prec_neutral": 0.8457078735739667,
20
+ "eval_prec_sadness": 0.7094057089377632,
21
+ "eval_prec_surprise": 0.5227568270481144,
22
+ "eval_recall": 0.7617346477344669,
23
+ "eval_recall_anger": 0.6506947771921419,
24
+ "eval_recall_disgust": 0.45,
25
+ "eval_recall_fear": 0.6409356725146199,
26
+ "eval_recall_joy": 0.7792887029288703,
27
+ "eval_recall_neutral": 0.8490424333458505,
28
+ "eval_recall_sadness": 0.6737777777777778,
29
+ "eval_recall_surprise": 0.432258064516129,
30
+ "eval_runtime": 14.5728,
31
+ "eval_samples": 22114,
32
+ "eval_samples_per_second": 1517.489,
33
+ "eval_steps_per_second": 11.871
34
+ }
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:52ff68740dbd51618e05d8e926e72bd32831e0cc00fa4d8693a1c4b6ccd39cc0
3
+ size 498673009
special_tokens_map.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "<s>",
3
+ "cls_token": "<s>",
4
+ "eos_token": "</s>",
5
+ "mask_token": {
6
+ "content": "<mask>",
7
+ "lstrip": true,
8
+ "normalized": false,
9
+ "rstrip": false,
10
+ "single_word": false
11
+ },
12
+ "pad_token": "<pad>",
13
+ "sep_token": "</s>",
14
+ "unk_token": "<unk>"
15
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "bos_token": "<s>",
4
+ "clean_up_tokenization_spaces": true,
5
+ "cls_token": "<s>",
6
+ "eos_token": "</s>",
7
+ "errors": "replace",
8
+ "mask_token": "<mask>",
9
+ "model_max_length": 512,
10
+ "pad_token": "<pad>",
11
+ "sep_token": "</s>",
12
+ "tokenizer_class": "RobertaTokenizer",
13
+ "trim_offsets": true,
14
+ "unk_token": "<unk>"
15
+ }
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 3.0,
3
+ "train_loss": 0.6570020180521673,
4
+ "train_runtime": 1377.6596,
5
+ "train_samples": 197800,
6
+ "train_samples_per_second": 430.73,
7
+ "train_steps_per_second": 3.367
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,1259 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 3.0,
5
+ "global_step": 4638,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.03,
12
+ "learning_rate": 5.064655172413793e-06,
13
+ "loss": 1.827,
14
+ "step": 47
15
+ },
16
+ {
17
+ "epoch": 0.06,
18
+ "learning_rate": 1.0129310344827586e-05,
19
+ "loss": 1.288,
20
+ "step": 94
21
+ },
22
+ {
23
+ "epoch": 0.09,
24
+ "learning_rate": 1.5086206896551724e-05,
25
+ "loss": 1.0612,
26
+ "step": 141
27
+ },
28
+ {
29
+ "epoch": 0.12,
30
+ "learning_rate": 2.0150862068965517e-05,
31
+ "loss": 0.9538,
32
+ "step": 188
33
+ },
34
+ {
35
+ "epoch": 0.15,
36
+ "eval_accuracy": 0.6960748846884327,
37
+ "eval_b_acc": 0.4789797300032158,
38
+ "eval_f1": 0.683716357682452,
39
+ "eval_f1_anger": 0.497677504976775,
40
+ "eval_f1_disgust": 0.0035460992907801418,
41
+ "eval_f1_fear": 0.4832126398946675,
42
+ "eval_f1_joy": 0.685316256600382,
43
+ "eval_f1_neutral": 0.827884745457005,
44
+ "eval_f1_sadness": 0.5799599198396794,
45
+ "eval_f1_surprise": 0.2866323907455013,
46
+ "eval_loss": 0.8701004981994629,
47
+ "eval_prec": 0.6836877535141815,
48
+ "eval_prec_anger": 0.462202136400986,
49
+ "eval_prec_disgust": 0.25,
50
+ "eval_prec_fear": 0.552710843373494,
51
+ "eval_prec_joy": 0.7401116233923805,
52
+ "eval_prec_neutral": 0.796529284164859,
53
+ "eval_prec_sadness": 0.5281021897810219,
54
+ "eval_prec_surprise": 0.3562300319488818,
55
+ "eval_recall": 0.6960748846884327,
56
+ "eval_recall_anger": 0.5390512697652132,
57
+ "eval_recall_disgust": 0.0017857142857142857,
58
+ "eval_recall_fear": 0.42923976608187137,
59
+ "eval_recall_joy": 0.6380753138075314,
60
+ "eval_recall_neutral": 0.86180998873451,
61
+ "eval_recall_sadness": 0.6431111111111111,
62
+ "eval_recall_surprise": 0.23978494623655913,
63
+ "eval_runtime": 16.0297,
64
+ "eval_samples_per_second": 1379.566,
65
+ "eval_steps_per_second": 10.792,
66
+ "step": 232
67
+ },
68
+ {
69
+ "epoch": 0.15,
70
+ "learning_rate": 2.521551724137931e-05,
71
+ "loss": 0.8837,
72
+ "step": 235
73
+ },
74
+ {
75
+ "epoch": 0.18,
76
+ "learning_rate": 3.0280172413793107e-05,
77
+ "loss": 0.881,
78
+ "step": 282
79
+ },
80
+ {
81
+ "epoch": 0.21,
82
+ "learning_rate": 3.53448275862069e-05,
83
+ "loss": 0.8462,
84
+ "step": 329
85
+ },
86
+ {
87
+ "epoch": 0.24,
88
+ "learning_rate": 4.0409482758620696e-05,
89
+ "loss": 0.8242,
90
+ "step": 376
91
+ },
92
+ {
93
+ "epoch": 0.27,
94
+ "learning_rate": 4.5474137931034485e-05,
95
+ "loss": 0.7952,
96
+ "step": 423
97
+ },
98
+ {
99
+ "epoch": 0.3,
100
+ "eval_accuracy": 0.7168309668083567,
101
+ "eval_b_acc": 0.5241940962877409,
102
+ "eval_f1": 0.7025228198992769,
103
+ "eval_f1_anger": 0.5170962651236192,
104
+ "eval_f1_disgust": 0.3386727688787185,
105
+ "eval_f1_fear": 0.5674653215636821,
106
+ "eval_f1_joy": 0.6853079426298663,
107
+ "eval_f1_neutral": 0.8225570814375757,
108
+ "eval_f1_sadness": 0.6211089494163424,
109
+ "eval_f1_surprise": 0.37250000000000005,
110
+ "eval_loss": 0.8009697198867798,
111
+ "eval_prec": 0.7097552640353452,
112
+ "eval_prec_anger": 0.5731778425655977,
113
+ "eval_prec_disgust": 0.4713375796178344,
114
+ "eval_prec_fear": 0.615595075239398,
115
+ "eval_prec_joy": 0.8083593972135342,
116
+ "eval_prec_neutral": 0.7405486659150695,
117
+ "eval_prec_sadness": 0.6858216970998926,
118
+ "eval_prec_surprise": 0.44477611940298506,
119
+ "eval_recall": 0.7168309668083567,
120
+ "eval_recall_anger": 0.4710110206037374,
121
+ "eval_recall_disgust": 0.2642857142857143,
122
+ "eval_recall_fear": 0.5263157894736842,
123
+ "eval_recall_joy": 0.5947698744769875,
124
+ "eval_recall_neutral": 0.924990612091626,
125
+ "eval_recall_sadness": 0.5675555555555556,
126
+ "eval_recall_surprise": 0.3204301075268817,
127
+ "eval_runtime": 15.1427,
128
+ "eval_samples_per_second": 1460.377,
129
+ "eval_steps_per_second": 11.425,
130
+ "step": 464
131
+ },
132
+ {
133
+ "epoch": 0.3,
134
+ "learning_rate": 4.9940105414470535e-05,
135
+ "loss": 0.7998,
136
+ "step": 470
137
+ },
138
+ {
139
+ "epoch": 0.33,
140
+ "learning_rate": 4.938907522759943e-05,
141
+ "loss": 0.7996,
142
+ "step": 517
143
+ },
144
+ {
145
+ "epoch": 0.36,
146
+ "learning_rate": 4.882606612362243e-05,
147
+ "loss": 0.776,
148
+ "step": 564
149
+ },
150
+ {
151
+ "epoch": 0.4,
152
+ "learning_rate": 4.826305701964542e-05,
153
+ "loss": 0.8018,
154
+ "step": 611
155
+ },
156
+ {
157
+ "epoch": 0.43,
158
+ "learning_rate": 4.770004791566843e-05,
159
+ "loss": 0.7528,
160
+ "step": 658
161
+ },
162
+ {
163
+ "epoch": 0.45,
164
+ "eval_accuracy": 0.7260558921949896,
165
+ "eval_b_acc": 0.5878316350938373,
166
+ "eval_f1": 0.7255951803151626,
167
+ "eval_f1_anger": 0.5542226487523992,
168
+ "eval_f1_disgust": 0.3994082840236686,
169
+ "eval_f1_fear": 0.5798319327731093,
170
+ "eval_f1_joy": 0.7291770573566084,
171
+ "eval_f1_neutral": 0.8314359514015114,
172
+ "eval_f1_sadness": 0.6541166735622672,
173
+ "eval_f1_surprise": 0.38283828382838286,
174
+ "eval_loss": 0.7560049891471863,
175
+ "eval_prec": 0.7309376250219672,
176
+ "eval_prec_anger": 0.5550216242191254,
177
+ "eval_prec_disgust": 0.3409090909090909,
178
+ "eval_prec_fear": 0.7225130890052356,
179
+ "eval_prec_joy": 0.6968541468064824,
180
+ "eval_prec_neutral": 0.847571679344646,
181
+ "eval_prec_sadness": 0.6118421052631579,
182
+ "eval_prec_surprise": 0.49572649572649574,
183
+ "eval_recall": 0.7260558921949896,
184
+ "eval_recall_anger": 0.5534259702922856,
185
+ "eval_recall_disgust": 0.48214285714285715,
186
+ "eval_recall_fear": 0.4842105263157895,
187
+ "eval_recall_joy": 0.7646443514644351,
188
+ "eval_recall_neutral": 0.8159031167855801,
189
+ "eval_recall_sadness": 0.7026666666666667,
190
+ "eval_recall_surprise": 0.3118279569892473,
191
+ "eval_runtime": 14.7708,
192
+ "eval_samples_per_second": 1497.143,
193
+ "eval_steps_per_second": 11.712,
194
+ "step": 696
195
+ },
196
+ {
197
+ "epoch": 0.46,
198
+ "learning_rate": 4.713703881169142e-05,
199
+ "loss": 0.7459,
200
+ "step": 705
201
+ },
202
+ {
203
+ "epoch": 0.49,
204
+ "learning_rate": 4.657402970771442e-05,
205
+ "loss": 0.7599,
206
+ "step": 752
207
+ },
208
+ {
209
+ "epoch": 0.52,
210
+ "learning_rate": 4.601102060373742e-05,
211
+ "loss": 0.7648,
212
+ "step": 799
213
+ },
214
+ {
215
+ "epoch": 0.55,
216
+ "learning_rate": 4.544801149976042e-05,
217
+ "loss": 0.7436,
218
+ "step": 846
219
+ },
220
+ {
221
+ "epoch": 0.58,
222
+ "learning_rate": 4.488500239578342e-05,
223
+ "loss": 0.7334,
224
+ "step": 893
225
+ },
226
+ {
227
+ "epoch": 0.6,
228
+ "eval_accuracy": 0.7370444062584788,
229
+ "eval_b_acc": 0.5867600905079025,
230
+ "eval_f1": 0.7282740654207969,
231
+ "eval_f1_anger": 0.5218712029161603,
232
+ "eval_f1_disgust": 0.4505649717514124,
233
+ "eval_f1_fear": 0.6104060913705585,
234
+ "eval_f1_joy": 0.7311320754716981,
235
+ "eval_f1_neutral": 0.8351327433628318,
236
+ "eval_f1_sadness": 0.6498640968618731,
237
+ "eval_f1_surprise": 0.4181229773462783,
238
+ "eval_loss": 0.7309762835502625,
239
+ "eval_prec": 0.7344879537976949,
240
+ "eval_prec_anger": 0.7128630705394191,
241
+ "eval_prec_disgust": 0.3726635514018692,
242
+ "eval_prec_fear": 0.6671289875173371,
243
+ "eval_prec_joy": 0.7170152855993563,
244
+ "eval_prec_neutral": 0.7898393036491463,
245
+ "eval_prec_sadness": 0.7317751808569839,
246
+ "eval_prec_surprise": 0.5252032520325203,
247
+ "eval_recall": 0.7370444062584788,
248
+ "eval_recall_anger": 0.411595591758505,
249
+ "eval_recall_disgust": 0.5696428571428571,
250
+ "eval_recall_fear": 0.5625730994152047,
251
+ "eval_recall_joy": 0.74581589958159,
252
+ "eval_recall_neutral": 0.8859369132557267,
253
+ "eval_recall_sadness": 0.5844444444444444,
254
+ "eval_recall_surprise": 0.3473118279569892,
255
+ "eval_runtime": 14.5647,
256
+ "eval_samples_per_second": 1518.332,
257
+ "eval_steps_per_second": 11.878,
258
+ "step": 928
259
+ },
260
+ {
261
+ "epoch": 0.61,
262
+ "learning_rate": 4.432199329180642e-05,
263
+ "loss": 0.7493,
264
+ "step": 940
265
+ },
266
+ {
267
+ "epoch": 0.64,
268
+ "learning_rate": 4.375898418782942e-05,
269
+ "loss": 0.7194,
270
+ "step": 987
271
+ },
272
+ {
273
+ "epoch": 0.67,
274
+ "learning_rate": 4.319597508385242e-05,
275
+ "loss": 0.7158,
276
+ "step": 1034
277
+ },
278
+ {
279
+ "epoch": 0.7,
280
+ "learning_rate": 4.263296597987542e-05,
281
+ "loss": 0.719,
282
+ "step": 1081
283
+ },
284
+ {
285
+ "epoch": 0.73,
286
+ "learning_rate": 4.206995687589842e-05,
287
+ "loss": 0.7216,
288
+ "step": 1128
289
+ },
290
+ {
291
+ "epoch": 0.75,
292
+ "eval_accuracy": 0.7448222845256399,
293
+ "eval_b_acc": 0.6009457685058381,
294
+ "eval_f1": 0.7388819166581033,
295
+ "eval_f1_anger": 0.5746421267893661,
296
+ "eval_f1_disgust": 0.48046875,
297
+ "eval_f1_fear": 0.6597796143250689,
298
+ "eval_f1_joy": 0.7266451397394499,
299
+ "eval_f1_neutral": 0.8355743035416118,
300
+ "eval_f1_sadness": 0.6644859813084112,
301
+ "eval_f1_surprise": 0.4711696869851729,
302
+ "eval_loss": 0.7043296694755554,
303
+ "eval_prec": 0.7402960337790491,
304
+ "eval_prec_anger": 0.6158904109589041,
305
+ "eval_prec_disgust": 0.5301724137931034,
306
+ "eval_prec_fear": 0.8023450586264657,
307
+ "eval_prec_joy": 0.7767198286122352,
308
+ "eval_prec_neutral": 0.7853956715678176,
309
+ "eval_prec_sadness": 0.7004926108374384,
310
+ "eval_prec_surprise": 0.48148148148148145,
311
+ "eval_recall": 0.7448222845256399,
312
+ "eval_recall_anger": 0.5385721130809775,
313
+ "eval_recall_disgust": 0.4392857142857143,
314
+ "eval_recall_fear": 0.560233918128655,
315
+ "eval_recall_joy": 0.6826359832635983,
316
+ "eval_recall_neutral": 0.8926023282012767,
317
+ "eval_recall_sadness": 0.632,
318
+ "eval_recall_surprise": 0.4612903225806452,
319
+ "eval_runtime": 14.6801,
320
+ "eval_samples_per_second": 1506.393,
321
+ "eval_steps_per_second": 11.785,
322
+ "step": 1160
323
+ },
324
+ {
325
+ "epoch": 0.76,
326
+ "learning_rate": 4.150694777192142e-05,
327
+ "loss": 0.7134,
328
+ "step": 1175
329
+ },
330
+ {
331
+ "epoch": 0.79,
332
+ "learning_rate": 4.094393866794442e-05,
333
+ "loss": 0.7029,
334
+ "step": 1222
335
+ },
336
+ {
337
+ "epoch": 0.82,
338
+ "learning_rate": 4.0380929563967415e-05,
339
+ "loss": 0.6945,
340
+ "step": 1269
341
+ },
342
+ {
343
+ "epoch": 0.85,
344
+ "learning_rate": 3.981792045999042e-05,
345
+ "loss": 0.7033,
346
+ "step": 1316
347
+ },
348
+ {
349
+ "epoch": 0.88,
350
+ "learning_rate": 3.9254911356013415e-05,
351
+ "loss": 0.7259,
352
+ "step": 1363
353
+ },
354
+ {
355
+ "epoch": 0.9,
356
+ "eval_accuracy": 0.7475354978746496,
357
+ "eval_b_acc": 0.6082480515560814,
358
+ "eval_f1": 0.741168197686153,
359
+ "eval_f1_anger": 0.5405405405405406,
360
+ "eval_f1_disgust": 0.4599559147685525,
361
+ "eval_f1_fear": 0.6475358702432937,
362
+ "eval_f1_joy": 0.7468589083419156,
363
+ "eval_f1_neutral": 0.8421338155515371,
364
+ "eval_f1_sadness": 0.6752079118903124,
365
+ "eval_f1_surprise": 0.42070338420703385,
366
+ "eval_loss": 0.6962281465530396,
367
+ "eval_prec": 0.7432901970674257,
368
+ "eval_prec_anger": 0.6757728253055356,
369
+ "eval_prec_disgust": 0.39076154806491886,
370
+ "eval_prec_fear": 0.6938502673796791,
371
+ "eval_prec_joy": 0.7354969574036511,
372
+ "eval_prec_neutral": 0.8121730031391698,
373
+ "eval_prec_sadness": 0.683037744429286,
374
+ "eval_prec_surprise": 0.5493934142114385,
375
+ "eval_recall": 0.7475354978746496,
376
+ "eval_recall_anger": 0.4504072831816004,
377
+ "eval_recall_disgust": 0.5589285714285714,
378
+ "eval_recall_fear": 0.6070175438596491,
379
+ "eval_recall_joy": 0.7585774058577406,
380
+ "eval_recall_neutral": 0.8743897859556891,
381
+ "eval_recall_sadness": 0.6675555555555556,
382
+ "eval_recall_surprise": 0.34086021505376346,
383
+ "eval_runtime": 14.6912,
384
+ "eval_samples_per_second": 1505.258,
385
+ "eval_steps_per_second": 11.776,
386
+ "step": 1392
387
+ },
388
+ {
389
+ "epoch": 0.91,
390
+ "learning_rate": 3.8691902252036415e-05,
391
+ "loss": 0.7143,
392
+ "step": 1410
393
+ },
394
+ {
395
+ "epoch": 0.94,
396
+ "learning_rate": 3.812889314805942e-05,
397
+ "loss": 0.6903,
398
+ "step": 1457
399
+ },
400
+ {
401
+ "epoch": 0.97,
402
+ "learning_rate": 3.7565884044082415e-05,
403
+ "loss": 0.6812,
404
+ "step": 1504
405
+ },
406
+ {
407
+ "epoch": 1.0,
408
+ "learning_rate": 3.7002874940105415e-05,
409
+ "loss": 0.7182,
410
+ "step": 1551
411
+ },
412
+ {
413
+ "epoch": 1.03,
414
+ "learning_rate": 3.6439865836128415e-05,
415
+ "loss": 0.6362,
416
+ "step": 1598
417
+ },
418
+ {
419
+ "epoch": 1.05,
420
+ "eval_accuracy": 0.7526453830152844,
421
+ "eval_b_acc": 0.6055170494206373,
422
+ "eval_f1": 0.7483739984911967,
423
+ "eval_f1_anger": 0.602752507581059,
424
+ "eval_f1_disgust": 0.4405010438413361,
425
+ "eval_f1_fear": 0.6491803278688525,
426
+ "eval_f1_joy": 0.7437363551304709,
427
+ "eval_f1_neutral": 0.8435330654000731,
428
+ "eval_f1_sadness": 0.6785631517960602,
429
+ "eval_f1_surprise": 0.45454545454545453,
430
+ "eval_loss": 0.6771188974380493,
431
+ "eval_prec": 0.7472415916170276,
432
+ "eval_prec_anger": 0.5872727272727273,
433
+ "eval_prec_disgust": 0.5301507537688442,
434
+ "eval_prec_fear": 0.7388059701492538,
435
+ "eval_prec_joy": 0.739202314527795,
436
+ "eval_prec_neutral": 0.8213269299181786,
437
+ "eval_prec_sadness": 0.7089588377723971,
438
+ "eval_prec_surprise": 0.5300859598853869,
439
+ "eval_recall": 0.7526453830152844,
440
+ "eval_recall_anger": 0.6190704360325826,
441
+ "eval_recall_disgust": 0.3767857142857143,
442
+ "eval_recall_fear": 0.5789473684210527,
443
+ "eval_recall_joy": 0.748326359832636,
444
+ "eval_recall_neutral": 0.8669733383402178,
445
+ "eval_recall_sadness": 0.6506666666666666,
446
+ "eval_recall_surprise": 0.3978494623655914,
447
+ "eval_runtime": 14.8683,
448
+ "eval_samples_per_second": 1487.33,
449
+ "eval_steps_per_second": 11.636,
450
+ "step": 1624
451
+ },
452
+ {
453
+ "epoch": 1.06,
454
+ "learning_rate": 3.5876856732151415e-05,
455
+ "loss": 0.6432,
456
+ "step": 1645
457
+ },
458
+ {
459
+ "epoch": 1.09,
460
+ "learning_rate": 3.5313847628174415e-05,
461
+ "loss": 0.652,
462
+ "step": 1692
463
+ },
464
+ {
465
+ "epoch": 1.12,
466
+ "learning_rate": 3.4750838524197415e-05,
467
+ "loss": 0.6474,
468
+ "step": 1739
469
+ },
470
+ {
471
+ "epoch": 1.16,
472
+ "learning_rate": 3.4187829420220415e-05,
473
+ "loss": 0.6364,
474
+ "step": 1786
475
+ },
476
+ {
477
+ "epoch": 1.19,
478
+ "learning_rate": 3.362482031624341e-05,
479
+ "loss": 0.621,
480
+ "step": 1833
481
+ },
482
+ {
483
+ "epoch": 1.2,
484
+ "eval_accuracy": 0.7528262639052185,
485
+ "eval_b_acc": 0.6120366345893286,
486
+ "eval_f1": 0.7487165396015127,
487
+ "eval_f1_anger": 0.6196334421290485,
488
+ "eval_f1_disgust": 0.47378277153558057,
489
+ "eval_f1_fear": 0.6528640441683919,
490
+ "eval_f1_joy": 0.7450472872075661,
491
+ "eval_f1_neutral": 0.8372220697227559,
492
+ "eval_f1_sadness": 0.6809026935209901,
493
+ "eval_f1_surprise": 0.4612676056338028,
494
+ "eval_loss": 0.67792809009552,
495
+ "eval_prec": 0.749407175516463,
496
+ "eval_prec_anger": 0.6508438818565401,
497
+ "eval_prec_disgust": 0.49803149606299213,
498
+ "eval_prec_fear": 0.7962962962962963,
499
+ "eval_prec_joy": 0.7107312440645774,
500
+ "eval_prec_neutral": 0.8165268606103873,
501
+ "eval_prec_sadness": 0.7498663816141101,
502
+ "eval_prec_surprise": 0.5077519379844961,
503
+ "eval_recall": 0.7528262639052185,
504
+ "eval_recall_anger": 0.5912793483469094,
505
+ "eval_recall_disgust": 0.4517857142857143,
506
+ "eval_recall_fear": 0.5532163742690058,
507
+ "eval_recall_joy": 0.7828451882845189,
508
+ "eval_recall_neutral": 0.8589936162223056,
509
+ "eval_recall_sadness": 0.6235555555555555,
510
+ "eval_recall_surprise": 0.42258064516129035,
511
+ "eval_runtime": 14.7115,
512
+ "eval_samples_per_second": 1503.179,
513
+ "eval_steps_per_second": 11.76,
514
+ "step": 1856
515
+ },
516
+ {
517
+ "epoch": 1.22,
518
+ "learning_rate": 3.3061811212266415e-05,
519
+ "loss": 0.6195,
520
+ "step": 1880
521
+ },
522
+ {
523
+ "epoch": 1.25,
524
+ "learning_rate": 3.249880210828941e-05,
525
+ "loss": 0.6332,
526
+ "step": 1927
527
+ },
528
+ {
529
+ "epoch": 1.28,
530
+ "learning_rate": 3.1935793004312415e-05,
531
+ "loss": 0.6251,
532
+ "step": 1974
533
+ },
534
+ {
535
+ "epoch": 1.31,
536
+ "learning_rate": 3.1372783900335415e-05,
537
+ "loss": 0.63,
538
+ "step": 2021
539
+ },
540
+ {
541
+ "epoch": 1.34,
542
+ "learning_rate": 3.080977479635841e-05,
543
+ "loss": 0.6241,
544
+ "step": 2068
545
+ },
546
+ {
547
+ "epoch": 1.35,
548
+ "eval_accuracy": 0.7512887763407796,
549
+ "eval_b_acc": 0.6367091163081916,
550
+ "eval_f1": 0.7514440246825209,
551
+ "eval_f1_anger": 0.6141022752872269,
552
+ "eval_f1_disgust": 0.4511668107173725,
553
+ "eval_f1_fear": 0.6488060570762958,
554
+ "eval_f1_joy": 0.7509570615623384,
555
+ "eval_f1_neutral": 0.843150231634679,
556
+ "eval_f1_sadness": 0.6745293717396236,
557
+ "eval_f1_surprise": 0.4730290456431535,
558
+ "eval_loss": 0.6849319338798523,
559
+ "eval_prec": 0.7525985800729426,
560
+ "eval_prec_anger": 0.5795068027210885,
561
+ "eval_prec_disgust": 0.4371859296482412,
562
+ "eval_prec_fear": 0.6461716937354989,
563
+ "eval_prec_joy": 0.7428863868986694,
564
+ "eval_prec_neutral": 0.849171586364502,
565
+ "eval_prec_sadness": 0.6887447892542844,
566
+ "eval_prec_surprise": 0.5270805812417437,
567
+ "eval_recall": 0.7512887763407796,
568
+ "eval_recall_anger": 0.6530905606133206,
569
+ "eval_recall_disgust": 0.4660714285714286,
570
+ "eval_recall_fear": 0.6514619883040935,
571
+ "eval_recall_joy": 0.7592050209205021,
572
+ "eval_recall_neutral": 0.8372136687945926,
573
+ "eval_recall_sadness": 0.6608888888888889,
574
+ "eval_recall_surprise": 0.4290322580645161,
575
+ "eval_runtime": 14.591,
576
+ "eval_samples_per_second": 1515.587,
577
+ "eval_steps_per_second": 11.857,
578
+ "step": 2088
579
+ },
580
+ {
581
+ "epoch": 1.37,
582
+ "learning_rate": 3.024676569238141e-05,
583
+ "loss": 0.5997,
584
+ "step": 2115
585
+ },
586
+ {
587
+ "epoch": 1.4,
588
+ "learning_rate": 2.968375658840441e-05,
589
+ "loss": 0.6348,
590
+ "step": 2162
591
+ },
592
+ {
593
+ "epoch": 1.43,
594
+ "learning_rate": 2.9120747484427408e-05,
595
+ "loss": 0.6269,
596
+ "step": 2209
597
+ },
598
+ {
599
+ "epoch": 1.46,
600
+ "learning_rate": 2.855773838045041e-05,
601
+ "loss": 0.6411,
602
+ "step": 2256
603
+ },
604
+ {
605
+ "epoch": 1.49,
606
+ "learning_rate": 2.7994729276473408e-05,
607
+ "loss": 0.6188,
608
+ "step": 2303
609
+ },
610
+ {
611
+ "epoch": 1.5,
612
+ "eval_accuracy": 0.7578909288233698,
613
+ "eval_b_acc": 0.6158634091154939,
614
+ "eval_f1": 0.7533929534836733,
615
+ "eval_f1_anger": 0.6305133767172812,
616
+ "eval_f1_disgust": 0.45919477693144717,
617
+ "eval_f1_fear": 0.6729323308270676,
618
+ "eval_f1_joy": 0.7494099134539732,
619
+ "eval_f1_neutral": 0.84280126653008,
620
+ "eval_f1_sadness": 0.681602172437203,
621
+ "eval_f1_surprise": 0.4503664223850767,
622
+ "eval_loss": 0.6713131070137024,
623
+ "eval_prec": 0.7538941653702528,
624
+ "eval_prec_anger": 0.6343355965082444,
625
+ "eval_prec_disgust": 0.5877437325905293,
626
+ "eval_prec_fear": 0.7246963562753036,
627
+ "eval_prec_joy": 0.7071269487750557,
628
+ "eval_prec_neutral": 0.8361049519586105,
629
+ "eval_prec_sadness": 0.69432918395574,
630
+ "eval_prec_surprise": 0.5919439579684763,
631
+ "eval_recall": 0.7578909288233698,
632
+ "eval_recall_anger": 0.6267369429803545,
633
+ "eval_recall_disgust": 0.3767857142857143,
634
+ "eval_recall_fear": 0.6280701754385964,
635
+ "eval_recall_joy": 0.797071129707113,
636
+ "eval_recall_neutral": 0.8496057078482914,
637
+ "eval_recall_sadness": 0.6693333333333333,
638
+ "eval_recall_surprise": 0.3634408602150538,
639
+ "eval_runtime": 14.588,
640
+ "eval_samples_per_second": 1515.9,
641
+ "eval_steps_per_second": 11.859,
642
+ "step": 2320
643
+ },
644
+ {
645
+ "epoch": 1.52,
646
+ "learning_rate": 2.7431720172496405e-05,
647
+ "loss": 0.622,
648
+ "step": 2350
649
+ },
650
+ {
651
+ "epoch": 1.55,
652
+ "learning_rate": 2.6868711068519408e-05,
653
+ "loss": 0.6278,
654
+ "step": 2397
655
+ },
656
+ {
657
+ "epoch": 1.58,
658
+ "learning_rate": 2.6305701964542405e-05,
659
+ "loss": 0.6227,
660
+ "step": 2444
661
+ },
662
+ {
663
+ "epoch": 1.61,
664
+ "learning_rate": 2.5742692860565405e-05,
665
+ "loss": 0.6337,
666
+ "step": 2491
667
+ },
668
+ {
669
+ "epoch": 1.64,
670
+ "learning_rate": 2.5179683756588408e-05,
671
+ "loss": 0.6182,
672
+ "step": 2538
673
+ },
674
+ {
675
+ "epoch": 1.65,
676
+ "eval_accuracy": 0.760106719725061,
677
+ "eval_b_acc": 0.6198832054514857,
678
+ "eval_f1": 0.7565733944354605,
679
+ "eval_f1_anger": 0.625943396226415,
680
+ "eval_f1_disgust": 0.4582893347412883,
681
+ "eval_f1_fear": 0.6753246753246753,
682
+ "eval_f1_joy": 0.7498260610277309,
683
+ "eval_f1_neutral": 0.8475095074668398,
684
+ "eval_f1_sadness": 0.6851194700733381,
685
+ "eval_f1_surprise": 0.4700181708055723,
686
+ "eval_loss": 0.6608244180679321,
687
+ "eval_prec": 0.7566647637236428,
688
+ "eval_prec_anger": 0.6163492800743149,
689
+ "eval_prec_disgust": 0.5607235142118863,
690
+ "eval_prec_fear": 0.7591240875912408,
691
+ "eval_prec_joy": 0.7142586631319826,
692
+ "eval_prec_neutral": 0.8374885426214482,
693
+ "eval_prec_sadness": 0.7324228629236217,
694
+ "eval_prec_surprise": 0.5381414701803051,
695
+ "eval_recall": 0.760106719725061,
696
+ "eval_recall_anger": 0.6358409199808337,
697
+ "eval_recall_disgust": 0.3875,
698
+ "eval_recall_fear": 0.6081871345029239,
699
+ "eval_recall_joy": 0.7891213389121339,
700
+ "eval_recall_neutral": 0.8577731881336839,
701
+ "eval_recall_sadness": 0.6435555555555555,
702
+ "eval_recall_surprise": 0.4172043010752688,
703
+ "eval_runtime": 14.5162,
704
+ "eval_samples_per_second": 1523.406,
705
+ "eval_steps_per_second": 11.918,
706
+ "step": 2552
707
+ },
708
+ {
709
+ "epoch": 1.67,
710
+ "learning_rate": 2.4616674652611405e-05,
711
+ "loss": 0.6078,
712
+ "step": 2585
713
+ },
714
+ {
715
+ "epoch": 1.7,
716
+ "learning_rate": 2.4053665548634405e-05,
717
+ "loss": 0.6167,
718
+ "step": 2632
719
+ },
720
+ {
721
+ "epoch": 1.73,
722
+ "learning_rate": 2.3490656444657405e-05,
723
+ "loss": 0.6329,
724
+ "step": 2679
725
+ },
726
+ {
727
+ "epoch": 1.76,
728
+ "learning_rate": 2.29396262577863e-05,
729
+ "loss": 0.6153,
730
+ "step": 2726
731
+ },
732
+ {
733
+ "epoch": 1.79,
734
+ "learning_rate": 2.2376617153809296e-05,
735
+ "loss": 0.6392,
736
+ "step": 2773
737
+ },
738
+ {
739
+ "epoch": 1.8,
740
+ "eval_accuracy": 0.7623677308492358,
741
+ "eval_b_acc": 0.626106698297278,
742
+ "eval_f1": 0.7595959808260688,
743
+ "eval_f1_anger": 0.6317689530685919,
744
+ "eval_f1_disgust": 0.46170212765957447,
745
+ "eval_f1_fear": 0.6752246469833119,
746
+ "eval_f1_joy": 0.7548152004164499,
747
+ "eval_f1_neutral": 0.8505247584261871,
748
+ "eval_f1_sadness": 0.6872427983539094,
749
+ "eval_f1_surprise": 0.4615384615384616,
750
+ "eval_loss": 0.6541842818260193,
751
+ "eval_prec": 0.7593266897219236,
752
+ "eval_prec_anger": 0.5970149253731343,
753
+ "eval_prec_disgust": 0.5710526315789474,
754
+ "eval_prec_fear": 0.748221906116643,
755
+ "eval_prec_joy": 0.7512953367875648,
756
+ "eval_prec_neutral": 0.8379338617108499,
757
+ "eval_prec_sadness": 0.7076271186440678,
758
+ "eval_prec_surprise": 0.5131578947368421,
759
+ "eval_recall": 0.7623677308492358,
760
+ "eval_recall_anger": 0.6708193579300431,
761
+ "eval_recall_disgust": 0.3875,
762
+ "eval_recall_fear": 0.6152046783625731,
763
+ "eval_recall_joy": 0.7583682008368201,
764
+ "eval_recall_neutral": 0.8634998122418325,
765
+ "eval_recall_sadness": 0.668,
766
+ "eval_recall_surprise": 0.41935483870967744,
767
+ "eval_runtime": 14.6661,
768
+ "eval_samples_per_second": 1507.827,
769
+ "eval_steps_per_second": 11.796,
770
+ "step": 2784
771
+ },
772
+ {
773
+ "epoch": 1.82,
774
+ "learning_rate": 2.1813608049832296e-05,
775
+ "loss": 0.6221,
776
+ "step": 2820
777
+ },
778
+ {
779
+ "epoch": 1.85,
780
+ "learning_rate": 2.1250598945855296e-05,
781
+ "loss": 0.6109,
782
+ "step": 2867
783
+ },
784
+ {
785
+ "epoch": 1.88,
786
+ "learning_rate": 2.0687589841878296e-05,
787
+ "loss": 0.6029,
788
+ "step": 2914
789
+ },
790
+ {
791
+ "epoch": 1.92,
792
+ "learning_rate": 2.0124580737901292e-05,
793
+ "loss": 0.6243,
794
+ "step": 2961
795
+ },
796
+ {
797
+ "epoch": 1.95,
798
+ "learning_rate": 1.9561571633924296e-05,
799
+ "loss": 0.6158,
800
+ "step": 3008
801
+ },
802
+ {
803
+ "epoch": 1.95,
804
+ "eval_accuracy": 0.764854843085828,
805
+ "eval_b_acc": 0.6279403178124943,
806
+ "eval_f1": 0.761380516026678,
807
+ "eval_f1_anger": 0.6389822371579453,
808
+ "eval_f1_disgust": 0.4711729622266402,
809
+ "eval_f1_fear": 0.6765847347994824,
810
+ "eval_f1_joy": 0.751901635927894,
811
+ "eval_f1_neutral": 0.8525969275786394,
812
+ "eval_f1_sadness": 0.6901180282342051,
813
+ "eval_f1_surprise": 0.46511627906976744,
814
+ "eval_loss": 0.6456097960472107,
815
+ "eval_prec": 0.7598739530313577,
816
+ "eval_prec_anger": 0.6402116402116402,
817
+ "eval_prec_disgust": 0.531390134529148,
818
+ "eval_prec_fear": 0.7568740955137482,
819
+ "eval_prec_joy": 0.7490139090720366,
820
+ "eval_prec_neutral": 0.8310160427807487,
821
+ "eval_prec_sadness": 0.7199420569773056,
822
+ "eval_prec_surprise": 0.5063291139240507,
823
+ "eval_recall": 0.764854843085828,
824
+ "eval_recall_anger": 0.6377575467177767,
825
+ "eval_recall_disgust": 0.4232142857142857,
826
+ "eval_recall_fear": 0.6116959064327485,
827
+ "eval_recall_joy": 0.7548117154811715,
828
+ "eval_recall_neutral": 0.8753285767930905,
829
+ "eval_recall_sadness": 0.6626666666666666,
830
+ "eval_recall_surprise": 0.43010752688172044,
831
+ "eval_runtime": 14.622,
832
+ "eval_samples_per_second": 1512.381,
833
+ "eval_steps_per_second": 11.832,
834
+ "step": 3016
835
+ },
836
+ {
837
+ "epoch": 1.98,
838
+ "learning_rate": 1.8998562529947296e-05,
839
+ "loss": 0.5901,
840
+ "step": 3055
841
+ },
842
+ {
843
+ "epoch": 2.01,
844
+ "learning_rate": 1.8435553425970292e-05,
845
+ "loss": 0.5923,
846
+ "step": 3102
847
+ },
848
+ {
849
+ "epoch": 2.04,
850
+ "learning_rate": 1.7872544321993292e-05,
851
+ "loss": 0.5239,
852
+ "step": 3149
853
+ },
854
+ {
855
+ "epoch": 2.07,
856
+ "learning_rate": 1.7309535218016292e-05,
857
+ "loss": 0.5329,
858
+ "step": 3196
859
+ },
860
+ {
861
+ "epoch": 2.1,
862
+ "learning_rate": 1.6758505031145187e-05,
863
+ "loss": 0.554,
864
+ "step": 3243
865
+ },
866
+ {
867
+ "epoch": 2.1,
868
+ "eval_accuracy": 0.7583883512706883,
869
+ "eval_b_acc": 0.634571558597101,
870
+ "eval_f1": 0.7563700086517053,
871
+ "eval_f1_anger": 0.643182362808531,
872
+ "eval_f1_disgust": 0.4690181124880839,
873
+ "eval_f1_fear": 0.6795752654590881,
874
+ "eval_f1_joy": 0.7506092607636069,
875
+ "eval_f1_neutral": 0.842095468502838,
876
+ "eval_f1_sadness": 0.6862699699004399,
877
+ "eval_f1_surprise": 0.47133027522935783,
878
+ "eval_loss": 0.6742041707038879,
879
+ "eval_prec": 0.7554687165264222,
880
+ "eval_prec_anger": 0.6433365292425696,
881
+ "eval_prec_disgust": 0.5030674846625767,
882
+ "eval_prec_fear": 0.7292225201072386,
883
+ "eval_prec_joy": 0.7292817679558011,
884
+ "eval_prec_neutral": 0.8347168419110865,
885
+ "eval_prec_sadness": 0.7162880618656355,
886
+ "eval_prec_surprise": 0.504914004914005,
887
+ "eval_recall": 0.7583883512706883,
888
+ "eval_recall_anger": 0.6430282702443699,
889
+ "eval_recall_disgust": 0.4392857142857143,
890
+ "eval_recall_fear": 0.6362573099415205,
891
+ "eval_recall_joy": 0.7732217573221757,
892
+ "eval_recall_neutral": 0.8496057078482914,
893
+ "eval_recall_sadness": 0.6586666666666666,
894
+ "eval_recall_surprise": 0.44193548387096776,
895
+ "eval_runtime": 14.8261,
896
+ "eval_samples_per_second": 1491.559,
897
+ "eval_steps_per_second": 11.669,
898
+ "step": 3248
899
+ },
900
+ {
901
+ "epoch": 2.13,
902
+ "learning_rate": 1.6195495927168187e-05,
903
+ "loss": 0.5568,
904
+ "step": 3290
905
+ },
906
+ {
907
+ "epoch": 2.16,
908
+ "learning_rate": 1.5632486823191184e-05,
909
+ "loss": 0.5375,
910
+ "step": 3337
911
+ },
912
+ {
913
+ "epoch": 2.19,
914
+ "learning_rate": 1.5069477719214184e-05,
915
+ "loss": 0.5198,
916
+ "step": 3384
917
+ },
918
+ {
919
+ "epoch": 2.22,
920
+ "learning_rate": 1.4506468615237184e-05,
921
+ "loss": 0.5255,
922
+ "step": 3431
923
+ },
924
+ {
925
+ "epoch": 2.25,
926
+ "learning_rate": 1.3943459511260182e-05,
927
+ "loss": 0.5537,
928
+ "step": 3478
929
+ },
930
+ {
931
+ "epoch": 2.25,
932
+ "eval_accuracy": 0.7632721352989057,
933
+ "eval_b_acc": 0.6282518018397625,
934
+ "eval_f1": 0.7604673635172584,
935
+ "eval_f1_anger": 0.6454630495790459,
936
+ "eval_f1_disgust": 0.45434782608695656,
937
+ "eval_f1_fear": 0.6718246292714378,
938
+ "eval_f1_joy": 0.7522695178535405,
939
+ "eval_f1_neutral": 0.8484961703717542,
940
+ "eval_f1_sadness": 0.6950892857142857,
941
+ "eval_f1_surprise": 0.47641791044776116,
942
+ "eval_loss": 0.670791745185852,
943
+ "eval_prec": 0.7603587760472442,
944
+ "eval_prec_anger": 0.6304248515303792,
945
+ "eval_prec_disgust": 0.5805555555555556,
946
+ "eval_prec_fear": 0.7485632183908046,
947
+ "eval_prec_joy": 0.726334242306194,
948
+ "eval_prec_neutral": 0.8442379182156133,
949
+ "eval_prec_sadness": 0.6982062780269058,
950
+ "eval_prec_surprise": 0.5355704697986577,
951
+ "eval_recall": 0.7632721352989057,
952
+ "eval_recall_anger": 0.6612362242453282,
953
+ "eval_recall_disgust": 0.3732142857142857,
954
+ "eval_recall_fear": 0.6093567251461989,
955
+ "eval_recall_joy": 0.7801255230125523,
956
+ "eval_recall_neutral": 0.8527975966954563,
957
+ "eval_recall_sadness": 0.692,
958
+ "eval_recall_surprise": 0.4290322580645161,
959
+ "eval_runtime": 14.5517,
960
+ "eval_samples_per_second": 1519.68,
961
+ "eval_steps_per_second": 11.889,
962
+ "step": 3480
963
+ },
964
+ {
965
+ "epoch": 2.28,
966
+ "learning_rate": 1.3380450407283182e-05,
967
+ "loss": 0.5305,
968
+ "step": 3525
969
+ },
970
+ {
971
+ "epoch": 2.31,
972
+ "learning_rate": 1.2817441303306182e-05,
973
+ "loss": 0.5313,
974
+ "step": 3572
975
+ },
976
+ {
977
+ "epoch": 2.34,
978
+ "learning_rate": 1.2254432199329182e-05,
979
+ "loss": 0.5182,
980
+ "step": 3619
981
+ },
982
+ {
983
+ "epoch": 2.37,
984
+ "learning_rate": 1.169142309535218e-05,
985
+ "loss": 0.5375,
986
+ "step": 3666
987
+ },
988
+ {
989
+ "epoch": 2.4,
990
+ "eval_accuracy": 0.7605589219498959,
991
+ "eval_b_acc": 0.6402110359246711,
992
+ "eval_f1": 0.7594675823716318,
993
+ "eval_f1_anger": 0.6421420256111757,
994
+ "eval_f1_disgust": 0.4648148148148148,
995
+ "eval_f1_fear": 0.6746532156368222,
996
+ "eval_f1_joy": 0.7537328697075065,
997
+ "eval_f1_neutral": 0.8463990997749438,
998
+ "eval_f1_sadness": 0.6885171790235082,
999
+ "eval_f1_surprise": 0.4835924006908463,
1000
+ "eval_loss": 0.671157717704773,
1001
+ "eval_prec": 0.7592351302120464,
1002
+ "eval_prec_anger": 0.6245471014492754,
1003
+ "eval_prec_disgust": 0.4826923076923077,
1004
+ "eval_prec_fear": 0.7318741450068399,
1005
+ "eval_prec_joy": 0.7372949179671868,
1006
+ "eval_prec_neutral": 0.8454477332334207,
1007
+ "eval_prec_sadness": 0.7005519779208832,
1008
+ "eval_prec_surprise": 0.5204460966542751,
1009
+ "eval_recall": 0.7605589219498959,
1010
+ "eval_recall_anger": 0.6607570675610924,
1011
+ "eval_recall_disgust": 0.44821428571428573,
1012
+ "eval_recall_fear": 0.6257309941520468,
1013
+ "eval_recall_joy": 0.7709205020920502,
1014
+ "eval_recall_neutral": 0.847352609838528,
1015
+ "eval_recall_sadness": 0.6768888888888889,
1016
+ "eval_recall_surprise": 0.45161290322580644,
1017
+ "eval_runtime": 14.5753,
1018
+ "eval_samples_per_second": 1517.225,
1019
+ "eval_steps_per_second": 11.869,
1020
+ "step": 3712
1021
+ },
1022
+ {
1023
+ "epoch": 2.4,
1024
+ "learning_rate": 1.112841399137518e-05,
1025
+ "loss": 0.5266,
1026
+ "step": 3713
1027
+ },
1028
+ {
1029
+ "epoch": 2.43,
1030
+ "learning_rate": 1.056540488739818e-05,
1031
+ "loss": 0.5276,
1032
+ "step": 3760
1033
+ },
1034
+ {
1035
+ "epoch": 2.46,
1036
+ "learning_rate": 1.000239578342118e-05,
1037
+ "loss": 0.5432,
1038
+ "step": 3807
1039
+ },
1040
+ {
1041
+ "epoch": 2.49,
1042
+ "learning_rate": 9.439386679444178e-06,
1043
+ "loss": 0.5262,
1044
+ "step": 3854
1045
+ },
1046
+ {
1047
+ "epoch": 2.52,
1048
+ "learning_rate": 8.876377575467178e-06,
1049
+ "loss": 0.5175,
1050
+ "step": 3901
1051
+ },
1052
+ {
1053
+ "epoch": 2.55,
1054
+ "eval_accuracy": 0.7625033915166862,
1055
+ "eval_b_acc": 0.6369042077487537,
1056
+ "eval_f1": 0.7603749093215698,
1057
+ "eval_f1_anger": 0.6429077177248053,
1058
+ "eval_f1_disgust": 0.46938775510204084,
1059
+ "eval_f1_fear": 0.677336747759283,
1060
+ "eval_f1_joy": 0.7530406101834674,
1061
+ "eval_f1_neutral": 0.8487682904241526,
1062
+ "eval_f1_sadness": 0.6956521739130433,
1063
+ "eval_f1_surprise": 0.457388105456775,
1064
+ "eval_loss": 0.66249018907547,
1065
+ "eval_prec": 0.7600295878449174,
1066
+ "eval_prec_anger": 0.6334883720930232,
1067
+ "eval_prec_disgust": 0.44805194805194803,
1068
+ "eval_prec_fear": 0.7482319660537482,
1069
+ "eval_prec_joy": 0.7421779764323446,
1070
+ "eval_prec_neutral": 0.8374451754385965,
1071
+ "eval_prec_sadness": 0.7251687560270009,
1072
+ "eval_prec_surprise": 0.5320970042796006,
1073
+ "eval_recall": 0.7625033915166862,
1074
+ "eval_recall_anger": 0.6526114039290848,
1075
+ "eval_recall_disgust": 0.4928571428571429,
1076
+ "eval_recall_fear": 0.6187134502923977,
1077
+ "eval_recall_joy": 0.7642259414225941,
1078
+ "eval_recall_neutral": 0.8604018024784078,
1079
+ "eval_recall_sadness": 0.6684444444444444,
1080
+ "eval_recall_surprise": 0.4010752688172043,
1081
+ "eval_runtime": 14.6386,
1082
+ "eval_samples_per_second": 1510.666,
1083
+ "eval_steps_per_second": 11.818,
1084
+ "step": 3944
1085
+ },
1086
+ {
1087
+ "epoch": 2.55,
1088
+ "learning_rate": 8.313368471490178e-06,
1089
+ "loss": 0.5337,
1090
+ "step": 3948
1091
+ },
1092
+ {
1093
+ "epoch": 2.58,
1094
+ "learning_rate": 7.750359367513177e-06,
1095
+ "loss": 0.5148,
1096
+ "step": 3995
1097
+ },
1098
+ {
1099
+ "epoch": 2.61,
1100
+ "learning_rate": 7.1873502635361775e-06,
1101
+ "loss": 0.5234,
1102
+ "step": 4042
1103
+ },
1104
+ {
1105
+ "epoch": 2.64,
1106
+ "learning_rate": 6.624341159559177e-06,
1107
+ "loss": 0.5286,
1108
+ "step": 4089
1109
+ },
1110
+ {
1111
+ "epoch": 2.68,
1112
+ "learning_rate": 6.061332055582176e-06,
1113
+ "loss": 0.5182,
1114
+ "step": 4136
1115
+ },
1116
+ {
1117
+ "epoch": 2.7,
1118
+ "eval_accuracy": 0.7631364746314552,
1119
+ "eval_b_acc": 0.6403726819328496,
1120
+ "eval_f1": 0.7611758530200116,
1121
+ "eval_f1_anger": 0.6441332689521971,
1122
+ "eval_f1_disgust": 0.4635036496350365,
1123
+ "eval_f1_fear": 0.6658725431804646,
1124
+ "eval_f1_joy": 0.7548551093035079,
1125
+ "eval_f1_neutral": 0.848629209284947,
1126
+ "eval_f1_sadness": 0.6971687429218574,
1127
+ "eval_f1_surprise": 0.4763636363636364,
1128
+ "eval_loss": 0.6621036529541016,
1129
+ "eval_prec": 0.7602393679680954,
1130
+ "eval_prec_anger": 0.6491484184914842,
1131
+ "eval_prec_disgust": 0.47388059701492535,
1132
+ "eval_prec_fear": 0.6783980582524272,
1133
+ "eval_prec_joy": 0.734322453016815,
1134
+ "eval_prec_neutral": 0.844409331722279,
1135
+ "eval_prec_sadness": 0.7108545034642032,
1136
+ "eval_prec_surprise": 0.5458333333333333,
1137
+ "eval_recall": 0.7631364746314552,
1138
+ "eval_recall_anger": 0.639195016770484,
1139
+ "eval_recall_disgust": 0.45357142857142857,
1140
+ "eval_recall_fear": 0.6538011695906433,
1141
+ "eval_recall_joy": 0.7765690376569038,
1142
+ "eval_recall_neutral": 0.8528914757791964,
1143
+ "eval_recall_sadness": 0.684,
1144
+ "eval_recall_surprise": 0.42258064516129035,
1145
+ "eval_runtime": 14.5758,
1146
+ "eval_samples_per_second": 1517.169,
1147
+ "eval_steps_per_second": 11.869,
1148
+ "step": 4176
1149
+ },
1150
+ {
1151
+ "epoch": 2.71,
1152
+ "learning_rate": 5.498322951605176e-06,
1153
+ "loss": 0.5301,
1154
+ "step": 4183
1155
+ },
1156
+ {
1157
+ "epoch": 2.74,
1158
+ "learning_rate": 4.935313847628174e-06,
1159
+ "loss": 0.5402,
1160
+ "step": 4230
1161
+ },
1162
+ {
1163
+ "epoch": 2.77,
1164
+ "learning_rate": 4.372304743651174e-06,
1165
+ "loss": 0.5167,
1166
+ "step": 4277
1167
+ },
1168
+ {
1169
+ "epoch": 2.8,
1170
+ "learning_rate": 3.8092956396741735e-06,
1171
+ "loss": 0.5004,
1172
+ "step": 4324
1173
+ },
1174
+ {
1175
+ "epoch": 2.83,
1176
+ "learning_rate": 3.246286535697173e-06,
1177
+ "loss": 0.5148,
1178
+ "step": 4371
1179
+ },
1180
+ {
1181
+ "epoch": 2.85,
1182
+ "eval_accuracy": 0.7637243375237406,
1183
+ "eval_b_acc": 0.6382943967754277,
1184
+ "eval_f1": 0.7612453985473553,
1185
+ "eval_f1_anger": 0.6462075848303392,
1186
+ "eval_f1_disgust": 0.4666666666666667,
1187
+ "eval_f1_fear": 0.6678592725104352,
1188
+ "eval_f1_joy": 0.7563368765331154,
1189
+ "eval_f1_neutral": 0.8485270885605427,
1190
+ "eval_f1_sadness": 0.6918630502445532,
1191
+ "eval_f1_surprise": 0.4760213143872114,
1192
+ "eval_loss": 0.6637689471244812,
1193
+ "eval_prec": 0.7597715932015663,
1194
+ "eval_prec_anger": 0.6741280583029672,
1195
+ "eval_prec_disgust": 0.5,
1196
+ "eval_prec_fear": 0.681265206812652,
1197
+ "eval_prec_joy": 0.7394084732214229,
1198
+ "eval_prec_neutral": 0.8400183992640294,
1199
+ "eval_prec_sadness": 0.6921708185053381,
1200
+ "eval_prec_surprise": 0.5296442687747036,
1201
+ "eval_recall": 0.7637243375237406,
1202
+ "eval_recall_anger": 0.6205079060852899,
1203
+ "eval_recall_disgust": 0.4375,
1204
+ "eval_recall_fear": 0.6549707602339181,
1205
+ "eval_recall_joy": 0.7740585774058577,
1206
+ "eval_recall_neutral": 0.857209913631243,
1207
+ "eval_recall_sadness": 0.6915555555555556,
1208
+ "eval_recall_surprise": 0.432258064516129,
1209
+ "eval_runtime": 14.5644,
1210
+ "eval_samples_per_second": 1518.363,
1211
+ "eval_steps_per_second": 11.878,
1212
+ "step": 4408
1213
+ },
1214
+ {
1215
+ "epoch": 2.86,
1216
+ "learning_rate": 2.6832774317201726e-06,
1217
+ "loss": 0.5231,
1218
+ "step": 4418
1219
+ },
1220
+ {
1221
+ "epoch": 2.89,
1222
+ "learning_rate": 2.120268327743172e-06,
1223
+ "loss": 0.5223,
1224
+ "step": 4465
1225
+ },
1226
+ {
1227
+ "epoch": 2.92,
1228
+ "learning_rate": 1.5572592237661715e-06,
1229
+ "loss": 0.525,
1230
+ "step": 4512
1231
+ },
1232
+ {
1233
+ "epoch": 2.95,
1234
+ "learning_rate": 9.942501197891712e-07,
1235
+ "loss": 0.5219,
1236
+ "step": 4559
1237
+ },
1238
+ {
1239
+ "epoch": 2.98,
1240
+ "learning_rate": 4.312410158121706e-07,
1241
+ "loss": 0.516,
1242
+ "step": 4606
1243
+ },
1244
+ {
1245
+ "epoch": 3.0,
1246
+ "step": 4638,
1247
+ "total_flos": 3.903427734912e+16,
1248
+ "train_loss": 0.6570020180521673,
1249
+ "train_runtime": 1377.6596,
1250
+ "train_samples_per_second": 430.73,
1251
+ "train_steps_per_second": 3.367
1252
+ }
1253
+ ],
1254
+ "max_steps": 4638,
1255
+ "num_train_epochs": 3,
1256
+ "total_flos": 3.903427734912e+16,
1257
+ "trial_name": null,
1258
+ "trial_params": null
1259
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d8b6d654a0fd9d25c51cd952b5043595530c71ec715e7e1fb0c43d08734c2b34
3
+ size 4027
vocab.json ADDED
The diff for this file is too large to render. See raw diff