bobox commited on
Commit
95ce1f6
·
verified ·
1 Parent(s): f99c016

Training in progress, step 2957, checkpoint

Browse files
checkpoint-2957/1_Pooling/config.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "word_embedding_dimension": 768,
3
+ "pooling_mode_cls_token": false,
4
+ "pooling_mode_mean_tokens": true,
5
+ "pooling_mode_max_tokens": false,
6
+ "pooling_mode_mean_sqrt_len_tokens": false,
7
+ "pooling_mode_weightedmean_tokens": false,
8
+ "pooling_mode_lasttoken": false,
9
+ "include_prompt": true
10
+ }
checkpoint-2957/README.md ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-2957/added_tokens.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "[MASK]": 128000
3
+ }
checkpoint-2957/config.json ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "microsoft/deberta-v3-small",
3
+ "architectures": [
4
+ "DebertaV2Model"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "hidden_act": "gelu",
8
+ "hidden_dropout_prob": 0.1,
9
+ "hidden_size": 768,
10
+ "initializer_range": 0.02,
11
+ "intermediate_size": 3072,
12
+ "layer_norm_eps": 1e-07,
13
+ "max_position_embeddings": 512,
14
+ "max_relative_positions": -1,
15
+ "model_type": "deberta-v2",
16
+ "norm_rel_ebd": "layer_norm",
17
+ "num_attention_heads": 12,
18
+ "num_hidden_layers": 6,
19
+ "pad_token_id": 0,
20
+ "pooler_dropout": 0,
21
+ "pooler_hidden_act": "gelu",
22
+ "pooler_hidden_size": 768,
23
+ "pos_att_type": [
24
+ "p2c",
25
+ "c2p"
26
+ ],
27
+ "position_biased_input": false,
28
+ "position_buckets": 256,
29
+ "relative_attention": true,
30
+ "share_att_key": true,
31
+ "torch_dtype": "float32",
32
+ "transformers_version": "4.42.4",
33
+ "type_vocab_size": 0,
34
+ "vocab_size": 128100
35
+ }
checkpoint-2957/config_sentence_transformers.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "__version__": {
3
+ "sentence_transformers": "3.0.1",
4
+ "transformers": "4.42.4",
5
+ "pytorch": "2.3.1+cu121"
6
+ },
7
+ "prompts": {},
8
+ "default_prompt_name": null,
9
+ "similarity_fn_name": null
10
+ }
checkpoint-2957/modules.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "idx": 0,
4
+ "name": "0",
5
+ "path": "",
6
+ "type": "sentence_transformers.models.Transformer"
7
+ },
8
+ {
9
+ "idx": 1,
10
+ "name": "1",
11
+ "path": "1_Pooling",
12
+ "type": "sentence_transformers.models.Pooling"
13
+ }
14
+ ]
checkpoint-2957/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:87a4231dace8ea5e6996051bf9fd731b70b56dba1ca086f2d27c02783ce614dc
3
+ size 1130520122
checkpoint-2957/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fd21dc5e9a99836f5ccdb72aa1171b3aa090e50c93912dacc1f8b0f68195957a
3
+ size 565251810
checkpoint-2957/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bfad2ff737464e76c5b37506f2db24785b83457e177269ea750956f0d300868d
3
+ size 14244
checkpoint-2957/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5926d2179d543a06e492450f789694397251123df8342ff76f43fd337c0a8363
3
+ size 1064
checkpoint-2957/sentence_bert_config.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "max_seq_length": 512,
3
+ "do_lower_case": false
4
+ }
checkpoint-2957/special_tokens_map.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "[CLS]",
3
+ "cls_token": "[CLS]",
4
+ "eos_token": "[SEP]",
5
+ "mask_token": "[MASK]",
6
+ "pad_token": "[PAD]",
7
+ "sep_token": "[SEP]",
8
+ "unk_token": {
9
+ "content": "[UNK]",
10
+ "lstrip": false,
11
+ "normalized": true,
12
+ "rstrip": false,
13
+ "single_word": false
14
+ }
15
+ }
checkpoint-2957/spm.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c679fbf93643d19aab7ee10c0b99e460bdbc02fedf34b92b05af343b4af586fd
3
+ size 2464616
checkpoint-2957/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-2957/tokenizer_config.json ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "[CLS]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "[SEP]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "3": {
28
+ "content": "[UNK]",
29
+ "lstrip": false,
30
+ "normalized": true,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "128000": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "bos_token": "[CLS]",
45
+ "clean_up_tokenization_spaces": true,
46
+ "cls_token": "[CLS]",
47
+ "do_lower_case": false,
48
+ "eos_token": "[SEP]",
49
+ "mask_token": "[MASK]",
50
+ "model_max_length": 1000000000000000019884624838656,
51
+ "pad_token": "[PAD]",
52
+ "sep_token": "[SEP]",
53
+ "sp_model_kwargs": {},
54
+ "split_by_punct": false,
55
+ "tokenizer_class": "DebertaV2Tokenizer",
56
+ "unk_token": "[UNK]",
57
+ "vocab_type": "spm"
58
+ }
checkpoint-2957/trainer_state.json ADDED
@@ -0,0 +1,996 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
+ "eval_steps": 740,
6
+ "global_step": 2957,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.025025363544132567,
13
+ "grad_norm": 24.194847106933594,
14
+ "learning_rate": 8.048698004734527e-07,
15
+ "loss": 12.9253,
16
+ "step": 74
17
+ },
18
+ {
19
+ "epoch": 0.050050727088265135,
20
+ "grad_norm": 16.451122283935547,
21
+ "learning_rate": 1.6807575245180925e-06,
22
+ "loss": 9.5557,
23
+ "step": 148
24
+ },
25
+ {
26
+ "epoch": 0.0750760906323977,
27
+ "grad_norm": 166.15907287597656,
28
+ "learning_rate": 2.5566452485627323e-06,
29
+ "loss": 9.2788,
30
+ "step": 222
31
+ },
32
+ {
33
+ "epoch": 0.10010145417653027,
34
+ "grad_norm": 29.49967384338379,
35
+ "learning_rate": 3.432532972607372e-06,
36
+ "loss": 8.6849,
37
+ "step": 296
38
+ },
39
+ {
40
+ "epoch": 0.12512681772066284,
41
+ "grad_norm": 8.201894760131836,
42
+ "learning_rate": 4.308420696652012e-06,
43
+ "loss": 7.7612,
44
+ "step": 370
45
+ },
46
+ {
47
+ "epoch": 0.1501521812647954,
48
+ "grad_norm": 35.00122833251953,
49
+ "learning_rate": 5.184308420696652e-06,
50
+ "loss": 7.1482,
51
+ "step": 444
52
+ },
53
+ {
54
+ "epoch": 0.17517754480892797,
55
+ "grad_norm": 15.764969825744629,
56
+ "learning_rate": 6.0601961447412915e-06,
57
+ "loss": 6.5551,
58
+ "step": 518
59
+ },
60
+ {
61
+ "epoch": 0.20020290835306054,
62
+ "grad_norm": 33.325008392333984,
63
+ "learning_rate": 6.936083868785931e-06,
64
+ "loss": 6.2869,
65
+ "step": 592
66
+ },
67
+ {
68
+ "epoch": 0.2252282718971931,
69
+ "grad_norm": 30.581892013549805,
70
+ "learning_rate": 7.81197159283057e-06,
71
+ "loss": 5.092,
72
+ "step": 666
73
+ },
74
+ {
75
+ "epoch": 0.2502536354413257,
76
+ "grad_norm": 39.986324310302734,
77
+ "learning_rate": 8.68785931687521e-06,
78
+ "loss": 4.7866,
79
+ "step": 740
80
+ },
81
+ {
82
+ "epoch": 0.2502536354413257,
83
+ "eval_Vitaminc-test_cosine_accuracy": 0.55,
84
+ "eval_Vitaminc-test_cosine_accuracy_threshold": 0.9010212421417236,
85
+ "eval_Vitaminc-test_cosine_ap": 0.5414572634918511,
86
+ "eval_Vitaminc-test_cosine_f1": 0.654627539503386,
87
+ "eval_Vitaminc-test_cosine_f1_threshold": 0.5345202088356018,
88
+ "eval_Vitaminc-test_cosine_precision": 0.4865771812080537,
89
+ "eval_Vitaminc-test_cosine_recall": 1.0,
90
+ "eval_Vitaminc-test_dot_accuracy": 0.56,
91
+ "eval_Vitaminc-test_dot_accuracy_threshold": 251.8475341796875,
92
+ "eval_Vitaminc-test_dot_ap": 0.5368797297298245,
93
+ "eval_Vitaminc-test_dot_f1": 0.654627539503386,
94
+ "eval_Vitaminc-test_dot_f1_threshold": 151.05996704101562,
95
+ "eval_Vitaminc-test_dot_precision": 0.4865771812080537,
96
+ "eval_Vitaminc-test_dot_recall": 1.0,
97
+ "eval_Vitaminc-test_euclidean_accuracy": 0.55,
98
+ "eval_Vitaminc-test_euclidean_accuracy_threshold": 7.4731292724609375,
99
+ "eval_Vitaminc-test_euclidean_ap": 0.5349241278127328,
100
+ "eval_Vitaminc-test_euclidean_f1": 0.654627539503386,
101
+ "eval_Vitaminc-test_euclidean_f1_threshold": 16.456798553466797,
102
+ "eval_Vitaminc-test_euclidean_precision": 0.4865771812080537,
103
+ "eval_Vitaminc-test_euclidean_recall": 1.0,
104
+ "eval_Vitaminc-test_manhattan_accuracy": 0.55,
105
+ "eval_Vitaminc-test_manhattan_accuracy_threshold": 153.8025360107422,
106
+ "eval_Vitaminc-test_manhattan_ap": 0.5359223565961893,
107
+ "eval_Vitaminc-test_manhattan_f1": 0.6560364464692483,
108
+ "eval_Vitaminc-test_manhattan_f1_threshold": 310.77142333984375,
109
+ "eval_Vitaminc-test_manhattan_precision": 0.4897959183673469,
110
+ "eval_Vitaminc-test_manhattan_recall": 0.993103448275862,
111
+ "eval_Vitaminc-test_max_accuracy": 0.56,
112
+ "eval_Vitaminc-test_max_accuracy_threshold": 251.8475341796875,
113
+ "eval_Vitaminc-test_max_ap": 0.5414572634918511,
114
+ "eval_Vitaminc-test_max_f1": 0.6560364464692483,
115
+ "eval_Vitaminc-test_max_f1_threshold": 310.77142333984375,
116
+ "eval_Vitaminc-test_max_precision": 0.4897959183673469,
117
+ "eval_Vitaminc-test_max_recall": 1.0,
118
+ "eval_mrpc-test_cosine_accuracy": 0.7133333333333334,
119
+ "eval_mrpc-test_cosine_accuracy_threshold": 0.8482479453086853,
120
+ "eval_mrpc-test_cosine_ap": 0.8126327786702257,
121
+ "eval_mrpc-test_cosine_f1": 0.8080357142857143,
122
+ "eval_mrpc-test_cosine_f1_threshold": 0.8482479453086853,
123
+ "eval_mrpc-test_cosine_precision": 0.7182539682539683,
124
+ "eval_mrpc-test_cosine_recall": 0.923469387755102,
125
+ "eval_mrpc-test_dot_accuracy": 0.6733333333333333,
126
+ "eval_mrpc-test_dot_accuracy_threshold": 192.46002197265625,
127
+ "eval_mrpc-test_dot_ap": 0.7048128672008315,
128
+ "eval_mrpc-test_dot_f1": 0.7975460122699387,
129
+ "eval_mrpc-test_dot_f1_threshold": 186.87075805664062,
130
+ "eval_mrpc-test_dot_precision": 0.6655290102389079,
131
+ "eval_mrpc-test_dot_recall": 0.9948979591836735,
132
+ "eval_mrpc-test_euclidean_accuracy": 0.7133333333333334,
133
+ "eval_mrpc-test_euclidean_accuracy_threshold": 9.194692611694336,
134
+ "eval_mrpc-test_euclidean_ap": 0.8190781511691683,
135
+ "eval_mrpc-test_euclidean_f1": 0.8080357142857143,
136
+ "eval_mrpc-test_euclidean_f1_threshold": 9.194692611694336,
137
+ "eval_mrpc-test_euclidean_precision": 0.7182539682539683,
138
+ "eval_mrpc-test_euclidean_recall": 0.923469387755102,
139
+ "eval_mrpc-test_manhattan_accuracy": 0.71,
140
+ "eval_mrpc-test_manhattan_accuracy_threshold": 167.8056640625,
141
+ "eval_mrpc-test_manhattan_ap": 0.8350377066418277,
142
+ "eval_mrpc-test_manhattan_f1": 0.8034934497816593,
143
+ "eval_mrpc-test_manhattan_f1_threshold": 187.3101806640625,
144
+ "eval_mrpc-test_manhattan_precision": 0.7022900763358778,
145
+ "eval_mrpc-test_manhattan_recall": 0.9387755102040817,
146
+ "eval_mrpc-test_max_accuracy": 0.7133333333333334,
147
+ "eval_mrpc-test_max_accuracy_threshold": 192.46002197265625,
148
+ "eval_mrpc-test_max_ap": 0.8350377066418277,
149
+ "eval_mrpc-test_max_f1": 0.8080357142857143,
150
+ "eval_mrpc-test_max_f1_threshold": 187.3101806640625,
151
+ "eval_mrpc-test_max_precision": 0.7182539682539683,
152
+ "eval_mrpc-test_max_recall": 0.9948979591836735,
153
+ "eval_negationNLI-test_cosine_accuracy": 1.0,
154
+ "eval_negationNLI-test_dot_accuracy": 0.0,
155
+ "eval_negationNLI-test_euclidean_accuracy": 1.0,
156
+ "eval_negationNLI-test_manhattan_accuracy": 1.0,
157
+ "eval_negationNLI-test_max_accuracy": 1.0,
158
+ "eval_nli-pairs_loss": 7.208528518676758,
159
+ "eval_nli-pairs_runtime": 2.8235,
160
+ "eval_nli-pairs_samples_per_second": 354.167,
161
+ "eval_nli-pairs_steps_per_second": 7.083,
162
+ "eval_sequential_score": 0.5414572634918511,
163
+ "eval_sts-test_pearson_cosine": 0.5924382437556898,
164
+ "eval_sts-test_pearson_dot": 0.5712096252698496,
165
+ "eval_sts-test_pearson_euclidean": 0.567118545585895,
166
+ "eval_sts-test_pearson_manhattan": 0.5851061637430899,
167
+ "eval_sts-test_pearson_max": 0.5924382437556898,
168
+ "eval_sts-test_spearman_cosine": 0.6094124531020365,
169
+ "eval_sts-test_spearman_dot": 0.5838193528505712,
170
+ "eval_sts-test_spearman_euclidean": 0.5744646395850245,
171
+ "eval_sts-test_spearman_manhattan": 0.5976596658090335,
172
+ "eval_sts-test_spearman_max": 0.6094124531020365,
173
+ "step": 740
174
+ },
175
+ {
176
+ "epoch": 0.2502536354413257,
177
+ "eval_vitaminc-pairs_loss": 6.389328479766846,
178
+ "eval_vitaminc-pairs_runtime": 0.3763,
179
+ "eval_vitaminc-pairs_samples_per_second": 265.723,
180
+ "eval_vitaminc-pairs_steps_per_second": 5.314,
181
+ "step": 740
182
+ },
183
+ {
184
+ "epoch": 0.2502536354413257,
185
+ "eval_negation-triplets_loss": 5.878116607666016,
186
+ "eval_negation-triplets_runtime": 0.123,
187
+ "eval_negation-triplets_samples_per_second": 544.706,
188
+ "eval_negation-triplets_steps_per_second": 16.26,
189
+ "step": 740
190
+ },
191
+ {
192
+ "epoch": 0.2502536354413257,
193
+ "eval_qnli-contrastive_loss": 4.718408584594727,
194
+ "eval_qnli-contrastive_runtime": 0.1805,
195
+ "eval_qnli-contrastive_samples_per_second": 553.913,
196
+ "eval_qnli-contrastive_steps_per_second": 11.078,
197
+ "step": 740
198
+ },
199
+ {
200
+ "epoch": 0.2502536354413257,
201
+ "eval_scitail-pairs-qa_loss": 0.7982025146484375,
202
+ "eval_scitail-pairs-qa_runtime": 0.2817,
203
+ "eval_scitail-pairs-qa_samples_per_second": 354.927,
204
+ "eval_scitail-pairs-qa_steps_per_second": 7.099,
205
+ "step": 740
206
+ },
207
+ {
208
+ "epoch": 0.2502536354413257,
209
+ "eval_scitail-pairs-pos_loss": 1.6438982486724854,
210
+ "eval_scitail-pairs-pos_runtime": 0.556,
211
+ "eval_scitail-pairs-pos_samples_per_second": 179.843,
212
+ "eval_scitail-pairs-pos_steps_per_second": 3.597,
213
+ "step": 740
214
+ },
215
+ {
216
+ "epoch": 0.2502536354413257,
217
+ "eval_xsum-pairs_loss": 2.896432399749756,
218
+ "eval_xsum-pairs_runtime": 0.6662,
219
+ "eval_xsum-pairs_samples_per_second": 150.113,
220
+ "eval_xsum-pairs_steps_per_second": 3.002,
221
+ "step": 740
222
+ },
223
+ {
224
+ "epoch": 0.2502536354413257,
225
+ "eval_compression-pairs_loss": 2.1321794986724854,
226
+ "eval_compression-pairs_runtime": 0.0988,
227
+ "eval_compression-pairs_samples_per_second": 1012.518,
228
+ "eval_compression-pairs_steps_per_second": 20.25,
229
+ "step": 740
230
+ },
231
+ {
232
+ "epoch": 0.2502536354413257,
233
+ "eval_sciq_pairs_loss": 2.1849400997161865,
234
+ "eval_sciq_pairs_runtime": 2.003,
235
+ "eval_sciq_pairs_samples_per_second": 49.926,
236
+ "eval_sciq_pairs_steps_per_second": 0.999,
237
+ "step": 740
238
+ },
239
+ {
240
+ "epoch": 0.2502536354413257,
241
+ "eval_qasc_pairs_loss": 3.1045215129852295,
242
+ "eval_qasc_pairs_runtime": 0.2356,
243
+ "eval_qasc_pairs_samples_per_second": 424.456,
244
+ "eval_qasc_pairs_steps_per_second": 8.489,
245
+ "step": 740
246
+ },
247
+ {
248
+ "epoch": 0.2502536354413257,
249
+ "eval_qasc_facts_sym_loss": 2.4955689907073975,
250
+ "eval_qasc_facts_sym_runtime": 0.0956,
251
+ "eval_qasc_facts_sym_samples_per_second": 1045.497,
252
+ "eval_qasc_facts_sym_steps_per_second": 20.91,
253
+ "step": 740
254
+ },
255
+ {
256
+ "epoch": 0.2502536354413257,
257
+ "eval_openbookqa_pairs_loss": 4.331984519958496,
258
+ "eval_openbookqa_pairs_runtime": 0.2874,
259
+ "eval_openbookqa_pairs_samples_per_second": 347.959,
260
+ "eval_openbookqa_pairs_steps_per_second": 6.959,
261
+ "step": 740
262
+ },
263
+ {
264
+ "epoch": 0.2502536354413257,
265
+ "eval_msmarco_pairs_loss": 6.1474928855896,
266
+ "eval_msmarco_pairs_runtime": 0.5405,
267
+ "eval_msmarco_pairs_samples_per_second": 185.006,
268
+ "eval_msmarco_pairs_steps_per_second": 3.7,
269
+ "step": 740
270
+ },
271
+ {
272
+ "epoch": 0.2502536354413257,
273
+ "eval_nq_pairs_loss": 6.490893363952637,
274
+ "eval_nq_pairs_runtime": 1.5599,
275
+ "eval_nq_pairs_samples_per_second": 64.106,
276
+ "eval_nq_pairs_steps_per_second": 1.282,
277
+ "step": 740
278
+ },
279
+ {
280
+ "epoch": 0.2502536354413257,
281
+ "eval_trivia_pairs_loss": 6.1789960861206055,
282
+ "eval_trivia_pairs_runtime": 2.1019,
283
+ "eval_trivia_pairs_samples_per_second": 47.576,
284
+ "eval_trivia_pairs_steps_per_second": 0.952,
285
+ "step": 740
286
+ },
287
+ {
288
+ "epoch": 0.2502536354413257,
289
+ "eval_quora_pairs_loss": 1.2507822513580322,
290
+ "eval_quora_pairs_runtime": 7.7343,
291
+ "eval_quora_pairs_samples_per_second": 217.213,
292
+ "eval_quora_pairs_steps_per_second": 4.396,
293
+ "step": 740
294
+ },
295
+ {
296
+ "epoch": 0.2502536354413257,
297
+ "eval_gooaq_pairs_loss": 4.71970272064209,
298
+ "eval_gooaq_pairs_runtime": 0.4295,
299
+ "eval_gooaq_pairs_samples_per_second": 232.84,
300
+ "eval_gooaq_pairs_steps_per_second": 4.657,
301
+ "step": 740
302
+ },
303
+ {
304
+ "epoch": 0.2502536354413257,
305
+ "eval_mrpc_pairs_loss": 1.2289129495620728,
306
+ "eval_mrpc_pairs_runtime": 0.0981,
307
+ "eval_mrpc_pairs_samples_per_second": 1019.237,
308
+ "eval_mrpc_pairs_steps_per_second": 20.385,
309
+ "step": 740
310
+ },
311
+ {
312
+ "epoch": 0.2752789989854582,
313
+ "grad_norm": 13.371368408203125,
314
+ "learning_rate": 9.56374704091985e-06,
315
+ "loss": 4.9352,
316
+ "step": 814
317
+ },
318
+ {
319
+ "epoch": 0.3003043625295908,
320
+ "grad_norm": 11.514687538146973,
321
+ "learning_rate": 1.0439634764964491e-05,
322
+ "loss": 4.6466,
323
+ "step": 888
324
+ },
325
+ {
326
+ "epoch": 0.32532972607372335,
327
+ "grad_norm": 7.828142166137695,
328
+ "learning_rate": 1.131552248900913e-05,
329
+ "loss": 4.5722,
330
+ "step": 962
331
+ },
332
+ {
333
+ "epoch": 0.35035508961785594,
334
+ "grad_norm": 51.203739166259766,
335
+ "learning_rate": 1.2191410213053771e-05,
336
+ "loss": 4.3531,
337
+ "step": 1036
338
+ },
339
+ {
340
+ "epoch": 0.3753804531619885,
341
+ "grad_norm": 22.266889572143555,
342
+ "learning_rate": 1.3067297937098409e-05,
343
+ "loss": 4.2219,
344
+ "step": 1110
345
+ },
346
+ {
347
+ "epoch": 0.4004058167061211,
348
+ "grad_norm": 49.611026763916016,
349
+ "learning_rate": 1.394318566114305e-05,
350
+ "loss": 4.7228,
351
+ "step": 1184
352
+ },
353
+ {
354
+ "epoch": 0.4254311802502536,
355
+ "grad_norm": 16.54360580444336,
356
+ "learning_rate": 1.4819073385187689e-05,
357
+ "loss": 4.1036,
358
+ "step": 1258
359
+ },
360
+ {
361
+ "epoch": 0.4504565437943862,
362
+ "grad_norm": 11.04261302947998,
363
+ "learning_rate": 1.569496110923233e-05,
364
+ "loss": 3.0328,
365
+ "step": 1332
366
+ },
367
+ {
368
+ "epoch": 0.47548190733851875,
369
+ "grad_norm": 12.153929710388184,
370
+ "learning_rate": 1.6570848833276968e-05,
371
+ "loss": 3.6321,
372
+ "step": 1406
373
+ },
374
+ {
375
+ "epoch": 0.5005072708826513,
376
+ "grad_norm": 43.6102180480957,
377
+ "learning_rate": 1.7446736557321606e-05,
378
+ "loss": 3.6522,
379
+ "step": 1480
380
+ },
381
+ {
382
+ "epoch": 0.5005072708826513,
383
+ "eval_Vitaminc-test_cosine_accuracy": 0.56,
384
+ "eval_Vitaminc-test_cosine_accuracy_threshold": 0.7371894121170044,
385
+ "eval_Vitaminc-test_cosine_ap": 0.5260625326082631,
386
+ "eval_Vitaminc-test_cosine_f1": 0.6586538461538463,
387
+ "eval_Vitaminc-test_cosine_f1_threshold": 0.543196976184845,
388
+ "eval_Vitaminc-test_cosine_precision": 0.5055350553505535,
389
+ "eval_Vitaminc-test_cosine_recall": 0.9448275862068966,
390
+ "eval_Vitaminc-test_dot_accuracy": 0.5533333333333333,
391
+ "eval_Vitaminc-test_dot_accuracy_threshold": 120.95848846435547,
392
+ "eval_Vitaminc-test_dot_ap": 0.5280492506160784,
393
+ "eval_Vitaminc-test_dot_f1": 0.6545454545454545,
394
+ "eval_Vitaminc-test_dot_f1_threshold": 64.36978149414062,
395
+ "eval_Vitaminc-test_dot_precision": 0.488135593220339,
396
+ "eval_Vitaminc-test_dot_recall": 0.993103448275862,
397
+ "eval_Vitaminc-test_euclidean_accuracy": 0.5533333333333333,
398
+ "eval_Vitaminc-test_euclidean_accuracy_threshold": 9.099786758422852,
399
+ "eval_Vitaminc-test_euclidean_ap": 0.5285735728609713,
400
+ "eval_Vitaminc-test_euclidean_f1": 0.6531531531531533,
401
+ "eval_Vitaminc-test_euclidean_f1_threshold": 16.646543502807617,
402
+ "eval_Vitaminc-test_euclidean_precision": 0.48494983277591974,
403
+ "eval_Vitaminc-test_euclidean_recall": 1.0,
404
+ "eval_Vitaminc-test_manhattan_accuracy": 0.5566666666666666,
405
+ "eval_Vitaminc-test_manhattan_accuracy_threshold": 183.7972412109375,
406
+ "eval_Vitaminc-test_manhattan_ap": 0.5272973961720947,
407
+ "eval_Vitaminc-test_manhattan_f1": 0.6540284360189572,
408
+ "eval_Vitaminc-test_manhattan_f1_threshold": 248.92129516601562,
409
+ "eval_Vitaminc-test_manhattan_precision": 0.4981949458483754,
410
+ "eval_Vitaminc-test_manhattan_recall": 0.9517241379310345,
411
+ "eval_Vitaminc-test_max_accuracy": 0.56,
412
+ "eval_Vitaminc-test_max_accuracy_threshold": 183.7972412109375,
413
+ "eval_Vitaminc-test_max_ap": 0.5285735728609713,
414
+ "eval_Vitaminc-test_max_f1": 0.6586538461538463,
415
+ "eval_Vitaminc-test_max_f1_threshold": 248.92129516601562,
416
+ "eval_Vitaminc-test_max_precision": 0.5055350553505535,
417
+ "eval_Vitaminc-test_max_recall": 1.0,
418
+ "eval_mrpc-test_cosine_accuracy": 0.7166666666666667,
419
+ "eval_mrpc-test_cosine_accuracy_threshold": 0.7689210176467896,
420
+ "eval_mrpc-test_cosine_ap": 0.8209283332512223,
421
+ "eval_mrpc-test_cosine_f1": 0.8067226890756303,
422
+ "eval_mrpc-test_cosine_f1_threshold": 0.6529129147529602,
423
+ "eval_mrpc-test_cosine_precision": 0.6857142857142857,
424
+ "eval_mrpc-test_cosine_recall": 0.9795918367346939,
425
+ "eval_mrpc-test_dot_accuracy": 0.6766666666666666,
426
+ "eval_mrpc-test_dot_accuracy_threshold": 77.22791290283203,
427
+ "eval_mrpc-test_dot_ap": 0.6966742608454493,
428
+ "eval_mrpc-test_dot_f1": 0.7957894736842105,
429
+ "eval_mrpc-test_dot_f1_threshold": 77.22791290283203,
430
+ "eval_mrpc-test_dot_precision": 0.6774193548387096,
431
+ "eval_mrpc-test_dot_recall": 0.9642857142857143,
432
+ "eval_mrpc-test_euclidean_accuracy": 0.7,
433
+ "eval_mrpc-test_euclidean_accuracy_threshold": 7.456927299499512,
434
+ "eval_mrpc-test_euclidean_ap": 0.830942263798408,
435
+ "eval_mrpc-test_euclidean_f1": 0.8067940552016986,
436
+ "eval_mrpc-test_euclidean_f1_threshold": 9.238859176635742,
437
+ "eval_mrpc-test_euclidean_precision": 0.6909090909090909,
438
+ "eval_mrpc-test_euclidean_recall": 0.9693877551020408,
439
+ "eval_mrpc-test_manhattan_accuracy": 0.7033333333333334,
440
+ "eval_mrpc-test_manhattan_accuracy_threshold": 152.87489318847656,
441
+ "eval_mrpc-test_manhattan_ap": 0.8386748577280486,
442
+ "eval_mrpc-test_manhattan_f1": 0.8077753779697625,
443
+ "eval_mrpc-test_manhattan_f1_threshold": 174.57843017578125,
444
+ "eval_mrpc-test_manhattan_precision": 0.700374531835206,
445
+ "eval_mrpc-test_manhattan_recall": 0.9540816326530612,
446
+ "eval_mrpc-test_max_accuracy": 0.7166666666666667,
447
+ "eval_mrpc-test_max_accuracy_threshold": 152.87489318847656,
448
+ "eval_mrpc-test_max_ap": 0.8386748577280486,
449
+ "eval_mrpc-test_max_f1": 0.8077753779697625,
450
+ "eval_mrpc-test_max_f1_threshold": 174.57843017578125,
451
+ "eval_mrpc-test_max_precision": 0.700374531835206,
452
+ "eval_mrpc-test_max_recall": 0.9795918367346939,
453
+ "eval_negationNLI-test_cosine_accuracy": 1.0,
454
+ "eval_negationNLI-test_dot_accuracy": 0.0,
455
+ "eval_negationNLI-test_euclidean_accuracy": 1.0,
456
+ "eval_negationNLI-test_manhattan_accuracy": 1.0,
457
+ "eval_negationNLI-test_max_accuracy": 1.0,
458
+ "eval_nli-pairs_loss": 4.24396276473999,
459
+ "eval_nli-pairs_runtime": 2.8135,
460
+ "eval_nli-pairs_samples_per_second": 355.432,
461
+ "eval_nli-pairs_steps_per_second": 7.109,
462
+ "eval_sequential_score": 0.5285735728609713,
463
+ "eval_sts-test_pearson_cosine": 0.7766910180979095,
464
+ "eval_sts-test_pearson_dot": 0.7319420450752285,
465
+ "eval_sts-test_pearson_euclidean": 0.7642908181972558,
466
+ "eval_sts-test_pearson_manhattan": 0.7791004599919719,
467
+ "eval_sts-test_pearson_max": 0.7791004599919719,
468
+ "eval_sts-test_spearman_cosine": 0.7711480866239301,
469
+ "eval_sts-test_spearman_dot": 0.7296255905576166,
470
+ "eval_sts-test_spearman_euclidean": 0.7593187745196615,
471
+ "eval_sts-test_spearman_manhattan": 0.768980476853658,
472
+ "eval_sts-test_spearman_max": 0.7711480866239301,
473
+ "step": 1480
474
+ },
475
+ {
476
+ "epoch": 0.5005072708826513,
477
+ "eval_vitaminc-pairs_loss": 6.655053615570068,
478
+ "eval_vitaminc-pairs_runtime": 0.3746,
479
+ "eval_vitaminc-pairs_samples_per_second": 266.927,
480
+ "eval_vitaminc-pairs_steps_per_second": 5.339,
481
+ "step": 1480
482
+ },
483
+ {
484
+ "epoch": 0.5005072708826513,
485
+ "eval_negation-triplets_loss": 4.761081695556641,
486
+ "eval_negation-triplets_runtime": 0.1309,
487
+ "eval_negation-triplets_samples_per_second": 511.863,
488
+ "eval_negation-triplets_steps_per_second": 15.279,
489
+ "step": 1480
490
+ },
491
+ {
492
+ "epoch": 0.5005072708826513,
493
+ "eval_qnli-contrastive_loss": 3.232550859451294,
494
+ "eval_qnli-contrastive_runtime": 0.177,
495
+ "eval_qnli-contrastive_samples_per_second": 564.83,
496
+ "eval_qnli-contrastive_steps_per_second": 11.297,
497
+ "step": 1480
498
+ },
499
+ {
500
+ "epoch": 0.5005072708826513,
501
+ "eval_scitail-pairs-qa_loss": 0.2729453146457672,
502
+ "eval_scitail-pairs-qa_runtime": 0.2728,
503
+ "eval_scitail-pairs-qa_samples_per_second": 366.588,
504
+ "eval_scitail-pairs-qa_steps_per_second": 7.332,
505
+ "step": 1480
506
+ },
507
+ {
508
+ "epoch": 0.5005072708826513,
509
+ "eval_scitail-pairs-pos_loss": 0.8931738138198853,
510
+ "eval_scitail-pairs-pos_runtime": 0.5321,
511
+ "eval_scitail-pairs-pos_samples_per_second": 187.936,
512
+ "eval_scitail-pairs-pos_steps_per_second": 3.759,
513
+ "step": 1480
514
+ },
515
+ {
516
+ "epoch": 0.5005072708826513,
517
+ "eval_xsum-pairs_loss": 1.9395147562026978,
518
+ "eval_xsum-pairs_runtime": 0.6642,
519
+ "eval_xsum-pairs_samples_per_second": 150.564,
520
+ "eval_xsum-pairs_steps_per_second": 3.011,
521
+ "step": 1480
522
+ },
523
+ {
524
+ "epoch": 0.5005072708826513,
525
+ "eval_compression-pairs_loss": 1.3951506614685059,
526
+ "eval_compression-pairs_runtime": 0.0986,
527
+ "eval_compression-pairs_samples_per_second": 1013.751,
528
+ "eval_compression-pairs_steps_per_second": 20.275,
529
+ "step": 1480
530
+ },
531
+ {
532
+ "epoch": 0.5005072708826513,
533
+ "eval_sciq_pairs_loss": 1.6004968881607056,
534
+ "eval_sciq_pairs_runtime": 1.9806,
535
+ "eval_sciq_pairs_samples_per_second": 50.489,
536
+ "eval_sciq_pairs_steps_per_second": 1.01,
537
+ "step": 1480
538
+ },
539
+ {
540
+ "epoch": 0.5005072708826513,
541
+ "eval_qasc_pairs_loss": 1.3576843738555908,
542
+ "eval_qasc_pairs_runtime": 0.2305,
543
+ "eval_qasc_pairs_samples_per_second": 433.83,
544
+ "eval_qasc_pairs_steps_per_second": 8.677,
545
+ "step": 1480
546
+ },
547
+ {
548
+ "epoch": 0.5005072708826513,
549
+ "eval_qasc_facts_sym_loss": 1.5506917238235474,
550
+ "eval_qasc_facts_sym_runtime": 0.0975,
551
+ "eval_qasc_facts_sym_samples_per_second": 1025.259,
552
+ "eval_qasc_facts_sym_steps_per_second": 20.505,
553
+ "step": 1480
554
+ },
555
+ {
556
+ "epoch": 0.5005072708826513,
557
+ "eval_openbookqa_pairs_loss": 2.7663590908050537,
558
+ "eval_openbookqa_pairs_runtime": 0.2815,
559
+ "eval_openbookqa_pairs_samples_per_second": 355.224,
560
+ "eval_openbookqa_pairs_steps_per_second": 7.104,
561
+ "step": 1480
562
+ },
563
+ {
564
+ "epoch": 0.5005072708826513,
565
+ "eval_msmarco_pairs_loss": 3.48696231842041,
566
+ "eval_msmarco_pairs_runtime": 0.5347,
567
+ "eval_msmarco_pairs_samples_per_second": 187.005,
568
+ "eval_msmarco_pairs_steps_per_second": 3.74,
569
+ "step": 1480
570
+ },
571
+ {
572
+ "epoch": 0.5005072708826513,
573
+ "eval_nq_pairs_loss": 4.686245441436768,
574
+ "eval_nq_pairs_runtime": 1.5466,
575
+ "eval_nq_pairs_samples_per_second": 64.657,
576
+ "eval_nq_pairs_steps_per_second": 1.293,
577
+ "step": 1480
578
+ },
579
+ {
580
+ "epoch": 0.5005072708826513,
581
+ "eval_trivia_pairs_loss": 4.968179225921631,
582
+ "eval_trivia_pairs_runtime": 2.0808,
583
+ "eval_trivia_pairs_samples_per_second": 48.058,
584
+ "eval_trivia_pairs_steps_per_second": 0.961,
585
+ "step": 1480
586
+ },
587
+ {
588
+ "epoch": 0.5005072708826513,
589
+ "eval_quora_pairs_loss": 0.9853857159614563,
590
+ "eval_quora_pairs_runtime": 7.7167,
591
+ "eval_quora_pairs_samples_per_second": 217.708,
592
+ "eval_quora_pairs_steps_per_second": 4.406,
593
+ "step": 1480
594
+ },
595
+ {
596
+ "epoch": 0.5005072708826513,
597
+ "eval_gooaq_pairs_loss": 2.873445987701416,
598
+ "eval_gooaq_pairs_runtime": 0.4311,
599
+ "eval_gooaq_pairs_samples_per_second": 231.973,
600
+ "eval_gooaq_pairs_steps_per_second": 4.639,
601
+ "step": 1480
602
+ },
603
+ {
604
+ "epoch": 0.5005072708826513,
605
+ "eval_mrpc_pairs_loss": 0.6949604749679565,
606
+ "eval_mrpc_pairs_runtime": 0.0976,
607
+ "eval_mrpc_pairs_samples_per_second": 1024.63,
608
+ "eval_mrpc_pairs_steps_per_second": 20.493,
609
+ "step": 1480
610
+ },
611
+ {
612
+ "epoch": 0.5255326344267839,
613
+ "grad_norm": 26.758914947509766,
614
+ "learning_rate": 1.8322624281366248e-05,
615
+ "loss": 3.6262,
616
+ "step": 1554
617
+ },
618
+ {
619
+ "epoch": 0.5505579979709164,
620
+ "grad_norm": 31.105188369750977,
621
+ "learning_rate": 1.919851200541089e-05,
622
+ "loss": 3.314,
623
+ "step": 1628
624
+ },
625
+ {
626
+ "epoch": 0.5755833615150491,
627
+ "grad_norm": 22.02496910095215,
628
+ "learning_rate": 2.0074399729455527e-05,
629
+ "loss": 3.0849,
630
+ "step": 1702
631
+ },
632
+ {
633
+ "epoch": 0.6006087250591816,
634
+ "grad_norm": 13.833592414855957,
635
+ "learning_rate": 2.0950287453500165e-05,
636
+ "loss": 2.9041,
637
+ "step": 1776
638
+ },
639
+ {
640
+ "epoch": 0.6256340886033142,
641
+ "grad_norm": 33.3592643737793,
642
+ "learning_rate": 2.1826175177544804e-05,
643
+ "loss": 3.1351,
644
+ "step": 1850
645
+ },
646
+ {
647
+ "epoch": 0.6506594521474467,
648
+ "grad_norm": 37.34846496582031,
649
+ "learning_rate": 2.270206290158945e-05,
650
+ "loss": 3.1039,
651
+ "step": 1924
652
+ },
653
+ {
654
+ "epoch": 0.6756848156915793,
655
+ "grad_norm": 36.27501678466797,
656
+ "learning_rate": 2.3577950625634087e-05,
657
+ "loss": 3.1698,
658
+ "step": 1998
659
+ },
660
+ {
661
+ "epoch": 0.7007101792357119,
662
+ "grad_norm": 17.501544952392578,
663
+ "learning_rate": 2.4453838349678725e-05,
664
+ "loss": 2.5172,
665
+ "step": 2072
666
+ },
667
+ {
668
+ "epoch": 0.7257355427798444,
669
+ "grad_norm": 6.748822212219238,
670
+ "learning_rate": 2.5329726073723363e-05,
671
+ "loss": 2.4682,
672
+ "step": 2146
673
+ },
674
+ {
675
+ "epoch": 0.750760906323977,
676
+ "grad_norm": 7.4816694259643555,
677
+ "learning_rate": 2.6205613797768008e-05,
678
+ "loss": 2.6695,
679
+ "step": 2220
680
+ },
681
+ {
682
+ "epoch": 0.750760906323977,
683
+ "eval_Vitaminc-test_cosine_accuracy": 0.5566666666666666,
684
+ "eval_Vitaminc-test_cosine_accuracy_threshold": 0.7603035569190979,
685
+ "eval_Vitaminc-test_cosine_ap": 0.5291811141478275,
686
+ "eval_Vitaminc-test_cosine_f1": 0.6635071090047393,
687
+ "eval_Vitaminc-test_cosine_f1_threshold": 0.5236827731132507,
688
+ "eval_Vitaminc-test_cosine_precision": 0.5054151624548736,
689
+ "eval_Vitaminc-test_cosine_recall": 0.9655172413793104,
690
+ "eval_Vitaminc-test_dot_accuracy": 0.5633333333333334,
691
+ "eval_Vitaminc-test_dot_accuracy_threshold": 92.43148803710938,
692
+ "eval_Vitaminc-test_dot_ap": 0.5319629878299388,
693
+ "eval_Vitaminc-test_dot_f1": 0.6575342465753424,
694
+ "eval_Vitaminc-test_dot_f1_threshold": 56.82046127319336,
695
+ "eval_Vitaminc-test_dot_precision": 0.49146757679180886,
696
+ "eval_Vitaminc-test_dot_recall": 0.993103448275862,
697
+ "eval_Vitaminc-test_euclidean_accuracy": 0.5466666666666666,
698
+ "eval_Vitaminc-test_euclidean_accuracy_threshold": 8.31692123413086,
699
+ "eval_Vitaminc-test_euclidean_ap": 0.522357181501639,
700
+ "eval_Vitaminc-test_euclidean_f1": 0.662037037037037,
701
+ "eval_Vitaminc-test_euclidean_f1_threshold": 12.49872875213623,
702
+ "eval_Vitaminc-test_euclidean_precision": 0.49825783972125437,
703
+ "eval_Vitaminc-test_euclidean_recall": 0.9862068965517241,
704
+ "eval_Vitaminc-test_manhattan_accuracy": 0.5566666666666666,
705
+ "eval_Vitaminc-test_manhattan_accuracy_threshold": 171.0796661376953,
706
+ "eval_Vitaminc-test_manhattan_ap": 0.5296616128404239,
707
+ "eval_Vitaminc-test_manhattan_f1": 0.6605922551252847,
708
+ "eval_Vitaminc-test_manhattan_f1_threshold": 271.766357421875,
709
+ "eval_Vitaminc-test_manhattan_precision": 0.4931972789115646,
710
+ "eval_Vitaminc-test_manhattan_recall": 1.0,
711
+ "eval_Vitaminc-test_max_accuracy": 0.5633333333333334,
712
+ "eval_Vitaminc-test_max_accuracy_threshold": 171.0796661376953,
713
+ "eval_Vitaminc-test_max_ap": 0.5319629878299388,
714
+ "eval_Vitaminc-test_max_f1": 0.6635071090047393,
715
+ "eval_Vitaminc-test_max_f1_threshold": 271.766357421875,
716
+ "eval_Vitaminc-test_max_precision": 0.5054151624548736,
717
+ "eval_Vitaminc-test_max_recall": 1.0,
718
+ "eval_mrpc-test_cosine_accuracy": 0.7333333333333333,
719
+ "eval_mrpc-test_cosine_accuracy_threshold": 0.7723015546798706,
720
+ "eval_mrpc-test_cosine_ap": 0.847047462638431,
721
+ "eval_mrpc-test_cosine_f1": 0.8137931034482758,
722
+ "eval_mrpc-test_cosine_f1_threshold": 0.7615677118301392,
723
+ "eval_mrpc-test_cosine_precision": 0.7405857740585774,
724
+ "eval_mrpc-test_cosine_recall": 0.9030612244897959,
725
+ "eval_mrpc-test_dot_accuracy": 0.6766666666666666,
726
+ "eval_mrpc-test_dot_accuracy_threshold": 71.28694915771484,
727
+ "eval_mrpc-test_dot_ap": 0.7597503410099737,
728
+ "eval_mrpc-test_dot_f1": 0.7967479674796748,
729
+ "eval_mrpc-test_dot_f1_threshold": 60.0638427734375,
730
+ "eval_mrpc-test_dot_precision": 0.6621621621621622,
731
+ "eval_mrpc-test_dot_recall": 1.0,
732
+ "eval_mrpc-test_euclidean_accuracy": 0.72,
733
+ "eval_mrpc-test_euclidean_accuracy_threshold": 6.903799057006836,
734
+ "eval_mrpc-test_euclidean_ap": 0.832934238772353,
735
+ "eval_mrpc-test_euclidean_f1": 0.8025751072961373,
736
+ "eval_mrpc-test_euclidean_f1_threshold": 8.285726547241211,
737
+ "eval_mrpc-test_euclidean_precision": 0.6925925925925925,
738
+ "eval_mrpc-test_euclidean_recall": 0.9540816326530612,
739
+ "eval_mrpc-test_manhattan_accuracy": 0.7166666666666667,
740
+ "eval_mrpc-test_manhattan_accuracy_threshold": 144.24057006835938,
741
+ "eval_mrpc-test_manhattan_ap": 0.8379421798530551,
742
+ "eval_mrpc-test_manhattan_f1": 0.8079470198675496,
743
+ "eval_mrpc-test_manhattan_f1_threshold": 158.62255859375,
744
+ "eval_mrpc-test_manhattan_precision": 0.7120622568093385,
745
+ "eval_mrpc-test_manhattan_recall": 0.9336734693877551,
746
+ "eval_mrpc-test_max_accuracy": 0.7333333333333333,
747
+ "eval_mrpc-test_max_accuracy_threshold": 144.24057006835938,
748
+ "eval_mrpc-test_max_ap": 0.847047462638431,
749
+ "eval_mrpc-test_max_f1": 0.8137931034482758,
750
+ "eval_mrpc-test_max_f1_threshold": 158.62255859375,
751
+ "eval_mrpc-test_max_precision": 0.7405857740585774,
752
+ "eval_mrpc-test_max_recall": 1.0,
753
+ "eval_negationNLI-test_cosine_accuracy": 1.0,
754
+ "eval_negationNLI-test_dot_accuracy": 0.0,
755
+ "eval_negationNLI-test_euclidean_accuracy": 1.0,
756
+ "eval_negationNLI-test_manhattan_accuracy": 1.0,
757
+ "eval_negationNLI-test_max_accuracy": 1.0,
758
+ "eval_nli-pairs_loss": 3.2204086780548096,
759
+ "eval_nli-pairs_runtime": 2.7643,
760
+ "eval_nli-pairs_samples_per_second": 361.751,
761
+ "eval_nli-pairs_steps_per_second": 7.235,
762
+ "eval_sequential_score": 0.5319629878299388,
763
+ "eval_sts-test_pearson_cosine": 0.816293607681843,
764
+ "eval_sts-test_pearson_dot": 0.7700870243703964,
765
+ "eval_sts-test_pearson_euclidean": 0.8022637024623361,
766
+ "eval_sts-test_pearson_manhattan": 0.8129923580109858,
767
+ "eval_sts-test_pearson_max": 0.816293607681843,
768
+ "eval_sts-test_spearman_cosine": 0.8161010743022479,
769
+ "eval_sts-test_spearman_dot": 0.7831264441454899,
770
+ "eval_sts-test_spearman_euclidean": 0.8001752377809467,
771
+ "eval_sts-test_spearman_manhattan": 0.8099359329667263,
772
+ "eval_sts-test_spearman_max": 0.8161010743022479,
773
+ "step": 2220
774
+ },
775
+ {
776
+ "epoch": 0.750760906323977,
777
+ "eval_vitaminc-pairs_loss": 5.918190002441406,
778
+ "eval_vitaminc-pairs_runtime": 0.3701,
779
+ "eval_vitaminc-pairs_samples_per_second": 270.161,
780
+ "eval_vitaminc-pairs_steps_per_second": 5.403,
781
+ "step": 2220
782
+ },
783
+ {
784
+ "epoch": 0.750760906323977,
785
+ "eval_negation-triplets_loss": 4.330984115600586,
786
+ "eval_negation-triplets_runtime": 0.1178,
787
+ "eval_negation-triplets_samples_per_second": 568.728,
788
+ "eval_negation-triplets_steps_per_second": 16.977,
789
+ "step": 2220
790
+ },
791
+ {
792
+ "epoch": 0.750760906323977,
793
+ "eval_qnli-contrastive_loss": 2.517563581466675,
794
+ "eval_qnli-contrastive_runtime": 0.1727,
795
+ "eval_qnli-contrastive_samples_per_second": 579.006,
796
+ "eval_qnli-contrastive_steps_per_second": 11.58,
797
+ "step": 2220
798
+ },
799
+ {
800
+ "epoch": 0.750760906323977,
801
+ "eval_scitail-pairs-qa_loss": 0.16213805973529816,
802
+ "eval_scitail-pairs-qa_runtime": 0.2729,
803
+ "eval_scitail-pairs-qa_samples_per_second": 366.427,
804
+ "eval_scitail-pairs-qa_steps_per_second": 7.329,
805
+ "step": 2220
806
+ },
807
+ {
808
+ "epoch": 0.750760906323977,
809
+ "eval_scitail-pairs-pos_loss": 0.8153313994407654,
810
+ "eval_scitail-pairs-pos_runtime": 0.5332,
811
+ "eval_scitail-pairs-pos_samples_per_second": 187.536,
812
+ "eval_scitail-pairs-pos_steps_per_second": 3.751,
813
+ "step": 2220
814
+ },
815
+ {
816
+ "epoch": 0.750760906323977,
817
+ "eval_xsum-pairs_loss": 1.5002162456512451,
818
+ "eval_xsum-pairs_runtime": 0.6643,
819
+ "eval_xsum-pairs_samples_per_second": 150.54,
820
+ "eval_xsum-pairs_steps_per_second": 3.011,
821
+ "step": 2220
822
+ },
823
+ {
824
+ "epoch": 0.750760906323977,
825
+ "eval_compression-pairs_loss": 1.024855613708496,
826
+ "eval_compression-pairs_runtime": 0.099,
827
+ "eval_compression-pairs_samples_per_second": 1009.822,
828
+ "eval_compression-pairs_steps_per_second": 20.196,
829
+ "step": 2220
830
+ },
831
+ {
832
+ "epoch": 0.750760906323977,
833
+ "eval_sciq_pairs_loss": 1.4846413135528564,
834
+ "eval_sciq_pairs_runtime": 1.9965,
835
+ "eval_sciq_pairs_samples_per_second": 50.088,
836
+ "eval_sciq_pairs_steps_per_second": 1.002,
837
+ "step": 2220
838
+ },
839
+ {
840
+ "epoch": 0.750760906323977,
841
+ "eval_qasc_pairs_loss": 1.003045916557312,
842
+ "eval_qasc_pairs_runtime": 0.233,
843
+ "eval_qasc_pairs_samples_per_second": 429.119,
844
+ "eval_qasc_pairs_steps_per_second": 8.582,
845
+ "step": 2220
846
+ },
847
+ {
848
+ "epoch": 0.750760906323977,
849
+ "eval_qasc_facts_sym_loss": 0.9863900542259216,
850
+ "eval_qasc_facts_sym_runtime": 0.0963,
851
+ "eval_qasc_facts_sym_samples_per_second": 1038.618,
852
+ "eval_qasc_facts_sym_steps_per_second": 20.772,
853
+ "step": 2220
854
+ },
855
+ {
856
+ "epoch": 0.750760906323977,
857
+ "eval_openbookqa_pairs_loss": 2.242171049118042,
858
+ "eval_openbookqa_pairs_runtime": 0.2893,
859
+ "eval_openbookqa_pairs_samples_per_second": 345.65,
860
+ "eval_openbookqa_pairs_steps_per_second": 6.913,
861
+ "step": 2220
862
+ },
863
+ {
864
+ "epoch": 0.750760906323977,
865
+ "eval_msmarco_pairs_loss": 3.1295664310455322,
866
+ "eval_msmarco_pairs_runtime": 0.5364,
867
+ "eval_msmarco_pairs_samples_per_second": 186.435,
868
+ "eval_msmarco_pairs_steps_per_second": 3.729,
869
+ "step": 2220
870
+ },
871
+ {
872
+ "epoch": 0.750760906323977,
873
+ "eval_nq_pairs_loss": 4.279793739318848,
874
+ "eval_nq_pairs_runtime": 1.5629,
875
+ "eval_nq_pairs_samples_per_second": 63.982,
876
+ "eval_nq_pairs_steps_per_second": 1.28,
877
+ "step": 2220
878
+ },
879
+ {
880
+ "epoch": 0.750760906323977,
881
+ "eval_trivia_pairs_loss": 4.566234588623047,
882
+ "eval_trivia_pairs_runtime": 2.08,
883
+ "eval_trivia_pairs_samples_per_second": 48.076,
884
+ "eval_trivia_pairs_steps_per_second": 0.962,
885
+ "step": 2220
886
+ },
887
+ {
888
+ "epoch": 0.750760906323977,
889
+ "eval_quora_pairs_loss": 0.7613513469696045,
890
+ "eval_quora_pairs_runtime": 7.6052,
891
+ "eval_quora_pairs_samples_per_second": 220.903,
892
+ "eval_quora_pairs_steps_per_second": 4.471,
893
+ "step": 2220
894
+ },
895
+ {
896
+ "epoch": 0.750760906323977,
897
+ "eval_gooaq_pairs_loss": 2.5801427364349365,
898
+ "eval_gooaq_pairs_runtime": 0.4287,
899
+ "eval_gooaq_pairs_samples_per_second": 233.285,
900
+ "eval_gooaq_pairs_steps_per_second": 4.666,
901
+ "step": 2220
902
+ },
903
+ {
904
+ "epoch": 0.750760906323977,
905
+ "eval_mrpc_pairs_loss": 0.4525637924671173,
906
+ "eval_mrpc_pairs_runtime": 0.1029,
907
+ "eval_mrpc_pairs_samples_per_second": 971.921,
908
+ "eval_mrpc_pairs_steps_per_second": 19.438,
909
+ "step": 2220
910
+ },
911
+ {
912
+ "epoch": 0.7757862698681096,
913
+ "grad_norm": 6.072179794311523,
914
+ "learning_rate": 2.7081501521812646e-05,
915
+ "loss": 2.8803,
916
+ "step": 2294
917
+ },
918
+ {
919
+ "epoch": 0.8008116334122422,
920
+ "grad_norm": 26.244081497192383,
921
+ "learning_rate": 2.7957389245857284e-05,
922
+ "loss": 3.1122,
923
+ "step": 2368
924
+ },
925
+ {
926
+ "epoch": 0.8258369969563747,
927
+ "grad_norm": 20.519241333007812,
928
+ "learning_rate": 2.8833276969901922e-05,
929
+ "loss": 2.7361,
930
+ "step": 2442
931
+ },
932
+ {
933
+ "epoch": 0.8508623605005072,
934
+ "grad_norm": 22.986614227294922,
935
+ "learning_rate": 2.9709164693946567e-05,
936
+ "loss": 2.4439,
937
+ "step": 2516
938
+ },
939
+ {
940
+ "epoch": 0.8758877240446399,
941
+ "grad_norm": 32.69511032104492,
942
+ "learning_rate": 3.0585052417991205e-05,
943
+ "loss": 2.5529,
944
+ "step": 2590
945
+ },
946
+ {
947
+ "epoch": 0.9009130875887724,
948
+ "grad_norm": 25.88425636291504,
949
+ "learning_rate": 3.146094014203584e-05,
950
+ "loss": 2.4578,
951
+ "step": 2664
952
+ },
953
+ {
954
+ "epoch": 0.925938451132905,
955
+ "grad_norm": 13.460915565490723,
956
+ "learning_rate": 3.233682786608048e-05,
957
+ "loss": 2.6346,
958
+ "step": 2738
959
+ },
960
+ {
961
+ "epoch": 0.9509638146770375,
962
+ "grad_norm": 14.984380722045898,
963
+ "learning_rate": 3.3212715590125126e-05,
964
+ "loss": 2.4693,
965
+ "step": 2812
966
+ },
967
+ {
968
+ "epoch": 0.9759891782211702,
969
+ "grad_norm": 8.878656387329102,
970
+ "learning_rate": 3.4088603314169764e-05,
971
+ "loss": 2.4323,
972
+ "step": 2886
973
+ }
974
+ ],
975
+ "logging_steps": 74,
976
+ "max_steps": 29570,
977
+ "num_input_tokens_seen": 0,
978
+ "num_train_epochs": 10,
979
+ "save_steps": 2957,
980
+ "stateful_callbacks": {
981
+ "TrainerControl": {
982
+ "args": {
983
+ "should_epoch_stop": false,
984
+ "should_evaluate": false,
985
+ "should_log": false,
986
+ "should_save": true,
987
+ "should_training_stop": false
988
+ },
989
+ "attributes": {}
990
+ }
991
+ },
992
+ "total_flos": 0.0,
993
+ "train_batch_size": 50,
994
+ "trial_name": null,
995
+ "trial_params": null
996
+ }
checkpoint-2957/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:801d93e289af089895b3e1c6cc7bb0568e1ac2646f87cfdb4957b56417377cd5
3
+ size 5624