bobox committed on
Commit
0a329a0
1 Parent(s): 98d78cf

Training in progress, step 8811, checkpoint

checkpoint-8811/1_Pooling/config.json ADDED
@@ -0,0 +1,10 @@
+ {
+ "word_embedding_dimension": 768,
+ "pooling_mode_cls_token": false,
+ "pooling_mode_mean_tokens": true,
+ "pooling_mode_max_tokens": false,
+ "pooling_mode_mean_sqrt_len_tokens": false,
+ "pooling_mode_weightedmean_tokens": false,
+ "pooling_mode_lasttoken": false,
+ "include_prompt": true
+ }
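
The pooling config above selects mean pooling over 768-dimensional token embeddings (`pooling_mode_mean_tokens: true`). As a rough illustration of what that does, not the exact sentence-transformers implementation:

```python
# Sketch of masked mean pooling, assuming `token_embeddings` comes from the
# transformer backbone and `attention_mask` from the tokenizer.
import torch

def mean_pool(token_embeddings: torch.Tensor, attention_mask: torch.Tensor) -> torch.Tensor:
    # token_embeddings: (batch, seq_len, 768), attention_mask: (batch, seq_len)
    mask = attention_mask.unsqueeze(-1).to(token_embeddings.dtype)  # (batch, seq_len, 1)
    summed = (token_embeddings * mask).sum(dim=1)                   # ignore padding positions
    counts = mask.sum(dim=1).clamp(min=1e-9)                        # avoid division by zero
    return summed / counts                                          # (batch, 768) sentence embeddings
```
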
checkpoint-8811/README.md ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-8811/added_tokens.json ADDED
@@ -0,0 +1,3 @@
+ {
+ "[MASK]": 128000
+ }
checkpoint-8811/config.json ADDED
@@ -0,0 +1,35 @@
+ {
+ "_name_or_path": "microsoft/deberta-v3-small",
+ "architectures": [
+ "DebertaV2Model"
+ ],
+ "attention_probs_dropout_prob": 0.1,
+ "hidden_act": "gelu",
+ "hidden_dropout_prob": 0.1,
+ "hidden_size": 768,
+ "initializer_range": 0.02,
+ "intermediate_size": 3072,
+ "layer_norm_eps": 1e-07,
+ "max_position_embeddings": 512,
+ "max_relative_positions": -1,
+ "model_type": "deberta-v2",
+ "norm_rel_ebd": "layer_norm",
+ "num_attention_heads": 12,
+ "num_hidden_layers": 6,
+ "pad_token_id": 0,
+ "pooler_dropout": 0,
+ "pooler_hidden_act": "gelu",
+ "pooler_hidden_size": 768,
+ "pos_att_type": [
+ "p2c",
+ "c2p"
+ ],
+ "position_biased_input": false,
+ "position_buckets": 256,
+ "relative_attention": true,
+ "share_att_key": true,
+ "torch_dtype": "float32",
+ "transformers_version": "4.41.2",
+ "type_vocab_size": 0,
+ "vocab_size": 128100
+ }
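
config.json describes the backbone: a 6-layer DeBERTa-v2 encoder (from microsoft/deberta-v3-small) with hidden size 768 and relative attention. A minimal inspection/loading sketch with transformers, assuming the checkpoint directory has been downloaded locally as `checkpoint-8811`:

```python
# Sketch: loading the backbone described by this config from a local copy.
# The "checkpoint-8811" path is illustrative; point it at the downloaded folder.
from transformers import AutoConfig, AutoModel

config = AutoConfig.from_pretrained("checkpoint-8811")
print(config.model_type, config.num_hidden_layers, config.hidden_size)  # deberta-v2 6 768

model = AutoModel.from_pretrained("checkpoint-8811")  # loads pytorch_model.bin into DebertaV2Model
```
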
checkpoint-8811/config_sentence_transformers.json ADDED
@@ -0,0 +1,10 @@
+ {
+ "__version__": {
+ "sentence_transformers": "3.0.1",
+ "transformers": "4.41.2",
+ "pytorch": "2.1.2"
+ },
+ "prompts": {},
+ "default_prompt_name": null,
+ "similarity_fn_name": null
+ }
checkpoint-8811/modules.json ADDED
@@ -0,0 +1,14 @@
+ [
+ {
+ "idx": 0,
+ "name": "0",
+ "path": "",
+ "type": "sentence_transformers.models.Transformer"
+ },
+ {
+ "idx": 1,
+ "name": "1",
+ "path": "1_Pooling",
+ "type": "sentence_transformers.models.Pooling"
+ }
+ ]
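
modules.json is what lets sentence-transformers rebuild the encoder: module 0 is the Transformer in this directory, module 1 the Pooling layer configured in 1_Pooling. A minimal loading sketch, again assuming a local copy at `checkpoint-8811`:

```python
# Sketch: sentence-transformers reads modules.json, then each module's folder.
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("checkpoint-8811")  # illustrative local path
embeddings = model.encode([
    "A DeBERTa-v3-small encoder with mean pooling.",
    "Sentences map to 768-dimensional vectors.",
])
print(embeddings.shape)      # (2, 768)
print(model.max_seq_length)  # 512, from sentence_bert_config.json
```
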
checkpoint-8811/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e58a0da3ab537f586ad6bcff2eeff1769b149cf1bf60df332e37a7f186f7fa35
+ size 1130520122
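
optimizer.pt (like pytorch_model.bin, rng_state.pth, scheduler.pt and spm.model below) is stored as a Git LFS pointer: the three lines record the spec version, the sha256 oid and the byte size of the real blob (here roughly 1.1 GB), which Git LFS fetches separately. A small sketch for checking a downloaded copy against the recorded oid, assuming the file sits at `checkpoint-8811/optimizer.pt`:

```python
# Sketch: verify a downloaded LFS object against the oid in the pointer file above.
import hashlib

def sha256_of(path: str, chunk_size: int = 1 << 20) -> str:
    digest = hashlib.sha256()
    with open(path, "rb") as handle:
        for chunk in iter(lambda: handle.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()

expected = "e58a0da3ab537f586ad6bcff2eeff1769b149cf1bf60df332e37a7f186f7fa35"
assert sha256_of("checkpoint-8811/optimizer.pt") == expected
```
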
checkpoint-8811/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e7522d4105e4af3a2adedfa83d21607dce66e4e241b0308e0d31dd7468b8e646
+ size 565251810
checkpoint-8811/rng_state.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:28637c9cd6032adc3ef18d586d9be8660c63460a3d218c7ccf2310dca8f31aff
+ size 14244
checkpoint-8811/scheduler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e62cf3c3afd2e6440945f1088002fbdabc3b6b9cf6b88d839f995fefb0fc0bbf
+ size 1064
checkpoint-8811/sentence_bert_config.json ADDED
@@ -0,0 +1,4 @@
+ {
+ "max_seq_length": 512,
+ "do_lower_case": false
+ }
checkpoint-8811/special_tokens_map.json ADDED
@@ -0,0 +1,15 @@
+ {
+ "bos_token": "[CLS]",
+ "cls_token": "[CLS]",
+ "eos_token": "[SEP]",
+ "mask_token": "[MASK]",
+ "pad_token": "[PAD]",
+ "sep_token": "[SEP]",
+ "unk_token": {
+ "content": "[UNK]",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false
+ }
+ }
checkpoint-8811/spm.model ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c679fbf93643d19aab7ee10c0b99e460bdbc02fedf34b92b05af343b4af586fd
+ size 2464616
checkpoint-8811/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-8811/tokenizer_config.json ADDED
@@ -0,0 +1,58 @@
+ {
+ "added_tokens_decoder": {
+ "0": {
+ "content": "[PAD]",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "1": {
+ "content": "[CLS]",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "2": {
+ "content": "[SEP]",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "3": {
+ "content": "[UNK]",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128000": {
+ "content": "[MASK]",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ }
+ },
+ "bos_token": "[CLS]",
+ "clean_up_tokenization_spaces": true,
+ "cls_token": "[CLS]",
+ "do_lower_case": false,
+ "eos_token": "[SEP]",
+ "mask_token": "[MASK]",
+ "model_max_length": 1000000000000000019884624838656,
+ "pad_token": "[PAD]",
+ "sep_token": "[SEP]",
+ "sp_model_kwargs": {},
+ "split_by_punct": false,
+ "tokenizer_class": "DebertaV2Tokenizer",
+ "unk_token": "[UNK]",
+ "vocab_type": "spm"
+ }
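
tokenizer_config.json ties the SentencePiece model (spm.model) to DebertaV2Tokenizer, with [PAD], [CLS], [SEP] and [UNK] at ids 0-3 and [MASK] appended at id 128000 (see added_tokens.json). A short loading sketch, assuming a local `checkpoint-8811` directory and the `sentencepiece` package installed:

```python
# Sketch: loading the tokenizer defined by tokenizer_config.json / spm.model.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("checkpoint-8811")  # illustrative local path
print(type(tokenizer).__name__)                        # DebertaV2Tokenizer or its fast variant
print(tokenizer.mask_token, tokenizer.mask_token_id)   # [MASK] 128000

encoded = tokenizer("sentence embeddings", truncation=True, max_length=512)
print(encoded["input_ids"][:3])                        # starts with the [CLS] id (1)
```
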
checkpoint-8811/trainer_state.json ADDED
@@ -0,0 +1,1876 @@
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 1.5,
5
+ "eval_steps": 735,
6
+ "global_step": 8811,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.02502553626149132,
13
+ "grad_norm": 65.55949401855469,
14
+ "learning_rate": 4.834865509022812e-07,
15
+ "loss": 16.851,
16
+ "step": 147
17
+ },
18
+ {
19
+ "epoch": 0.05005107252298264,
20
+ "grad_norm": 23.207971572875977,
21
+ "learning_rate": 9.805924412665985e-07,
22
+ "loss": 11.2787,
23
+ "step": 294
24
+ },
25
+ {
26
+ "epoch": 0.07507660878447395,
27
+ "grad_norm": 176.1532440185547,
28
+ "learning_rate": 1.481103166496425e-06,
29
+ "loss": 8.9166,
30
+ "step": 441
31
+ },
32
+ {
33
+ "epoch": 0.10010214504596528,
34
+ "grad_norm": 22.1564998626709,
35
+ "learning_rate": 1.981613891726251e-06,
36
+ "loss": 7.9463,
37
+ "step": 588
38
+ },
39
+ {
40
+ "epoch": 0.12512768130745658,
41
+ "grad_norm": 20.11876106262207,
42
+ "learning_rate": 2.4821246169560777e-06,
43
+ "loss": 7.2108,
44
+ "step": 735
45
+ },
46
+ {
47
+ "epoch": 0.12512768130745658,
48
+ "eval_nli-pairs_loss": 6.905651569366455,
49
+ "eval_nli-pairs_runtime": 4.0844,
50
+ "eval_nli-pairs_samples_per_second": 36.725,
51
+ "eval_nli-pairs_steps_per_second": 1.224,
52
+ "eval_sts-test_pearson_cosine": 0.3740256550072784,
53
+ "eval_sts-test_pearson_dot": 0.13384893803205677,
54
+ "eval_sts-test_pearson_euclidean": 0.3912387619869807,
55
+ "eval_sts-test_pearson_manhattan": 0.4202605137823524,
56
+ "eval_sts-test_pearson_max": 0.4202605137823524,
57
+ "eval_sts-test_spearman_cosine": 0.37210107338950205,
58
+ "eval_sts-test_spearman_dot": 0.12092409843417483,
59
+ "eval_sts-test_spearman_euclidean": 0.39172287978780546,
60
+ "eval_sts-test_spearman_manhattan": 0.4169664738563951,
61
+ "eval_sts-test_spearman_max": 0.4169664738563951,
62
+ "step": 735
63
+ },
64
+ {
65
+ "epoch": 0.12512768130745658,
66
+ "eval_vitaminc-pairs_loss": 5.720878601074219,
67
+ "eval_vitaminc-pairs_runtime": 2.1703,
68
+ "eval_vitaminc-pairs_samples_per_second": 69.115,
69
+ "eval_vitaminc-pairs_steps_per_second": 2.304,
70
+ "step": 735
71
+ },
72
+ {
73
+ "epoch": 0.12512768130745658,
74
+ "eval_qnli-contrastive_loss": 8.1649751663208,
75
+ "eval_qnli-contrastive_runtime": 0.4937,
76
+ "eval_qnli-contrastive_samples_per_second": 303.841,
77
+ "eval_qnli-contrastive_steps_per_second": 10.128,
78
+ "step": 735
79
+ },
80
+ {
81
+ "epoch": 0.12512768130745658,
82
+ "eval_scitail-pairs-qa_loss": 3.7859296798706055,
83
+ "eval_scitail-pairs-qa_runtime": 1.1509,
84
+ "eval_scitail-pairs-qa_samples_per_second": 130.329,
85
+ "eval_scitail-pairs-qa_steps_per_second": 4.344,
86
+ "step": 735
87
+ },
88
+ {
89
+ "epoch": 0.12512768130745658,
90
+ "eval_scitail-pairs-pos_loss": 3.9919917583465576,
91
+ "eval_scitail-pairs-pos_runtime": 2.1442,
92
+ "eval_scitail-pairs-pos_samples_per_second": 69.956,
93
+ "eval_scitail-pairs-pos_steps_per_second": 2.332,
94
+ "step": 735
95
+ },
96
+ {
97
+ "epoch": 0.12512768130745658,
98
+ "eval_xsum-pairs_loss": 4.600368976593018,
99
+ "eval_xsum-pairs_runtime": 2.26,
100
+ "eval_xsum-pairs_samples_per_second": 66.371,
101
+ "eval_xsum-pairs_steps_per_second": 2.212,
102
+ "step": 735
103
+ },
104
+ {
105
+ "epoch": 0.12512768130745658,
106
+ "eval_compression-pairs_loss": 3.3037569522857666,
107
+ "eval_compression-pairs_runtime": 0.449,
108
+ "eval_compression-pairs_samples_per_second": 334.078,
109
+ "eval_compression-pairs_steps_per_second": 11.136,
110
+ "step": 735
111
+ },
112
+ {
113
+ "epoch": 0.12512768130745658,
114
+ "eval_sciq_pairs_loss": 10.214456558227539,
115
+ "eval_sciq_pairs_runtime": 7.1179,
116
+ "eval_sciq_pairs_samples_per_second": 21.074,
117
+ "eval_sciq_pairs_steps_per_second": 0.702,
118
+ "step": 735
119
+ },
120
+ {
121
+ "epoch": 0.12512768130745658,
122
+ "eval_qasc_pairs_loss": 10.58031940460205,
123
+ "eval_qasc_pairs_runtime": 2.0175,
124
+ "eval_qasc_pairs_samples_per_second": 74.348,
125
+ "eval_qasc_pairs_steps_per_second": 2.478,
126
+ "step": 735
127
+ },
128
+ {
129
+ "epoch": 0.12512768130745658,
130
+ "eval_openbookqa_pairs_loss": 7.862658977508545,
131
+ "eval_openbookqa_pairs_runtime": 0.8571,
132
+ "eval_openbookqa_pairs_samples_per_second": 120.168,
133
+ "eval_openbookqa_pairs_steps_per_second": 4.667,
134
+ "step": 735
135
+ },
136
+ {
137
+ "epoch": 0.12512768130745658,
138
+ "eval_msmarco_pairs_loss": 8.754273414611816,
139
+ "eval_msmarco_pairs_runtime": 2.7533,
140
+ "eval_msmarco_pairs_samples_per_second": 54.481,
141
+ "eval_msmarco_pairs_steps_per_second": 1.816,
142
+ "step": 735
143
+ },
144
+ {
145
+ "epoch": 0.12512768130745658,
146
+ "eval_nq_pairs_loss": 8.415486335754395,
147
+ "eval_nq_pairs_runtime": 5.0894,
148
+ "eval_nq_pairs_samples_per_second": 29.473,
149
+ "eval_nq_pairs_steps_per_second": 0.982,
150
+ "step": 735
151
+ },
152
+ {
153
+ "epoch": 0.12512768130745658,
154
+ "eval_trivia_pairs_loss": 9.051105499267578,
155
+ "eval_trivia_pairs_runtime": 9.5498,
156
+ "eval_trivia_pairs_samples_per_second": 15.707,
157
+ "eval_trivia_pairs_steps_per_second": 0.524,
158
+ "step": 735
159
+ },
160
+ {
161
+ "epoch": 0.12512768130745658,
162
+ "eval_quora_pairs_loss": 4.5232110023498535,
163
+ "eval_quora_pairs_runtime": 1.1469,
164
+ "eval_quora_pairs_samples_per_second": 130.785,
165
+ "eval_quora_pairs_steps_per_second": 4.36,
166
+ "step": 735
167
+ },
168
+ {
169
+ "epoch": 0.12512768130745658,
170
+ "eval_gooaq_pairs_loss": 7.579105854034424,
171
+ "eval_gooaq_pairs_runtime": 2.0491,
172
+ "eval_gooaq_pairs_samples_per_second": 73.203,
173
+ "eval_gooaq_pairs_steps_per_second": 2.44,
174
+ "step": 735
175
+ },
176
+ {
177
+ "epoch": 0.1501532175689479,
178
+ "grad_norm": 31.7736759185791,
179
+ "learning_rate": 2.982635342185904e-06,
180
+ "loss": 6.7709,
181
+ "step": 882
182
+ },
183
+ {
184
+ "epoch": 0.1751787538304392,
185
+ "grad_norm": 31.57339096069336,
186
+ "learning_rate": 3.4831460674157306e-06,
187
+ "loss": 6.1746,
188
+ "step": 1029
189
+ },
190
+ {
191
+ "epoch": 0.20020429009193055,
192
+ "grad_norm": 25.392702102661133,
193
+ "learning_rate": 3.9836567926455565e-06,
194
+ "loss": 5.7706,
195
+ "step": 1176
196
+ },
197
+ {
198
+ "epoch": 0.22522982635342187,
199
+ "grad_norm": 32.390472412109375,
200
+ "learning_rate": 4.484167517875383e-06,
201
+ "loss": 5.7283,
202
+ "step": 1323
203
+ },
204
+ {
205
+ "epoch": 0.25025536261491316,
206
+ "grad_norm": 18.85039520263672,
207
+ "learning_rate": 4.98467824310521e-06,
208
+ "loss": 5.1856,
209
+ "step": 1470
210
+ },
211
+ {
212
+ "epoch": 0.25025536261491316,
213
+ "eval_nli-pairs_loss": 4.352054119110107,
214
+ "eval_nli-pairs_runtime": 4.1476,
215
+ "eval_nli-pairs_samples_per_second": 36.165,
216
+ "eval_nli-pairs_steps_per_second": 1.206,
217
+ "eval_sts-test_pearson_cosine": 0.6694155778571752,
218
+ "eval_sts-test_pearson_dot": 0.5201102118957572,
219
+ "eval_sts-test_pearson_euclidean": 0.6613028243200022,
220
+ "eval_sts-test_pearson_manhattan": 0.6670710500315469,
221
+ "eval_sts-test_pearson_max": 0.6694155778571752,
222
+ "eval_sts-test_spearman_cosine": 0.6367853204388882,
223
+ "eval_sts-test_spearman_dot": 0.4940207180607985,
224
+ "eval_sts-test_spearman_euclidean": 0.6391132775161348,
225
+ "eval_sts-test_spearman_manhattan": 0.6446159957787251,
226
+ "eval_sts-test_spearman_max": 0.6446159957787251,
227
+ "step": 1470
228
+ },
229
+ {
230
+ "epoch": 0.25025536261491316,
231
+ "eval_vitaminc-pairs_loss": 3.4987735748291016,
232
+ "eval_vitaminc-pairs_runtime": 2.1678,
233
+ "eval_vitaminc-pairs_samples_per_second": 69.194,
234
+ "eval_vitaminc-pairs_steps_per_second": 2.306,
235
+ "step": 1470
236
+ },
237
+ {
238
+ "epoch": 0.25025536261491316,
239
+ "eval_qnli-contrastive_loss": 12.915559768676758,
240
+ "eval_qnli-contrastive_runtime": 0.4918,
241
+ "eval_qnli-contrastive_samples_per_second": 304.99,
242
+ "eval_qnli-contrastive_steps_per_second": 10.166,
243
+ "step": 1470
244
+ },
245
+ {
246
+ "epoch": 0.25025536261491316,
247
+ "eval_scitail-pairs-qa_loss": 1.3250077962875366,
248
+ "eval_scitail-pairs-qa_runtime": 1.154,
249
+ "eval_scitail-pairs-qa_samples_per_second": 129.984,
250
+ "eval_scitail-pairs-qa_steps_per_second": 4.333,
251
+ "step": 1470
252
+ },
253
+ {
254
+ "epoch": 0.25025536261491316,
255
+ "eval_scitail-pairs-pos_loss": 2.457335948944092,
256
+ "eval_scitail-pairs-pos_runtime": 2.1475,
257
+ "eval_scitail-pairs-pos_samples_per_second": 69.85,
258
+ "eval_scitail-pairs-pos_steps_per_second": 2.328,
259
+ "step": 1470
260
+ },
261
+ {
262
+ "epoch": 0.25025536261491316,
263
+ "eval_xsum-pairs_loss": 3.071201801300049,
264
+ "eval_xsum-pairs_runtime": 2.2634,
265
+ "eval_xsum-pairs_samples_per_second": 66.271,
266
+ "eval_xsum-pairs_steps_per_second": 2.209,
267
+ "step": 1470
268
+ },
269
+ {
270
+ "epoch": 0.25025536261491316,
271
+ "eval_compression-pairs_loss": 2.0629916191101074,
272
+ "eval_compression-pairs_runtime": 0.4529,
273
+ "eval_compression-pairs_samples_per_second": 331.23,
274
+ "eval_compression-pairs_steps_per_second": 11.041,
275
+ "step": 1470
276
+ },
277
+ {
278
+ "epoch": 0.25025536261491316,
279
+ "eval_sciq_pairs_loss": 9.06814193725586,
280
+ "eval_sciq_pairs_runtime": 7.1445,
281
+ "eval_sciq_pairs_samples_per_second": 20.995,
282
+ "eval_sciq_pairs_steps_per_second": 0.7,
283
+ "step": 1470
284
+ },
285
+ {
286
+ "epoch": 0.25025536261491316,
287
+ "eval_qasc_pairs_loss": 9.245658874511719,
288
+ "eval_qasc_pairs_runtime": 2.0471,
289
+ "eval_qasc_pairs_samples_per_second": 73.274,
290
+ "eval_qasc_pairs_steps_per_second": 2.442,
291
+ "step": 1470
292
+ },
293
+ {
294
+ "epoch": 0.25025536261491316,
295
+ "eval_openbookqa_pairs_loss": 5.652446746826172,
296
+ "eval_openbookqa_pairs_runtime": 0.8946,
297
+ "eval_openbookqa_pairs_samples_per_second": 115.14,
298
+ "eval_openbookqa_pairs_steps_per_second": 4.471,
299
+ "step": 1470
300
+ },
301
+ {
302
+ "epoch": 0.25025536261491316,
303
+ "eval_msmarco_pairs_loss": 4.844855785369873,
304
+ "eval_msmarco_pairs_runtime": 2.7887,
305
+ "eval_msmarco_pairs_samples_per_second": 53.788,
306
+ "eval_msmarco_pairs_steps_per_second": 1.793,
307
+ "step": 1470
308
+ },
309
+ {
310
+ "epoch": 0.25025536261491316,
311
+ "eval_nq_pairs_loss": 5.023958206176758,
312
+ "eval_nq_pairs_runtime": 5.0823,
313
+ "eval_nq_pairs_samples_per_second": 29.514,
314
+ "eval_nq_pairs_steps_per_second": 0.984,
315
+ "step": 1470
316
+ },
317
+ {
318
+ "epoch": 0.25025536261491316,
319
+ "eval_trivia_pairs_loss": 5.2907304763793945,
320
+ "eval_trivia_pairs_runtime": 9.6673,
321
+ "eval_trivia_pairs_samples_per_second": 15.516,
322
+ "eval_trivia_pairs_steps_per_second": 0.517,
323
+ "step": 1470
324
+ },
325
+ {
326
+ "epoch": 0.25025536261491316,
327
+ "eval_quora_pairs_loss": 1.5572240352630615,
328
+ "eval_quora_pairs_runtime": 1.1979,
329
+ "eval_quora_pairs_samples_per_second": 125.218,
330
+ "eval_quora_pairs_steps_per_second": 4.174,
331
+ "step": 1470
332
+ },
333
+ {
334
+ "epoch": 0.25025536261491316,
335
+ "eval_gooaq_pairs_loss": 3.970768928527832,
336
+ "eval_gooaq_pairs_runtime": 2.117,
337
+ "eval_gooaq_pairs_samples_per_second": 70.855,
338
+ "eval_gooaq_pairs_steps_per_second": 2.362,
339
+ "step": 1470
340
+ },
341
+ {
342
+ "epoch": 0.2752808988764045,
343
+ "grad_norm": 40.67585754394531,
344
+ "learning_rate": 5.4851889683350365e-06,
345
+ "loss": 4.185,
346
+ "step": 1617
347
+ },
348
+ {
349
+ "epoch": 0.3003064351378958,
350
+ "grad_norm": 45.92570495605469,
351
+ "learning_rate": 5.985699693564862e-06,
352
+ "loss": 4.6367,
353
+ "step": 1764
354
+ },
355
+ {
356
+ "epoch": 0.32533197139938713,
357
+ "grad_norm": 13.566838264465332,
358
+ "learning_rate": 6.486210418794688e-06,
359
+ "loss": 4.3615,
360
+ "step": 1911
361
+ },
362
+ {
363
+ "epoch": 0.3503575076608784,
364
+ "grad_norm": 9.495999336242676,
365
+ "learning_rate": 6.986721144024515e-06,
366
+ "loss": 4.1791,
367
+ "step": 2058
368
+ },
369
+ {
370
+ "epoch": 0.37538304392236976,
371
+ "grad_norm": 32.735416412353516,
372
+ "learning_rate": 7.487231869254341e-06,
373
+ "loss": 4.1051,
374
+ "step": 2205
375
+ },
376
+ {
377
+ "epoch": 0.37538304392236976,
378
+ "eval_nli-pairs_loss": 3.2717113494873047,
379
+ "eval_nli-pairs_runtime": 4.0124,
380
+ "eval_nli-pairs_samples_per_second": 37.384,
381
+ "eval_nli-pairs_steps_per_second": 1.246,
382
+ "eval_sts-test_pearson_cosine": 0.6958570089637609,
383
+ "eval_sts-test_pearson_dot": 0.5824298957890577,
384
+ "eval_sts-test_pearson_euclidean": 0.6893962819387462,
385
+ "eval_sts-test_pearson_manhattan": 0.6993681181979946,
386
+ "eval_sts-test_pearson_max": 0.6993681181979946,
387
+ "eval_sts-test_spearman_cosine": 0.6652712160836801,
388
+ "eval_sts-test_spearman_dot": 0.5536505624407877,
389
+ "eval_sts-test_spearman_euclidean": 0.6659844314307678,
390
+ "eval_sts-test_spearman_manhattan": 0.675740852112121,
391
+ "eval_sts-test_spearman_max": 0.675740852112121,
392
+ "step": 2205
393
+ },
394
+ {
395
+ "epoch": 0.37538304392236976,
396
+ "eval_vitaminc-pairs_loss": 2.7197911739349365,
397
+ "eval_vitaminc-pairs_runtime": 2.1625,
398
+ "eval_vitaminc-pairs_samples_per_second": 69.365,
399
+ "eval_vitaminc-pairs_steps_per_second": 2.312,
400
+ "step": 2205
401
+ },
402
+ {
403
+ "epoch": 0.37538304392236976,
404
+ "eval_qnli-contrastive_loss": 9.638714790344238,
405
+ "eval_qnli-contrastive_runtime": 0.4877,
406
+ "eval_qnli-contrastive_samples_per_second": 307.567,
407
+ "eval_qnli-contrastive_steps_per_second": 10.252,
408
+ "step": 2205
409
+ },
410
+ {
411
+ "epoch": 0.37538304392236976,
412
+ "eval_scitail-pairs-qa_loss": 0.8106752634048462,
413
+ "eval_scitail-pairs-qa_runtime": 1.1588,
414
+ "eval_scitail-pairs-qa_samples_per_second": 129.449,
415
+ "eval_scitail-pairs-qa_steps_per_second": 4.315,
416
+ "step": 2205
417
+ },
418
+ {
419
+ "epoch": 0.37538304392236976,
420
+ "eval_scitail-pairs-pos_loss": 1.8894625902175903,
421
+ "eval_scitail-pairs-pos_runtime": 2.1181,
422
+ "eval_scitail-pairs-pos_samples_per_second": 70.817,
423
+ "eval_scitail-pairs-pos_steps_per_second": 2.361,
424
+ "step": 2205
425
+ },
426
+ {
427
+ "epoch": 0.37538304392236976,
428
+ "eval_xsum-pairs_loss": 2.262718439102173,
429
+ "eval_xsum-pairs_runtime": 2.2585,
430
+ "eval_xsum-pairs_samples_per_second": 66.416,
431
+ "eval_xsum-pairs_steps_per_second": 2.214,
432
+ "step": 2205
433
+ },
434
+ {
435
+ "epoch": 0.37538304392236976,
436
+ "eval_compression-pairs_loss": 1.4910633563995361,
437
+ "eval_compression-pairs_runtime": 0.4462,
438
+ "eval_compression-pairs_samples_per_second": 336.204,
439
+ "eval_compression-pairs_steps_per_second": 11.207,
440
+ "step": 2205
441
+ },
442
+ {
443
+ "epoch": 0.37538304392236976,
444
+ "eval_sciq_pairs_loss": 8.59740161895752,
445
+ "eval_sciq_pairs_runtime": 7.1845,
446
+ "eval_sciq_pairs_samples_per_second": 20.878,
447
+ "eval_sciq_pairs_steps_per_second": 0.696,
448
+ "step": 2205
449
+ },
450
+ {
451
+ "epoch": 0.37538304392236976,
452
+ "eval_qasc_pairs_loss": 8.103879928588867,
453
+ "eval_qasc_pairs_runtime": 2.0762,
454
+ "eval_qasc_pairs_samples_per_second": 72.246,
455
+ "eval_qasc_pairs_steps_per_second": 2.408,
456
+ "step": 2205
457
+ },
458
+ {
459
+ "epoch": 0.37538304392236976,
460
+ "eval_openbookqa_pairs_loss": 5.090969562530518,
461
+ "eval_openbookqa_pairs_runtime": 0.89,
462
+ "eval_openbookqa_pairs_samples_per_second": 115.726,
463
+ "eval_openbookqa_pairs_steps_per_second": 4.494,
464
+ "step": 2205
465
+ },
466
+ {
467
+ "epoch": 0.37538304392236976,
468
+ "eval_msmarco_pairs_loss": 3.9566943645477295,
469
+ "eval_msmarco_pairs_runtime": 2.8183,
470
+ "eval_msmarco_pairs_samples_per_second": 53.223,
471
+ "eval_msmarco_pairs_steps_per_second": 1.774,
472
+ "step": 2205
473
+ },
474
+ {
475
+ "epoch": 0.37538304392236976,
476
+ "eval_nq_pairs_loss": 4.009054183959961,
477
+ "eval_nq_pairs_runtime": 5.0219,
478
+ "eval_nq_pairs_samples_per_second": 29.869,
479
+ "eval_nq_pairs_steps_per_second": 0.996,
480
+ "step": 2205
481
+ },
482
+ {
483
+ "epoch": 0.37538304392236976,
484
+ "eval_trivia_pairs_loss": 4.286431312561035,
485
+ "eval_trivia_pairs_runtime": 9.4975,
486
+ "eval_trivia_pairs_samples_per_second": 15.794,
487
+ "eval_trivia_pairs_steps_per_second": 0.526,
488
+ "step": 2205
489
+ },
490
+ {
491
+ "epoch": 0.37538304392236976,
492
+ "eval_quora_pairs_loss": 1.123273491859436,
493
+ "eval_quora_pairs_runtime": 1.1487,
494
+ "eval_quora_pairs_samples_per_second": 130.586,
495
+ "eval_quora_pairs_steps_per_second": 4.353,
496
+ "step": 2205
497
+ },
498
+ {
499
+ "epoch": 0.37538304392236976,
500
+ "eval_gooaq_pairs_loss": 3.222414255142212,
501
+ "eval_gooaq_pairs_runtime": 2.0173,
502
+ "eval_gooaq_pairs_samples_per_second": 74.357,
503
+ "eval_gooaq_pairs_steps_per_second": 2.479,
504
+ "step": 2205
505
+ },
506
+ {
507
+ "epoch": 0.4004085801838611,
508
+ "grad_norm": 218.56105041503906,
509
+ "learning_rate": 7.987742594484168e-06,
510
+ "loss": 3.7674,
511
+ "step": 2352
512
+ },
513
+ {
514
+ "epoch": 0.4254341164453524,
515
+ "grad_norm": 27.877609252929688,
516
+ "learning_rate": 8.488253319713993e-06,
517
+ "loss": 3.8729,
518
+ "step": 2499
519
+ },
520
+ {
521
+ "epoch": 0.45045965270684374,
522
+ "grad_norm": 33.50013732910156,
523
+ "learning_rate": 8.988764044943822e-06,
524
+ "loss": 3.4527,
525
+ "step": 2646
526
+ },
527
+ {
528
+ "epoch": 0.475485188968335,
529
+ "grad_norm": 14.015911102294922,
530
+ "learning_rate": 9.489274770173647e-06,
531
+ "loss": 3.3545,
532
+ "step": 2793
533
+ },
534
+ {
535
+ "epoch": 0.5005107252298263,
536
+ "grad_norm": 33.59694290161133,
537
+ "learning_rate": 9.989785495403473e-06,
538
+ "loss": 3.3247,
539
+ "step": 2940
540
+ },
541
+ {
542
+ "epoch": 0.5005107252298263,
543
+ "eval_nli-pairs_loss": 2.7121565341949463,
544
+ "eval_nli-pairs_runtime": 4.1564,
545
+ "eval_nli-pairs_samples_per_second": 36.089,
546
+ "eval_nli-pairs_steps_per_second": 1.203,
547
+ "eval_sts-test_pearson_cosine": 0.716623047702725,
548
+ "eval_sts-test_pearson_dot": 0.6128451070598809,
549
+ "eval_sts-test_pearson_euclidean": 0.7138791236031807,
550
+ "eval_sts-test_pearson_manhattan": 0.7213151818687454,
551
+ "eval_sts-test_pearson_max": 0.7213151818687454,
552
+ "eval_sts-test_spearman_cosine": 0.6919792400941177,
553
+ "eval_sts-test_spearman_dot": 0.5867158357121192,
554
+ "eval_sts-test_spearman_euclidean": 0.6925037259567834,
555
+ "eval_sts-test_spearman_manhattan": 0.7008895667910079,
556
+ "eval_sts-test_spearman_max": 0.7008895667910079,
557
+ "step": 2940
558
+ },
559
+ {
560
+ "epoch": 0.5005107252298263,
561
+ "eval_vitaminc-pairs_loss": 2.225992441177368,
562
+ "eval_vitaminc-pairs_runtime": 2.253,
563
+ "eval_vitaminc-pairs_samples_per_second": 66.577,
564
+ "eval_vitaminc-pairs_steps_per_second": 2.219,
565
+ "step": 2940
566
+ },
567
+ {
568
+ "epoch": 0.5005107252298263,
569
+ "eval_qnli-contrastive_loss": 4.92629861831665,
570
+ "eval_qnli-contrastive_runtime": 0.5005,
571
+ "eval_qnli-contrastive_samples_per_second": 299.691,
572
+ "eval_qnli-contrastive_steps_per_second": 9.99,
573
+ "step": 2940
574
+ },
575
+ {
576
+ "epoch": 0.5005107252298263,
577
+ "eval_scitail-pairs-qa_loss": 0.5898066163063049,
578
+ "eval_scitail-pairs-qa_runtime": 1.2227,
579
+ "eval_scitail-pairs-qa_samples_per_second": 122.682,
580
+ "eval_scitail-pairs-qa_steps_per_second": 4.089,
581
+ "step": 2940
582
+ },
583
+ {
584
+ "epoch": 0.5005107252298263,
585
+ "eval_scitail-pairs-pos_loss": 1.4237287044525146,
586
+ "eval_scitail-pairs-pos_runtime": 2.4409,
587
+ "eval_scitail-pairs-pos_samples_per_second": 61.452,
588
+ "eval_scitail-pairs-pos_steps_per_second": 2.048,
589
+ "step": 2940
590
+ },
591
+ {
592
+ "epoch": 0.5005107252298263,
593
+ "eval_xsum-pairs_loss": 1.8388895988464355,
594
+ "eval_xsum-pairs_runtime": 2.2831,
595
+ "eval_xsum-pairs_samples_per_second": 65.7,
596
+ "eval_xsum-pairs_steps_per_second": 2.19,
597
+ "step": 2940
598
+ },
599
+ {
600
+ "epoch": 0.5005107252298263,
601
+ "eval_compression-pairs_loss": 1.1590967178344727,
602
+ "eval_compression-pairs_runtime": 0.5152,
603
+ "eval_compression-pairs_samples_per_second": 291.165,
604
+ "eval_compression-pairs_steps_per_second": 9.706,
605
+ "step": 2940
606
+ },
607
+ {
608
+ "epoch": 0.5005107252298263,
609
+ "eval_sciq_pairs_loss": 8.282496452331543,
610
+ "eval_sciq_pairs_runtime": 7.2871,
611
+ "eval_sciq_pairs_samples_per_second": 20.584,
612
+ "eval_sciq_pairs_steps_per_second": 0.686,
613
+ "step": 2940
614
+ },
615
+ {
616
+ "epoch": 0.5005107252298263,
617
+ "eval_qasc_pairs_loss": 7.817965507507324,
618
+ "eval_qasc_pairs_runtime": 2.0211,
619
+ "eval_qasc_pairs_samples_per_second": 74.218,
620
+ "eval_qasc_pairs_steps_per_second": 2.474,
621
+ "step": 2940
622
+ },
623
+ {
624
+ "epoch": 0.5005107252298263,
625
+ "eval_openbookqa_pairs_loss": 4.619383811950684,
626
+ "eval_openbookqa_pairs_runtime": 0.8531,
627
+ "eval_openbookqa_pairs_samples_per_second": 120.731,
628
+ "eval_openbookqa_pairs_steps_per_second": 4.689,
629
+ "step": 2940
630
+ },
631
+ {
632
+ "epoch": 0.5005107252298263,
633
+ "eval_msmarco_pairs_loss": 3.478559970855713,
634
+ "eval_msmarco_pairs_runtime": 2.7512,
635
+ "eval_msmarco_pairs_samples_per_second": 54.522,
636
+ "eval_msmarco_pairs_steps_per_second": 1.817,
637
+ "step": 2940
638
+ },
639
+ {
640
+ "epoch": 0.5005107252298263,
641
+ "eval_nq_pairs_loss": 3.3449866771698,
642
+ "eval_nq_pairs_runtime": 5.0591,
643
+ "eval_nq_pairs_samples_per_second": 29.649,
644
+ "eval_nq_pairs_steps_per_second": 0.988,
645
+ "step": 2940
646
+ },
647
+ {
648
+ "epoch": 0.5005107252298263,
649
+ "eval_trivia_pairs_loss": 3.524484872817993,
650
+ "eval_trivia_pairs_runtime": 9.662,
651
+ "eval_trivia_pairs_samples_per_second": 15.525,
652
+ "eval_trivia_pairs_steps_per_second": 0.517,
653
+ "step": 2940
654
+ },
655
+ {
656
+ "epoch": 0.5005107252298263,
657
+ "eval_quora_pairs_loss": 0.9095575213432312,
658
+ "eval_quora_pairs_runtime": 1.2482,
659
+ "eval_quora_pairs_samples_per_second": 120.175,
660
+ "eval_quora_pairs_steps_per_second": 4.006,
661
+ "step": 2940
662
+ },
663
+ {
664
+ "epoch": 0.5005107252298263,
665
+ "eval_gooaq_pairs_loss": 2.6586034297943115,
666
+ "eval_gooaq_pairs_runtime": 2.1091,
667
+ "eval_gooaq_pairs_samples_per_second": 71.12,
668
+ "eval_gooaq_pairs_steps_per_second": 2.371,
669
+ "step": 2940
670
+ },
671
+ {
672
+ "epoch": 0.5255362614913177,
673
+ "grad_norm": 35.33409118652344,
674
+ "learning_rate": 1.04902962206333e-05,
675
+ "loss": 3.116,
676
+ "step": 3087
677
+ },
678
+ {
679
+ "epoch": 0.550561797752809,
680
+ "grad_norm": 22.29003143310547,
681
+ "learning_rate": 1.0990806945863125e-05,
682
+ "loss": 3.2418,
683
+ "step": 3234
684
+ },
685
+ {
686
+ "epoch": 0.5755873340143003,
687
+ "grad_norm": 31.277965545654297,
688
+ "learning_rate": 1.1491317671092953e-05,
689
+ "loss": 3.0757,
690
+ "step": 3381
691
+ },
692
+ {
693
+ "epoch": 0.6006128702757916,
694
+ "grad_norm": 24.612506866455078,
695
+ "learning_rate": 1.1991828396322778e-05,
696
+ "loss": 2.8524,
697
+ "step": 3528
698
+ },
699
+ {
700
+ "epoch": 0.625638406537283,
701
+ "grad_norm": 25.11741065979004,
702
+ "learning_rate": 1.2492339121552605e-05,
703
+ "loss": 2.6875,
704
+ "step": 3675
705
+ },
706
+ {
707
+ "epoch": 0.625638406537283,
708
+ "eval_nli-pairs_loss": 2.479051113128662,
709
+ "eval_nli-pairs_runtime": 3.9943,
710
+ "eval_nli-pairs_samples_per_second": 37.553,
711
+ "eval_nli-pairs_steps_per_second": 1.252,
712
+ "eval_sts-test_pearson_cosine": 0.7278742453545186,
713
+ "eval_sts-test_pearson_dot": 0.6217650825208566,
714
+ "eval_sts-test_pearson_euclidean": 0.7243228472931561,
715
+ "eval_sts-test_pearson_manhattan": 0.7333297580184588,
716
+ "eval_sts-test_pearson_max": 0.7333297580184588,
717
+ "eval_sts-test_spearman_cosine": 0.7013110457844404,
718
+ "eval_sts-test_spearman_dot": 0.5970993074902947,
719
+ "eval_sts-test_spearman_euclidean": 0.701564129266252,
720
+ "eval_sts-test_spearman_manhattan": 0.7116482009924582,
721
+ "eval_sts-test_spearman_max": 0.7116482009924582,
722
+ "step": 3675
723
+ },
724
+ {
725
+ "epoch": 0.625638406537283,
726
+ "eval_vitaminc-pairs_loss": 1.974273681640625,
727
+ "eval_vitaminc-pairs_runtime": 2.1754,
728
+ "eval_vitaminc-pairs_samples_per_second": 68.953,
729
+ "eval_vitaminc-pairs_steps_per_second": 2.298,
730
+ "step": 3675
731
+ },
732
+ {
733
+ "epoch": 0.625638406537283,
734
+ "eval_qnli-contrastive_loss": 1.7706010341644287,
735
+ "eval_qnli-contrastive_runtime": 0.4866,
736
+ "eval_qnli-contrastive_samples_per_second": 308.244,
737
+ "eval_qnli-contrastive_steps_per_second": 10.275,
738
+ "step": 3675
739
+ },
740
+ {
741
+ "epoch": 0.625638406537283,
742
+ "eval_scitail-pairs-qa_loss": 0.4400452673435211,
743
+ "eval_scitail-pairs-qa_runtime": 1.1519,
744
+ "eval_scitail-pairs-qa_samples_per_second": 130.222,
745
+ "eval_scitail-pairs-qa_steps_per_second": 4.341,
746
+ "step": 3675
747
+ },
748
+ {
749
+ "epoch": 0.625638406537283,
750
+ "eval_scitail-pairs-pos_loss": 1.1909903287887573,
751
+ "eval_scitail-pairs-pos_runtime": 2.1319,
752
+ "eval_scitail-pairs-pos_samples_per_second": 70.36,
753
+ "eval_scitail-pairs-pos_steps_per_second": 2.345,
754
+ "step": 3675
755
+ },
756
+ {
757
+ "epoch": 0.625638406537283,
758
+ "eval_xsum-pairs_loss": 1.4811985492706299,
759
+ "eval_xsum-pairs_runtime": 2.254,
760
+ "eval_xsum-pairs_samples_per_second": 66.548,
761
+ "eval_xsum-pairs_steps_per_second": 2.218,
762
+ "step": 3675
763
+ },
764
+ {
765
+ "epoch": 0.625638406537283,
766
+ "eval_compression-pairs_loss": 0.8453781008720398,
767
+ "eval_compression-pairs_runtime": 0.4401,
768
+ "eval_compression-pairs_samples_per_second": 340.826,
769
+ "eval_compression-pairs_steps_per_second": 11.361,
770
+ "step": 3675
771
+ },
772
+ {
773
+ "epoch": 0.625638406537283,
774
+ "eval_sciq_pairs_loss": 8.014656066894531,
775
+ "eval_sciq_pairs_runtime": 7.0707,
776
+ "eval_sciq_pairs_samples_per_second": 21.214,
777
+ "eval_sciq_pairs_steps_per_second": 0.707,
778
+ "step": 3675
779
+ },
780
+ {
781
+ "epoch": 0.625638406537283,
782
+ "eval_qasc_pairs_loss": 6.9316277503967285,
783
+ "eval_qasc_pairs_runtime": 2.0338,
784
+ "eval_qasc_pairs_samples_per_second": 73.752,
785
+ "eval_qasc_pairs_steps_per_second": 2.458,
786
+ "step": 3675
787
+ },
788
+ {
789
+ "epoch": 0.625638406537283,
790
+ "eval_openbookqa_pairs_loss": 4.21690034866333,
791
+ "eval_openbookqa_pairs_runtime": 0.918,
792
+ "eval_openbookqa_pairs_samples_per_second": 112.202,
793
+ "eval_openbookqa_pairs_steps_per_second": 4.357,
794
+ "step": 3675
795
+ },
796
+ {
797
+ "epoch": 0.625638406537283,
798
+ "eval_msmarco_pairs_loss": 3.0209598541259766,
799
+ "eval_msmarco_pairs_runtime": 2.7749,
800
+ "eval_msmarco_pairs_samples_per_second": 54.056,
801
+ "eval_msmarco_pairs_steps_per_second": 1.802,
802
+ "step": 3675
803
+ },
804
+ {
805
+ "epoch": 0.625638406537283,
806
+ "eval_nq_pairs_loss": 2.956088066101074,
807
+ "eval_nq_pairs_runtime": 5.0024,
808
+ "eval_nq_pairs_samples_per_second": 29.986,
809
+ "eval_nq_pairs_steps_per_second": 1.0,
810
+ "step": 3675
811
+ },
812
+ {
813
+ "epoch": 0.625638406537283,
814
+ "eval_trivia_pairs_loss": 3.17364501953125,
815
+ "eval_trivia_pairs_runtime": 9.4856,
816
+ "eval_trivia_pairs_samples_per_second": 15.813,
817
+ "eval_trivia_pairs_steps_per_second": 0.527,
818
+ "step": 3675
819
+ },
820
+ {
821
+ "epoch": 0.625638406537283,
822
+ "eval_quora_pairs_loss": 0.763593852519989,
823
+ "eval_quora_pairs_runtime": 1.1441,
824
+ "eval_quora_pairs_samples_per_second": 131.104,
825
+ "eval_quora_pairs_steps_per_second": 4.37,
826
+ "step": 3675
827
+ },
828
+ {
829
+ "epoch": 0.625638406537283,
830
+ "eval_gooaq_pairs_loss": 2.3524909019470215,
831
+ "eval_gooaq_pairs_runtime": 2.0161,
832
+ "eval_gooaq_pairs_samples_per_second": 74.4,
833
+ "eval_gooaq_pairs_steps_per_second": 2.48,
834
+ "step": 3675
835
+ },
836
+ {
837
+ "epoch": 0.6506639427987743,
838
+ "grad_norm": 31.163997650146484,
839
+ "learning_rate": 1.2992849846782432e-05,
840
+ "loss": 2.7808,
841
+ "step": 3822
842
+ },
843
+ {
844
+ "epoch": 0.6756894790602656,
845
+ "grad_norm": 14.883658409118652,
846
+ "learning_rate": 1.3493360572012258e-05,
847
+ "loss": 2.5687,
848
+ "step": 3969
849
+ },
850
+ {
851
+ "epoch": 0.7007150153217568,
852
+ "grad_norm": 5.874042987823486,
853
+ "learning_rate": 1.3993871297242083e-05,
854
+ "loss": 2.3034,
855
+ "step": 4116
856
+ },
857
+ {
858
+ "epoch": 0.7257405515832482,
859
+ "grad_norm": 31.464054107666016,
860
+ "learning_rate": 1.4494382022471912e-05,
861
+ "loss": 2.4412,
862
+ "step": 4263
863
+ },
864
+ {
865
+ "epoch": 0.7507660878447395,
866
+ "grad_norm": 16.43915367126465,
867
+ "learning_rate": 1.4994892747701737e-05,
868
+ "loss": 2.3293,
869
+ "step": 4410
870
+ },
871
+ {
872
+ "epoch": 0.7507660878447395,
873
+ "eval_nli-pairs_loss": 2.3226094245910645,
874
+ "eval_nli-pairs_runtime": 4.113,
875
+ "eval_nli-pairs_samples_per_second": 36.47,
876
+ "eval_nli-pairs_steps_per_second": 1.216,
877
+ "eval_sts-test_pearson_cosine": 0.7356971966139032,
878
+ "eval_sts-test_pearson_dot": 0.6150809513049869,
879
+ "eval_sts-test_pearson_euclidean": 0.7330733579988641,
880
+ "eval_sts-test_pearson_manhattan": 0.7423412248131348,
881
+ "eval_sts-test_pearson_max": 0.7423412248131348,
882
+ "eval_sts-test_spearman_cosine": 0.7121899723082045,
883
+ "eval_sts-test_spearman_dot": 0.5926505936679538,
884
+ "eval_sts-test_spearman_euclidean": 0.7130179905407037,
885
+ "eval_sts-test_spearman_manhattan": 0.7227257562995023,
886
+ "eval_sts-test_spearman_max": 0.7227257562995023,
887
+ "step": 4410
888
+ },
889
+ {
890
+ "epoch": 0.7507660878447395,
891
+ "eval_vitaminc-pairs_loss": 1.7956713438034058,
892
+ "eval_vitaminc-pairs_runtime": 2.174,
893
+ "eval_vitaminc-pairs_samples_per_second": 68.996,
894
+ "eval_vitaminc-pairs_steps_per_second": 2.3,
895
+ "step": 4410
896
+ },
897
+ {
898
+ "epoch": 0.7507660878447395,
899
+ "eval_qnli-contrastive_loss": 1.0078614950180054,
900
+ "eval_qnli-contrastive_runtime": 0.4874,
901
+ "eval_qnli-contrastive_samples_per_second": 307.763,
902
+ "eval_qnli-contrastive_steps_per_second": 10.259,
903
+ "step": 4410
904
+ },
905
+ {
906
+ "epoch": 0.7507660878447395,
907
+ "eval_scitail-pairs-qa_loss": 0.36971578001976013,
908
+ "eval_scitail-pairs-qa_runtime": 1.164,
909
+ "eval_scitail-pairs-qa_samples_per_second": 128.863,
910
+ "eval_scitail-pairs-qa_steps_per_second": 4.295,
911
+ "step": 4410
912
+ },
913
+ {
914
+ "epoch": 0.7507660878447395,
915
+ "eval_scitail-pairs-pos_loss": 1.0497769117355347,
916
+ "eval_scitail-pairs-pos_runtime": 2.1205,
917
+ "eval_scitail-pairs-pos_samples_per_second": 70.74,
918
+ "eval_scitail-pairs-pos_steps_per_second": 2.358,
919
+ "step": 4410
920
+ },
921
+ {
922
+ "epoch": 0.7507660878447395,
923
+ "eval_xsum-pairs_loss": 1.1691261529922485,
924
+ "eval_xsum-pairs_runtime": 2.259,
925
+ "eval_xsum-pairs_samples_per_second": 66.401,
926
+ "eval_xsum-pairs_steps_per_second": 2.213,
927
+ "step": 4410
928
+ },
929
+ {
930
+ "epoch": 0.7507660878447395,
931
+ "eval_compression-pairs_loss": 0.5027483105659485,
932
+ "eval_compression-pairs_runtime": 0.4403,
933
+ "eval_compression-pairs_samples_per_second": 340.682,
934
+ "eval_compression-pairs_steps_per_second": 11.356,
935
+ "step": 4410
936
+ },
937
+ {
938
+ "epoch": 0.7507660878447395,
939
+ "eval_sciq_pairs_loss": 7.823739528656006,
940
+ "eval_sciq_pairs_runtime": 7.0738,
941
+ "eval_sciq_pairs_samples_per_second": 21.205,
942
+ "eval_sciq_pairs_steps_per_second": 0.707,
943
+ "step": 4410
944
+ },
945
+ {
946
+ "epoch": 0.7507660878447395,
947
+ "eval_qasc_pairs_loss": 6.404655933380127,
948
+ "eval_qasc_pairs_runtime": 2.0346,
949
+ "eval_qasc_pairs_samples_per_second": 73.723,
950
+ "eval_qasc_pairs_steps_per_second": 2.457,
951
+ "step": 4410
952
+ },
953
+ {
954
+ "epoch": 0.7507660878447395,
955
+ "eval_openbookqa_pairs_loss": 3.857389211654663,
956
+ "eval_openbookqa_pairs_runtime": 0.8544,
957
+ "eval_openbookqa_pairs_samples_per_second": 120.547,
958
+ "eval_openbookqa_pairs_steps_per_second": 4.681,
959
+ "step": 4410
960
+ },
961
+ {
962
+ "epoch": 0.7507660878447395,
963
+ "eval_msmarco_pairs_loss": 2.7028510570526123,
964
+ "eval_msmarco_pairs_runtime": 2.7448,
965
+ "eval_msmarco_pairs_samples_per_second": 54.649,
966
+ "eval_msmarco_pairs_steps_per_second": 1.822,
967
+ "step": 4410
968
+ },
969
+ {
970
+ "epoch": 0.7507660878447395,
971
+ "eval_nq_pairs_loss": 2.679351329803467,
972
+ "eval_nq_pairs_runtime": 5.067,
973
+ "eval_nq_pairs_samples_per_second": 29.603,
974
+ "eval_nq_pairs_steps_per_second": 0.987,
975
+ "step": 4410
976
+ },
977
+ {
978
+ "epoch": 0.7507660878447395,
979
+ "eval_trivia_pairs_loss": 2.8798065185546875,
980
+ "eval_trivia_pairs_runtime": 9.5449,
981
+ "eval_trivia_pairs_samples_per_second": 15.715,
982
+ "eval_trivia_pairs_steps_per_second": 0.524,
983
+ "step": 4410
984
+ },
985
+ {
986
+ "epoch": 0.7507660878447395,
987
+ "eval_quora_pairs_loss": 0.6825175285339355,
988
+ "eval_quora_pairs_runtime": 1.1431,
989
+ "eval_quora_pairs_samples_per_second": 131.221,
990
+ "eval_quora_pairs_steps_per_second": 4.374,
991
+ "step": 4410
992
+ },
993
+ {
994
+ "epoch": 0.7507660878447395,
995
+ "eval_gooaq_pairs_loss": 2.0472166538238525,
996
+ "eval_gooaq_pairs_runtime": 2.0218,
997
+ "eval_gooaq_pairs_samples_per_second": 74.191,
998
+ "eval_gooaq_pairs_steps_per_second": 2.473,
999
+ "step": 4410
1000
+ },
1001
+ {
1002
+ "epoch": 0.7757916241062308,
1003
+ "grad_norm": 4.2425055503845215,
1004
+ "learning_rate": 1.5495403472931565e-05,
1005
+ "loss": 2.3651,
1006
+ "step": 4557
1007
+ },
1008
+ {
1009
+ "epoch": 0.8008171603677222,
1010
+ "grad_norm": 22.42776107788086,
1011
+ "learning_rate": 1.5995914198161388e-05,
1012
+ "loss": 2.6296,
1013
+ "step": 4704
1014
+ },
1015
+ {
1016
+ "epoch": 0.8258426966292135,
1017
+ "grad_norm": 21.169517517089844,
1018
+ "learning_rate": 1.6496424923391215e-05,
1019
+ "loss": 2.2108,
1020
+ "step": 4851
1021
+ },
1022
+ {
1023
+ "epoch": 0.8508682328907048,
1024
+ "grad_norm": 23.326181411743164,
1025
+ "learning_rate": 1.699693564862104e-05,
1026
+ "loss": 2.1852,
1027
+ "step": 4998
1028
+ },
1029
+ {
1030
+ "epoch": 0.8758937691521961,
1031
+ "grad_norm": 24.574176788330078,
1032
+ "learning_rate": 1.7497446373850868e-05,
1033
+ "loss": 2.2944,
1034
+ "step": 5145
1035
+ },
1036
+ {
1037
+ "epoch": 0.8758937691521961,
1038
+ "eval_nli-pairs_loss": 2.0634915828704834,
1039
+ "eval_nli-pairs_runtime": 4.0019,
1040
+ "eval_nli-pairs_samples_per_second": 37.482,
1041
+ "eval_nli-pairs_steps_per_second": 1.249,
1042
+ "eval_sts-test_pearson_cosine": 0.7466390532977636,
1043
+ "eval_sts-test_pearson_dot": 0.612259458274589,
1044
+ "eval_sts-test_pearson_euclidean": 0.7432536346376271,
1045
+ "eval_sts-test_pearson_manhattan": 0.7500490179501229,
1046
+ "eval_sts-test_pearson_max": 0.7500490179501229,
1047
+ "eval_sts-test_spearman_cosine": 0.728273260456201,
1048
+ "eval_sts-test_spearman_dot": 0.5960115087190596,
1049
+ "eval_sts-test_spearman_euclidean": 0.7272394395622148,
1050
+ "eval_sts-test_spearman_manhattan": 0.7334149564445704,
1051
+ "eval_sts-test_spearman_max": 0.7334149564445704,
1052
+ "step": 5145
1053
+ },
1054
+ {
1055
+ "epoch": 0.8758937691521961,
1056
+ "eval_vitaminc-pairs_loss": 1.638654112815857,
1057
+ "eval_vitaminc-pairs_runtime": 2.1637,
1058
+ "eval_vitaminc-pairs_samples_per_second": 69.327,
1059
+ "eval_vitaminc-pairs_steps_per_second": 2.311,
1060
+ "step": 5145
1061
+ },
1062
+ {
1063
+ "epoch": 0.8758937691521961,
1064
+ "eval_qnli-contrastive_loss": 0.9639705419540405,
1065
+ "eval_qnli-contrastive_runtime": 0.4889,
1066
+ "eval_qnli-contrastive_samples_per_second": 306.825,
1067
+ "eval_qnli-contrastive_steps_per_second": 10.228,
1068
+ "step": 5145
1069
+ },
1070
+ {
1071
+ "epoch": 0.8758937691521961,
1072
+ "eval_scitail-pairs-qa_loss": 0.31595128774642944,
1073
+ "eval_scitail-pairs-qa_runtime": 1.1467,
1074
+ "eval_scitail-pairs-qa_samples_per_second": 130.806,
1075
+ "eval_scitail-pairs-qa_steps_per_second": 4.36,
1076
+ "step": 5145
1077
+ },
1078
+ {
1079
+ "epoch": 0.8758937691521961,
1080
+ "eval_scitail-pairs-pos_loss": 0.9187478423118591,
1081
+ "eval_scitail-pairs-pos_runtime": 2.1273,
1082
+ "eval_scitail-pairs-pos_samples_per_second": 70.512,
1083
+ "eval_scitail-pairs-pos_steps_per_second": 2.35,
1084
+ "step": 5145
1085
+ },
1086
+ {
1087
+ "epoch": 0.8758937691521961,
1088
+ "eval_xsum-pairs_loss": 1.060194492340088,
1089
+ "eval_xsum-pairs_runtime": 2.2836,
1090
+ "eval_xsum-pairs_samples_per_second": 65.686,
1091
+ "eval_xsum-pairs_steps_per_second": 2.19,
1092
+ "step": 5145
1093
+ },
1094
+ {
1095
+ "epoch": 0.8758937691521961,
1096
+ "eval_compression-pairs_loss": 0.41078585386276245,
1097
+ "eval_compression-pairs_runtime": 0.4434,
1098
+ "eval_compression-pairs_samples_per_second": 338.276,
1099
+ "eval_compression-pairs_steps_per_second": 11.276,
1100
+ "step": 5145
1101
+ },
1102
+ {
1103
+ "epoch": 0.8758937691521961,
1104
+ "eval_sciq_pairs_loss": 7.577760696411133,
1105
+ "eval_sciq_pairs_runtime": 7.1025,
1106
+ "eval_sciq_pairs_samples_per_second": 21.119,
1107
+ "eval_sciq_pairs_steps_per_second": 0.704,
1108
+ "step": 5145
1109
+ },
1110
+ {
1111
+ "epoch": 0.8758937691521961,
1112
+ "eval_qasc_pairs_loss": 6.353766918182373,
1113
+ "eval_qasc_pairs_runtime": 2.0113,
1114
+ "eval_qasc_pairs_samples_per_second": 74.58,
1115
+ "eval_qasc_pairs_steps_per_second": 2.486,
1116
+ "step": 5145
1117
+ },
1118
+ {
1119
+ "epoch": 0.8758937691521961,
1120
+ "eval_openbookqa_pairs_loss": 3.7140932083129883,
1121
+ "eval_openbookqa_pairs_runtime": 0.8529,
1122
+ "eval_openbookqa_pairs_samples_per_second": 120.762,
1123
+ "eval_openbookqa_pairs_steps_per_second": 4.69,
1124
+ "step": 5145
1125
+ },
1126
+ {
1127
+ "epoch": 0.8758937691521961,
1128
+ "eval_msmarco_pairs_loss": 2.3862576484680176,
1129
+ "eval_msmarco_pairs_runtime": 2.8953,
1130
+ "eval_msmarco_pairs_samples_per_second": 51.808,
1131
+ "eval_msmarco_pairs_steps_per_second": 1.727,
1132
+ "step": 5145
1133
+ },
1134
+ {
1135
+ "epoch": 0.8758937691521961,
1136
+ "eval_nq_pairs_loss": 2.3543190956115723,
1137
+ "eval_nq_pairs_runtime": 5.0048,
1138
+ "eval_nq_pairs_samples_per_second": 29.971,
1139
+ "eval_nq_pairs_steps_per_second": 0.999,
1140
+ "step": 5145
1141
+ },
1142
+ {
1143
+ "epoch": 0.8758937691521961,
1144
+ "eval_trivia_pairs_loss": 2.494807481765747,
1145
+ "eval_trivia_pairs_runtime": 9.5513,
1146
+ "eval_trivia_pairs_samples_per_second": 15.705,
1147
+ "eval_trivia_pairs_steps_per_second": 0.523,
1148
+ "step": 5145
1149
+ },
1150
+ {
1151
+ "epoch": 0.8758937691521961,
1152
+ "eval_quora_pairs_loss": 0.6137441992759705,
1153
+ "eval_quora_pairs_runtime": 1.1541,
1154
+ "eval_quora_pairs_samples_per_second": 129.967,
1155
+ "eval_quora_pairs_steps_per_second": 4.332,
1156
+ "step": 5145
1157
+ },
1158
+ {
1159
+ "epoch": 0.8758937691521961,
1160
+ "eval_gooaq_pairs_loss": 1.8279658555984497,
1161
+ "eval_gooaq_pairs_runtime": 2.0951,
1162
+ "eval_gooaq_pairs_samples_per_second": 71.595,
1163
+ "eval_gooaq_pairs_steps_per_second": 2.387,
1164
+ "step": 5145
1165
+ },
1166
+ {
1167
+ "epoch": 0.9009193054136875,
1168
+ "grad_norm": 10.590804100036621,
1169
+ "learning_rate": 1.7997957099080695e-05,
1170
+ "loss": 2.2133,
1171
+ "step": 5292
1172
+ },
1173
+ {
1174
+ "epoch": 0.9259448416751788,
1175
+ "grad_norm": 18.527711868286133,
1176
+ "learning_rate": 1.849846782431052e-05,
1177
+ "loss": 2.2255,
1178
+ "step": 5439
1179
+ },
1180
+ {
1181
+ "epoch": 0.95097037793667,
1182
+ "grad_norm": 2.617710828781128,
1183
+ "learning_rate": 1.8995573714674838e-05,
1184
+ "loss": 2.3502,
1185
+ "step": 5586
1186
+ },
1187
+ {
1188
+ "epoch": 0.9759959141981613,
1189
+ "grad_norm": 19.551551818847656,
1190
+ "learning_rate": 1.9496084439904668e-05,
1191
+ "loss": 1.8964,
1192
+ "step": 5733
1193
+ },
1194
+ {
1195
+ "epoch": 1.0010214504596526,
1196
+ "grad_norm": 11.783225059509277,
1197
+ "learning_rate": 1.999319033026898e-05,
1198
+ "loss": 1.913,
1199
+ "step": 5880
1200
+ },
1201
+ {
1202
+ "epoch": 1.0010214504596526,
1203
+ "eval_nli-pairs_loss": 1.9677053689956665,
1204
+ "eval_nli-pairs_runtime": 4.3863,
1205
+ "eval_nli-pairs_samples_per_second": 34.198,
1206
+ "eval_nli-pairs_steps_per_second": 1.14,
1207
+ "eval_sts-test_pearson_cosine": 0.7531824359441671,
1208
+ "eval_sts-test_pearson_dot": 0.602579906515822,
1209
+ "eval_sts-test_pearson_euclidean": 0.7486763477944213,
1210
+ "eval_sts-test_pearson_manhattan": 0.7566220287347274,
1211
+ "eval_sts-test_pearson_max": 0.7566220287347274,
1212
+ "eval_sts-test_spearman_cosine": 0.7387792578665129,
1213
+ "eval_sts-test_spearman_dot": 0.5926594656319394,
1214
+ "eval_sts-test_spearman_euclidean": 0.733653805383597,
1215
+ "eval_sts-test_spearman_manhattan": 0.7420657558603486,
1216
+ "eval_sts-test_spearman_max": 0.7420657558603486,
1217
+ "step": 5880
1218
+ },
1219
+ {
1220
+ "epoch": 1.0010214504596526,
1221
+ "eval_vitaminc-pairs_loss": 1.4394291639328003,
1222
+ "eval_vitaminc-pairs_runtime": 2.2575,
1223
+ "eval_vitaminc-pairs_samples_per_second": 66.446,
1224
+ "eval_vitaminc-pairs_steps_per_second": 2.215,
1225
+ "step": 5880
1226
+ },
1227
+ {
1228
+ "epoch": 1.0010214504596526,
1229
+ "eval_qnli-contrastive_loss": 0.45715218782424927,
1230
+ "eval_qnli-contrastive_runtime": 0.501,
1231
+ "eval_qnli-contrastive_samples_per_second": 299.385,
1232
+ "eval_qnli-contrastive_steps_per_second": 9.979,
1233
+ "step": 5880
1234
+ },
1235
+ {
1236
+ "epoch": 1.0010214504596526,
1237
+ "eval_scitail-pairs-qa_loss": 0.26679515838623047,
1238
+ "eval_scitail-pairs-qa_runtime": 1.4342,
1239
+ "eval_scitail-pairs-qa_samples_per_second": 104.587,
1240
+ "eval_scitail-pairs-qa_steps_per_second": 3.486,
1241
+ "step": 5880
1242
+ },
1243
+ {
1244
+ "epoch": 1.0010214504596526,
1245
+ "eval_scitail-pairs-pos_loss": 0.8628473281860352,
1246
+ "eval_scitail-pairs-pos_runtime": 2.3485,
1247
+ "eval_scitail-pairs-pos_samples_per_second": 63.871,
1248
+ "eval_scitail-pairs-pos_steps_per_second": 2.129,
1249
+ "step": 5880
1250
+ },
1251
+ {
1252
+ "epoch": 1.0010214504596526,
1253
+ "eval_xsum-pairs_loss": 0.9014443755149841,
1254
+ "eval_xsum-pairs_runtime": 2.2896,
1255
+ "eval_xsum-pairs_samples_per_second": 65.513,
1256
+ "eval_xsum-pairs_steps_per_second": 2.184,
1257
+ "step": 5880
1258
+ },
1259
+ {
1260
+ "epoch": 1.0010214504596526,
1261
+ "eval_compression-pairs_loss": 0.3047434389591217,
1262
+ "eval_compression-pairs_runtime": 0.4852,
1263
+ "eval_compression-pairs_samples_per_second": 309.163,
1264
+ "eval_compression-pairs_steps_per_second": 10.305,
1265
+ "step": 5880
1266
+ },
1267
+ {
1268
+ "epoch": 1.0010214504596526,
1269
+ "eval_sciq_pairs_loss": 1.091601848602295,
1270
+ "eval_sciq_pairs_runtime": 7.3046,
1271
+ "eval_sciq_pairs_samples_per_second": 20.535,
1272
+ "eval_sciq_pairs_steps_per_second": 0.684,
1273
+ "step": 5880
1274
+ },
1275
+ {
1276
+ "epoch": 1.0010214504596526,
1277
+ "eval_qasc_pairs_loss": 5.947833061218262,
1278
+ "eval_qasc_pairs_runtime": 2.1787,
1279
+ "eval_qasc_pairs_samples_per_second": 68.849,
1280
+ "eval_qasc_pairs_steps_per_second": 2.295,
1281
+ "step": 5880
1282
+ },
1283
+ {
1284
+ "epoch": 1.0010214504596526,
1285
+ "eval_openbookqa_pairs_loss": 3.4724366664886475,
1286
+ "eval_openbookqa_pairs_runtime": 0.9106,
1287
+ "eval_openbookqa_pairs_samples_per_second": 113.111,
1288
+ "eval_openbookqa_pairs_steps_per_second": 4.393,
1289
+ "step": 5880
1290
+ },
1291
+ {
1292
+ "epoch": 1.0010214504596526,
1293
+ "eval_msmarco_pairs_loss": 2.1638240814208984,
1294
+ "eval_msmarco_pairs_runtime": 2.82,
1295
+ "eval_msmarco_pairs_samples_per_second": 53.191,
1296
+ "eval_msmarco_pairs_steps_per_second": 1.773,
1297
+ "step": 5880
1298
+ },
1299
+ {
1300
+ "epoch": 1.0010214504596526,
1301
+ "eval_nq_pairs_loss": 2.110903739929199,
1302
+ "eval_nq_pairs_runtime": 5.2303,
1303
+ "eval_nq_pairs_samples_per_second": 28.679,
1304
+ "eval_nq_pairs_steps_per_second": 0.956,
1305
+ "step": 5880
1306
+ },
1307
+ {
1308
+ "epoch": 1.0010214504596526,
1309
+ "eval_trivia_pairs_loss": 2.3711097240448,
1310
+ "eval_trivia_pairs_runtime": 9.6247,
1311
+ "eval_trivia_pairs_samples_per_second": 15.585,
1312
+ "eval_trivia_pairs_steps_per_second": 0.519,
1313
+ "step": 5880
1314
+ },
1315
+ {
1316
+ "epoch": 1.0010214504596526,
1317
+ "eval_quora_pairs_loss": 0.5216041803359985,
1318
+ "eval_quora_pairs_runtime": 1.3072,
1319
+ "eval_quora_pairs_samples_per_second": 114.749,
1320
+ "eval_quora_pairs_steps_per_second": 3.825,
1321
+ "step": 5880
1322
+ },
1323
+ {
1324
+ "epoch": 1.0010214504596526,
1325
+ "eval_gooaq_pairs_loss": 1.7041363716125488,
1326
+ "eval_gooaq_pairs_runtime": 2.0973,
1327
+ "eval_gooaq_pairs_samples_per_second": 71.521,
1328
+ "eval_gooaq_pairs_steps_per_second": 2.384,
1329
+ "step": 5880
1330
+ },
1331
+ {
1332
+ "epoch": 1.026046986721144,
1333
+ "grad_norm": 17.308378219604492,
1334
+ "learning_rate": 2.0493701055498808e-05,
1335
+ "loss": 1.7772,
1336
+ "step": 6027
1337
+ },
1338
+ {
1339
+ "epoch": 1.0510725229826354,
1340
+ "grad_norm": 20.248981475830078,
1341
+ "learning_rate": 2.0994211780728634e-05,
1342
+ "loss": 1.9079,
1343
+ "step": 6174
1344
+ },
1345
+ {
1346
+ "epoch": 1.0760980592441267,
1347
+ "grad_norm": 6.012618064880371,
1348
+ "learning_rate": 2.1494722505958464e-05,
1349
+ "loss": 1.8657,
1350
+ "step": 6321
1351
+ },
1352
+ {
1353
+ "epoch": 1.101123595505618,
1354
+ "grad_norm": 1.1185024976730347,
1355
+ "learning_rate": 2.1995233231188288e-05,
1356
+ "loss": 1.7144,
1357
+ "step": 6468
1358
+ },
1359
+ {
1360
+ "epoch": 1.1261491317671093,
1361
+ "grad_norm": 1.2436251640319824,
1362
+ "learning_rate": 2.2495743956418114e-05,
1363
+ "loss": 1.7661,
1364
+ "step": 6615
1365
+ },
1366
+ {
1367
+ "epoch": 1.1261491317671093,
1368
+ "eval_nli-pairs_loss": 1.7907973527908325,
1369
+ "eval_nli-pairs_runtime": 4.0147,
1370
+ "eval_nli-pairs_samples_per_second": 37.363,
1371
+ "eval_nli-pairs_steps_per_second": 1.245,
1372
+ "eval_sts-test_pearson_cosine": 0.755444461779583,
1373
+ "eval_sts-test_pearson_dot": 0.5833168145328357,
1374
+ "eval_sts-test_pearson_euclidean": 0.7437155007996056,
1375
+ "eval_sts-test_pearson_manhattan": 0.7524938984567344,
1376
+ "eval_sts-test_pearson_max": 0.755444461779583,
1377
+ "eval_sts-test_spearman_cosine": 0.7446166596886566,
1378
+ "eval_sts-test_spearman_dot": 0.5792340720766105,
1379
+ "eval_sts-test_spearman_euclidean": 0.7317285388028532,
1380
+ "eval_sts-test_spearman_manhattan": 0.7401637904976945,
1381
+ "eval_sts-test_spearman_max": 0.7446166596886566,
1382
+ "step": 6615
1383
+ },
1384
+ {
1385
+ "epoch": 1.1261491317671093,
1386
+ "eval_vitaminc-pairs_loss": 1.3403607606887817,
1387
+ "eval_vitaminc-pairs_runtime": 2.168,
1388
+ "eval_vitaminc-pairs_samples_per_second": 69.189,
1389
+ "eval_vitaminc-pairs_steps_per_second": 2.306,
1390
+ "step": 6615
1391
+ },
1392
+ {
1393
+ "epoch": 1.1261491317671093,
1394
+ "eval_qnli-contrastive_loss": 0.2736852467060089,
1395
+ "eval_qnli-contrastive_runtime": 0.4913,
1396
+ "eval_qnli-contrastive_samples_per_second": 305.336,
1397
+ "eval_qnli-contrastive_steps_per_second": 10.178,
1398
+ "step": 6615
1399
+ },
1400
+ {
1401
+ "epoch": 1.1261491317671093,
1402
+ "eval_scitail-pairs-qa_loss": 0.22441554069519043,
1403
+ "eval_scitail-pairs-qa_runtime": 1.1614,
1404
+ "eval_scitail-pairs-qa_samples_per_second": 129.152,
1405
+ "eval_scitail-pairs-qa_steps_per_second": 4.305,
1406
+ "step": 6615
1407
+ },
1408
+ {
1409
+ "epoch": 1.1261491317671093,
1410
+ "eval_scitail-pairs-pos_loss": 0.7723743915557861,
1411
+ "eval_scitail-pairs-pos_runtime": 2.1567,
1412
+ "eval_scitail-pairs-pos_samples_per_second": 69.55,
1413
+ "eval_scitail-pairs-pos_steps_per_second": 2.318,
1414
+ "step": 6615
1415
+ },
1416
+ {
1417
+ "epoch": 1.1261491317671093,
1418
+ "eval_xsum-pairs_loss": 0.8370540142059326,
1419
+ "eval_xsum-pairs_runtime": 2.2569,
1420
+ "eval_xsum-pairs_samples_per_second": 66.463,
1421
+ "eval_xsum-pairs_steps_per_second": 2.215,
1422
+ "step": 6615
1423
+ },
1424
+ {
1425
+ "epoch": 1.1261491317671093,
1426
+ "eval_compression-pairs_loss": 0.265947163105011,
1427
+ "eval_compression-pairs_runtime": 0.4431,
1428
+ "eval_compression-pairs_samples_per_second": 338.529,
1429
+ "eval_compression-pairs_steps_per_second": 11.284,
1430
+ "step": 6615
1431
+ },
1432
+ {
1433
+ "epoch": 1.1261491317671093,
1434
+ "eval_sciq_pairs_loss": 0.9383512735366821,
1435
+ "eval_sciq_pairs_runtime": 7.1464,
1436
+ "eval_sciq_pairs_samples_per_second": 20.99,
1437
+ "eval_sciq_pairs_steps_per_second": 0.7,
1438
+ "step": 6615
1439
+ },
1440
+ {
1441
+ "epoch": 1.1261491317671093,
1442
+ "eval_qasc_pairs_loss": 5.753899097442627,
1443
+ "eval_qasc_pairs_runtime": 2.0099,
1444
+ "eval_qasc_pairs_samples_per_second": 74.63,
1445
+ "eval_qasc_pairs_steps_per_second": 2.488,
1446
+ "step": 6615
1447
+ },
1448
+ {
1449
+ "epoch": 1.1261491317671093,
1450
+ "eval_openbookqa_pairs_loss": 3.3517918586730957,
1451
+ "eval_openbookqa_pairs_runtime": 0.8594,
1452
+ "eval_openbookqa_pairs_samples_per_second": 119.858,
1453
+ "eval_openbookqa_pairs_steps_per_second": 4.655,
1454
+ "step": 6615
1455
+ },
1456
+ {
1457
+ "epoch": 1.1261491317671093,
1458
+ "eval_msmarco_pairs_loss": 2.044360399246216,
1459
+ "eval_msmarco_pairs_runtime": 2.7431,
1460
+ "eval_msmarco_pairs_samples_per_second": 54.682,
1461
+ "eval_msmarco_pairs_steps_per_second": 1.823,
1462
+ "step": 6615
1463
+ },
1464
+ {
1465
+ "epoch": 1.1261491317671093,
1466
+ "eval_nq_pairs_loss": 1.9409464597702026,
1467
+ "eval_nq_pairs_runtime": 5.028,
1468
+ "eval_nq_pairs_samples_per_second": 29.833,
1469
+ "eval_nq_pairs_steps_per_second": 0.994,
1470
+ "step": 6615
1471
+ },
1472
+ {
1473
+ "epoch": 1.1261491317671093,
1474
+ "eval_trivia_pairs_loss": 2.369060754776001,
1475
+ "eval_trivia_pairs_runtime": 9.5137,
1476
+ "eval_trivia_pairs_samples_per_second": 15.767,
1477
+ "eval_trivia_pairs_steps_per_second": 0.526,
1478
+ "step": 6615
1479
+ },
1480
+ {
1481
+ "epoch": 1.1261491317671093,
1482
+ "eval_quora_pairs_loss": 0.47849634289741516,
1483
+ "eval_quora_pairs_runtime": 1.1413,
1484
+ "eval_quora_pairs_samples_per_second": 131.424,
1485
+ "eval_quora_pairs_steps_per_second": 4.381,
1486
+ "step": 6615
1487
+ },
1488
+ {
1489
+ "epoch": 1.1261491317671093,
1490
+ "eval_gooaq_pairs_loss": 1.5795674324035645,
1491
+ "eval_gooaq_pairs_runtime": 2.0155,
1492
+ "eval_gooaq_pairs_samples_per_second": 74.422,
1493
+ "eval_gooaq_pairs_steps_per_second": 2.481,
1494
+ "step": 6615
1495
+ },
1496
+ {
1497
+ "epoch": 1.1511746680286006,
1498
+ "grad_norm": 20.95261001586914,
1499
+ "learning_rate": 2.299625468164794e-05,
1500
+ "loss": 1.8066,
1501
+ "step": 6762
1502
+ },
1503
+ {
1504
+ "epoch": 1.1762002042900919,
1505
+ "grad_norm": 20.31597900390625,
1506
+ "learning_rate": 2.3496765406877764e-05,
1507
+ "loss": 1.7438,
1508
+ "step": 6909
1509
+ },
1510
+ {
1511
+ "epoch": 1.2012257405515832,
1512
+ "grad_norm": 28.363882064819336,
1513
+ "learning_rate": 2.399727613210759e-05,
1514
+ "loss": 2.0231,
1515
+ "step": 7056
1516
+ },
1517
+ {
1518
+ "epoch": 1.2262512768130747,
1519
+ "grad_norm": 14.403656959533691,
1520
+ "learning_rate": 2.449778685733742e-05,
1521
+ "loss": 1.8966,
1522
+ "step": 7203
1523
+ },
1524
+ {
1525
+ "epoch": 1.251276813074566,
1526
+ "grad_norm": 17.73562240600586,
1527
+ "learning_rate": 2.4998297582567248e-05,
1528
+ "loss": 1.7958,
1529
+ "step": 7350
1530
+ },
1531
+ {
1532
+ "epoch": 1.251276813074566,
1533
+ "eval_nli-pairs_loss": 1.5906368494033813,
1534
+ "eval_nli-pairs_runtime": 4.0261,
1535
+ "eval_nli-pairs_samples_per_second": 37.257,
1536
+ "eval_nli-pairs_steps_per_second": 1.242,
1537
+ "eval_sts-test_pearson_cosine": 0.7626661521495873,
1538
+ "eval_sts-test_pearson_dot": 0.5632604768989181,
1539
+ "eval_sts-test_pearson_euclidean": 0.7370060575260952,
1540
+ "eval_sts-test_pearson_manhattan": 0.7472706980613159,
1541
+ "eval_sts-test_pearson_max": 0.7626661521495873,
1542
+ "eval_sts-test_spearman_cosine": 0.7535266725567149,
1543
+ "eval_sts-test_spearman_dot": 0.5848997224802808,
1544
+ "eval_sts-test_spearman_euclidean": 0.7290608032903477,
1545
+ "eval_sts-test_spearman_manhattan": 0.739032087078249,
1546
+ "eval_sts-test_spearman_max": 0.7535266725567149,
1547
+ "step": 7350
1548
+ },
1549
+ {
1550
+ "epoch": 1.251276813074566,
1551
+ "eval_vitaminc-pairs_loss": 1.222551941871643,
1552
+ "eval_vitaminc-pairs_runtime": 2.1784,
1553
+ "eval_vitaminc-pairs_samples_per_second": 68.857,
1554
+ "eval_vitaminc-pairs_steps_per_second": 2.295,
1555
+ "step": 7350
1556
+ },
1557
+ {
1558
+ "epoch": 1.251276813074566,
1559
+ "eval_qnli-contrastive_loss": 0.3951484262943268,
1560
+ "eval_qnli-contrastive_runtime": 0.4916,
1561
+ "eval_qnli-contrastive_samples_per_second": 305.11,
1562
+ "eval_qnli-contrastive_steps_per_second": 10.17,
1563
+ "step": 7350
1564
+ },
1565
+ {
1566
+ "epoch": 1.251276813074566,
1567
+ "eval_scitail-pairs-qa_loss": 0.17783091962337494,
1568
+ "eval_scitail-pairs-qa_runtime": 1.1549,
1569
+ "eval_scitail-pairs-qa_samples_per_second": 129.88,
1570
+ "eval_scitail-pairs-qa_steps_per_second": 4.329,
1571
+ "step": 7350
1572
+ },
1573
+ {
1574
+ "epoch": 1.251276813074566,
1575
+ "eval_scitail-pairs-pos_loss": 0.7214661836624146,
1576
+ "eval_scitail-pairs-pos_runtime": 2.132,
1577
+ "eval_scitail-pairs-pos_samples_per_second": 70.357,
1578
+ "eval_scitail-pairs-pos_steps_per_second": 2.345,
1579
+ "step": 7350
1580
+ },
1581
+ {
1582
+ "epoch": 1.251276813074566,
1583
+ "eval_xsum-pairs_loss": 0.7919928431510925,
1584
+ "eval_xsum-pairs_runtime": 2.2579,
1585
+ "eval_xsum-pairs_samples_per_second": 66.432,
1586
+ "eval_xsum-pairs_steps_per_second": 2.214,
1587
+ "step": 7350
1588
+ },
1589
+ {
1590
+ "epoch": 1.251276813074566,
1591
+ "eval_compression-pairs_loss": 0.24975377321243286,
1592
+ "eval_compression-pairs_runtime": 0.447,
1593
+ "eval_compression-pairs_samples_per_second": 335.534,
1594
+ "eval_compression-pairs_steps_per_second": 11.184,
1595
+ "step": 7350
1596
+ },
1597
+ {
1598
+ "epoch": 1.251276813074566,
1599
+ "eval_sciq_pairs_loss": 0.8343773484230042,
1600
+ "eval_sciq_pairs_runtime": 7.1288,
1601
+ "eval_sciq_pairs_samples_per_second": 21.042,
1602
+ "eval_sciq_pairs_steps_per_second": 0.701,
1603
+ "step": 7350
1604
+ },
1605
+ {
1606
+ "epoch": 1.251276813074566,
1607
+ "eval_qasc_pairs_loss": 5.4840240478515625,
1608
+ "eval_qasc_pairs_runtime": 2.025,
1609
+ "eval_qasc_pairs_samples_per_second": 74.074,
1610
+ "eval_qasc_pairs_steps_per_second": 2.469,
1611
+ "step": 7350
1612
+ },
1613
+ {
1614
+ "epoch": 1.251276813074566,
1615
+ "eval_openbookqa_pairs_loss": 3.1631176471710205,
1616
+ "eval_openbookqa_pairs_runtime": 0.8612,
1617
+ "eval_openbookqa_pairs_samples_per_second": 119.598,
1618
+ "eval_openbookqa_pairs_steps_per_second": 4.645,
1619
+ "step": 7350
1620
+ },
1621
+ {
1622
+ "epoch": 1.251276813074566,
1623
+ "eval_msmarco_pairs_loss": 1.8952231407165527,
1624
+ "eval_msmarco_pairs_runtime": 2.7585,
1625
+ "eval_msmarco_pairs_samples_per_second": 54.378,
1626
+ "eval_msmarco_pairs_steps_per_second": 1.813,
1627
+ "step": 7350
1628
+ },
1629
+ {
1630
+ "epoch": 1.251276813074566,
1631
+ "eval_nq_pairs_loss": 1.6934970617294312,
1632
+ "eval_nq_pairs_runtime": 5.0253,
1633
+ "eval_nq_pairs_samples_per_second": 29.849,
1634
+ "eval_nq_pairs_steps_per_second": 0.995,
1635
+ "step": 7350
1636
+ },
1637
+ {
1638
+ "epoch": 1.251276813074566,
1639
+ "eval_trivia_pairs_loss": 1.9966663122177124,
1640
+ "eval_trivia_pairs_runtime": 9.5675,
1641
+ "eval_trivia_pairs_samples_per_second": 15.678,
1642
+ "eval_trivia_pairs_steps_per_second": 0.523,
1643
+ "step": 7350
1644
+ },
1645
+ {
1646
+ "epoch": 1.251276813074566,
1647
+ "eval_quora_pairs_loss": 0.405385285615921,
1648
+ "eval_quora_pairs_runtime": 1.1432,
1649
+ "eval_quora_pairs_samples_per_second": 131.209,
1650
+ "eval_quora_pairs_steps_per_second": 4.374,
1651
+ "step": 7350
1652
+ },
1653
+ {
1654
+ "epoch": 1.251276813074566,
1655
+ "eval_gooaq_pairs_loss": 1.3951071500778198,
1656
+ "eval_gooaq_pairs_runtime": 2.038,
1657
+ "eval_gooaq_pairs_samples_per_second": 73.601,
1658
+ "eval_gooaq_pairs_steps_per_second": 2.453,
1659
+ "step": 7350
1660
+ },
1661
+ {
1662
+ "epoch": 1.2763023493360572,
1663
+ "grad_norm": 21.254159927368164,
1664
+ "learning_rate": 2.549880830779707e-05,
1665
+ "loss": 1.5109,
1666
+ "step": 7497
1667
+ },
1668
+ {
1669
+ "epoch": 1.3013278855975485,
1670
+ "grad_norm": 20.08012580871582,
1671
+ "learning_rate": 2.5999319033026898e-05,
1672
+ "loss": 1.8119,
1673
+ "step": 7644
1674
+ },
1675
+ {
1676
+ "epoch": 1.3263534218590398,
1677
+ "grad_norm": 0.6448306441307068,
1678
+ "learning_rate": 2.6499829758256724e-05,
1679
+ "loss": 1.6833,
1680
+ "step": 7791
1681
+ },
1682
+ {
1683
+ "epoch": 1.351378958120531,
1684
+ "grad_norm": 16.65821647644043,
1685
+ "learning_rate": 2.7000340483486554e-05,
1686
+ "loss": 1.5917,
1687
+ "step": 7938
1688
+ },
1689
+ {
1690
+ "epoch": 1.3764044943820224,
1691
+ "grad_norm": 14.949362754821777,
1692
+ "learning_rate": 2.7500851208716378e-05,
1693
+ "loss": 1.809,
1694
+ "step": 8085
1695
+ },
1696
+ {
1697
+ "epoch": 1.3764044943820224,
1698
+ "eval_nli-pairs_loss": 1.5967836380004883,
1699
+ "eval_nli-pairs_runtime": 4.0496,
1700
+ "eval_nli-pairs_samples_per_second": 37.041,
1701
+ "eval_nli-pairs_steps_per_second": 1.235,
1702
+ "eval_sts-test_pearson_cosine": 0.7653416933913197,
1703
+ "eval_sts-test_pearson_dot": 0.5401711611334493,
1704
+ "eval_sts-test_pearson_euclidean": 0.7529907774019836,
1705
+ "eval_sts-test_pearson_manhattan": 0.7605105025260754,
1706
+ "eval_sts-test_pearson_max": 0.7653416933913197,
1707
+ "eval_sts-test_spearman_cosine": 0.7593865234485873,
1708
+ "eval_sts-test_spearman_dot": 0.5559615063301898,
1709
+ "eval_sts-test_spearman_euclidean": 0.7436431053840061,
1710
+ "eval_sts-test_spearman_manhattan": 0.7515978828464567,
1711
+ "eval_sts-test_spearman_max": 0.7593865234485873,
1712
+ "step": 8085
1713
+ },
1714
+ {
1715
+ "epoch": 1.3764044943820224,
1716
+ "eval_vitaminc-pairs_loss": 1.1434590816497803,
1717
+ "eval_vitaminc-pairs_runtime": 2.2066,
1718
+ "eval_vitaminc-pairs_samples_per_second": 67.977,
1719
+ "eval_vitaminc-pairs_steps_per_second": 2.266,
1720
+ "step": 8085
1721
+ },
1722
+ {
1723
+ "epoch": 1.3764044943820224,
1724
+ "eval_qnli-contrastive_loss": 0.3819103538990021,
1725
+ "eval_qnli-contrastive_runtime": 0.4972,
1726
+ "eval_qnli-contrastive_samples_per_second": 301.706,
1727
+ "eval_qnli-contrastive_steps_per_second": 10.057,
1728
+ "step": 8085
1729
+ },
1730
+ {
1731
+ "epoch": 1.3764044943820224,
1732
+ "eval_scitail-pairs-qa_loss": 0.15774373710155487,
1733
+ "eval_scitail-pairs-qa_runtime": 1.1704,
1734
+ "eval_scitail-pairs-qa_samples_per_second": 128.161,
1735
+ "eval_scitail-pairs-qa_steps_per_second": 4.272,
1736
+ "step": 8085
1737
+ },
1738
+ {
1739
+ "epoch": 1.3764044943820224,
1740
+ "eval_scitail-pairs-pos_loss": 0.6571963429450989,
1741
+ "eval_scitail-pairs-pos_runtime": 2.1634,
1742
+ "eval_scitail-pairs-pos_samples_per_second": 69.335,
1743
+ "eval_scitail-pairs-pos_steps_per_second": 2.311,
1744
+ "step": 8085
1745
+ },
1746
+ {
1747
+ "epoch": 1.3764044943820224,
1748
+ "eval_xsum-pairs_loss": 0.7028753757476807,
1749
+ "eval_xsum-pairs_runtime": 2.2608,
1750
+ "eval_xsum-pairs_samples_per_second": 66.347,
1751
+ "eval_xsum-pairs_steps_per_second": 2.212,
1752
+ "step": 8085
1753
+ },
1754
+ {
1755
+ "epoch": 1.3764044943820224,
1756
+ "eval_compression-pairs_loss": 0.23010987043380737,
1757
+ "eval_compression-pairs_runtime": 0.4514,
1758
+ "eval_compression-pairs_samples_per_second": 332.284,
1759
+ "eval_compression-pairs_steps_per_second": 11.076,
1760
+ "step": 8085
1761
+ },
1762
+ {
1763
+ "epoch": 1.3764044943820224,
1764
+ "eval_sciq_pairs_loss": 0.799666702747345,
1765
+ "eval_sciq_pairs_runtime": 7.1816,
1766
+ "eval_sciq_pairs_samples_per_second": 20.887,
1767
+ "eval_sciq_pairs_steps_per_second": 0.696,
1768
+ "step": 8085
1769
+ },
1770
+ {
1771
+ "epoch": 1.3764044943820224,
1772
+ "eval_qasc_pairs_loss": 5.433376789093018,
1773
+ "eval_qasc_pairs_runtime": 2.0592,
1774
+ "eval_qasc_pairs_samples_per_second": 72.843,
1775
+ "eval_qasc_pairs_steps_per_second": 2.428,
1776
+ "step": 8085
1777
+ },
1778
+ {
1779
+ "epoch": 1.3764044943820224,
1780
+ "eval_openbookqa_pairs_loss": 2.9010672569274902,
1781
+ "eval_openbookqa_pairs_runtime": 0.865,
1782
+ "eval_openbookqa_pairs_samples_per_second": 119.074,
1783
+ "eval_openbookqa_pairs_steps_per_second": 4.624,
1784
+ "step": 8085
1785
+ },
1786
+ {
1787
+ "epoch": 1.3764044943820224,
1788
+ "eval_msmarco_pairs_loss": 1.7567836046218872,
1789
+ "eval_msmarco_pairs_runtime": 2.7812,
1790
+ "eval_msmarco_pairs_samples_per_second": 53.933,
1791
+ "eval_msmarco_pairs_steps_per_second": 1.798,
1792
+ "step": 8085
1793
+ },
1794
+ {
1795
+ "epoch": 1.3764044943820224,
1796
+ "eval_nq_pairs_loss": 1.5407707691192627,
1797
+ "eval_nq_pairs_runtime": 5.0607,
1798
+ "eval_nq_pairs_samples_per_second": 29.64,
1799
+ "eval_nq_pairs_steps_per_second": 0.988,
1800
+ "step": 8085
1801
+ },
1802
+ {
1803
+ "epoch": 1.3764044943820224,
1804
+ "eval_trivia_pairs_loss": 1.8419283628463745,
1805
+ "eval_trivia_pairs_runtime": 9.5535,
1806
+ "eval_trivia_pairs_samples_per_second": 15.701,
1807
+ "eval_trivia_pairs_steps_per_second": 0.523,
1808
+ "step": 8085
1809
+ },
1810
+ {
1811
+ "epoch": 1.3764044943820224,
1812
+ "eval_quora_pairs_loss": 0.3561370372772217,
1813
+ "eval_quora_pairs_runtime": 1.2005,
1814
+ "eval_quora_pairs_samples_per_second": 124.946,
1815
+ "eval_quora_pairs_steps_per_second": 4.165,
1816
+ "step": 8085
1817
+ },
1818
+ {
1819
+ "epoch": 1.3764044943820224,
1820
+ "eval_gooaq_pairs_loss": 1.1745914220809937,
1821
+ "eval_gooaq_pairs_runtime": 2.0463,
1822
+ "eval_gooaq_pairs_samples_per_second": 73.305,
1823
+ "eval_gooaq_pairs_steps_per_second": 2.443,
1824
+ "step": 8085
1825
+ },
1826
+ {
1827
+ "epoch": 1.401430030643514,
1828
+ "grad_norm": 14.31106185913086,
1829
+ "learning_rate": 2.8001361933946204e-05,
1830
+ "loss": 1.5561,
1831
+ "step": 8232
1832
+ },
1833
+ {
1834
+ "epoch": 1.4264555669050052,
1835
+ "grad_norm": 11.82392692565918,
1836
+ "learning_rate": 2.850187265917603e-05,
1837
+ "loss": 1.5325,
1838
+ "step": 8379
1839
+ },
1840
+ {
1841
+ "epoch": 1.4514811031664965,
1842
+ "grad_norm": 21.716449737548828,
1843
+ "learning_rate": 2.9002383384405858e-05,
1844
+ "loss": 1.5085,
1845
+ "step": 8526
1846
+ },
1847
+ {
1848
+ "epoch": 1.4765066394279878,
1849
+ "grad_norm": 6.5607147216796875,
1850
+ "learning_rate": 2.950289410963568e-05,
1851
+ "loss": 1.5634,
1852
+ "step": 8673
1853
+ }
1854
+ ],
1855
+ "logging_steps": 147,
1856
+ "max_steps": 29370,
1857
+ "num_input_tokens_seen": 0,
1858
+ "num_train_epochs": 5,
1859
+ "save_steps": 2937,
1860
+ "stateful_callbacks": {
1861
+ "TrainerControl": {
1862
+ "args": {
1863
+ "should_epoch_stop": false,
1864
+ "should_evaluate": false,
1865
+ "should_log": false,
1866
+ "should_save": true,
1867
+ "should_training_stop": false
1868
+ },
1869
+ "attributes": {}
1870
+ }
1871
+ },
1872
+ "total_flos": 0.0,
1873
+ "train_batch_size": 32,
1874
+ "trial_name": null,
1875
+ "trial_params": null
1876
+ }
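The JSON added above is the checkpoint's trainer state (presumably checkpoint-8811/trainer_state.json): a "log_history" list mixing training entries ("loss", "learning_rate", "grad_norm") and per-dataset evaluation entries, followed by run-level settings such as "logging_steps", "save_steps" and "train_batch_size". As an illustration only, and not part of this commit, here is a minimal sketch of pulling the logged metrics out of such a file; the local path is an assumption based on the checkpoint directory name.

```python
# Minimal sketch, not part of this commit: read the checkpoint's trainer state
# and separate training-loss entries from one of the evaluation series.
import json

with open("checkpoint-8811/trainer_state.json") as f:  # assumed local path
    state = json.load(f)

# "log_history" mixes training entries (with "loss"/"learning_rate") and
# evaluation entries (with per-dataset keys such as "eval_nq_pairs_loss").
train_log = [(e["step"], e["loss"], e["learning_rate"])
             for e in state["log_history"] if "loss" in e]
nq_eval = [(e["step"], e["eval_nq_pairs_loss"])
           for e in state["log_history"] if "eval_nq_pairs_loss" in e]

print("last training log entry:", train_log[-1])
print("eval_nq_pairs_loss by step:", nq_eval)
```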
checkpoint-8811/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:298cfa7a9e669e98ab55937cac47ecfa89fb6a36f2afe18f46ba782a5f5bf5a4
3
+ size 5624
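The three lines above are a Git LFS pointer, not the file itself: the actual training_args.bin (5624 bytes, identified by its sha256 oid) lives in LFS storage. A minimal sketch of fetching and inspecting it is shown below; "<user>/<repo>" is a hypothetical placeholder for this repository's id, which the diff does not show.

```python
# Minimal sketch, assumptions flagged inline: "<user>/<repo>" is a placeholder
# for the repository id, which this diff does not show.
import torch
from huggingface_hub import hf_hub_download  # resolves LFS pointers to real files

path = hf_hub_download(
    repo_id="<user>/<repo>",                       # placeholder, not taken from this commit
    filename="checkpoint-8811/training_args.bin",
)

# training_args.bin is the pickled transformers TrainingArguments saved by Trainer;
# weights_only=False is needed because it is an arbitrary pickled object.
training_args = torch.load(path, weights_only=False)
print(training_args)
```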