bobox commited on
Commit
31dff8b
1 Parent(s): a402917

Training in progress, step 11748, checkpoint

Browse files
checkpoint-11748/1_Pooling/config.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "word_embedding_dimension": 768,
3
+ "pooling_mode_cls_token": false,
4
+ "pooling_mode_mean_tokens": true,
5
+ "pooling_mode_max_tokens": false,
6
+ "pooling_mode_mean_sqrt_len_tokens": false,
7
+ "pooling_mode_weightedmean_tokens": false,
8
+ "pooling_mode_lasttoken": false,
9
+ "include_prompt": true
10
+ }
checkpoint-11748/README.md ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-11748/added_tokens.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "[MASK]": 128000
3
+ }
checkpoint-11748/config.json ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "microsoft/deberta-v3-small",
3
+ "architectures": [
4
+ "DebertaV2Model"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "hidden_act": "gelu",
8
+ "hidden_dropout_prob": 0.1,
9
+ "hidden_size": 768,
10
+ "initializer_range": 0.02,
11
+ "intermediate_size": 3072,
12
+ "layer_norm_eps": 1e-07,
13
+ "max_position_embeddings": 512,
14
+ "max_relative_positions": -1,
15
+ "model_type": "deberta-v2",
16
+ "norm_rel_ebd": "layer_norm",
17
+ "num_attention_heads": 12,
18
+ "num_hidden_layers": 6,
19
+ "pad_token_id": 0,
20
+ "pooler_dropout": 0,
21
+ "pooler_hidden_act": "gelu",
22
+ "pooler_hidden_size": 768,
23
+ "pos_att_type": [
24
+ "p2c",
25
+ "c2p"
26
+ ],
27
+ "position_biased_input": false,
28
+ "position_buckets": 256,
29
+ "relative_attention": true,
30
+ "share_att_key": true,
31
+ "torch_dtype": "float32",
32
+ "transformers_version": "4.41.2",
33
+ "type_vocab_size": 0,
34
+ "vocab_size": 128100
35
+ }
checkpoint-11748/config_sentence_transformers.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "__version__": {
3
+ "sentence_transformers": "3.0.1",
4
+ "transformers": "4.41.2",
5
+ "pytorch": "2.1.2"
6
+ },
7
+ "prompts": {},
8
+ "default_prompt_name": null,
9
+ "similarity_fn_name": null
10
+ }
checkpoint-11748/modules.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "idx": 0,
4
+ "name": "0",
5
+ "path": "",
6
+ "type": "sentence_transformers.models.Transformer"
7
+ },
8
+ {
9
+ "idx": 1,
10
+ "name": "1",
11
+ "path": "1_Pooling",
12
+ "type": "sentence_transformers.models.Pooling"
13
+ }
14
+ ]
checkpoint-11748/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:47c4bbc8e1e46fe8ab5da59f14d367fbd679602e0a1943a1d9dc5109b9bf8155
3
+ size 1130520122
checkpoint-11748/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6b25d31c3114f97e26da394c5e37d75ee6601df1297fcd4137293447089a0c5a
3
+ size 565251810
checkpoint-11748/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4512a4d3613f6b7d86b146939f89e3c62b0366edbc96849275f6672de9354f51
3
+ size 14244
checkpoint-11748/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:80168c8f4520d6288706a30964c96e9c61e9d14287267f1dc90b6c495f5d6935
3
+ size 1064
checkpoint-11748/sentence_bert_config.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "max_seq_length": 512,
3
+ "do_lower_case": false
4
+ }
checkpoint-11748/special_tokens_map.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "[CLS]",
3
+ "cls_token": "[CLS]",
4
+ "eos_token": "[SEP]",
5
+ "mask_token": "[MASK]",
6
+ "pad_token": "[PAD]",
7
+ "sep_token": "[SEP]",
8
+ "unk_token": {
9
+ "content": "[UNK]",
10
+ "lstrip": false,
11
+ "normalized": true,
12
+ "rstrip": false,
13
+ "single_word": false
14
+ }
15
+ }
checkpoint-11748/spm.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c679fbf93643d19aab7ee10c0b99e460bdbc02fedf34b92b05af343b4af586fd
3
+ size 2464616
checkpoint-11748/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-11748/tokenizer_config.json ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "[CLS]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "[SEP]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "3": {
28
+ "content": "[UNK]",
29
+ "lstrip": false,
30
+ "normalized": true,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "128000": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "bos_token": "[CLS]",
45
+ "clean_up_tokenization_spaces": true,
46
+ "cls_token": "[CLS]",
47
+ "do_lower_case": false,
48
+ "eos_token": "[SEP]",
49
+ "mask_token": "[MASK]",
50
+ "model_max_length": 1000000000000000019884624838656,
51
+ "pad_token": "[PAD]",
52
+ "sep_token": "[SEP]",
53
+ "sp_model_kwargs": {},
54
+ "split_by_punct": false,
55
+ "tokenizer_class": "DebertaV2Tokenizer",
56
+ "unk_token": "[UNK]",
57
+ "vocab_type": "spm"
58
+ }
checkpoint-11748/trainer_state.json ADDED
@@ -0,0 +1,2536 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 2.0,
5
+ "eval_steps": 735,
6
+ "global_step": 11748,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.02502553626149132,
13
+ "grad_norm": 65.55949401855469,
14
+ "learning_rate": 4.834865509022812e-07,
15
+ "loss": 16.851,
16
+ "step": 147
17
+ },
18
+ {
19
+ "epoch": 0.05005107252298264,
20
+ "grad_norm": 23.207971572875977,
21
+ "learning_rate": 9.805924412665985e-07,
22
+ "loss": 11.2787,
23
+ "step": 294
24
+ },
25
+ {
26
+ "epoch": 0.07507660878447395,
27
+ "grad_norm": 176.1532440185547,
28
+ "learning_rate": 1.481103166496425e-06,
29
+ "loss": 8.9166,
30
+ "step": 441
31
+ },
32
+ {
33
+ "epoch": 0.10010214504596528,
34
+ "grad_norm": 22.1564998626709,
35
+ "learning_rate": 1.981613891726251e-06,
36
+ "loss": 7.9463,
37
+ "step": 588
38
+ },
39
+ {
40
+ "epoch": 0.12512768130745658,
41
+ "grad_norm": 20.11876106262207,
42
+ "learning_rate": 2.4821246169560777e-06,
43
+ "loss": 7.2108,
44
+ "step": 735
45
+ },
46
+ {
47
+ "epoch": 0.12512768130745658,
48
+ "eval_nli-pairs_loss": 6.905651569366455,
49
+ "eval_nli-pairs_runtime": 4.0844,
50
+ "eval_nli-pairs_samples_per_second": 36.725,
51
+ "eval_nli-pairs_steps_per_second": 1.224,
52
+ "eval_sts-test_pearson_cosine": 0.3740256550072784,
53
+ "eval_sts-test_pearson_dot": 0.13384893803205677,
54
+ "eval_sts-test_pearson_euclidean": 0.3912387619869807,
55
+ "eval_sts-test_pearson_manhattan": 0.4202605137823524,
56
+ "eval_sts-test_pearson_max": 0.4202605137823524,
57
+ "eval_sts-test_spearman_cosine": 0.37210107338950205,
58
+ "eval_sts-test_spearman_dot": 0.12092409843417483,
59
+ "eval_sts-test_spearman_euclidean": 0.39172287978780546,
60
+ "eval_sts-test_spearman_manhattan": 0.4169664738563951,
61
+ "eval_sts-test_spearman_max": 0.4169664738563951,
62
+ "step": 735
63
+ },
64
+ {
65
+ "epoch": 0.12512768130745658,
66
+ "eval_vitaminc-pairs_loss": 5.720878601074219,
67
+ "eval_vitaminc-pairs_runtime": 2.1703,
68
+ "eval_vitaminc-pairs_samples_per_second": 69.115,
69
+ "eval_vitaminc-pairs_steps_per_second": 2.304,
70
+ "step": 735
71
+ },
72
+ {
73
+ "epoch": 0.12512768130745658,
74
+ "eval_qnli-contrastive_loss": 8.1649751663208,
75
+ "eval_qnli-contrastive_runtime": 0.4937,
76
+ "eval_qnli-contrastive_samples_per_second": 303.841,
77
+ "eval_qnli-contrastive_steps_per_second": 10.128,
78
+ "step": 735
79
+ },
80
+ {
81
+ "epoch": 0.12512768130745658,
82
+ "eval_scitail-pairs-qa_loss": 3.7859296798706055,
83
+ "eval_scitail-pairs-qa_runtime": 1.1509,
84
+ "eval_scitail-pairs-qa_samples_per_second": 130.329,
85
+ "eval_scitail-pairs-qa_steps_per_second": 4.344,
86
+ "step": 735
87
+ },
88
+ {
89
+ "epoch": 0.12512768130745658,
90
+ "eval_scitail-pairs-pos_loss": 3.9919917583465576,
91
+ "eval_scitail-pairs-pos_runtime": 2.1442,
92
+ "eval_scitail-pairs-pos_samples_per_second": 69.956,
93
+ "eval_scitail-pairs-pos_steps_per_second": 2.332,
94
+ "step": 735
95
+ },
96
+ {
97
+ "epoch": 0.12512768130745658,
98
+ "eval_xsum-pairs_loss": 4.600368976593018,
99
+ "eval_xsum-pairs_runtime": 2.26,
100
+ "eval_xsum-pairs_samples_per_second": 66.371,
101
+ "eval_xsum-pairs_steps_per_second": 2.212,
102
+ "step": 735
103
+ },
104
+ {
105
+ "epoch": 0.12512768130745658,
106
+ "eval_compression-pairs_loss": 3.3037569522857666,
107
+ "eval_compression-pairs_runtime": 0.449,
108
+ "eval_compression-pairs_samples_per_second": 334.078,
109
+ "eval_compression-pairs_steps_per_second": 11.136,
110
+ "step": 735
111
+ },
112
+ {
113
+ "epoch": 0.12512768130745658,
114
+ "eval_sciq_pairs_loss": 10.214456558227539,
115
+ "eval_sciq_pairs_runtime": 7.1179,
116
+ "eval_sciq_pairs_samples_per_second": 21.074,
117
+ "eval_sciq_pairs_steps_per_second": 0.702,
118
+ "step": 735
119
+ },
120
+ {
121
+ "epoch": 0.12512768130745658,
122
+ "eval_qasc_pairs_loss": 10.58031940460205,
123
+ "eval_qasc_pairs_runtime": 2.0175,
124
+ "eval_qasc_pairs_samples_per_second": 74.348,
125
+ "eval_qasc_pairs_steps_per_second": 2.478,
126
+ "step": 735
127
+ },
128
+ {
129
+ "epoch": 0.12512768130745658,
130
+ "eval_openbookqa_pairs_loss": 7.862658977508545,
131
+ "eval_openbookqa_pairs_runtime": 0.8571,
132
+ "eval_openbookqa_pairs_samples_per_second": 120.168,
133
+ "eval_openbookqa_pairs_steps_per_second": 4.667,
134
+ "step": 735
135
+ },
136
+ {
137
+ "epoch": 0.12512768130745658,
138
+ "eval_msmarco_pairs_loss": 8.754273414611816,
139
+ "eval_msmarco_pairs_runtime": 2.7533,
140
+ "eval_msmarco_pairs_samples_per_second": 54.481,
141
+ "eval_msmarco_pairs_steps_per_second": 1.816,
142
+ "step": 735
143
+ },
144
+ {
145
+ "epoch": 0.12512768130745658,
146
+ "eval_nq_pairs_loss": 8.415486335754395,
147
+ "eval_nq_pairs_runtime": 5.0894,
148
+ "eval_nq_pairs_samples_per_second": 29.473,
149
+ "eval_nq_pairs_steps_per_second": 0.982,
150
+ "step": 735
151
+ },
152
+ {
153
+ "epoch": 0.12512768130745658,
154
+ "eval_trivia_pairs_loss": 9.051105499267578,
155
+ "eval_trivia_pairs_runtime": 9.5498,
156
+ "eval_trivia_pairs_samples_per_second": 15.707,
157
+ "eval_trivia_pairs_steps_per_second": 0.524,
158
+ "step": 735
159
+ },
160
+ {
161
+ "epoch": 0.12512768130745658,
162
+ "eval_quora_pairs_loss": 4.5232110023498535,
163
+ "eval_quora_pairs_runtime": 1.1469,
164
+ "eval_quora_pairs_samples_per_second": 130.785,
165
+ "eval_quora_pairs_steps_per_second": 4.36,
166
+ "step": 735
167
+ },
168
+ {
169
+ "epoch": 0.12512768130745658,
170
+ "eval_gooaq_pairs_loss": 7.579105854034424,
171
+ "eval_gooaq_pairs_runtime": 2.0491,
172
+ "eval_gooaq_pairs_samples_per_second": 73.203,
173
+ "eval_gooaq_pairs_steps_per_second": 2.44,
174
+ "step": 735
175
+ },
176
+ {
177
+ "epoch": 0.1501532175689479,
178
+ "grad_norm": 31.7736759185791,
179
+ "learning_rate": 2.982635342185904e-06,
180
+ "loss": 6.7709,
181
+ "step": 882
182
+ },
183
+ {
184
+ "epoch": 0.1751787538304392,
185
+ "grad_norm": 31.57339096069336,
186
+ "learning_rate": 3.4831460674157306e-06,
187
+ "loss": 6.1746,
188
+ "step": 1029
189
+ },
190
+ {
191
+ "epoch": 0.20020429009193055,
192
+ "grad_norm": 25.392702102661133,
193
+ "learning_rate": 3.9836567926455565e-06,
194
+ "loss": 5.7706,
195
+ "step": 1176
196
+ },
197
+ {
198
+ "epoch": 0.22522982635342187,
199
+ "grad_norm": 32.390472412109375,
200
+ "learning_rate": 4.484167517875383e-06,
201
+ "loss": 5.7283,
202
+ "step": 1323
203
+ },
204
+ {
205
+ "epoch": 0.25025536261491316,
206
+ "grad_norm": 18.85039520263672,
207
+ "learning_rate": 4.98467824310521e-06,
208
+ "loss": 5.1856,
209
+ "step": 1470
210
+ },
211
+ {
212
+ "epoch": 0.25025536261491316,
213
+ "eval_nli-pairs_loss": 4.352054119110107,
214
+ "eval_nli-pairs_runtime": 4.1476,
215
+ "eval_nli-pairs_samples_per_second": 36.165,
216
+ "eval_nli-pairs_steps_per_second": 1.206,
217
+ "eval_sts-test_pearson_cosine": 0.6694155778571752,
218
+ "eval_sts-test_pearson_dot": 0.5201102118957572,
219
+ "eval_sts-test_pearson_euclidean": 0.6613028243200022,
220
+ "eval_sts-test_pearson_manhattan": 0.6670710500315469,
221
+ "eval_sts-test_pearson_max": 0.6694155778571752,
222
+ "eval_sts-test_spearman_cosine": 0.6367853204388882,
223
+ "eval_sts-test_spearman_dot": 0.4940207180607985,
224
+ "eval_sts-test_spearman_euclidean": 0.6391132775161348,
225
+ "eval_sts-test_spearman_manhattan": 0.6446159957787251,
226
+ "eval_sts-test_spearman_max": 0.6446159957787251,
227
+ "step": 1470
228
+ },
229
+ {
230
+ "epoch": 0.25025536261491316,
231
+ "eval_vitaminc-pairs_loss": 3.4987735748291016,
232
+ "eval_vitaminc-pairs_runtime": 2.1678,
233
+ "eval_vitaminc-pairs_samples_per_second": 69.194,
234
+ "eval_vitaminc-pairs_steps_per_second": 2.306,
235
+ "step": 1470
236
+ },
237
+ {
238
+ "epoch": 0.25025536261491316,
239
+ "eval_qnli-contrastive_loss": 12.915559768676758,
240
+ "eval_qnli-contrastive_runtime": 0.4918,
241
+ "eval_qnli-contrastive_samples_per_second": 304.99,
242
+ "eval_qnli-contrastive_steps_per_second": 10.166,
243
+ "step": 1470
244
+ },
245
+ {
246
+ "epoch": 0.25025536261491316,
247
+ "eval_scitail-pairs-qa_loss": 1.3250077962875366,
248
+ "eval_scitail-pairs-qa_runtime": 1.154,
249
+ "eval_scitail-pairs-qa_samples_per_second": 129.984,
250
+ "eval_scitail-pairs-qa_steps_per_second": 4.333,
251
+ "step": 1470
252
+ },
253
+ {
254
+ "epoch": 0.25025536261491316,
255
+ "eval_scitail-pairs-pos_loss": 2.457335948944092,
256
+ "eval_scitail-pairs-pos_runtime": 2.1475,
257
+ "eval_scitail-pairs-pos_samples_per_second": 69.85,
258
+ "eval_scitail-pairs-pos_steps_per_second": 2.328,
259
+ "step": 1470
260
+ },
261
+ {
262
+ "epoch": 0.25025536261491316,
263
+ "eval_xsum-pairs_loss": 3.071201801300049,
264
+ "eval_xsum-pairs_runtime": 2.2634,
265
+ "eval_xsum-pairs_samples_per_second": 66.271,
266
+ "eval_xsum-pairs_steps_per_second": 2.209,
267
+ "step": 1470
268
+ },
269
+ {
270
+ "epoch": 0.25025536261491316,
271
+ "eval_compression-pairs_loss": 2.0629916191101074,
272
+ "eval_compression-pairs_runtime": 0.4529,
273
+ "eval_compression-pairs_samples_per_second": 331.23,
274
+ "eval_compression-pairs_steps_per_second": 11.041,
275
+ "step": 1470
276
+ },
277
+ {
278
+ "epoch": 0.25025536261491316,
279
+ "eval_sciq_pairs_loss": 9.06814193725586,
280
+ "eval_sciq_pairs_runtime": 7.1445,
281
+ "eval_sciq_pairs_samples_per_second": 20.995,
282
+ "eval_sciq_pairs_steps_per_second": 0.7,
283
+ "step": 1470
284
+ },
285
+ {
286
+ "epoch": 0.25025536261491316,
287
+ "eval_qasc_pairs_loss": 9.245658874511719,
288
+ "eval_qasc_pairs_runtime": 2.0471,
289
+ "eval_qasc_pairs_samples_per_second": 73.274,
290
+ "eval_qasc_pairs_steps_per_second": 2.442,
291
+ "step": 1470
292
+ },
293
+ {
294
+ "epoch": 0.25025536261491316,
295
+ "eval_openbookqa_pairs_loss": 5.652446746826172,
296
+ "eval_openbookqa_pairs_runtime": 0.8946,
297
+ "eval_openbookqa_pairs_samples_per_second": 115.14,
298
+ "eval_openbookqa_pairs_steps_per_second": 4.471,
299
+ "step": 1470
300
+ },
301
+ {
302
+ "epoch": 0.25025536261491316,
303
+ "eval_msmarco_pairs_loss": 4.844855785369873,
304
+ "eval_msmarco_pairs_runtime": 2.7887,
305
+ "eval_msmarco_pairs_samples_per_second": 53.788,
306
+ "eval_msmarco_pairs_steps_per_second": 1.793,
307
+ "step": 1470
308
+ },
309
+ {
310
+ "epoch": 0.25025536261491316,
311
+ "eval_nq_pairs_loss": 5.023958206176758,
312
+ "eval_nq_pairs_runtime": 5.0823,
313
+ "eval_nq_pairs_samples_per_second": 29.514,
314
+ "eval_nq_pairs_steps_per_second": 0.984,
315
+ "step": 1470
316
+ },
317
+ {
318
+ "epoch": 0.25025536261491316,
319
+ "eval_trivia_pairs_loss": 5.2907304763793945,
320
+ "eval_trivia_pairs_runtime": 9.6673,
321
+ "eval_trivia_pairs_samples_per_second": 15.516,
322
+ "eval_trivia_pairs_steps_per_second": 0.517,
323
+ "step": 1470
324
+ },
325
+ {
326
+ "epoch": 0.25025536261491316,
327
+ "eval_quora_pairs_loss": 1.5572240352630615,
328
+ "eval_quora_pairs_runtime": 1.1979,
329
+ "eval_quora_pairs_samples_per_second": 125.218,
330
+ "eval_quora_pairs_steps_per_second": 4.174,
331
+ "step": 1470
332
+ },
333
+ {
334
+ "epoch": 0.25025536261491316,
335
+ "eval_gooaq_pairs_loss": 3.970768928527832,
336
+ "eval_gooaq_pairs_runtime": 2.117,
337
+ "eval_gooaq_pairs_samples_per_second": 70.855,
338
+ "eval_gooaq_pairs_steps_per_second": 2.362,
339
+ "step": 1470
340
+ },
341
+ {
342
+ "epoch": 0.2752808988764045,
343
+ "grad_norm": 40.67585754394531,
344
+ "learning_rate": 5.4851889683350365e-06,
345
+ "loss": 4.185,
346
+ "step": 1617
347
+ },
348
+ {
349
+ "epoch": 0.3003064351378958,
350
+ "grad_norm": 45.92570495605469,
351
+ "learning_rate": 5.985699693564862e-06,
352
+ "loss": 4.6367,
353
+ "step": 1764
354
+ },
355
+ {
356
+ "epoch": 0.32533197139938713,
357
+ "grad_norm": 13.566838264465332,
358
+ "learning_rate": 6.486210418794688e-06,
359
+ "loss": 4.3615,
360
+ "step": 1911
361
+ },
362
+ {
363
+ "epoch": 0.3503575076608784,
364
+ "grad_norm": 9.495999336242676,
365
+ "learning_rate": 6.986721144024515e-06,
366
+ "loss": 4.1791,
367
+ "step": 2058
368
+ },
369
+ {
370
+ "epoch": 0.37538304392236976,
371
+ "grad_norm": 32.735416412353516,
372
+ "learning_rate": 7.487231869254341e-06,
373
+ "loss": 4.1051,
374
+ "step": 2205
375
+ },
376
+ {
377
+ "epoch": 0.37538304392236976,
378
+ "eval_nli-pairs_loss": 3.2717113494873047,
379
+ "eval_nli-pairs_runtime": 4.0124,
380
+ "eval_nli-pairs_samples_per_second": 37.384,
381
+ "eval_nli-pairs_steps_per_second": 1.246,
382
+ "eval_sts-test_pearson_cosine": 0.6958570089637609,
383
+ "eval_sts-test_pearson_dot": 0.5824298957890577,
384
+ "eval_sts-test_pearson_euclidean": 0.6893962819387462,
385
+ "eval_sts-test_pearson_manhattan": 0.6993681181979946,
386
+ "eval_sts-test_pearson_max": 0.6993681181979946,
387
+ "eval_sts-test_spearman_cosine": 0.6652712160836801,
388
+ "eval_sts-test_spearman_dot": 0.5536505624407877,
389
+ "eval_sts-test_spearman_euclidean": 0.6659844314307678,
390
+ "eval_sts-test_spearman_manhattan": 0.675740852112121,
391
+ "eval_sts-test_spearman_max": 0.675740852112121,
392
+ "step": 2205
393
+ },
394
+ {
395
+ "epoch": 0.37538304392236976,
396
+ "eval_vitaminc-pairs_loss": 2.7197911739349365,
397
+ "eval_vitaminc-pairs_runtime": 2.1625,
398
+ "eval_vitaminc-pairs_samples_per_second": 69.365,
399
+ "eval_vitaminc-pairs_steps_per_second": 2.312,
400
+ "step": 2205
401
+ },
402
+ {
403
+ "epoch": 0.37538304392236976,
404
+ "eval_qnli-contrastive_loss": 9.638714790344238,
405
+ "eval_qnli-contrastive_runtime": 0.4877,
406
+ "eval_qnli-contrastive_samples_per_second": 307.567,
407
+ "eval_qnli-contrastive_steps_per_second": 10.252,
408
+ "step": 2205
409
+ },
410
+ {
411
+ "epoch": 0.37538304392236976,
412
+ "eval_scitail-pairs-qa_loss": 0.8106752634048462,
413
+ "eval_scitail-pairs-qa_runtime": 1.1588,
414
+ "eval_scitail-pairs-qa_samples_per_second": 129.449,
415
+ "eval_scitail-pairs-qa_steps_per_second": 4.315,
416
+ "step": 2205
417
+ },
418
+ {
419
+ "epoch": 0.37538304392236976,
420
+ "eval_scitail-pairs-pos_loss": 1.8894625902175903,
421
+ "eval_scitail-pairs-pos_runtime": 2.1181,
422
+ "eval_scitail-pairs-pos_samples_per_second": 70.817,
423
+ "eval_scitail-pairs-pos_steps_per_second": 2.361,
424
+ "step": 2205
425
+ },
426
+ {
427
+ "epoch": 0.37538304392236976,
428
+ "eval_xsum-pairs_loss": 2.262718439102173,
429
+ "eval_xsum-pairs_runtime": 2.2585,
430
+ "eval_xsum-pairs_samples_per_second": 66.416,
431
+ "eval_xsum-pairs_steps_per_second": 2.214,
432
+ "step": 2205
433
+ },
434
+ {
435
+ "epoch": 0.37538304392236976,
436
+ "eval_compression-pairs_loss": 1.4910633563995361,
437
+ "eval_compression-pairs_runtime": 0.4462,
438
+ "eval_compression-pairs_samples_per_second": 336.204,
439
+ "eval_compression-pairs_steps_per_second": 11.207,
440
+ "step": 2205
441
+ },
442
+ {
443
+ "epoch": 0.37538304392236976,
444
+ "eval_sciq_pairs_loss": 8.59740161895752,
445
+ "eval_sciq_pairs_runtime": 7.1845,
446
+ "eval_sciq_pairs_samples_per_second": 20.878,
447
+ "eval_sciq_pairs_steps_per_second": 0.696,
448
+ "step": 2205
449
+ },
450
+ {
451
+ "epoch": 0.37538304392236976,
452
+ "eval_qasc_pairs_loss": 8.103879928588867,
453
+ "eval_qasc_pairs_runtime": 2.0762,
454
+ "eval_qasc_pairs_samples_per_second": 72.246,
455
+ "eval_qasc_pairs_steps_per_second": 2.408,
456
+ "step": 2205
457
+ },
458
+ {
459
+ "epoch": 0.37538304392236976,
460
+ "eval_openbookqa_pairs_loss": 5.090969562530518,
461
+ "eval_openbookqa_pairs_runtime": 0.89,
462
+ "eval_openbookqa_pairs_samples_per_second": 115.726,
463
+ "eval_openbookqa_pairs_steps_per_second": 4.494,
464
+ "step": 2205
465
+ },
466
+ {
467
+ "epoch": 0.37538304392236976,
468
+ "eval_msmarco_pairs_loss": 3.9566943645477295,
469
+ "eval_msmarco_pairs_runtime": 2.8183,
470
+ "eval_msmarco_pairs_samples_per_second": 53.223,
471
+ "eval_msmarco_pairs_steps_per_second": 1.774,
472
+ "step": 2205
473
+ },
474
+ {
475
+ "epoch": 0.37538304392236976,
476
+ "eval_nq_pairs_loss": 4.009054183959961,
477
+ "eval_nq_pairs_runtime": 5.0219,
478
+ "eval_nq_pairs_samples_per_second": 29.869,
479
+ "eval_nq_pairs_steps_per_second": 0.996,
480
+ "step": 2205
481
+ },
482
+ {
483
+ "epoch": 0.37538304392236976,
484
+ "eval_trivia_pairs_loss": 4.286431312561035,
485
+ "eval_trivia_pairs_runtime": 9.4975,
486
+ "eval_trivia_pairs_samples_per_second": 15.794,
487
+ "eval_trivia_pairs_steps_per_second": 0.526,
488
+ "step": 2205
489
+ },
490
+ {
491
+ "epoch": 0.37538304392236976,
492
+ "eval_quora_pairs_loss": 1.123273491859436,
493
+ "eval_quora_pairs_runtime": 1.1487,
494
+ "eval_quora_pairs_samples_per_second": 130.586,
495
+ "eval_quora_pairs_steps_per_second": 4.353,
496
+ "step": 2205
497
+ },
498
+ {
499
+ "epoch": 0.37538304392236976,
500
+ "eval_gooaq_pairs_loss": 3.222414255142212,
501
+ "eval_gooaq_pairs_runtime": 2.0173,
502
+ "eval_gooaq_pairs_samples_per_second": 74.357,
503
+ "eval_gooaq_pairs_steps_per_second": 2.479,
504
+ "step": 2205
505
+ },
506
+ {
507
+ "epoch": 0.4004085801838611,
508
+ "grad_norm": 218.56105041503906,
509
+ "learning_rate": 7.987742594484168e-06,
510
+ "loss": 3.7674,
511
+ "step": 2352
512
+ },
513
+ {
514
+ "epoch": 0.4254341164453524,
515
+ "grad_norm": 27.877609252929688,
516
+ "learning_rate": 8.488253319713993e-06,
517
+ "loss": 3.8729,
518
+ "step": 2499
519
+ },
520
+ {
521
+ "epoch": 0.45045965270684374,
522
+ "grad_norm": 33.50013732910156,
523
+ "learning_rate": 8.988764044943822e-06,
524
+ "loss": 3.4527,
525
+ "step": 2646
526
+ },
527
+ {
528
+ "epoch": 0.475485188968335,
529
+ "grad_norm": 14.015911102294922,
530
+ "learning_rate": 9.489274770173647e-06,
531
+ "loss": 3.3545,
532
+ "step": 2793
533
+ },
534
+ {
535
+ "epoch": 0.5005107252298263,
536
+ "grad_norm": 33.59694290161133,
537
+ "learning_rate": 9.989785495403473e-06,
538
+ "loss": 3.3247,
539
+ "step": 2940
540
+ },
541
+ {
542
+ "epoch": 0.5005107252298263,
543
+ "eval_nli-pairs_loss": 2.7121565341949463,
544
+ "eval_nli-pairs_runtime": 4.1564,
545
+ "eval_nli-pairs_samples_per_second": 36.089,
546
+ "eval_nli-pairs_steps_per_second": 1.203,
547
+ "eval_sts-test_pearson_cosine": 0.716623047702725,
548
+ "eval_sts-test_pearson_dot": 0.6128451070598809,
549
+ "eval_sts-test_pearson_euclidean": 0.7138791236031807,
550
+ "eval_sts-test_pearson_manhattan": 0.7213151818687454,
551
+ "eval_sts-test_pearson_max": 0.7213151818687454,
552
+ "eval_sts-test_spearman_cosine": 0.6919792400941177,
553
+ "eval_sts-test_spearman_dot": 0.5867158357121192,
554
+ "eval_sts-test_spearman_euclidean": 0.6925037259567834,
555
+ "eval_sts-test_spearman_manhattan": 0.7008895667910079,
556
+ "eval_sts-test_spearman_max": 0.7008895667910079,
557
+ "step": 2940
558
+ },
559
+ {
560
+ "epoch": 0.5005107252298263,
561
+ "eval_vitaminc-pairs_loss": 2.225992441177368,
562
+ "eval_vitaminc-pairs_runtime": 2.253,
563
+ "eval_vitaminc-pairs_samples_per_second": 66.577,
564
+ "eval_vitaminc-pairs_steps_per_second": 2.219,
565
+ "step": 2940
566
+ },
567
+ {
568
+ "epoch": 0.5005107252298263,
569
+ "eval_qnli-contrastive_loss": 4.92629861831665,
570
+ "eval_qnli-contrastive_runtime": 0.5005,
571
+ "eval_qnli-contrastive_samples_per_second": 299.691,
572
+ "eval_qnli-contrastive_steps_per_second": 9.99,
573
+ "step": 2940
574
+ },
575
+ {
576
+ "epoch": 0.5005107252298263,
577
+ "eval_scitail-pairs-qa_loss": 0.5898066163063049,
578
+ "eval_scitail-pairs-qa_runtime": 1.2227,
579
+ "eval_scitail-pairs-qa_samples_per_second": 122.682,
580
+ "eval_scitail-pairs-qa_steps_per_second": 4.089,
581
+ "step": 2940
582
+ },
583
+ {
584
+ "epoch": 0.5005107252298263,
585
+ "eval_scitail-pairs-pos_loss": 1.4237287044525146,
586
+ "eval_scitail-pairs-pos_runtime": 2.4409,
587
+ "eval_scitail-pairs-pos_samples_per_second": 61.452,
588
+ "eval_scitail-pairs-pos_steps_per_second": 2.048,
589
+ "step": 2940
590
+ },
591
+ {
592
+ "epoch": 0.5005107252298263,
593
+ "eval_xsum-pairs_loss": 1.8388895988464355,
594
+ "eval_xsum-pairs_runtime": 2.2831,
595
+ "eval_xsum-pairs_samples_per_second": 65.7,
596
+ "eval_xsum-pairs_steps_per_second": 2.19,
597
+ "step": 2940
598
+ },
599
+ {
600
+ "epoch": 0.5005107252298263,
601
+ "eval_compression-pairs_loss": 1.1590967178344727,
602
+ "eval_compression-pairs_runtime": 0.5152,
603
+ "eval_compression-pairs_samples_per_second": 291.165,
604
+ "eval_compression-pairs_steps_per_second": 9.706,
605
+ "step": 2940
606
+ },
607
+ {
608
+ "epoch": 0.5005107252298263,
609
+ "eval_sciq_pairs_loss": 8.282496452331543,
610
+ "eval_sciq_pairs_runtime": 7.2871,
611
+ "eval_sciq_pairs_samples_per_second": 20.584,
612
+ "eval_sciq_pairs_steps_per_second": 0.686,
613
+ "step": 2940
614
+ },
615
+ {
616
+ "epoch": 0.5005107252298263,
617
+ "eval_qasc_pairs_loss": 7.817965507507324,
618
+ "eval_qasc_pairs_runtime": 2.0211,
619
+ "eval_qasc_pairs_samples_per_second": 74.218,
620
+ "eval_qasc_pairs_steps_per_second": 2.474,
621
+ "step": 2940
622
+ },
623
+ {
624
+ "epoch": 0.5005107252298263,
625
+ "eval_openbookqa_pairs_loss": 4.619383811950684,
626
+ "eval_openbookqa_pairs_runtime": 0.8531,
627
+ "eval_openbookqa_pairs_samples_per_second": 120.731,
628
+ "eval_openbookqa_pairs_steps_per_second": 4.689,
629
+ "step": 2940
630
+ },
631
+ {
632
+ "epoch": 0.5005107252298263,
633
+ "eval_msmarco_pairs_loss": 3.478559970855713,
634
+ "eval_msmarco_pairs_runtime": 2.7512,
635
+ "eval_msmarco_pairs_samples_per_second": 54.522,
636
+ "eval_msmarco_pairs_steps_per_second": 1.817,
637
+ "step": 2940
638
+ },
639
+ {
640
+ "epoch": 0.5005107252298263,
641
+ "eval_nq_pairs_loss": 3.3449866771698,
642
+ "eval_nq_pairs_runtime": 5.0591,
643
+ "eval_nq_pairs_samples_per_second": 29.649,
644
+ "eval_nq_pairs_steps_per_second": 0.988,
645
+ "step": 2940
646
+ },
647
+ {
648
+ "epoch": 0.5005107252298263,
649
+ "eval_trivia_pairs_loss": 3.524484872817993,
650
+ "eval_trivia_pairs_runtime": 9.662,
651
+ "eval_trivia_pairs_samples_per_second": 15.525,
652
+ "eval_trivia_pairs_steps_per_second": 0.517,
653
+ "step": 2940
654
+ },
655
+ {
656
+ "epoch": 0.5005107252298263,
657
+ "eval_quora_pairs_loss": 0.9095575213432312,
658
+ "eval_quora_pairs_runtime": 1.2482,
659
+ "eval_quora_pairs_samples_per_second": 120.175,
660
+ "eval_quora_pairs_steps_per_second": 4.006,
661
+ "step": 2940
662
+ },
663
+ {
664
+ "epoch": 0.5005107252298263,
665
+ "eval_gooaq_pairs_loss": 2.6586034297943115,
666
+ "eval_gooaq_pairs_runtime": 2.1091,
667
+ "eval_gooaq_pairs_samples_per_second": 71.12,
668
+ "eval_gooaq_pairs_steps_per_second": 2.371,
669
+ "step": 2940
670
+ },
671
+ {
672
+ "epoch": 0.5255362614913177,
673
+ "grad_norm": 35.33409118652344,
674
+ "learning_rate": 1.04902962206333e-05,
675
+ "loss": 3.116,
676
+ "step": 3087
677
+ },
678
+ {
679
+ "epoch": 0.550561797752809,
680
+ "grad_norm": 22.29003143310547,
681
+ "learning_rate": 1.0990806945863125e-05,
682
+ "loss": 3.2418,
683
+ "step": 3234
684
+ },
685
+ {
686
+ "epoch": 0.5755873340143003,
687
+ "grad_norm": 31.277965545654297,
688
+ "learning_rate": 1.1491317671092953e-05,
689
+ "loss": 3.0757,
690
+ "step": 3381
691
+ },
692
+ {
693
+ "epoch": 0.6006128702757916,
694
+ "grad_norm": 24.612506866455078,
695
+ "learning_rate": 1.1991828396322778e-05,
696
+ "loss": 2.8524,
697
+ "step": 3528
698
+ },
699
+ {
700
+ "epoch": 0.625638406537283,
701
+ "grad_norm": 25.11741065979004,
702
+ "learning_rate": 1.2492339121552605e-05,
703
+ "loss": 2.6875,
704
+ "step": 3675
705
+ },
706
+ {
707
+ "epoch": 0.625638406537283,
708
+ "eval_nli-pairs_loss": 2.479051113128662,
709
+ "eval_nli-pairs_runtime": 3.9943,
710
+ "eval_nli-pairs_samples_per_second": 37.553,
711
+ "eval_nli-pairs_steps_per_second": 1.252,
712
+ "eval_sts-test_pearson_cosine": 0.7278742453545186,
713
+ "eval_sts-test_pearson_dot": 0.6217650825208566,
714
+ "eval_sts-test_pearson_euclidean": 0.7243228472931561,
715
+ "eval_sts-test_pearson_manhattan": 0.7333297580184588,
716
+ "eval_sts-test_pearson_max": 0.7333297580184588,
717
+ "eval_sts-test_spearman_cosine": 0.7013110457844404,
718
+ "eval_sts-test_spearman_dot": 0.5970993074902947,
719
+ "eval_sts-test_spearman_euclidean": 0.701564129266252,
720
+ "eval_sts-test_spearman_manhattan": 0.7116482009924582,
721
+ "eval_sts-test_spearman_max": 0.7116482009924582,
722
+ "step": 3675
723
+ },
724
+ {
725
+ "epoch": 0.625638406537283,
726
+ "eval_vitaminc-pairs_loss": 1.974273681640625,
727
+ "eval_vitaminc-pairs_runtime": 2.1754,
728
+ "eval_vitaminc-pairs_samples_per_second": 68.953,
729
+ "eval_vitaminc-pairs_steps_per_second": 2.298,
730
+ "step": 3675
731
+ },
732
+ {
733
+ "epoch": 0.625638406537283,
734
+ "eval_qnli-contrastive_loss": 1.7706010341644287,
735
+ "eval_qnli-contrastive_runtime": 0.4866,
736
+ "eval_qnli-contrastive_samples_per_second": 308.244,
737
+ "eval_qnli-contrastive_steps_per_second": 10.275,
738
+ "step": 3675
739
+ },
740
+ {
741
+ "epoch": 0.625638406537283,
742
+ "eval_scitail-pairs-qa_loss": 0.4400452673435211,
743
+ "eval_scitail-pairs-qa_runtime": 1.1519,
744
+ "eval_scitail-pairs-qa_samples_per_second": 130.222,
745
+ "eval_scitail-pairs-qa_steps_per_second": 4.341,
746
+ "step": 3675
747
+ },
748
+ {
749
+ "epoch": 0.625638406537283,
750
+ "eval_scitail-pairs-pos_loss": 1.1909903287887573,
751
+ "eval_scitail-pairs-pos_runtime": 2.1319,
752
+ "eval_scitail-pairs-pos_samples_per_second": 70.36,
753
+ "eval_scitail-pairs-pos_steps_per_second": 2.345,
754
+ "step": 3675
755
+ },
756
+ {
757
+ "epoch": 0.625638406537283,
758
+ "eval_xsum-pairs_loss": 1.4811985492706299,
759
+ "eval_xsum-pairs_runtime": 2.254,
760
+ "eval_xsum-pairs_samples_per_second": 66.548,
761
+ "eval_xsum-pairs_steps_per_second": 2.218,
762
+ "step": 3675
763
+ },
764
+ {
765
+ "epoch": 0.625638406537283,
766
+ "eval_compression-pairs_loss": 0.8453781008720398,
767
+ "eval_compression-pairs_runtime": 0.4401,
768
+ "eval_compression-pairs_samples_per_second": 340.826,
769
+ "eval_compression-pairs_steps_per_second": 11.361,
770
+ "step": 3675
771
+ },
772
+ {
773
+ "epoch": 0.625638406537283,
774
+ "eval_sciq_pairs_loss": 8.014656066894531,
775
+ "eval_sciq_pairs_runtime": 7.0707,
776
+ "eval_sciq_pairs_samples_per_second": 21.214,
777
+ "eval_sciq_pairs_steps_per_second": 0.707,
778
+ "step": 3675
779
+ },
780
+ {
781
+ "epoch": 0.625638406537283,
782
+ "eval_qasc_pairs_loss": 6.9316277503967285,
783
+ "eval_qasc_pairs_runtime": 2.0338,
784
+ "eval_qasc_pairs_samples_per_second": 73.752,
785
+ "eval_qasc_pairs_steps_per_second": 2.458,
786
+ "step": 3675
787
+ },
788
+ {
789
+ "epoch": 0.625638406537283,
790
+ "eval_openbookqa_pairs_loss": 4.21690034866333,
791
+ "eval_openbookqa_pairs_runtime": 0.918,
792
+ "eval_openbookqa_pairs_samples_per_second": 112.202,
793
+ "eval_openbookqa_pairs_steps_per_second": 4.357,
794
+ "step": 3675
795
+ },
796
+ {
797
+ "epoch": 0.625638406537283,
798
+ "eval_msmarco_pairs_loss": 3.0209598541259766,
799
+ "eval_msmarco_pairs_runtime": 2.7749,
800
+ "eval_msmarco_pairs_samples_per_second": 54.056,
801
+ "eval_msmarco_pairs_steps_per_second": 1.802,
802
+ "step": 3675
803
+ },
804
+ {
805
+ "epoch": 0.625638406537283,
806
+ "eval_nq_pairs_loss": 2.956088066101074,
807
+ "eval_nq_pairs_runtime": 5.0024,
808
+ "eval_nq_pairs_samples_per_second": 29.986,
809
+ "eval_nq_pairs_steps_per_second": 1.0,
810
+ "step": 3675
811
+ },
812
+ {
813
+ "epoch": 0.625638406537283,
814
+ "eval_trivia_pairs_loss": 3.17364501953125,
815
+ "eval_trivia_pairs_runtime": 9.4856,
816
+ "eval_trivia_pairs_samples_per_second": 15.813,
817
+ "eval_trivia_pairs_steps_per_second": 0.527,
818
+ "step": 3675
819
+ },
820
+ {
821
+ "epoch": 0.625638406537283,
822
+ "eval_quora_pairs_loss": 0.763593852519989,
823
+ "eval_quora_pairs_runtime": 1.1441,
824
+ "eval_quora_pairs_samples_per_second": 131.104,
825
+ "eval_quora_pairs_steps_per_second": 4.37,
826
+ "step": 3675
827
+ },
828
+ {
829
+ "epoch": 0.625638406537283,
830
+ "eval_gooaq_pairs_loss": 2.3524909019470215,
831
+ "eval_gooaq_pairs_runtime": 2.0161,
832
+ "eval_gooaq_pairs_samples_per_second": 74.4,
833
+ "eval_gooaq_pairs_steps_per_second": 2.48,
834
+ "step": 3675
835
+ },
836
+ {
837
+ "epoch": 0.6506639427987743,
838
+ "grad_norm": 31.163997650146484,
839
+ "learning_rate": 1.2992849846782432e-05,
840
+ "loss": 2.7808,
841
+ "step": 3822
842
+ },
843
+ {
844
+ "epoch": 0.6756894790602656,
845
+ "grad_norm": 14.883658409118652,
846
+ "learning_rate": 1.3493360572012258e-05,
847
+ "loss": 2.5687,
848
+ "step": 3969
849
+ },
850
+ {
851
+ "epoch": 0.7007150153217568,
852
+ "grad_norm": 5.874042987823486,
853
+ "learning_rate": 1.3993871297242083e-05,
854
+ "loss": 2.3034,
855
+ "step": 4116
856
+ },
857
+ {
858
+ "epoch": 0.7257405515832482,
859
+ "grad_norm": 31.464054107666016,
860
+ "learning_rate": 1.4494382022471912e-05,
861
+ "loss": 2.4412,
862
+ "step": 4263
863
+ },
864
+ {
865
+ "epoch": 0.7507660878447395,
866
+ "grad_norm": 16.43915367126465,
867
+ "learning_rate": 1.4994892747701737e-05,
868
+ "loss": 2.3293,
869
+ "step": 4410
870
+ },
871
+ {
872
+ "epoch": 0.7507660878447395,
873
+ "eval_nli-pairs_loss": 2.3226094245910645,
874
+ "eval_nli-pairs_runtime": 4.113,
875
+ "eval_nli-pairs_samples_per_second": 36.47,
876
+ "eval_nli-pairs_steps_per_second": 1.216,
877
+ "eval_sts-test_pearson_cosine": 0.7356971966139032,
878
+ "eval_sts-test_pearson_dot": 0.6150809513049869,
879
+ "eval_sts-test_pearson_euclidean": 0.7330733579988641,
880
+ "eval_sts-test_pearson_manhattan": 0.7423412248131348,
881
+ "eval_sts-test_pearson_max": 0.7423412248131348,
882
+ "eval_sts-test_spearman_cosine": 0.7121899723082045,
883
+ "eval_sts-test_spearman_dot": 0.5926505936679538,
884
+ "eval_sts-test_spearman_euclidean": 0.7130179905407037,
885
+ "eval_sts-test_spearman_manhattan": 0.7227257562995023,
886
+ "eval_sts-test_spearman_max": 0.7227257562995023,
887
+ "step": 4410
888
+ },
889
+ {
890
+ "epoch": 0.7507660878447395,
891
+ "eval_vitaminc-pairs_loss": 1.7956713438034058,
892
+ "eval_vitaminc-pairs_runtime": 2.174,
893
+ "eval_vitaminc-pairs_samples_per_second": 68.996,
894
+ "eval_vitaminc-pairs_steps_per_second": 2.3,
895
+ "step": 4410
896
+ },
897
+ {
898
+ "epoch": 0.7507660878447395,
899
+ "eval_qnli-contrastive_loss": 1.0078614950180054,
900
+ "eval_qnli-contrastive_runtime": 0.4874,
901
+ "eval_qnli-contrastive_samples_per_second": 307.763,
902
+ "eval_qnli-contrastive_steps_per_second": 10.259,
903
+ "step": 4410
904
+ },
905
+ {
906
+ "epoch": 0.7507660878447395,
907
+ "eval_scitail-pairs-qa_loss": 0.36971578001976013,
908
+ "eval_scitail-pairs-qa_runtime": 1.164,
909
+ "eval_scitail-pairs-qa_samples_per_second": 128.863,
910
+ "eval_scitail-pairs-qa_steps_per_second": 4.295,
911
+ "step": 4410
912
+ },
913
+ {
914
+ "epoch": 0.7507660878447395,
915
+ "eval_scitail-pairs-pos_loss": 1.0497769117355347,
916
+ "eval_scitail-pairs-pos_runtime": 2.1205,
917
+ "eval_scitail-pairs-pos_samples_per_second": 70.74,
918
+ "eval_scitail-pairs-pos_steps_per_second": 2.358,
919
+ "step": 4410
920
+ },
921
+ {
922
+ "epoch": 0.7507660878447395,
923
+ "eval_xsum-pairs_loss": 1.1691261529922485,
924
+ "eval_xsum-pairs_runtime": 2.259,
925
+ "eval_xsum-pairs_samples_per_second": 66.401,
926
+ "eval_xsum-pairs_steps_per_second": 2.213,
927
+ "step": 4410
928
+ },
929
+ {
930
+ "epoch": 0.7507660878447395,
931
+ "eval_compression-pairs_loss": 0.5027483105659485,
932
+ "eval_compression-pairs_runtime": 0.4403,
933
+ "eval_compression-pairs_samples_per_second": 340.682,
934
+ "eval_compression-pairs_steps_per_second": 11.356,
935
+ "step": 4410
936
+ },
937
+ {
938
+ "epoch": 0.7507660878447395,
939
+ "eval_sciq_pairs_loss": 7.823739528656006,
940
+ "eval_sciq_pairs_runtime": 7.0738,
941
+ "eval_sciq_pairs_samples_per_second": 21.205,
942
+ "eval_sciq_pairs_steps_per_second": 0.707,
943
+ "step": 4410
944
+ },
945
+ {
946
+ "epoch": 0.7507660878447395,
947
+ "eval_qasc_pairs_loss": 6.404655933380127,
948
+ "eval_qasc_pairs_runtime": 2.0346,
949
+ "eval_qasc_pairs_samples_per_second": 73.723,
950
+ "eval_qasc_pairs_steps_per_second": 2.457,
951
+ "step": 4410
952
+ },
953
+ {
954
+ "epoch": 0.7507660878447395,
955
+ "eval_openbookqa_pairs_loss": 3.857389211654663,
956
+ "eval_openbookqa_pairs_runtime": 0.8544,
957
+ "eval_openbookqa_pairs_samples_per_second": 120.547,
958
+ "eval_openbookqa_pairs_steps_per_second": 4.681,
959
+ "step": 4410
960
+ },
961
+ {
962
+ "epoch": 0.7507660878447395,
963
+ "eval_msmarco_pairs_loss": 2.7028510570526123,
964
+ "eval_msmarco_pairs_runtime": 2.7448,
965
+ "eval_msmarco_pairs_samples_per_second": 54.649,
966
+ "eval_msmarco_pairs_steps_per_second": 1.822,
967
+ "step": 4410
968
+ },
969
+ {
970
+ "epoch": 0.7507660878447395,
971
+ "eval_nq_pairs_loss": 2.679351329803467,
972
+ "eval_nq_pairs_runtime": 5.067,
973
+ "eval_nq_pairs_samples_per_second": 29.603,
974
+ "eval_nq_pairs_steps_per_second": 0.987,
975
+ "step": 4410
976
+ },
977
+ {
978
+ "epoch": 0.7507660878447395,
979
+ "eval_trivia_pairs_loss": 2.8798065185546875,
980
+ "eval_trivia_pairs_runtime": 9.5449,
981
+ "eval_trivia_pairs_samples_per_second": 15.715,
982
+ "eval_trivia_pairs_steps_per_second": 0.524,
983
+ "step": 4410
984
+ },
985
+ {
986
+ "epoch": 0.7507660878447395,
987
+ "eval_quora_pairs_loss": 0.6825175285339355,
988
+ "eval_quora_pairs_runtime": 1.1431,
989
+ "eval_quora_pairs_samples_per_second": 131.221,
990
+ "eval_quora_pairs_steps_per_second": 4.374,
991
+ "step": 4410
992
+ },
993
+ {
994
+ "epoch": 0.7507660878447395,
995
+ "eval_gooaq_pairs_loss": 2.0472166538238525,
996
+ "eval_gooaq_pairs_runtime": 2.0218,
997
+ "eval_gooaq_pairs_samples_per_second": 74.191,
998
+ "eval_gooaq_pairs_steps_per_second": 2.473,
999
+ "step": 4410
1000
+ },
1001
+ {
1002
+ "epoch": 0.7757916241062308,
1003
+ "grad_norm": 4.2425055503845215,
1004
+ "learning_rate": 1.5495403472931565e-05,
1005
+ "loss": 2.3651,
1006
+ "step": 4557
1007
+ },
1008
+ {
1009
+ "epoch": 0.8008171603677222,
1010
+ "grad_norm": 22.42776107788086,
1011
+ "learning_rate": 1.5995914198161388e-05,
1012
+ "loss": 2.6296,
1013
+ "step": 4704
1014
+ },
1015
+ {
1016
+ "epoch": 0.8258426966292135,
1017
+ "grad_norm": 21.169517517089844,
1018
+ "learning_rate": 1.6496424923391215e-05,
1019
+ "loss": 2.2108,
1020
+ "step": 4851
1021
+ },
1022
+ {
1023
+ "epoch": 0.8508682328907048,
1024
+ "grad_norm": 23.326181411743164,
1025
+ "learning_rate": 1.699693564862104e-05,
1026
+ "loss": 2.1852,
1027
+ "step": 4998
1028
+ },
1029
+ {
1030
+ "epoch": 0.8758937691521961,
1031
+ "grad_norm": 24.574176788330078,
1032
+ "learning_rate": 1.7497446373850868e-05,
1033
+ "loss": 2.2944,
1034
+ "step": 5145
1035
+ },
1036
+ {
1037
+ "epoch": 0.8758937691521961,
1038
+ "eval_nli-pairs_loss": 2.0634915828704834,
1039
+ "eval_nli-pairs_runtime": 4.0019,
1040
+ "eval_nli-pairs_samples_per_second": 37.482,
1041
+ "eval_nli-pairs_steps_per_second": 1.249,
1042
+ "eval_sts-test_pearson_cosine": 0.7466390532977636,
1043
+ "eval_sts-test_pearson_dot": 0.612259458274589,
1044
+ "eval_sts-test_pearson_euclidean": 0.7432536346376271,
1045
+ "eval_sts-test_pearson_manhattan": 0.7500490179501229,
1046
+ "eval_sts-test_pearson_max": 0.7500490179501229,
1047
+ "eval_sts-test_spearman_cosine": 0.728273260456201,
1048
+ "eval_sts-test_spearman_dot": 0.5960115087190596,
1049
+ "eval_sts-test_spearman_euclidean": 0.7272394395622148,
1050
+ "eval_sts-test_spearman_manhattan": 0.7334149564445704,
1051
+ "eval_sts-test_spearman_max": 0.7334149564445704,
1052
+ "step": 5145
1053
+ },
1054
+ {
1055
+ "epoch": 0.8758937691521961,
1056
+ "eval_vitaminc-pairs_loss": 1.638654112815857,
1057
+ "eval_vitaminc-pairs_runtime": 2.1637,
1058
+ "eval_vitaminc-pairs_samples_per_second": 69.327,
1059
+ "eval_vitaminc-pairs_steps_per_second": 2.311,
1060
+ "step": 5145
1061
+ },
1062
+ {
1063
+ "epoch": 0.8758937691521961,
1064
+ "eval_qnli-contrastive_loss": 0.9639705419540405,
1065
+ "eval_qnli-contrastive_runtime": 0.4889,
1066
+ "eval_qnli-contrastive_samples_per_second": 306.825,
1067
+ "eval_qnli-contrastive_steps_per_second": 10.228,
1068
+ "step": 5145
1069
+ },
1070
+ {
1071
+ "epoch": 0.8758937691521961,
1072
+ "eval_scitail-pairs-qa_loss": 0.31595128774642944,
1073
+ "eval_scitail-pairs-qa_runtime": 1.1467,
1074
+ "eval_scitail-pairs-qa_samples_per_second": 130.806,
1075
+ "eval_scitail-pairs-qa_steps_per_second": 4.36,
1076
+ "step": 5145
1077
+ },
1078
+ {
1079
+ "epoch": 0.8758937691521961,
1080
+ "eval_scitail-pairs-pos_loss": 0.9187478423118591,
1081
+ "eval_scitail-pairs-pos_runtime": 2.1273,
1082
+ "eval_scitail-pairs-pos_samples_per_second": 70.512,
1083
+ "eval_scitail-pairs-pos_steps_per_second": 2.35,
1084
+ "step": 5145
1085
+ },
1086
+ {
1087
+ "epoch": 0.8758937691521961,
1088
+ "eval_xsum-pairs_loss": 1.060194492340088,
1089
+ "eval_xsum-pairs_runtime": 2.2836,
1090
+ "eval_xsum-pairs_samples_per_second": 65.686,
1091
+ "eval_xsum-pairs_steps_per_second": 2.19,
1092
+ "step": 5145
1093
+ },
1094
+ {
1095
+ "epoch": 0.8758937691521961,
1096
+ "eval_compression-pairs_loss": 0.41078585386276245,
1097
+ "eval_compression-pairs_runtime": 0.4434,
1098
+ "eval_compression-pairs_samples_per_second": 338.276,
1099
+ "eval_compression-pairs_steps_per_second": 11.276,
1100
+ "step": 5145
1101
+ },
1102
+ {
1103
+ "epoch": 0.8758937691521961,
1104
+ "eval_sciq_pairs_loss": 7.577760696411133,
1105
+ "eval_sciq_pairs_runtime": 7.1025,
1106
+ "eval_sciq_pairs_samples_per_second": 21.119,
1107
+ "eval_sciq_pairs_steps_per_second": 0.704,
1108
+ "step": 5145
1109
+ },
1110
+ {
1111
+ "epoch": 0.8758937691521961,
1112
+ "eval_qasc_pairs_loss": 6.353766918182373,
1113
+ "eval_qasc_pairs_runtime": 2.0113,
1114
+ "eval_qasc_pairs_samples_per_second": 74.58,
1115
+ "eval_qasc_pairs_steps_per_second": 2.486,
1116
+ "step": 5145
1117
+ },
1118
+ {
1119
+ "epoch": 0.8758937691521961,
1120
+ "eval_openbookqa_pairs_loss": 3.7140932083129883,
1121
+ "eval_openbookqa_pairs_runtime": 0.8529,
1122
+ "eval_openbookqa_pairs_samples_per_second": 120.762,
1123
+ "eval_openbookqa_pairs_steps_per_second": 4.69,
1124
+ "step": 5145
1125
+ },
1126
+ {
1127
+ "epoch": 0.8758937691521961,
1128
+ "eval_msmarco_pairs_loss": 2.3862576484680176,
1129
+ "eval_msmarco_pairs_runtime": 2.8953,
1130
+ "eval_msmarco_pairs_samples_per_second": 51.808,
1131
+ "eval_msmarco_pairs_steps_per_second": 1.727,
1132
+ "step": 5145
1133
+ },
1134
+ {
1135
+ "epoch": 0.8758937691521961,
1136
+ "eval_nq_pairs_loss": 2.3543190956115723,
1137
+ "eval_nq_pairs_runtime": 5.0048,
1138
+ "eval_nq_pairs_samples_per_second": 29.971,
1139
+ "eval_nq_pairs_steps_per_second": 0.999,
1140
+ "step": 5145
1141
+ },
1142
+ {
1143
+ "epoch": 0.8758937691521961,
1144
+ "eval_trivia_pairs_loss": 2.494807481765747,
1145
+ "eval_trivia_pairs_runtime": 9.5513,
1146
+ "eval_trivia_pairs_samples_per_second": 15.705,
1147
+ "eval_trivia_pairs_steps_per_second": 0.523,
1148
+ "step": 5145
1149
+ },
1150
+ {
1151
+ "epoch": 0.8758937691521961,
1152
+ "eval_quora_pairs_loss": 0.6137441992759705,
1153
+ "eval_quora_pairs_runtime": 1.1541,
1154
+ "eval_quora_pairs_samples_per_second": 129.967,
1155
+ "eval_quora_pairs_steps_per_second": 4.332,
1156
+ "step": 5145
1157
+ },
1158
+ {
1159
+ "epoch": 0.8758937691521961,
1160
+ "eval_gooaq_pairs_loss": 1.8279658555984497,
1161
+ "eval_gooaq_pairs_runtime": 2.0951,
1162
+ "eval_gooaq_pairs_samples_per_second": 71.595,
1163
+ "eval_gooaq_pairs_steps_per_second": 2.387,
1164
+ "step": 5145
1165
+ },
1166
+ {
1167
+ "epoch": 0.9009193054136875,
1168
+ "grad_norm": 10.590804100036621,
1169
+ "learning_rate": 1.7997957099080695e-05,
1170
+ "loss": 2.2133,
1171
+ "step": 5292
1172
+ },
1173
+ {
1174
+ "epoch": 0.9259448416751788,
1175
+ "grad_norm": 18.527711868286133,
1176
+ "learning_rate": 1.849846782431052e-05,
1177
+ "loss": 2.2255,
1178
+ "step": 5439
1179
+ },
1180
+ {
1181
+ "epoch": 0.95097037793667,
1182
+ "grad_norm": 2.617710828781128,
1183
+ "learning_rate": 1.8995573714674838e-05,
1184
+ "loss": 2.3502,
1185
+ "step": 5586
1186
+ },
1187
+ {
1188
+ "epoch": 0.9759959141981613,
1189
+ "grad_norm": 19.551551818847656,
1190
+ "learning_rate": 1.9496084439904668e-05,
1191
+ "loss": 1.8964,
1192
+ "step": 5733
1193
+ },
1194
+ {
1195
+ "epoch": 1.0010214504596526,
1196
+ "grad_norm": 11.783225059509277,
1197
+ "learning_rate": 1.999319033026898e-05,
1198
+ "loss": 1.913,
1199
+ "step": 5880
1200
+ },
1201
+ {
1202
+ "epoch": 1.0010214504596526,
1203
+ "eval_nli-pairs_loss": 1.9677053689956665,
1204
+ "eval_nli-pairs_runtime": 4.3863,
1205
+ "eval_nli-pairs_samples_per_second": 34.198,
1206
+ "eval_nli-pairs_steps_per_second": 1.14,
1207
+ "eval_sts-test_pearson_cosine": 0.7531824359441671,
1208
+ "eval_sts-test_pearson_dot": 0.602579906515822,
1209
+ "eval_sts-test_pearson_euclidean": 0.7486763477944213,
1210
+ "eval_sts-test_pearson_manhattan": 0.7566220287347274,
1211
+ "eval_sts-test_pearson_max": 0.7566220287347274,
1212
+ "eval_sts-test_spearman_cosine": 0.7387792578665129,
1213
+ "eval_sts-test_spearman_dot": 0.5926594656319394,
1214
+ "eval_sts-test_spearman_euclidean": 0.733653805383597,
1215
+ "eval_sts-test_spearman_manhattan": 0.7420657558603486,
1216
+ "eval_sts-test_spearman_max": 0.7420657558603486,
1217
+ "step": 5880
1218
+ },
1219
+ {
1220
+ "epoch": 1.0010214504596526,
1221
+ "eval_vitaminc-pairs_loss": 1.4394291639328003,
1222
+ "eval_vitaminc-pairs_runtime": 2.2575,
1223
+ "eval_vitaminc-pairs_samples_per_second": 66.446,
1224
+ "eval_vitaminc-pairs_steps_per_second": 2.215,
1225
+ "step": 5880
1226
+ },
1227
+ {
1228
+ "epoch": 1.0010214504596526,
1229
+ "eval_qnli-contrastive_loss": 0.45715218782424927,
1230
+ "eval_qnli-contrastive_runtime": 0.501,
1231
+ "eval_qnli-contrastive_samples_per_second": 299.385,
1232
+ "eval_qnli-contrastive_steps_per_second": 9.979,
1233
+ "step": 5880
1234
+ },
1235
+ {
1236
+ "epoch": 1.0010214504596526,
1237
+ "eval_scitail-pairs-qa_loss": 0.26679515838623047,
1238
+ "eval_scitail-pairs-qa_runtime": 1.4342,
1239
+ "eval_scitail-pairs-qa_samples_per_second": 104.587,
1240
+ "eval_scitail-pairs-qa_steps_per_second": 3.486,
1241
+ "step": 5880
1242
+ },
1243
+ {
1244
+ "epoch": 1.0010214504596526,
1245
+ "eval_scitail-pairs-pos_loss": 0.8628473281860352,
1246
+ "eval_scitail-pairs-pos_runtime": 2.3485,
1247
+ "eval_scitail-pairs-pos_samples_per_second": 63.871,
1248
+ "eval_scitail-pairs-pos_steps_per_second": 2.129,
1249
+ "step": 5880
1250
+ },
1251
+ {
1252
+ "epoch": 1.0010214504596526,
1253
+ "eval_xsum-pairs_loss": 0.9014443755149841,
1254
+ "eval_xsum-pairs_runtime": 2.2896,
1255
+ "eval_xsum-pairs_samples_per_second": 65.513,
1256
+ "eval_xsum-pairs_steps_per_second": 2.184,
1257
+ "step": 5880
1258
+ },
1259
+ {
1260
+ "epoch": 1.0010214504596526,
1261
+ "eval_compression-pairs_loss": 0.3047434389591217,
1262
+ "eval_compression-pairs_runtime": 0.4852,
1263
+ "eval_compression-pairs_samples_per_second": 309.163,
1264
+ "eval_compression-pairs_steps_per_second": 10.305,
1265
+ "step": 5880
1266
+ },
1267
+ {
1268
+ "epoch": 1.0010214504596526,
1269
+ "eval_sciq_pairs_loss": 1.091601848602295,
1270
+ "eval_sciq_pairs_runtime": 7.3046,
1271
+ "eval_sciq_pairs_samples_per_second": 20.535,
1272
+ "eval_sciq_pairs_steps_per_second": 0.684,
1273
+ "step": 5880
1274
+ },
1275
+ {
1276
+ "epoch": 1.0010214504596526,
1277
+ "eval_qasc_pairs_loss": 5.947833061218262,
1278
+ "eval_qasc_pairs_runtime": 2.1787,
1279
+ "eval_qasc_pairs_samples_per_second": 68.849,
1280
+ "eval_qasc_pairs_steps_per_second": 2.295,
1281
+ "step": 5880
1282
+ },
1283
+ {
1284
+ "epoch": 1.0010214504596526,
1285
+ "eval_openbookqa_pairs_loss": 3.4724366664886475,
1286
+ "eval_openbookqa_pairs_runtime": 0.9106,
1287
+ "eval_openbookqa_pairs_samples_per_second": 113.111,
1288
+ "eval_openbookqa_pairs_steps_per_second": 4.393,
1289
+ "step": 5880
1290
+ },
1291
+ {
1292
+ "epoch": 1.0010214504596526,
1293
+ "eval_msmarco_pairs_loss": 2.1638240814208984,
1294
+ "eval_msmarco_pairs_runtime": 2.82,
1295
+ "eval_msmarco_pairs_samples_per_second": 53.191,
1296
+ "eval_msmarco_pairs_steps_per_second": 1.773,
1297
+ "step": 5880
1298
+ },
1299
+ {
1300
+ "epoch": 1.0010214504596526,
1301
+ "eval_nq_pairs_loss": 2.110903739929199,
1302
+ "eval_nq_pairs_runtime": 5.2303,
1303
+ "eval_nq_pairs_samples_per_second": 28.679,
1304
+ "eval_nq_pairs_steps_per_second": 0.956,
1305
+ "step": 5880
1306
+ },
1307
+ {
1308
+ "epoch": 1.0010214504596526,
1309
+ "eval_trivia_pairs_loss": 2.3711097240448,
1310
+ "eval_trivia_pairs_runtime": 9.6247,
1311
+ "eval_trivia_pairs_samples_per_second": 15.585,
1312
+ "eval_trivia_pairs_steps_per_second": 0.519,
1313
+ "step": 5880
1314
+ },
1315
+ {
1316
+ "epoch": 1.0010214504596526,
1317
+ "eval_quora_pairs_loss": 0.5216041803359985,
1318
+ "eval_quora_pairs_runtime": 1.3072,
1319
+ "eval_quora_pairs_samples_per_second": 114.749,
1320
+ "eval_quora_pairs_steps_per_second": 3.825,
1321
+ "step": 5880
1322
+ },
1323
+ {
1324
+ "epoch": 1.0010214504596526,
1325
+ "eval_gooaq_pairs_loss": 1.7041363716125488,
1326
+ "eval_gooaq_pairs_runtime": 2.0973,
1327
+ "eval_gooaq_pairs_samples_per_second": 71.521,
1328
+ "eval_gooaq_pairs_steps_per_second": 2.384,
1329
+ "step": 5880
1330
+ },
1331
+ {
1332
+ "epoch": 1.026046986721144,
1333
+ "grad_norm": 17.308378219604492,
1334
+ "learning_rate": 2.0493701055498808e-05,
1335
+ "loss": 1.7772,
1336
+ "step": 6027
1337
+ },
1338
+ {
1339
+ "epoch": 1.0510725229826354,
1340
+ "grad_norm": 20.248981475830078,
1341
+ "learning_rate": 2.0994211780728634e-05,
1342
+ "loss": 1.9079,
1343
+ "step": 6174
1344
+ },
1345
+ {
1346
+ "epoch": 1.0760980592441267,
1347
+ "grad_norm": 6.012618064880371,
1348
+ "learning_rate": 2.1494722505958464e-05,
1349
+ "loss": 1.8657,
1350
+ "step": 6321
1351
+ },
1352
+ {
1353
+ "epoch": 1.101123595505618,
1354
+ "grad_norm": 1.1185024976730347,
1355
+ "learning_rate": 2.1995233231188288e-05,
1356
+ "loss": 1.7144,
1357
+ "step": 6468
1358
+ },
1359
+ {
1360
+ "epoch": 1.1261491317671093,
1361
+ "grad_norm": 1.2436251640319824,
1362
+ "learning_rate": 2.2495743956418114e-05,
1363
+ "loss": 1.7661,
1364
+ "step": 6615
1365
+ },
1366
+ {
1367
+ "epoch": 1.1261491317671093,
1368
+ "eval_nli-pairs_loss": 1.7907973527908325,
1369
+ "eval_nli-pairs_runtime": 4.0147,
1370
+ "eval_nli-pairs_samples_per_second": 37.363,
1371
+ "eval_nli-pairs_steps_per_second": 1.245,
1372
+ "eval_sts-test_pearson_cosine": 0.755444461779583,
1373
+ "eval_sts-test_pearson_dot": 0.5833168145328357,
1374
+ "eval_sts-test_pearson_euclidean": 0.7437155007996056,
1375
+ "eval_sts-test_pearson_manhattan": 0.7524938984567344,
1376
+ "eval_sts-test_pearson_max": 0.755444461779583,
1377
+ "eval_sts-test_spearman_cosine": 0.7446166596886566,
1378
+ "eval_sts-test_spearman_dot": 0.5792340720766105,
1379
+ "eval_sts-test_spearman_euclidean": 0.7317285388028532,
1380
+ "eval_sts-test_spearman_manhattan": 0.7401637904976945,
1381
+ "eval_sts-test_spearman_max": 0.7446166596886566,
1382
+ "step": 6615
1383
+ },
1384
+ {
1385
+ "epoch": 1.1261491317671093,
1386
+ "eval_vitaminc-pairs_loss": 1.3403607606887817,
1387
+ "eval_vitaminc-pairs_runtime": 2.168,
1388
+ "eval_vitaminc-pairs_samples_per_second": 69.189,
1389
+ "eval_vitaminc-pairs_steps_per_second": 2.306,
1390
+ "step": 6615
1391
+ },
1392
+ {
1393
+ "epoch": 1.1261491317671093,
1394
+ "eval_qnli-contrastive_loss": 0.2736852467060089,
1395
+ "eval_qnli-contrastive_runtime": 0.4913,
1396
+ "eval_qnli-contrastive_samples_per_second": 305.336,
1397
+ "eval_qnli-contrastive_steps_per_second": 10.178,
1398
+ "step": 6615
1399
+ },
1400
+ {
1401
+ "epoch": 1.1261491317671093,
1402
+ "eval_scitail-pairs-qa_loss": 0.22441554069519043,
1403
+ "eval_scitail-pairs-qa_runtime": 1.1614,
1404
+ "eval_scitail-pairs-qa_samples_per_second": 129.152,
1405
+ "eval_scitail-pairs-qa_steps_per_second": 4.305,
1406
+ "step": 6615
1407
+ },
1408
+ {
1409
+ "epoch": 1.1261491317671093,
1410
+ "eval_scitail-pairs-pos_loss": 0.7723743915557861,
1411
+ "eval_scitail-pairs-pos_runtime": 2.1567,
1412
+ "eval_scitail-pairs-pos_samples_per_second": 69.55,
1413
+ "eval_scitail-pairs-pos_steps_per_second": 2.318,
1414
+ "step": 6615
1415
+ },
1416
+ {
1417
+ "epoch": 1.1261491317671093,
1418
+ "eval_xsum-pairs_loss": 0.8370540142059326,
1419
+ "eval_xsum-pairs_runtime": 2.2569,
1420
+ "eval_xsum-pairs_samples_per_second": 66.463,
1421
+ "eval_xsum-pairs_steps_per_second": 2.215,
1422
+ "step": 6615
1423
+ },
1424
+ {
1425
+ "epoch": 1.1261491317671093,
1426
+ "eval_compression-pairs_loss": 0.265947163105011,
1427
+ "eval_compression-pairs_runtime": 0.4431,
1428
+ "eval_compression-pairs_samples_per_second": 338.529,
1429
+ "eval_compression-pairs_steps_per_second": 11.284,
1430
+ "step": 6615
1431
+ },
1432
+ {
1433
+ "epoch": 1.1261491317671093,
1434
+ "eval_sciq_pairs_loss": 0.9383512735366821,
1435
+ "eval_sciq_pairs_runtime": 7.1464,
1436
+ "eval_sciq_pairs_samples_per_second": 20.99,
1437
+ "eval_sciq_pairs_steps_per_second": 0.7,
1438
+ "step": 6615
1439
+ },
1440
+ {
1441
+ "epoch": 1.1261491317671093,
1442
+ "eval_qasc_pairs_loss": 5.753899097442627,
1443
+ "eval_qasc_pairs_runtime": 2.0099,
1444
+ "eval_qasc_pairs_samples_per_second": 74.63,
1445
+ "eval_qasc_pairs_steps_per_second": 2.488,
1446
+ "step": 6615
1447
+ },
1448
+ {
1449
+ "epoch": 1.1261491317671093,
1450
+ "eval_openbookqa_pairs_loss": 3.3517918586730957,
1451
+ "eval_openbookqa_pairs_runtime": 0.8594,
1452
+ "eval_openbookqa_pairs_samples_per_second": 119.858,
1453
+ "eval_openbookqa_pairs_steps_per_second": 4.655,
1454
+ "step": 6615
1455
+ },
1456
+ {
1457
+ "epoch": 1.1261491317671093,
1458
+ "eval_msmarco_pairs_loss": 2.044360399246216,
1459
+ "eval_msmarco_pairs_runtime": 2.7431,
1460
+ "eval_msmarco_pairs_samples_per_second": 54.682,
1461
+ "eval_msmarco_pairs_steps_per_second": 1.823,
1462
+ "step": 6615
1463
+ },
1464
+ {
1465
+ "epoch": 1.1261491317671093,
1466
+ "eval_nq_pairs_loss": 1.9409464597702026,
1467
+ "eval_nq_pairs_runtime": 5.028,
1468
+ "eval_nq_pairs_samples_per_second": 29.833,
1469
+ "eval_nq_pairs_steps_per_second": 0.994,
1470
+ "step": 6615
1471
+ },
1472
+ {
1473
+ "epoch": 1.1261491317671093,
1474
+ "eval_trivia_pairs_loss": 2.369060754776001,
1475
+ "eval_trivia_pairs_runtime": 9.5137,
1476
+ "eval_trivia_pairs_samples_per_second": 15.767,
1477
+ "eval_trivia_pairs_steps_per_second": 0.526,
1478
+ "step": 6615
1479
+ },
1480
+ {
1481
+ "epoch": 1.1261491317671093,
1482
+ "eval_quora_pairs_loss": 0.47849634289741516,
1483
+ "eval_quora_pairs_runtime": 1.1413,
1484
+ "eval_quora_pairs_samples_per_second": 131.424,
1485
+ "eval_quora_pairs_steps_per_second": 4.381,
1486
+ "step": 6615
1487
+ },
1488
+ {
1489
+ "epoch": 1.1261491317671093,
1490
+ "eval_gooaq_pairs_loss": 1.5795674324035645,
1491
+ "eval_gooaq_pairs_runtime": 2.0155,
1492
+ "eval_gooaq_pairs_samples_per_second": 74.422,
1493
+ "eval_gooaq_pairs_steps_per_second": 2.481,
1494
+ "step": 6615
1495
+ },
1496
+ {
1497
+ "epoch": 1.1511746680286006,
1498
+ "grad_norm": 20.95261001586914,
1499
+ "learning_rate": 2.299625468164794e-05,
1500
+ "loss": 1.8066,
1501
+ "step": 6762
1502
+ },
1503
+ {
1504
+ "epoch": 1.1762002042900919,
1505
+ "grad_norm": 20.31597900390625,
1506
+ "learning_rate": 2.3496765406877764e-05,
1507
+ "loss": 1.7438,
1508
+ "step": 6909
1509
+ },
1510
+ {
1511
+ "epoch": 1.2012257405515832,
1512
+ "grad_norm": 28.363882064819336,
1513
+ "learning_rate": 2.399727613210759e-05,
1514
+ "loss": 2.0231,
1515
+ "step": 7056
1516
+ },
1517
+ {
1518
+ "epoch": 1.2262512768130747,
1519
+ "grad_norm": 14.403656959533691,
1520
+ "learning_rate": 2.449778685733742e-05,
1521
+ "loss": 1.8966,
1522
+ "step": 7203
1523
+ },
1524
+ {
1525
+ "epoch": 1.251276813074566,
1526
+ "grad_norm": 17.73562240600586,
1527
+ "learning_rate": 2.4998297582567248e-05,
1528
+ "loss": 1.7958,
1529
+ "step": 7350
1530
+ },
1531
+ {
1532
+ "epoch": 1.251276813074566,
1533
+ "eval_nli-pairs_loss": 1.5906368494033813,
1534
+ "eval_nli-pairs_runtime": 4.0261,
1535
+ "eval_nli-pairs_samples_per_second": 37.257,
1536
+ "eval_nli-pairs_steps_per_second": 1.242,
1537
+ "eval_sts-test_pearson_cosine": 0.7626661521495873,
1538
+ "eval_sts-test_pearson_dot": 0.5632604768989181,
1539
+ "eval_sts-test_pearson_euclidean": 0.7370060575260952,
1540
+ "eval_sts-test_pearson_manhattan": 0.7472706980613159,
1541
+ "eval_sts-test_pearson_max": 0.7626661521495873,
1542
+ "eval_sts-test_spearman_cosine": 0.7535266725567149,
1543
+ "eval_sts-test_spearman_dot": 0.5848997224802808,
1544
+ "eval_sts-test_spearman_euclidean": 0.7290608032903477,
1545
+ "eval_sts-test_spearman_manhattan": 0.739032087078249,
1546
+ "eval_sts-test_spearman_max": 0.7535266725567149,
1547
+ "step": 7350
1548
+ },
1549
+ {
1550
+ "epoch": 1.251276813074566,
1551
+ "eval_vitaminc-pairs_loss": 1.222551941871643,
1552
+ "eval_vitaminc-pairs_runtime": 2.1784,
1553
+ "eval_vitaminc-pairs_samples_per_second": 68.857,
1554
+ "eval_vitaminc-pairs_steps_per_second": 2.295,
1555
+ "step": 7350
1556
+ },
1557
+ {
1558
+ "epoch": 1.251276813074566,
1559
+ "eval_qnli-contrastive_loss": 0.3951484262943268,
1560
+ "eval_qnli-contrastive_runtime": 0.4916,
1561
+ "eval_qnli-contrastive_samples_per_second": 305.11,
1562
+ "eval_qnli-contrastive_steps_per_second": 10.17,
1563
+ "step": 7350
1564
+ },
1565
+ {
1566
+ "epoch": 1.251276813074566,
1567
+ "eval_scitail-pairs-qa_loss": 0.17783091962337494,
1568
+ "eval_scitail-pairs-qa_runtime": 1.1549,
1569
+ "eval_scitail-pairs-qa_samples_per_second": 129.88,
1570
+ "eval_scitail-pairs-qa_steps_per_second": 4.329,
1571
+ "step": 7350
1572
+ },
1573
+ {
1574
+ "epoch": 1.251276813074566,
1575
+ "eval_scitail-pairs-pos_loss": 0.7214661836624146,
1576
+ "eval_scitail-pairs-pos_runtime": 2.132,
1577
+ "eval_scitail-pairs-pos_samples_per_second": 70.357,
1578
+ "eval_scitail-pairs-pos_steps_per_second": 2.345,
1579
+ "step": 7350
1580
+ },
1581
+ {
1582
+ "epoch": 1.251276813074566,
1583
+ "eval_xsum-pairs_loss": 0.7919928431510925,
1584
+ "eval_xsum-pairs_runtime": 2.2579,
1585
+ "eval_xsum-pairs_samples_per_second": 66.432,
1586
+ "eval_xsum-pairs_steps_per_second": 2.214,
1587
+ "step": 7350
1588
+ },
1589
+ {
1590
+ "epoch": 1.251276813074566,
1591
+ "eval_compression-pairs_loss": 0.24975377321243286,
1592
+ "eval_compression-pairs_runtime": 0.447,
1593
+ "eval_compression-pairs_samples_per_second": 335.534,
1594
+ "eval_compression-pairs_steps_per_second": 11.184,
1595
+ "step": 7350
1596
+ },
1597
+ {
1598
+ "epoch": 1.251276813074566,
1599
+ "eval_sciq_pairs_loss": 0.8343773484230042,
1600
+ "eval_sciq_pairs_runtime": 7.1288,
1601
+ "eval_sciq_pairs_samples_per_second": 21.042,
1602
+ "eval_sciq_pairs_steps_per_second": 0.701,
1603
+ "step": 7350
1604
+ },
1605
+ {
1606
+ "epoch": 1.251276813074566,
1607
+ "eval_qasc_pairs_loss": 5.4840240478515625,
1608
+ "eval_qasc_pairs_runtime": 2.025,
1609
+ "eval_qasc_pairs_samples_per_second": 74.074,
1610
+ "eval_qasc_pairs_steps_per_second": 2.469,
1611
+ "step": 7350
1612
+ },
1613
+ {
1614
+ "epoch": 1.251276813074566,
1615
+ "eval_openbookqa_pairs_loss": 3.1631176471710205,
1616
+ "eval_openbookqa_pairs_runtime": 0.8612,
1617
+ "eval_openbookqa_pairs_samples_per_second": 119.598,
1618
+ "eval_openbookqa_pairs_steps_per_second": 4.645,
1619
+ "step": 7350
1620
+ },
1621
+ {
1622
+ "epoch": 1.251276813074566,
1623
+ "eval_msmarco_pairs_loss": 1.8952231407165527,
1624
+ "eval_msmarco_pairs_runtime": 2.7585,
1625
+ "eval_msmarco_pairs_samples_per_second": 54.378,
1626
+ "eval_msmarco_pairs_steps_per_second": 1.813,
1627
+ "step": 7350
1628
+ },
1629
+ {
1630
+ "epoch": 1.251276813074566,
1631
+ "eval_nq_pairs_loss": 1.6934970617294312,
1632
+ "eval_nq_pairs_runtime": 5.0253,
1633
+ "eval_nq_pairs_samples_per_second": 29.849,
1634
+ "eval_nq_pairs_steps_per_second": 0.995,
1635
+ "step": 7350
1636
+ },
1637
+ {
1638
+ "epoch": 1.251276813074566,
1639
+ "eval_trivia_pairs_loss": 1.9966663122177124,
1640
+ "eval_trivia_pairs_runtime": 9.5675,
1641
+ "eval_trivia_pairs_samples_per_second": 15.678,
1642
+ "eval_trivia_pairs_steps_per_second": 0.523,
1643
+ "step": 7350
1644
+ },
1645
+ {
1646
+ "epoch": 1.251276813074566,
1647
+ "eval_quora_pairs_loss": 0.405385285615921,
1648
+ "eval_quora_pairs_runtime": 1.1432,
1649
+ "eval_quora_pairs_samples_per_second": 131.209,
1650
+ "eval_quora_pairs_steps_per_second": 4.374,
1651
+ "step": 7350
1652
+ },
1653
+ {
1654
+ "epoch": 1.251276813074566,
1655
+ "eval_gooaq_pairs_loss": 1.3951071500778198,
1656
+ "eval_gooaq_pairs_runtime": 2.038,
1657
+ "eval_gooaq_pairs_samples_per_second": 73.601,
1658
+ "eval_gooaq_pairs_steps_per_second": 2.453,
1659
+ "step": 7350
1660
+ },
1661
+ {
1662
+ "epoch": 1.2763023493360572,
1663
+ "grad_norm": 21.254159927368164,
1664
+ "learning_rate": 2.549880830779707e-05,
1665
+ "loss": 1.5109,
1666
+ "step": 7497
1667
+ },
1668
+ {
1669
+ "epoch": 1.3013278855975485,
1670
+ "grad_norm": 20.08012580871582,
1671
+ "learning_rate": 2.5999319033026898e-05,
1672
+ "loss": 1.8119,
1673
+ "step": 7644
1674
+ },
1675
+ {
1676
+ "epoch": 1.3263534218590398,
1677
+ "grad_norm": 0.6448306441307068,
1678
+ "learning_rate": 2.6499829758256724e-05,
1679
+ "loss": 1.6833,
1680
+ "step": 7791
1681
+ },
1682
+ {
1683
+ "epoch": 1.351378958120531,
1684
+ "grad_norm": 16.65821647644043,
1685
+ "learning_rate": 2.7000340483486554e-05,
1686
+ "loss": 1.5917,
1687
+ "step": 7938
1688
+ },
1689
+ {
1690
+ "epoch": 1.3764044943820224,
1691
+ "grad_norm": 14.949362754821777,
1692
+ "learning_rate": 2.7500851208716378e-05,
1693
+ "loss": 1.809,
1694
+ "step": 8085
1695
+ },
1696
+ {
1697
+ "epoch": 1.3764044943820224,
1698
+ "eval_nli-pairs_loss": 1.5967836380004883,
1699
+ "eval_nli-pairs_runtime": 4.0496,
1700
+ "eval_nli-pairs_samples_per_second": 37.041,
1701
+ "eval_nli-pairs_steps_per_second": 1.235,
1702
+ "eval_sts-test_pearson_cosine": 0.7653416933913197,
1703
+ "eval_sts-test_pearson_dot": 0.5401711611334493,
1704
+ "eval_sts-test_pearson_euclidean": 0.7529907774019836,
1705
+ "eval_sts-test_pearson_manhattan": 0.7605105025260754,
1706
+ "eval_sts-test_pearson_max": 0.7653416933913197,
1707
+ "eval_sts-test_spearman_cosine": 0.7593865234485873,
1708
+ "eval_sts-test_spearman_dot": 0.5559615063301898,
1709
+ "eval_sts-test_spearman_euclidean": 0.7436431053840061,
1710
+ "eval_sts-test_spearman_manhattan": 0.7515978828464567,
1711
+ "eval_sts-test_spearman_max": 0.7593865234485873,
1712
+ "step": 8085
1713
+ },
1714
+ {
1715
+ "epoch": 1.3764044943820224,
1716
+ "eval_vitaminc-pairs_loss": 1.1434590816497803,
1717
+ "eval_vitaminc-pairs_runtime": 2.2066,
1718
+ "eval_vitaminc-pairs_samples_per_second": 67.977,
1719
+ "eval_vitaminc-pairs_steps_per_second": 2.266,
1720
+ "step": 8085
1721
+ },
1722
+ {
1723
+ "epoch": 1.3764044943820224,
1724
+ "eval_qnli-contrastive_loss": 0.3819103538990021,
1725
+ "eval_qnli-contrastive_runtime": 0.4972,
1726
+ "eval_qnli-contrastive_samples_per_second": 301.706,
1727
+ "eval_qnli-contrastive_steps_per_second": 10.057,
1728
+ "step": 8085
1729
+ },
1730
+ {
1731
+ "epoch": 1.3764044943820224,
1732
+ "eval_scitail-pairs-qa_loss": 0.15774373710155487,
1733
+ "eval_scitail-pairs-qa_runtime": 1.1704,
1734
+ "eval_scitail-pairs-qa_samples_per_second": 128.161,
1735
+ "eval_scitail-pairs-qa_steps_per_second": 4.272,
1736
+ "step": 8085
1737
+ },
1738
+ {
1739
+ "epoch": 1.3764044943820224,
1740
+ "eval_scitail-pairs-pos_loss": 0.6571963429450989,
1741
+ "eval_scitail-pairs-pos_runtime": 2.1634,
1742
+ "eval_scitail-pairs-pos_samples_per_second": 69.335,
1743
+ "eval_scitail-pairs-pos_steps_per_second": 2.311,
1744
+ "step": 8085
1745
+ },
1746
+ {
1747
+ "epoch": 1.3764044943820224,
1748
+ "eval_xsum-pairs_loss": 0.7028753757476807,
1749
+ "eval_xsum-pairs_runtime": 2.2608,
1750
+ "eval_xsum-pairs_samples_per_second": 66.347,
1751
+ "eval_xsum-pairs_steps_per_second": 2.212,
1752
+ "step": 8085
1753
+ },
1754
+ {
1755
+ "epoch": 1.3764044943820224,
1756
+ "eval_compression-pairs_loss": 0.23010987043380737,
1757
+ "eval_compression-pairs_runtime": 0.4514,
1758
+ "eval_compression-pairs_samples_per_second": 332.284,
1759
+ "eval_compression-pairs_steps_per_second": 11.076,
1760
+ "step": 8085
1761
+ },
1762
+ {
1763
+ "epoch": 1.3764044943820224,
1764
+ "eval_sciq_pairs_loss": 0.799666702747345,
1765
+ "eval_sciq_pairs_runtime": 7.1816,
1766
+ "eval_sciq_pairs_samples_per_second": 20.887,
1767
+ "eval_sciq_pairs_steps_per_second": 0.696,
1768
+ "step": 8085
1769
+ },
1770
+ {
1771
+ "epoch": 1.3764044943820224,
1772
+ "eval_qasc_pairs_loss": 5.433376789093018,
1773
+ "eval_qasc_pairs_runtime": 2.0592,
1774
+ "eval_qasc_pairs_samples_per_second": 72.843,
1775
+ "eval_qasc_pairs_steps_per_second": 2.428,
1776
+ "step": 8085
1777
+ },
1778
+ {
1779
+ "epoch": 1.3764044943820224,
1780
+ "eval_openbookqa_pairs_loss": 2.9010672569274902,
1781
+ "eval_openbookqa_pairs_runtime": 0.865,
1782
+ "eval_openbookqa_pairs_samples_per_second": 119.074,
1783
+ "eval_openbookqa_pairs_steps_per_second": 4.624,
1784
+ "step": 8085
1785
+ },
1786
+ {
1787
+ "epoch": 1.3764044943820224,
1788
+ "eval_msmarco_pairs_loss": 1.7567836046218872,
1789
+ "eval_msmarco_pairs_runtime": 2.7812,
1790
+ "eval_msmarco_pairs_samples_per_second": 53.933,
1791
+ "eval_msmarco_pairs_steps_per_second": 1.798,
1792
+ "step": 8085
1793
+ },
1794
+ {
1795
+ "epoch": 1.3764044943820224,
1796
+ "eval_nq_pairs_loss": 1.5407707691192627,
1797
+ "eval_nq_pairs_runtime": 5.0607,
1798
+ "eval_nq_pairs_samples_per_second": 29.64,
1799
+ "eval_nq_pairs_steps_per_second": 0.988,
1800
+ "step": 8085
1801
+ },
1802
+ {
1803
+ "epoch": 1.3764044943820224,
1804
+ "eval_trivia_pairs_loss": 1.8419283628463745,
1805
+ "eval_trivia_pairs_runtime": 9.5535,
1806
+ "eval_trivia_pairs_samples_per_second": 15.701,
1807
+ "eval_trivia_pairs_steps_per_second": 0.523,
1808
+ "step": 8085
1809
+ },
1810
+ {
1811
+ "epoch": 1.3764044943820224,
1812
+ "eval_quora_pairs_loss": 0.3561370372772217,
1813
+ "eval_quora_pairs_runtime": 1.2005,
1814
+ "eval_quora_pairs_samples_per_second": 124.946,
1815
+ "eval_quora_pairs_steps_per_second": 4.165,
1816
+ "step": 8085
1817
+ },
1818
+ {
1819
+ "epoch": 1.3764044943820224,
1820
+ "eval_gooaq_pairs_loss": 1.1745914220809937,
1821
+ "eval_gooaq_pairs_runtime": 2.0463,
1822
+ "eval_gooaq_pairs_samples_per_second": 73.305,
1823
+ "eval_gooaq_pairs_steps_per_second": 2.443,
1824
+ "step": 8085
1825
+ },
1826
+ {
1827
+ "epoch": 1.401430030643514,
1828
+ "grad_norm": 14.31106185913086,
1829
+ "learning_rate": 2.8001361933946204e-05,
1830
+ "loss": 1.5561,
1831
+ "step": 8232
1832
+ },
1833
+ {
1834
+ "epoch": 1.4264555669050052,
1835
+ "grad_norm": 11.82392692565918,
1836
+ "learning_rate": 2.850187265917603e-05,
1837
+ "loss": 1.5325,
1838
+ "step": 8379
1839
+ },
1840
+ {
1841
+ "epoch": 1.4514811031664965,
1842
+ "grad_norm": 21.716449737548828,
1843
+ "learning_rate": 2.9002383384405858e-05,
1844
+ "loss": 1.5085,
1845
+ "step": 8526
1846
+ },
1847
+ {
1848
+ "epoch": 1.4765066394279878,
1849
+ "grad_norm": 6.5607147216796875,
1850
+ "learning_rate": 2.950289410963568e-05,
1851
+ "loss": 1.5634,
1852
+ "step": 8673
1853
+ },
1854
+ {
1855
+ "epoch": 1.501532175689479,
1856
+ "grad_norm": 8.737595558166504,
1857
+ "learning_rate": 2.9999998423842776e-05,
1858
+ "loss": 1.3857,
1859
+ "step": 8820
1860
+ },
1861
+ {
1862
+ "epoch": 1.501532175689479,
1863
+ "eval_nli-pairs_loss": 1.454946517944336,
1864
+ "eval_nli-pairs_runtime": 4.3786,
1865
+ "eval_nli-pairs_samples_per_second": 34.257,
1866
+ "eval_nli-pairs_steps_per_second": 1.142,
1867
+ "eval_sts-test_pearson_cosine": 0.758856517299588,
1868
+ "eval_sts-test_pearson_dot": 0.5254244903711445,
1869
+ "eval_sts-test_pearson_euclidean": 0.7467439510002647,
1870
+ "eval_sts-test_pearson_manhattan": 0.7525779346304055,
1871
+ "eval_sts-test_pearson_max": 0.758856517299588,
1872
+ "eval_sts-test_spearman_cosine": 0.7596605816446022,
1873
+ "eval_sts-test_spearman_dot": 0.5600186533991508,
1874
+ "eval_sts-test_spearman_euclidean": 0.7367598380547504,
1875
+ "eval_sts-test_spearman_manhattan": 0.7440123650923844,
1876
+ "eval_sts-test_spearman_max": 0.7596605816446022,
1877
+ "step": 8820
1878
+ },
1879
+ {
1880
+ "epoch": 1.501532175689479,
1881
+ "eval_vitaminc-pairs_loss": 1.1383781433105469,
1882
+ "eval_vitaminc-pairs_runtime": 2.3314,
1883
+ "eval_vitaminc-pairs_samples_per_second": 64.34,
1884
+ "eval_vitaminc-pairs_steps_per_second": 2.145,
1885
+ "step": 8820
1886
+ },
1887
+ {
1888
+ "epoch": 1.501532175689479,
1889
+ "eval_qnli-contrastive_loss": 0.32092100381851196,
1890
+ "eval_qnli-contrastive_runtime": 0.5002,
1891
+ "eval_qnli-contrastive_samples_per_second": 299.881,
1892
+ "eval_qnli-contrastive_steps_per_second": 9.996,
1893
+ "step": 8820
1894
+ },
1895
+ {
1896
+ "epoch": 1.501532175689479,
1897
+ "eval_scitail-pairs-qa_loss": 0.14513270556926727,
1898
+ "eval_scitail-pairs-qa_runtime": 1.5154,
1899
+ "eval_scitail-pairs-qa_samples_per_second": 98.985,
1900
+ "eval_scitail-pairs-qa_steps_per_second": 3.3,
1901
+ "step": 8820
1902
+ },
1903
+ {
1904
+ "epoch": 1.501532175689479,
1905
+ "eval_scitail-pairs-pos_loss": 0.6857669353485107,
1906
+ "eval_scitail-pairs-pos_runtime": 2.4178,
1907
+ "eval_scitail-pairs-pos_samples_per_second": 62.041,
1908
+ "eval_scitail-pairs-pos_steps_per_second": 2.068,
1909
+ "step": 8820
1910
+ },
1911
+ {
1912
+ "epoch": 1.501532175689479,
1913
+ "eval_xsum-pairs_loss": 0.683724045753479,
1914
+ "eval_xsum-pairs_runtime": 2.2766,
1915
+ "eval_xsum-pairs_samples_per_second": 65.887,
1916
+ "eval_xsum-pairs_steps_per_second": 2.196,
1917
+ "step": 8820
1918
+ },
1919
+ {
1920
+ "epoch": 1.501532175689479,
1921
+ "eval_compression-pairs_loss": 0.20896266400814056,
1922
+ "eval_compression-pairs_runtime": 0.4683,
1923
+ "eval_compression-pairs_samples_per_second": 320.274,
1924
+ "eval_compression-pairs_steps_per_second": 10.676,
1925
+ "step": 8820
1926
+ },
1927
+ {
1928
+ "epoch": 1.501532175689479,
1929
+ "eval_sciq_pairs_loss": 0.7911179661750793,
1930
+ "eval_sciq_pairs_runtime": 7.3506,
1931
+ "eval_sciq_pairs_samples_per_second": 20.407,
1932
+ "eval_sciq_pairs_steps_per_second": 0.68,
1933
+ "step": 8820
1934
+ },
1935
+ {
1936
+ "epoch": 1.501532175689479,
1937
+ "eval_qasc_pairs_loss": 5.3092241287231445,
1938
+ "eval_qasc_pairs_runtime": 2.1926,
1939
+ "eval_qasc_pairs_samples_per_second": 68.411,
1940
+ "eval_qasc_pairs_steps_per_second": 2.28,
1941
+ "step": 8820
1942
+ },
1943
+ {
1944
+ "epoch": 1.501532175689479,
1945
+ "eval_openbookqa_pairs_loss": 2.923464298248291,
1946
+ "eval_openbookqa_pairs_runtime": 0.963,
1947
+ "eval_openbookqa_pairs_samples_per_second": 106.961,
1948
+ "eval_openbookqa_pairs_steps_per_second": 4.154,
1949
+ "step": 8820
1950
+ },
1951
+ {
1952
+ "epoch": 1.501532175689479,
1953
+ "eval_msmarco_pairs_loss": 1.674107313156128,
1954
+ "eval_msmarco_pairs_runtime": 2.8516,
1955
+ "eval_msmarco_pairs_samples_per_second": 52.602,
1956
+ "eval_msmarco_pairs_steps_per_second": 1.753,
1957
+ "step": 8820
1958
+ },
1959
+ {
1960
+ "epoch": 1.501532175689479,
1961
+ "eval_nq_pairs_loss": 1.4419037103652954,
1962
+ "eval_nq_pairs_runtime": 5.1485,
1963
+ "eval_nq_pairs_samples_per_second": 29.135,
1964
+ "eval_nq_pairs_steps_per_second": 0.971,
1965
+ "step": 8820
1966
+ },
1967
+ {
1968
+ "epoch": 1.501532175689479,
1969
+ "eval_trivia_pairs_loss": 1.7546964883804321,
1970
+ "eval_trivia_pairs_runtime": 9.6901,
1971
+ "eval_trivia_pairs_samples_per_second": 15.48,
1972
+ "eval_trivia_pairs_steps_per_second": 0.516,
1973
+ "step": 8820
1974
+ },
1975
+ {
1976
+ "epoch": 1.501532175689479,
1977
+ "eval_quora_pairs_loss": 0.31785744428634644,
1978
+ "eval_quora_pairs_runtime": 1.2699,
1979
+ "eval_quora_pairs_samples_per_second": 118.117,
1980
+ "eval_quora_pairs_steps_per_second": 3.937,
1981
+ "step": 8820
1982
+ },
1983
+ {
1984
+ "epoch": 1.501532175689479,
1985
+ "eval_gooaq_pairs_loss": 1.1328644752502441,
1986
+ "eval_gooaq_pairs_runtime": 2.1292,
1987
+ "eval_gooaq_pairs_samples_per_second": 70.448,
1988
+ "eval_gooaq_pairs_steps_per_second": 2.348,
1989
+ "step": 8820
1990
+ },
1991
+ {
1992
+ "epoch": 1.5265577119509703,
1993
+ "grad_norm": 15.168123245239258,
1994
+ "learning_rate": 2.9965489092992677e-05,
1995
+ "loss": 1.6167,
1996
+ "step": 8967
1997
+ },
1998
+ {
1999
+ "epoch": 1.5515832482124616,
2000
+ "grad_norm": 29.32268524169922,
2001
+ "learning_rate": 2.9863043834895476e-05,
2002
+ "loss": 1.6664,
2003
+ "step": 9114
2004
+ },
2005
+ {
2006
+ "epoch": 1.5766087844739531,
2007
+ "grad_norm": 2.5159287452697754,
2008
+ "learning_rate": 2.9693127700413034e-05,
2009
+ "loss": 1.4785,
2010
+ "step": 9261
2011
+ },
2012
+ {
2013
+ "epoch": 1.6016343207354442,
2014
+ "grad_norm": 17.4219970703125,
2015
+ "learning_rate": 2.9456512024854113e-05,
2016
+ "loss": 1.5881,
2017
+ "step": 9408
2018
+ },
2019
+ {
2020
+ "epoch": 1.6266598569969357,
2021
+ "grad_norm": 15.60139274597168,
2022
+ "learning_rate": 2.915427092649312e-05,
2023
+ "loss": 1.3379,
2024
+ "step": 9555
2025
+ },
2026
+ {
2027
+ "epoch": 1.6266598569969357,
2028
+ "eval_nli-pairs_loss": 1.3879741430282593,
2029
+ "eval_nli-pairs_runtime": 4.1363,
2030
+ "eval_nli-pairs_samples_per_second": 36.264,
2031
+ "eval_nli-pairs_steps_per_second": 1.209,
2032
+ "eval_sts-test_pearson_cosine": 0.7733483283639441,
2033
+ "eval_sts-test_pearson_dot": 0.5424296843493538,
2034
+ "eval_sts-test_pearson_euclidean": 0.7555770040784449,
2035
+ "eval_sts-test_pearson_manhattan": 0.7604742759594404,
2036
+ "eval_sts-test_pearson_max": 0.7733483283639441,
2037
+ "eval_sts-test_spearman_cosine": 0.779671933510953,
2038
+ "eval_sts-test_spearman_dot": 0.5784449139725663,
2039
+ "eval_sts-test_spearman_euclidean": 0.7515003599642571,
2040
+ "eval_sts-test_spearman_manhattan": 0.7568440288585417,
2041
+ "eval_sts-test_spearman_max": 0.779671933510953,
2042
+ "step": 9555
2043
+ },
2044
+ {
2045
+ "epoch": 1.6266598569969357,
2046
+ "eval_vitaminc-pairs_loss": 0.9942379593849182,
2047
+ "eval_vitaminc-pairs_runtime": 2.2185,
2048
+ "eval_vitaminc-pairs_samples_per_second": 67.613,
2049
+ "eval_vitaminc-pairs_steps_per_second": 2.254,
2050
+ "step": 9555
2051
+ },
2052
+ {
2053
+ "epoch": 1.6266598569969357,
2054
+ "eval_qnli-contrastive_loss": 0.2178214192390442,
2055
+ "eval_qnli-contrastive_runtime": 0.4991,
2056
+ "eval_qnli-contrastive_samples_per_second": 300.549,
2057
+ "eval_qnli-contrastive_steps_per_second": 10.018,
2058
+ "step": 9555
2059
+ },
2060
+ {
2061
+ "epoch": 1.6266598569969357,
2062
+ "eval_scitail-pairs-qa_loss": 0.13629749417304993,
2063
+ "eval_scitail-pairs-qa_runtime": 1.1751,
2064
+ "eval_scitail-pairs-qa_samples_per_second": 127.653,
2065
+ "eval_scitail-pairs-qa_steps_per_second": 4.255,
2066
+ "step": 9555
2067
+ },
2068
+ {
2069
+ "epoch": 1.6266598569969357,
2070
+ "eval_scitail-pairs-pos_loss": 0.5964671969413757,
2071
+ "eval_scitail-pairs-pos_runtime": 2.1841,
2072
+ "eval_scitail-pairs-pos_samples_per_second": 68.677,
2073
+ "eval_scitail-pairs-pos_steps_per_second": 2.289,
2074
+ "step": 9555
2075
+ },
2076
+ {
2077
+ "epoch": 1.6266598569969357,
2078
+ "eval_xsum-pairs_loss": 0.6746851205825806,
2079
+ "eval_xsum-pairs_runtime": 2.2628,
2080
+ "eval_xsum-pairs_samples_per_second": 66.291,
2081
+ "eval_xsum-pairs_steps_per_second": 2.21,
2082
+ "step": 9555
2083
+ },
2084
+ {
2085
+ "epoch": 1.6266598569969357,
2086
+ "eval_compression-pairs_loss": 0.17857055366039276,
2087
+ "eval_compression-pairs_runtime": 0.4506,
2088
+ "eval_compression-pairs_samples_per_second": 332.902,
2089
+ "eval_compression-pairs_steps_per_second": 11.097,
2090
+ "step": 9555
2091
+ },
2092
+ {
2093
+ "epoch": 1.6266598569969357,
2094
+ "eval_sciq_pairs_loss": 0.7349148988723755,
2095
+ "eval_sciq_pairs_runtime": 7.116,
2096
+ "eval_sciq_pairs_samples_per_second": 21.079,
2097
+ "eval_sciq_pairs_steps_per_second": 0.703,
2098
+ "step": 9555
2099
+ },
2100
+ {
2101
+ "epoch": 1.6266598569969357,
2102
+ "eval_qasc_pairs_loss": 5.115650177001953,
2103
+ "eval_qasc_pairs_runtime": 2.0271,
2104
+ "eval_qasc_pairs_samples_per_second": 73.997,
2105
+ "eval_qasc_pairs_steps_per_second": 2.467,
2106
+ "step": 9555
2107
+ },
2108
+ {
2109
+ "epoch": 1.6266598569969357,
2110
+ "eval_openbookqa_pairs_loss": 2.694535255432129,
2111
+ "eval_openbookqa_pairs_runtime": 0.8634,
2112
+ "eval_openbookqa_pairs_samples_per_second": 119.302,
2113
+ "eval_openbookqa_pairs_steps_per_second": 4.633,
2114
+ "step": 9555
2115
+ },
2116
+ {
2117
+ "epoch": 1.6266598569969357,
2118
+ "eval_msmarco_pairs_loss": 1.5184054374694824,
2119
+ "eval_msmarco_pairs_runtime": 2.7561,
2120
+ "eval_msmarco_pairs_samples_per_second": 54.424,
2121
+ "eval_msmarco_pairs_steps_per_second": 1.814,
2122
+ "step": 9555
2123
+ },
2124
+ {
2125
+ "epoch": 1.6266598569969357,
2126
+ "eval_nq_pairs_loss": 1.293426752090454,
2127
+ "eval_nq_pairs_runtime": 5.0107,
2128
+ "eval_nq_pairs_samples_per_second": 29.936,
2129
+ "eval_nq_pairs_steps_per_second": 0.998,
2130
+ "step": 9555
2131
+ },
2132
+ {
2133
+ "epoch": 1.6266598569969357,
2134
+ "eval_trivia_pairs_loss": 1.5939557552337646,
2135
+ "eval_trivia_pairs_runtime": 9.5368,
2136
+ "eval_trivia_pairs_samples_per_second": 15.728,
2137
+ "eval_trivia_pairs_steps_per_second": 0.524,
2138
+ "step": 9555
2139
+ },
2140
+ {
2141
+ "epoch": 1.6266598569969357,
2142
+ "eval_quora_pairs_loss": 0.31308451294898987,
2143
+ "eval_quora_pairs_runtime": 1.1456,
2144
+ "eval_quora_pairs_samples_per_second": 130.932,
2145
+ "eval_quora_pairs_steps_per_second": 4.364,
2146
+ "step": 9555
2147
+ },
2148
+ {
2149
+ "epoch": 1.6266598569969357,
2150
+ "eval_gooaq_pairs_loss": 1.0807112455368042,
2151
+ "eval_gooaq_pairs_runtime": 2.0197,
2152
+ "eval_gooaq_pairs_samples_per_second": 74.269,
2153
+ "eval_gooaq_pairs_steps_per_second": 2.476,
2154
+ "step": 9555
2155
+ },
2156
+ {
2157
+ "epoch": 1.651685393258427,
2158
+ "grad_norm": 0.7546759843826294,
2159
+ "learning_rate": 2.878777643060379e-05,
2160
+ "loss": 1.4469,
2161
+ "step": 9702
2162
+ },
2163
+ {
2164
+ "epoch": 1.6767109295199183,
2165
+ "grad_norm": 0.8483991026878357,
2166
+ "learning_rate": 2.835869224114224e-05,
2167
+ "loss": 1.3878,
2168
+ "step": 9849
2169
+ },
2170
+ {
2171
+ "epoch": 1.7017364657814096,
2172
+ "grad_norm": 20.814105987548828,
2173
+ "learning_rate": 2.7868966188352908e-05,
2174
+ "loss": 1.2764,
2175
+ "step": 9996
2176
+ },
2177
+ {
2178
+ "epoch": 1.7267620020429009,
2179
+ "grad_norm": 3.1025094985961914,
2180
+ "learning_rate": 2.73208213865815e-05,
2181
+ "loss": 1.3884,
2182
+ "step": 10143
2183
+ },
2184
+ {
2185
+ "epoch": 1.7517875383043924,
2186
+ "grad_norm": 14.80810260772705,
2187
+ "learning_rate": 2.671674614243416e-05,
2188
+ "loss": 1.2977,
2189
+ "step": 10290
2190
+ },
2191
+ {
2192
+ "epoch": 1.7517875383043924,
2193
+ "eval_nli-pairs_loss": 1.3081562519073486,
2194
+ "eval_nli-pairs_runtime": 4.0165,
2195
+ "eval_nli-pairs_samples_per_second": 37.346,
2196
+ "eval_nli-pairs_steps_per_second": 1.245,
2197
+ "eval_sts-test_pearson_cosine": 0.7681143802843627,
2198
+ "eval_sts-test_pearson_dot": 0.5287526695750702,
2199
+ "eval_sts-test_pearson_euclidean": 0.7538805205317111,
2200
+ "eval_sts-test_pearson_manhattan": 0.7596894203751682,
2201
+ "eval_sts-test_pearson_max": 0.7681143802843627,
2202
+ "eval_sts-test_spearman_cosine": 0.770908506196058,
2203
+ "eval_sts-test_spearman_dot": 0.5670572774538138,
2204
+ "eval_sts-test_spearman_euclidean": 0.7452730842318486,
2205
+ "eval_sts-test_spearman_manhattan": 0.7517699916174685,
2206
+ "eval_sts-test_spearman_max": 0.770908506196058,
2207
+ "step": 10290
2208
+ },
2209
+ {
2210
+ "epoch": 1.7517875383043924,
2211
+ "eval_vitaminc-pairs_loss": 0.9676446318626404,
2212
+ "eval_vitaminc-pairs_runtime": 2.1787,
2213
+ "eval_vitaminc-pairs_samples_per_second": 68.85,
2214
+ "eval_vitaminc-pairs_steps_per_second": 2.295,
2215
+ "step": 10290
2216
+ },
2217
+ {
2218
+ "epoch": 1.7517875383043924,
2219
+ "eval_qnli-contrastive_loss": 0.244391530752182,
2220
+ "eval_qnli-contrastive_runtime": 0.4884,
2221
+ "eval_qnli-contrastive_samples_per_second": 307.113,
2222
+ "eval_qnli-contrastive_steps_per_second": 10.237,
2223
+ "step": 10290
2224
+ },
2225
+ {
2226
+ "epoch": 1.7517875383043924,
2227
+ "eval_scitail-pairs-qa_loss": 0.1264333575963974,
2228
+ "eval_scitail-pairs-qa_runtime": 1.1536,
2229
+ "eval_scitail-pairs-qa_samples_per_second": 130.03,
2230
+ "eval_scitail-pairs-qa_steps_per_second": 4.334,
2231
+ "step": 10290
2232
+ },
2233
+ {
2234
+ "epoch": 1.7517875383043924,
2235
+ "eval_scitail-pairs-pos_loss": 0.5472012162208557,
2236
+ "eval_scitail-pairs-pos_runtime": 2.1213,
2237
+ "eval_scitail-pairs-pos_samples_per_second": 70.711,
2238
+ "eval_scitail-pairs-pos_steps_per_second": 2.357,
2239
+ "step": 10290
2240
+ },
2241
+ {
2242
+ "epoch": 1.7517875383043924,
2243
+ "eval_xsum-pairs_loss": 0.5869634747505188,
2244
+ "eval_xsum-pairs_runtime": 2.2876,
2245
+ "eval_xsum-pairs_samples_per_second": 65.571,
2246
+ "eval_xsum-pairs_steps_per_second": 2.186,
2247
+ "step": 10290
2248
+ },
2249
+ {
2250
+ "epoch": 1.7517875383043924,
2251
+ "eval_compression-pairs_loss": 0.16663199663162231,
2252
+ "eval_compression-pairs_runtime": 0.4431,
2253
+ "eval_compression-pairs_samples_per_second": 338.526,
2254
+ "eval_compression-pairs_steps_per_second": 11.284,
2255
+ "step": 10290
2256
+ },
2257
+ {
2258
+ "epoch": 1.7517875383043924,
2259
+ "eval_sciq_pairs_loss": 0.6884138584136963,
2260
+ "eval_sciq_pairs_runtime": 7.0451,
2261
+ "eval_sciq_pairs_samples_per_second": 21.291,
2262
+ "eval_sciq_pairs_steps_per_second": 0.71,
2263
+ "step": 10290
2264
+ },
2265
+ {
2266
+ "epoch": 1.7517875383043924,
2267
+ "eval_qasc_pairs_loss": 5.099090099334717,
2268
+ "eval_qasc_pairs_runtime": 2.0309,
2269
+ "eval_qasc_pairs_samples_per_second": 73.86,
2270
+ "eval_qasc_pairs_steps_per_second": 2.462,
2271
+ "step": 10290
2272
+ },
2273
+ {
2274
+ "epoch": 1.7517875383043924,
2275
+ "eval_openbookqa_pairs_loss": 2.6562159061431885,
2276
+ "eval_openbookqa_pairs_runtime": 0.8531,
2277
+ "eval_openbookqa_pairs_samples_per_second": 120.74,
2278
+ "eval_openbookqa_pairs_steps_per_second": 4.689,
2279
+ "step": 10290
2280
+ },
2281
+ {
2282
+ "epoch": 1.7517875383043924,
2283
+ "eval_msmarco_pairs_loss": 1.3729219436645508,
2284
+ "eval_msmarco_pairs_runtime": 2.7346,
2285
+ "eval_msmarco_pairs_samples_per_second": 54.853,
2286
+ "eval_msmarco_pairs_steps_per_second": 1.828,
2287
+ "step": 10290
2288
+ },
2289
+ {
2290
+ "epoch": 1.7517875383043924,
2291
+ "eval_nq_pairs_loss": 1.2174726724624634,
2292
+ "eval_nq_pairs_runtime": 4.9981,
2293
+ "eval_nq_pairs_samples_per_second": 30.012,
2294
+ "eval_nq_pairs_steps_per_second": 1.0,
2295
+ "step": 10290
2296
+ },
2297
+ {
2298
+ "epoch": 1.7517875383043924,
2299
+ "eval_trivia_pairs_loss": 1.5839861631393433,
2300
+ "eval_trivia_pairs_runtime": 9.4611,
2301
+ "eval_trivia_pairs_samples_per_second": 15.854,
2302
+ "eval_trivia_pairs_steps_per_second": 0.528,
2303
+ "step": 10290
2304
+ },
2305
+ {
2306
+ "epoch": 1.7517875383043924,
2307
+ "eval_quora_pairs_loss": 0.2804078757762909,
2308
+ "eval_quora_pairs_runtime": 1.1799,
2309
+ "eval_quora_pairs_samples_per_second": 127.13,
2310
+ "eval_quora_pairs_steps_per_second": 4.238,
2311
+ "step": 10290
2312
+ },
2313
+ {
2314
+ "epoch": 1.7517875383043924,
2315
+ "eval_gooaq_pairs_loss": 0.9541385769844055,
2316
+ "eval_gooaq_pairs_runtime": 2.1014,
2317
+ "eval_gooaq_pairs_samples_per_second": 71.38,
2318
+ "eval_gooaq_pairs_steps_per_second": 2.379,
2319
+ "step": 10290
2320
+ },
2321
+ {
2322
+ "epoch": 1.7768130745658834,
2323
+ "grad_norm": 51.763004302978516,
2324
+ "learning_rate": 2.6059482659094694e-05,
2325
+ "loss": 1.4422,
2326
+ "step": 10437
2327
+ },
2328
+ {
2329
+ "epoch": 1.801838610827375,
2330
+ "grad_norm": 3.4887988567352295,
2331
+ "learning_rate": 2.5352014588076858e-05,
2332
+ "loss": 1.4997,
2333
+ "step": 10584
2334
+ },
2335
+ {
2336
+ "epoch": 1.8268641470888662,
2337
+ "grad_norm": 6.360722064971924,
2338
+ "learning_rate": 2.4597553484920438e-05,
2339
+ "loss": 1.2797,
2340
+ "step": 10731
2341
+ },
2342
+ {
2343
+ "epoch": 1.8518896833503575,
2344
+ "grad_norm": 16.216428756713867,
2345
+ "learning_rate": 2.3799524230315696e-05,
2346
+ "loss": 1.2362,
2347
+ "step": 10878
2348
+ },
2349
+ {
2350
+ "epoch": 1.8769152196118488,
2351
+ "grad_norm": 19.113628387451172,
2352
+ "learning_rate": 2.2961549482836967e-05,
2353
+ "loss": 1.2799,
2354
+ "step": 11025
2355
+ },
2356
+ {
2357
+ "epoch": 1.8769152196118488,
2358
+ "eval_nli-pairs_loss": 1.2669230699539185,
2359
+ "eval_nli-pairs_runtime": 4.0111,
2360
+ "eval_nli-pairs_samples_per_second": 37.396,
2361
+ "eval_nli-pairs_steps_per_second": 1.247,
2362
+ "eval_sts-test_pearson_cosine": 0.774489523257569,
2363
+ "eval_sts-test_pearson_dot": 0.5150859135257536,
2364
+ "eval_sts-test_pearson_euclidean": 0.7570251269629877,
2365
+ "eval_sts-test_pearson_manhattan": 0.7623769541465137,
2366
+ "eval_sts-test_pearson_max": 0.774489523257569,
2367
+ "eval_sts-test_spearman_cosine": 0.7816800005074528,
2368
+ "eval_sts-test_spearman_dot": 0.565603897190929,
2369
+ "eval_sts-test_spearman_euclidean": 0.7507848233553155,
2370
+ "eval_sts-test_spearman_manhattan": 0.756029656784038,
2371
+ "eval_sts-test_spearman_max": 0.7816800005074528,
2372
+ "step": 11025
2373
+ },
2374
+ {
2375
+ "epoch": 1.8769152196118488,
2376
+ "eval_vitaminc-pairs_loss": 0.875577986240387,
2377
+ "eval_vitaminc-pairs_runtime": 2.2185,
2378
+ "eval_vitaminc-pairs_samples_per_second": 67.614,
2379
+ "eval_vitaminc-pairs_steps_per_second": 2.254,
2380
+ "step": 11025
2381
+ },
2382
+ {
2383
+ "epoch": 1.8769152196118488,
2384
+ "eval_qnli-contrastive_loss": 0.23095794022083282,
2385
+ "eval_qnli-contrastive_runtime": 0.4906,
2386
+ "eval_qnli-contrastive_samples_per_second": 305.756,
2387
+ "eval_qnli-contrastive_steps_per_second": 10.192,
2388
+ "step": 11025
2389
+ },
2390
+ {
2391
+ "epoch": 1.8769152196118488,
2392
+ "eval_scitail-pairs-qa_loss": 0.11762743443250656,
2393
+ "eval_scitail-pairs-qa_runtime": 1.1505,
2394
+ "eval_scitail-pairs-qa_samples_per_second": 130.379,
2395
+ "eval_scitail-pairs-qa_steps_per_second": 4.346,
2396
+ "step": 11025
2397
+ },
2398
+ {
2399
+ "epoch": 1.8769152196118488,
2400
+ "eval_scitail-pairs-pos_loss": 0.5055103898048401,
2401
+ "eval_scitail-pairs-pos_runtime": 2.1912,
2402
+ "eval_scitail-pairs-pos_samples_per_second": 68.456,
2403
+ "eval_scitail-pairs-pos_steps_per_second": 2.282,
2404
+ "step": 11025
2405
+ },
2406
+ {
2407
+ "epoch": 1.8769152196118488,
2408
+ "eval_xsum-pairs_loss": 0.5941822528839111,
2409
+ "eval_xsum-pairs_runtime": 2.26,
2410
+ "eval_xsum-pairs_samples_per_second": 66.371,
2411
+ "eval_xsum-pairs_steps_per_second": 2.212,
2412
+ "step": 11025
2413
+ },
2414
+ {
2415
+ "epoch": 1.8769152196118488,
2416
+ "eval_compression-pairs_loss": 0.16561630368232727,
2417
+ "eval_compression-pairs_runtime": 0.4447,
2418
+ "eval_compression-pairs_samples_per_second": 337.281,
2419
+ "eval_compression-pairs_steps_per_second": 11.243,
2420
+ "step": 11025
2421
+ },
2422
+ {
2423
+ "epoch": 1.8769152196118488,
2424
+ "eval_sciq_pairs_loss": 0.6859617233276367,
2425
+ "eval_sciq_pairs_runtime": 7.2855,
2426
+ "eval_sciq_pairs_samples_per_second": 20.589,
2427
+ "eval_sciq_pairs_steps_per_second": 0.686,
2428
+ "step": 11025
2429
+ },
2430
+ {
2431
+ "epoch": 1.8769152196118488,
2432
+ "eval_qasc_pairs_loss": 4.979205131530762,
2433
+ "eval_qasc_pairs_runtime": 2.0332,
2434
+ "eval_qasc_pairs_samples_per_second": 73.775,
2435
+ "eval_qasc_pairs_steps_per_second": 2.459,
2436
+ "step": 11025
2437
+ },
2438
+ {
2439
+ "epoch": 1.8769152196118488,
2440
+ "eval_openbookqa_pairs_loss": 2.5103061199188232,
2441
+ "eval_openbookqa_pairs_runtime": 0.8673,
2442
+ "eval_openbookqa_pairs_samples_per_second": 118.755,
2443
+ "eval_openbookqa_pairs_steps_per_second": 4.612,
2444
+ "step": 11025
2445
+ },
2446
+ {
2447
+ "epoch": 1.8769152196118488,
2448
+ "eval_msmarco_pairs_loss": 1.2753304243087769,
2449
+ "eval_msmarco_pairs_runtime": 2.7942,
2450
+ "eval_msmarco_pairs_samples_per_second": 53.683,
2451
+ "eval_msmarco_pairs_steps_per_second": 1.789,
2452
+ "step": 11025
2453
+ },
2454
+ {
2455
+ "epoch": 1.8769152196118488,
2456
+ "eval_nq_pairs_loss": 1.057248592376709,
2457
+ "eval_nq_pairs_runtime": 5.0749,
2458
+ "eval_nq_pairs_samples_per_second": 29.557,
2459
+ "eval_nq_pairs_steps_per_second": 0.985,
2460
+ "step": 11025
2461
+ },
2462
+ {
2463
+ "epoch": 1.8769152196118488,
2464
+ "eval_trivia_pairs_loss": 1.4893617630004883,
2465
+ "eval_trivia_pairs_runtime": 9.5535,
2466
+ "eval_trivia_pairs_samples_per_second": 15.701,
2467
+ "eval_trivia_pairs_steps_per_second": 0.523,
2468
+ "step": 11025
2469
+ },
2470
+ {
2471
+ "epoch": 1.8769152196118488,
2472
+ "eval_quora_pairs_loss": 0.27783504128456116,
2473
+ "eval_quora_pairs_runtime": 1.1843,
2474
+ "eval_quora_pairs_samples_per_second": 126.653,
2475
+ "eval_quora_pairs_steps_per_second": 4.222,
2476
+ "step": 11025
2477
+ },
2478
+ {
2479
+ "epoch": 1.8769152196118488,
2480
+ "eval_gooaq_pairs_loss": 0.8971360325813293,
2481
+ "eval_gooaq_pairs_runtime": 2.0278,
2482
+ "eval_gooaq_pairs_samples_per_second": 73.97,
2483
+ "eval_gooaq_pairs_steps_per_second": 2.466,
2484
+ "step": 11025
2485
+ },
2486
+ {
2487
+ "epoch": 1.90194075587334,
2488
+ "grad_norm": 14.915979385375977,
2489
+ "learning_rate": 2.2087433233862403e-05,
2490
+ "loss": 1.2292,
2491
+ "step": 11172
2492
+ },
2493
+ {
2494
+ "epoch": 1.9269662921348316,
2495
+ "grad_norm": 13.753366470336914,
2496
+ "learning_rate": 2.118740830659258e-05,
2497
+ "loss": 1.0362,
2498
+ "step": 11319
2499
+ },
2500
+ {
2501
+ "epoch": 1.9519918283963227,
2502
+ "grad_norm": 8.33267593383789,
2503
+ "learning_rate": 2.0259676306932596e-05,
2504
+ "loss": 1.1851,
2505
+ "step": 11466
2506
+ },
2507
+ {
2508
+ "epoch": 1.9770173646578142,
2509
+ "grad_norm": 0.6671110987663269,
2510
+ "learning_rate": 1.9301804508269106e-05,
2511
+ "loss": 1.0248,
2512
+ "step": 11613
2513
+ }
2514
+ ],
2515
+ "logging_steps": 147,
2516
+ "max_steps": 29370,
2517
+ "num_input_tokens_seen": 0,
2518
+ "num_train_epochs": 5,
2519
+ "save_steps": 2937,
2520
+ "stateful_callbacks": {
2521
+ "TrainerControl": {
2522
+ "args": {
2523
+ "should_epoch_stop": false,
2524
+ "should_evaluate": false,
2525
+ "should_log": false,
2526
+ "should_save": true,
2527
+ "should_training_stop": false
2528
+ },
2529
+ "attributes": {}
2530
+ }
2531
+ },
2532
+ "total_flos": 0.0,
2533
+ "train_batch_size": 32,
2534
+ "trial_name": null,
2535
+ "trial_params": null
2536
+ }
checkpoint-11748/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:298cfa7a9e669e98ab55937cac47ecfa89fb6a36f2afe18f46ba782a5f5bf5a4
3
+ size 5624