vangard703 committed
Commit 7d0d63c (verified) · Parent(s): 5df613b

Model save

README.md ADDED
@@ -0,0 +1,71 @@
+ ---
+ license: apache-2.0
+ base_model: mistralai/Mistral-7B-Instruct-v0.2
+ tags:
+ - trl
+ - dpo
+ - generated_from_trainer
+ model-index:
+ - name: DPO-PairRM-5-SMI-lr-1e6-iteration-5-t-7e-beta-15e3-1-iteration-6e1-confidence-D1-D2_smi
+   results: []
+ ---
+
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
+ should probably proofread and complete it, then remove this comment. -->
+
+ # DPO-PairRM-5-SMI-lr-1e6-iteration-5-t-7e-beta-15e3-1-iteration-6e1-confidence-D1-D2_smi
+
+ This model is a fine-tuned version of [mistralai/Mistral-7B-Instruct-v0.2](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2) on an unspecified dataset.
+ It achieves the following results on the evaluation set:
+ - Loss: 0.6444
+ - Rewards/chosen: -2.4793
+ - Rewards/rejected: -2.9560
+ - Rewards/accuracies: 0.6667
+ - Rewards/margins: 0.4767
+ - Rewards/mix margin: 0.1749
+ - Logps/rejected: -481.8095
+ - Logps/chosen: -453.2426
+ - Logits/rejected: -1.7012
+ - Logits/chosen: -1.7287
+
+ ## Model description
+
+ More information needed
+
+ ## Intended uses & limitations
+
+ More information needed
+
+ ## Training and evaluation data
+
+ More information needed
+
+ ## Training procedure
+
+ ### Training hyperparameters
+
+ The following hyperparameters were used during training:
+ - learning_rate: 1e-06
+ - train_batch_size: 1
+ - eval_batch_size: 1
+ - seed: 42
+ - distributed_type: multi-GPU
+ - num_devices: 4
+ - gradient_accumulation_steps: 16
+ - total_train_batch_size: 64
+ - total_eval_batch_size: 4
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
+ - lr_scheduler_type: cosine
+ - lr_scheduler_warmup_ratio: 0.1
+ - num_epochs: 1
+
+ ### Training results
+
+
+
+ ### Framework versions
+
+ - Transformers 4.36.2
+ - Pytorch 2.1.2
+ - Datasets 2.17.1
+ - Tokenizers 0.15.1
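For reference, a minimal inference sketch follows. It assumes the checkpoint is published under a repo id matching the model-index name above (the full `vangard703/...` path is hypothetical; substitute the actual one) and uses the Mistral-style chat template shipped in this commit's tokenizer_config.json.

```python
# Minimal inference sketch; the repo id below is an assumption, adjust as needed.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

repo_id = "vangard703/DPO-PairRM-5-SMI-lr-1e6-iteration-5-t-7e-beta-15e3-1-iteration-6e1-confidence-D1-D2_smi"  # hypothetical

tokenizer = AutoTokenizer.from_pretrained(repo_id)
model = AutoModelForCausalLM.from_pretrained(
    repo_id, torch_dtype=torch.bfloat16, device_map="auto"
)

# The tokenizer ships a Mistral [INST] chat template (see tokenizer_config.json below).
messages = [{"role": "user", "content": "Summarize DPO in one sentence."}]
inputs = tokenizer.apply_chat_template(messages, return_tensors="pt").to(model.device)

out = model.generate(inputs, max_new_tokens=128, eos_token_id=tokenizer.eos_token_id)
print(tokenizer.decode(out[0][inputs.shape[-1]:], skip_special_tokens=True))
```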
all_results.json ADDED
@@ -0,0 +1,22 @@
+ {
+   "epoch": 1.0,
+   "eval_logits/chosen": -1.7287349700927734,
+   "eval_logits/rejected": -1.7011512517929077,
+   "eval_logps/chosen": -453.2426452636719,
+   "eval_logps/rejected": -481.8094787597656,
+   "eval_loss": 0.6444016695022583,
+   "eval_rewards/accuracies": 0.6666666865348816,
+   "eval_rewards/chosen": -2.479323625564575,
+   "eval_rewards/margins": 0.47671714425086975,
+   "eval_rewards/mix_margin": 0.17488937079906464,
+   "eval_rewards/rejected": -2.9560413360595703,
+   "eval_runtime": 716.6697,
+   "eval_samples": 1522,
+   "eval_samples_per_second": 2.124,
+   "eval_steps_per_second": 0.532,
+   "train_loss": 0.5442263817735228,
+   "train_runtime": 33688.2171,
+   "train_samples": 29394,
+   "train_samples_per_second": 0.873,
+   "train_steps_per_second": 0.014
+ }
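For readers unfamiliar with these columns: in standard DPO (the TRL implementation this run appears to extend), the eval_rewards/* values are the policy's implicit rewards, i.e. β-scaled log-probability ratios against the frozen reference model, and the margin is their gap on preferred vs. dispreferred completions. The numbers above are internally consistent: −2.4793 − (−2.9560) ≈ 0.4767 = eval_rewards/margins. The mix_margin and confidence columns look like additions from this run's modified trainer rather than standard TRL outputs.

```latex
r_\theta(x, y) = \beta \left[ \log \pi_\theta(y \mid x) - \log \pi_{\mathrm{ref}}(y \mid x) \right],
\qquad
\mathrm{margins} = r_\theta(x, y_w) - r_\theta(x, y_l)
```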
config.json ADDED
@@ -0,0 +1,26 @@
+ {
+   "_name_or_path": "mistralai/Mistral-7B-Instruct-v0.2",
+   "architectures": [
+     "MistralForCausalLM"
+   ],
+   "attention_dropout": 0.0,
+   "bos_token_id": 1,
+   "eos_token_id": 2,
+   "hidden_act": "silu",
+   "hidden_size": 4096,
+   "initializer_range": 0.02,
+   "intermediate_size": 14336,
+   "max_position_embeddings": 32768,
+   "model_type": "mistral",
+   "num_attention_heads": 32,
+   "num_hidden_layers": 32,
+   "num_key_value_heads": 8,
+   "rms_norm_eps": 1e-05,
+   "rope_theta": 1000000.0,
+   "sliding_window": null,
+   "tie_word_embeddings": false,
+   "torch_dtype": "bfloat16",
+   "transformers_version": "4.36.2",
+   "use_cache": false,
+   "vocab_size": 32000
+ }
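As a sanity check, the architecture fields above pin down the parameter count, which matches the sharded checkpoint's total_size below (14,483,464,192 bytes at 2 bytes per bfloat16 parameter). A sketch of the arithmetic:

```python
# Parameter count implied by config.json (grouped-query attention: 8 KV heads, head_dim 128).
hidden, inter, layers, vocab = 4096, 14336, 32, 32000
head_dim = hidden // 32                            # 32 attention heads -> 128
kv_dim = 8 * head_dim                              # 8 key/value heads -> 1024

attn = 2 * hidden * hidden + 2 * hidden * kv_dim   # q/o full width, k/v grouped
mlp = 3 * hidden * inter                           # gate, up, down projections
norms = 2 * hidden                                 # input + post-attention RMSNorm

per_layer = attn + mlp + norms
total = layers * per_layer + 2 * vocab * hidden + hidden  # + embed, lm_head, final norm

print(total)       # 7_241_732_096 parameters (~7.24B)
print(total * 2)   # 14_483_464_192 bytes -> matches model.safetensors.index.json
```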
eval_results.json ADDED
@@ -0,0 +1,17 @@
+ {
+   "epoch": 1.0,
+   "eval_logits/chosen": -1.7287349700927734,
+   "eval_logits/rejected": -1.7011512517929077,
+   "eval_logps/chosen": -453.2426452636719,
+   "eval_logps/rejected": -481.8094787597656,
+   "eval_loss": 0.6444016695022583,
+   "eval_rewards/accuracies": 0.6666666865348816,
+   "eval_rewards/chosen": -2.479323625564575,
+   "eval_rewards/margins": 0.47671714425086975,
+   "eval_rewards/mix_margin": 0.17488937079906464,
+   "eval_rewards/rejected": -2.9560413360595703,
+   "eval_runtime": 716.6697,
+   "eval_samples": 1522,
+   "eval_samples_per_second": 2.124,
+   "eval_steps_per_second": 0.532
+ }
generation_config.json ADDED
@@ -0,0 +1,6 @@
+ {
+   "_from_model_config": true,
+   "bos_token_id": 1,
+   "eos_token_id": 2,
+   "transformers_version": "4.36.2"
+ }
model-00001-of-00003.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:01145455128921bf94f24e0c1a8f6fea6dabc01cc32be8f13574c23bffda35ff
+ size 4943162336
model-00002-of-00003.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e69f3c130039a4d474ce0f547b42968d7e1763257e451ec31597f1049f5bb494
+ size 4999819336
model-00003-of-00003.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a0ef9c7bcdb279ba1a9d8c2dd9c0ddfb82b401680153b3728b772fffe4a8c69d
+ size 4540516344
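The three entries above are Git LFS pointers, not the weights themselves: each records only the spec version, the SHA-256 of the real file, and its byte size. A sketch for fetching one shard directly (the repo id is the same hypothetical one as above):

```python
# Resolve an LFS pointer to the actual ~4.9 GB shard via the Hub client.
from huggingface_hub import hf_hub_download

path = hf_hub_download(
    repo_id="vangard703/DPO-PairRM-5-SMI-lr-1e6-iteration-5-t-7e-beta-15e3-1-iteration-6e1-confidence-D1-D2_smi",  # hypothetical
    filename="model-00001-of-00003.safetensors",
)
print(path)  # local cache path of the downloaded shard
```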
model.safetensors.index.json ADDED
@@ -0,0 +1,298 @@
+ {
+   "metadata": {
+     "total_size": 14483464192
+   },
+   "weight_map": {
+     "lm_head.weight": "model-00003-of-00003.safetensors",
+     "model.embed_tokens.weight": "model-00001-of-00003.safetensors",
+     "model.layers.0.input_layernorm.weight": "model-00001-of-00003.safetensors",
+     "model.layers.0.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.0.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
+     "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.1.input_layernorm.weight": "model-00001-of-00003.safetensors",
+     "model.layers.1.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.1.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
+     "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.10.input_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.10.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.10.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.10.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.10.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.10.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.10.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.10.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.10.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.11.input_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.11.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.11.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.11.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.11.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.11.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.11.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.11.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.11.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.12.input_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.12.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.12.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.12.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.12.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.12.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.12.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.12.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.12.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.13.input_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.13.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.13.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.13.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.13.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.13.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.13.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.13.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.13.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.14.input_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.14.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.14.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.14.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.14.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.14.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.14.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.14.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.14.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.15.input_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.15.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.15.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.15.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.15.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.15.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.15.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.15.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.15.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.16.input_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.16.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.16.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.16.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.16.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.16.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.16.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.16.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.16.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.17.input_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.17.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.17.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.17.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.17.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.17.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.17.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.17.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.17.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.18.input_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.18.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.18.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.18.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.18.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.18.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.18.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.18.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.18.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.19.input_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.19.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.19.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.19.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.19.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.19.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.19.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.19.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.19.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.2.input_layernorm.weight": "model-00001-of-00003.safetensors",
+     "model.layers.2.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.2.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
+     "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.20.input_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.20.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.20.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.20.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.20.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.20.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.20.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.20.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.20.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.21.input_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.21.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.21.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.21.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.21.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.21.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.21.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.21.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.21.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.22.input_layernorm.weight": "model-00003-of-00003.safetensors",
+     "model.layers.22.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.22.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.22.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.22.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
+     "model.layers.22.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.22.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.22.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.22.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.23.input_layernorm.weight": "model-00003-of-00003.safetensors",
+     "model.layers.23.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.23.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.23.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.23.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
+     "model.layers.23.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.23.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.23.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.23.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.24.input_layernorm.weight": "model-00003-of-00003.safetensors",
+     "model.layers.24.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.24.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.24.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.24.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
+     "model.layers.24.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.24.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.24.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.24.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.25.input_layernorm.weight": "model-00003-of-00003.safetensors",
+     "model.layers.25.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.25.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.25.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.25.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
+     "model.layers.25.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.25.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.25.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.25.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.26.input_layernorm.weight": "model-00003-of-00003.safetensors",
+     "model.layers.26.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.26.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.26.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.26.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
+     "model.layers.26.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.26.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.26.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.26.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.27.input_layernorm.weight": "model-00003-of-00003.safetensors",
+     "model.layers.27.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.27.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.27.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.27.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
+     "model.layers.27.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.27.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.27.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.27.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.28.input_layernorm.weight": "model-00003-of-00003.safetensors",
+     "model.layers.28.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.28.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.28.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.28.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
+     "model.layers.28.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.28.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.28.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.28.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.29.input_layernorm.weight": "model-00003-of-00003.safetensors",
+     "model.layers.29.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.29.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.29.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.29.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
+     "model.layers.29.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.29.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.29.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.29.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.3.input_layernorm.weight": "model-00001-of-00003.safetensors",
+     "model.layers.3.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.3.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
+     "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.30.input_layernorm.weight": "model-00003-of-00003.safetensors",
+     "model.layers.30.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.30.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.30.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.30.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
+     "model.layers.30.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.30.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.30.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.30.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.31.input_layernorm.weight": "model-00003-of-00003.safetensors",
+     "model.layers.31.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.31.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.31.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.31.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
+     "model.layers.31.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.31.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.31.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.31.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.4.input_layernorm.weight": "model-00001-of-00003.safetensors",
+     "model.layers.4.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.4.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
+     "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.5.input_layernorm.weight": "model-00001-of-00003.safetensors",
+     "model.layers.5.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.5.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.5.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
+     "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.6.input_layernorm.weight": "model-00001-of-00003.safetensors",
+     "model.layers.6.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.6.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.6.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
+     "model.layers.6.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.7.input_layernorm.weight": "model-00001-of-00003.safetensors",
+     "model.layers.7.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.7.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.7.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
+     "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.7.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.7.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.8.input_layernorm.weight": "model-00001-of-00003.safetensors",
+     "model.layers.8.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.8.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.8.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.8.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
+     "model.layers.8.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.8.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.8.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.8.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.9.input_layernorm.weight": "model-00001-of-00003.safetensors",
+     "model.layers.9.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.9.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.9.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.9.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
+     "model.layers.9.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.9.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.9.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.9.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+     "model.norm.weight": "model-00003-of-00003.safetensors"
+   }
+ }
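The weight_map above is how sharded checkpoints get resolved: a loader reads the index, looks up which shard holds a given tensor, and opens only that file. A sketch of the lookup, assuming the index and the three shards sit in the current directory:

```python
# Resolve a single tensor through model.safetensors.index.json without loading full shards.
import json
from safetensors import safe_open

with open("model.safetensors.index.json") as f:
    index = json.load(f)

name = "model.layers.10.mlp.gate_proj.weight"    # layer 10 is split; this tensor is in shard 1
shard = index["weight_map"][name]                # -> "model-00001-of-00003.safetensors"

with safe_open(shard, framework="pt") as f:
    tensor = f.get_tensor(name)                  # reads only the requested tensor
print(shard, tuple(tensor.shape))                # expected shape: (14336, 4096)
```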
runs/Apr25_01-20-17_alin22/events.out.tfevents.1713975746.alin22.3792232.0 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e2b6a2c4720840d27c586b81d2ecc18bac4f1a7a9949182ff4e8d3784582f69e
+ size 50553
runs/Apr25_01-20-17_alin22/events.out.tfevents.1714010446.alin22.3792232.1 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:14f69f05c2e9057754d2c13501454e231443297c0a73cc22a874e3855a6064f8
+ size 890
runs/Apr25_12-37-07_alin22/events.out.tfevents.1714016355.alin22.4154124.0 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:7792cf2bd89521f4cd807add85a15aa519fdc420c9728b900a51b7b3db8f2c4a
+ size 49587
runs/Apr25_12-37-07_alin22/events.out.tfevents.1714050760.alin22.4154124.1 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e1c7cf0c73f963aa39d7cdae7643fd79a954139f34ea41cb116c4facf3014b45
+ size 890
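The runs/ directory holds TensorBoard event files (one large training log and one small final-eval log per run). A sketch for reading the scalar curves back out, assuming the tensorboard package is installed; the exact tag names depend on the logger:

```python
# Read logged scalars from the TensorBoard event files above.
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator

acc = EventAccumulator("runs/Apr25_12-37-07_alin22")  # point at a run directory
acc.Reload()

tags = acc.Tags()["scalars"]          # available scalar tags, e.g. a train-loss tag
for event in acc.Scalars(tags[0]):
    print(event.step, event.value)
```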
special_tokens_map.json ADDED
@@ -0,0 +1,24 @@
+ {
+   "bos_token": {
+     "content": "<s>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "eos_token": {
+     "content": "</s>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "pad_token": "</s>",
+   "unk_token": {
+     "content": "<unk>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   }
+ }
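Note that pad_token is set to the eos token </s>, a common workaround since Mistral's tokenizer ships no dedicated pad token. Because pad and eos then share an id, batched code should rely on the attention mask returned by the tokenizer rather than inferring padding from token ids. A sketch:

```python
# pad_token reuses </s> (id 2); keep the tokenizer's attention_mask for padded batches.
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.2")
tok.pad_token = tok.eos_token  # mirrors special_tokens_map.json above

batch = tok(["short prompt", "a somewhat longer prompt"], padding=True, return_tensors="pt")
print(tok.pad_token_id == tok.eos_token_id)   # True
print(batch["attention_mask"].sum(dim=1))     # real-token counts per row
```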
tokenizer.json ADDED
The diff for this file is too large to render.
tokenizer.model ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:dadfd56d766715c61d2ef780a525ab43b8e6da4de6865bda3d95fdef5e134055
+ size 493443
tokenizer_config.json ADDED
@@ -0,0 +1,43 @@
+ {
+   "add_bos_token": true,
+   "add_eos_token": false,
+   "added_tokens_decoder": {
+     "0": {
+       "content": "<unk>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "1": {
+       "content": "<s>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "2": {
+       "content": "</s>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     }
+   },
+   "additional_special_tokens": [],
+   "bos_token": "<s>",
+   "chat_template": "{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] == 'user' %}{{ '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ message['content'] + eos_token}}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}",
+   "clean_up_tokenization_spaces": false,
+   "eos_token": "</s>",
+   "legacy": true,
+   "model_max_length": 2048,
+   "pad_token": "</s>",
+   "sp_model_kwargs": {},
+   "spaces_between_special_tokens": false,
+   "tokenizer_class": "LlamaTokenizer",
+   "unk_token": "<unk>",
+   "use_default_system_prompt": false
+ }
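The chat_template above is a Jinja template enforcing strict user/assistant alternation and Mistral's [INST] markup. Rendering it shows the exact prompt string the model sees; the expected output below is derived directly from the template:

```python
# Render the Jinja chat_template from tokenizer_config.json to a raw prompt string.
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.2")  # same template family

messages = [
    {"role": "user", "content": "Hello"},
    {"role": "assistant", "content": "Hi there."},
    {"role": "user", "content": "What is DPO?"},
]
print(tok.apply_chat_template(messages, tokenize=False))
# -> <s>[INST] Hello [/INST]Hi there.</s>[INST] What is DPO? [/INST]
```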
train_results.json ADDED
@@ -0,0 +1,8 @@
+ {
+   "epoch": 1.0,
+   "train_loss": 0.5442263817735228,
+   "train_runtime": 33688.2171,
+   "train_samples": 29394,
+   "train_samples_per_second": 0.873,
+   "train_steps_per_second": 0.014
+ }
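These throughput figures are consistent with the README hyperparameters: an effective batch of 64 (1 per device × 4 GPUs × 16 accumulation steps) over 29,394 samples gives the 459 optimizer steps recorded in trainer_state.json, assuming the trailing partial accumulation step is floored. A quick check:

```python
# Consistency check: README hyperparameters vs. the throughput numbers above.
per_device, num_gpus, grad_accum = 1, 4, 16        # from the README
train_samples, train_runtime = 29394, 33688.2171

effective_batch = per_device * num_gpus * grad_accum   # 64 (total_train_batch_size)
steps = train_samples // effective_batch               # 459, matching global_step below

print(train_samples / train_runtime)   # ~0.873 samples/s
print(steps / train_runtime)           # ~0.0136, reported as 0.014 steps/s
```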
trainer_state.json ADDED
@@ -0,0 +1,900 @@
+ {
+   "best_metric": null,
+   "best_model_checkpoint": null,
+   "epoch": 0.9993196353245339,
+   "eval_steps": 1000,
+   "global_step": 459,
+   "is_hyper_param_search": false,
+   "is_local_process_zero": true,
+   "is_world_process_zero": true,
+   "log_history": [
+     {
+       "epoch": 0.0,
+       "learning_rate": 2.1739130434782606e-08,
+       "logits/chosen": -2.3381757736206055,
+       "logits/rejected": -2.3721675872802734,
+       "logps/chosen": -308.3559875488281,
+       "logps/rejected": -268.3885192871094,
+       "loss": 0.6931,
+       "rewards/accuracies": 0.0,
+       "rewards/chosen": 0.0,
+       "rewards/margins": 0.0,
+       "rewards/mix_margin": -1.0728836485895954e-08,
+       "rewards/rejected": 0.0,
+       "step": 1
+     },
+     {
+       "epoch": 0.02,
+       "learning_rate": 2.1739130434782607e-07,
+       "logits/chosen": -2.3499717712402344,
+       "logits/rejected": -2.30268931388855,
+       "logps/chosen": -347.79754638671875,
+       "logps/rejected": -304.29705810546875,
+       "loss": 0.6927,
+       "rewards/accuracies": 0.4513888955116272,
+       "rewards/chosen": 0.0014620342990383506,
+       "rewards/confidence": -0.011617942713201046,
+       "rewards/confidence_mean_diff": 0.011617942713201046,
+       "rewards/confidence_moving_diff": 0.0008721869089640677,
+       "rewards/margins": 0.0018856121459975839,
+       "rewards/mix_margin": 0.000961379031650722,
+       "rewards/real_percentage": 14.571428298950195,
+       "rewards/rejected": -0.00042357799247838557,
+       "step": 10
+     },
+     {
+       "epoch": 0.04,
+       "learning_rate": 4.3478260869565214e-07,
+       "logits/chosen": -2.4307799339294434,
+       "logits/rejected": -2.4313855171203613,
+       "logps/chosen": -337.871826171875,
+       "logps/rejected": -309.18695068359375,
+       "loss": 0.6902,
+       "rewards/accuracies": 0.5249999761581421,
+       "rewards/chosen": 0.018211863934993744,
+       "rewards/confidence": -0.013249741867184639,
+       "rewards/confidence_mean_diff": 0.013249741867184639,
+       "rewards/confidence_moving_diff": 0.0012787743471562862,
+       "rewards/margins": 0.004662538878619671,
+       "rewards/mix_margin": 0.002308625727891922,
+       "rewards/real_percentage": 8.800000190734863,
+       "rewards/rejected": 0.013549325987696648,
+       "step": 20
+     },
+     {
+       "epoch": 0.07,
+       "learning_rate": 6.521739130434782e-07,
+       "logits/chosen": -2.3925347328186035,
+       "logits/rejected": -2.3656105995178223,
+       "logps/chosen": -333.0788879394531,
+       "logps/rejected": -297.11865234375,
+       "loss": 0.679,
+       "rewards/accuracies": 0.6499999761581421,
+       "rewards/chosen": 0.07145825028419495,
+       "rewards/confidence": -0.036696191877126694,
+       "rewards/confidence_mean_diff": 0.036696191877126694,
+       "rewards/confidence_moving_diff": 0.0040471795946359634,
+       "rewards/margins": 0.04734768718481064,
+       "rewards/mix_margin": 0.024542126804590225,
+       "rewards/real_percentage": 7.400000095367432,
+       "rewards/rejected": 0.02411056123673916,
+       "step": 30
+     },
+     {
+       "epoch": 0.09,
+       "learning_rate": 8.695652173913043e-07,
+       "logits/chosen": -2.276813507080078,
+       "logits/rejected": -2.2625725269317627,
+       "logps/chosen": -302.2676696777344,
+       "logps/rejected": -285.21478271484375,
+       "loss": 0.6483,
+       "rewards/accuracies": 0.706250011920929,
+       "rewards/chosen": 0.041187357157468796,
+       "rewards/confidence": -0.08288822323083878,
+       "rewards/confidence_mean_diff": 0.08288822323083878,
+       "rewards/confidence_moving_diff": 0.0016981459921225905,
+       "rewards/margins": 0.11979661136865616,
+       "rewards/mix_margin": 0.06444107741117477,
+       "rewards/real_percentage": 5.599999904632568,
+       "rewards/rejected": -0.07860924303531647,
+       "step": 40
+     },
+     {
+       "epoch": 0.11,
+       "learning_rate": 9.997685665512417e-07,
+       "logits/chosen": -2.103567600250244,
+       "logits/rejected": -2.0963990688323975,
+       "logps/chosen": -310.5840759277344,
+       "logps/rejected": -280.5057373046875,
+       "loss": 0.6262,
+       "rewards/accuracies": 0.675000011920929,
+       "rewards/chosen": -0.07748699188232422,
+       "rewards/confidence": -0.15422135591506958,
+       "rewards/confidence_mean_diff": 0.15422135591506958,
+       "rewards/confidence_moving_diff": 0.007699769921600819,
+       "rewards/margins": 0.17081865668296814,
+       "rewards/mix_margin": 0.09792973846197128,
+       "rewards/real_percentage": 6.599999904632568,
+       "rewards/rejected": -0.24830564856529236,
+       "step": 50
+     },
+     {
+       "epoch": 0.13,
+       "learning_rate": 9.971674001050686e-07,
+       "logits/chosen": -1.7708213329315186,
+       "logits/rejected": -1.7266209125518799,
+       "logps/chosen": -368.86602783203125,
+       "logps/rejected": -356.1971740722656,
+       "loss": 0.5962,
+       "rewards/accuracies": 0.7250000238418579,
+       "rewards/chosen": -0.36815953254699707,
+       "rewards/confidence": -0.21500273048877716,
+       "rewards/confidence_mean_diff": 0.21500273048877716,
+       "rewards/confidence_moving_diff": 0.00331355188973248,
+       "rewards/margins": 0.3264981508255005,
+       "rewards/mix_margin": 0.19259774684906006,
+       "rewards/real_percentage": 6.800000190734863,
+       "rewards/rejected": -0.6946576833724976,
+       "step": 60
+     },
+     {
+       "epoch": 0.15,
+       "learning_rate": 9.916908694464492e-07,
+       "logits/chosen": -1.764725685119629,
+       "logits/rejected": -1.739040732383728,
+       "logps/chosen": -355.16314697265625,
+       "logps/rejected": -341.5367126464844,
+       "loss": 0.5938,
+       "rewards/accuracies": 0.75,
+       "rewards/chosen": -0.7925843000411987,
+       "rewards/confidence": -0.2245199978351593,
+       "rewards/confidence_mean_diff": 0.2245199978351593,
+       "rewards/confidence_moving_diff": 0.017536107450723648,
+       "rewards/margins": 0.3716357350349426,
+       "rewards/mix_margin": 0.21924924850463867,
+       "rewards/real_percentage": 5.800000190734863,
+       "rewards/rejected": -1.1642199754714966,
+       "step": 70
+     },
+     {
+       "epoch": 0.17,
+       "learning_rate": 9.833706480524877e-07,
+       "logits/chosen": -1.933165192604065,
+       "logits/rejected": -1.9561519622802734,
+       "logps/chosen": -402.5296325683594,
+       "logps/rejected": -389.52899169921875,
+       "loss": 0.5797,
+       "rewards/accuracies": 0.668749988079071,
+       "rewards/chosen": -1.0171725749969482,
+       "rewards/confidence": -0.4024506211280823,
+       "rewards/confidence_mean_diff": 0.4024506211280823,
+       "rewards/confidence_moving_diff": -0.013792142271995544,
+       "rewards/margins": 0.32845553755760193,
+       "rewards/mix_margin": 0.17139557003974915,
+       "rewards/real_percentage": 6.199999809265137,
+       "rewards/rejected": -1.345628023147583,
+       "step": 80
+     },
+     {
+       "epoch": 0.2,
+       "learning_rate": 9.722548558659455e-07,
+       "logits/chosen": -1.8798820972442627,
+       "logits/rejected": -1.87497878074646,
+       "logps/chosen": -400.71405029296875,
+       "logps/rejected": -409.4770812988281,
+       "loss": 0.5751,
+       "rewards/accuracies": 0.65625,
+       "rewards/chosen": -1.0970375537872314,
+       "rewards/confidence": -0.29926934838294983,
+       "rewards/confidence_mean_diff": 0.29926934838294983,
+       "rewards/confidence_moving_diff": 0.015591544099152088,
+       "rewards/margins": 0.3549976646900177,
+       "rewards/mix_margin": 0.20659112930297852,
+       "rewards/real_percentage": 6.800000190734863,
+       "rewards/rejected": -1.4520353078842163,
+       "step": 90
+     },
+     {
+       "epoch": 0.22,
+       "learning_rate": 9.584077809938854e-07,
+       "logits/chosen": -1.9064958095550537,
+       "logits/rejected": -1.8910179138183594,
+       "logps/chosen": -380.4066467285156,
+       "logps/rejected": -376.3758850097656,
+       "loss": 0.5538,
+       "rewards/accuracies": 0.737500011920929,
+       "rewards/chosen": -0.8957662582397461,
+       "rewards/confidence": -0.21787028014659882,
+       "rewards/confidence_mean_diff": 0.21787028014659882,
+       "rewards/confidence_moving_diff": -0.01832030713558197,
+       "rewards/margins": 0.5051447749137878,
+       "rewards/mix_margin": 0.28336718678474426,
+       "rewards/real_percentage": 5.599999904632568,
+       "rewards/rejected": -1.4009110927581787,
+       "step": 100
+     },
+     {
+       "epoch": 0.24,
+       "learning_rate": 9.419095078978505e-07,
+       "logits/chosen": -1.9809643030166626,
+       "logits/rejected": -1.9503233432769775,
+       "logps/chosen": -386.52081298828125,
+       "logps/rejected": -377.4222717285156,
+       "loss": 0.5741,
+       "rewards/accuracies": 0.699999988079071,
+       "rewards/chosen": -0.7311219573020935,
+       "rewards/confidence": -0.30758336186408997,
+       "rewards/confidence_mean_diff": 0.30758336186408997,
+       "rewards/confidence_moving_diff": 0.007133707404136658,
+       "rewards/margins": 0.41949161887168884,
+       "rewards/mix_margin": 0.2570333480834961,
+       "rewards/real_percentage": 6.800000190734863,
+       "rewards/rejected": -1.15061354637146,
+       "step": 110
+     },
+     {
+       "epoch": 0.26,
+       "learning_rate": 9.228554542259359e-07,
+       "logits/chosen": -2.112670421600342,
+       "logits/rejected": -2.097276210784912,
+       "logps/chosen": -356.1893615722656,
+       "logps/rejected": -397.5374450683594,
+       "loss": 0.5385,
+       "rewards/accuracies": 0.762499988079071,
+       "rewards/chosen": -0.8588116765022278,
+       "rewards/confidence": -0.3459985852241516,
+       "rewards/confidence_mean_diff": 0.3459985852241516,
+       "rewards/confidence_moving_diff": 0.003056983696296811,
+       "rewards/margins": 0.68636155128479,
+       "rewards/mix_margin": 0.2696496844291687,
+       "rewards/real_percentage": 6.400000095367432,
+       "rewards/rejected": -1.545173168182373,
+       "step": 120
+     },
+     {
+       "epoch": 0.28,
+       "learning_rate": 9.013558189654817e-07,
+       "logits/chosen": -1.9888393878936768,
+       "logits/rejected": -1.9572227001190186,
+       "logps/chosen": -455.81561279296875,
+       "logps/rejected": -461.1678161621094,
+       "loss": 0.5316,
+       "rewards/accuracies": 0.7562500238418579,
+       "rewards/chosen": -1.197947382926941,
+       "rewards/confidence": -0.27709904313087463,
+       "rewards/confidence_mean_diff": 0.27709904313087463,
+       "rewards/confidence_moving_diff": 0.008776068687438965,
+       "rewards/margins": 0.6648138165473938,
+       "rewards/mix_margin": 0.34475114941596985,
+       "rewards/real_percentage": 5.400000095367432,
+       "rewards/rejected": -1.86276113986969,
+       "step": 130
+     },
+     {
+       "epoch": 0.3,
+       "learning_rate": 8.775349451079948e-07,
+       "logits/chosen": -2.085186004638672,
+       "logits/rejected": -2.0593903064727783,
+       "logps/chosen": -428.7594299316406,
+       "logps/rejected": -433.14324951171875,
+       "loss": 0.536,
+       "rewards/accuracies": 0.7749999761581421,
+       "rewards/chosen": -1.1666946411132812,
+       "rewards/confidence": -0.3819279074668884,
+       "rewards/confidence_mean_diff": 0.3819279074668884,
+       "rewards/confidence_moving_diff": -0.0010703206062316895,
+       "rewards/margins": 0.5809696912765503,
+       "rewards/mix_margin": 0.281490296125412,
+       "rewards/real_percentage": 5.199999809265137,
+       "rewards/rejected": -1.747664451599121,
+       "step": 140
+     },
+     {
+       "epoch": 0.33,
+       "learning_rate": 8.515306005123179e-07,
+       "logits/chosen": -2.1095032691955566,
+       "logits/rejected": -2.10186505317688,
+       "logps/chosen": -407.43963623046875,
+       "logps/rejected": -430.080322265625,
+       "loss": 0.5424,
+       "rewards/accuracies": 0.612500011920929,
+       "rewards/chosen": -1.9244325160980225,
+       "rewards/confidence": -0.3746878504753113,
+       "rewards/confidence_mean_diff": 0.3746878504753113,
+       "rewards/confidence_moving_diff": 0.001715996884740889,
+       "rewards/margins": 0.4158695340156555,
+       "rewards/mix_margin": 0.18484942615032196,
+       "rewards/real_percentage": 5.800000190734863,
+       "rewards/rejected": -2.340301990509033,
+       "step": 150
+     },
+     {
+       "epoch": 0.35,
+       "learning_rate": 8.234931811251738e-07,
+       "logits/chosen": -2.0886027812957764,
+       "logits/rejected": -2.0623221397399902,
+       "logps/chosen": -434.42889404296875,
+       "logps/rejected": -436.99591064453125,
+       "loss": 0.5399,
+       "rewards/accuracies": 0.71875,
+       "rewards/chosen": -1.801783561706543,
+       "rewards/confidence": -0.35509809851646423,
+       "rewards/confidence_mean_diff": 0.35509809851646423,
+       "rewards/confidence_moving_diff": -0.013272729702293873,
+       "rewards/margins": 0.5741153955459595,
+       "rewards/mix_margin": 0.3070274591445923,
+       "rewards/real_percentage": 6.400000095367432,
+       "rewards/rejected": -2.375898838043213,
+       "step": 160
+     },
+     {
+       "epoch": 0.37,
+       "learning_rate": 7.935848411672499e-07,
+       "logits/chosen": -2.0530495643615723,
+       "logits/rejected": -2.0262792110443115,
+       "logps/chosen": -421.6261291503906,
+       "logps/rejected": -443.7255859375,
+       "loss": 0.5362,
+       "rewards/accuracies": 0.737500011920929,
+       "rewards/chosen": -1.8145627975463867,
+       "rewards/confidence": -0.3444231450557709,
+       "rewards/confidence_mean_diff": 0.3444231450557709,
+       "rewards/confidence_moving_diff": 0.011829972267150879,
+       "rewards/margins": 0.5010958909988403,
+       "rewards/mix_margin": 0.2390981912612915,
+       "rewards/real_percentage": 6.199999809265137,
+       "rewards/rejected": -2.3156585693359375,
+       "step": 170
+     },
+     {
+       "epoch": 0.39,
+       "learning_rate": 7.619785553153864e-07,
+       "logits/chosen": -1.9314157962799072,
+       "logits/rejected": -1.9014371633529663,
+       "logps/chosen": -421.787353515625,
+       "logps/rejected": -416.24273681640625,
+       "loss": 0.5448,
+       "rewards/accuracies": 0.762499988079071,
+       "rewards/chosen": -1.6310911178588867,
+       "rewards/confidence": -0.34312066435813904,
+       "rewards/confidence_mean_diff": 0.34312066435813904,
+       "rewards/confidence_moving_diff": -0.008907085284590721,
+       "rewards/margins": 0.6062546968460083,
+       "rewards/mix_margin": 0.31984299421310425,
+       "rewards/real_percentage": 6.0,
+       "rewards/rejected": -2.2373456954956055,
+       "step": 180
+     },
+     {
+       "epoch": 0.41,
+       "learning_rate": 7.288571183047321e-07,
+       "logits/chosen": -1.8551080226898193,
+       "logits/rejected": -1.8303585052490234,
+       "logps/chosen": -430.4051208496094,
+       "logps/rejected": -433.05743408203125,
+       "loss": 0.5427,
+       "rewards/accuracies": 0.8125,
+       "rewards/chosen": -1.7081422805786133,
+       "rewards/confidence": -0.3352188169956207,
+       "rewards/confidence_mean_diff": 0.3352188169956207,
+       "rewards/confidence_moving_diff": 0.016140591353178024,
+       "rewards/margins": 0.6280410289764404,
+       "rewards/mix_margin": 0.3198699355125427,
+       "rewards/real_percentage": 6.199999809265137,
+       "rewards/rejected": -2.3361833095550537,
+       "step": 190
+     },
+     {
+       "epoch": 0.44,
+       "learning_rate": 6.944120877366604e-07,
+       "logits/chosen": -1.8757364749908447,
+       "logits/rejected": -1.862011194229126,
+       "logps/chosen": -449.3797302246094,
+       "logps/rejected": -497.9132385253906,
+       "loss": 0.5307,
+       "rewards/accuracies": 0.793749988079071,
+       "rewards/chosen": -2.089892864227295,
+       "rewards/confidence": -0.4818331301212311,
+       "rewards/confidence_mean_diff": 0.4818331301212311,
+       "rewards/confidence_moving_diff": 0.015796076506376266,
+       "rewards/margins": 0.6883096694946289,
+       "rewards/mix_margin": 0.28093141317367554,
+       "rewards/real_percentage": 6.400000095367432,
+       "rewards/rejected": -2.778202533721924,
+       "step": 200
+     },
+     {
+       "epoch": 0.46,
+       "learning_rate": 6.588426762067099e-07,
+       "logits/chosen": -1.7704166173934937,
+       "logits/rejected": -1.7364553213119507,
+       "logps/chosen": -506.0335998535156,
+       "logps/rejected": -564.422607421875,
+       "loss": 0.5411,
+       "rewards/accuracies": 0.8187500238418579,
+       "rewards/chosen": -2.170118570327759,
+       "rewards/confidence": -0.4565798342227936,
+       "rewards/confidence_mean_diff": 0.4565798342227936,
+       "rewards/confidence_moving_diff": -0.0030906558968126774,
+       "rewards/margins": 0.7355421781539917,
+       "rewards/mix_margin": 0.2624685764312744,
+       "rewards/real_percentage": 6.0,
+       "rewards/rejected": -2.905660390853882,
+       "step": 210
+     },
+     {
+       "epoch": 0.48,
+       "learning_rate": 6.223545991599182e-07,
+       "logits/chosen": -1.8121325969696045,
+       "logits/rejected": -1.7733787298202515,
+       "logps/chosen": -460.2236328125,
+       "logps/rejected": -474.4139709472656,
+       "loss": 0.5638,
+       "rewards/accuracies": 0.699999988079071,
+       "rewards/chosen": -1.9526455402374268,
+       "rewards/confidence": -0.44291096925735474,
+       "rewards/confidence_mean_diff": 0.44291096925735474,
+       "rewards/confidence_moving_diff": -0.026620090007781982,
+       "rewards/margins": 0.5833476185798645,
+       "rewards/mix_margin": 0.28964775800704956,
+       "rewards/real_percentage": 5.599999904632568,
+       "rewards/rejected": -2.5359930992126465,
+       "step": 220
+     },
+     {
+       "epoch": 0.5,
+       "learning_rate": 5.85158885136973e-07,
+       "logits/chosen": -1.9277960062026978,
+       "logits/rejected": -1.8827558755874634,
+       "logps/chosen": -423.71197509765625,
+       "logps/rejected": -446.4361267089844,
+       "loss": 0.5302,
+       "rewards/accuracies": 0.75,
+       "rewards/chosen": -1.8649892807006836,
+       "rewards/confidence": -0.3467513918876648,
+       "rewards/confidence_mean_diff": 0.3467513918876648,
+       "rewards/confidence_moving_diff": -0.005762821529060602,
+       "rewards/margins": 0.5457550883293152,
+       "rewards/mix_margin": 0.22559857368469238,
+       "rewards/real_percentage": 5.400000095367432,
+       "rewards/rejected": -2.4107441902160645,
+       "step": 230
+     },
+     {
+       "epoch": 0.52,
+       "learning_rate": 5.474706552921074e-07,
+       "logits/chosen": -1.935349464416504,
+       "logits/rejected": -1.8973939418792725,
+       "logps/chosen": -395.6863708496094,
+       "logps/rejected": -423.58782958984375,
+       "loss": 0.5307,
+       "rewards/accuracies": 0.8187500238418579,
+       "rewards/chosen": -1.5539497137069702,
+       "rewards/confidence": -0.293173611164093,
+       "rewards/confidence_mean_diff": 0.293173611164093,
+       "rewards/confidence_moving_diff": -0.00015986264043021947,
+       "rewards/margins": 0.6723454594612122,
+       "rewards/mix_margin": 0.30846795439720154,
+       "rewards/real_percentage": 6.199999809265137,
+       "rewards/rejected": -2.226295232772827,
+       "step": 240
+     },
+     {
+       "epoch": 0.54,
+       "learning_rate": 5.095078792413976e-07,
+       "logits/chosen": -1.9067703485488892,
+       "logits/rejected": -1.8853263854980469,
+       "logps/chosen": -412.2330627441406,
+       "logps/rejected": -446.505859375,
+       "loss": 0.5122,
+       "rewards/accuracies": 0.75,
+       "rewards/chosen": -1.381047010421753,
+       "rewards/confidence": -0.4467477798461914,
+       "rewards/confidence_mean_diff": 0.4467477798461914,
+       "rewards/confidence_moving_diff": 0.001344837248325348,
+       "rewards/margins": 0.7658092975616455,
+       "rewards/mix_margin": 0.30207857489585876,
+       "rewards/real_percentage": 7.0,
+       "rewards/rejected": -2.1468563079833984,
+       "step": 250
+     },
+     {
+       "epoch": 0.57,
+       "learning_rate": 4.714901144369981e-07,
+       "logits/chosen": -1.9301033020019531,
+       "logits/rejected": -1.8551814556121826,
+       "logps/chosen": -422.6144104003906,
+       "logps/rejected": -459.46405029296875,
+       "loss": 0.5135,
+       "rewards/accuracies": 0.8187500238418579,
+       "rewards/chosen": -1.9298696517944336,
+       "rewards/confidence": -0.27530989050865173,
+       "rewards/confidence_mean_diff": 0.27530989050865173,
+       "rewards/confidence_moving_diff": -0.011671778745949268,
+       "rewards/margins": 0.7381618618965149,
+       "rewards/mix_margin": 0.3380785584449768,
+       "rewards/real_percentage": 6.199999809265137,
+       "rewards/rejected": -2.668031930923462,
+       "step": 260
+     },
+     {
+       "epoch": 0.59,
+       "learning_rate": 4.33637236358139e-07,
+       "logits/chosen": -1.9588849544525146,
+       "logits/rejected": -1.9613529443740845,
+       "logps/chosen": -454.5,
+       "logps/rejected": -473.86376953125,
+       "loss": 0.5282,
+       "rewards/accuracies": 0.731249988079071,
+       "rewards/chosen": -1.8807973861694336,
+       "rewards/confidence": -0.46479693055152893,
+       "rewards/confidence_mean_diff": 0.46479693055152893,
+       "rewards/confidence_moving_diff": 0.034666456282138824,
+       "rewards/margins": 0.520012378692627,
+       "rewards/mix_margin": 0.2387557029724121,
+       "rewards/real_percentage": 7.0,
+       "rewards/rejected": -2.4008097648620605,
+       "step": 270
+     },
+     {
+       "epoch": 0.61,
+       "learning_rate": 3.9616816686281635e-07,
+       "logits/chosen": -1.82375168800354,
+       "logits/rejected": -1.8041833639144897,
+       "logps/chosen": -437.76922607421875,
+       "logps/rejected": -447.7157287597656,
+       "loss": 0.4995,
+       "rewards/accuracies": 0.7875000238418579,
+       "rewards/chosen": -1.6059573888778687,
+       "rewards/confidence": -0.39664340019226074,
+       "rewards/confidence_mean_diff": 0.39664340019226074,
+       "rewards/confidence_moving_diff": -0.011675971560180187,
+       "rewards/margins": 0.5647900104522705,
+       "rewards/mix_margin": 0.2936910390853882,
+       "rewards/real_percentage": 6.199999809265137,
+       "rewards/rejected": -2.1707472801208496,
+       "step": 280
+     },
+     {
+       "epoch": 0.63,
+       "learning_rate": 3.592996080547438e-07,
+       "logits/chosen": -1.786231279373169,
+       "logits/rejected": -1.7279274463653564,
+       "logps/chosen": -411.953125,
+       "logps/rejected": -432.6134338378906,
+       "loss": 0.5355,
+       "rewards/accuracies": 0.7749999761581421,
+       "rewards/chosen": -1.9004911184310913,
+       "rewards/confidence": -0.3363880217075348,
+       "rewards/confidence_mean_diff": 0.3363880217075348,
+       "rewards/confidence_moving_diff": 0.010175123810768127,
+       "rewards/margins": 0.7472087144851685,
+       "rewards/mix_margin": 0.35827359557151794,
+       "rewards/real_percentage": 6.800000190734863,
+       "rewards/rejected": -2.6476995944976807,
+       "step": 290
+     },
+     {
+       "epoch": 0.65,
+       "learning_rate": 3.23244788988237e-07,
+       "logits/chosen": -1.7489144802093506,
+       "logits/rejected": -1.6973133087158203,
+       "logps/chosen": -410.25103759765625,
+       "logps/rejected": -413.9371032714844,
+       "loss": 0.4997,
+       "rewards/accuracies": 0.7875000238418579,
+       "rewards/chosen": -2.2288432121276855,
+       "rewards/confidence": -0.4106927514076233,
+       "rewards/confidence_mean_diff": 0.4106927514076233,
+       "rewards/confidence_moving_diff": 0.02881682850420475,
+       "rewards/margins": 0.6875227689743042,
+       "rewards/mix_margin": 0.32858434319496155,
+       "rewards/real_percentage": 6.400000095367432,
+       "rewards/rejected": -2.9163661003112793,
+       "step": 300
+     },
+     {
+       "epoch": 0.67,
+       "learning_rate": 2.8821223245945747e-07,
+       "logits/chosen": -1.6137434244155884,
+       "logits/rejected": -1.552942156791687,
+       "logps/chosen": -464.08477783203125,
+       "logps/rejected": -489.72833251953125,
+       "loss": 0.5231,
+       "rewards/accuracies": 0.78125,
+       "rewards/chosen": -2.016467332839966,
+       "rewards/confidence": -0.37749093770980835,
+       "rewards/confidence_mean_diff": 0.37749093770980835,
+       "rewards/confidence_moving_diff": -0.04578464850783348,
+       "rewards/margins": 0.7573314905166626,
+       "rewards/mix_margin": 0.3511705994606018,
+       "rewards/real_percentage": 6.599999904632568,
+       "rewards/rejected": -2.773798704147339,
+       "step": 310
+     },
+     {
+       "epoch": 0.7,
+       "learning_rate": 2.5440454901626487e-07,
+       "logits/chosen": -1.6135238409042358,
+       "logits/rejected": -1.5706686973571777,
+       "logps/chosen": -462.1180725097656,
+       "logps/rejected": -497.38104248046875,
+       "loss": 0.5229,
+       "rewards/accuracies": 0.7749999761581421,
624
+ "rewards/chosen": -2.4821410179138184,
625
+ "rewards/confidence": -0.35573774576187134,
626
+ "rewards/confidence_mean_diff": 0.35573774576187134,
627
+ "rewards/confidence_moving_diff": 0.010884365066885948,
628
+ "rewards/margins": 0.6469907760620117,
629
+ "rewards/mix_margin": 0.28311434388160706,
630
+ "rewards/real_percentage": 6.800000190734863,
631
+ "rewards/rejected": -3.12913179397583,
632
+ "step": 320
633
+ },
634
+ {
635
+ "epoch": 0.72,
636
+ "learning_rate": 2.220172651615188e-07,
637
+ "logits/chosen": -1.5926752090454102,
638
+ "logits/rejected": -1.5634915828704834,
639
+ "logps/chosen": -456.4151306152344,
640
+ "logps/rejected": -458.0672912597656,
641
+ "loss": 0.4975,
642
+ "rewards/accuracies": 0.7749999761581421,
643
+ "rewards/chosen": -1.9481618404388428,
644
+ "rewards/confidence": -0.35025110840797424,
645
+ "rewards/confidence_mean_diff": 0.35025110840797424,
646
+ "rewards/confidence_moving_diff": -0.0056252507492899895,
647
+ "rewards/margins": 0.725985050201416,
648
+ "rewards/mix_margin": 0.387787401676178,
649
+ "rewards/real_percentage": 5.400000095367432,
650
+ "rewards/rejected": -2.674147129058838,
651
+ "step": 330
652
+ },
653
+ {
654
+ "epoch": 0.74,
655
+ "learning_rate": 1.9123769252690407e-07,
656
+ "logits/chosen": -1.6937768459320068,
657
+ "logits/rejected": -1.6783796548843384,
658
+ "logps/chosen": -435.18768310546875,
659
+ "logps/rejected": -442.1273498535156,
660
+ "loss": 0.4829,
661
+ "rewards/accuracies": 0.768750011920929,
662
+ "rewards/chosen": -1.9751695394515991,
663
+ "rewards/confidence": -0.37928253412246704,
664
+ "rewards/confidence_mean_diff": 0.37928253412246704,
665
+ "rewards/confidence_moving_diff": -0.027161384001374245,
666
+ "rewards/margins": 0.6305011510848999,
667
+ "rewards/mix_margin": 0.3498138189315796,
668
+ "rewards/real_percentage": 5.800000190734863,
669
+ "rewards/rejected": -2.60567045211792,
670
+ "step": 340
671
+ },
672
+ {
673
+ "epoch": 0.76,
674
+ "learning_rate": 1.6224384455740787e-07,
675
+ "logits/chosen": -1.710269570350647,
676
+ "logits/rejected": -1.652931809425354,
677
+ "logps/chosen": -467.62591552734375,
678
+ "logps/rejected": -491.04217529296875,
679
+ "loss": 0.5033,
680
+ "rewards/accuracies": 0.737500011920929,
681
+ "rewards/chosen": -2.6455416679382324,
682
+ "rewards/confidence": -0.3581121265888214,
683
+ "rewards/confidence_mean_diff": 0.3581121265888214,
684
+ "rewards/confidence_moving_diff": 0.025283757597208023,
685
+ "rewards/margins": 0.623335063457489,
686
+ "rewards/mix_margin": 0.28707486391067505,
687
+ "rewards/real_percentage": 7.599999904632568,
688
+ "rewards/rejected": -3.268876552581787,
689
+ "step": 350
690
+ },
691
+ {
692
+ "epoch": 0.78,
693
+ "learning_rate": 1.3520340697179406e-07,
694
+ "logits/chosen": -1.6668322086334229,
695
+ "logits/rejected": -1.6106383800506592,
696
+ "logps/chosen": -469.1095275878906,
697
+ "logps/rejected": -478.17449951171875,
698
+ "loss": 0.5166,
699
+ "rewards/accuracies": 0.75,
700
+ "rewards/chosen": -2.123115062713623,
701
+ "rewards/confidence": -0.4552002549171448,
702
+ "rewards/confidence_mean_diff": 0.4552002549171448,
703
+ "rewards/confidence_moving_diff": 0.0014173805247992277,
704
+ "rewards/margins": 0.6338737607002258,
705
+ "rewards/mix_margin": 0.3297014832496643,
706
+ "rewards/real_percentage": 6.0,
707
+ "rewards/rejected": -2.756988763809204,
708
+ "step": 360
709
+ },
710
+ {
711
+ "epoch": 0.81,
712
+ "learning_rate": 1.1027276795341134e-07,
713
+ "logits/chosen": -1.5943629741668701,
714
+ "logits/rejected": -1.6161006689071655,
715
+ "logps/chosen": -467.4046325683594,
716
+ "logps/rejected": -490.90777587890625,
717
+ "loss": 0.5108,
718
+ "rewards/accuracies": 0.731249988079071,
719
+ "rewards/chosen": -2.3414371013641357,
720
+ "rewards/confidence": -0.44518932700157166,
721
+ "rewards/confidence_mean_diff": 0.44518932700157166,
722
+ "rewards/confidence_moving_diff": 0.0043337224051356316,
723
+ "rewards/margins": 0.6999163627624512,
724
+ "rewards/mix_margin": 0.32578808069229126,
725
+ "rewards/real_percentage": 6.0,
726
+ "rewards/rejected": -3.041353464126587,
727
+ "step": 370
728
+ },
729
+ {
730
+ "epoch": 0.83,
731
+ "learning_rate": 8.75961136802183e-08,
732
+ "logits/chosen": -1.6814724206924438,
733
+ "logits/rejected": -1.6028457880020142,
734
+ "logps/chosen": -474.9368591308594,
735
+ "logps/rejected": -513.1763916015625,
736
+ "loss": 0.4979,
737
+ "rewards/accuracies": 0.7437499761581421,
738
+ "rewards/chosen": -2.2711257934570312,
739
+ "rewards/confidence": -0.42974844574928284,
740
+ "rewards/confidence_mean_diff": 0.42974844574928284,
741
+ "rewards/confidence_moving_diff": 0.018308240920305252,
742
+ "rewards/margins": 0.8098933100700378,
743
+ "rewards/mix_margin": 0.3282521367073059,
744
+ "rewards/real_percentage": 7.0,
745
+ "rewards/rejected": -3.081019163131714,
746
+ "step": 380
747
+ },
748
+ {
749
+ "epoch": 0.85,
750
+ "learning_rate": 6.730459442502328e-08,
751
+ "logits/chosen": -1.6732743978500366,
752
+ "logits/rejected": -1.6020500659942627,
753
+ "logps/chosen": -492.012939453125,
754
+ "logps/rejected": -516.7794189453125,
755
+ "loss": 0.5067,
756
+ "rewards/accuracies": 0.8062499761581421,
757
+ "rewards/chosen": -2.0890817642211914,
758
+ "rewards/confidence": -0.4012150168418884,
759
+ "rewards/confidence_mean_diff": 0.4012150168418884,
760
+ "rewards/confidence_moving_diff": -0.030136477202177048,
761
+ "rewards/margins": 0.7823335528373718,
762
+ "rewards/mix_margin": 0.3618648648262024,
763
+ "rewards/real_percentage": 5.800000190734863,
764
+ "rewards/rejected": -2.871415376663208,
765
+ "step": 390
766
+ },
767
+ {
768
+ "epoch": 0.87,
769
+ "learning_rate": 4.951556604879048e-08,
770
+ "logits/chosen": -1.6667470932006836,
771
+ "logits/rejected": -1.634762167930603,
772
+ "logps/chosen": -512.33251953125,
773
+ "logps/rejected": -515.0343017578125,
774
+ "loss": 0.5576,
775
+ "rewards/accuracies": 0.737500011920929,
776
+ "rewards/chosen": -2.8803839683532715,
777
+ "rewards/confidence": -0.4185534119606018,
778
+ "rewards/confidence_mean_diff": 0.4185534119606018,
779
+ "rewards/confidence_moving_diff": 0.005899274256080389,
780
+ "rewards/margins": 0.5610276460647583,
781
+ "rewards/mix_margin": 0.29152485728263855,
782
+ "rewards/real_percentage": 6.400000095367432,
783
+ "rewards/rejected": -3.4414114952087402,
784
+ "step": 400
785
+ },
786
+ {
787
+ "epoch": 0.89,
788
+ "learning_rate": 3.4331911273830784e-08,
789
+ "logits/chosen": -1.7730276584625244,
790
+ "logits/rejected": -1.7303050756454468,
791
+ "logps/chosen": -459.1253967285156,
792
+ "logps/rejected": -487.03375244140625,
793
+ "loss": 0.5318,
794
+ "rewards/accuracies": 0.768750011920929,
795
+ "rewards/chosen": -2.33591365814209,
796
+ "rewards/confidence": -0.42727431654930115,
797
+ "rewards/confidence_mean_diff": 0.42727431654930115,
798
+ "rewards/confidence_moving_diff": 0.02859329618513584,
799
+ "rewards/margins": 0.6499987244606018,
800
+ "rewards/mix_margin": 0.3042239546775818,
801
+ "rewards/real_percentage": 7.199999809265137,
802
+ "rewards/rejected": -2.985912322998047,
803
+ "step": 410
804
+ },
805
+ {
806
+ "epoch": 0.91,
807
+ "learning_rate": 2.184144466229154e-08,
808
+ "logits/chosen": -1.7364966869354248,
809
+ "logits/rejected": -1.716314673423767,
810
+ "logps/chosen": -482.951416015625,
811
+ "logps/rejected": -509.6161193847656,
812
+ "loss": 0.4949,
813
+ "rewards/accuracies": 0.78125,
814
+ "rewards/chosen": -2.3884456157684326,
815
+ "rewards/confidence": -0.4644384980201721,
816
+ "rewards/confidence_mean_diff": 0.4644384980201721,
817
+ "rewards/confidence_moving_diff": 0.015452265739440918,
818
+ "rewards/margins": 0.7966889142990112,
819
+ "rewards/mix_margin": 0.33610981702804565,
820
+ "rewards/real_percentage": 6.400000095367432,
821
+ "rewards/rejected": -3.1851344108581543,
822
+ "step": 420
823
+ },
824
+ {
825
+ "epoch": 0.94,
826
+ "learning_rate": 1.2116404741244202e-08,
827
+ "logits/chosen": -1.6397075653076172,
828
+ "logits/rejected": -1.581413984298706,
829
+ "logps/chosen": -482.9574279785156,
830
+ "logps/rejected": -507.77972412109375,
831
+ "loss": 0.4865,
832
+ "rewards/accuracies": 0.8187500238418579,
833
+ "rewards/chosen": -2.1570003032684326,
834
+ "rewards/confidence": -0.5490925908088684,
835
+ "rewards/confidence_mean_diff": 0.5490925908088684,
836
+ "rewards/confidence_moving_diff": -0.04263176769018173,
837
+ "rewards/margins": 0.6673628091812134,
838
+ "rewards/mix_margin": 0.2867775559425354,
839
+ "rewards/real_percentage": 5.599999904632568,
840
+ "rewards/rejected": -2.8243632316589355,
841
+ "step": 430
842
+ },
843
+ {
844
+ "epoch": 0.96,
845
+ "learning_rate": 5.2130362116641905e-09,
846
+ "logits/chosen": -1.661386251449585,
847
+ "logits/rejected": -1.655612587928772,
848
+ "logps/chosen": -480.28643798828125,
849
+ "logps/rejected": -493.21697998046875,
850
+ "loss": 0.5137,
851
+ "rewards/accuracies": 0.7749999761581421,
852
+ "rewards/chosen": -2.342162609100342,
853
+ "rewards/confidence": -0.37716978788375854,
854
+ "rewards/confidence_mean_diff": 0.37716978788375854,
855
+ "rewards/confidence_moving_diff": 0.004406067542731762,
856
+ "rewards/margins": 0.6741171479225159,
857
+ "rewards/mix_margin": 0.2875633239746094,
858
+ "rewards/real_percentage": 6.599999904632568,
859
+ "rewards/rejected": -3.016279935836792,
860
+ "step": 440
861
+ },
862
+ {
863
+ "epoch": 0.98,
864
+ "learning_rate": 1.1712646575922636e-09,
865
+ "logits/chosen": -1.7303485870361328,
866
+ "logits/rejected": -1.7037239074707031,
867
+ "logps/chosen": -473.63946533203125,
868
+ "logps/rejected": -522.4127197265625,
869
+ "loss": 0.4808,
870
+ "rewards/accuracies": 0.824999988079071,
871
+ "rewards/chosen": -2.1520164012908936,
872
+ "rewards/confidence": -0.3412160277366638,
873
+ "rewards/confidence_mean_diff": 0.3412160277366638,
874
+ "rewards/confidence_moving_diff": 0.019679775461554527,
875
+ "rewards/margins": 1.036991000175476,
876
+ "rewards/mix_margin": 0.3758584260940552,
877
+ "rewards/real_percentage": 6.800000190734863,
878
+ "rewards/rejected": -3.189007520675659,
879
+ "step": 450
880
+ },
881
+ {
882
+ "epoch": 1.0,
883
+ "step": 459,
884
+ "total_flos": 0.0,
885
+ "train_loss": 0.5442263817735228,
886
+ "train_runtime": 33688.2171,
887
+ "train_samples_per_second": 0.873,
888
+ "train_steps_per_second": 0.014
889
+ }
890
+ ],
891
+ "logging_steps": 10,
892
+ "max_steps": 459,
893
+ "num_input_tokens_seen": 0,
894
+ "num_train_epochs": 1,
895
+ "save_steps": 200,
896
+ "total_flos": 0.0,
897
+ "train_batch_size": 1,
898
+ "trial_name": null,
899
+ "trial_params": null
900
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9da3eb928776786e44106432d6b86ad9ea55535f232801cc5d780f06ca84011f
+ size 6200