ydeng9 committed on
Commit
a36a681
1 Parent(s): 8f86417

Model first version

Browse files
config.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "UCLA-AGI/zephyr-7b-sft-full-SPIN-iter0",
3
+ "architectures": [
4
+ "MistralForCausalLM"
5
+ ],
6
+ "bos_token_id": 1,
7
+ "eos_token_id": 2,
8
+ "hidden_act": "silu",
9
+ "hidden_size": 4096,
10
+ "initializer_range": 0.02,
11
+ "intermediate_size": 14336,
12
+ "max_position_embeddings": 32768,
13
+ "model_type": "mistral",
14
+ "num_attention_heads": 32,
15
+ "num_hidden_layers": 32,
16
+ "num_key_value_heads": 8,
17
+ "rms_norm_eps": 1e-05,
18
+ "rope_theta": 10000.0,
19
+ "sliding_window": 4096,
20
+ "tie_word_embeddings": false,
21
+ "torch_dtype": "bfloat16",
22
+ "transformers_version": "4.35.0",
23
+ "use_cache": false,
24
+ "vocab_size": 32000
25
+ }
generation_config.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 1,
4
+ "eos_token_id": 2,
5
+ "transformers_version": "4.35.0"
6
+ }
model-00001-of-00003.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2515f61594b642dfd75582db9d92f54be8abf4e7ff701c4960f8f0b7db26003e
3
+ size 4943162336
model-00002-of-00003.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4dc433836aafde71b58f7eddab07652674034f3ba508146425b4a4f5e08ee582
3
+ size 4999819336
model-00003-of-00003.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f2632184e8f07705e4e1f3e9ee3d3377c9ec1985941d29c24533229cde030efc
3
+ size 4540516344
model.safetensors.index.json ADDED
@@ -0,0 +1,298 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "metadata": {
3
+ "total_size": 14483464192
4
+ },
5
+ "weight_map": {
6
+ "lm_head.weight": "model-00003-of-00003.safetensors",
7
+ "model.embed_tokens.weight": "model-00001-of-00003.safetensors",
8
+ "model.layers.0.input_layernorm.weight": "model-00001-of-00003.safetensors",
9
+ "model.layers.0.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
10
+ "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
11
+ "model.layers.0.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
12
+ "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
13
+ "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
14
+ "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
15
+ "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
16
+ "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
17
+ "model.layers.1.input_layernorm.weight": "model-00001-of-00003.safetensors",
18
+ "model.layers.1.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
19
+ "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
20
+ "model.layers.1.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
21
+ "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
22
+ "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
23
+ "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
24
+ "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
25
+ "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
26
+ "model.layers.10.input_layernorm.weight": "model-00002-of-00003.safetensors",
27
+ "model.layers.10.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
28
+ "model.layers.10.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
29
+ "model.layers.10.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
30
+ "model.layers.10.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
31
+ "model.layers.10.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
32
+ "model.layers.10.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
33
+ "model.layers.10.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
34
+ "model.layers.10.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
35
+ "model.layers.11.input_layernorm.weight": "model-00002-of-00003.safetensors",
36
+ "model.layers.11.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
37
+ "model.layers.11.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
38
+ "model.layers.11.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
39
+ "model.layers.11.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
40
+ "model.layers.11.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
41
+ "model.layers.11.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
42
+ "model.layers.11.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
43
+ "model.layers.11.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
44
+ "model.layers.12.input_layernorm.weight": "model-00002-of-00003.safetensors",
45
+ "model.layers.12.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
46
+ "model.layers.12.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
47
+ "model.layers.12.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
48
+ "model.layers.12.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
49
+ "model.layers.12.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
50
+ "model.layers.12.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
51
+ "model.layers.12.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
52
+ "model.layers.12.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
53
+ "model.layers.13.input_layernorm.weight": "model-00002-of-00003.safetensors",
54
+ "model.layers.13.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
55
+ "model.layers.13.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
56
+ "model.layers.13.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
57
+ "model.layers.13.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
58
+ "model.layers.13.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
59
+ "model.layers.13.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
60
+ "model.layers.13.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
61
+ "model.layers.13.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
62
+ "model.layers.14.input_layernorm.weight": "model-00002-of-00003.safetensors",
63
+ "model.layers.14.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
64
+ "model.layers.14.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
65
+ "model.layers.14.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
66
+ "model.layers.14.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
67
+ "model.layers.14.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
68
+ "model.layers.14.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
69
+ "model.layers.14.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
70
+ "model.layers.14.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
71
+ "model.layers.15.input_layernorm.weight": "model-00002-of-00003.safetensors",
72
+ "model.layers.15.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
73
+ "model.layers.15.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
74
+ "model.layers.15.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
75
+ "model.layers.15.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
76
+ "model.layers.15.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
77
+ "model.layers.15.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
78
+ "model.layers.15.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
79
+ "model.layers.15.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
80
+ "model.layers.16.input_layernorm.weight": "model-00002-of-00003.safetensors",
81
+ "model.layers.16.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
82
+ "model.layers.16.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
83
+ "model.layers.16.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
84
+ "model.layers.16.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
85
+ "model.layers.16.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
86
+ "model.layers.16.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
87
+ "model.layers.16.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
88
+ "model.layers.16.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
89
+ "model.layers.17.input_layernorm.weight": "model-00002-of-00003.safetensors",
90
+ "model.layers.17.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
91
+ "model.layers.17.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
92
+ "model.layers.17.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
93
+ "model.layers.17.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
94
+ "model.layers.17.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
95
+ "model.layers.17.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
96
+ "model.layers.17.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
97
+ "model.layers.17.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
98
+ "model.layers.18.input_layernorm.weight": "model-00002-of-00003.safetensors",
99
+ "model.layers.18.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
100
+ "model.layers.18.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
101
+ "model.layers.18.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
102
+ "model.layers.18.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
103
+ "model.layers.18.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
104
+ "model.layers.18.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
105
+ "model.layers.18.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
106
+ "model.layers.18.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
107
+ "model.layers.19.input_layernorm.weight": "model-00002-of-00003.safetensors",
108
+ "model.layers.19.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
109
+ "model.layers.19.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
110
+ "model.layers.19.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
111
+ "model.layers.19.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
112
+ "model.layers.19.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
113
+ "model.layers.19.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
114
+ "model.layers.19.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
115
+ "model.layers.19.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
116
+ "model.layers.2.input_layernorm.weight": "model-00001-of-00003.safetensors",
117
+ "model.layers.2.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
118
+ "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
119
+ "model.layers.2.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
120
+ "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
121
+ "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
122
+ "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
123
+ "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
124
+ "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
125
+ "model.layers.20.input_layernorm.weight": "model-00002-of-00003.safetensors",
126
+ "model.layers.20.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
127
+ "model.layers.20.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
128
+ "model.layers.20.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
129
+ "model.layers.20.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
130
+ "model.layers.20.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
131
+ "model.layers.20.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
132
+ "model.layers.20.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
133
+ "model.layers.20.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
134
+ "model.layers.21.input_layernorm.weight": "model-00002-of-00003.safetensors",
135
+ "model.layers.21.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
136
+ "model.layers.21.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
137
+ "model.layers.21.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
138
+ "model.layers.21.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
139
+ "model.layers.21.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
140
+ "model.layers.21.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
141
+ "model.layers.21.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
142
+ "model.layers.21.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
143
+ "model.layers.22.input_layernorm.weight": "model-00003-of-00003.safetensors",
144
+ "model.layers.22.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
145
+ "model.layers.22.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
146
+ "model.layers.22.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
147
+ "model.layers.22.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
148
+ "model.layers.22.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
149
+ "model.layers.22.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
150
+ "model.layers.22.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
151
+ "model.layers.22.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
152
+ "model.layers.23.input_layernorm.weight": "model-00003-of-00003.safetensors",
153
+ "model.layers.23.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
154
+ "model.layers.23.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
155
+ "model.layers.23.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
156
+ "model.layers.23.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
157
+ "model.layers.23.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
158
+ "model.layers.23.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
159
+ "model.layers.23.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
160
+ "model.layers.23.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
161
+ "model.layers.24.input_layernorm.weight": "model-00003-of-00003.safetensors",
162
+ "model.layers.24.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
163
+ "model.layers.24.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
164
+ "model.layers.24.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
165
+ "model.layers.24.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
166
+ "model.layers.24.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
167
+ "model.layers.24.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
168
+ "model.layers.24.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
169
+ "model.layers.24.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
170
+ "model.layers.25.input_layernorm.weight": "model-00003-of-00003.safetensors",
171
+ "model.layers.25.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
172
+ "model.layers.25.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
173
+ "model.layers.25.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
174
+ "model.layers.25.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
175
+ "model.layers.25.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
176
+ "model.layers.25.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
177
+ "model.layers.25.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
178
+ "model.layers.25.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
179
+ "model.layers.26.input_layernorm.weight": "model-00003-of-00003.safetensors",
180
+ "model.layers.26.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
181
+ "model.layers.26.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
182
+ "model.layers.26.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
183
+ "model.layers.26.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
184
+ "model.layers.26.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
185
+ "model.layers.26.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
186
+ "model.layers.26.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
187
+ "model.layers.26.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
188
+ "model.layers.27.input_layernorm.weight": "model-00003-of-00003.safetensors",
189
+ "model.layers.27.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
190
+ "model.layers.27.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
191
+ "model.layers.27.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
192
+ "model.layers.27.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
193
+ "model.layers.27.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
194
+ "model.layers.27.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
195
+ "model.layers.27.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
196
+ "model.layers.27.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
197
+ "model.layers.28.input_layernorm.weight": "model-00003-of-00003.safetensors",
198
+ "model.layers.28.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
199
+ "model.layers.28.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
200
+ "model.layers.28.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
201
+ "model.layers.28.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
202
+ "model.layers.28.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
203
+ "model.layers.28.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
204
+ "model.layers.28.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
205
+ "model.layers.28.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
206
+ "model.layers.29.input_layernorm.weight": "model-00003-of-00003.safetensors",
207
+ "model.layers.29.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
208
+ "model.layers.29.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
209
+ "model.layers.29.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
210
+ "model.layers.29.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
211
+ "model.layers.29.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
212
+ "model.layers.29.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
213
+ "model.layers.29.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
214
+ "model.layers.29.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
215
+ "model.layers.3.input_layernorm.weight": "model-00001-of-00003.safetensors",
216
+ "model.layers.3.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
217
+ "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
218
+ "model.layers.3.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
219
+ "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
220
+ "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
221
+ "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
222
+ "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
223
+ "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
224
+ "model.layers.30.input_layernorm.weight": "model-00003-of-00003.safetensors",
225
+ "model.layers.30.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
226
+ "model.layers.30.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
227
+ "model.layers.30.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
228
+ "model.layers.30.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
229
+ "model.layers.30.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
230
+ "model.layers.30.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
231
+ "model.layers.30.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
232
+ "model.layers.30.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
233
+ "model.layers.31.input_layernorm.weight": "model-00003-of-00003.safetensors",
234
+ "model.layers.31.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
235
+ "model.layers.31.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
236
+ "model.layers.31.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
237
+ "model.layers.31.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
238
+ "model.layers.31.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
239
+ "model.layers.31.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
240
+ "model.layers.31.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
241
+ "model.layers.31.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
242
+ "model.layers.4.input_layernorm.weight": "model-00001-of-00003.safetensors",
243
+ "model.layers.4.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
244
+ "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
245
+ "model.layers.4.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
246
+ "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
247
+ "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
248
+ "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
249
+ "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
250
+ "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
251
+ "model.layers.5.input_layernorm.weight": "model-00001-of-00003.safetensors",
252
+ "model.layers.5.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
253
+ "model.layers.5.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
254
+ "model.layers.5.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
255
+ "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
256
+ "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
257
+ "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
258
+ "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
259
+ "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
260
+ "model.layers.6.input_layernorm.weight": "model-00001-of-00003.safetensors",
261
+ "model.layers.6.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
262
+ "model.layers.6.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
263
+ "model.layers.6.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
264
+ "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
265
+ "model.layers.6.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
266
+ "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
267
+ "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
268
+ "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
269
+ "model.layers.7.input_layernorm.weight": "model-00001-of-00003.safetensors",
270
+ "model.layers.7.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
271
+ "model.layers.7.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
272
+ "model.layers.7.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
273
+ "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
274
+ "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
275
+ "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
276
+ "model.layers.7.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
277
+ "model.layers.7.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
278
+ "model.layers.8.input_layernorm.weight": "model-00001-of-00003.safetensors",
279
+ "model.layers.8.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
280
+ "model.layers.8.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
281
+ "model.layers.8.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
282
+ "model.layers.8.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
283
+ "model.layers.8.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
284
+ "model.layers.8.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
285
+ "model.layers.8.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
286
+ "model.layers.8.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
287
+ "model.layers.9.input_layernorm.weight": "model-00001-of-00003.safetensors",
288
+ "model.layers.9.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
289
+ "model.layers.9.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
290
+ "model.layers.9.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
291
+ "model.layers.9.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
292
+ "model.layers.9.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
293
+ "model.layers.9.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
294
+ "model.layers.9.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
295
+ "model.layers.9.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
296
+ "model.norm.weight": "model-00003-of-00003.safetensors"
297
+ }
298
+ }
special_tokens_map.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "</s>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "</s>",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "unk_token": {
24
+ "content": "<unk>",
25
+ "lstrip": false,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ }
30
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "<unk>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "<s>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "</s>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ }
27
+ },
28
+ "additional_special_tokens": [],
29
+ "bos_token": "<s>",
30
+ "chat_template": "{% for message in messages %}\n{% if message['role'] == 'user' %}\n{{ '<|user|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'system' %}\n{{ '<|system|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'assistant' %}\n{{ '<|assistant|>\n' + message['content'] + eos_token }}\n{% endif %}\n{% if loop.last and add_generation_prompt %}\n{{ '<|assistant|>' }}\n{% endif %}\n{% endfor %}",
31
+ "clean_up_tokenization_spaces": false,
32
+ "eos_token": "</s>",
33
+ "legacy": true,
34
+ "model_max_length": 2048,
35
+ "pad_token": "</s>",
36
+ "sp_model_kwargs": {},
37
+ "spaces_between_special_tokens": false,
38
+ "tokenizer_class": "LlamaTokenizer",
39
+ "unk_token": "<unk>",
40
+ "use_default_system_prompt": true
41
+ }
trainer_state.json ADDED
@@ -0,0 +1,2443 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 2.0,
5
+ "eval_steps": 100,
6
+ "global_step": 1556,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.0,
13
+ "learning_rate": 1.0706638115631692e-09,
14
+ "logits/chosen": -3.0633435249328613,
15
+ "logits/rejected": -3.0370049476623535,
16
+ "logps/chosen": -237.29315185546875,
17
+ "logps/rejected": -251.69747924804688,
18
+ "loss": 0.6931,
19
+ "rewards/accuracies": 0.0,
20
+ "rewards/chosen": 0.0,
21
+ "rewards/margins": 0.0,
22
+ "rewards/rejected": 0.0,
23
+ "step": 1
24
+ },
25
+ {
26
+ "epoch": 0.01,
27
+ "learning_rate": 1.070663811563169e-08,
28
+ "logits/chosen": -2.990461826324463,
29
+ "logits/rejected": -3.0024797916412354,
30
+ "logps/chosen": -356.6201171875,
31
+ "logps/rejected": -390.87042236328125,
32
+ "loss": 0.6911,
33
+ "rewards/accuracies": 0.5555555820465088,
34
+ "rewards/chosen": -0.004924382548779249,
35
+ "rewards/margins": 0.009135871194303036,
36
+ "rewards/rejected": -0.014060255140066147,
37
+ "step": 10
38
+ },
39
+ {
40
+ "epoch": 0.03,
41
+ "learning_rate": 2.141327623126338e-08,
42
+ "logits/chosen": -3.002528429031372,
43
+ "logits/rejected": -3.0017483234405518,
44
+ "logps/chosen": -350.7555847167969,
45
+ "logps/rejected": -393.46014404296875,
46
+ "loss": 0.6801,
47
+ "rewards/accuracies": 0.6625000238418579,
48
+ "rewards/chosen": 0.050124846398830414,
49
+ "rewards/margins": 0.028588850051164627,
50
+ "rewards/rejected": 0.02153599075973034,
51
+ "step": 20
52
+ },
53
+ {
54
+ "epoch": 0.04,
55
+ "learning_rate": 3.2119914346895076e-08,
56
+ "logits/chosen": -2.975447416305542,
57
+ "logits/rejected": -3.0126380920410156,
58
+ "logps/chosen": -375.95391845703125,
59
+ "logps/rejected": -432.83587646484375,
60
+ "loss": 0.6435,
61
+ "rewards/accuracies": 0.737500011920929,
62
+ "rewards/chosen": 0.2032477855682373,
63
+ "rewards/margins": 0.1010356992483139,
64
+ "rewards/rejected": 0.1022120863199234,
65
+ "step": 30
66
+ },
67
+ {
68
+ "epoch": 0.05,
69
+ "learning_rate": 4.282655246252676e-08,
70
+ "logits/chosen": -3.0026869773864746,
71
+ "logits/rejected": -2.9945485591888428,
72
+ "logps/chosen": -383.3456115722656,
73
+ "logps/rejected": -392.7911376953125,
74
+ "loss": 0.5784,
75
+ "rewards/accuracies": 0.7875000238418579,
76
+ "rewards/chosen": 0.4273909628391266,
77
+ "rewards/margins": 0.30088725686073303,
78
+ "rewards/rejected": 0.12650372087955475,
79
+ "step": 40
80
+ },
81
+ {
82
+ "epoch": 0.06,
83
+ "learning_rate": 5.353319057815846e-08,
84
+ "logits/chosen": -2.989891529083252,
85
+ "logits/rejected": -2.996675968170166,
86
+ "logps/chosen": -339.07513427734375,
87
+ "logps/rejected": -373.727783203125,
88
+ "loss": 0.5345,
89
+ "rewards/accuracies": 0.75,
90
+ "rewards/chosen": 0.6149066686630249,
91
+ "rewards/margins": 0.39920732378959656,
92
+ "rewards/rejected": 0.21569931507110596,
93
+ "step": 50
94
+ },
95
+ {
96
+ "epoch": 0.08,
97
+ "learning_rate": 6.423982869379015e-08,
98
+ "logits/chosen": -3.026094913482666,
99
+ "logits/rejected": -2.9982128143310547,
100
+ "logps/chosen": -327.8692321777344,
101
+ "logps/rejected": -375.9877624511719,
102
+ "loss": 0.4485,
103
+ "rewards/accuracies": 0.875,
104
+ "rewards/chosen": 0.6913961172103882,
105
+ "rewards/margins": 0.7190420031547546,
106
+ "rewards/rejected": -0.02764584682881832,
107
+ "step": 60
108
+ },
109
+ {
110
+ "epoch": 0.09,
111
+ "learning_rate": 7.494646680942184e-08,
112
+ "logits/chosen": -2.974823474884033,
113
+ "logits/rejected": -2.980032444000244,
114
+ "logps/chosen": -351.2728576660156,
115
+ "logps/rejected": -395.68609619140625,
116
+ "loss": 0.3966,
117
+ "rewards/accuracies": 0.887499988079071,
118
+ "rewards/chosen": 0.8828132748603821,
119
+ "rewards/margins": 0.9640719294548035,
120
+ "rewards/rejected": -0.08125858008861542,
121
+ "step": 70
122
+ },
123
+ {
124
+ "epoch": 0.1,
125
+ "learning_rate": 8.565310492505352e-08,
126
+ "logits/chosen": -2.977529287338257,
127
+ "logits/rejected": -2.9725558757781982,
128
+ "logps/chosen": -359.2842712402344,
129
+ "logps/rejected": -405.7890625,
130
+ "loss": 0.3519,
131
+ "rewards/accuracies": 0.887499988079071,
132
+ "rewards/chosen": 1.091180682182312,
133
+ "rewards/margins": 1.2520115375518799,
134
+ "rewards/rejected": -0.16083075106143951,
135
+ "step": 80
136
+ },
137
+ {
138
+ "epoch": 0.12,
139
+ "learning_rate": 9.635974304068522e-08,
140
+ "logits/chosen": -2.979015827178955,
141
+ "logits/rejected": -2.9813497066497803,
142
+ "logps/chosen": -309.3511047363281,
143
+ "logps/rejected": -358.91607666015625,
144
+ "loss": 0.3201,
145
+ "rewards/accuracies": 0.949999988079071,
146
+ "rewards/chosen": 1.2030521631240845,
147
+ "rewards/margins": 1.6773903369903564,
148
+ "rewards/rejected": -0.4743381440639496,
149
+ "step": 90
150
+ },
151
+ {
152
+ "epoch": 0.13,
153
+ "learning_rate": 1.0706638115631692e-07,
154
+ "logits/chosen": -2.941194534301758,
155
+ "logits/rejected": -2.9548678398132324,
156
+ "logps/chosen": -343.6178894042969,
157
+ "logps/rejected": -463.1512145996094,
158
+ "loss": 0.2696,
159
+ "rewards/accuracies": 0.925000011920929,
160
+ "rewards/chosen": 1.2106283903121948,
161
+ "rewards/margins": 1.9713561534881592,
162
+ "rewards/rejected": -0.7607278823852539,
163
+ "step": 100
164
+ },
165
+ {
166
+ "epoch": 0.13,
167
+ "eval_logits/chosen": -2.977161169052124,
168
+ "eval_logits/rejected": -2.957442045211792,
169
+ "eval_logps/chosen": -296.8330383300781,
170
+ "eval_logps/rejected": -349.66558837890625,
171
+ "eval_loss": 0.2511790990829468,
172
+ "eval_rewards/accuracies": 0.921875,
173
+ "eval_rewards/chosen": 1.1878268718719482,
174
+ "eval_rewards/margins": 1.8798556327819824,
175
+ "eval_rewards/rejected": -0.6920287609100342,
176
+ "eval_runtime": 38.7534,
177
+ "eval_samples_per_second": 12.902,
178
+ "eval_steps_per_second": 0.413,
179
+ "step": 100
180
+ },
181
+ {
182
+ "epoch": 0.14,
183
+ "learning_rate": 1.177730192719486e-07,
184
+ "logits/chosen": -2.9442899227142334,
185
+ "logits/rejected": -2.9481866359710693,
186
+ "logps/chosen": -346.63873291015625,
187
+ "logps/rejected": -406.31964111328125,
188
+ "loss": 0.2493,
189
+ "rewards/accuracies": 0.949999988079071,
190
+ "rewards/chosen": 1.2421057224273682,
191
+ "rewards/margins": 2.18147873878479,
192
+ "rewards/rejected": -0.9393728971481323,
193
+ "step": 110
194
+ },
195
+ {
196
+ "epoch": 0.15,
197
+ "learning_rate": 1.284796573875803e-07,
198
+ "logits/chosen": -2.94069242477417,
199
+ "logits/rejected": -2.9417574405670166,
200
+ "logps/chosen": -351.788330078125,
201
+ "logps/rejected": -379.61065673828125,
202
+ "loss": 0.2406,
203
+ "rewards/accuracies": 0.9125000238418579,
204
+ "rewards/chosen": 0.7772680521011353,
205
+ "rewards/margins": 1.8036502599716187,
206
+ "rewards/rejected": -1.0263820886611938,
207
+ "step": 120
208
+ },
209
+ {
210
+ "epoch": 0.17,
211
+ "learning_rate": 1.3918629550321198e-07,
212
+ "logits/chosen": -2.926699638366699,
213
+ "logits/rejected": -2.911668300628662,
214
+ "logps/chosen": -327.4112548828125,
215
+ "logps/rejected": -408.2745361328125,
216
+ "loss": 0.2073,
217
+ "rewards/accuracies": 0.887499988079071,
218
+ "rewards/chosen": 0.5646601915359497,
219
+ "rewards/margins": 2.2064461708068848,
220
+ "rewards/rejected": -1.6417862176895142,
221
+ "step": 130
222
+ },
223
+ {
224
+ "epoch": 0.18,
225
+ "learning_rate": 1.4989293361884367e-07,
226
+ "logits/chosen": -2.904219150543213,
227
+ "logits/rejected": -2.921232223510742,
228
+ "logps/chosen": -311.6190185546875,
229
+ "logps/rejected": -411.2701110839844,
230
+ "loss": 0.1967,
231
+ "rewards/accuracies": 0.887499988079071,
232
+ "rewards/chosen": 0.46902722120285034,
233
+ "rewards/margins": 2.7694640159606934,
234
+ "rewards/rejected": -2.3004367351531982,
235
+ "step": 140
236
+ },
237
+ {
238
+ "epoch": 0.19,
239
+ "learning_rate": 1.6059957173447535e-07,
240
+ "logits/chosen": -2.901981830596924,
241
+ "logits/rejected": -2.9112467765808105,
242
+ "logps/chosen": -301.6145324707031,
243
+ "logps/rejected": -391.1957092285156,
244
+ "loss": 0.1723,
245
+ "rewards/accuracies": 0.949999988079071,
246
+ "rewards/chosen": 0.218230202794075,
247
+ "rewards/margins": 3.2492637634277344,
248
+ "rewards/rejected": -3.031033992767334,
249
+ "step": 150
250
+ },
251
+ {
252
+ "epoch": 0.21,
253
+ "learning_rate": 1.7130620985010704e-07,
254
+ "logits/chosen": -2.8996052742004395,
255
+ "logits/rejected": -2.8838694095611572,
256
+ "logps/chosen": -312.6499938964844,
257
+ "logps/rejected": -447.8002014160156,
258
+ "loss": 0.1554,
259
+ "rewards/accuracies": 0.949999988079071,
260
+ "rewards/chosen": 0.3209637701511383,
261
+ "rewards/margins": 4.501524925231934,
262
+ "rewards/rejected": -4.180561065673828,
263
+ "step": 160
264
+ },
265
+ {
266
+ "epoch": 0.22,
267
+ "learning_rate": 1.8201284796573874e-07,
268
+ "logits/chosen": -2.8928513526916504,
269
+ "logits/rejected": -2.9001543521881104,
270
+ "logps/chosen": -329.20953369140625,
271
+ "logps/rejected": -423.6446228027344,
272
+ "loss": 0.1566,
273
+ "rewards/accuracies": 0.8999999761581421,
274
+ "rewards/chosen": 0.1461164504289627,
275
+ "rewards/margins": 4.050145626068115,
276
+ "rewards/rejected": -3.904029130935669,
277
+ "step": 170
278
+ },
279
+ {
280
+ "epoch": 0.23,
281
+ "learning_rate": 1.9271948608137044e-07,
282
+ "logits/chosen": -2.8557610511779785,
283
+ "logits/rejected": -2.855731725692749,
284
+ "logps/chosen": -338.60076904296875,
285
+ "logps/rejected": -448.8922424316406,
286
+ "loss": 0.1421,
287
+ "rewards/accuracies": 0.925000011920929,
288
+ "rewards/chosen": 0.21355919539928436,
289
+ "rewards/margins": 4.191808223724365,
290
+ "rewards/rejected": -3.9782490730285645,
291
+ "step": 180
292
+ },
293
+ {
294
+ "epoch": 0.24,
295
+ "learning_rate": 2.0342612419700214e-07,
296
+ "logits/chosen": -2.8638434410095215,
297
+ "logits/rejected": -2.877293825149536,
298
+ "logps/chosen": -347.19573974609375,
299
+ "logps/rejected": -469.17755126953125,
300
+ "loss": 0.1381,
301
+ "rewards/accuracies": 1.0,
302
+ "rewards/chosen": -0.14256651699543,
303
+ "rewards/margins": 3.96684193611145,
304
+ "rewards/rejected": -4.109408378601074,
305
+ "step": 190
306
+ },
307
+ {
308
+ "epoch": 0.26,
309
+ "learning_rate": 2.1413276231263384e-07,
310
+ "logits/chosen": -2.829555034637451,
311
+ "logits/rejected": -2.85453462600708,
312
+ "logps/chosen": -364.0372009277344,
313
+ "logps/rejected": -442.7489318847656,
314
+ "loss": 0.1427,
315
+ "rewards/accuracies": 0.9624999761581421,
316
+ "rewards/chosen": 0.18970072269439697,
317
+ "rewards/margins": 5.029218673706055,
318
+ "rewards/rejected": -4.839517593383789,
319
+ "step": 200
320
+ },
321
+ {
322
+ "epoch": 0.26,
323
+ "eval_logits/chosen": -2.8512933254241943,
324
+ "eval_logits/rejected": -2.8302505016326904,
325
+ "eval_logps/chosen": -305.8147888183594,
326
+ "eval_logps/rejected": -387.1728210449219,
327
+ "eval_loss": 0.12157174944877625,
328
+ "eval_rewards/accuracies": 0.96875,
329
+ "eval_rewards/chosen": 0.28965064883232117,
330
+ "eval_rewards/margins": 4.73240327835083,
331
+ "eval_rewards/rejected": -4.442752361297607,
332
+ "eval_runtime": 38.702,
333
+ "eval_samples_per_second": 12.919,
334
+ "eval_steps_per_second": 0.413,
335
+ "step": 200
336
+ },
337
+ {
338
+ "epoch": 0.27,
339
+ "learning_rate": 2.248394004282655e-07,
340
+ "logits/chosen": -2.817666530609131,
341
+ "logits/rejected": -2.8465371131896973,
342
+ "logps/chosen": -325.3854675292969,
343
+ "logps/rejected": -439.5003356933594,
344
+ "loss": 0.1413,
345
+ "rewards/accuracies": 0.9375,
346
+ "rewards/chosen": 0.21560493111610413,
347
+ "rewards/margins": 4.514598369598389,
348
+ "rewards/rejected": -4.298993110656738,
349
+ "step": 210
350
+ },
351
+ {
352
+ "epoch": 0.28,
353
+ "learning_rate": 2.355460385438972e-07,
354
+ "logits/chosen": -2.7650692462921143,
355
+ "logits/rejected": -2.7801504135131836,
356
+ "logps/chosen": -326.321533203125,
357
+ "logps/rejected": -456.98663330078125,
358
+ "loss": 0.1332,
359
+ "rewards/accuracies": 0.9750000238418579,
360
+ "rewards/chosen": 0.027444612234830856,
361
+ "rewards/margins": 5.077801704406738,
362
+ "rewards/rejected": -5.050357818603516,
363
+ "step": 220
364
+ },
365
+ {
366
+ "epoch": 0.3,
367
+ "learning_rate": 2.462526766595289e-07,
368
+ "logits/chosen": -2.788020610809326,
369
+ "logits/rejected": -2.7895946502685547,
370
+ "logps/chosen": -324.4822998046875,
371
+ "logps/rejected": -439.76397705078125,
372
+ "loss": 0.1356,
373
+ "rewards/accuracies": 0.9375,
374
+ "rewards/chosen": 0.29427874088287354,
375
+ "rewards/margins": 5.166212558746338,
376
+ "rewards/rejected": -4.871933460235596,
377
+ "step": 230
378
+ },
379
+ {
380
+ "epoch": 0.31,
381
+ "learning_rate": 2.569593147751606e-07,
382
+ "logits/chosen": -2.6995949745178223,
383
+ "logits/rejected": -2.7345399856567383,
384
+ "logps/chosen": -356.4814758300781,
385
+ "logps/rejected": -490.60931396484375,
386
+ "loss": 0.1074,
387
+ "rewards/accuracies": 0.9750000238418579,
388
+ "rewards/chosen": -0.35535210371017456,
389
+ "rewards/margins": 6.1955766677856445,
390
+ "rewards/rejected": -6.550928592681885,
391
+ "step": 240
392
+ },
393
+ {
394
+ "epoch": 0.32,
395
+ "learning_rate": 2.676659528907923e-07,
396
+ "logits/chosen": -2.6892549991607666,
397
+ "logits/rejected": -2.694087505340576,
398
+ "logps/chosen": -305.6263122558594,
399
+ "logps/rejected": -387.88543701171875,
400
+ "loss": 0.0979,
401
+ "rewards/accuracies": 0.9750000238418579,
402
+ "rewards/chosen": -0.6652821898460388,
403
+ "rewards/margins": 4.929129600524902,
404
+ "rewards/rejected": -5.5944108963012695,
405
+ "step": 250
406
+ },
407
+ {
408
+ "epoch": 0.33,
409
+ "learning_rate": 2.7837259100642395e-07,
410
+ "logits/chosen": -2.73167085647583,
411
+ "logits/rejected": -2.7620654106140137,
412
+ "logps/chosen": -408.2175598144531,
413
+ "logps/rejected": -449.8201599121094,
414
+ "loss": 0.1298,
415
+ "rewards/accuracies": 0.9624999761581421,
416
+ "rewards/chosen": -0.9623678922653198,
417
+ "rewards/margins": 5.48039436340332,
418
+ "rewards/rejected": -6.4427618980407715,
419
+ "step": 260
420
+ },
421
+ {
422
+ "epoch": 0.35,
423
+ "learning_rate": 2.890792291220557e-07,
424
+ "logits/chosen": -2.7657806873321533,
425
+ "logits/rejected": -2.802060604095459,
426
+ "logps/chosen": -384.2090148925781,
427
+ "logps/rejected": -481.82696533203125,
428
+ "loss": 0.1181,
429
+ "rewards/accuracies": 0.9624999761581421,
430
+ "rewards/chosen": 0.16663847863674164,
431
+ "rewards/margins": 5.502591133117676,
432
+ "rewards/rejected": -5.335952281951904,
433
+ "step": 270
434
+ },
435
+ {
436
+ "epoch": 0.36,
437
+ "learning_rate": 2.9978586723768735e-07,
438
+ "logits/chosen": -2.673283815383911,
439
+ "logits/rejected": -2.707296848297119,
440
+ "logps/chosen": -312.5271911621094,
441
+ "logps/rejected": -411.64031982421875,
442
+ "loss": 0.0947,
443
+ "rewards/accuracies": 0.9375,
444
+ "rewards/chosen": -0.9499552845954895,
445
+ "rewards/margins": 4.852605819702148,
446
+ "rewards/rejected": -5.802561283111572,
447
+ "step": 280
448
+ },
449
+ {
450
+ "epoch": 0.37,
451
+ "learning_rate": 3.1049250535331905e-07,
452
+ "logits/chosen": -2.623725175857544,
453
+ "logits/rejected": -2.7073614597320557,
454
+ "logps/chosen": -391.2462158203125,
455
+ "logps/rejected": -474.2684631347656,
456
+ "loss": 0.1168,
457
+ "rewards/accuracies": 0.9375,
458
+ "rewards/chosen": -1.0791637897491455,
459
+ "rewards/margins": 7.065374851226807,
460
+ "rewards/rejected": -8.144537925720215,
461
+ "step": 290
462
+ },
463
+ {
464
+ "epoch": 0.39,
465
+ "learning_rate": 3.211991434689507e-07,
466
+ "logits/chosen": -2.6202073097229004,
467
+ "logits/rejected": -2.652608633041382,
468
+ "logps/chosen": -341.9140319824219,
469
+ "logps/rejected": -462.9012145996094,
470
+ "loss": 0.0944,
471
+ "rewards/accuracies": 0.987500011920929,
472
+ "rewards/chosen": -0.275757372379303,
473
+ "rewards/margins": 5.93372106552124,
474
+ "rewards/rejected": -6.20947790145874,
475
+ "step": 300
476
+ },
477
+ {
478
+ "epoch": 0.39,
479
+ "eval_logits/chosen": -2.6932637691497803,
480
+ "eval_logits/rejected": -2.6872053146362305,
481
+ "eval_logps/chosen": -311.619873046875,
482
+ "eval_logps/rejected": -409.2980041503906,
483
+ "eval_loss": 0.11095032095909119,
484
+ "eval_rewards/accuracies": 0.90625,
485
+ "eval_rewards/chosen": -0.29085665941238403,
486
+ "eval_rewards/margins": 6.364411354064941,
487
+ "eval_rewards/rejected": -6.65526819229126,
488
+ "eval_runtime": 38.7504,
489
+ "eval_samples_per_second": 12.903,
490
+ "eval_steps_per_second": 0.413,
491
+ "step": 300
492
+ },
493
+ {
494
+ "epoch": 0.4,
495
+ "learning_rate": 3.3190578158458244e-07,
496
+ "logits/chosen": -2.6386542320251465,
497
+ "logits/rejected": -2.7159385681152344,
498
+ "logps/chosen": -368.5979919433594,
499
+ "logps/rejected": -466.84783935546875,
500
+ "loss": 0.131,
501
+ "rewards/accuracies": 0.9750000238418579,
502
+ "rewards/chosen": -0.8093490600585938,
503
+ "rewards/margins": 7.050684452056885,
504
+ "rewards/rejected": -7.8600335121154785,
505
+ "step": 310
506
+ },
507
+ {
508
+ "epoch": 0.41,
509
+ "learning_rate": 3.426124197002141e-07,
510
+ "logits/chosen": -2.571882486343384,
511
+ "logits/rejected": -2.6551308631896973,
512
+ "logps/chosen": -361.48394775390625,
513
+ "logps/rejected": -489.70989990234375,
514
+ "loss": 0.0905,
515
+ "rewards/accuracies": 0.987500011920929,
516
+ "rewards/chosen": -0.7510203123092651,
517
+ "rewards/margins": 8.015697479248047,
518
+ "rewards/rejected": -8.766717910766602,
519
+ "step": 320
520
+ },
521
+ {
522
+ "epoch": 0.42,
523
+ "learning_rate": 3.533190578158458e-07,
524
+ "logits/chosen": -2.5930895805358887,
525
+ "logits/rejected": -2.6723227500915527,
526
+ "logps/chosen": -384.87664794921875,
527
+ "logps/rejected": -509.010986328125,
528
+ "loss": 0.1232,
529
+ "rewards/accuracies": 0.9750000238418579,
530
+ "rewards/chosen": -0.44466814398765564,
531
+ "rewards/margins": 6.863368988037109,
532
+ "rewards/rejected": -7.308036804199219,
533
+ "step": 330
534
+ },
535
+ {
536
+ "epoch": 0.44,
537
+ "learning_rate": 3.640256959314775e-07,
538
+ "logits/chosen": -2.5658717155456543,
539
+ "logits/rejected": -2.62716007232666,
540
+ "logps/chosen": -304.2865295410156,
541
+ "logps/rejected": -435.2959899902344,
542
+ "loss": 0.0874,
543
+ "rewards/accuracies": 0.9750000238418579,
544
+ "rewards/chosen": -0.5686666369438171,
545
+ "rewards/margins": 6.669247627258301,
546
+ "rewards/rejected": -7.237914085388184,
547
+ "step": 340
548
+ },
549
+ {
550
+ "epoch": 0.45,
551
+ "learning_rate": 3.747323340471092e-07,
552
+ "logits/chosen": -2.584165096282959,
553
+ "logits/rejected": -2.70393967628479,
554
+ "logps/chosen": -364.13262939453125,
555
+ "logps/rejected": -477.5604553222656,
556
+ "loss": 0.1015,
557
+ "rewards/accuracies": 0.9750000238418579,
558
+ "rewards/chosen": -0.5823951363563538,
559
+ "rewards/margins": 7.484101295471191,
560
+ "rewards/rejected": -8.066494941711426,
561
+ "step": 350
562
+ },
563
+ {
564
+ "epoch": 0.46,
565
+ "learning_rate": 3.854389721627409e-07,
566
+ "logits/chosen": -2.5895907878875732,
567
+ "logits/rejected": -2.646876573562622,
568
+ "logps/chosen": -355.0018005371094,
569
+ "logps/rejected": -442.65948486328125,
570
+ "loss": 0.0896,
571
+ "rewards/accuracies": 0.987500011920929,
572
+ "rewards/chosen": -0.8058759570121765,
573
+ "rewards/margins": 8.065896987915039,
574
+ "rewards/rejected": -8.871771812438965,
575
+ "step": 360
576
+ },
577
+ {
578
+ "epoch": 0.48,
579
+ "learning_rate": 3.961456102783726e-07,
580
+ "logits/chosen": -2.615499973297119,
581
+ "logits/rejected": -2.6612184047698975,
582
+ "logps/chosen": -308.342041015625,
583
+ "logps/rejected": -432.08319091796875,
584
+ "loss": 0.0821,
585
+ "rewards/accuracies": 0.9375,
586
+ "rewards/chosen": -0.5296161770820618,
587
+ "rewards/margins": 7.243483066558838,
588
+ "rewards/rejected": -7.773098945617676,
589
+ "step": 370
590
+ },
591
+ {
592
+ "epoch": 0.49,
593
+ "learning_rate": 4.068522483940043e-07,
594
+ "logits/chosen": -2.6956448554992676,
595
+ "logits/rejected": -2.7061805725097656,
596
+ "logps/chosen": -346.4541931152344,
597
+ "logps/rejected": -481.19989013671875,
598
+ "loss": 0.1104,
599
+ "rewards/accuracies": 0.949999988079071,
600
+ "rewards/chosen": -0.6448992490768433,
601
+ "rewards/margins": 7.711002349853516,
602
+ "rewards/rejected": -8.355902671813965,
603
+ "step": 380
604
+ },
605
+ {
606
+ "epoch": 0.5,
607
+ "learning_rate": 4.175588865096359e-07,
608
+ "logits/chosen": -2.6077234745025635,
609
+ "logits/rejected": -2.6278557777404785,
610
+ "logps/chosen": -353.8262634277344,
611
+ "logps/rejected": -447.3440856933594,
612
+ "loss": 0.0958,
613
+ "rewards/accuracies": 0.987500011920929,
614
+ "rewards/chosen": -0.5978514552116394,
615
+ "rewards/margins": 7.370479583740234,
616
+ "rewards/rejected": -7.968331336975098,
617
+ "step": 390
618
+ },
619
+ {
620
+ "epoch": 0.51,
621
+ "learning_rate": 4.282655246252677e-07,
622
+ "logits/chosen": -2.603065252304077,
623
+ "logits/rejected": -2.675497531890869,
624
+ "logps/chosen": -355.2611999511719,
625
+ "logps/rejected": -411.75732421875,
626
+ "loss": 0.1039,
627
+ "rewards/accuracies": 0.9624999761581421,
628
+ "rewards/chosen": -0.2750840187072754,
629
+ "rewards/margins": 7.0222907066345215,
630
+ "rewards/rejected": -7.2973737716674805,
631
+ "step": 400
632
+ },
633
+ {
634
+ "epoch": 0.51,
635
+ "eval_logits/chosen": -2.6301259994506836,
636
+ "eval_logits/rejected": -2.6286230087280273,
637
+ "eval_logps/chosen": -315.64288330078125,
638
+ "eval_logps/rejected": -421.1318359375,
639
+ "eval_loss": 0.07803654670715332,
640
+ "eval_rewards/accuracies": 0.984375,
641
+ "eval_rewards/chosen": -0.6931607723236084,
642
+ "eval_rewards/margins": 7.145491600036621,
643
+ "eval_rewards/rejected": -7.83865213394165,
644
+ "eval_runtime": 38.7861,
645
+ "eval_samples_per_second": 12.891,
646
+ "eval_steps_per_second": 0.413,
647
+ "step": 400
648
+ },
649
+ {
650
+ "epoch": 0.53,
651
+ "learning_rate": 4.389721627408993e-07,
652
+ "logits/chosen": -2.5576305389404297,
653
+ "logits/rejected": -2.602813243865967,
654
+ "logps/chosen": -361.10797119140625,
655
+ "logps/rejected": -468.213134765625,
656
+ "loss": 0.1042,
657
+ "rewards/accuracies": 0.9375,
658
+ "rewards/chosen": -0.9836179614067078,
659
+ "rewards/margins": 6.6080522537231445,
660
+ "rewards/rejected": -7.591670989990234,
661
+ "step": 410
662
+ },
663
+ {
664
+ "epoch": 0.54,
665
+ "learning_rate": 4.49678800856531e-07,
666
+ "logits/chosen": -2.521080732345581,
667
+ "logits/rejected": -2.5644307136535645,
668
+ "logps/chosen": -325.7511901855469,
669
+ "logps/rejected": -407.7994384765625,
670
+ "loss": 0.1057,
671
+ "rewards/accuracies": 0.9375,
672
+ "rewards/chosen": -0.45771685242652893,
673
+ "rewards/margins": 7.0977678298950195,
674
+ "rewards/rejected": -7.555483818054199,
675
+ "step": 420
676
+ },
677
+ {
678
+ "epoch": 0.55,
679
+ "learning_rate": 4.603854389721627e-07,
680
+ "logits/chosen": -2.5245959758758545,
681
+ "logits/rejected": -2.559770107269287,
682
+ "logps/chosen": -340.15087890625,
683
+ "logps/rejected": -485.052490234375,
684
+ "loss": 0.084,
685
+ "rewards/accuracies": 0.9375,
686
+ "rewards/chosen": -0.994246780872345,
687
+ "rewards/margins": 7.357940673828125,
688
+ "rewards/rejected": -8.35218620300293,
689
+ "step": 430
690
+ },
691
+ {
692
+ "epoch": 0.57,
693
+ "learning_rate": 4.710920770877944e-07,
694
+ "logits/chosen": -2.401303768157959,
695
+ "logits/rejected": -2.548125743865967,
696
+ "logps/chosen": -358.9648742675781,
697
+ "logps/rejected": -462.87890625,
698
+ "loss": 0.1172,
699
+ "rewards/accuracies": 0.949999988079071,
700
+ "rewards/chosen": -1.293526530265808,
701
+ "rewards/margins": 7.095101833343506,
702
+ "rewards/rejected": -8.388628005981445,
703
+ "step": 440
704
+ },
705
+ {
706
+ "epoch": 0.58,
707
+ "learning_rate": 4.817987152034261e-07,
708
+ "logits/chosen": -2.4654183387756348,
709
+ "logits/rejected": -2.560048818588257,
710
+ "logps/chosen": -291.2701721191406,
711
+ "logps/rejected": -362.7830505371094,
712
+ "loss": 0.0959,
713
+ "rewards/accuracies": 0.925000011920929,
714
+ "rewards/chosen": -1.4040035009384155,
715
+ "rewards/margins": 5.726696968078613,
716
+ "rewards/rejected": -7.130700588226318,
717
+ "step": 450
718
+ },
719
+ {
720
+ "epoch": 0.59,
721
+ "learning_rate": 4.925053533190578e-07,
722
+ "logits/chosen": -2.489262104034424,
723
+ "logits/rejected": -2.5457305908203125,
724
+ "logps/chosen": -356.9480285644531,
725
+ "logps/rejected": -435.594970703125,
726
+ "loss": 0.1132,
727
+ "rewards/accuracies": 0.9750000238418579,
728
+ "rewards/chosen": -1.3584586381912231,
729
+ "rewards/margins": 6.3141889572143555,
730
+ "rewards/rejected": -7.672647953033447,
731
+ "step": 460
732
+ },
733
+ {
734
+ "epoch": 0.6,
735
+ "learning_rate": 4.996429421566293e-07,
736
+ "logits/chosen": -2.5229034423828125,
737
+ "logits/rejected": -2.565725326538086,
738
+ "logps/chosen": -326.0317077636719,
739
+ "logps/rejected": -448.7723083496094,
740
+ "loss": 0.1051,
741
+ "rewards/accuracies": 0.9375,
742
+ "rewards/chosen": -1.5931789875030518,
743
+ "rewards/margins": 7.049294471740723,
744
+ "rewards/rejected": -8.642473220825195,
745
+ "step": 470
746
+ },
747
+ {
748
+ "epoch": 0.62,
749
+ "learning_rate": 4.98452749345394e-07,
750
+ "logits/chosen": -2.5022709369659424,
751
+ "logits/rejected": -2.555453062057495,
752
+ "logps/chosen": -361.46563720703125,
753
+ "logps/rejected": -498.7660217285156,
754
+ "loss": 0.1386,
755
+ "rewards/accuracies": 0.9375,
756
+ "rewards/chosen": -1.8986074924468994,
757
+ "rewards/margins": 6.340726375579834,
758
+ "rewards/rejected": -8.239333152770996,
759
+ "step": 480
760
+ },
761
+ {
762
+ "epoch": 0.63,
763
+ "learning_rate": 4.972625565341585e-07,
764
+ "logits/chosen": -2.4549243450164795,
765
+ "logits/rejected": -2.5045337677001953,
766
+ "logps/chosen": -320.4005432128906,
767
+ "logps/rejected": -437.33612060546875,
768
+ "loss": 0.0958,
769
+ "rewards/accuracies": 0.9750000238418579,
770
+ "rewards/chosen": -1.8746875524520874,
771
+ "rewards/margins": 6.6805620193481445,
772
+ "rewards/rejected": -8.555249214172363,
773
+ "step": 490
774
+ },
775
+ {
776
+ "epoch": 0.64,
777
+ "learning_rate": 4.960723637229232e-07,
778
+ "logits/chosen": -2.448908567428589,
779
+ "logits/rejected": -2.458101272583008,
780
+ "logps/chosen": -355.0153503417969,
781
+ "logps/rejected": -504.32330322265625,
782
+ "loss": 0.0762,
783
+ "rewards/accuracies": 0.987500011920929,
784
+ "rewards/chosen": -1.4554470777511597,
785
+ "rewards/margins": 8.177068710327148,
786
+ "rewards/rejected": -9.632516860961914,
787
+ "step": 500
788
+ },
789
+ {
790
+ "epoch": 0.64,
791
+ "eval_logits/chosen": -2.504735231399536,
792
+ "eval_logits/rejected": -2.5092720985412598,
793
+ "eval_logps/chosen": -323.16851806640625,
794
+ "eval_logps/rejected": -433.9158020019531,
795
+ "eval_loss": 0.08059512078762054,
796
+ "eval_rewards/accuracies": 0.953125,
797
+ "eval_rewards/chosen": -1.4457205533981323,
798
+ "eval_rewards/margins": 7.671328544616699,
799
+ "eval_rewards/rejected": -9.117048263549805,
800
+ "eval_runtime": 38.7512,
801
+ "eval_samples_per_second": 12.903,
802
+ "eval_steps_per_second": 0.413,
803
+ "step": 500
804
+ },
805
+ {
806
+ "epoch": 0.66,
807
+ "learning_rate": 4.948821709116876e-07,
808
+ "logits/chosen": -2.376183032989502,
809
+ "logits/rejected": -2.455298900604248,
810
+ "logps/chosen": -485.12603759765625,
811
+ "logps/rejected": -551.7554931640625,
812
+ "loss": 0.1056,
813
+ "rewards/accuracies": 0.949999988079071,
814
+ "rewards/chosen": -1.5901005268096924,
815
+ "rewards/margins": 8.771623611450195,
816
+ "rewards/rejected": -10.361722946166992,
817
+ "step": 510
818
+ },
819
+ {
820
+ "epoch": 0.67,
821
+ "learning_rate": 4.936919781004522e-07,
822
+ "logits/chosen": -2.470151424407959,
823
+ "logits/rejected": -2.5587172508239746,
824
+ "logps/chosen": -377.3062438964844,
825
+ "logps/rejected": -507.6141052246094,
826
+ "loss": 0.0955,
827
+ "rewards/accuracies": 1.0,
828
+ "rewards/chosen": -1.233569860458374,
829
+ "rewards/margins": 8.123286247253418,
830
+ "rewards/rejected": -9.356857299804688,
831
+ "step": 520
832
+ },
833
+ {
834
+ "epoch": 0.68,
835
+ "learning_rate": 4.925017852892168e-07,
836
+ "logits/chosen": -2.5230183601379395,
837
+ "logits/rejected": -2.603940725326538,
838
+ "logps/chosen": -362.92333984375,
839
+ "logps/rejected": -481.7613220214844,
840
+ "loss": 0.0683,
841
+ "rewards/accuracies": 0.9624999761581421,
842
+ "rewards/chosen": -1.9068357944488525,
843
+ "rewards/margins": 7.921334743499756,
844
+ "rewards/rejected": -9.828168869018555,
845
+ "step": 530
846
+ },
847
+ {
848
+ "epoch": 0.69,
849
+ "learning_rate": 4.913115924779814e-07,
850
+ "logits/chosen": -2.438596248626709,
851
+ "logits/rejected": -2.562830924987793,
852
+ "logps/chosen": -386.5306701660156,
853
+ "logps/rejected": -499.86444091796875,
854
+ "loss": 0.0677,
855
+ "rewards/accuracies": 1.0,
856
+ "rewards/chosen": -1.2040196657180786,
857
+ "rewards/margins": 8.880427360534668,
858
+ "rewards/rejected": -10.084446907043457,
859
+ "step": 540
860
+ },
861
+ {
862
+ "epoch": 0.71,
863
+ "learning_rate": 4.90121399666746e-07,
864
+ "logits/chosen": -2.4589312076568604,
865
+ "logits/rejected": -2.524345874786377,
866
+ "logps/chosen": -332.1251220703125,
867
+ "logps/rejected": -433.63787841796875,
868
+ "loss": 0.1309,
869
+ "rewards/accuracies": 0.9375,
870
+ "rewards/chosen": -1.1124681234359741,
871
+ "rewards/margins": 7.2715253829956055,
872
+ "rewards/rejected": -8.383993148803711,
873
+ "step": 550
874
+ },
875
+ {
876
+ "epoch": 0.72,
877
+ "learning_rate": 4.889312068555106e-07,
878
+ "logits/chosen": -2.58622407913208,
879
+ "logits/rejected": -2.60271954536438,
880
+ "logps/chosen": -271.59014892578125,
881
+ "logps/rejected": -417.29833984375,
882
+ "loss": 0.1275,
883
+ "rewards/accuracies": 0.9125000238418579,
884
+ "rewards/chosen": -1.1244533061981201,
885
+ "rewards/margins": 6.384497165679932,
886
+ "rewards/rejected": -7.508950710296631,
887
+ "step": 560
888
+ },
889
+ {
890
+ "epoch": 0.73,
891
+ "learning_rate": 4.877410140442752e-07,
892
+ "logits/chosen": -2.4364261627197266,
893
+ "logits/rejected": -2.4858317375183105,
894
+ "logps/chosen": -350.3711853027344,
895
+ "logps/rejected": -449.4051818847656,
896
+ "loss": 0.0982,
897
+ "rewards/accuracies": 0.9375,
898
+ "rewards/chosen": -1.0685746669769287,
899
+ "rewards/margins": 7.424908638000488,
900
+ "rewards/rejected": -8.49348258972168,
901
+ "step": 570
902
+ },
903
+ {
904
+ "epoch": 0.75,
905
+ "learning_rate": 4.865508212330398e-07,
906
+ "logits/chosen": -2.441240072250366,
907
+ "logits/rejected": -2.527020215988159,
908
+ "logps/chosen": -366.98150634765625,
909
+ "logps/rejected": -525.4156494140625,
910
+ "loss": 0.0867,
911
+ "rewards/accuracies": 1.0,
912
+ "rewards/chosen": -0.9126319885253906,
913
+ "rewards/margins": 9.27831745147705,
914
+ "rewards/rejected": -10.190949440002441,
915
+ "step": 580
916
+ },
917
+ {
918
+ "epoch": 0.76,
919
+ "learning_rate": 4.853606284218044e-07,
920
+ "logits/chosen": -2.3090662956237793,
921
+ "logits/rejected": -2.3255538940429688,
922
+ "logps/chosen": -371.3923034667969,
923
+ "logps/rejected": -526.1776123046875,
924
+ "loss": 0.1095,
925
+ "rewards/accuracies": 0.9375,
926
+ "rewards/chosen": -2.190389633178711,
927
+ "rewards/margins": 9.64104175567627,
928
+ "rewards/rejected": -11.831432342529297,
929
+ "step": 590
930
+ },
931
+ {
932
+ "epoch": 0.77,
933
+ "learning_rate": 4.841704356105689e-07,
934
+ "logits/chosen": -2.334197521209717,
935
+ "logits/rejected": -2.423285484313965,
936
+ "logps/chosen": -369.0033264160156,
937
+ "logps/rejected": -506.4518127441406,
938
+ "loss": 0.0959,
939
+ "rewards/accuracies": 0.8999999761581421,
940
+ "rewards/chosen": -1.7217298746109009,
941
+ "rewards/margins": 8.07056999206543,
942
+ "rewards/rejected": -9.7923002243042,
943
+ "step": 600
944
+ },
945
+ {
946
+ "epoch": 0.77,
947
+ "eval_logits/chosen": -2.467820405960083,
948
+ "eval_logits/rejected": -2.440288782119751,
949
+ "eval_logps/chosen": -318.6737060546875,
950
+ "eval_logps/rejected": -428.9326171875,
951
+ "eval_loss": 0.07413332909345627,
952
+ "eval_rewards/accuracies": 0.984375,
953
+ "eval_rewards/chosen": -0.9962404370307922,
954
+ "eval_rewards/margins": 7.622487545013428,
955
+ "eval_rewards/rejected": -8.618727684020996,
956
+ "eval_runtime": 38.7439,
957
+ "eval_samples_per_second": 12.905,
958
+ "eval_steps_per_second": 0.413,
959
+ "step": 600
960
+ },
961
+ {
962
+ "epoch": 0.78,
963
+ "learning_rate": 4.829802427993334e-07,
964
+ "logits/chosen": -2.3268227577209473,
965
+ "logits/rejected": -2.3746628761291504,
966
+ "logps/chosen": -404.0111083984375,
967
+ "logps/rejected": -492.5167541503906,
968
+ "loss": 0.0859,
969
+ "rewards/accuracies": 0.9750000238418579,
970
+ "rewards/chosen": -0.8345616459846497,
971
+ "rewards/margins": 8.07560920715332,
972
+ "rewards/rejected": -8.910171508789062,
973
+ "step": 610
974
+ },
975
+ {
976
+ "epoch": 0.8,
977
+ "learning_rate": 4.81790049988098e-07,
978
+ "logits/chosen": -2.415301561355591,
979
+ "logits/rejected": -2.4919333457946777,
980
+ "logps/chosen": -388.5622253417969,
981
+ "logps/rejected": -531.6051025390625,
982
+ "loss": 0.0631,
983
+ "rewards/accuracies": 0.987500011920929,
984
+ "rewards/chosen": -1.3940558433532715,
985
+ "rewards/margins": 7.842892646789551,
986
+ "rewards/rejected": -9.23694896697998,
987
+ "step": 620
988
+ },
989
+ {
990
+ "epoch": 0.81,
991
+ "learning_rate": 4.805998571768626e-07,
992
+ "logits/chosen": -2.310925245285034,
993
+ "logits/rejected": -2.42446231842041,
994
+ "logps/chosen": -342.0956115722656,
995
+ "logps/rejected": -516.9351196289062,
996
+ "loss": 0.1142,
997
+ "rewards/accuracies": 0.925000011920929,
998
+ "rewards/chosen": -1.3800750970840454,
999
+ "rewards/margins": 8.762998580932617,
1000
+ "rewards/rejected": -10.143075942993164,
1001
+ "step": 630
1002
+ },
1003
+ {
1004
+ "epoch": 0.82,
1005
+ "learning_rate": 4.794096643656272e-07,
1006
+ "logits/chosen": -2.280027151107788,
1007
+ "logits/rejected": -2.31703782081604,
1008
+ "logps/chosen": -409.70379638671875,
1009
+ "logps/rejected": -529.5406494140625,
1010
+ "loss": 0.0723,
1011
+ "rewards/accuracies": 0.987500011920929,
1012
+ "rewards/chosen": -1.3363559246063232,
1013
+ "rewards/margins": 10.320574760437012,
1014
+ "rewards/rejected": -11.656930923461914,
1015
+ "step": 640
1016
+ },
1017
+ {
1018
+ "epoch": 0.84,
1019
+ "learning_rate": 4.782194715543918e-07,
1020
+ "logits/chosen": -2.276779890060425,
1021
+ "logits/rejected": -2.343441963195801,
1022
+ "logps/chosen": -348.50531005859375,
1023
+ "logps/rejected": -521.2000122070312,
1024
+ "loss": 0.0902,
1025
+ "rewards/accuracies": 0.9624999761581421,
1026
+ "rewards/chosen": -1.5391457080841064,
1027
+ "rewards/margins": 9.673690795898438,
1028
+ "rewards/rejected": -11.212836265563965,
1029
+ "step": 650
1030
+ },
1031
+ {
1032
+ "epoch": 0.85,
1033
+ "learning_rate": 4.770292787431564e-07,
1034
+ "logits/chosen": -2.3436553478240967,
1035
+ "logits/rejected": -2.3175175189971924,
1036
+ "logps/chosen": -386.4251403808594,
1037
+ "logps/rejected": -530.1958618164062,
1038
+ "loss": 0.0787,
1039
+ "rewards/accuracies": 0.9624999761581421,
1040
+ "rewards/chosen": -2.0843443870544434,
1041
+ "rewards/margins": 9.766562461853027,
1042
+ "rewards/rejected": -11.850906372070312,
1043
+ "step": 660
1044
+ },
1045
+ {
1046
+ "epoch": 0.86,
1047
+ "learning_rate": 4.7583908593192097e-07,
1048
+ "logits/chosen": -2.2515616416931152,
1049
+ "logits/rejected": -2.2762718200683594,
1050
+ "logps/chosen": -396.88751220703125,
1051
+ "logps/rejected": -541.3609619140625,
1052
+ "loss": 0.0841,
1053
+ "rewards/accuracies": 0.9750000238418579,
1054
+ "rewards/chosen": -2.3059911727905273,
1055
+ "rewards/margins": 9.442736625671387,
1056
+ "rewards/rejected": -11.748727798461914,
1057
+ "step": 670
1058
+ },
1059
+ {
1060
+ "epoch": 0.87,
1061
+ "learning_rate": 4.746488931206855e-07,
1062
+ "logits/chosen": -2.304055690765381,
1063
+ "logits/rejected": -2.3429813385009766,
1064
+ "logps/chosen": -353.8645935058594,
1065
+ "logps/rejected": -520.8157348632812,
1066
+ "loss": 0.0793,
1067
+ "rewards/accuracies": 0.9750000238418579,
1068
+ "rewards/chosen": -2.6302597522735596,
1069
+ "rewards/margins": 10.208868980407715,
1070
+ "rewards/rejected": -12.839129447937012,
1071
+ "step": 680
1072
+ },
1073
+ {
1074
+ "epoch": 0.89,
1075
+ "learning_rate": 4.734587003094501e-07,
1076
+ "logits/chosen": -2.326953887939453,
1077
+ "logits/rejected": -2.4166040420532227,
1078
+ "logps/chosen": -377.34356689453125,
1079
+ "logps/rejected": -494.58782958984375,
1080
+ "loss": 0.1041,
1081
+ "rewards/accuracies": 0.9375,
1082
+ "rewards/chosen": -2.1931746006011963,
1083
+ "rewards/margins": 9.344148635864258,
1084
+ "rewards/rejected": -11.537323951721191,
1085
+ "step": 690
1086
+ },
1087
+ {
1088
+ "epoch": 0.9,
1089
+ "learning_rate": 4.722685074982147e-07,
1090
+ "logits/chosen": -2.3279807567596436,
1091
+ "logits/rejected": -2.38569974899292,
1092
+ "logps/chosen": -320.0870056152344,
1093
+ "logps/rejected": -498.17706298828125,
1094
+ "loss": 0.0814,
1095
+ "rewards/accuracies": 1.0,
1096
+ "rewards/chosen": -1.5494163036346436,
1097
+ "rewards/margins": 10.105340957641602,
1098
+ "rewards/rejected": -11.654756546020508,
1099
+ "step": 700
1100
+ },
1101
+ {
1102
+ "epoch": 0.9,
1103
+ "eval_logits/chosen": -2.498293399810791,
1104
+ "eval_logits/rejected": -2.4712274074554443,
1105
+ "eval_logps/chosen": -323.183837890625,
1106
+ "eval_logps/rejected": -441.4797058105469,
1107
+ "eval_loss": 0.055789634585380554,
1108
+ "eval_rewards/accuracies": 1.0,
1109
+ "eval_rewards/chosen": -1.4472523927688599,
1110
+ "eval_rewards/margins": 8.426188468933105,
1111
+ "eval_rewards/rejected": -9.87343978881836,
1112
+ "eval_runtime": 38.7758,
1113
+ "eval_samples_per_second": 12.895,
1114
+ "eval_steps_per_second": 0.413,
1115
+ "step": 700
1116
+ },
1117
+ {
1118
+ "epoch": 0.91,
1119
+ "learning_rate": 4.710783146869793e-07,
1120
+ "logits/chosen": -2.3991808891296387,
1121
+ "logits/rejected": -2.4218363761901855,
1122
+ "logps/chosen": -314.1746520996094,
1123
+ "logps/rejected": -519.7462768554688,
1124
+ "loss": 0.0819,
1125
+ "rewards/accuracies": 0.949999988079071,
1126
+ "rewards/chosen": -1.8268877267837524,
1127
+ "rewards/margins": 10.331625938415527,
1128
+ "rewards/rejected": -12.158514022827148,
1129
+ "step": 710
1130
+ },
1131
+ {
1132
+ "epoch": 0.93,
1133
+ "learning_rate": 4.698881218757438e-07,
1134
+ "logits/chosen": -2.363438606262207,
1135
+ "logits/rejected": -2.3997836112976074,
1136
+ "logps/chosen": -305.2399597167969,
1137
+ "logps/rejected": -481.65582275390625,
1138
+ "loss": 0.0786,
1139
+ "rewards/accuracies": 0.9375,
1140
+ "rewards/chosen": -1.5438249111175537,
1141
+ "rewards/margins": 8.62690544128418,
1142
+ "rewards/rejected": -10.17072868347168,
1143
+ "step": 720
1144
+ },
1145
+ {
1146
+ "epoch": 0.94,
1147
+ "learning_rate": 4.6869792906450845e-07,
1148
+ "logits/chosen": -2.3670878410339355,
1149
+ "logits/rejected": -2.4363322257995605,
1150
+ "logps/chosen": -342.06622314453125,
1151
+ "logps/rejected": -468.9805603027344,
1152
+ "loss": 0.0719,
1153
+ "rewards/accuracies": 0.9375,
1154
+ "rewards/chosen": -1.5114291906356812,
1155
+ "rewards/margins": 8.608851432800293,
1156
+ "rewards/rejected": -10.120282173156738,
1157
+ "step": 730
1158
+ },
1159
+ {
1160
+ "epoch": 0.95,
1161
+ "learning_rate": 4.67507736253273e-07,
1162
+ "logits/chosen": -2.2785589694976807,
1163
+ "logits/rejected": -2.3089492321014404,
1164
+ "logps/chosen": -407.75048828125,
1165
+ "logps/rejected": -557.4127197265625,
1166
+ "loss": 0.0903,
1167
+ "rewards/accuracies": 0.9750000238418579,
1168
+ "rewards/chosen": -1.8981235027313232,
1169
+ "rewards/margins": 10.704629898071289,
1170
+ "rewards/rejected": -12.602753639221191,
1171
+ "step": 740
1172
+ },
1173
+ {
1174
+ "epoch": 0.96,
1175
+ "learning_rate": 4.6631754344203763e-07,
1176
+ "logits/chosen": -2.3073747158050537,
1177
+ "logits/rejected": -2.383291244506836,
1178
+ "logps/chosen": -357.61492919921875,
1179
+ "logps/rejected": -522.1990356445312,
1180
+ "loss": 0.1043,
1181
+ "rewards/accuracies": 0.925000011920929,
1182
+ "rewards/chosen": -2.5501503944396973,
1183
+ "rewards/margins": 8.703204154968262,
1184
+ "rewards/rejected": -11.253355026245117,
1185
+ "step": 750
1186
+ },
1187
+ {
1188
+ "epoch": 0.98,
1189
+ "learning_rate": 4.6512735063080217e-07,
1190
+ "logits/chosen": -2.492027521133423,
1191
+ "logits/rejected": -2.534536361694336,
1192
+ "logps/chosen": -430.7220764160156,
1193
+ "logps/rejected": -559.482666015625,
1194
+ "loss": 0.0971,
1195
+ "rewards/accuracies": 0.925000011920929,
1196
+ "rewards/chosen": -1.385508418083191,
1197
+ "rewards/margins": 9.584807395935059,
1198
+ "rewards/rejected": -10.970315933227539,
1199
+ "step": 760
1200
+ },
1201
+ {
1202
+ "epoch": 0.99,
1203
+ "learning_rate": 4.6393715781956676e-07,
1204
+ "logits/chosen": -2.3780312538146973,
1205
+ "logits/rejected": -2.37473201751709,
1206
+ "logps/chosen": -326.2506103515625,
1207
+ "logps/rejected": -496.7969665527344,
1208
+ "loss": 0.0865,
1209
+ "rewards/accuracies": 0.9750000238418579,
1210
+ "rewards/chosen": -1.0338951349258423,
1211
+ "rewards/margins": 9.33600902557373,
1212
+ "rewards/rejected": -10.369903564453125,
1213
+ "step": 770
1214
+ },
1215
+ {
1216
+ "epoch": 1.0,
1217
+ "learning_rate": 4.6274696500833135e-07,
1218
+ "logits/chosen": -2.4264612197875977,
1219
+ "logits/rejected": -2.45288348197937,
1220
+ "logps/chosen": -368.6007385253906,
1221
+ "logps/rejected": -534.6527709960938,
1222
+ "loss": 0.0645,
1223
+ "rewards/accuracies": 0.949999988079071,
1224
+ "rewards/chosen": -0.9812146425247192,
1225
+ "rewards/margins": 9.224861145019531,
1226
+ "rewards/rejected": -10.206075668334961,
1227
+ "step": 780
1228
+ },
1229
+ {
1230
+ "epoch": 1.02,
1231
+ "learning_rate": 4.6155677219709594e-07,
1232
+ "logits/chosen": -2.383737087249756,
1233
+ "logits/rejected": -2.4557416439056396,
1234
+ "logps/chosen": -401.9710388183594,
1235
+ "logps/rejected": -555.4797973632812,
1236
+ "loss": 0.0216,
1237
+ "rewards/accuracies": 1.0,
1238
+ "rewards/chosen": -1.5994548797607422,
1239
+ "rewards/margins": 12.170892715454102,
1240
+ "rewards/rejected": -13.770347595214844,
1241
+ "step": 790
1242
+ },
1243
+ {
1244
+ "epoch": 1.03,
1245
+ "learning_rate": 4.603665793858605e-07,
1246
+ "logits/chosen": -2.4060428142547607,
1247
+ "logits/rejected": -2.4426844120025635,
1248
+ "logps/chosen": -366.8950500488281,
1249
+ "logps/rejected": -558.5940551757812,
1250
+ "loss": 0.0164,
1251
+ "rewards/accuracies": 0.987500011920929,
1252
+ "rewards/chosen": -1.232177972793579,
1253
+ "rewards/margins": 12.297248840332031,
1254
+ "rewards/rejected": -13.529426574707031,
1255
+ "step": 800
1256
+ },
1257
+ {
1258
+ "epoch": 1.03,
1259
+ "eval_logits/chosen": -2.492385149002075,
1260
+ "eval_logits/rejected": -2.468630313873291,
1261
+ "eval_logps/chosen": -324.3902587890625,
1262
+ "eval_logps/rejected": -453.6976623535156,
1263
+ "eval_loss": 0.06341304630041122,
1264
+ "eval_rewards/accuracies": 0.984375,
1265
+ "eval_rewards/chosen": -1.5678963661193848,
1266
+ "eval_rewards/margins": 9.527338981628418,
1267
+ "eval_rewards/rejected": -11.095235824584961,
1268
+ "eval_runtime": 38.5408,
1269
+ "eval_samples_per_second": 12.973,
1270
+ "eval_steps_per_second": 0.415,
1271
+ "step": 800
1272
+ },
1273
+ {
1274
+ "epoch": 1.04,
1275
+ "learning_rate": 4.5917638657462507e-07,
1276
+ "logits/chosen": -2.33616042137146,
1277
+ "logits/rejected": -2.3640098571777344,
1278
+ "logps/chosen": -373.46905517578125,
1279
+ "logps/rejected": -514.2394409179688,
1280
+ "loss": 0.0209,
1281
+ "rewards/accuracies": 1.0,
1282
+ "rewards/chosen": -1.4009530544281006,
1283
+ "rewards/margins": 10.871899604797363,
1284
+ "rewards/rejected": -12.272851943969727,
1285
+ "step": 810
1286
+ },
1287
+ {
1288
+ "epoch": 1.05,
1289
+ "learning_rate": 4.5798619376338966e-07,
1290
+ "logits/chosen": -2.4044508934020996,
1291
+ "logits/rejected": -2.420480966567993,
1292
+ "logps/chosen": -347.3623962402344,
1293
+ "logps/rejected": -556.5758056640625,
1294
+ "loss": 0.0227,
1295
+ "rewards/accuracies": 1.0,
1296
+ "rewards/chosen": -1.055593490600586,
1297
+ "rewards/margins": 11.897196769714355,
1298
+ "rewards/rejected": -12.952789306640625,
1299
+ "step": 820
1300
+ },
1301
+ {
1302
+ "epoch": 1.07,
1303
+ "learning_rate": 4.567960009521542e-07,
1304
+ "logits/chosen": -2.359771490097046,
1305
+ "logits/rejected": -2.4249939918518066,
1306
+ "logps/chosen": -370.0980529785156,
1307
+ "logps/rejected": -567.7897338867188,
1308
+ "loss": 0.0131,
1309
+ "rewards/accuracies": 0.987500011920929,
1310
+ "rewards/chosen": -2.0004942417144775,
1311
+ "rewards/margins": 12.14315414428711,
1312
+ "rewards/rejected": -14.143648147583008,
1313
+ "step": 830
1314
+ },
1315
+ {
1316
+ "epoch": 1.08,
1317
+ "learning_rate": 4.5560580814091884e-07,
1318
+ "logits/chosen": -2.3424394130706787,
1319
+ "logits/rejected": -2.342963457107544,
1320
+ "logps/chosen": -385.192626953125,
1321
+ "logps/rejected": -510.11749267578125,
1322
+ "loss": 0.0098,
1323
+ "rewards/accuracies": 1.0,
1324
+ "rewards/chosen": -2.1284375190734863,
1325
+ "rewards/margins": 11.841325759887695,
1326
+ "rewards/rejected": -13.969762802124023,
1327
+ "step": 840
1328
+ },
1329
+ {
1330
+ "epoch": 1.09,
1331
+ "learning_rate": 4.5441561532968337e-07,
1332
+ "logits/chosen": -2.3772830963134766,
1333
+ "logits/rejected": -2.414663791656494,
1334
+ "logps/chosen": -375.8727722167969,
1335
+ "logps/rejected": -580.7897338867188,
1336
+ "loss": 0.0093,
1337
+ "rewards/accuracies": 1.0,
1338
+ "rewards/chosen": -3.0805163383483887,
1339
+ "rewards/margins": 12.892430305480957,
1340
+ "rewards/rejected": -15.972944259643555,
1341
+ "step": 850
1342
+ },
1343
+ {
1344
+ "epoch": 1.11,
1345
+ "learning_rate": 4.5322542251844796e-07,
1346
+ "logits/chosen": -2.3776564598083496,
1347
+ "logits/rejected": -2.409484386444092,
1348
+ "logps/chosen": -331.92431640625,
1349
+ "logps/rejected": -500.89739990234375,
1350
+ "loss": 0.0143,
1351
+ "rewards/accuracies": 1.0,
1352
+ "rewards/chosen": -2.3345754146575928,
1353
+ "rewards/margins": 11.422739028930664,
1354
+ "rewards/rejected": -13.757314682006836,
1355
+ "step": 860
1356
+ },
1357
+ {
1358
+ "epoch": 1.12,
1359
+ "learning_rate": 4.5203522970721255e-07,
1360
+ "logits/chosen": -2.3700737953186035,
1361
+ "logits/rejected": -2.397162914276123,
1362
+ "logps/chosen": -340.53094482421875,
1363
+ "logps/rejected": -506.8477478027344,
1364
+ "loss": 0.0146,
1365
+ "rewards/accuracies": 0.987500011920929,
1366
+ "rewards/chosen": -0.4118890166282654,
1367
+ "rewards/margins": 12.948440551757812,
1368
+ "rewards/rejected": -13.360328674316406,
1369
+ "step": 870
1370
+ },
1371
+ {
1372
+ "epoch": 1.13,
1373
+ "learning_rate": 4.5084503689597714e-07,
1374
+ "logits/chosen": -2.41035795211792,
1375
+ "logits/rejected": -2.4271979331970215,
1376
+ "logps/chosen": -329.87933349609375,
1377
+ "logps/rejected": -537.0123291015625,
1378
+ "loss": 0.0135,
1379
+ "rewards/accuracies": 1.0,
1380
+ "rewards/chosen": -1.4996788501739502,
1381
+ "rewards/margins": 11.888396263122559,
1382
+ "rewards/rejected": -13.388073921203613,
1383
+ "step": 880
1384
+ },
1385
+ {
1386
+ "epoch": 1.14,
1387
+ "learning_rate": 4.496548440847417e-07,
1388
+ "logits/chosen": -2.401721477508545,
1389
+ "logits/rejected": -2.447669506072998,
1390
+ "logps/chosen": -366.2709045410156,
1391
+ "logps/rejected": -519.80224609375,
1392
+ "loss": 0.0139,
1393
+ "rewards/accuracies": 1.0,
1394
+ "rewards/chosen": -1.3627954721450806,
1395
+ "rewards/margins": 12.356982231140137,
1396
+ "rewards/rejected": -13.71977710723877,
1397
+ "step": 890
1398
+ },
1399
+ {
1400
+ "epoch": 1.16,
1401
+ "learning_rate": 4.484646512735063e-07,
1402
+ "logits/chosen": -2.4436986446380615,
1403
+ "logits/rejected": -2.5449397563934326,
1404
+ "logps/chosen": -384.5765686035156,
1405
+ "logps/rejected": -555.2340087890625,
1406
+ "loss": 0.0172,
1407
+ "rewards/accuracies": 0.987500011920929,
1408
+ "rewards/chosen": -0.7870714068412781,
1409
+ "rewards/margins": 11.903576850891113,
1410
+ "rewards/rejected": -12.690648078918457,
1411
+ "step": 900
1412
+ },
1413
+ {
1414
+ "epoch": 1.16,
1415
+ "eval_logits/chosen": -2.5417840480804443,
1416
+ "eval_logits/rejected": -2.5121681690216064,
1417
+ "eval_logps/chosen": -326.2882080078125,
1418
+ "eval_logps/rejected": -464.37054443359375,
1419
+ "eval_loss": 0.06124735251069069,
1420
+ "eval_rewards/accuracies": 0.984375,
1421
+ "eval_rewards/chosen": -1.7576879262924194,
1422
+ "eval_rewards/margins": 10.404834747314453,
1423
+ "eval_rewards/rejected": -12.162521362304688,
1424
+ "eval_runtime": 38.6563,
1425
+ "eval_samples_per_second": 12.934,
1426
+ "eval_steps_per_second": 0.414,
1427
+ "step": 900
1428
+ },
1429
+ {
1430
+ "epoch": 1.17,
1431
+ "learning_rate": 4.4727445846227086e-07,
1432
+ "logits/chosen": -2.438345432281494,
1433
+ "logits/rejected": -2.4737024307250977,
1434
+ "logps/chosen": -369.38397216796875,
1435
+ "logps/rejected": -519.6220703125,
1436
+ "loss": 0.011,
1437
+ "rewards/accuracies": 0.987500011920929,
1438
+ "rewards/chosen": -1.9280792474746704,
1439
+ "rewards/margins": 12.675816535949707,
1440
+ "rewards/rejected": -14.60389518737793,
1441
+ "step": 910
1442
+ },
1443
+ {
1444
+ "epoch": 1.18,
1445
+ "learning_rate": 4.4608426565103545e-07,
1446
+ "logits/chosen": -2.450275182723999,
1447
+ "logits/rejected": -2.462500810623169,
1448
+ "logps/chosen": -343.4928283691406,
1449
+ "logps/rejected": -515.9462280273438,
1450
+ "loss": 0.0221,
1451
+ "rewards/accuracies": 0.987500011920929,
1452
+ "rewards/chosen": -2.3710033893585205,
1453
+ "rewards/margins": 13.644805908203125,
1454
+ "rewards/rejected": -16.015810012817383,
1455
+ "step": 920
1456
+ },
1457
+ {
1458
+ "epoch": 1.2,
1459
+ "learning_rate": 4.4489407283980004e-07,
1460
+ "logits/chosen": -2.423760414123535,
1461
+ "logits/rejected": -2.385545253753662,
1462
+ "logps/chosen": -370.15985107421875,
1463
+ "logps/rejected": -515.8549194335938,
1464
+ "loss": 0.0097,
1465
+ "rewards/accuracies": 1.0,
1466
+ "rewards/chosen": -2.4730286598205566,
1467
+ "rewards/margins": 12.967801094055176,
1468
+ "rewards/rejected": -15.440831184387207,
1469
+ "step": 930
1470
+ },
1471
+ {
1472
+ "epoch": 1.21,
1473
+ "learning_rate": 4.437038800285646e-07,
1474
+ "logits/chosen": -2.399423360824585,
1475
+ "logits/rejected": -2.418363094329834,
1476
+ "logps/chosen": -384.27984619140625,
1477
+ "logps/rejected": -549.5245971679688,
1478
+ "loss": 0.0156,
1479
+ "rewards/accuracies": 0.9750000238418579,
1480
+ "rewards/chosen": -3.72330904006958,
1481
+ "rewards/margins": 12.818862915039062,
1482
+ "rewards/rejected": -16.542171478271484,
1483
+ "step": 940
1484
+ },
1485
+ {
1486
+ "epoch": 1.22,
1487
+ "learning_rate": 4.4251368721732916e-07,
1488
+ "logits/chosen": -2.5278353691101074,
1489
+ "logits/rejected": -2.5364837646484375,
1490
+ "logps/chosen": -329.5386657714844,
1491
+ "logps/rejected": -519.6696166992188,
1492
+ "loss": 0.0223,
1493
+ "rewards/accuracies": 1.0,
1494
+ "rewards/chosen": -1.4351348876953125,
1495
+ "rewards/margins": 11.446606636047363,
1496
+ "rewards/rejected": -12.881741523742676,
1497
+ "step": 950
1498
+ },
1499
+ {
1500
+ "epoch": 1.23,
1501
+ "learning_rate": 4.413234944060938e-07,
1502
+ "logits/chosen": -2.527299165725708,
1503
+ "logits/rejected": -2.5759024620056152,
1504
+ "logps/chosen": -403.71063232421875,
1505
+ "logps/rejected": -589.4862670898438,
1506
+ "loss": 0.0147,
1507
+ "rewards/accuracies": 1.0,
1508
+ "rewards/chosen": -0.19414202868938446,
1509
+ "rewards/margins": 12.035063743591309,
1510
+ "rewards/rejected": -12.229207038879395,
1511
+ "step": 960
1512
+ },
1513
+ {
1514
+ "epoch": 1.25,
1515
+ "learning_rate": 4.4013330159485834e-07,
1516
+ "logits/chosen": -2.4672398567199707,
1517
+ "logits/rejected": -2.4999210834503174,
1518
+ "logps/chosen": -334.6300048828125,
1519
+ "logps/rejected": -534.4932250976562,
1520
+ "loss": 0.0255,
1521
+ "rewards/accuracies": 0.987500011920929,
1522
+ "rewards/chosen": -1.7743580341339111,
1523
+ "rewards/margins": 12.416712760925293,
1524
+ "rewards/rejected": -14.191072463989258,
1525
+ "step": 970
1526
+ },
1527
+ {
1528
+ "epoch": 1.26,
1529
+ "learning_rate": 4.3894310878362293e-07,
1530
+ "logits/chosen": -2.447817087173462,
1531
+ "logits/rejected": -2.5005249977111816,
1532
+ "logps/chosen": -338.5157470703125,
1533
+ "logps/rejected": -544.09423828125,
1534
+ "loss": 0.0229,
1535
+ "rewards/accuracies": 0.987500011920929,
1536
+ "rewards/chosen": -1.8672630786895752,
1537
+ "rewards/margins": 12.040175437927246,
1538
+ "rewards/rejected": -13.907438278198242,
1539
+ "step": 980
1540
+ },
1541
+ {
1542
+ "epoch": 1.27,
1543
+ "learning_rate": 4.377529159723875e-07,
1544
+ "logits/chosen": -2.4685416221618652,
1545
+ "logits/rejected": -2.49491548538208,
1546
+ "logps/chosen": -366.1611022949219,
1547
+ "logps/rejected": -518.9093627929688,
1548
+ "loss": 0.0079,
1549
+ "rewards/accuracies": 1.0,
1550
+ "rewards/chosen": -1.9218127727508545,
1551
+ "rewards/margins": 11.573265075683594,
1552
+ "rewards/rejected": -13.495076179504395,
1553
+ "step": 990
1554
+ },
1555
+ {
1556
+ "epoch": 1.29,
1557
+ "learning_rate": 4.365627231611521e-07,
1558
+ "logits/chosen": -2.470853805541992,
1559
+ "logits/rejected": -2.497331380844116,
1560
+ "logps/chosen": -405.1899719238281,
1561
+ "logps/rejected": -591.7445068359375,
1562
+ "loss": 0.0057,
1563
+ "rewards/accuracies": 1.0,
1564
+ "rewards/chosen": -1.9404414892196655,
1565
+ "rewards/margins": 13.470489501953125,
1566
+ "rewards/rejected": -15.410931587219238,
1567
+ "step": 1000
1568
+ },
1569
+ {
1570
+ "epoch": 1.29,
1571
+ "eval_logits/chosen": -2.5345709323883057,
1572
+ "eval_logits/rejected": -2.507004737854004,
1573
+ "eval_logps/chosen": -336.10919189453125,
1574
+ "eval_logps/rejected": -476.1966552734375,
1575
+ "eval_loss": 0.0556936077773571,
1576
+ "eval_rewards/accuracies": 0.984375,
1577
+ "eval_rewards/chosen": -2.7397918701171875,
1578
+ "eval_rewards/margins": 10.605344772338867,
1579
+ "eval_rewards/rejected": -13.345136642456055,
1580
+ "eval_runtime": 38.7118,
1581
+ "eval_samples_per_second": 12.916,
1582
+ "eval_steps_per_second": 0.413,
1583
+ "step": 1000
1584
+ },
1585
+ {
1586
+ "epoch": 1.3,
1587
+ "learning_rate": 4.3537253034991665e-07,
1588
+ "logits/chosen": -2.441990852355957,
1589
+ "logits/rejected": -2.4507715702056885,
1590
+ "logps/chosen": -329.62542724609375,
1591
+ "logps/rejected": -574.9547729492188,
1592
+ "loss": 0.0214,
1593
+ "rewards/accuracies": 0.9750000238418579,
1594
+ "rewards/chosen": -3.120880603790283,
1595
+ "rewards/margins": 13.88032054901123,
1596
+ "rewards/rejected": -17.001201629638672,
1597
+ "step": 1010
1598
+ },
1599
+ {
1600
+ "epoch": 1.31,
1601
+ "learning_rate": 4.3418233753868124e-07,
1602
+ "logits/chosen": -2.3679394721984863,
1603
+ "logits/rejected": -2.410681962966919,
1604
+ "logps/chosen": -341.8808898925781,
1605
+ "logps/rejected": -532.3084106445312,
1606
+ "loss": 0.0303,
1607
+ "rewards/accuracies": 0.987500011920929,
1608
+ "rewards/chosen": -2.787487030029297,
1609
+ "rewards/margins": 11.951956748962402,
1610
+ "rewards/rejected": -14.739442825317383,
1611
+ "step": 1020
1612
+ },
1613
+ {
1614
+ "epoch": 1.32,
1615
+ "learning_rate": 4.3299214472744583e-07,
1616
+ "logits/chosen": -2.4356143474578857,
1617
+ "logits/rejected": -2.484920024871826,
1618
+ "logps/chosen": -378.17376708984375,
1619
+ "logps/rejected": -561.7147216796875,
1620
+ "loss": 0.0212,
1621
+ "rewards/accuracies": 1.0,
1622
+ "rewards/chosen": -2.4539060592651367,
1623
+ "rewards/margins": 12.572771072387695,
1624
+ "rewards/rejected": -15.026677131652832,
1625
+ "step": 1030
1626
+ },
1627
+ {
1628
+ "epoch": 1.34,
1629
+ "learning_rate": 4.3180195191621036e-07,
1630
+ "logits/chosen": -2.4165291786193848,
1631
+ "logits/rejected": -2.3931941986083984,
1632
+ "logps/chosen": -377.8540344238281,
1633
+ "logps/rejected": -555.7592163085938,
1634
+ "loss": 0.0254,
1635
+ "rewards/accuracies": 1.0,
1636
+ "rewards/chosen": -1.2512832880020142,
1637
+ "rewards/margins": 12.33320426940918,
1638
+ "rewards/rejected": -13.58448600769043,
1639
+ "step": 1040
1640
+ },
1641
+ {
1642
+ "epoch": 1.35,
1643
+ "learning_rate": 4.30611759104975e-07,
1644
+ "logits/chosen": -2.3533992767333984,
1645
+ "logits/rejected": -2.3296687602996826,
1646
+ "logps/chosen": -418.5027770996094,
1647
+ "logps/rejected": -600.8396606445312,
1648
+ "loss": 0.0201,
1649
+ "rewards/accuracies": 1.0,
1650
+ "rewards/chosen": -2.496593952178955,
1651
+ "rewards/margins": 13.320207595825195,
1652
+ "rewards/rejected": -15.816801071166992,
1653
+ "step": 1050
1654
+ },
1655
+ {
1656
+ "epoch": 1.36,
1657
+ "learning_rate": 4.2942156629373954e-07,
1658
+ "logits/chosen": -2.246854782104492,
1659
+ "logits/rejected": -2.3130173683166504,
1660
+ "logps/chosen": -396.1013488769531,
1661
+ "logps/rejected": -553.8746337890625,
1662
+ "loss": 0.0209,
1663
+ "rewards/accuracies": 1.0,
1664
+ "rewards/chosen": -2.86772084236145,
1665
+ "rewards/margins": 13.22656536102295,
1666
+ "rewards/rejected": -16.094287872314453,
1667
+ "step": 1060
1668
+ },
1669
+ {
1670
+ "epoch": 1.38,
1671
+ "learning_rate": 4.2823137348250413e-07,
1672
+ "logits/chosen": -2.1099252700805664,
1673
+ "logits/rejected": -2.1625306606292725,
1674
+ "logps/chosen": -439.188232421875,
1675
+ "logps/rejected": -567.4981689453125,
1676
+ "loss": 0.0195,
1677
+ "rewards/accuracies": 0.987500011920929,
1678
+ "rewards/chosen": -3.4562058448791504,
1679
+ "rewards/margins": 11.824674606323242,
1680
+ "rewards/rejected": -15.280881881713867,
1681
+ "step": 1070
1682
+ },
1683
+ {
1684
+ "epoch": 1.39,
1685
+ "learning_rate": 4.270411806712687e-07,
1686
+ "logits/chosen": -2.182868480682373,
1687
+ "logits/rejected": -2.140045642852783,
1688
+ "logps/chosen": -414.1625061035156,
1689
+ "logps/rejected": -590.7791748046875,
1690
+ "loss": 0.0203,
1691
+ "rewards/accuracies": 1.0,
1692
+ "rewards/chosen": -2.0633182525634766,
1693
+ "rewards/margins": 13.505340576171875,
1694
+ "rewards/rejected": -15.568659782409668,
1695
+ "step": 1080
1696
+ },
1697
+ {
1698
+ "epoch": 1.4,
1699
+ "learning_rate": 4.258509878600333e-07,
1700
+ "logits/chosen": -2.301701068878174,
1701
+ "logits/rejected": -2.3724331855773926,
1702
+ "logps/chosen": -318.6136779785156,
1703
+ "logps/rejected": -549.11572265625,
1704
+ "loss": 0.0162,
1705
+ "rewards/accuracies": 1.0,
1706
+ "rewards/chosen": -1.7360296249389648,
1707
+ "rewards/margins": 12.463074684143066,
1708
+ "rewards/rejected": -14.199106216430664,
1709
+ "step": 1090
1710
+ },
1711
+ {
1712
+ "epoch": 1.41,
1713
+ "learning_rate": 4.2466079504879785e-07,
1714
+ "logits/chosen": -2.3375637531280518,
1715
+ "logits/rejected": -2.371568202972412,
1716
+ "logps/chosen": -355.43218994140625,
1717
+ "logps/rejected": -497.6923828125,
1718
+ "loss": 0.0296,
1719
+ "rewards/accuracies": 0.9750000238418579,
1720
+ "rewards/chosen": -1.8382488489151,
1721
+ "rewards/margins": 11.133204460144043,
1722
+ "rewards/rejected": -12.971455574035645,
1723
+ "step": 1100
1724
+ },
1725
+ {
1726
+ "epoch": 1.41,
1727
+ "eval_logits/chosen": -2.422253131866455,
1728
+ "eval_logits/rejected": -2.3856472969055176,
1729
+ "eval_logps/chosen": -327.49688720703125,
1730
+ "eval_logps/rejected": -458.99761962890625,
1731
+ "eval_loss": 0.0712868794798851,
1732
+ "eval_rewards/accuracies": 0.953125,
1733
+ "eval_rewards/chosen": -1.8785579204559326,
1734
+ "eval_rewards/margins": 9.746674537658691,
1735
+ "eval_rewards/rejected": -11.625232696533203,
1736
+ "eval_runtime": 38.5688,
1737
+ "eval_samples_per_second": 12.964,
1738
+ "eval_steps_per_second": 0.415,
1739
+ "step": 1100
1740
+ },
1741
+ {
1742
+ "epoch": 1.43,
1743
+ "learning_rate": 4.234706022375625e-07,
1744
+ "logits/chosen": -2.335549831390381,
1745
+ "logits/rejected": -2.3633885383605957,
1746
+ "logps/chosen": -334.0445251464844,
1747
+ "logps/rejected": -532.0367431640625,
1748
+ "loss": 0.0173,
1749
+ "rewards/accuracies": 0.987500011920929,
1750
+ "rewards/chosen": -1.1568909883499146,
1751
+ "rewards/margins": 12.151830673217773,
1752
+ "rewards/rejected": -13.308721542358398,
1753
+ "step": 1110
1754
+ },
1755
+ {
1756
+ "epoch": 1.44,
1757
+ "learning_rate": 4.2228040942632703e-07,
1758
+ "logits/chosen": -2.2730376720428467,
1759
+ "logits/rejected": -2.279794931411743,
1760
+ "logps/chosen": -372.47711181640625,
1761
+ "logps/rejected": -565.377197265625,
1762
+ "loss": 0.0135,
1763
+ "rewards/accuracies": 0.987500011920929,
1764
+ "rewards/chosen": -1.948188066482544,
1765
+ "rewards/margins": 12.926470756530762,
1766
+ "rewards/rejected": -14.874661445617676,
1767
+ "step": 1120
1768
+ },
1769
+ {
1770
+ "epoch": 1.45,
1771
+ "learning_rate": 4.210902166150916e-07,
1772
+ "logits/chosen": -2.1850171089172363,
1773
+ "logits/rejected": -2.2554237842559814,
1774
+ "logps/chosen": -330.89398193359375,
1775
+ "logps/rejected": -572.4408569335938,
1776
+ "loss": 0.0152,
1777
+ "rewards/accuracies": 0.987500011920929,
1778
+ "rewards/chosen": -2.2409050464630127,
1779
+ "rewards/margins": 15.152783393859863,
1780
+ "rewards/rejected": -17.393688201904297,
1781
+ "step": 1130
1782
+ },
1783
+ {
1784
+ "epoch": 1.47,
1785
+ "learning_rate": 4.199000238038562e-07,
1786
+ "logits/chosen": -2.2348155975341797,
1787
+ "logits/rejected": -2.276552200317383,
1788
+ "logps/chosen": -391.0440673828125,
1789
+ "logps/rejected": -562.8758544921875,
1790
+ "loss": 0.0083,
1791
+ "rewards/accuracies": 0.987500011920929,
1792
+ "rewards/chosen": -2.518620014190674,
1793
+ "rewards/margins": 13.422780990600586,
1794
+ "rewards/rejected": -15.941401481628418,
1795
+ "step": 1140
1796
+ },
1797
+ {
1798
+ "epoch": 1.48,
1799
+ "learning_rate": 4.187098309926208e-07,
1800
+ "logits/chosen": -2.234314441680908,
1801
+ "logits/rejected": -2.273665428161621,
1802
+ "logps/chosen": -379.77752685546875,
1803
+ "logps/rejected": -609.7650146484375,
1804
+ "loss": 0.0167,
1805
+ "rewards/accuracies": 0.987500011920929,
1806
+ "rewards/chosen": -2.2126364707946777,
1807
+ "rewards/margins": 14.33509635925293,
1808
+ "rewards/rejected": -16.547733306884766,
1809
+ "step": 1150
1810
+ },
1811
+ {
1812
+ "epoch": 1.49,
1813
+ "learning_rate": 4.1751963818138534e-07,
1814
+ "logits/chosen": -2.2460713386535645,
1815
+ "logits/rejected": -2.28529953956604,
1816
+ "logps/chosen": -391.7981872558594,
1817
+ "logps/rejected": -584.82373046875,
1818
+ "loss": 0.0106,
1819
+ "rewards/accuracies": 1.0,
1820
+ "rewards/chosen": -2.583667278289795,
1821
+ "rewards/margins": 13.928072929382324,
1822
+ "rewards/rejected": -16.511741638183594,
1823
+ "step": 1160
1824
+ },
1825
+ {
1826
+ "epoch": 1.5,
1827
+ "learning_rate": 4.1632944537015e-07,
1828
+ "logits/chosen": -2.312187671661377,
1829
+ "logits/rejected": -2.313152313232422,
1830
+ "logps/chosen": -332.22418212890625,
1831
+ "logps/rejected": -550.9510498046875,
1832
+ "loss": 0.0151,
1833
+ "rewards/accuracies": 0.987500011920929,
1834
+ "rewards/chosen": -2.8060202598571777,
1835
+ "rewards/margins": 13.428415298461914,
1836
+ "rewards/rejected": -16.23443603515625,
1837
+ "step": 1170
1838
+ },
1839
+ {
1840
+ "epoch": 1.52,
1841
+ "learning_rate": 4.151392525589145e-07,
1842
+ "logits/chosen": -2.269207715988159,
1843
+ "logits/rejected": -2.2718236446380615,
1844
+ "logps/chosen": -332.3182067871094,
1845
+ "logps/rejected": -509.44085693359375,
1846
+ "loss": 0.0267,
1847
+ "rewards/accuracies": 0.987500011920929,
1848
+ "rewards/chosen": -3.1180636882781982,
1849
+ "rewards/margins": 12.261663436889648,
1850
+ "rewards/rejected": -15.379727363586426,
1851
+ "step": 1180
1852
+ },
1853
+ {
1854
+ "epoch": 1.53,
1855
+ "learning_rate": 4.139490597476791e-07,
1856
+ "logits/chosen": -2.2478084564208984,
1857
+ "logits/rejected": -2.3000128269195557,
1858
+ "logps/chosen": -337.1382141113281,
1859
+ "logps/rejected": -537.2418212890625,
1860
+ "loss": 0.0108,
1861
+ "rewards/accuracies": 1.0,
1862
+ "rewards/chosen": -2.009748935699463,
1863
+ "rewards/margins": 12.527368545532227,
1864
+ "rewards/rejected": -14.537118911743164,
1865
+ "step": 1190
1866
+ },
1867
+ {
1868
+ "epoch": 1.54,
1869
+ "learning_rate": 4.127588669364437e-07,
1870
+ "logits/chosen": -2.313680648803711,
1871
+ "logits/rejected": -2.327012538909912,
1872
+ "logps/chosen": -291.6064758300781,
1873
+ "logps/rejected": -546.3372802734375,
1874
+ "loss": 0.0148,
1875
+ "rewards/accuracies": 0.987500011920929,
1876
+ "rewards/chosen": -3.4880402088165283,
1877
+ "rewards/margins": 14.52784252166748,
1878
+ "rewards/rejected": -18.01588249206543,
1879
+ "step": 1200
1880
+ },
1881
+ {
1882
+ "epoch": 1.54,
1883
+ "eval_logits/chosen": -2.3877577781677246,
1884
+ "eval_logits/rejected": -2.35882568359375,
1885
+ "eval_logps/chosen": -347.202880859375,
1886
+ "eval_logps/rejected": -496.7171325683594,
1887
+ "eval_loss": 0.07778895646333694,
1888
+ "eval_rewards/accuracies": 0.953125,
1889
+ "eval_rewards/chosen": -3.8491578102111816,
1890
+ "eval_rewards/margins": 11.548023223876953,
1891
+ "eval_rewards/rejected": -15.397181510925293,
1892
+ "eval_runtime": 38.6215,
1893
+ "eval_samples_per_second": 12.946,
1894
+ "eval_steps_per_second": 0.414,
1895
+ "step": 1200
1896
+ },
1897
+ {
1898
+ "epoch": 1.56,
1899
+ "learning_rate": 4.115686741252083e-07,
1900
+ "logits/chosen": -2.292132616043091,
1901
+ "logits/rejected": -2.347907781600952,
1902
+ "logps/chosen": -362.74481201171875,
1903
+ "logps/rejected": -558.0933837890625,
1904
+ "loss": 0.0163,
1905
+ "rewards/accuracies": 1.0,
1906
+ "rewards/chosen": -2.724060297012329,
1907
+ "rewards/margins": 13.57036304473877,
1908
+ "rewards/rejected": -16.294422149658203,
1909
+ "step": 1210
1910
+ },
1911
+ {
1912
+ "epoch": 1.57,
1913
+ "learning_rate": 4.103784813139728e-07,
1914
+ "logits/chosen": -2.3167264461517334,
1915
+ "logits/rejected": -2.3449079990386963,
1916
+ "logps/chosen": -369.4256591796875,
1917
+ "logps/rejected": -566.0360107421875,
1918
+ "loss": 0.0155,
1919
+ "rewards/accuracies": 0.9750000238418579,
1920
+ "rewards/chosen": -3.4625415802001953,
1921
+ "rewards/margins": 13.401751518249512,
1922
+ "rewards/rejected": -16.86429214477539,
1923
+ "step": 1220
1924
+ },
1925
+ {
1926
+ "epoch": 1.58,
1927
+ "learning_rate": 4.091882885027374e-07,
1928
+ "logits/chosen": -2.3674325942993164,
1929
+ "logits/rejected": -2.455508232116699,
1930
+ "logps/chosen": -381.26068115234375,
1931
+ "logps/rejected": -550.90625,
1932
+ "loss": 0.0244,
1933
+ "rewards/accuracies": 0.987500011920929,
1934
+ "rewards/chosen": -3.13775897026062,
1935
+ "rewards/margins": 13.795980453491211,
1936
+ "rewards/rejected": -16.933740615844727,
1937
+ "step": 1230
1938
+ },
1939
+ {
1940
+ "epoch": 1.59,
1941
+ "learning_rate": 4.07998095691502e-07,
1942
+ "logits/chosen": -2.3083391189575195,
1943
+ "logits/rejected": -2.330939769744873,
1944
+ "logps/chosen": -362.44171142578125,
1945
+ "logps/rejected": -523.51171875,
1946
+ "loss": 0.0124,
1947
+ "rewards/accuracies": 1.0,
1948
+ "rewards/chosen": -3.1269755363464355,
1949
+ "rewards/margins": 12.574740409851074,
1950
+ "rewards/rejected": -15.701716423034668,
1951
+ "step": 1240
1952
+ },
1953
+ {
1954
+ "epoch": 1.61,
1955
+ "learning_rate": 4.0680790288026654e-07,
1956
+ "logits/chosen": -2.3918001651763916,
1957
+ "logits/rejected": -2.426542282104492,
1958
+ "logps/chosen": -420.2566833496094,
1959
+ "logps/rejected": -605.3551025390625,
1960
+ "loss": 0.0202,
1961
+ "rewards/accuracies": 0.987500011920929,
1962
+ "rewards/chosen": -2.9122402667999268,
1963
+ "rewards/margins": 13.67309856414795,
1964
+ "rewards/rejected": -16.585338592529297,
1965
+ "step": 1250
1966
+ },
1967
+ {
1968
+ "epoch": 1.62,
1969
+ "learning_rate": 4.056177100690312e-07,
1970
+ "logits/chosen": -2.2674708366394043,
1971
+ "logits/rejected": -2.2906508445739746,
1972
+ "logps/chosen": -390.3266296386719,
1973
+ "logps/rejected": -587.2613525390625,
1974
+ "loss": 0.011,
1975
+ "rewards/accuracies": 1.0,
1976
+ "rewards/chosen": -4.047337532043457,
1977
+ "rewards/margins": 13.966493606567383,
1978
+ "rewards/rejected": -18.013832092285156,
1979
+ "step": 1260
1980
+ },
1981
+ {
1982
+ "epoch": 1.63,
1983
+ "learning_rate": 4.044275172577957e-07,
1984
+ "logits/chosen": -2.256685733795166,
1985
+ "logits/rejected": -2.283980131149292,
1986
+ "logps/chosen": -307.6758728027344,
1987
+ "logps/rejected": -536.929931640625,
1988
+ "loss": 0.0251,
1989
+ "rewards/accuracies": 0.987500011920929,
1990
+ "rewards/chosen": -2.8640975952148438,
1991
+ "rewards/margins": 14.663250923156738,
1992
+ "rewards/rejected": -17.527347564697266,
1993
+ "step": 1270
1994
+ },
1995
+ {
1996
+ "epoch": 1.65,
1997
+ "learning_rate": 4.0323732444656036e-07,
1998
+ "logits/chosen": -2.2302117347717285,
1999
+ "logits/rejected": -2.319187641143799,
2000
+ "logps/chosen": -377.6014099121094,
2001
+ "logps/rejected": -592.4954223632812,
2002
+ "loss": 0.0208,
2003
+ "rewards/accuracies": 0.987500011920929,
2004
+ "rewards/chosen": -2.1300606727600098,
2005
+ "rewards/margins": 14.494562149047852,
2006
+ "rewards/rejected": -16.624622344970703,
2007
+ "step": 1280
2008
+ },
2009
+ {
2010
+ "epoch": 1.66,
2011
+ "learning_rate": 4.020471316353249e-07,
2012
+ "logits/chosen": -2.3077661991119385,
2013
+ "logits/rejected": -2.34450364112854,
2014
+ "logps/chosen": -384.89007568359375,
2015
+ "logps/rejected": -577.9298095703125,
2016
+ "loss": 0.0126,
2017
+ "rewards/accuracies": 0.987500011920929,
2018
+ "rewards/chosen": -1.4994373321533203,
2019
+ "rewards/margins": 12.733844757080078,
2020
+ "rewards/rejected": -14.233282089233398,
2021
+ "step": 1290
2022
+ },
2023
+ {
2024
+ "epoch": 1.67,
2025
+ "learning_rate": 4.008569388240895e-07,
2026
+ "logits/chosen": -2.230447292327881,
2027
+ "logits/rejected": -2.283294677734375,
2028
+ "logps/chosen": -346.1694641113281,
2029
+ "logps/rejected": -534.3992919921875,
2030
+ "loss": 0.019,
2031
+ "rewards/accuracies": 1.0,
2032
+ "rewards/chosen": -1.3882415294647217,
2033
+ "rewards/margins": 14.5983247756958,
2034
+ "rewards/rejected": -15.986566543579102,
2035
+ "step": 1300
2036
+ },
2037
+ {
2038
+ "epoch": 1.67,
2039
+ "eval_logits/chosen": -2.403440475463867,
2040
+ "eval_logits/rejected": -2.378675699234009,
2041
+ "eval_logps/chosen": -332.9962463378906,
2042
+ "eval_logps/rejected": -477.9118957519531,
2043
+ "eval_loss": 0.07047431915998459,
2044
+ "eval_rewards/accuracies": 0.9375,
2045
+ "eval_rewards/chosen": -2.4284939765930176,
2046
+ "eval_rewards/margins": 11.088165283203125,
2047
+ "eval_rewards/rejected": -13.516657829284668,
2048
+ "eval_runtime": 38.6695,
2049
+ "eval_samples_per_second": 12.93,
2050
+ "eval_steps_per_second": 0.414,
2051
+ "step": 1300
2052
+ },
2053
+ {
2054
+ "epoch": 1.68,
2055
+ "learning_rate": 3.996667460128541e-07,
2056
+ "logits/chosen": -2.31799578666687,
2057
+ "logits/rejected": -2.3302206993103027,
2058
+ "logps/chosen": -333.87261962890625,
2059
+ "logps/rejected": -506.0113220214844,
2060
+ "loss": 0.0166,
2061
+ "rewards/accuracies": 1.0,
2062
+ "rewards/chosen": -1.8233000040054321,
2063
+ "rewards/margins": 13.524618148803711,
2064
+ "rewards/rejected": -15.347920417785645,
2065
+ "step": 1310
2066
+ },
2067
+ {
2068
+ "epoch": 1.7,
2069
+ "learning_rate": 3.9847655320161867e-07,
2070
+ "logits/chosen": -2.3380274772644043,
2071
+ "logits/rejected": -2.3655543327331543,
2072
+ "logps/chosen": -330.939453125,
2073
+ "logps/rejected": -566.5387573242188,
2074
+ "loss": 0.0211,
2075
+ "rewards/accuracies": 1.0,
2076
+ "rewards/chosen": -3.293247938156128,
2077
+ "rewards/margins": 13.109285354614258,
2078
+ "rewards/rejected": -16.402530670166016,
2079
+ "step": 1320
2080
+ },
2081
+ {
2082
+ "epoch": 1.71,
2083
+ "learning_rate": 3.972863603903832e-07,
2084
+ "logits/chosen": -2.4296791553497314,
2085
+ "logits/rejected": -2.395019054412842,
2086
+ "logps/chosen": -368.58843994140625,
2087
+ "logps/rejected": -550.57177734375,
2088
+ "loss": 0.0147,
2089
+ "rewards/accuracies": 1.0,
2090
+ "rewards/chosen": -2.1659107208251953,
2091
+ "rewards/margins": 14.171772956848145,
2092
+ "rewards/rejected": -16.337684631347656,
2093
+ "step": 1330
2094
+ },
2095
+ {
2096
+ "epoch": 1.72,
2097
+ "learning_rate": 3.9609616757914784e-07,
2098
+ "logits/chosen": -2.386429786682129,
2099
+ "logits/rejected": -2.401638984680176,
2100
+ "logps/chosen": -347.26214599609375,
2101
+ "logps/rejected": -538.3074951171875,
2102
+ "loss": 0.0162,
2103
+ "rewards/accuracies": 0.987500011920929,
2104
+ "rewards/chosen": -2.806589126586914,
2105
+ "rewards/margins": 12.520380973815918,
2106
+ "rewards/rejected": -15.326970100402832,
2107
+ "step": 1340
2108
+ },
2109
+ {
2110
+ "epoch": 1.74,
2111
+ "learning_rate": 3.949059747679124e-07,
2112
+ "logits/chosen": -2.3784899711608887,
2113
+ "logits/rejected": -2.42669939994812,
2114
+ "logps/chosen": -364.69512939453125,
2115
+ "logps/rejected": -592.1053466796875,
2116
+ "loss": 0.0159,
2117
+ "rewards/accuracies": 0.987500011920929,
2118
+ "rewards/chosen": -3.1103992462158203,
2119
+ "rewards/margins": 15.538830757141113,
2120
+ "rewards/rejected": -18.649229049682617,
2121
+ "step": 1350
2122
+ },
2123
+ {
2124
+ "epoch": 1.75,
2125
+ "learning_rate": 3.9371578195667697e-07,
2126
+ "logits/chosen": -2.4179718494415283,
2127
+ "logits/rejected": -2.4337425231933594,
2128
+ "logps/chosen": -338.0289001464844,
2129
+ "logps/rejected": -522.47412109375,
2130
+ "loss": 0.0343,
2131
+ "rewards/accuracies": 0.9750000238418579,
2132
+ "rewards/chosen": -2.291858196258545,
2133
+ "rewards/margins": 14.658024787902832,
2134
+ "rewards/rejected": -16.949880599975586,
2135
+ "step": 1360
2136
+ },
2137
+ {
2138
+ "epoch": 1.76,
2139
+ "learning_rate": 3.9252558914544156e-07,
2140
+ "logits/chosen": -2.37274169921875,
2141
+ "logits/rejected": -2.376906633377075,
2142
+ "logps/chosen": -371.0089111328125,
2143
+ "logps/rejected": -562.0587158203125,
2144
+ "loss": 0.0236,
2145
+ "rewards/accuracies": 0.9750000238418579,
2146
+ "rewards/chosen": -2.699599504470825,
2147
+ "rewards/margins": 14.066309928894043,
2148
+ "rewards/rejected": -16.76590919494629,
2149
+ "step": 1370
2150
+ },
2151
+ {
2152
+ "epoch": 1.77,
2153
+ "learning_rate": 3.9133539633420615e-07,
2154
+ "logits/chosen": -2.3570303916931152,
2155
+ "logits/rejected": -2.4414098262786865,
2156
+ "logps/chosen": -347.50531005859375,
2157
+ "logps/rejected": -606.2113647460938,
2158
+ "loss": 0.015,
2159
+ "rewards/accuracies": 1.0,
2160
+ "rewards/chosen": -2.3379924297332764,
2161
+ "rewards/margins": 14.867982864379883,
2162
+ "rewards/rejected": -17.205974578857422,
2163
+ "step": 1380
2164
+ },
2165
+ {
2166
+ "epoch": 1.79,
2167
+ "learning_rate": 3.901452035229707e-07,
2168
+ "logits/chosen": -2.373347043991089,
2169
+ "logits/rejected": -2.4218459129333496,
2170
+ "logps/chosen": -421.48187255859375,
2171
+ "logps/rejected": -606.8762817382812,
2172
+ "loss": 0.0132,
2173
+ "rewards/accuracies": 1.0,
2174
+ "rewards/chosen": -2.6556594371795654,
2175
+ "rewards/margins": 14.492483139038086,
2176
+ "rewards/rejected": -17.148143768310547,
2177
+ "step": 1390
2178
+ },
2179
+ {
2180
+ "epoch": 1.8,
2181
+ "learning_rate": 3.8895501071173533e-07,
2182
+ "logits/chosen": -2.3142504692077637,
2183
+ "logits/rejected": -2.3538806438446045,
2184
+ "logps/chosen": -325.9708557128906,
2185
+ "logps/rejected": -511.67449951171875,
2186
+ "loss": 0.0214,
2187
+ "rewards/accuracies": 0.9750000238418579,
2188
+ "rewards/chosen": -2.7798726558685303,
2189
+ "rewards/margins": 13.109631538391113,
2190
+ "rewards/rejected": -15.889503479003906,
2191
+ "step": 1400
2192
+ },
2193
+ {
2194
+ "epoch": 1.8,
2195
+ "eval_logits/chosen": -2.3960964679718018,
2196
+ "eval_logits/rejected": -2.3517098426818848,
2197
+ "eval_logps/chosen": -346.35821533203125,
2198
+ "eval_logps/rejected": -495.85186767578125,
2199
+ "eval_loss": 0.07910314947366714,
2200
+ "eval_rewards/accuracies": 0.96875,
2201
+ "eval_rewards/chosen": -3.7646918296813965,
2202
+ "eval_rewards/margins": 11.545960426330566,
2203
+ "eval_rewards/rejected": -15.310651779174805,
2204
+ "eval_runtime": 38.7173,
2205
+ "eval_samples_per_second": 12.914,
2206
+ "eval_steps_per_second": 0.413,
2207
+ "step": 1400
2208
+ },
2209
+ {
2210
+ "epoch": 1.81,
2211
+ "learning_rate": 3.8776481790049987e-07,
2212
+ "logits/chosen": -2.3062312602996826,
2213
+ "logits/rejected": -2.3327383995056152,
2214
+ "logps/chosen": -354.59381103515625,
2215
+ "logps/rejected": -503.6541442871094,
2216
+ "loss": 0.0196,
2217
+ "rewards/accuracies": 0.987500011920929,
2218
+ "rewards/chosen": -3.5312271118164062,
2219
+ "rewards/margins": 13.276026725769043,
2220
+ "rewards/rejected": -16.807254791259766,
2221
+ "step": 1410
2222
+ },
2223
+ {
2224
+ "epoch": 1.83,
2225
+ "learning_rate": 3.865746250892644e-07,
2226
+ "logits/chosen": -2.396146774291992,
2227
+ "logits/rejected": -2.3744444847106934,
2228
+ "logps/chosen": -397.74609375,
2229
+ "logps/rejected": -583.1174926757812,
2230
+ "loss": 0.0162,
2231
+ "rewards/accuracies": 1.0,
2232
+ "rewards/chosen": -3.2743606567382812,
2233
+ "rewards/margins": 15.37347412109375,
2234
+ "rewards/rejected": -18.647836685180664,
2235
+ "step": 1420
2236
+ },
2237
+ {
2238
+ "epoch": 1.84,
2239
+ "learning_rate": 3.8538443227802905e-07,
2240
+ "logits/chosen": -2.3621578216552734,
2241
+ "logits/rejected": -2.3470935821533203,
2242
+ "logps/chosen": -374.19757080078125,
2243
+ "logps/rejected": -564.0121459960938,
2244
+ "loss": 0.022,
2245
+ "rewards/accuracies": 0.987500011920929,
2246
+ "rewards/chosen": -4.340083122253418,
2247
+ "rewards/margins": 13.78313159942627,
2248
+ "rewards/rejected": -18.123212814331055,
2249
+ "step": 1430
2250
+ },
2251
+ {
2252
+ "epoch": 1.85,
2253
+ "learning_rate": 3.841942394667936e-07,
2254
+ "logits/chosen": -2.304884672164917,
2255
+ "logits/rejected": -2.4029793739318848,
2256
+ "logps/chosen": -369.39898681640625,
2257
+ "logps/rejected": -578.387451171875,
2258
+ "loss": 0.0146,
2259
+ "rewards/accuracies": 0.987500011920929,
2260
+ "rewards/chosen": -2.7867379188537598,
2261
+ "rewards/margins": 14.443509101867676,
2262
+ "rewards/rejected": -17.23024559020996,
2263
+ "step": 1440
2264
+ },
2265
+ {
2266
+ "epoch": 1.86,
2267
+ "learning_rate": 3.8300404665555817e-07,
2268
+ "logits/chosen": -2.2816107273101807,
2269
+ "logits/rejected": -2.2829480171203613,
2270
+ "logps/chosen": -374.7585144042969,
2271
+ "logps/rejected": -540.5015869140625,
2272
+ "loss": 0.0164,
2273
+ "rewards/accuracies": 0.987500011920929,
2274
+ "rewards/chosen": -2.205556869506836,
2275
+ "rewards/margins": 14.464788436889648,
2276
+ "rewards/rejected": -16.670345306396484,
2277
+ "step": 1450
2278
+ },
2279
+ {
2280
+ "epoch": 1.88,
2281
+ "learning_rate": 3.8181385384432276e-07,
2282
+ "logits/chosen": -2.282743453979492,
2283
+ "logits/rejected": -2.2942354679107666,
2284
+ "logps/chosen": -394.46502685546875,
2285
+ "logps/rejected": -594.6571044921875,
2286
+ "loss": 0.0112,
2287
+ "rewards/accuracies": 1.0,
2288
+ "rewards/chosen": -2.892620325088501,
2289
+ "rewards/margins": 14.386013984680176,
2290
+ "rewards/rejected": -17.27863311767578,
2291
+ "step": 1460
2292
+ },
2293
+ {
2294
+ "epoch": 1.89,
2295
+ "learning_rate": 3.8062366103308735e-07,
2296
+ "logits/chosen": -2.2720725536346436,
2297
+ "logits/rejected": -2.245262622833252,
2298
+ "logps/chosen": -342.9836730957031,
2299
+ "logps/rejected": -546.7418212890625,
2300
+ "loss": 0.0365,
2301
+ "rewards/accuracies": 1.0,
2302
+ "rewards/chosen": -2.7557284832000732,
2303
+ "rewards/margins": 14.667689323425293,
2304
+ "rewards/rejected": -17.423416137695312,
2305
+ "step": 1470
2306
+ },
2307
+ {
2308
+ "epoch": 1.9,
2309
+ "learning_rate": 3.794334682218519e-07,
2310
+ "logits/chosen": -2.295213222503662,
2311
+ "logits/rejected": -2.3375067710876465,
2312
+ "logps/chosen": -370.61798095703125,
2313
+ "logps/rejected": -474.4059143066406,
2314
+ "loss": 0.0237,
2315
+ "rewards/accuracies": 0.987500011920929,
2316
+ "rewards/chosen": -3.196665048599243,
2317
+ "rewards/margins": 12.084269523620605,
2318
+ "rewards/rejected": -15.28093433380127,
2319
+ "step": 1480
2320
+ },
2321
+ {
2322
+ "epoch": 1.92,
2323
+ "learning_rate": 3.7824327541061653e-07,
2324
+ "logits/chosen": -2.4100170135498047,
2325
+ "logits/rejected": -2.4586007595062256,
2326
+ "logps/chosen": -358.7035217285156,
2327
+ "logps/rejected": -547.9478149414062,
2328
+ "loss": 0.0184,
2329
+ "rewards/accuracies": 1.0,
2330
+ "rewards/chosen": -3.731393337249756,
2331
+ "rewards/margins": 12.000238418579102,
2332
+ "rewards/rejected": -15.73162841796875,
2333
+ "step": 1490
2334
+ },
2335
+ {
2336
+ "epoch": 1.93,
2337
+ "learning_rate": 3.7705308259938107e-07,
2338
+ "logits/chosen": -2.432584047317505,
2339
+ "logits/rejected": -2.430572032928467,
2340
+ "logps/chosen": -400.4476318359375,
2341
+ "logps/rejected": -589.388427734375,
2342
+ "loss": 0.0124,
2343
+ "rewards/accuracies": 1.0,
2344
+ "rewards/chosen": -3.0731418132781982,
2345
+ "rewards/margins": 13.324457168579102,
2346
+ "rewards/rejected": -16.397600173950195,
2347
+ "step": 1500
2348
+ },
2349
+ {
2350
+ "epoch": 1.93,
2351
+ "eval_logits/chosen": -2.4233508110046387,
2352
+ "eval_logits/rejected": -2.3732004165649414,
2353
+ "eval_logps/chosen": -345.49517822265625,
2354
+ "eval_logps/rejected": -491.72662353515625,
2355
+ "eval_loss": 0.08803335577249527,
2356
+ "eval_rewards/accuracies": 0.9375,
2357
+ "eval_rewards/chosen": -3.678384304046631,
2358
+ "eval_rewards/margins": 11.219746589660645,
2359
+ "eval_rewards/rejected": -14.89813232421875,
2360
+ "eval_runtime": 38.608,
2361
+ "eval_samples_per_second": 12.951,
2362
+ "eval_steps_per_second": 0.414,
2363
+ "step": 1500
2364
+ },
2365
+ {
2366
+ "epoch": 1.94,
2367
+ "learning_rate": 3.7586288978814566e-07,
2368
+ "logits/chosen": -2.335282564163208,
2369
+ "logits/rejected": -2.330732583999634,
2370
+ "logps/chosen": -388.20806884765625,
2371
+ "logps/rejected": -580.2225341796875,
2372
+ "loss": 0.0118,
2373
+ "rewards/accuracies": 1.0,
2374
+ "rewards/chosen": -3.1078379154205322,
2375
+ "rewards/margins": 13.242405891418457,
2376
+ "rewards/rejected": -16.350242614746094,
2377
+ "step": 1510
2378
+ },
2379
+ {
2380
+ "epoch": 1.95,
2381
+ "learning_rate": 3.7467269697691025e-07,
2382
+ "logits/chosen": -2.3464579582214355,
2383
+ "logits/rejected": -2.3436694145202637,
2384
+ "logps/chosen": -335.885986328125,
2385
+ "logps/rejected": -532.0635986328125,
2386
+ "loss": 0.0328,
2387
+ "rewards/accuracies": 1.0,
2388
+ "rewards/chosen": -3.1488466262817383,
2389
+ "rewards/margins": 13.591397285461426,
2390
+ "rewards/rejected": -16.740243911743164,
2391
+ "step": 1520
2392
+ },
2393
+ {
2394
+ "epoch": 1.97,
2395
+ "learning_rate": 3.7348250416567484e-07,
2396
+ "logits/chosen": -2.2621750831604004,
2397
+ "logits/rejected": -2.2600533962249756,
2398
+ "logps/chosen": -415.00982666015625,
2399
+ "logps/rejected": -549.5345458984375,
2400
+ "loss": 0.0264,
2401
+ "rewards/accuracies": 0.987500011920929,
2402
+ "rewards/chosen": -2.42472767829895,
2403
+ "rewards/margins": 13.469167709350586,
2404
+ "rewards/rejected": -15.893896102905273,
2405
+ "step": 1530
2406
+ },
2407
+ {
2408
+ "epoch": 1.98,
2409
+ "learning_rate": 3.722923113544394e-07,
2410
+ "logits/chosen": -2.361262559890747,
2411
+ "logits/rejected": -2.315338611602783,
2412
+ "logps/chosen": -394.708740234375,
2413
+ "logps/rejected": -578.1019287109375,
2414
+ "loss": 0.0251,
2415
+ "rewards/accuracies": 0.987500011920929,
2416
+ "rewards/chosen": -2.082348585128784,
2417
+ "rewards/margins": 15.23118782043457,
2418
+ "rewards/rejected": -17.31353759765625,
2419
+ "step": 1540
2420
+ },
2421
+ {
2422
+ "epoch": 1.99,
2423
+ "learning_rate": 3.71102118543204e-07,
2424
+ "logits/chosen": -2.315455913543701,
2425
+ "logits/rejected": -2.284585952758789,
2426
+ "logps/chosen": -367.0815734863281,
2427
+ "logps/rejected": -577.2198486328125,
2428
+ "loss": 0.0113,
2429
+ "rewards/accuracies": 1.0,
2430
+ "rewards/chosen": -2.139265537261963,
2431
+ "rewards/margins": 14.051069259643555,
2432
+ "rewards/rejected": -16.19033432006836,
2433
+ "step": 1550
2434
+ }
2435
+ ],
2436
+ "logging_steps": 10,
2437
+ "max_steps": 4668,
2438
+ "num_train_epochs": 6,
2439
+ "save_steps": 500,
2440
+ "total_flos": 0.0,
2441
+ "trial_name": null,
2442
+ "trial_params": null
2443
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f09f6ebae60f5cf7aa9bc7b8e6d25455a48190b29ccdc77065805f0e5acf97ca
3
+ size 6008