notrichardren commited on
Commit
41bad26
1 Parent(s): 17d8f5d

Model save

Browse files
README.md CHANGED
@@ -2,13 +2,12 @@
2
  license: apache-2.0
3
  library_name: peft
4
  tags:
5
- - alignment-handbook
6
- - generated_from_trainer
7
  - trl
8
  - sft
 
9
  base_model: mistralai/Mistral-7B-v0.1
10
  datasets:
11
- - robust-control/PKU-alignment-better-safer-2
12
  model-index:
13
  - name: zephyr-7b-sft-qlora-alignment-10000
14
  results: []
@@ -19,9 +18,9 @@ should probably proofread and complete it, then remove this comment. -->
19
 
20
  # zephyr-7b-sft-qlora-alignment-10000
21
 
22
- This model is a fine-tuned version of [mistralai/Mistral-7B-v0.1](https://huggingface.co/mistralai/Mistral-7B-v0.1) on the robust-control/PKU-alignment-better-safer-2 dataset.
23
  It achieves the following results on the evaluation set:
24
- - Loss: 1.2067
25
 
26
  ## Model description
27
 
@@ -56,7 +55,7 @@ The following hyperparameters were used during training:
56
 
57
  | Training Loss | Epoch | Step | Validation Loss |
58
  |:-------------:|:-----:|:----:|:---------------:|
59
- | 1.1641 | 1.0 | 171 | 1.2067 |
60
 
61
 
62
  ### Framework versions
 
2
  license: apache-2.0
3
  library_name: peft
4
  tags:
 
 
5
  - trl
6
  - sft
7
+ - generated_from_trainer
8
  base_model: mistralai/Mistral-7B-v0.1
9
  datasets:
10
+ - generator
11
  model-index:
12
  - name: zephyr-7b-sft-qlora-alignment-10000
13
  results: []
 
18
 
19
  # zephyr-7b-sft-qlora-alignment-10000
20
 
21
+ This model is a fine-tuned version of [mistralai/Mistral-7B-v0.1](https://huggingface.co/mistralai/Mistral-7B-v0.1) on the generator dataset.
22
  It achieves the following results on the evaluation set:
23
+ - Loss: 1.2123
24
 
25
  ## Model description
26
 
 
55
 
56
  | Training Loss | Epoch | Step | Validation Loss |
57
  |:-------------:|:-----:|:----:|:---------------:|
58
+ | 1.1007 | 1.0 | 274 | 1.2123 |
59
 
60
 
61
  ### Framework versions
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c0a0e67a4cbe385556f2419aa87172cbd0f015851253480033dc03d20fad0e95
3
  size 83946192
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b298414960be2dd3919b6f17ef2429c9937f9cd8739855ec9e6408fa449c49d0
3
  size 83946192
all_results.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
  "epoch": 1.0,
3
- "eval_loss": 1.2067391872406006,
4
- "eval_runtime": 235.4446,
5
  "eval_samples": 9907,
6
- "eval_samples_per_second": 2.68,
7
- "eval_steps_per_second": 0.671,
8
- "train_loss": 1.2207316202029848,
9
- "train_runtime": 1132.3581,
10
- "train_samples": 10853,
11
- "train_samples_per_second": 0.604,
12
- "train_steps_per_second": 0.151
13
  }
 
1
  {
2
  "epoch": 1.0,
3
+ "eval_loss": 1.2122626304626465,
4
+ "eval_runtime": 236.2386,
5
  "eval_samples": 9907,
6
+ "eval_samples_per_second": 2.671,
7
+ "eval_steps_per_second": 0.669,
8
+ "train_loss": 1.1792847770844064,
9
+ "train_runtime": 1678.8973,
10
+ "train_samples": 17365,
11
+ "train_samples_per_second": 0.653,
12
+ "train_steps_per_second": 0.163
13
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 1.0,
3
- "eval_loss": 1.2067391872406006,
4
- "eval_runtime": 235.4446,
5
  "eval_samples": 9907,
6
- "eval_samples_per_second": 2.68,
7
- "eval_steps_per_second": 0.671
8
  }
 
1
  {
2
  "epoch": 1.0,
3
+ "eval_loss": 1.2122626304626465,
4
+ "eval_runtime": 236.2386,
5
  "eval_samples": 9907,
6
+ "eval_samples_per_second": 2.671,
7
+ "eval_steps_per_second": 0.669
8
  }
runs/May11_15-37-10_compute-permanent-node-269/events.out.tfevents.1715441861.compute-permanent-node-269.196531.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a11074319838279bc08e731f5169277ab772f24d1e4a43848f68b7287cfd0514
3
- size 11256
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:59e36ea9b0b56d19d244b6cc429ae3148683eba2e36e66b7abb87553e0f1f8d7
3
+ size 14079
runs/May11_15-37-10_compute-permanent-node-269/events.out.tfevents.1715443776.compute-permanent-node-269.196531.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b9601799c51fda3f03a7ba1b71d4421c05b80f0d9767301d5cb9ef25556bf76b
3
+ size 359
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 1.0,
3
- "train_loss": 1.2207316202029848,
4
- "train_runtime": 1132.3581,
5
- "train_samples": 10853,
6
- "train_samples_per_second": 0.604,
7
- "train_steps_per_second": 0.151
8
  }
 
1
  {
2
  "epoch": 1.0,
3
+ "train_loss": 1.1792847770844064,
4
+ "train_runtime": 1678.8973,
5
+ "train_samples": 17365,
6
+ "train_samples_per_second": 0.653,
7
+ "train_steps_per_second": 0.163
8
  }
trainer_state.json CHANGED
@@ -1,247 +1,367 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.0,
5
  "eval_steps": 500,
6
- "global_step": 171,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.01,
13
- "learning_rate": 1.1111111111111112e-05,
14
- "loss": 1.8643,
15
  "step": 1
16
  },
17
  {
18
- "epoch": 0.03,
19
- "learning_rate": 5.555555555555556e-05,
20
- "loss": 1.7663,
21
  "step": 5
22
  },
23
  {
24
- "epoch": 0.06,
25
- "learning_rate": 0.00011111111111111112,
26
- "loss": 1.4851,
27
  "step": 10
28
  },
29
  {
30
- "epoch": 0.09,
31
- "learning_rate": 0.0001666666666666667,
32
- "loss": 1.3257,
33
  "step": 15
34
  },
35
  {
36
- "epoch": 0.12,
37
- "learning_rate": 0.0001999156886888064,
38
- "loss": 1.2837,
39
  "step": 20
40
  },
41
  {
42
- "epoch": 0.15,
43
- "learning_rate": 0.00019896881839082556,
44
- "loss": 1.2272,
45
  "step": 25
46
  },
47
  {
48
- "epoch": 0.18,
49
- "learning_rate": 0.00019697969360350098,
50
- "loss": 1.2551,
51
  "step": 30
52
  },
53
  {
54
- "epoch": 0.2,
55
- "learning_rate": 0.00019396926207859084,
56
- "loss": 1.2263,
57
  "step": 35
58
  },
59
  {
60
- "epoch": 0.23,
61
- "learning_rate": 0.00018996922709216455,
62
- "loss": 1.2222,
63
  "step": 40
64
  },
65
  {
66
- "epoch": 0.26,
67
- "learning_rate": 0.00018502171357296144,
68
- "loss": 1.242,
69
  "step": 45
70
  },
71
  {
72
- "epoch": 0.29,
73
- "learning_rate": 0.00017917882447886582,
74
- "loss": 1.214,
75
  "step": 50
76
  },
77
  {
78
- "epoch": 0.32,
79
- "learning_rate": 0.00017250209209335927,
80
- "loss": 1.2186,
81
  "step": 55
82
  },
83
  {
84
- "epoch": 0.35,
85
- "learning_rate": 0.0001650618300204242,
86
- "loss": 1.2202,
87
  "step": 60
88
  },
89
  {
90
- "epoch": 0.38,
91
- "learning_rate": 0.00015693639270213136,
92
- "loss": 1.1952,
93
  "step": 65
94
  },
95
  {
96
- "epoch": 0.41,
97
- "learning_rate": 0.0001482113502570349,
98
- "loss": 1.2162,
99
  "step": 70
100
  },
101
  {
102
- "epoch": 0.44,
103
- "learning_rate": 0.00013897858732926793,
104
- "loss": 1.1951,
105
  "step": 75
106
  },
107
  {
108
- "epoch": 0.47,
109
- "learning_rate": 0.00012933533543848461,
110
- "loss": 1.1806,
111
  "step": 80
112
  },
113
  {
114
- "epoch": 0.5,
115
- "learning_rate": 0.00011938314902110701,
116
- "loss": 1.2158,
117
  "step": 85
118
  },
119
  {
120
- "epoch": 0.53,
121
- "learning_rate": 0.00010922683594633021,
122
- "loss": 1.1702,
123
  "step": 90
124
  },
125
  {
126
- "epoch": 0.56,
127
- "learning_rate": 9.897335376977102e-05,
128
- "loss": 1.186,
129
  "step": 95
130
  },
131
  {
132
- "epoch": 0.58,
133
- "learning_rate": 8.87306833484679e-05,
134
- "loss": 1.1802,
135
  "step": 100
136
  },
137
  {
138
- "epoch": 0.61,
139
- "learning_rate": 7.860669167935028e-05,
140
- "loss": 1.1626,
141
  "step": 105
142
  },
143
  {
144
- "epoch": 0.64,
145
- "learning_rate": 6.870799593678459e-05,
146
- "loss": 1.177,
147
  "step": 110
148
  },
149
  {
150
- "epoch": 0.67,
151
- "learning_rate": 5.913884067217685e-05,
152
- "loss": 1.1495,
153
  "step": 115
154
  },
155
  {
156
- "epoch": 0.7,
157
- "learning_rate": 5.000000000000002e-05,
158
- "loss": 1.1842,
159
  "step": 120
160
  },
161
  {
162
- "epoch": 0.73,
163
- "learning_rate": 4.1387716331478565e-05,
164
- "loss": 1.1736,
165
  "step": 125
166
  },
167
  {
168
- "epoch": 0.76,
169
- "learning_rate": 3.339268683227499e-05,
170
- "loss": 1.1638,
171
  "step": 130
172
  },
173
  {
174
- "epoch": 0.79,
175
- "learning_rate": 2.6099108277934103e-05,
176
- "loss": 1.1465,
177
  "step": 135
178
  },
179
  {
180
- "epoch": 0.82,
181
- "learning_rate": 1.9583790365845822e-05,
182
- "loss": 1.1706,
183
  "step": 140
184
  },
185
  {
186
- "epoch": 0.85,
187
- "learning_rate": 1.3915346821563235e-05,
188
- "loss": 1.152,
189
  "step": 145
190
  },
191
  {
192
- "epoch": 0.88,
193
- "learning_rate": 9.153472818047625e-06,
194
- "loss": 1.1454,
195
  "step": 150
196
  },
197
  {
198
- "epoch": 0.91,
199
- "learning_rate": 5.348316317440549e-06,
200
- "loss": 1.1639,
201
  "step": 155
202
  },
203
  {
204
- "epoch": 0.94,
205
- "learning_rate": 2.539949955849985e-06,
206
- "loss": 1.1614,
207
  "step": 160
208
  },
209
  {
210
- "epoch": 0.96,
211
- "learning_rate": 7.579490328064265e-07,
212
- "loss": 1.1605,
213
  "step": 165
214
  },
215
  {
216
- "epoch": 0.99,
217
- "learning_rate": 2.108004964086474e-08,
218
- "loss": 1.1641,
219
  "step": 170
220
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
221
  {
222
  "epoch": 1.0,
223
- "eval_loss": 1.2067391872406006,
224
- "eval_runtime": 235.4134,
225
- "eval_samples_per_second": 2.68,
226
- "eval_steps_per_second": 0.671,
227
- "step": 171
228
  },
229
  {
230
  "epoch": 1.0,
231
- "step": 171,
232
- "total_flos": 6.011757435184742e+16,
233
- "train_loss": 1.2207316202029848,
234
- "train_runtime": 1132.3581,
235
- "train_samples_per_second": 0.604,
236
- "train_steps_per_second": 0.151
237
  }
238
  ],
239
  "logging_steps": 5,
240
- "max_steps": 171,
241
  "num_input_tokens_seen": 0,
242
  "num_train_epochs": 1,
243
  "save_steps": 100,
244
- "total_flos": 6.011757435184742e+16,
245
  "train_batch_size": 2,
246
  "trial_name": null,
247
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.9981785063752276,
5
  "eval_steps": 500,
6
+ "global_step": 274,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 0.0,
13
+ "learning_rate": 7.142857142857143e-06,
14
+ "loss": 1.7833,
15
  "step": 1
16
  },
17
  {
18
+ "epoch": 0.02,
19
+ "learning_rate": 3.571428571428572e-05,
20
+ "loss": 1.7803,
21
  "step": 5
22
  },
23
  {
24
+ "epoch": 0.04,
25
+ "learning_rate": 7.142857142857143e-05,
26
+ "loss": 1.5541,
27
  "step": 10
28
  },
29
  {
30
+ "epoch": 0.05,
31
+ "learning_rate": 0.00010714285714285715,
32
+ "loss": 1.3651,
33
  "step": 15
34
  },
35
  {
36
+ "epoch": 0.07,
37
+ "learning_rate": 0.00014285714285714287,
38
+ "loss": 1.2684,
39
  "step": 20
40
  },
41
  {
42
+ "epoch": 0.09,
43
+ "learning_rate": 0.0001785714285714286,
44
+ "loss": 1.2601,
45
  "step": 25
46
  },
47
  {
48
+ "epoch": 0.11,
49
+ "learning_rate": 0.00019996738360808565,
50
+ "loss": 1.2498,
51
  "step": 30
52
  },
53
  {
54
+ "epoch": 0.13,
55
+ "learning_rate": 0.00019960069350344548,
56
+ "loss": 1.2452,
57
  "step": 35
58
  },
59
  {
60
+ "epoch": 0.15,
61
+ "learning_rate": 0.00019882804237803488,
62
+ "loss": 1.2473,
63
  "step": 40
64
  },
65
  {
66
+ "epoch": 0.16,
67
+ "learning_rate": 0.00019765257946935944,
68
+ "loss": 1.1842,
69
  "step": 45
70
  },
71
  {
72
+ "epoch": 0.18,
73
+ "learning_rate": 0.00019607909582962477,
74
+ "loss": 1.2224,
75
  "step": 50
76
  },
77
  {
78
+ "epoch": 0.2,
79
+ "learning_rate": 0.00019411400479795617,
80
+ "loss": 1.2516,
81
  "step": 55
82
  },
83
  {
84
+ "epoch": 0.22,
85
+ "learning_rate": 0.0001917653158603628,
86
+ "loss": 1.2017,
87
  "step": 60
88
  },
89
  {
90
+ "epoch": 0.24,
91
+ "learning_rate": 0.00018904260200399006,
92
+ "loss": 1.1943,
93
  "step": 65
94
  },
95
  {
96
+ "epoch": 0.26,
97
+ "learning_rate": 0.00018595696069872013,
98
+ "loss": 1.213,
99
  "step": 70
100
  },
101
  {
102
+ "epoch": 0.27,
103
+ "learning_rate": 0.00018252096866515558,
104
+ "loss": 1.1741,
105
  "step": 75
106
  },
107
  {
108
+ "epoch": 0.29,
109
+ "learning_rate": 0.00017874863061334657,
110
+ "loss": 1.1809,
111
  "step": 80
112
  },
113
  {
114
+ "epoch": 0.31,
115
+ "learning_rate": 0.00017465532216119625,
116
+ "loss": 1.1773,
117
  "step": 85
118
  },
119
  {
120
+ "epoch": 0.33,
121
+ "learning_rate": 0.00017025772716520323,
122
+ "loss": 1.1956,
123
  "step": 90
124
  },
125
  {
126
+ "epoch": 0.35,
127
+ "learning_rate": 0.00016557376971897266,
128
+ "loss": 1.1867,
129
  "step": 95
130
  },
131
  {
132
+ "epoch": 0.36,
133
+ "learning_rate": 0.0001606225410966638,
134
+ "loss": 1.182,
135
  "step": 100
136
  },
137
  {
138
+ "epoch": 0.38,
139
+ "learning_rate": 0.0001554242219391425,
140
+ "loss": 1.1625,
141
  "step": 105
142
  },
143
  {
144
+ "epoch": 0.4,
145
+ "learning_rate": 0.00015000000000000001,
146
+ "loss": 1.1626,
147
  "step": 110
148
  },
149
  {
150
+ "epoch": 0.42,
151
+ "learning_rate": 0.00014437198378669598,
152
+ "loss": 1.1716,
153
  "step": 115
154
  },
155
  {
156
+ "epoch": 0.44,
157
+ "learning_rate": 0.0001385631124488136,
158
+ "loss": 1.1372,
159
  "step": 120
160
  },
161
  {
162
+ "epoch": 0.46,
163
+ "learning_rate": 0.00013259706228071285,
164
+ "loss": 1.161,
165
  "step": 125
166
  },
167
  {
168
+ "epoch": 0.47,
169
+ "learning_rate": 0.0001264981502196662,
170
+ "loss": 1.1578,
171
  "step": 130
172
  },
173
  {
174
+ "epoch": 0.49,
175
+ "learning_rate": 0.00012029123473280668,
176
+ "loss": 1.1553,
177
  "step": 135
178
  },
179
  {
180
+ "epoch": 0.51,
181
+ "learning_rate": 0.00011400161449686293,
182
+ "loss": 1.1518,
183
  "step": 140
184
  },
185
  {
186
+ "epoch": 0.53,
187
+ "learning_rate": 0.0001076549252836496,
188
+ "loss": 1.15,
189
  "step": 145
190
  },
191
  {
192
+ "epoch": 0.55,
193
+ "learning_rate": 0.00010127703547159739,
194
+ "loss": 1.1362,
195
  "step": 150
196
  },
197
  {
198
+ "epoch": 0.56,
199
+ "learning_rate": 9.489394060920496e-05,
200
+ "loss": 1.1724,
201
  "step": 155
202
  },
203
  {
204
+ "epoch": 0.58,
205
+ "learning_rate": 8.853165746015997e-05,
206
+ "loss": 1.1116,
207
  "step": 160
208
  },
209
  {
210
+ "epoch": 0.6,
211
+ "learning_rate": 8.221611796198985e-05,
212
+ "loss": 1.1193,
213
  "step": 165
214
  },
215
  {
216
+ "epoch": 0.62,
217
+ "learning_rate": 7.597306353045393e-05,
218
+ "loss": 1.132,
219
  "step": 170
220
  },
221
+ {
222
+ "epoch": 0.64,
223
+ "learning_rate": 6.982794014048077e-05,
224
+ "loss": 1.1159,
225
+ "step": 175
226
+ },
227
+ {
228
+ "epoch": 0.66,
229
+ "learning_rate": 6.380579461128819e-05,
230
+ "loss": 1.1067,
231
+ "step": 180
232
+ },
233
+ {
234
+ "epoch": 0.67,
235
+ "learning_rate": 5.793117251841659e-05,
236
+ "loss": 1.1426,
237
+ "step": 185
238
+ },
239
+ {
240
+ "epoch": 0.69,
241
+ "learning_rate": 5.222801814877369e-05,
242
+ "loss": 1.1398,
243
+ "step": 190
244
+ },
245
+ {
246
+ "epoch": 0.71,
247
+ "learning_rate": 4.671957690646345e-05,
248
+ "loss": 1.1299,
249
+ "step": 195
250
+ },
251
+ {
252
+ "epoch": 0.73,
253
+ "learning_rate": 4.142830056718052e-05,
254
+ "loss": 1.1109,
255
+ "step": 200
256
+ },
257
+ {
258
+ "epoch": 0.75,
259
+ "learning_rate": 3.637575576734404e-05,
260
+ "loss": 1.0975,
261
+ "step": 205
262
+ },
263
+ {
264
+ "epoch": 0.77,
265
+ "learning_rate": 3.158253610095697e-05,
266
+ "loss": 1.1381,
267
+ "step": 210
268
+ },
269
+ {
270
+ "epoch": 0.78,
271
+ "learning_rate": 2.706817818247551e-05,
272
+ "loss": 1.1048,
273
+ "step": 215
274
+ },
275
+ {
276
+ "epoch": 0.8,
277
+ "learning_rate": 2.2851082017805703e-05,
278
+ "loss": 1.145,
279
+ "step": 220
280
+ },
281
+ {
282
+ "epoch": 0.82,
283
+ "learning_rate": 1.8948436007986546e-05,
284
+ "loss": 1.104,
285
+ "step": 225
286
+ },
287
+ {
288
+ "epoch": 0.84,
289
+ "learning_rate": 1.5376146891235598e-05,
290
+ "loss": 1.0886,
291
+ "step": 230
292
+ },
293
+ {
294
+ "epoch": 0.86,
295
+ "learning_rate": 1.214877490890578e-05,
296
+ "loss": 1.1076,
297
+ "step": 235
298
+ },
299
+ {
300
+ "epoch": 0.87,
301
+ "learning_rate": 9.279474459608805e-06,
302
+ "loss": 1.127,
303
+ "step": 240
304
+ },
305
+ {
306
+ "epoch": 0.89,
307
+ "learning_rate": 6.779940483393032e-06,
308
+ "loss": 1.1085,
309
+ "step": 245
310
+ },
311
+ {
312
+ "epoch": 0.91,
313
+ "learning_rate": 4.660360794506946e-06,
314
+ "loss": 1.1209,
315
+ "step": 250
316
+ },
317
+ {
318
+ "epoch": 0.93,
319
+ "learning_rate": 2.929374557035036e-06,
320
+ "loss": 1.0841,
321
+ "step": 255
322
+ },
323
+ {
324
+ "epoch": 0.95,
325
+ "learning_rate": 1.5940370726542863e-06,
326
+ "loss": 1.0937,
327
+ "step": 260
328
+ },
329
+ {
330
+ "epoch": 0.97,
331
+ "learning_rate": 6.597910240324967e-07,
332
+ "loss": 1.1317,
333
+ "step": 265
334
+ },
335
+ {
336
+ "epoch": 0.98,
337
+ "learning_rate": 1.3044429107700318e-07,
338
+ "loss": 1.1007,
339
+ "step": 270
340
+ },
341
  {
342
  "epoch": 1.0,
343
+ "eval_loss": 1.2122626304626465,
344
+ "eval_runtime": 236.2559,
345
+ "eval_samples_per_second": 2.671,
346
+ "eval_steps_per_second": 0.669,
347
+ "step": 274
348
  },
349
  {
350
  "epoch": 1.0,
351
+ "step": 274,
352
+ "total_flos": 9.632874487611392e+16,
353
+ "train_loss": 1.1792847770844064,
354
+ "train_runtime": 1678.8973,
355
+ "train_samples_per_second": 0.653,
356
+ "train_steps_per_second": 0.163
357
  }
358
  ],
359
  "logging_steps": 5,
360
+ "max_steps": 274,
361
  "num_input_tokens_seen": 0,
362
  "num_train_epochs": 1,
363
  "save_steps": 100,
364
+ "total_flos": 9.632874487611392e+16,
365
  "train_batch_size": 2,
366
  "trial_name": null,
367
  "trial_params": null