philschmid HF staff committed on
Commit
10c8a05
•
1 Parent(s): 8560e14

Training in progress, step 600

Browse files
Files changed (39) hide show
  1. checkpoint-200/latest +0 -1
  2. {checkpoint-200 → checkpoint-600}/config.json +0 -0
  3. {checkpoint-200 → checkpoint-600}/generation_config.json +0 -0
  4. {checkpoint-200/global_step200 → checkpoint-600/global_step600}/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +1 -1
  5. {checkpoint-200/global_step200 → checkpoint-600/global_step600}/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +1 -1
  6. {checkpoint-200/global_step200 → checkpoint-600/global_step600}/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +1 -1
  7. {checkpoint-200/global_step200 → checkpoint-600/global_step600}/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +1 -1
  8. {checkpoint-200/global_step200 → checkpoint-600/global_step600}/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt +1 -1
  9. {checkpoint-200/global_step200 → checkpoint-600/global_step600}/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt +1 -1
  10. {checkpoint-200/global_step200 → checkpoint-600/global_step600}/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt +1 -1
  11. {checkpoint-200/global_step200 → checkpoint-600/global_step600}/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt +1 -1
  12. {checkpoint-200/global_step200 → checkpoint-600/global_step600}/zero_pp_rank_0_mp_rank_00_model_states.pt +1 -1
  13. {checkpoint-200/global_step200 → checkpoint-600/global_step600}/zero_pp_rank_1_mp_rank_00_model_states.pt +1 -1
  14. {checkpoint-200/global_step200 → checkpoint-600/global_step600}/zero_pp_rank_2_mp_rank_00_model_states.pt +1 -1
  15. {checkpoint-200/global_step200 → checkpoint-600/global_step600}/zero_pp_rank_3_mp_rank_00_model_states.pt +1 -1
  16. {checkpoint-200/global_step200 → checkpoint-600/global_step600}/zero_pp_rank_4_mp_rank_00_model_states.pt +1 -1
  17. {checkpoint-200/global_step200 → checkpoint-600/global_step600}/zero_pp_rank_5_mp_rank_00_model_states.pt +1 -1
  18. {checkpoint-200/global_step200 → checkpoint-600/global_step600}/zero_pp_rank_6_mp_rank_00_model_states.pt +1 -1
  19. {checkpoint-200/global_step200 → checkpoint-600/global_step600}/zero_pp_rank_7_mp_rank_00_model_states.pt +1 -1
  20. checkpoint-600/latest +1 -0
  21. {checkpoint-200 → checkpoint-600}/model-00001-of-00002.safetensors +1 -1
  22. {checkpoint-200 → checkpoint-600}/model-00002-of-00002.safetensors +1 -1
  23. {checkpoint-200 → checkpoint-600}/model.safetensors.index.json +0 -0
  24. {checkpoint-200 → checkpoint-600}/rng_state_0.pth +0 -0
  25. {checkpoint-200 → checkpoint-600}/rng_state_1.pth +0 -0
  26. {checkpoint-200 → checkpoint-600}/rng_state_2.pth +0 -0
  27. {checkpoint-200 → checkpoint-600}/rng_state_3.pth +0 -0
  28. {checkpoint-200 → checkpoint-600}/rng_state_4.pth +0 -0
  29. {checkpoint-200 → checkpoint-600}/rng_state_5.pth +0 -0
  30. {checkpoint-200 → checkpoint-600}/rng_state_6.pth +0 -0
  31. {checkpoint-200 → checkpoint-600}/rng_state_7.pth +0 -0
  32. {checkpoint-200 → checkpoint-600}/special_tokens_map.json +0 -0
  33. {checkpoint-200 → checkpoint-600}/tokenizer.json +0 -0
  34. {checkpoint-200 → checkpoint-600}/tokenizer.model +0 -0
  35. {checkpoint-200 → checkpoint-600}/tokenizer_config.json +0 -0
  36. {checkpoint-200 → checkpoint-600}/trainer_state.json +243 -3
  37. {checkpoint-200 → checkpoint-600}/training_args.bin +0 -0
  38. {checkpoint-200 → checkpoint-600}/zero_to_fp32.py +0 -0
  39. runs/Aug22_18-42-03_ip-26-0-150-12/events.out.tfevents.1692729850.ip-26-0-150-12.2895584.0 +2 -2
checkpoint-200/latest DELETED
@@ -1 +0,0 @@
1
- global_step200
 
 
{checkpoint-200 → checkpoint-600}/config.json RENAMED
File without changes
{checkpoint-200 → checkpoint-600}/generation_config.json RENAMED
File without changes
{checkpoint-200/global_step200 → checkpoint-600/global_step600}/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:71d2f9ca99b2c8752d5acd8c1555a01f48f2d977f249dd1f70f224809fc435f6
3
  size 10107626487
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:66eb01a0e90463b9da96b92c71aa113fb0cc395d468d494cbaaf30b0c996c900
3
  size 10107626487
{checkpoint-200/global_step200 → checkpoint-600/global_step600}/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f47da0e5552c15f333a9fd0db761ede06034ac687cdd21456980f759ee21e8a8
3
  size 10107626487
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1a9c27857331e866aa180ca994d30ecdeb152ce200686244186589fd2b6c5399
3
  size 10107626487
{checkpoint-200/global_step200 → checkpoint-600/global_step600}/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:043a4a45cef8110b8e1abf415169f560134e1070f36c115e8b793d56d92a9857
3
  size 10107626487
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:94cc70fd746640a3708551bd27076cca0c4493fc0ec627adc422dc06e217b4f1
3
  size 10107626487
{checkpoint-200/global_step200 → checkpoint-600/global_step600}/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d25278e6c3183c87ff05e24b5f73e31e348f5f49708ae5a6a2f379bed5d99274
3
  size 10107626487
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:53f785e4989207b0d802f03d21811f0310e6887f491becf18aab59dc2a8e34dc
3
  size 10107626487
{checkpoint-200/global_step200 → checkpoint-600/global_step600}/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5f6246741e64c84748a0a5021600606f684a38ed68eccec43754832e49d496e5
3
  size 10107626487
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:055b189cd86f4ab7885ffd7733ca11939b1c02c7b961887c23ff3765b85b2c26
3
  size 10107626487
{checkpoint-200/global_step200 → checkpoint-600/global_step600}/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b647e38e89a8c78d6adf4116b9380a5818e3e5155a62f69820f9d50472b46b5a
3
  size 10107626487
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b598e84707f1b589d6e34c06fddfcfd1c635efdeb8c34cf5ca13a510f0949837
3
  size 10107626487
{checkpoint-200/global_step200 → checkpoint-600/global_step600}/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9208254f9c3fd4a0d7bd799d8f304c16b364f1c33b54df10499e9572370dcb22
3
  size 10107626487
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:15519b6df3b12db386d8c11e652af04badf693c055d7c3d49091aad19745e337
3
  size 10107626487
{checkpoint-200/global_step200 → checkpoint-600/global_step600}/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:eb61950d5fc52bbbb13efa4dabadc3e4bdb7891654f30a7ee148674c610dbead
3
  size 10107626487
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a17fc5b79f53abceedb3cc4c49fba56335c0c00a6ffa5320a11cacac9ad4e106
3
  size 10107626487
{checkpoint-200/global_step200 → checkpoint-600/global_step600}/zero_pp_rank_0_mp_rank_00_model_states.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c34dac8a5115294c99ffe0ff0d6c8030d397e9e76a4c3cae10c6f57b5951ec26
3
  size 168086
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c17dd6ac6428c5d94c99148aa50c12df93df8b2462cbf19e8e84d9d333fcbd1e
3
  size 168086
{checkpoint-200/global_step200 → checkpoint-600/global_step600}/zero_pp_rank_1_mp_rank_00_model_states.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:297f1b081b4b97fa75810d998d3c622a62cde737ec4895f3fb93596432090506
3
  size 168086
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6dade817edf76921be5c0796cbcfdf915c904c183acf9c931777f623390eb1f3
3
  size 168086
{checkpoint-200/global_step200 → checkpoint-600/global_step600}/zero_pp_rank_2_mp_rank_00_model_states.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1a5e2693442a9df7415519e489afddd763bc7416f8d2159d1f5ec200516f2de5
3
  size 168086
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:87c1c5de42abbdea85c5b809d6ad1ea6ea81c0292569eae30054d35b7568f9e0
3
  size 168086
{checkpoint-200/global_step200 → checkpoint-600/global_step600}/zero_pp_rank_3_mp_rank_00_model_states.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:234a6a04b487bb30540f96fad960db6fa486d6cb3e11dca84b683d3455822ed4
3
  size 168086
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6d2bc5f999fb8ccc85ce968f9308e1832cc481d0a36fbfee21231ee0251223b9
3
  size 168086
{checkpoint-200/global_step200 → checkpoint-600/global_step600}/zero_pp_rank_4_mp_rank_00_model_states.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:70cedfd022c0dd54d94c1f74f3f724b37365b3f63080c6b277275b92d068d938
3
  size 168086
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:287d7ae815a009508c9d5763cdb2b95cdccbcb41047461ddb5942fe4be828cd4
3
  size 168086
{checkpoint-200/global_step200 → checkpoint-600/global_step600}/zero_pp_rank_5_mp_rank_00_model_states.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:25fa10cabd6744c9bb94903703359f4c94668a01727d8d601f3c9042de5edc34
3
  size 168086
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c563d457d97887b6b8ebca491ddbbb5f7d02940671baa85e856e70f2373dd1e8
3
  size 168086
{checkpoint-200/global_step200 → checkpoint-600/global_step600}/zero_pp_rank_6_mp_rank_00_model_states.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ba591f0a7ee08808a1d41a17aa1fe43bfd502af61513671a76b2f3c84760bf8d
3
  size 168086
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bbaa5dfcd7813a949984070a065f4d96fd979a8486c397c0fe82e466df55719e
3
  size 168086
{checkpoint-200/global_step200 → checkpoint-600/global_step600}/zero_pp_rank_7_mp_rank_00_model_states.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:acff605da5d216a99e47f8ca6229a4fbdf7c25932dd3b8b0ba4d45bfc72ccbaf
3
  size 168086
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2cfe0ea806988e2741a7bc673de2fae1d546b08c34f0cf088d122f77c3098bac
3
  size 168086
checkpoint-600/latest ADDED
@@ -0,0 +1 @@
 
 
1
+ global_step600
{checkpoint-200 → checkpoint-600}/model-00001-of-00002.safetensors RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:06b4a7088da3def00aeba160f50fbccff94c4b493b84b0b2d4ca97dba29c300f
3
  size 9976576392
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2d5217aa0cdf2a3e998ac2276247c8b950f52424ef53ba6bedbb47fe566772c4
3
  size 9976576392
{checkpoint-200 → checkpoint-600}/model-00002-of-00002.safetensors RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e8a1f3f35590cedc71b241189f82c07f0f7391e458939e0fbe5763893ed0d349
3
  size 3500296504
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:15c35bdf2b64df17b13e8aa89bf76a22e3fe647fb7a427d6d581f7b18dbbea71
3
  size 3500296504
{checkpoint-200 → checkpoint-600}/model.safetensors.index.json RENAMED
File without changes
{checkpoint-200 → checkpoint-600}/rng_state_0.pth RENAMED
File without changes
{checkpoint-200 → checkpoint-600}/rng_state_1.pth RENAMED
File without changes
{checkpoint-200 → checkpoint-600}/rng_state_2.pth RENAMED
File without changes
{checkpoint-200 → checkpoint-600}/rng_state_3.pth RENAMED
File without changes
{checkpoint-200 → checkpoint-600}/rng_state_4.pth RENAMED
File without changes
{checkpoint-200 → checkpoint-600}/rng_state_5.pth RENAMED
File without changes
{checkpoint-200 → checkpoint-600}/rng_state_6.pth RENAMED
File without changes
{checkpoint-200 → checkpoint-600}/rng_state_7.pth RENAMED
File without changes
{checkpoint-200 → checkpoint-600}/special_tokens_map.json RENAMED
File without changes
{checkpoint-200 → checkpoint-600}/tokenizer.json RENAMED
File without changes
{checkpoint-200 → checkpoint-600}/tokenizer.model RENAMED
File without changes
{checkpoint-200 → checkpoint-600}/tokenizer_config.json RENAMED
File without changes
{checkpoint-200 → checkpoint-600}/trainer_state.json RENAMED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.06666666666666667,
5
- "global_step": 200,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -126,11 +126,251 @@
126
  "learning_rate": 0.0003,
127
  "loss": 4.9139,
128
  "step": 200
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
129
  }
130
  ],
131
  "max_steps": 3000,
132
  "num_train_epochs": 9223372036854775807,
133
- "total_flos": 83751862272000.0,
134
  "trial_name": null,
135
  "trial_params": null
136
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.2,
5
+ "global_step": 600,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
126
  "learning_rate": 0.0003,
127
  "loss": 4.9139,
128
  "step": 200
129
+ },
130
+ {
131
+ "epoch": 0.07,
132
+ "learning_rate": 0.0003,
133
+ "loss": 4.8974,
134
+ "step": 210
135
+ },
136
+ {
137
+ "epoch": 0.07,
138
+ "learning_rate": 0.0003,
139
+ "loss": 4.9036,
140
+ "step": 220
141
+ },
142
+ {
143
+ "epoch": 0.08,
144
+ "learning_rate": 0.0003,
145
+ "loss": 4.8635,
146
+ "step": 230
147
+ },
148
+ {
149
+ "epoch": 0.08,
150
+ "learning_rate": 0.0003,
151
+ "loss": 4.7883,
152
+ "step": 240
153
+ },
154
+ {
155
+ "epoch": 0.08,
156
+ "learning_rate": 0.0003,
157
+ "loss": 4.7681,
158
+ "step": 250
159
+ },
160
+ {
161
+ "epoch": 0.09,
162
+ "learning_rate": 0.0003,
163
+ "loss": 4.7572,
164
+ "step": 260
165
+ },
166
+ {
167
+ "epoch": 0.09,
168
+ "learning_rate": 0.0003,
169
+ "loss": 4.7552,
170
+ "step": 270
171
+ },
172
+ {
173
+ "epoch": 0.09,
174
+ "learning_rate": 0.0003,
175
+ "loss": 4.706,
176
+ "step": 280
177
+ },
178
+ {
179
+ "epoch": 0.1,
180
+ "learning_rate": 0.0003,
181
+ "loss": 4.7015,
182
+ "step": 290
183
+ },
184
+ {
185
+ "epoch": 0.1,
186
+ "learning_rate": 0.0003,
187
+ "loss": 4.695,
188
+ "step": 300
189
+ },
190
+ {
191
+ "epoch": 0.1,
192
+ "learning_rate": 0.0003,
193
+ "loss": 4.6808,
194
+ "step": 310
195
+ },
196
+ {
197
+ "epoch": 0.11,
198
+ "learning_rate": 0.0003,
199
+ "loss": 4.6423,
200
+ "step": 320
201
+ },
202
+ {
203
+ "epoch": 0.11,
204
+ "learning_rate": 0.0003,
205
+ "loss": 4.613,
206
+ "step": 330
207
+ },
208
+ {
209
+ "epoch": 0.11,
210
+ "learning_rate": 0.0003,
211
+ "loss": 4.5851,
212
+ "step": 340
213
+ },
214
+ {
215
+ "epoch": 0.12,
216
+ "learning_rate": 0.0003,
217
+ "loss": 4.5882,
218
+ "step": 350
219
+ },
220
+ {
221
+ "epoch": 0.12,
222
+ "learning_rate": 0.0003,
223
+ "loss": 4.6228,
224
+ "step": 360
225
+ },
226
+ {
227
+ "epoch": 0.12,
228
+ "learning_rate": 0.0003,
229
+ "loss": 4.6269,
230
+ "step": 370
231
+ },
232
+ {
233
+ "epoch": 0.13,
234
+ "learning_rate": 0.0003,
235
+ "loss": 4.5364,
236
+ "step": 380
237
+ },
238
+ {
239
+ "epoch": 0.13,
240
+ "learning_rate": 0.0003,
241
+ "loss": 4.4992,
242
+ "step": 390
243
+ },
244
+ {
245
+ "epoch": 0.13,
246
+ "learning_rate": 0.0003,
247
+ "loss": 4.4799,
248
+ "step": 400
249
+ },
250
+ {
251
+ "epoch": 0.14,
252
+ "learning_rate": 0.0003,
253
+ "loss": 4.3733,
254
+ "step": 410
255
+ },
256
+ {
257
+ "epoch": 0.14,
258
+ "learning_rate": 0.0003,
259
+ "loss": 4.1788,
260
+ "step": 420
261
+ },
262
+ {
263
+ "epoch": 0.14,
264
+ "learning_rate": 0.0003,
265
+ "loss": 3.6706,
266
+ "step": 430
267
+ },
268
+ {
269
+ "epoch": 0.15,
270
+ "learning_rate": 0.0003,
271
+ "loss": 2.8767,
272
+ "step": 440
273
+ },
274
+ {
275
+ "epoch": 0.15,
276
+ "learning_rate": 0.0003,
277
+ "loss": 2.3927,
278
+ "step": 450
279
+ },
280
+ {
281
+ "epoch": 0.15,
282
+ "learning_rate": 0.0003,
283
+ "loss": 2.3062,
284
+ "step": 460
285
+ },
286
+ {
287
+ "epoch": 0.16,
288
+ "learning_rate": 0.0003,
289
+ "loss": 2.2299,
290
+ "step": 470
291
+ },
292
+ {
293
+ "epoch": 0.16,
294
+ "learning_rate": 0.0003,
295
+ "loss": 2.1739,
296
+ "step": 480
297
+ },
298
+ {
299
+ "epoch": 0.16,
300
+ "learning_rate": 0.0003,
301
+ "loss": 2.1303,
302
+ "step": 490
303
+ },
304
+ {
305
+ "epoch": 0.17,
306
+ "learning_rate": 0.0003,
307
+ "loss": 2.1092,
308
+ "step": 500
309
+ },
310
+ {
311
+ "epoch": 0.17,
312
+ "learning_rate": 0.0003,
313
+ "loss": 2.0715,
314
+ "step": 510
315
+ },
316
+ {
317
+ "epoch": 0.17,
318
+ "learning_rate": 0.0003,
319
+ "loss": 2.0473,
320
+ "step": 520
321
+ },
322
+ {
323
+ "epoch": 0.18,
324
+ "learning_rate": 0.0003,
325
+ "loss": 2.0566,
326
+ "step": 530
327
+ },
328
+ {
329
+ "epoch": 0.18,
330
+ "learning_rate": 0.0003,
331
+ "loss": 1.9818,
332
+ "step": 540
333
+ },
334
+ {
335
+ "epoch": 0.18,
336
+ "learning_rate": 0.0003,
337
+ "loss": 1.9592,
338
+ "step": 550
339
+ },
340
+ {
341
+ "epoch": 0.19,
342
+ "learning_rate": 0.0003,
343
+ "loss": 1.8939,
344
+ "step": 560
345
+ },
346
+ {
347
+ "epoch": 0.19,
348
+ "learning_rate": 0.0003,
349
+ "loss": 1.8441,
350
+ "step": 570
351
+ },
352
+ {
353
+ "epoch": 0.19,
354
+ "learning_rate": 0.0003,
355
+ "loss": 1.808,
356
+ "step": 580
357
+ },
358
+ {
359
+ "epoch": 0.2,
360
+ "learning_rate": 0.0003,
361
+ "loss": 1.765,
362
+ "step": 590
363
+ },
364
+ {
365
+ "epoch": 0.2,
366
+ "learning_rate": 0.0003,
367
+ "loss": 1.706,
368
+ "step": 600
369
  }
370
  ],
371
  "max_steps": 3000,
372
  "num_train_epochs": 9223372036854775807,
373
+ "total_flos": 251255586816000.0,
374
  "trial_name": null,
375
  "trial_params": null
376
  }
{checkpoint-200 → checkpoint-600}/training_args.bin RENAMED
File without changes
{checkpoint-200 → checkpoint-600}/zero_to_fp32.py RENAMED
File without changes
runs/Aug22_18-42-03_ip-26-0-150-12/events.out.tfevents.1692729850.ip-26-0-150-12.2895584.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7125e059489f5648613066e3ee25f5b3db395f89a0c3ea565999ab8326362e4e
3
- size 12001
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:accaaa15e2b69671590fc720552ed7a07108f7a4bcf6148faf4fd30c13b18e22
3
+ size 13571