philschmid HF staff committed on
Commit
42b7ff7
•
1 Parent(s): 70719e2

Training in progress, step 900

Browse files
Files changed (39) hide show
  1. checkpoint-500/latest +0 -1
  2. {checkpoint-500 → checkpoint-900}/config.json +0 -0
  3. {checkpoint-500 → checkpoint-900}/generation_config.json +0 -0
  4. {checkpoint-500/global_step500 → checkpoint-900/global_step900}/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +1 -1
  5. {checkpoint-500/global_step500 → checkpoint-900/global_step900}/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +1 -1
  6. {checkpoint-500/global_step500 → checkpoint-900/global_step900}/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +1 -1
  7. {checkpoint-500/global_step500 → checkpoint-900/global_step900}/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +1 -1
  8. {checkpoint-500/global_step500 → checkpoint-900/global_step900}/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt +1 -1
  9. {checkpoint-500/global_step500 → checkpoint-900/global_step900}/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt +1 -1
  10. {checkpoint-500/global_step500 → checkpoint-900/global_step900}/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt +1 -1
  11. {checkpoint-500/global_step500 → checkpoint-900/global_step900}/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt +1 -1
  12. {checkpoint-500/global_step500 → checkpoint-900/global_step900}/zero_pp_rank_0_mp_rank_00_model_states.pt +1 -1
  13. {checkpoint-500/global_step500 → checkpoint-900/global_step900}/zero_pp_rank_1_mp_rank_00_model_states.pt +1 -1
  14. {checkpoint-500/global_step500 → checkpoint-900/global_step900}/zero_pp_rank_2_mp_rank_00_model_states.pt +1 -1
  15. {checkpoint-500/global_step500 → checkpoint-900/global_step900}/zero_pp_rank_3_mp_rank_00_model_states.pt +1 -1
  16. {checkpoint-500/global_step500 → checkpoint-900/global_step900}/zero_pp_rank_4_mp_rank_00_model_states.pt +1 -1
  17. {checkpoint-500/global_step500 → checkpoint-900/global_step900}/zero_pp_rank_5_mp_rank_00_model_states.pt +1 -1
  18. {checkpoint-500/global_step500 → checkpoint-900/global_step900}/zero_pp_rank_6_mp_rank_00_model_states.pt +1 -1
  19. {checkpoint-500/global_step500 → checkpoint-900/global_step900}/zero_pp_rank_7_mp_rank_00_model_states.pt +1 -1
  20. checkpoint-900/latest +1 -0
  21. {checkpoint-500 → checkpoint-900}/model-00001-of-00002.safetensors +1 -1
  22. {checkpoint-500 → checkpoint-900}/model-00002-of-00002.safetensors +1 -1
  23. {checkpoint-500 → checkpoint-900}/model.safetensors.index.json +0 -0
  24. {checkpoint-500 → checkpoint-900}/rng_state_0.pth +0 -0
  25. {checkpoint-500 → checkpoint-900}/rng_state_1.pth +0 -0
  26. {checkpoint-500 → checkpoint-900}/rng_state_2.pth +0 -0
  27. {checkpoint-500 → checkpoint-900}/rng_state_3.pth +0 -0
  28. {checkpoint-500 → checkpoint-900}/rng_state_4.pth +0 -0
  29. {checkpoint-500 → checkpoint-900}/rng_state_5.pth +0 -0
  30. {checkpoint-500 → checkpoint-900}/rng_state_6.pth +0 -0
  31. {checkpoint-500 → checkpoint-900}/rng_state_7.pth +0 -0
  32. {checkpoint-500 → checkpoint-900}/special_tokens_map.json +0 -0
  33. {checkpoint-500 → checkpoint-900}/tokenizer.json +0 -0
  34. {checkpoint-500 → checkpoint-900}/tokenizer.model +0 -0
  35. {checkpoint-500 → checkpoint-900}/tokenizer_config.json +0 -0
  36. {checkpoint-500 → checkpoint-900}/trainer_state.json +243 -3
  37. {checkpoint-500 → checkpoint-900}/training_args.bin +0 -0
  39. runs/Aug22_18-42-03_ip-26-0-150-12/events.out.tfevents.1692729850.ip-26-0-150-12.2895584.0 +2 -2
checkpoint-500/latest DELETED
@@ -1 +0,0 @@
1
- global_step500
 
 
{checkpoint-500 → checkpoint-900}/config.json RENAMED
File without changes
{checkpoint-500 → checkpoint-900}/generation_config.json RENAMED
File without changes
{checkpoint-500/global_step500 → checkpoint-900/global_step900}/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:784c0410b2a330cc433f981f1f5121bebabb1f3c1d781da96b8ef18c2e0db0c5
3
  size 10107626487
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c836b489002b4f2bc5a29c06d9ac1ff0ff9ee30d6de5c19ef31aa606835ed938
3
  size 10107626487
{checkpoint-500/global_step500 → checkpoint-900/global_step900}/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ab7a186e1928203b48ef1c8148819f63ee35e5e289f3d284193685d064f813fb
3
  size 10107626487
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4638b20ef605dce6f96e75096cbfbe87a7877b1abd3a9b5afa3af8cc52517378
3
  size 10107626487
{checkpoint-500/global_step500 → checkpoint-900/global_step900}/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:84d6ee17959a269d6d3e3b64d1b432370d3b086eb26b333e9a61a532db3253be
3
  size 10107626487
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dfa69d6467f9bab3fe28d1cafc0998fc3f9c75a2e03996a04ce6fba11aeb676b
3
  size 10107626487
{checkpoint-500/global_step500 → checkpoint-900/global_step900}/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c55b861ec085a2534503a9e5de7134d033ee8cb3625c35db2f3528370ae95c8d
3
  size 10107626487
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d12fa247d8406d10eafbdbb9e572469bb46469fb29b3ac3c995838172c4d47bf
3
  size 10107626487
{checkpoint-500/global_step500 → checkpoint-900/global_step900}/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c905c2c37c0f2f94f4a5ae2d10d79032961adea61c10e1cbb2614f6e20d67fda
3
  size 10107626487
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:27053d51c07b64de70a3af83bd9016c36eb79b84a0dd2fb87265c9b049096f2d
3
  size 10107626487
{checkpoint-500/global_step500 → checkpoint-900/global_step900}/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4824286c51d845b36a99e1044983ef94a086ad5d007203581ec617fd89edb112
3
  size 10107626487
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:22b618f502d939e19b4a622a3fef3afa25bf90479b8a9d95f720c791005f095f
3
  size 10107626487
{checkpoint-500/global_step500 → checkpoint-900/global_step900}/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:70ae89087b135570ef769d71356ac53cb1162eeecd4e647c2ea4069b8e19094b
3
  size 10107626487
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:02425bbc1465b95b6560156504de32ec52855dd852eaf031c47a5568fe26e41d
3
  size 10107626487
{checkpoint-500/global_step500 → checkpoint-900/global_step900}/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:83612a4a188eb3093e946a69b95f9a0c3ddbdfe58f70767b69309388acb7cc83
3
  size 10107626487
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c6b125eda1defc963593bac1d3086653fad3dd4e8dc70af33e84bc47f2edb5c7
3
  size 10107626487
{checkpoint-500/global_step500 → checkpoint-900/global_step900}/zero_pp_rank_0_mp_rank_00_model_states.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d8f31bdab7ff163280101e1923f64561cf4a6d974c46612977fe58c68ac04e22
3
  size 168086
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ecaf7b76d8592c78c8d76cedb0651a24329208a93db287965af7bdfc9587e4cb
3
  size 168086
{checkpoint-500/global_step500 → checkpoint-900/global_step900}/zero_pp_rank_1_mp_rank_00_model_states.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3129eceffca8659953e6eac5b948129a0984740d232358cc99e9dab7d4426c27
3
  size 168086
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fe3a2cfd541e47e579feddb0c9c70ece50f40de01bea95d993706f57708b0f10
3
  size 168086
{checkpoint-500/global_step500 → checkpoint-900/global_step900}/zero_pp_rank_2_mp_rank_00_model_states.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:251ecd563c2937490b8ba62da01d1c0bdd9597f24d5d788954b9c7073385b0ba
3
  size 168086
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e6c7b547ced115282b43a097bec3858ae4be3f9d8eacbfe01abbe9c124852b28
3
  size 168086
{checkpoint-500/global_step500 → checkpoint-900/global_step900}/zero_pp_rank_3_mp_rank_00_model_states.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:541652ce8ea40c93f93845b200b752fd459ea34dc30b036e763b7c0bbfe00041
3
  size 168086
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7be2ade1bb19c43fa3475e66a5cac9425535095126eacd7834f3a050e3615189
3
  size 168086
{checkpoint-500/global_step500 → checkpoint-900/global_step900}/zero_pp_rank_4_mp_rank_00_model_states.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:35ea0ff60f07ad502cd98ed64c92b5bf21728b26f8ec4ad810862d72b85b770f
3
  size 168086
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:46d8ecc23ea617dfdf47c0cf31104948b3d98ae33c56c21b8064ff14b2b7b4e4
3
  size 168086
{checkpoint-500/global_step500 → checkpoint-900/global_step900}/zero_pp_rank_5_mp_rank_00_model_states.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:85e23b7fcb735641a5db63cc2d01383e208488fea01050c3b293389297ce7410
3
  size 168086
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b541819e7f1bd67ada94b000d3d6ae85f17e1992c76dab6be7f76e14f3fd99ff
3
  size 168086
{checkpoint-500/global_step500 → checkpoint-900/global_step900}/zero_pp_rank_6_mp_rank_00_model_states.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a80c0240d46abab60a4dda1cd710e6cd7d2379e125e99d7f698dc8732ee786ed
3
  size 168086
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:752a19e2ff2dce2c503cd3c5d3a9a87205f3073b5dff8606dc8ceae6c5635e86
3
  size 168086
{checkpoint-500/global_step500 → checkpoint-900/global_step900}/zero_pp_rank_7_mp_rank_00_model_states.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fdeed66d696bbb7c4c3335a09232fd9fad0c780a4b1c78f93d28188e80446add
3
  size 168086
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6d0a0f4b76828975e030fcb18822398f75abb2aa403baaf7b4b0837b13cd85d3
3
  size 168086
checkpoint-900/latest ADDED
@@ -0,0 +1 @@
 
 
1
+ global_step900
{checkpoint-500 → checkpoint-900}/model-00001-of-00002.safetensors RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:be1ee9759da30984c76dadddd1d8c7c94c3758ab4a57cf347cff3fa275944df4
3
  size 9976576392
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:99a737b5b9f70216ba3b2df80ced780c4425ff708bbf429fde9862ab588ca6c4
3
  size 9976576392
{checkpoint-500 → checkpoint-900}/model-00002-of-00002.safetensors RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3c8789b1f2769d0cd20c1574875ba9b08d058a97d8713b8a2a36e33e481dd3d1
3
  size 3500296504
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fb8020484c2edfec4b31c0e0297c66b7bcc1add6fb17a98cf8c8de7f9aeef9bd
3
  size 3500296504
{checkpoint-500 → checkpoint-900}/model.safetensors.index.json RENAMED
File without changes
{checkpoint-500 → checkpoint-900}/rng_state_0.pth RENAMED
File without changes
{checkpoint-500 → checkpoint-900}/rng_state_1.pth RENAMED
File without changes
{checkpoint-500 → checkpoint-900}/rng_state_2.pth RENAMED
File without changes
{checkpoint-500 → checkpoint-900}/rng_state_3.pth RENAMED
File without changes
{checkpoint-500 → checkpoint-900}/rng_state_4.pth RENAMED
File without changes
{checkpoint-500 → checkpoint-900}/rng_state_5.pth RENAMED
File without changes
{checkpoint-500 → checkpoint-900}/rng_state_6.pth RENAMED
File without changes
{checkpoint-500 → checkpoint-900}/rng_state_7.pth RENAMED
File without changes
{checkpoint-500 → checkpoint-900}/special_tokens_map.json RENAMED
File without changes
{checkpoint-500 → checkpoint-900}/tokenizer.json RENAMED
File without changes
{checkpoint-500 → checkpoint-900}/tokenizer.model RENAMED
File without changes
{checkpoint-500 → checkpoint-900}/tokenizer_config.json RENAMED
File without changes
{checkpoint-500 → checkpoint-900}/trainer_state.json RENAMED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.16666666666666666,
5
- "global_step": 500,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -306,11 +306,251 @@
306
  "learning_rate": 0.0003,
307
  "loss": 2.1092,
308
  "step": 500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
309
  }
310
  ],
311
  "max_steps": 3000,
312
  "num_train_epochs": 9223372036854775807,
313
- "total_flos": 209379655680000.0,
314
  "trial_name": null,
315
  "trial_params": null
316
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.3,
5
+ "global_step": 900,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
306
  "learning_rate": 0.0003,
307
  "loss": 2.1092,
308
  "step": 500
309
+ },
310
+ {
311
+ "epoch": 0.17,
312
+ "learning_rate": 0.0003,
313
+ "loss": 2.0715,
314
+ "step": 510
315
+ },
316
+ {
317
+ "epoch": 0.17,
318
+ "learning_rate": 0.0003,
319
+ "loss": 2.0473,
320
+ "step": 520
321
+ },
322
+ {
323
+ "epoch": 0.18,
324
+ "learning_rate": 0.0003,
325
+ "loss": 2.0566,
326
+ "step": 530
327
+ },
328
+ {
329
+ "epoch": 0.18,
330
+ "learning_rate": 0.0003,
331
+ "loss": 1.9818,
332
+ "step": 540
333
+ },
334
+ {
335
+ "epoch": 0.18,
336
+ "learning_rate": 0.0003,
337
+ "loss": 1.9592,
338
+ "step": 550
339
+ },
340
+ {
341
+ "epoch": 0.19,
342
+ "learning_rate": 0.0003,
343
+ "loss": 1.8939,
344
+ "step": 560
345
+ },
346
+ {
347
+ "epoch": 0.19,
348
+ "learning_rate": 0.0003,
349
+ "loss": 1.8441,
350
+ "step": 570
351
+ },
352
+ {
353
+ "epoch": 0.19,
354
+ "learning_rate": 0.0003,
355
+ "loss": 1.808,
356
+ "step": 580
357
+ },
358
+ {
359
+ "epoch": 0.2,
360
+ "learning_rate": 0.0003,
361
+ "loss": 1.765,
362
+ "step": 590
363
+ },
364
+ {
365
+ "epoch": 0.2,
366
+ "learning_rate": 0.0003,
367
+ "loss": 1.706,
368
+ "step": 600
369
+ },
370
+ {
371
+ "epoch": 0.2,
372
+ "learning_rate": 0.0003,
373
+ "loss": 1.6328,
374
+ "step": 610
375
+ },
376
+ {
377
+ "epoch": 0.21,
378
+ "learning_rate": 0.0003,
379
+ "loss": 1.6197,
380
+ "step": 620
381
+ },
382
+ {
383
+ "epoch": 0.21,
384
+ "learning_rate": 0.0003,
385
+ "loss": 1.6106,
386
+ "step": 630
387
+ },
388
+ {
389
+ "epoch": 0.21,
390
+ "learning_rate": 0.0003,
391
+ "loss": 1.5649,
392
+ "step": 640
393
+ },
394
+ {
395
+ "epoch": 0.22,
396
+ "learning_rate": 0.0003,
397
+ "loss": 1.5001,
398
+ "step": 650
399
+ },
400
+ {
401
+ "epoch": 0.22,
402
+ "learning_rate": 0.0003,
403
+ "loss": 1.4926,
404
+ "step": 660
405
+ },
406
+ {
407
+ "epoch": 0.22,
408
+ "learning_rate": 0.0003,
409
+ "loss": 1.4682,
410
+ "step": 670
411
+ },
412
+ {
413
+ "epoch": 0.23,
414
+ "learning_rate": 0.0003,
415
+ "loss": 1.442,
416
+ "step": 680
417
+ },
418
+ {
419
+ "epoch": 0.23,
420
+ "learning_rate": 0.0003,
421
+ "loss": 1.4117,
422
+ "step": 690
423
+ },
424
+ {
425
+ "epoch": 0.23,
426
+ "learning_rate": 0.0003,
427
+ "loss": 1.379,
428
+ "step": 700
429
+ },
430
+ {
431
+ "epoch": 0.24,
432
+ "learning_rate": 0.0003,
433
+ "loss": 1.3609,
434
+ "step": 710
435
+ },
436
+ {
437
+ "epoch": 0.24,
438
+ "learning_rate": 0.0003,
439
+ "loss": 1.3224,
440
+ "step": 720
441
+ },
442
+ {
443
+ "epoch": 0.24,
444
+ "learning_rate": 0.0003,
445
+ "loss": 1.2853,
446
+ "step": 730
447
+ },
448
+ {
449
+ "epoch": 0.25,
450
+ "learning_rate": 0.0003,
451
+ "loss": 1.2762,
452
+ "step": 740
453
+ },
454
+ {
455
+ "epoch": 0.25,
456
+ "learning_rate": 0.0003,
457
+ "loss": 1.2265,
458
+ "step": 750
459
+ },
460
+ {
461
+ "epoch": 0.25,
462
+ "learning_rate": 0.0003,
463
+ "loss": 1.2232,
464
+ "step": 760
465
+ },
466
+ {
467
+ "epoch": 0.26,
468
+ "learning_rate": 0.0003,
469
+ "loss": 1.2075,
470
+ "step": 770
471
+ },
472
+ {
473
+ "epoch": 0.26,
474
+ "learning_rate": 0.0003,
475
+ "loss": 1.168,
476
+ "step": 780
477
+ },
478
+ {
479
+ "epoch": 0.26,
480
+ "learning_rate": 0.0003,
481
+ "loss": 1.1472,
482
+ "step": 790
483
+ },
484
+ {
485
+ "epoch": 0.27,
486
+ "learning_rate": 0.0003,
487
+ "loss": 1.118,
488
+ "step": 800
489
+ },
490
+ {
491
+ "epoch": 0.27,
492
+ "learning_rate": 0.0003,
493
+ "loss": 1.1228,
494
+ "step": 810
495
+ },
496
+ {
497
+ "epoch": 0.27,
498
+ "learning_rate": 0.0003,
499
+ "loss": 1.1339,
500
+ "step": 820
501
+ },
502
+ {
503
+ "epoch": 0.28,
504
+ "learning_rate": 0.0003,
505
+ "loss": 1.0853,
506
+ "step": 830
507
+ },
508
+ {
509
+ "epoch": 0.28,
510
+ "learning_rate": 0.0003,
511
+ "loss": 1.0676,
512
+ "step": 840
513
+ },
514
+ {
515
+ "epoch": 0.28,
516
+ "learning_rate": 0.0003,
517
+ "loss": 1.0905,
518
+ "step": 850
519
+ },
520
+ {
521
+ "epoch": 0.29,
522
+ "learning_rate": 0.0003,
523
+ "loss": 1.076,
524
+ "step": 860
525
+ },
526
+ {
527
+ "epoch": 0.29,
528
+ "learning_rate": 0.0003,
529
+ "loss": 1.0202,
530
+ "step": 870
531
+ },
532
+ {
533
+ "epoch": 0.29,
534
+ "learning_rate": 0.0003,
535
+ "loss": 1.0123,
536
+ "step": 880
537
+ },
538
+ {
539
+ "epoch": 0.3,
540
+ "learning_rate": 0.0003,
541
+ "loss": 0.9863,
542
+ "step": 890
543
+ },
544
+ {
545
+ "epoch": 0.3,
546
+ "learning_rate": 0.0003,
547
+ "loss": 0.9347,
548
+ "step": 900
549
  }
550
  ],
551
  "max_steps": 3000,
552
  "num_train_epochs": 9223372036854775807,
553
+ "total_flos": 376883380224000.0,
554
  "trial_name": null,
555
  "trial_params": null
556
  }
{checkpoint-500 → checkpoint-900}/training_args.bin RENAMED
File without changes
{checkpoint-500 → checkpoint-900}/zero_to_fp32.py RENAMED
File without changes
runs/Aug22_18-42-03_ip-26-0-150-12/events.out.tfevents.1692729850.ip-26-0-150-12.2895584.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b518fc726384856248944d1be48f0582a8e8298752371b3b3f5437577456a8b7
3
- size 16711
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bf0edb971b8a90918c57c5b33c853a049ca331754786d7d176c54919449c9aaa
3
+ size 18281