joelniklaus committed
Commit 598c023
1 Parent(s): e8120ef

Training in progress, step 750000

last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:dbe29aa02358e2143d72f205b3a4a91a5018b93605acb79f66ed604b5f19383d
+oid sha256:017878d7c611ba78f49d36fac2d29744f60586a2f3748e728a8daa9a71cdc581
 size 3480942553
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:bb5b90c98009d51696b36759831d22a584a9950a286ec1f19cf3456f91a65bb8
+oid sha256:39eb0b770ba80ef91093b86ebb60b6ce4aa8879c4b6c52b6fa67d411db4e4956
 size 1740493675
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a144c4622f8cf43db68e969e7f66a4be23d7f871fcf2af4c5f77f6f71f88ad58
+oid sha256:9e432fc8945094b64cfb44d4ef20ccce8569657d41d36365d65d61ece0bc81dc
 size 13611
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a144c4622f8cf43db68e969e7f66a4be23d7f871fcf2af4c5f77f6f71f88ad58
+oid sha256:9e432fc8945094b64cfb44d4ef20ccce8569657d41d36365d65d61ece0bc81dc
 size 13611
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a144c4622f8cf43db68e969e7f66a4be23d7f871fcf2af4c5f77f6f71f88ad58
+oid sha256:9e432fc8945094b64cfb44d4ef20ccce8569657d41d36365d65d61ece0bc81dc
 size 13611
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a144c4622f8cf43db68e969e7f66a4be23d7f871fcf2af4c5f77f6f71f88ad58
+oid sha256:9e432fc8945094b64cfb44d4ef20ccce8569657d41d36365d65d61ece0bc81dc
 size 13611
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a144c4622f8cf43db68e969e7f66a4be23d7f871fcf2af4c5f77f6f71f88ad58
+oid sha256:9e432fc8945094b64cfb44d4ef20ccce8569657d41d36365d65d61ece0bc81dc
 size 13611
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a144c4622f8cf43db68e969e7f66a4be23d7f871fcf2af4c5f77f6f71f88ad58
+oid sha256:9e432fc8945094b64cfb44d4ef20ccce8569657d41d36365d65d61ece0bc81dc
 size 13611
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a144c4622f8cf43db68e969e7f66a4be23d7f871fcf2af4c5f77f6f71f88ad58
+oid sha256:9e432fc8945094b64cfb44d4ef20ccce8569657d41d36365d65d61ece0bc81dc
 size 13611
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a144c4622f8cf43db68e969e7f66a4be23d7f871fcf2af4c5f77f6f71f88ad58
+oid sha256:9e432fc8945094b64cfb44d4ef20ccce8569657d41d36365d65d61ece0bc81dc
 size 13611
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d2d6ab5c2c0046e2c66cf435ff00463ff3da7208ddb35b5b6f19c87d94f3623b
+oid sha256:411052d7656a2fbf4baa154bd61bcb86c1d4e17113e6919b82f37e9aff99019f
 size 623
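
Every `CHANGED` entry above is a Git LFS pointer: the repository itself stores only a three-line stub (`version`, `oid sha256:…`, `size`), while the actual blob lives in LFS storage. Below is a minimal sketch, not part of the commit, of how such a blob could be checked against its pointer after download; the local paths are hypothetical and the helper names are mine.

```python
import hashlib
from pathlib import Path

def parse_lfs_pointer(pointer_path: str) -> dict:
    """Parse a Git LFS pointer file into its key/value fields."""
    fields = {}
    for line in Path(pointer_path).read_text().splitlines():
        key, _, value = line.partition(" ")
        fields[key] = value
    return fields

def verify_blob(pointer_path: str, blob_path: str) -> bool:
    """Compare a downloaded blob against the oid/size recorded in its pointer."""
    fields = parse_lfs_pointer(pointer_path)
    expected_oid = fields["oid"].removeprefix("sha256:")
    expected_size = int(fields["size"])

    digest = hashlib.sha256()
    size = 0
    with open(blob_path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            digest.update(chunk)
            size += len(chunk)
    return digest.hexdigest() == expected_oid and size == expected_size

# Hypothetical paths: the pointer as checked into git vs. the resolved blob.
print(verify_blob("last-checkpoint/optimizer.pt.pointer", "last-checkpoint/optimizer.pt"))
```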
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.7,
-  "global_step": 700000,
+  "epoch": 0.75,
+  "global_step": 750000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -4318,11 +4318,319 @@
       "eval_samples_per_second": 27.086,
       "eval_steps_per_second": 0.428,
       "step": 700000
+    },
+    {
+      "epoch": 0.7,
+      "learning_rate": 2.251431892919171e-05,
+      "loss": 0.7484,
+      "step": 701000
+    },
+    {
+      "epoch": 0.7,
+      "learning_rate": 2.237634634350934e-05,
+      "loss": 0.7388,
+      "step": 702000
+    },
+    {
+      "epoch": 0.7,
+      "learning_rate": 2.2238675845677663e-05,
+      "loss": 0.7153,
+      "step": 703000
+    },
+    {
+      "epoch": 0.7,
+      "learning_rate": 2.2101308941239203e-05,
+      "loss": 0.7454,
+      "step": 704000
+    },
+    {
+      "epoch": 0.7,
+      "learning_rate": 2.196424713241637e-05,
+      "loss": 0.7605,
+      "step": 705000
+    },
+    {
+      "epoch": 0.71,
+      "learning_rate": 2.182749191809518e-05,
+      "loss": 0.7548,
+      "step": 706000
+    },
+    {
+      "epoch": 0.71,
+      "learning_rate": 2.1691044793808734e-05,
+      "loss": 0.7524,
+      "step": 707000
+    },
+    {
+      "epoch": 0.71,
+      "learning_rate": 2.1554907251720945e-05,
+      "loss": 0.7297,
+      "step": 708000
+    },
+    {
+      "epoch": 0.71,
+      "learning_rate": 2.1419080780610123e-05,
+      "loss": 0.7613,
+      "step": 709000
+    },
+    {
+      "epoch": 0.71,
+      "learning_rate": 2.128356686585282e-05,
+      "loss": 0.7674,
+      "step": 710000
+    },
+    {
+      "epoch": 0.71,
+      "learning_rate": 2.1148366989407496e-05,
+      "loss": 0.7806,
+      "step": 711000
+    },
+    {
+      "epoch": 0.71,
+      "learning_rate": 2.1013482629798333e-05,
+      "loss": 0.7822,
+      "step": 712000
+    },
+    {
+      "epoch": 0.71,
+      "learning_rate": 2.0878915262099098e-05,
+      "loss": 0.7801,
+      "step": 713000
+    },
+    {
+      "epoch": 0.71,
+      "learning_rate": 2.0744666357916925e-05,
+      "loss": 0.7657,
+      "step": 714000
+    },
+    {
+      "epoch": 0.71,
+      "learning_rate": 2.061073738537635e-05,
+      "loss": 0.7521,
+      "step": 715000
+    },
+    {
+      "epoch": 0.72,
+      "learning_rate": 2.0477129809103147e-05,
+      "loss": 0.728,
+      "step": 716000
+    },
+    {
+      "epoch": 0.72,
+      "learning_rate": 2.0343845090208368e-05,
+      "loss": 0.7287,
+      "step": 717000
+    },
+    {
+      "epoch": 0.72,
+      "learning_rate": 2.0210884686272368e-05,
+      "loss": 0.7304,
+      "step": 718000
+    },
+    {
+      "epoch": 0.72,
+      "learning_rate": 2.0078250051328784e-05,
+      "loss": 0.747,
+      "step": 719000
+    },
+    {
+      "epoch": 0.72,
+      "learning_rate": 1.9945942635848748e-05,
+      "loss": 0.7278,
+      "step": 720000
+    },
+    {
+      "epoch": 0.72,
+      "learning_rate": 1.981396388672496e-05,
+      "loss": 0.7561,
+      "step": 721000
+    },
+    {
+      "epoch": 0.72,
+      "learning_rate": 1.9682315247255894e-05,
+      "loss": 0.7602,
+      "step": 722000
+    },
+    {
+      "epoch": 0.72,
+      "learning_rate": 1.9550998157129946e-05,
+      "loss": 0.7575,
+      "step": 723000
+    },
+    {
+      "epoch": 0.72,
+      "learning_rate": 1.942001405240979e-05,
+      "loss": 0.755,
+      "step": 724000
+    },
+    {
+      "epoch": 0.72,
+      "learning_rate": 1.928936436551661e-05,
+      "loss": 0.7168,
+      "step": 725000
+    },
+    {
+      "epoch": 0.73,
+      "learning_rate": 1.9159050525214452e-05,
+      "loss": 0.723,
+      "step": 726000
+    },
+    {
+      "epoch": 0.73,
+      "learning_rate": 1.9029073956594606e-05,
+      "loss": 0.7411,
+      "step": 727000
+    },
+    {
+      "epoch": 0.73,
+      "learning_rate": 1.8899436081059975e-05,
+      "loss": 0.7462,
+      "step": 728000
+    },
+    {
+      "epoch": 0.73,
+      "learning_rate": 1.877013831630961e-05,
+      "loss": 0.7513,
+      "step": 729000
+    },
+    {
+      "epoch": 0.73,
+      "learning_rate": 1.8641182076323148e-05,
+      "loss": 0.7438,
+      "step": 730000
+    },
+    {
+      "epoch": 0.73,
+      "learning_rate": 1.851256877134538e-05,
+      "loss": 0.7516,
+      "step": 731000
+    },
+    {
+      "epoch": 0.73,
+      "learning_rate": 1.838429980787081e-05,
+      "loss": 0.735,
+      "step": 732000
+    },
+    {
+      "epoch": 0.73,
+      "learning_rate": 1.8256376588628238e-05,
+      "loss": 0.7481,
+      "step": 733000
+    },
+    {
+      "epoch": 0.73,
+      "learning_rate": 1.8128800512565513e-05,
+      "loss": 0.7661,
+      "step": 734000
+    },
+    {
+      "epoch": 0.73,
+      "learning_rate": 1.800157297483417e-05,
+      "loss": 0.7545,
+      "step": 735000
+    },
+    {
+      "epoch": 0.74,
+      "learning_rate": 1.787469536677419e-05,
+      "loss": 0.7634,
+      "step": 736000
+    },
+    {
+      "epoch": 0.74,
+      "learning_rate": 1.774816907589873e-05,
+      "loss": 0.757,
+      "step": 737000
+    },
+    {
+      "epoch": 0.74,
+      "learning_rate": 1.7621995485879062e-05,
+      "loss": 0.7594,
+      "step": 738000
+    },
+    {
+      "epoch": 0.74,
+      "learning_rate": 1.749617597652934e-05,
+      "loss": 0.7589,
+      "step": 739000
+    },
+    {
+      "epoch": 0.74,
+      "learning_rate": 1.7370711923791567e-05,
+      "loss": 0.767,
+      "step": 740000
+    },
+    {
+      "epoch": 0.74,
+      "learning_rate": 1.7245604699720535e-05,
+      "loss": 0.7628,
+      "step": 741000
+    },
+    {
+      "epoch": 0.74,
+      "learning_rate": 1.712085567246878e-05,
+      "loss": 0.7691,
+      "step": 742000
+    },
+    {
+      "epoch": 0.74,
+      "learning_rate": 1.699646620627168e-05,
+      "loss": 0.771,
+      "step": 743000
+    },
+    {
+      "epoch": 0.74,
+      "learning_rate": 1.6872437661432517e-05,
+      "loss": 0.7716,
+      "step": 744000
+    },
+    {
+      "epoch": 0.74,
+      "learning_rate": 1.6748771394307585e-05,
+      "loss": 0.7728,
+      "step": 745000
+    },
+    {
+      "epoch": 0.75,
+      "learning_rate": 1.662546875729138e-05,
+      "loss": 0.7448,
+      "step": 746000
+    },
+    {
+      "epoch": 0.75,
+      "learning_rate": 1.6502531098801753e-05,
+      "loss": 0.772,
+      "step": 747000
+    },
+    {
+      "epoch": 0.75,
+      "learning_rate": 1.637995976326527e-05,
+      "loss": 0.7692,
+      "step": 748000
+    },
+    {
+      "epoch": 0.75,
+      "learning_rate": 1.62577560911024e-05,
+      "loss": 0.759,
+      "step": 749000
+    },
+    {
+      "epoch": 0.75,
+      "learning_rate": 1.6135921418712956e-05,
+      "loss": 0.7478,
+      "step": 750000
+    },
+    {
+      "epoch": 0.75,
+      "eval_loss": 0.39371195435523987,
+      "eval_runtime": 181.8712,
+      "eval_samples_per_second": 27.492,
+      "eval_steps_per_second": 0.434,
+      "step": 750000
     }
   ],
   "max_steps": 1000000,
   "num_train_epochs": 9223372036854775807,
-  "total_flos": 4.17681408786432e+19,
+  "total_flos": 4.4751579512832e+19,
   "trial_name": null,
   "trial_params": null
 }
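
The `trainer_state.json` hunk above only appends the step 701000–750000 log entries and bumps `epoch`, `global_step`, and `total_flos`. A minimal, standard-library sketch of loading the updated state and inspecting the bookkeeping shown in the diff (the local path assumes the checkpoint has been pulled):

```python
import json

# Load the checkpoint bookkeeping shown in the diff above.
with open("last-checkpoint/trainer_state.json") as f:
    state = json.load(f)

# In this run the epoch counter tracks the fraction of max_steps completed:
# 750000 / 1000000 = 0.75, matching the "epoch": 0.75 header.
print(state["epoch"], state["global_step"], state["max_steps"])

# The newest log_history entries are the step-750000 train and eval records.
for entry in state["log_history"][-2:]:
    print(entry)
```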
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:bb5b90c98009d51696b36759831d22a584a9950a286ec1f19cf3456f91a65bb8
+oid sha256:39eb0b770ba80ef91093b86ebb60b6ce4aa8879c4b6c52b6fa67d411db4e4956
 size 1740493675
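
If the LFS blob behind `pytorch_model.bin` has been resolved locally (about 1.7 GB per the `size` field), a quick sketch of peeking at the checkpoint's tensors without building the model; the path is hypothetical:

```python
import torch

# Inspect the raw state dict. This assumes "pytorch_model.bin" is the
# downloaded LFS blob, not the small pointer file shown in the diff above.
state_dict = torch.load("pytorch_model.bin", map_location="cpu")
print(f"{len(state_dict)} tensors")
for name, tensor in list(state_dict.items())[:5]:
    print(name, tuple(tensor.shape), tensor.dtype)
```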
runs/Mar22_03-02-10_t1v-n-ae339136-w-0/events.out.tfevents.1679454966.t1v-n-ae339136-w-0.10622.0 CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f7aace6ac9dd370a01161ccd7df8ab06deeb0d1f70aae54fc815d42c71c0f138
-size 12089
+oid sha256:3c2540c671b3739602ebca9c18ce48494cf726814b5298e2ec76a5f6a5a74070
+size 20365
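
The growing `events.out.tfevents.*` file is the TensorBoard log for this run. A minimal sketch of reading the logged scalars with TensorBoard's `EventAccumulator`; the scalar tag name is an assumption, so list the real ones via `Tags()` first:

```python
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator

# Point the accumulator at the run directory from the diff above.
ea = EventAccumulator("runs/Mar22_03-02-10_t1v-n-ae339136-w-0")
ea.Reload()

print(ea.Tags()["scalars"])  # discover the actual scalar tag names

# "train/loss" is an assumed tag name for the training loss scalar.
for event in ea.Scalars("train/loss")[-5:]:
    print(event.step, event.value)
```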