joelniklaus committed on
Commit a9c8650
1 Parent(s): 7e2fe2f

Training in progress, step 900000

last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:6ad441ea99d1cf6fe2d1e72819c0f5db00a2a82c3dde945949e50f8f2ffe61e3
+ oid sha256:3e4e671b063f0fab3308496f90e4263c75ca2bcfe806fb1ef05c7fad2e3b9ef1
  size 885325017
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:276a7965140304f9b691ebf5dbaef28e68a0d61c7432397f5d9a4edba8ae4065
+ oid sha256:4e8ae5410675f243d0890ea34e523d99f0c2ce208d877293024b72a5ea3fe4ee
  size 442675755
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:8a2c3c50439af9a540b521038344b9830557724b4d1f3808af26f2999c2f1ea7
+ oid sha256:78b6318c1c37a88a7a88d1bea333e6b55fd0f1d3338fd7f99e67179de2e57d78
  size 13611
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:8a2c3c50439af9a540b521038344b9830557724b4d1f3808af26f2999c2f1ea7
+ oid sha256:78b6318c1c37a88a7a88d1bea333e6b55fd0f1d3338fd7f99e67179de2e57d78
  size 13611
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:8a2c3c50439af9a540b521038344b9830557724b4d1f3808af26f2999c2f1ea7
+ oid sha256:78b6318c1c37a88a7a88d1bea333e6b55fd0f1d3338fd7f99e67179de2e57d78
  size 13611
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:8a2c3c50439af9a540b521038344b9830557724b4d1f3808af26f2999c2f1ea7
+ oid sha256:78b6318c1c37a88a7a88d1bea333e6b55fd0f1d3338fd7f99e67179de2e57d78
  size 13611
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:8a2c3c50439af9a540b521038344b9830557724b4d1f3808af26f2999c2f1ea7
+ oid sha256:78b6318c1c37a88a7a88d1bea333e6b55fd0f1d3338fd7f99e67179de2e57d78
  size 13611
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:8a2c3c50439af9a540b521038344b9830557724b4d1f3808af26f2999c2f1ea7
+ oid sha256:78b6318c1c37a88a7a88d1bea333e6b55fd0f1d3338fd7f99e67179de2e57d78
  size 13611
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:8a2c3c50439af9a540b521038344b9830557724b4d1f3808af26f2999c2f1ea7
+ oid sha256:78b6318c1c37a88a7a88d1bea333e6b55fd0f1d3338fd7f99e67179de2e57d78
  size 13611
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:8a2c3c50439af9a540b521038344b9830557724b4d1f3808af26f2999c2f1ea7
+ oid sha256:78b6318c1c37a88a7a88d1bea333e6b55fd0f1d3338fd7f99e67179de2e57d78
  size 13611
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:adedebe0cc7e07de957a9e2967d6e9c3934a9fdca3245f46a29d125e5e36192e
+ oid sha256:98fbf159ce1bb90afdab5d6ac994b4ab633fc21d8eb6c04c41c7f3a26253e5b5
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
  {
  "best_metric": null,
  "best_model_checkpoint": null,
- "epoch": 5.128545,
- "global_step": 850000,
+ "epoch": 6.034254,
+ "global_step": 900000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -5242,11 +5242,319 @@
  "eval_samples_per_second": 420.44,
  "eval_steps_per_second": 3.364,
  "step": 850000
+ },
+ {
+ "epoch": 5.13,
+ "learning_rate": 5.947856562792925e-06,
+ "loss": 0.5503,
+ "step": 851000
+ },
+ {
+ "epoch": 5.13,
+ "learning_rate": 5.869882433093155e-06,
+ "loss": 0.5584,
+ "step": 852000
+ },
+ {
+ "epoch": 5.13,
+ "learning_rate": 5.79239090328883e-06,
+ "loss": 0.5572,
+ "step": 853000
+ },
+ {
+ "epoch": 5.13,
+ "learning_rate": 5.715382820814885e-06,
+ "loss": 0.5602,
+ "step": 854000
+ },
+ {
+ "epoch": 5.13,
+ "learning_rate": 5.6388590278194096e-06,
+ "loss": 0.5611,
+ "step": 855000
+ },
+ {
+ "epoch": 5.13,
+ "learning_rate": 5.562820361154314e-06,
+ "loss": 0.5683,
+ "step": 856000
+ },
+ {
+ "epoch": 5.14,
+ "learning_rate": 5.48726765236629e-06,
+ "loss": 0.5604,
+ "step": 857000
+ },
+ {
+ "epoch": 5.14,
+ "learning_rate": 5.412201727687644e-06,
+ "loss": 0.5527,
+ "step": 858000
+ },
+ {
+ "epoch": 5.14,
+ "learning_rate": 5.337623408027293e-06,
+ "loss": 0.5515,
+ "step": 859000
+ },
+ {
+ "epoch": 5.14,
+ "learning_rate": 5.263533508961827e-06,
+ "loss": 0.5488,
+ "step": 860000
+ },
+ {
+ "epoch": 5.14,
+ "learning_rate": 5.1899328407264855e-06,
+ "loss": 0.5483,
+ "step": 861000
+ },
+ {
+ "epoch": 5.14,
+ "learning_rate": 5.116822208206396e-06,
+ "loss": 0.5586,
+ "step": 862000
+ },
+ {
+ "epoch": 5.14,
+ "learning_rate": 5.044202410927706e-06,
+ "loss": 0.5596,
+ "step": 863000
+ },
+ {
+ "epoch": 5.14,
+ "learning_rate": 4.972074243048897e-06,
+ "loss": 0.5601,
+ "step": 864000
+ },
+ {
+ "epoch": 5.14,
+ "learning_rate": 4.900438493352055e-06,
+ "loss": 0.5607,
+ "step": 865000
+ },
+ {
+ "epoch": 6.0,
+ "learning_rate": 4.829295945234258e-06,
+ "loss": 0.5429,
+ "step": 866000
+ },
+ {
+ "epoch": 6.0,
+ "learning_rate": 4.758647376699032e-06,
+ "loss": 0.5607,
+ "step": 867000
+ },
+ {
+ "epoch": 6.0,
+ "learning_rate": 4.688493560347773e-06,
+ "loss": 0.566,
+ "step": 868000
+ },
+ {
+ "epoch": 6.0,
+ "learning_rate": 4.618835263371396e-06,
+ "loss": 0.5564,
+ "step": 869000
+ },
+ {
+ "epoch": 6.0,
+ "learning_rate": 4.549673247541875e-06,
+ "loss": 0.5507,
+ "step": 870000
+ },
+ {
+ "epoch": 6.01,
+ "learning_rate": 4.48100826920394e-06,
+ "loss": 0.5489,
+ "step": 871000
+ },
+ {
+ "epoch": 6.01,
+ "learning_rate": 4.412841079266777e-06,
+ "loss": 0.5478,
+ "step": 872000
+ },
+ {
+ "epoch": 6.01,
+ "learning_rate": 4.3451724231958644e-06,
+ "loss": 0.549,
+ "step": 873000
+ },
+ {
+ "epoch": 6.01,
+ "learning_rate": 4.27800304100478e-06,
+ "loss": 0.558,
+ "step": 874000
+ },
+ {
+ "epoch": 6.01,
+ "learning_rate": 4.2113336672471245e-06,
+ "loss": 0.5615,
+ "step": 875000
+ },
+ {
+ "epoch": 6.01,
+ "learning_rate": 4.145165031008508e-06,
+ "loss": 0.5575,
+ "step": 876000
+ },
+ {
+ "epoch": 6.01,
+ "learning_rate": 4.079497855898501e-06,
+ "loss": 0.56,
+ "step": 877000
+ },
+ {
+ "epoch": 6.01,
+ "learning_rate": 4.01433286004283e-06,
+ "loss": 0.5667,
+ "step": 878000
+ },
+ {
+ "epoch": 6.01,
+ "learning_rate": 3.949670756075447e-06,
+ "loss": 0.5567,
+ "step": 879000
+ },
+ {
+ "epoch": 6.01,
+ "learning_rate": 3.885512251130763e-06,
+ "loss": 0.5505,
+ "step": 880000
+ },
+ {
+ "epoch": 6.02,
+ "learning_rate": 3.821858046835913e-06,
+ "loss": 0.5464,
+ "step": 881000
+ },
+ {
+ "epoch": 6.02,
+ "learning_rate": 3.75870883930306e-06,
+ "loss": 0.5447,
+ "step": 882000
+ },
+ {
+ "epoch": 6.02,
+ "learning_rate": 3.696065319121833e-06,
+ "loss": 0.5484,
+ "step": 883000
+ },
+ {
+ "epoch": 6.02,
+ "learning_rate": 3.6339281713517303e-06,
+ "loss": 0.558,
+ "step": 884000
+ },
+ {
+ "epoch": 6.02,
+ "learning_rate": 3.5722980755146517e-06,
+ "loss": 0.5587,
+ "step": 885000
+ },
+ {
+ "epoch": 6.02,
+ "learning_rate": 3.511175705587433e-06,
+ "loss": 0.5583,
+ "step": 886000
+ },
+ {
+ "epoch": 6.02,
+ "learning_rate": 3.4505617299945336e-06,
+ "loss": 0.5599,
+ "step": 887000
+ },
+ {
+ "epoch": 6.02,
+ "learning_rate": 3.390456811600673e-06,
+ "loss": 0.5673,
+ "step": 888000
+ },
+ {
+ "epoch": 6.02,
+ "learning_rate": 3.3308616077036115e-06,
+ "loss": 0.555,
+ "step": 889000
+ },
+ {
+ "epoch": 6.02,
+ "learning_rate": 3.271776770026963e-06,
+ "loss": 0.5494,
+ "step": 890000
+ },
+ {
+ "epoch": 6.03,
+ "learning_rate": 3.213202944713023e-06,
+ "loss": 0.5469,
+ "step": 891000
+ },
+ {
+ "epoch": 6.03,
+ "learning_rate": 3.155140772315773e-06,
+ "loss": 0.5463,
+ "step": 892000
+ },
+ {
+ "epoch": 6.03,
+ "learning_rate": 3.0975908877938277e-06,
+ "loss": 0.5501,
+ "step": 893000
+ },
+ {
+ "epoch": 6.03,
+ "learning_rate": 3.040553920503503e-06,
+ "loss": 0.5574,
+ "step": 894000
+ },
+ {
+ "epoch": 6.03,
+ "learning_rate": 2.9840304941919415e-06,
+ "loss": 0.5582,
+ "step": 895000
+ },
+ {
+ "epoch": 6.03,
+ "learning_rate": 2.928021226990263e-06,
+ "loss": 0.5578,
+ "step": 896000
+ },
+ {
+ "epoch": 6.03,
+ "learning_rate": 2.8725267314068495e-06,
+ "loss": 0.5576,
+ "step": 897000
+ },
+ {
+ "epoch": 6.03,
+ "learning_rate": 2.817547614320615e-06,
+ "loss": 0.5669,
+ "step": 898000
+ },
+ {
+ "epoch": 6.03,
+ "learning_rate": 2.7630844769743757e-06,
+ "loss": 0.5532,
+ "step": 899000
+ },
+ {
+ "epoch": 6.03,
+ "learning_rate": 2.7091379149682685e-06,
+ "loss": 0.5509,
+ "step": 900000
+ },
+ {
+ "epoch": 6.03,
+ "eval_loss": 0.34162890911102295,
+ "eval_runtime": 10.4601,
+ "eval_samples_per_second": 478.006,
+ "eval_steps_per_second": 3.824,
+ "step": 900000
  }
  ],
  "max_steps": 1000000,
  "num_train_epochs": 9223372036854775807,
- "total_flos": 1.4318609871698657e+19,
+ "total_flos": 1.5160884013339509e+19,
  "trial_name": null,
  "trial_params": null
  }
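For quick inspection of the log entries added in this commit, here is a minimal sketch. It assumes the checkpoint has been pulled locally and that these entries live under the `log_history` list of `last-checkpoint/trainer_state.json`, as the Hugging Face Trainer normally writes it; the filter range 851000–900000 simply mirrors the steps shown in the diff above.

```python
import json

# Path as it appears in this commit; adjust to where the repo is checked out.
STATE_FILE = "last-checkpoint/trainer_state.json"

with open(STATE_FILE) as f:
    state = json.load(f)

# Training-loss entries logged between step 851000 and 900000.
# Eval entries use "eval_loss" instead of "loss", so they are skipped here.
new_entries = [
    e for e in state.get("log_history", [])
    if "loss" in e and 850000 < e.get("step", 0) <= 900000
]

for e in new_entries:
    print(f"step {e['step']:>7}  epoch {e['epoch']:.2f}  "
          f"lr {e['learning_rate']:.3e}  loss {e['loss']:.4f}")
```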
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:276a7965140304f9b691ebf5dbaef28e68a0d61c7432397f5d9a4edba8ae4065
+ oid sha256:4e8ae5410675f243d0890ea34e523d99f0c2ce208d877293024b72a5ea3fe4ee
  size 442675755
runs/Dec30_08-05-16_t1v-n-4a21561c-w-0/events.out.tfevents.1672387541.t1v-n-4a21561c-w-0.14765.0 CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:625a070d1a811cbf7e5bc01d5ab0a9bfa65a1a6143d426b99dfb4c69daba6d48
- size 144425
+ oid sha256:1028393c8f1213b07dae01026b39aaf602d3385da51cabb40c1c38380db10810
+ size 152701