Joemgu commited on
Commit
648b742
1 Parent(s): 7e6a92c

Training in progress, step 800

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c31e002d936b0705bd9819e4e2df6e418d17150370dcd93e28e2e8f603d7e124
3
  size 4736616809
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ac64079a0330d63c2055aac2d6c2fdd2bdd9007c85e42939ced7054a4deb4102
3
  size 4736616809
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4922243f88d9ee4e31bb04c28db1b2ab1db2f80c964b73fff4a5e4391dbf52b8
3
  size 2368281769
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:18d064e377257d8b4962000d02ab7ff4990aabdaf29ea0066e9c7e06266ac53f
3
  size 2368281769
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c7518d309409e3eeffce4944d2bad3a304f6e6dc6c4e4a04ab44158de763755d
3
  size 14575
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c99e0c5c4d03d18376b499a14aa2de3c961e0b17bd1755ddc51098c9bdc845ed
3
  size 14575
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d2e0f128c7ae735ba8b14877fdb8cb2ead55b72037741596f80aa07ed1f6f130
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e958ac3a412653c5a55518335f9da8afd5387ca22d1d3dbfc5f94de4bbb91f7f
3
  size 627
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "best_metric": 2.021120071411133,
3
- "best_model_checkpoint": "output/checkpoint-400",
4
- "epoch": 0.27347359537658705,
5
- "global_step": 400,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -2414,11 +2414,2419 @@
2414
  "eval_samples_per_second": 9.879,
2415
  "eval_steps_per_second": 9.879,
2416
  "step": 400
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2417
  }
2418
  ],
2419
  "max_steps": 5848,
2420
  "num_train_epochs": 4,
2421
- "total_flos": 4.8975138397771776e+17,
2422
  "trial_name": null,
2423
  "trial_params": null
2424
  }
 
1
  {
2
+ "best_metric": 1.9985228776931763,
3
+ "best_model_checkpoint": "output/checkpoint-800",
4
+ "epoch": 0.5469471907531741,
5
+ "global_step": 800,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
2414
  "eval_samples_per_second": 9.879,
2415
  "eval_steps_per_second": 9.879,
2416
  "step": 400
2417
+ },
2418
+ {
2419
+ "epoch": 0.27,
2420
+ "learning_rate": 0.0005786473087818696,
2421
+ "loss": 1.9149,
2422
+ "step": 401
2423
+ },
2424
+ {
2425
+ "epoch": 0.27,
2426
+ "learning_rate": 0.0005785410764872521,
2427
+ "loss": 1.9261,
2428
+ "step": 402
2429
+ },
2430
+ {
2431
+ "epoch": 0.28,
2432
+ "learning_rate": 0.0005784348441926345,
2433
+ "loss": 1.9515,
2434
+ "step": 403
2435
+ },
2436
+ {
2437
+ "epoch": 0.28,
2438
+ "learning_rate": 0.0005783286118980169,
2439
+ "loss": 1.8682,
2440
+ "step": 404
2441
+ },
2442
+ {
2443
+ "epoch": 0.28,
2444
+ "learning_rate": 0.0005782223796033994,
2445
+ "loss": 1.8787,
2446
+ "step": 405
2447
+ },
2448
+ {
2449
+ "epoch": 0.28,
2450
+ "learning_rate": 0.0005781161473087819,
2451
+ "loss": 1.8882,
2452
+ "step": 406
2453
+ },
2454
+ {
2455
+ "epoch": 0.28,
2456
+ "learning_rate": 0.0005780099150141643,
2457
+ "loss": 1.8623,
2458
+ "step": 407
2459
+ },
2460
+ {
2461
+ "epoch": 0.28,
2462
+ "learning_rate": 0.0005779036827195467,
2463
+ "loss": 1.9533,
2464
+ "step": 408
2465
+ },
2466
+ {
2467
+ "epoch": 0.28,
2468
+ "learning_rate": 0.0005777974504249291,
2469
+ "loss": 1.8139,
2470
+ "step": 409
2471
+ },
2472
+ {
2473
+ "epoch": 0.28,
2474
+ "learning_rate": 0.0005776912181303115,
2475
+ "loss": 1.9436,
2476
+ "step": 410
2477
+ },
2478
+ {
2479
+ "epoch": 0.28,
2480
+ "learning_rate": 0.000577584985835694,
2481
+ "loss": 2.012,
2482
+ "step": 411
2483
+ },
2484
+ {
2485
+ "epoch": 0.28,
2486
+ "learning_rate": 0.0005774787535410764,
2487
+ "loss": 1.8593,
2488
+ "step": 412
2489
+ },
2490
+ {
2491
+ "epoch": 0.28,
2492
+ "learning_rate": 0.0005773725212464589,
2493
+ "loss": 1.9534,
2494
+ "step": 413
2495
+ },
2496
+ {
2497
+ "epoch": 0.28,
2498
+ "learning_rate": 0.0005772662889518413,
2499
+ "loss": 1.8901,
2500
+ "step": 414
2501
+ },
2502
+ {
2503
+ "epoch": 0.28,
2504
+ "learning_rate": 0.0005771600566572237,
2505
+ "loss": 1.9311,
2506
+ "step": 415
2507
+ },
2508
+ {
2509
+ "epoch": 0.28,
2510
+ "learning_rate": 0.0005770538243626062,
2511
+ "loss": 1.9357,
2512
+ "step": 416
2513
+ },
2514
+ {
2515
+ "epoch": 0.29,
2516
+ "learning_rate": 0.0005769475920679887,
2517
+ "loss": 2.0402,
2518
+ "step": 417
2519
+ },
2520
+ {
2521
+ "epoch": 0.29,
2522
+ "learning_rate": 0.0005768413597733711,
2523
+ "loss": 1.9272,
2524
+ "step": 418
2525
+ },
2526
+ {
2527
+ "epoch": 0.29,
2528
+ "learning_rate": 0.0005767351274787535,
2529
+ "loss": 1.8699,
2530
+ "step": 419
2531
+ },
2532
+ {
2533
+ "epoch": 0.29,
2534
+ "learning_rate": 0.000576628895184136,
2535
+ "loss": 1.9146,
2536
+ "step": 420
2537
+ },
2538
+ {
2539
+ "epoch": 0.29,
2540
+ "learning_rate": 0.0005765226628895183,
2541
+ "loss": 1.8632,
2542
+ "step": 421
2543
+ },
2544
+ {
2545
+ "epoch": 0.29,
2546
+ "learning_rate": 0.0005764164305949008,
2547
+ "loss": 1.9577,
2548
+ "step": 422
2549
+ },
2550
+ {
2551
+ "epoch": 0.29,
2552
+ "learning_rate": 0.0005763101983002832,
2553
+ "loss": 1.9124,
2554
+ "step": 423
2555
+ },
2556
+ {
2557
+ "epoch": 0.29,
2558
+ "learning_rate": 0.0005762039660056656,
2559
+ "loss": 1.8922,
2560
+ "step": 424
2561
+ },
2562
+ {
2563
+ "epoch": 0.29,
2564
+ "learning_rate": 0.0005760977337110481,
2565
+ "loss": 1.8515,
2566
+ "step": 425
2567
+ },
2568
+ {
2569
+ "epoch": 0.29,
2570
+ "learning_rate": 0.0005759915014164305,
2571
+ "loss": 1.9232,
2572
+ "step": 426
2573
+ },
2574
+ {
2575
+ "epoch": 0.29,
2576
+ "learning_rate": 0.000575885269121813,
2577
+ "loss": 1.9402,
2578
+ "step": 427
2579
+ },
2580
+ {
2581
+ "epoch": 0.29,
2582
+ "learning_rate": 0.0005757790368271955,
2583
+ "loss": 1.885,
2584
+ "step": 428
2585
+ },
2586
+ {
2587
+ "epoch": 0.29,
2588
+ "learning_rate": 0.0005756728045325779,
2589
+ "loss": 1.8847,
2590
+ "step": 429
2591
+ },
2592
+ {
2593
+ "epoch": 0.29,
2594
+ "learning_rate": 0.0005755665722379603,
2595
+ "loss": 1.8776,
2596
+ "step": 430
2597
+ },
2598
+ {
2599
+ "epoch": 0.29,
2600
+ "learning_rate": 0.0005754603399433428,
2601
+ "loss": 1.9292,
2602
+ "step": 431
2603
+ },
2604
+ {
2605
+ "epoch": 0.3,
2606
+ "learning_rate": 0.0005753541076487251,
2607
+ "loss": 2.037,
2608
+ "step": 432
2609
+ },
2610
+ {
2611
+ "epoch": 0.3,
2612
+ "learning_rate": 0.0005752478753541076,
2613
+ "loss": 1.9526,
2614
+ "step": 433
2615
+ },
2616
+ {
2617
+ "epoch": 0.3,
2618
+ "learning_rate": 0.00057514164305949,
2619
+ "loss": 1.8857,
2620
+ "step": 434
2621
+ },
2622
+ {
2623
+ "epoch": 0.3,
2624
+ "learning_rate": 0.0005750354107648724,
2625
+ "loss": 1.8571,
2626
+ "step": 435
2627
+ },
2628
+ {
2629
+ "epoch": 0.3,
2630
+ "learning_rate": 0.0005749291784702549,
2631
+ "loss": 1.99,
2632
+ "step": 436
2633
+ },
2634
+ {
2635
+ "epoch": 0.3,
2636
+ "learning_rate": 0.0005748229461756373,
2637
+ "loss": 1.9094,
2638
+ "step": 437
2639
+ },
2640
+ {
2641
+ "epoch": 0.3,
2642
+ "learning_rate": 0.0005747167138810198,
2643
+ "loss": 1.8634,
2644
+ "step": 438
2645
+ },
2646
+ {
2647
+ "epoch": 0.3,
2648
+ "learning_rate": 0.0005746104815864022,
2649
+ "loss": 1.85,
2650
+ "step": 439
2651
+ },
2652
+ {
2653
+ "epoch": 0.3,
2654
+ "learning_rate": 0.0005745042492917847,
2655
+ "loss": 1.9329,
2656
+ "step": 440
2657
+ },
2658
+ {
2659
+ "epoch": 0.3,
2660
+ "learning_rate": 0.0005743980169971671,
2661
+ "loss": 1.9566,
2662
+ "step": 441
2663
+ },
2664
+ {
2665
+ "epoch": 0.3,
2666
+ "learning_rate": 0.0005742917847025496,
2667
+ "loss": 1.9756,
2668
+ "step": 442
2669
+ },
2670
+ {
2671
+ "epoch": 0.3,
2672
+ "learning_rate": 0.000574185552407932,
2673
+ "loss": 1.9327,
2674
+ "step": 443
2675
+ },
2676
+ {
2677
+ "epoch": 0.3,
2678
+ "learning_rate": 0.0005740793201133143,
2679
+ "loss": 1.8133,
2680
+ "step": 444
2681
+ },
2682
+ {
2683
+ "epoch": 0.3,
2684
+ "learning_rate": 0.0005739730878186968,
2685
+ "loss": 1.9292,
2686
+ "step": 445
2687
+ },
2688
+ {
2689
+ "epoch": 0.3,
2690
+ "learning_rate": 0.0005738668555240792,
2691
+ "loss": 1.8839,
2692
+ "step": 446
2693
+ },
2694
+ {
2695
+ "epoch": 0.31,
2696
+ "learning_rate": 0.0005737606232294617,
2697
+ "loss": 1.9216,
2698
+ "step": 447
2699
+ },
2700
+ {
2701
+ "epoch": 0.31,
2702
+ "learning_rate": 0.0005736543909348441,
2703
+ "loss": 1.9303,
2704
+ "step": 448
2705
+ },
2706
+ {
2707
+ "epoch": 0.31,
2708
+ "learning_rate": 0.0005735481586402266,
2709
+ "loss": 2.0129,
2710
+ "step": 449
2711
+ },
2712
+ {
2713
+ "epoch": 0.31,
2714
+ "learning_rate": 0.000573441926345609,
2715
+ "loss": 1.9233,
2716
+ "step": 450
2717
+ },
2718
+ {
2719
+ "epoch": 0.31,
2720
+ "learning_rate": 0.0005733356940509915,
2721
+ "loss": 1.809,
2722
+ "step": 451
2723
+ },
2724
+ {
2725
+ "epoch": 0.31,
2726
+ "learning_rate": 0.0005732294617563739,
2727
+ "loss": 1.822,
2728
+ "step": 452
2729
+ },
2730
+ {
2731
+ "epoch": 0.31,
2732
+ "learning_rate": 0.0005731232294617564,
2733
+ "loss": 1.829,
2734
+ "step": 453
2735
+ },
2736
+ {
2737
+ "epoch": 0.31,
2738
+ "learning_rate": 0.0005730169971671388,
2739
+ "loss": 1.9524,
2740
+ "step": 454
2741
+ },
2742
+ {
2743
+ "epoch": 0.31,
2744
+ "learning_rate": 0.0005729107648725212,
2745
+ "loss": 1.8695,
2746
+ "step": 455
2747
+ },
2748
+ {
2749
+ "epoch": 0.31,
2750
+ "learning_rate": 0.0005728045325779037,
2751
+ "loss": 1.8614,
2752
+ "step": 456
2753
+ },
2754
+ {
2755
+ "epoch": 0.31,
2756
+ "learning_rate": 0.000572698300283286,
2757
+ "loss": 1.8697,
2758
+ "step": 457
2759
+ },
2760
+ {
2761
+ "epoch": 0.31,
2762
+ "learning_rate": 0.0005725920679886684,
2763
+ "loss": 1.8378,
2764
+ "step": 458
2765
+ },
2766
+ {
2767
+ "epoch": 0.31,
2768
+ "learning_rate": 0.0005724858356940509,
2769
+ "loss": 1.9554,
2770
+ "step": 459
2771
+ },
2772
+ {
2773
+ "epoch": 0.31,
2774
+ "learning_rate": 0.0005723796033994334,
2775
+ "loss": 1.9066,
2776
+ "step": 460
2777
+ },
2778
+ {
2779
+ "epoch": 0.32,
2780
+ "learning_rate": 0.0005722733711048158,
2781
+ "loss": 1.8836,
2782
+ "step": 461
2783
+ },
2784
+ {
2785
+ "epoch": 0.32,
2786
+ "learning_rate": 0.0005721671388101983,
2787
+ "loss": 1.9541,
2788
+ "step": 462
2789
+ },
2790
+ {
2791
+ "epoch": 0.32,
2792
+ "learning_rate": 0.0005720609065155807,
2793
+ "loss": 1.8879,
2794
+ "step": 463
2795
+ },
2796
+ {
2797
+ "epoch": 0.32,
2798
+ "learning_rate": 0.0005719546742209631,
2799
+ "loss": 1.8126,
2800
+ "step": 464
2801
+ },
2802
+ {
2803
+ "epoch": 0.32,
2804
+ "learning_rate": 0.0005718484419263456,
2805
+ "loss": 1.9861,
2806
+ "step": 465
2807
+ },
2808
+ {
2809
+ "epoch": 0.32,
2810
+ "learning_rate": 0.000571742209631728,
2811
+ "loss": 2.0253,
2812
+ "step": 466
2813
+ },
2814
+ {
2815
+ "epoch": 0.32,
2816
+ "learning_rate": 0.0005716359773371105,
2817
+ "loss": 1.9213,
2818
+ "step": 467
2819
+ },
2820
+ {
2821
+ "epoch": 0.32,
2822
+ "learning_rate": 0.0005715297450424929,
2823
+ "loss": 1.9988,
2824
+ "step": 468
2825
+ },
2826
+ {
2827
+ "epoch": 0.32,
2828
+ "learning_rate": 0.0005714235127478752,
2829
+ "loss": 1.9285,
2830
+ "step": 469
2831
+ },
2832
+ {
2833
+ "epoch": 0.32,
2834
+ "learning_rate": 0.0005713172804532577,
2835
+ "loss": 1.9973,
2836
+ "step": 470
2837
+ },
2838
+ {
2839
+ "epoch": 0.32,
2840
+ "learning_rate": 0.0005712110481586402,
2841
+ "loss": 1.945,
2842
+ "step": 471
2843
+ },
2844
+ {
2845
+ "epoch": 0.32,
2846
+ "learning_rate": 0.0005711048158640226,
2847
+ "loss": 1.9327,
2848
+ "step": 472
2849
+ },
2850
+ {
2851
+ "epoch": 0.32,
2852
+ "learning_rate": 0.0005709985835694051,
2853
+ "loss": 2.0014,
2854
+ "step": 473
2855
+ },
2856
+ {
2857
+ "epoch": 0.32,
2858
+ "learning_rate": 0.0005708923512747875,
2859
+ "loss": 1.8221,
2860
+ "step": 474
2861
+ },
2862
+ {
2863
+ "epoch": 0.32,
2864
+ "learning_rate": 0.0005707861189801699,
2865
+ "loss": 1.8905,
2866
+ "step": 475
2867
+ },
2868
+ {
2869
+ "epoch": 0.33,
2870
+ "learning_rate": 0.0005706798866855524,
2871
+ "loss": 1.9236,
2872
+ "step": 476
2873
+ },
2874
+ {
2875
+ "epoch": 0.33,
2876
+ "learning_rate": 0.0005705736543909348,
2877
+ "loss": 1.8436,
2878
+ "step": 477
2879
+ },
2880
+ {
2881
+ "epoch": 0.33,
2882
+ "learning_rate": 0.0005704674220963172,
2883
+ "loss": 1.8545,
2884
+ "step": 478
2885
+ },
2886
+ {
2887
+ "epoch": 0.33,
2888
+ "learning_rate": 0.0005703611898016997,
2889
+ "loss": 1.8886,
2890
+ "step": 479
2891
+ },
2892
+ {
2893
+ "epoch": 0.33,
2894
+ "learning_rate": 0.000570254957507082,
2895
+ "loss": 1.873,
2896
+ "step": 480
2897
+ },
2898
+ {
2899
+ "epoch": 0.33,
2900
+ "learning_rate": 0.0005701487252124645,
2901
+ "loss": 1.8624,
2902
+ "step": 481
2903
+ },
2904
+ {
2905
+ "epoch": 0.33,
2906
+ "learning_rate": 0.000570042492917847,
2907
+ "loss": 1.8614,
2908
+ "step": 482
2909
+ },
2910
+ {
2911
+ "epoch": 0.33,
2912
+ "learning_rate": 0.0005699362606232294,
2913
+ "loss": 1.9153,
2914
+ "step": 483
2915
+ },
2916
+ {
2917
+ "epoch": 0.33,
2918
+ "learning_rate": 0.0005698300283286118,
2919
+ "loss": 1.8524,
2920
+ "step": 484
2921
+ },
2922
+ {
2923
+ "epoch": 0.33,
2924
+ "learning_rate": 0.0005697237960339943,
2925
+ "loss": 1.9039,
2926
+ "step": 485
2927
+ },
2928
+ {
2929
+ "epoch": 0.33,
2930
+ "learning_rate": 0.0005696175637393767,
2931
+ "loss": 1.8866,
2932
+ "step": 486
2933
+ },
2934
+ {
2935
+ "epoch": 0.33,
2936
+ "learning_rate": 0.0005695113314447592,
2937
+ "loss": 1.7717,
2938
+ "step": 487
2939
+ },
2940
+ {
2941
+ "epoch": 0.33,
2942
+ "learning_rate": 0.0005694050991501416,
2943
+ "loss": 1.9695,
2944
+ "step": 488
2945
+ },
2946
+ {
2947
+ "epoch": 0.33,
2948
+ "learning_rate": 0.000569298866855524,
2949
+ "loss": 1.9931,
2950
+ "step": 489
2951
+ },
2952
+ {
2953
+ "epoch": 0.34,
2954
+ "learning_rate": 0.0005691926345609065,
2955
+ "loss": 1.8473,
2956
+ "step": 490
2957
+ },
2958
+ {
2959
+ "epoch": 0.34,
2960
+ "learning_rate": 0.0005690864022662889,
2961
+ "loss": 1.8366,
2962
+ "step": 491
2963
+ },
2964
+ {
2965
+ "epoch": 0.34,
2966
+ "learning_rate": 0.0005689801699716714,
2967
+ "loss": 1.9242,
2968
+ "step": 492
2969
+ },
2970
+ {
2971
+ "epoch": 0.34,
2972
+ "learning_rate": 0.0005688739376770539,
2973
+ "loss": 1.9081,
2974
+ "step": 493
2975
+ },
2976
+ {
2977
+ "epoch": 0.34,
2978
+ "learning_rate": 0.0005687677053824362,
2979
+ "loss": 1.8658,
2980
+ "step": 494
2981
+ },
2982
+ {
2983
+ "epoch": 0.34,
2984
+ "learning_rate": 0.0005686614730878186,
2985
+ "loss": 1.9603,
2986
+ "step": 495
2987
+ },
2988
+ {
2989
+ "epoch": 0.34,
2990
+ "learning_rate": 0.0005685552407932011,
2991
+ "loss": 1.8292,
2992
+ "step": 496
2993
+ },
2994
+ {
2995
+ "epoch": 0.34,
2996
+ "learning_rate": 0.0005684490084985835,
2997
+ "loss": 1.8917,
2998
+ "step": 497
2999
+ },
3000
+ {
3001
+ "epoch": 0.34,
3002
+ "learning_rate": 0.0005683427762039659,
3003
+ "loss": 1.9171,
3004
+ "step": 498
3005
+ },
3006
+ {
3007
+ "epoch": 0.34,
3008
+ "learning_rate": 0.0005682365439093484,
3009
+ "loss": 1.9373,
3010
+ "step": 499
3011
+ },
3012
+ {
3013
+ "epoch": 0.34,
3014
+ "learning_rate": 0.0005681303116147308,
3015
+ "loss": 1.9529,
3016
+ "step": 500
3017
+ },
3018
+ {
3019
+ "epoch": 0.34,
3020
+ "learning_rate": 0.0005680240793201133,
3021
+ "loss": 1.9459,
3022
+ "step": 501
3023
+ },
3024
+ {
3025
+ "epoch": 0.34,
3026
+ "learning_rate": 0.0005679178470254957,
3027
+ "loss": 1.9941,
3028
+ "step": 502
3029
+ },
3030
+ {
3031
+ "epoch": 0.34,
3032
+ "learning_rate": 0.0005678116147308781,
3033
+ "loss": 1.8738,
3034
+ "step": 503
3035
+ },
3036
+ {
3037
+ "epoch": 0.34,
3038
+ "learning_rate": 0.0005677053824362606,
3039
+ "loss": 1.9666,
3040
+ "step": 504
3041
+ },
3042
+ {
3043
+ "epoch": 0.35,
3044
+ "learning_rate": 0.000567599150141643,
3045
+ "loss": 2.0352,
3046
+ "step": 505
3047
+ },
3048
+ {
3049
+ "epoch": 0.35,
3050
+ "learning_rate": 0.0005674929178470254,
3051
+ "loss": 1.8621,
3052
+ "step": 506
3053
+ },
3054
+ {
3055
+ "epoch": 0.35,
3056
+ "learning_rate": 0.0005673866855524079,
3057
+ "loss": 1.8733,
3058
+ "step": 507
3059
+ },
3060
+ {
3061
+ "epoch": 0.35,
3062
+ "learning_rate": 0.0005672804532577903,
3063
+ "loss": 1.9414,
3064
+ "step": 508
3065
+ },
3066
+ {
3067
+ "epoch": 0.35,
3068
+ "learning_rate": 0.0005671742209631727,
3069
+ "loss": 1.9515,
3070
+ "step": 509
3071
+ },
3072
+ {
3073
+ "epoch": 0.35,
3074
+ "learning_rate": 0.0005670679886685552,
3075
+ "loss": 1.891,
3076
+ "step": 510
3077
+ },
3078
+ {
3079
+ "epoch": 0.35,
3080
+ "learning_rate": 0.0005669617563739376,
3081
+ "loss": 2.0231,
3082
+ "step": 511
3083
+ },
3084
+ {
3085
+ "epoch": 0.35,
3086
+ "learning_rate": 0.0005668555240793201,
3087
+ "loss": 1.9534,
3088
+ "step": 512
3089
+ },
3090
+ {
3091
+ "epoch": 0.35,
3092
+ "learning_rate": 0.0005667492917847025,
3093
+ "loss": 1.9437,
3094
+ "step": 513
3095
+ },
3096
+ {
3097
+ "epoch": 0.35,
3098
+ "learning_rate": 0.0005666430594900849,
3099
+ "loss": 1.8914,
3100
+ "step": 514
3101
+ },
3102
+ {
3103
+ "epoch": 0.35,
3104
+ "learning_rate": 0.0005665368271954674,
3105
+ "loss": 1.8711,
3106
+ "step": 515
3107
+ },
3108
+ {
3109
+ "epoch": 0.35,
3110
+ "learning_rate": 0.0005664305949008499,
3111
+ "loss": 1.9708,
3112
+ "step": 516
3113
+ },
3114
+ {
3115
+ "epoch": 0.35,
3116
+ "learning_rate": 0.0005663243626062323,
3117
+ "loss": 1.9462,
3118
+ "step": 517
3119
+ },
3120
+ {
3121
+ "epoch": 0.35,
3122
+ "learning_rate": 0.0005662181303116146,
3123
+ "loss": 1.8547,
3124
+ "step": 518
3125
+ },
3126
+ {
3127
+ "epoch": 0.35,
3128
+ "learning_rate": 0.0005661118980169971,
3129
+ "loss": 1.842,
3130
+ "step": 519
3131
+ },
3132
+ {
3133
+ "epoch": 0.36,
3134
+ "learning_rate": 0.0005660056657223795,
3135
+ "loss": 1.8544,
3136
+ "step": 520
3137
+ },
3138
+ {
3139
+ "epoch": 0.36,
3140
+ "learning_rate": 0.000565899433427762,
3141
+ "loss": 1.8744,
3142
+ "step": 521
3143
+ },
3144
+ {
3145
+ "epoch": 0.36,
3146
+ "learning_rate": 0.0005657932011331444,
3147
+ "loss": 1.9211,
3148
+ "step": 522
3149
+ },
3150
+ {
3151
+ "epoch": 0.36,
3152
+ "learning_rate": 0.0005656869688385268,
3153
+ "loss": 1.9399,
3154
+ "step": 523
3155
+ },
3156
+ {
3157
+ "epoch": 0.36,
3158
+ "learning_rate": 0.0005655807365439093,
3159
+ "loss": 1.9049,
3160
+ "step": 524
3161
+ },
3162
+ {
3163
+ "epoch": 0.36,
3164
+ "learning_rate": 0.0005654745042492917,
3165
+ "loss": 1.9623,
3166
+ "step": 525
3167
+ },
3168
+ {
3169
+ "epoch": 0.36,
3170
+ "learning_rate": 0.0005653682719546742,
3171
+ "loss": 1.9142,
3172
+ "step": 526
3173
+ },
3174
+ {
3175
+ "epoch": 0.36,
3176
+ "learning_rate": 0.0005652620396600567,
3177
+ "loss": 1.8899,
3178
+ "step": 527
3179
+ },
3180
+ {
3181
+ "epoch": 0.36,
3182
+ "learning_rate": 0.0005651558073654391,
3183
+ "loss": 1.8878,
3184
+ "step": 528
3185
+ },
3186
+ {
3187
+ "epoch": 0.36,
3188
+ "learning_rate": 0.0005650495750708215,
3189
+ "loss": 1.924,
3190
+ "step": 529
3191
+ },
3192
+ {
3193
+ "epoch": 0.36,
3194
+ "learning_rate": 0.000564943342776204,
3195
+ "loss": 1.9087,
3196
+ "step": 530
3197
+ },
3198
+ {
3199
+ "epoch": 0.36,
3200
+ "learning_rate": 0.0005648371104815863,
3201
+ "loss": 1.9945,
3202
+ "step": 531
3203
+ },
3204
+ {
3205
+ "epoch": 0.36,
3206
+ "learning_rate": 0.0005647308781869688,
3207
+ "loss": 1.8945,
3208
+ "step": 532
3209
+ },
3210
+ {
3211
+ "epoch": 0.36,
3212
+ "learning_rate": 0.0005646246458923512,
3213
+ "loss": 1.9209,
3214
+ "step": 533
3215
+ },
3216
+ {
3217
+ "epoch": 0.37,
3218
+ "learning_rate": 0.0005645184135977336,
3219
+ "loss": 1.9301,
3220
+ "step": 534
3221
+ },
3222
+ {
3223
+ "epoch": 0.37,
3224
+ "learning_rate": 0.0005644121813031161,
3225
+ "loss": 1.8516,
3226
+ "step": 535
3227
+ },
3228
+ {
3229
+ "epoch": 0.37,
3230
+ "learning_rate": 0.0005643059490084985,
3231
+ "loss": 1.906,
3232
+ "step": 536
3233
+ },
3234
+ {
3235
+ "epoch": 0.37,
3236
+ "learning_rate": 0.000564199716713881,
3237
+ "loss": 1.91,
3238
+ "step": 537
3239
+ },
3240
+ {
3241
+ "epoch": 0.37,
3242
+ "learning_rate": 0.0005640934844192634,
3243
+ "loss": 1.9069,
3244
+ "step": 538
3245
+ },
3246
+ {
3247
+ "epoch": 0.37,
3248
+ "learning_rate": 0.0005639872521246459,
3249
+ "loss": 1.7911,
3250
+ "step": 539
3251
+ },
3252
+ {
3253
+ "epoch": 0.37,
3254
+ "learning_rate": 0.0005638810198300283,
3255
+ "loss": 1.8925,
3256
+ "step": 540
3257
+ },
3258
+ {
3259
+ "epoch": 0.37,
3260
+ "learning_rate": 0.0005637747875354108,
3261
+ "loss": 1.9526,
3262
+ "step": 541
3263
+ },
3264
+ {
3265
+ "epoch": 0.37,
3266
+ "learning_rate": 0.0005636685552407931,
3267
+ "loss": 1.9279,
3268
+ "step": 542
3269
+ },
3270
+ {
3271
+ "epoch": 0.37,
3272
+ "learning_rate": 0.0005635623229461755,
3273
+ "loss": 1.8829,
3274
+ "step": 543
3275
+ },
3276
+ {
3277
+ "epoch": 0.37,
3278
+ "learning_rate": 0.000563456090651558,
3279
+ "loss": 1.9501,
3280
+ "step": 544
3281
+ },
3282
+ {
3283
+ "epoch": 0.37,
3284
+ "learning_rate": 0.0005633498583569404,
3285
+ "loss": 1.9602,
3286
+ "step": 545
3287
+ },
3288
+ {
3289
+ "epoch": 0.37,
3290
+ "learning_rate": 0.0005632436260623229,
3291
+ "loss": 1.9497,
3292
+ "step": 546
3293
+ },
3294
+ {
3295
+ "epoch": 0.37,
3296
+ "learning_rate": 0.0005631373937677053,
3297
+ "loss": 1.84,
3298
+ "step": 547
3299
+ },
3300
+ {
3301
+ "epoch": 0.37,
3302
+ "learning_rate": 0.0005630311614730878,
3303
+ "loss": 1.9337,
3304
+ "step": 548
3305
+ },
3306
+ {
3307
+ "epoch": 0.38,
3308
+ "learning_rate": 0.0005629249291784702,
3309
+ "loss": 1.8709,
3310
+ "step": 549
3311
+ },
3312
+ {
3313
+ "epoch": 0.38,
3314
+ "learning_rate": 0.0005628186968838527,
3315
+ "loss": 1.9231,
3316
+ "step": 550
3317
+ },
3318
+ {
3319
+ "epoch": 0.38,
3320
+ "learning_rate": 0.0005627124645892351,
3321
+ "loss": 1.9035,
3322
+ "step": 551
3323
+ },
3324
+ {
3325
+ "epoch": 0.38,
3326
+ "learning_rate": 0.0005626062322946176,
3327
+ "loss": 1.8365,
3328
+ "step": 552
3329
+ },
3330
+ {
3331
+ "epoch": 0.38,
3332
+ "learning_rate": 0.0005625,
3333
+ "loss": 2.1028,
3334
+ "step": 553
3335
+ },
3336
+ {
3337
+ "epoch": 0.38,
3338
+ "learning_rate": 0.0005623937677053823,
3339
+ "loss": 1.8692,
3340
+ "step": 554
3341
+ },
3342
+ {
3343
+ "epoch": 0.38,
3344
+ "learning_rate": 0.0005622875354107648,
3345
+ "loss": 1.9452,
3346
+ "step": 555
3347
+ },
3348
+ {
3349
+ "epoch": 0.38,
3350
+ "learning_rate": 0.0005621813031161472,
3351
+ "loss": 1.883,
3352
+ "step": 556
3353
+ },
3354
+ {
3355
+ "epoch": 0.38,
3356
+ "learning_rate": 0.0005620750708215296,
3357
+ "loss": 1.8987,
3358
+ "step": 557
3359
+ },
3360
+ {
3361
+ "epoch": 0.38,
3362
+ "learning_rate": 0.0005619688385269121,
3363
+ "loss": 1.9188,
3364
+ "step": 558
3365
+ },
3366
+ {
3367
+ "epoch": 0.38,
3368
+ "learning_rate": 0.0005618626062322946,
3369
+ "loss": 1.9373,
3370
+ "step": 559
3371
+ },
3372
+ {
3373
+ "epoch": 0.38,
3374
+ "learning_rate": 0.000561756373937677,
3375
+ "loss": 1.9167,
3376
+ "step": 560
3377
+ },
3378
+ {
3379
+ "epoch": 0.38,
3380
+ "learning_rate": 0.0005616501416430595,
3381
+ "loss": 1.8628,
3382
+ "step": 561
3383
+ },
3384
+ {
3385
+ "epoch": 0.38,
3386
+ "learning_rate": 0.0005615439093484419,
3387
+ "loss": 1.9258,
3388
+ "step": 562
3389
+ },
3390
+ {
3391
+ "epoch": 0.38,
3392
+ "learning_rate": 0.0005614376770538243,
3393
+ "loss": 1.9783,
3394
+ "step": 563
3395
+ },
3396
+ {
3397
+ "epoch": 0.39,
3398
+ "learning_rate": 0.0005613314447592068,
3399
+ "loss": 1.9738,
3400
+ "step": 564
3401
+ },
3402
+ {
3403
+ "epoch": 0.39,
3404
+ "learning_rate": 0.0005612252124645892,
3405
+ "loss": 1.9351,
3406
+ "step": 565
3407
+ },
3408
+ {
3409
+ "epoch": 0.39,
3410
+ "learning_rate": 0.0005611189801699717,
3411
+ "loss": 1.9109,
3412
+ "step": 566
3413
+ },
3414
+ {
3415
+ "epoch": 0.39,
3416
+ "learning_rate": 0.000561012747875354,
3417
+ "loss": 1.832,
3418
+ "step": 567
3419
+ },
3420
+ {
3421
+ "epoch": 0.39,
3422
+ "learning_rate": 0.0005609065155807364,
3423
+ "loss": 1.9258,
3424
+ "step": 568
3425
+ },
3426
+ {
3427
+ "epoch": 0.39,
3428
+ "learning_rate": 0.0005608002832861189,
3429
+ "loss": 1.9888,
3430
+ "step": 569
3431
+ },
3432
+ {
3433
+ "epoch": 0.39,
3434
+ "learning_rate": 0.0005606940509915014,
3435
+ "loss": 1.9195,
3436
+ "step": 570
3437
+ },
3438
+ {
3439
+ "epoch": 0.39,
3440
+ "learning_rate": 0.0005605878186968838,
3441
+ "loss": 1.8607,
3442
+ "step": 571
3443
+ },
3444
+ {
3445
+ "epoch": 0.39,
3446
+ "learning_rate": 0.0005604815864022663,
3447
+ "loss": 1.9868,
3448
+ "step": 572
3449
+ },
3450
+ {
3451
+ "epoch": 0.39,
3452
+ "learning_rate": 0.0005603753541076487,
3453
+ "loss": 1.885,
3454
+ "step": 573
3455
+ },
3456
+ {
3457
+ "epoch": 0.39,
3458
+ "learning_rate": 0.0005602691218130311,
3459
+ "loss": 1.9379,
3460
+ "step": 574
3461
+ },
3462
+ {
3463
+ "epoch": 0.39,
3464
+ "learning_rate": 0.0005601628895184136,
3465
+ "loss": 1.8685,
3466
+ "step": 575
3467
+ },
3468
+ {
3469
+ "epoch": 0.39,
3470
+ "learning_rate": 0.000560056657223796,
3471
+ "loss": 1.9636,
3472
+ "step": 576
3473
+ },
3474
+ {
3475
+ "epoch": 0.39,
3476
+ "learning_rate": 0.0005599504249291784,
3477
+ "loss": 1.9925,
3478
+ "step": 577
3479
+ },
3480
+ {
3481
+ "epoch": 0.4,
3482
+ "learning_rate": 0.0005598441926345609,
3483
+ "loss": 1.9332,
3484
+ "step": 578
3485
+ },
3486
+ {
3487
+ "epoch": 0.4,
3488
+ "learning_rate": 0.0005597379603399432,
3489
+ "loss": 1.8458,
3490
+ "step": 579
3491
+ },
3492
+ {
3493
+ "epoch": 0.4,
3494
+ "learning_rate": 0.0005596317280453257,
3495
+ "loss": 1.8204,
3496
+ "step": 580
3497
+ },
3498
+ {
3499
+ "epoch": 0.4,
3500
+ "learning_rate": 0.0005595254957507082,
3501
+ "loss": 1.8656,
3502
+ "step": 581
3503
+ },
3504
+ {
3505
+ "epoch": 0.4,
3506
+ "learning_rate": 0.0005594192634560906,
3507
+ "loss": 1.9399,
3508
+ "step": 582
3509
+ },
3510
+ {
3511
+ "epoch": 0.4,
3512
+ "learning_rate": 0.000559313031161473,
3513
+ "loss": 1.891,
3514
+ "step": 583
3515
+ },
3516
+ {
3517
+ "epoch": 0.4,
3518
+ "learning_rate": 0.0005592067988668555,
3519
+ "loss": 1.8595,
3520
+ "step": 584
3521
+ },
3522
+ {
3523
+ "epoch": 0.4,
3524
+ "learning_rate": 0.0005591005665722379,
3525
+ "loss": 1.8989,
3526
+ "step": 585
3527
+ },
3528
+ {
3529
+ "epoch": 0.4,
3530
+ "learning_rate": 0.0005589943342776204,
3531
+ "loss": 1.8275,
3532
+ "step": 586
3533
+ },
3534
+ {
3535
+ "epoch": 0.4,
3536
+ "learning_rate": 0.0005588881019830028,
3537
+ "loss": 1.8716,
3538
+ "step": 587
3539
+ },
3540
+ {
3541
+ "epoch": 0.4,
3542
+ "learning_rate": 0.0005587818696883852,
3543
+ "loss": 1.933,
3544
+ "step": 588
3545
+ },
3546
+ {
3547
+ "epoch": 0.4,
3548
+ "learning_rate": 0.0005586756373937677,
3549
+ "loss": 1.982,
3550
+ "step": 589
3551
+ },
3552
+ {
3553
+ "epoch": 0.4,
3554
+ "learning_rate": 0.00055856940509915,
3555
+ "loss": 1.8773,
3556
+ "step": 590
3557
+ },
3558
+ {
3559
+ "epoch": 0.4,
3560
+ "learning_rate": 0.0005584631728045325,
3561
+ "loss": 1.8793,
3562
+ "step": 591
3563
+ },
3564
+ {
3565
+ "epoch": 0.4,
3566
+ "learning_rate": 0.000558356940509915,
3567
+ "loss": 1.9778,
3568
+ "step": 592
3569
+ },
3570
+ {
3571
+ "epoch": 0.41,
3572
+ "learning_rate": 0.0005582507082152974,
3573
+ "loss": 1.9904,
3574
+ "step": 593
3575
+ },
3576
+ {
3577
+ "epoch": 0.41,
3578
+ "learning_rate": 0.0005581444759206798,
3579
+ "loss": 1.9301,
3580
+ "step": 594
3581
+ },
3582
+ {
3583
+ "epoch": 0.41,
3584
+ "learning_rate": 0.0005580382436260623,
3585
+ "loss": 1.9367,
3586
+ "step": 595
3587
+ },
3588
+ {
3589
+ "epoch": 0.41,
3590
+ "learning_rate": 0.0005579320113314447,
3591
+ "loss": 1.9853,
3592
+ "step": 596
3593
+ },
3594
+ {
3595
+ "epoch": 0.41,
3596
+ "learning_rate": 0.0005578257790368272,
3597
+ "loss": 1.9987,
3598
+ "step": 597
3599
+ },
3600
+ {
3601
+ "epoch": 0.41,
3602
+ "learning_rate": 0.0005577195467422096,
3603
+ "loss": 1.88,
3604
+ "step": 598
3605
+ },
3606
+ {
3607
+ "epoch": 0.41,
3608
+ "learning_rate": 0.000557613314447592,
3609
+ "loss": 1.9391,
3610
+ "step": 599
3611
+ },
3612
+ {
3613
+ "epoch": 0.41,
3614
+ "learning_rate": 0.0005575070821529745,
3615
+ "loss": 2.0134,
3616
+ "step": 600
3617
+ },
3618
+ {
3619
+ "epoch": 0.41,
3620
+ "learning_rate": 0.0005574008498583569,
3621
+ "loss": 1.8383,
3622
+ "step": 601
3623
+ },
3624
+ {
3625
+ "epoch": 0.41,
3626
+ "learning_rate": 0.0005572946175637392,
3627
+ "loss": 1.8189,
3628
+ "step": 602
3629
+ },
3630
+ {
3631
+ "epoch": 0.41,
3632
+ "learning_rate": 0.0005571883852691217,
3633
+ "loss": 1.9324,
3634
+ "step": 603
3635
+ },
3636
+ {
3637
+ "epoch": 0.41,
3638
+ "learning_rate": 0.0005570821529745042,
3639
+ "loss": 1.8702,
3640
+ "step": 604
3641
+ },
3642
+ {
3643
+ "epoch": 0.41,
3644
+ "learning_rate": 0.0005569759206798866,
3645
+ "loss": 1.8803,
3646
+ "step": 605
3647
+ },
3648
+ {
3649
+ "epoch": 0.41,
3650
+ "learning_rate": 0.0005568696883852691,
3651
+ "loss": 1.8409,
3652
+ "step": 606
3653
+ },
3654
+ {
3655
+ "epoch": 0.41,
3656
+ "learning_rate": 0.0005567634560906515,
3657
+ "loss": 1.8715,
3658
+ "step": 607
3659
+ },
3660
+ {
3661
+ "epoch": 0.42,
3662
+ "learning_rate": 0.0005566572237960339,
3663
+ "loss": 1.9359,
3664
+ "step": 608
3665
+ },
3666
+ {
3667
+ "epoch": 0.42,
3668
+ "learning_rate": 0.0005565509915014164,
3669
+ "loss": 1.922,
3670
+ "step": 609
3671
+ },
3672
+ {
3673
+ "epoch": 0.42,
3674
+ "learning_rate": 0.0005564447592067988,
3675
+ "loss": 1.8341,
3676
+ "step": 610
3677
+ },
3678
+ {
3679
+ "epoch": 0.42,
3680
+ "learning_rate": 0.0005563385269121813,
3681
+ "loss": 1.9494,
3682
+ "step": 611
3683
+ },
3684
+ {
3685
+ "epoch": 0.42,
3686
+ "learning_rate": 0.0005562322946175637,
3687
+ "loss": 1.9585,
3688
+ "step": 612
3689
+ },
3690
+ {
3691
+ "epoch": 0.42,
3692
+ "learning_rate": 0.0005561260623229461,
3693
+ "loss": 1.8668,
3694
+ "step": 613
3695
+ },
3696
+ {
3697
+ "epoch": 0.42,
3698
+ "learning_rate": 0.0005560198300283286,
3699
+ "loss": 1.9067,
3700
+ "step": 614
3701
+ },
3702
+ {
3703
+ "epoch": 0.42,
3704
+ "learning_rate": 0.000555913597733711,
3705
+ "loss": 1.8475,
3706
+ "step": 615
3707
+ },
3708
+ {
3709
+ "epoch": 0.42,
3710
+ "learning_rate": 0.0005558073654390934,
3711
+ "loss": 1.9009,
3712
+ "step": 616
3713
+ },
3714
+ {
3715
+ "epoch": 0.42,
3716
+ "learning_rate": 0.0005557011331444759,
3717
+ "loss": 1.941,
3718
+ "step": 617
3719
+ },
3720
+ {
3721
+ "epoch": 0.42,
3722
+ "learning_rate": 0.0005555949008498583,
3723
+ "loss": 1.9756,
3724
+ "step": 618
3725
+ },
3726
+ {
3727
+ "epoch": 0.42,
3728
+ "learning_rate": 0.0005554886685552407,
3729
+ "loss": 1.9568,
3730
+ "step": 619
3731
+ },
3732
+ {
3733
+ "epoch": 0.42,
3734
+ "learning_rate": 0.0005553824362606232,
3735
+ "loss": 1.8794,
3736
+ "step": 620
3737
+ },
3738
+ {
3739
+ "epoch": 0.42,
3740
+ "learning_rate": 0.0005552762039660056,
3741
+ "loss": 1.9152,
3742
+ "step": 621
3743
+ },
3744
+ {
3745
+ "epoch": 0.43,
3746
+ "learning_rate": 0.000555169971671388,
3747
+ "loss": 2.0171,
3748
+ "step": 622
3749
+ },
3750
+ {
3751
+ "epoch": 0.43,
3752
+ "learning_rate": 0.0005550637393767705,
3753
+ "loss": 1.9388,
3754
+ "step": 623
3755
+ },
3756
+ {
3757
+ "epoch": 0.43,
3758
+ "learning_rate": 0.0005549575070821529,
3759
+ "loss": 1.9048,
3760
+ "step": 624
3761
+ },
3762
+ {
3763
+ "epoch": 0.43,
3764
+ "learning_rate": 0.0005548512747875354,
3765
+ "loss": 1.9667,
3766
+ "step": 625
3767
+ },
3768
+ {
3769
+ "epoch": 0.43,
3770
+ "learning_rate": 0.0005547450424929179,
3771
+ "loss": 1.9878,
3772
+ "step": 626
3773
+ },
3774
+ {
3775
+ "epoch": 0.43,
3776
+ "learning_rate": 0.0005546388101983003,
3777
+ "loss": 1.9145,
3778
+ "step": 627
3779
+ },
3780
+ {
3781
+ "epoch": 0.43,
3782
+ "learning_rate": 0.0005545325779036826,
3783
+ "loss": 1.9873,
3784
+ "step": 628
3785
+ },
3786
+ {
3787
+ "epoch": 0.43,
3788
+ "learning_rate": 0.0005544263456090651,
3789
+ "loss": 2.015,
3790
+ "step": 629
3791
+ },
3792
+ {
3793
+ "epoch": 0.43,
3794
+ "learning_rate": 0.0005543201133144475,
3795
+ "loss": 1.9204,
3796
+ "step": 630
3797
+ },
3798
+ {
3799
+ "epoch": 0.43,
3800
+ "learning_rate": 0.00055421388101983,
3801
+ "loss": 1.8899,
3802
+ "step": 631
3803
+ },
3804
+ {
3805
+ "epoch": 0.43,
3806
+ "learning_rate": 0.0005541076487252124,
3807
+ "loss": 1.9057,
3808
+ "step": 632
3809
+ },
3810
+ {
3811
+ "epoch": 0.43,
3812
+ "learning_rate": 0.0005540014164305948,
3813
+ "loss": 1.9663,
3814
+ "step": 633
3815
+ },
3816
+ {
3817
+ "epoch": 0.43,
3818
+ "learning_rate": 0.0005538951841359773,
3819
+ "loss": 1.9447,
3820
+ "step": 634
3821
+ },
3822
+ {
3823
+ "epoch": 0.43,
3824
+ "learning_rate": 0.0005537889518413597,
3825
+ "loss": 1.957,
3826
+ "step": 635
3827
+ },
3828
+ {
3829
+ "epoch": 0.43,
3830
+ "learning_rate": 0.0005536827195467422,
3831
+ "loss": 1.8772,
3832
+ "step": 636
3833
+ },
3834
+ {
3835
+ "epoch": 0.44,
3836
+ "learning_rate": 0.0005535764872521247,
3837
+ "loss": 1.9601,
3838
+ "step": 637
3839
+ },
3840
+ {
3841
+ "epoch": 0.44,
3842
+ "learning_rate": 0.0005534702549575071,
3843
+ "loss": 1.8354,
3844
+ "step": 638
3845
+ },
3846
+ {
3847
+ "epoch": 0.44,
3848
+ "learning_rate": 0.0005533640226628895,
3849
+ "loss": 1.9107,
3850
+ "step": 639
3851
+ },
3852
+ {
3853
+ "epoch": 0.44,
3854
+ "learning_rate": 0.000553257790368272,
3855
+ "loss": 1.9511,
3856
+ "step": 640
3857
+ },
3858
+ {
3859
+ "epoch": 0.44,
3860
+ "learning_rate": 0.0005531515580736543,
3861
+ "loss": 1.9354,
3862
+ "step": 641
3863
+ },
3864
+ {
3865
+ "epoch": 0.44,
3866
+ "learning_rate": 0.0005530453257790367,
3867
+ "loss": 1.9189,
3868
+ "step": 642
3869
+ },
3870
+ {
3871
+ "epoch": 0.44,
3872
+ "learning_rate": 0.0005529390934844192,
3873
+ "loss": 1.803,
3874
+ "step": 643
3875
+ },
3876
+ {
3877
+ "epoch": 0.44,
3878
+ "learning_rate": 0.0005528328611898016,
3879
+ "loss": 1.9278,
3880
+ "step": 644
3881
+ },
3882
+ {
3883
+ "epoch": 0.44,
3884
+ "learning_rate": 0.0005527266288951841,
3885
+ "loss": 1.8629,
3886
+ "step": 645
3887
+ },
3888
+ {
3889
+ "epoch": 0.44,
3890
+ "learning_rate": 0.0005526203966005665,
3891
+ "loss": 1.9348,
3892
+ "step": 646
3893
+ },
3894
+ {
3895
+ "epoch": 0.44,
3896
+ "learning_rate": 0.000552514164305949,
3897
+ "loss": 1.9472,
3898
+ "step": 647
3899
+ },
3900
+ {
3901
+ "epoch": 0.44,
3902
+ "learning_rate": 0.0005524079320113314,
3903
+ "loss": 1.9808,
3904
+ "step": 648
3905
+ },
3906
+ {
3907
+ "epoch": 0.44,
3908
+ "learning_rate": 0.0005523016997167139,
3909
+ "loss": 1.8421,
3910
+ "step": 649
3911
+ },
3912
+ {
3913
+ "epoch": 0.44,
3914
+ "learning_rate": 0.0005521954674220963,
3915
+ "loss": 1.9773,
3916
+ "step": 650
3917
+ },
3918
+ {
3919
+ "epoch": 0.45,
3920
+ "learning_rate": 0.0005520892351274788,
3921
+ "loss": 1.9566,
3922
+ "step": 651
3923
+ },
3924
+ {
3925
+ "epoch": 0.45,
3926
+ "learning_rate": 0.0005519830028328611,
3927
+ "loss": 1.9461,
3928
+ "step": 652
3929
+ },
3930
+ {
3931
+ "epoch": 0.45,
3932
+ "learning_rate": 0.0005518767705382435,
3933
+ "loss": 1.9117,
3934
+ "step": 653
3935
+ },
3936
+ {
3937
+ "epoch": 0.45,
3938
+ "learning_rate": 0.000551770538243626,
3939
+ "loss": 1.8791,
3940
+ "step": 654
3941
+ },
3942
+ {
3943
+ "epoch": 0.45,
3944
+ "learning_rate": 0.0005516643059490084,
3945
+ "loss": 1.9065,
3946
+ "step": 655
3947
+ },
3948
+ {
3949
+ "epoch": 0.45,
3950
+ "learning_rate": 0.0005515580736543909,
3951
+ "loss": 1.9535,
3952
+ "step": 656
3953
+ },
3954
+ {
3955
+ "epoch": 0.45,
3956
+ "learning_rate": 0.0005514518413597733,
3957
+ "loss": 1.8854,
3958
+ "step": 657
3959
+ },
3960
+ {
3961
+ "epoch": 0.45,
3962
+ "learning_rate": 0.0005513456090651558,
3963
+ "loss": 2.023,
3964
+ "step": 658
3965
+ },
3966
+ {
3967
+ "epoch": 0.45,
3968
+ "learning_rate": 0.0005512393767705382,
3969
+ "loss": 2.0061,
3970
+ "step": 659
3971
+ },
3972
+ {
3973
+ "epoch": 0.45,
3974
+ "learning_rate": 0.0005511331444759207,
3975
+ "loss": 1.8768,
3976
+ "step": 660
3977
+ },
3978
+ {
3979
+ "epoch": 0.45,
3980
+ "learning_rate": 0.0005510269121813031,
3981
+ "loss": 1.948,
3982
+ "step": 661
3983
+ },
3984
+ {
3985
+ "epoch": 0.45,
3986
+ "learning_rate": 0.0005509206798866855,
3987
+ "loss": 1.9478,
3988
+ "step": 662
3989
+ },
3990
+ {
3991
+ "epoch": 0.45,
3992
+ "learning_rate": 0.000550814447592068,
3993
+ "loss": 1.977,
3994
+ "step": 663
3995
+ },
3996
+ {
3997
+ "epoch": 0.45,
3998
+ "learning_rate": 0.0005507082152974503,
3999
+ "loss": 1.9208,
4000
+ "step": 664
4001
+ },
4002
+ {
4003
+ "epoch": 0.45,
4004
+ "learning_rate": 0.0005506019830028328,
4005
+ "loss": 1.9287,
4006
+ "step": 665
4007
+ },
4008
+ {
4009
+ "epoch": 0.46,
4010
+ "learning_rate": 0.0005504957507082152,
4011
+ "loss": 1.8817,
4012
+ "step": 666
4013
+ },
4014
+ {
4015
+ "epoch": 0.46,
4016
+ "learning_rate": 0.0005503895184135976,
4017
+ "loss": 1.9567,
4018
+ "step": 667
4019
+ },
4020
+ {
4021
+ "epoch": 0.46,
4022
+ "learning_rate": 0.0005502832861189801,
4023
+ "loss": 1.8139,
4024
+ "step": 668
4025
+ },
4026
+ {
4027
+ "epoch": 0.46,
4028
+ "learning_rate": 0.0005501770538243626,
4029
+ "loss": 1.913,
4030
+ "step": 669
4031
+ },
4032
+ {
4033
+ "epoch": 0.46,
4034
+ "learning_rate": 0.000550070821529745,
4035
+ "loss": 1.9434,
4036
+ "step": 670
4037
+ },
4038
+ {
4039
+ "epoch": 0.46,
4040
+ "learning_rate": 0.0005499645892351275,
4041
+ "loss": 1.9377,
4042
+ "step": 671
4043
+ },
4044
+ {
4045
+ "epoch": 0.46,
4046
+ "learning_rate": 0.0005498583569405099,
4047
+ "loss": 1.8781,
4048
+ "step": 672
4049
+ },
4050
+ {
4051
+ "epoch": 0.46,
4052
+ "learning_rate": 0.0005497521246458923,
4053
+ "loss": 1.8837,
4054
+ "step": 673
4055
+ },
4056
+ {
4057
+ "epoch": 0.46,
4058
+ "learning_rate": 0.0005496458923512748,
4059
+ "loss": 1.9478,
4060
+ "step": 674
4061
+ },
4062
+ {
4063
+ "epoch": 0.46,
4064
+ "learning_rate": 0.0005495396600566572,
4065
+ "loss": 1.8756,
4066
+ "step": 675
4067
+ },
4068
+ {
4069
+ "epoch": 0.46,
4070
+ "learning_rate": 0.0005494334277620397,
4071
+ "loss": 1.9507,
4072
+ "step": 676
4073
+ },
4074
+ {
4075
+ "epoch": 0.46,
4076
+ "learning_rate": 0.000549327195467422,
4077
+ "loss": 1.9027,
4078
+ "step": 677
4079
+ },
4080
+ {
4081
+ "epoch": 0.46,
4082
+ "learning_rate": 0.0005492209631728044,
4083
+ "loss": 1.9749,
4084
+ "step": 678
4085
+ },
4086
+ {
4087
+ "epoch": 0.46,
4088
+ "learning_rate": 0.0005491147308781869,
4089
+ "loss": 1.8564,
4090
+ "step": 679
4091
+ },
4092
+ {
4093
+ "epoch": 0.46,
4094
+ "learning_rate": 0.0005490084985835694,
4095
+ "loss": 1.9649,
4096
+ "step": 680
4097
+ },
4098
+ {
4099
+ "epoch": 0.47,
4100
+ "learning_rate": 0.0005489022662889518,
4101
+ "loss": 2.0128,
4102
+ "step": 681
4103
+ },
4104
+ {
4105
+ "epoch": 0.47,
4106
+ "learning_rate": 0.0005487960339943342,
4107
+ "loss": 1.9041,
4108
+ "step": 682
4109
+ },
4110
+ {
4111
+ "epoch": 0.47,
4112
+ "learning_rate": 0.0005486898016997167,
4113
+ "loss": 2.0121,
4114
+ "step": 683
4115
+ },
4116
+ {
4117
+ "epoch": 0.47,
4118
+ "learning_rate": 0.0005485835694050991,
4119
+ "loss": 1.9203,
4120
+ "step": 684
4121
+ },
4122
+ {
4123
+ "epoch": 0.47,
4124
+ "learning_rate": 0.0005484773371104816,
4125
+ "loss": 1.9,
4126
+ "step": 685
4127
+ },
4128
+ {
4129
+ "epoch": 0.47,
4130
+ "learning_rate": 0.000548371104815864,
4131
+ "loss": 1.9857,
4132
+ "step": 686
4133
+ },
4134
+ {
4135
+ "epoch": 0.47,
4136
+ "learning_rate": 0.0005482648725212464,
4137
+ "loss": 2.0838,
4138
+ "step": 687
4139
+ },
4140
+ {
4141
+ "epoch": 0.47,
4142
+ "learning_rate": 0.0005481586402266289,
4143
+ "loss": 1.8973,
4144
+ "step": 688
4145
+ },
4146
+ {
4147
+ "epoch": 0.47,
4148
+ "learning_rate": 0.0005480524079320112,
4149
+ "loss": 1.9636,
4150
+ "step": 689
4151
+ },
4152
+ {
4153
+ "epoch": 0.47,
4154
+ "learning_rate": 0.0005479461756373937,
4155
+ "loss": 1.9726,
4156
+ "step": 690
4157
+ },
4158
+ {
4159
+ "epoch": 0.47,
4160
+ "learning_rate": 0.0005478399433427762,
4161
+ "loss": 1.9062,
4162
+ "step": 691
4163
+ },
4164
+ {
4165
+ "epoch": 0.47,
4166
+ "learning_rate": 0.0005477337110481586,
4167
+ "loss": 1.8709,
4168
+ "step": 692
4169
+ },
4170
+ {
4171
+ "epoch": 0.47,
4172
+ "learning_rate": 0.000547627478753541,
4173
+ "loss": 1.9234,
4174
+ "step": 693
4175
+ },
4176
+ {
4177
+ "epoch": 0.47,
4178
+ "learning_rate": 0.0005475212464589235,
4179
+ "loss": 1.8859,
4180
+ "step": 694
4181
+ },
4182
+ {
4183
+ "epoch": 0.48,
4184
+ "learning_rate": 0.0005474150141643059,
4185
+ "loss": 1.9746,
4186
+ "step": 695
4187
+ },
4188
+ {
4189
+ "epoch": 0.48,
4190
+ "learning_rate": 0.0005473087818696884,
4191
+ "loss": 1.9229,
4192
+ "step": 696
4193
+ },
4194
+ {
4195
+ "epoch": 0.48,
4196
+ "learning_rate": 0.0005472025495750708,
4197
+ "loss": 1.8827,
4198
+ "step": 697
4199
+ },
4200
+ {
4201
+ "epoch": 0.48,
4202
+ "learning_rate": 0.0005470963172804532,
4203
+ "loss": 1.9455,
4204
+ "step": 698
4205
+ },
4206
+ {
4207
+ "epoch": 0.48,
4208
+ "learning_rate": 0.0005469900849858357,
4209
+ "loss": 1.8779,
4210
+ "step": 699
4211
+ },
4212
+ {
4213
+ "epoch": 0.48,
4214
+ "learning_rate": 0.000546883852691218,
4215
+ "loss": 1.9151,
4216
+ "step": 700
4217
+ },
4218
+ {
4219
+ "epoch": 0.48,
4220
+ "learning_rate": 0.0005467776203966004,
4221
+ "loss": 1.8283,
4222
+ "step": 701
4223
+ },
4224
+ {
4225
+ "epoch": 0.48,
4226
+ "learning_rate": 0.0005466713881019829,
4227
+ "loss": 1.9657,
4228
+ "step": 702
4229
+ },
4230
+ {
4231
+ "epoch": 0.48,
4232
+ "learning_rate": 0.0005465651558073654,
4233
+ "loss": 2.0174,
4234
+ "step": 703
4235
+ },
4236
+ {
4237
+ "epoch": 0.48,
4238
+ "learning_rate": 0.0005464589235127478,
4239
+ "loss": 1.9462,
4240
+ "step": 704
4241
+ },
4242
+ {
4243
+ "epoch": 0.48,
4244
+ "learning_rate": 0.0005463526912181303,
4245
+ "loss": 1.9257,
4246
+ "step": 705
4247
+ },
4248
+ {
4249
+ "epoch": 0.48,
4250
+ "learning_rate": 0.0005462464589235127,
4251
+ "loss": 1.8828,
4252
+ "step": 706
4253
+ },
4254
+ {
4255
+ "epoch": 0.48,
4256
+ "learning_rate": 0.0005461402266288951,
4257
+ "loss": 1.8831,
4258
+ "step": 707
4259
+ },
4260
+ {
4261
+ "epoch": 0.48,
4262
+ "learning_rate": 0.0005460339943342776,
4263
+ "loss": 1.9266,
4264
+ "step": 708
4265
+ },
4266
+ {
4267
+ "epoch": 0.48,
4268
+ "learning_rate": 0.00054592776203966,
4269
+ "loss": 2.0017,
4270
+ "step": 709
4271
+ },
4272
+ {
4273
+ "epoch": 0.49,
4274
+ "learning_rate": 0.0005458215297450425,
4275
+ "loss": 1.9537,
4276
+ "step": 710
4277
+ },
4278
+ {
4279
+ "epoch": 0.49,
4280
+ "learning_rate": 0.0005457152974504249,
4281
+ "loss": 1.9141,
4282
+ "step": 711
4283
+ },
4284
+ {
4285
+ "epoch": 0.49,
4286
+ "learning_rate": 0.0005456090651558072,
4287
+ "loss": 1.9393,
4288
+ "step": 712
4289
+ },
4290
+ {
4291
+ "epoch": 0.49,
4292
+ "learning_rate": 0.0005455028328611897,
4293
+ "loss": 1.8896,
4294
+ "step": 713
4295
+ },
4296
+ {
4297
+ "epoch": 0.49,
4298
+ "learning_rate": 0.0005453966005665722,
4299
+ "loss": 1.8186,
4300
+ "step": 714
4301
+ },
4302
+ {
4303
+ "epoch": 0.49,
4304
+ "learning_rate": 0.0005452903682719546,
4305
+ "loss": 1.9527,
4306
+ "step": 715
4307
+ },
4308
+ {
4309
+ "epoch": 0.49,
4310
+ "learning_rate": 0.0005451841359773371,
4311
+ "loss": 2.0319,
4312
+ "step": 716
4313
+ },
4314
+ {
4315
+ "epoch": 0.49,
4316
+ "learning_rate": 0.0005450779036827195,
4317
+ "loss": 1.8668,
4318
+ "step": 717
4319
+ },
4320
+ {
4321
+ "epoch": 0.49,
4322
+ "learning_rate": 0.0005449716713881019,
4323
+ "loss": 1.9775,
4324
+ "step": 718
4325
+ },
4326
+ {
4327
+ "epoch": 0.49,
4328
+ "learning_rate": 0.0005448654390934844,
4329
+ "loss": 1.8624,
4330
+ "step": 719
4331
+ },
4332
+ {
4333
+ "epoch": 0.49,
4334
+ "learning_rate": 0.0005447592067988668,
4335
+ "loss": 1.8725,
4336
+ "step": 720
4337
+ },
4338
+ {
4339
+ "epoch": 0.49,
4340
+ "learning_rate": 0.0005446529745042492,
4341
+ "loss": 1.8682,
4342
+ "step": 721
4343
+ },
4344
+ {
4345
+ "epoch": 0.49,
4346
+ "learning_rate": 0.0005445467422096317,
4347
+ "loss": 1.9078,
4348
+ "step": 722
4349
+ },
4350
+ {
4351
+ "epoch": 0.49,
4352
+ "learning_rate": 0.0005444405099150141,
4353
+ "loss": 1.8206,
4354
+ "step": 723
4355
+ },
4356
+ {
4357
+ "epoch": 0.49,
4358
+ "learning_rate": 0.0005443342776203966,
4359
+ "loss": 1.9923,
4360
+ "step": 724
4361
+ },
4362
+ {
4363
+ "epoch": 0.5,
4364
+ "learning_rate": 0.000544228045325779,
4365
+ "loss": 1.9765,
4366
+ "step": 725
4367
+ },
4368
+ {
4369
+ "epoch": 0.5,
4370
+ "learning_rate": 0.0005441218130311614,
4371
+ "loss": 1.9154,
4372
+ "step": 726
4373
+ },
4374
+ {
4375
+ "epoch": 0.5,
4376
+ "learning_rate": 0.0005440155807365438,
4377
+ "loss": 1.8111,
4378
+ "step": 727
4379
+ },
4380
+ {
4381
+ "epoch": 0.5,
4382
+ "learning_rate": 0.0005439093484419263,
4383
+ "loss": 1.9149,
4384
+ "step": 728
4385
+ },
4386
+ {
4387
+ "epoch": 0.5,
4388
+ "learning_rate": 0.0005438031161473087,
4389
+ "loss": 1.927,
4390
+ "step": 729
4391
+ },
4392
+ {
4393
+ "epoch": 0.5,
4394
+ "learning_rate": 0.0005436968838526912,
4395
+ "loss": 1.855,
4396
+ "step": 730
4397
+ },
4398
+ {
4399
+ "epoch": 0.5,
4400
+ "learning_rate": 0.0005435906515580736,
4401
+ "loss": 1.8842,
4402
+ "step": 731
4403
+ },
4404
+ {
4405
+ "epoch": 0.5,
4406
+ "learning_rate": 0.000543484419263456,
4407
+ "loss": 1.955,
4408
+ "step": 732
4409
+ },
4410
+ {
4411
+ "epoch": 0.5,
4412
+ "learning_rate": 0.0005433781869688385,
4413
+ "loss": 1.9599,
4414
+ "step": 733
4415
+ },
4416
+ {
4417
+ "epoch": 0.5,
4418
+ "learning_rate": 0.0005432719546742209,
4419
+ "loss": 1.8462,
4420
+ "step": 734
4421
+ },
4422
+ {
4423
+ "epoch": 0.5,
4424
+ "learning_rate": 0.0005431657223796034,
4425
+ "loss": 1.7453,
4426
+ "step": 735
4427
+ },
4428
+ {
4429
+ "epoch": 0.5,
4430
+ "learning_rate": 0.0005430594900849859,
4431
+ "loss": 1.8518,
4432
+ "step": 736
4433
+ },
4434
+ {
4435
+ "epoch": 0.5,
4436
+ "learning_rate": 0.0005429532577903683,
4437
+ "loss": 1.7791,
4438
+ "step": 737
4439
+ },
4440
+ {
4441
+ "epoch": 0.5,
4442
+ "learning_rate": 0.0005428470254957506,
4443
+ "loss": 1.918,
4444
+ "step": 738
4445
+ },
4446
+ {
4447
+ "epoch": 0.51,
4448
+ "learning_rate": 0.0005427407932011331,
4449
+ "loss": 1.8775,
4450
+ "step": 739
4451
+ },
4452
+ {
4453
+ "epoch": 0.51,
4454
+ "learning_rate": 0.0005426345609065155,
4455
+ "loss": 1.9502,
4456
+ "step": 740
4457
+ },
4458
+ {
4459
+ "epoch": 0.51,
4460
+ "learning_rate": 0.0005425283286118979,
4461
+ "loss": 1.9256,
4462
+ "step": 741
4463
+ },
4464
+ {
4465
+ "epoch": 0.51,
4466
+ "learning_rate": 0.0005424220963172804,
4467
+ "loss": 1.9275,
4468
+ "step": 742
4469
+ },
4470
+ {
4471
+ "epoch": 0.51,
4472
+ "learning_rate": 0.0005423158640226628,
4473
+ "loss": 1.9383,
4474
+ "step": 743
4475
+ },
4476
+ {
4477
+ "epoch": 0.51,
4478
+ "learning_rate": 0.0005422096317280453,
4479
+ "loss": 1.8964,
4480
+ "step": 744
4481
+ },
4482
+ {
4483
+ "epoch": 0.51,
4484
+ "learning_rate": 0.0005421033994334277,
4485
+ "loss": 1.9313,
4486
+ "step": 745
4487
+ },
4488
+ {
4489
+ "epoch": 0.51,
4490
+ "learning_rate": 0.0005419971671388101,
4491
+ "loss": 1.8931,
4492
+ "step": 746
4493
+ },
4494
+ {
4495
+ "epoch": 0.51,
4496
+ "learning_rate": 0.0005418909348441926,
4497
+ "loss": 1.9748,
4498
+ "step": 747
4499
+ },
4500
+ {
4501
+ "epoch": 0.51,
4502
+ "learning_rate": 0.0005417847025495751,
4503
+ "loss": 1.985,
4504
+ "step": 748
4505
+ },
4506
+ {
4507
+ "epoch": 0.51,
4508
+ "learning_rate": 0.0005416784702549575,
4509
+ "loss": 1.8515,
4510
+ "step": 749
4511
+ },
4512
+ {
4513
+ "epoch": 0.51,
4514
+ "learning_rate": 0.00054157223796034,
4515
+ "loss": 1.8717,
4516
+ "step": 750
4517
+ },
4518
+ {
4519
+ "epoch": 0.51,
4520
+ "learning_rate": 0.0005414660056657223,
4521
+ "loss": 1.9044,
4522
+ "step": 751
4523
+ },
4524
+ {
4525
+ "epoch": 0.51,
4526
+ "learning_rate": 0.0005413597733711047,
4527
+ "loss": 1.9972,
4528
+ "step": 752
4529
+ },
4530
+ {
4531
+ "epoch": 0.51,
4532
+ "learning_rate": 0.0005412535410764872,
4533
+ "loss": 1.8846,
4534
+ "step": 753
4535
+ },
4536
+ {
4537
+ "epoch": 0.52,
4538
+ "learning_rate": 0.0005411473087818696,
4539
+ "loss": 1.8465,
4540
+ "step": 754
4541
+ },
4542
+ {
4543
+ "epoch": 0.52,
4544
+ "learning_rate": 0.0005410410764872521,
4545
+ "loss": 1.9254,
4546
+ "step": 755
4547
+ },
4548
+ {
4549
+ "epoch": 0.52,
4550
+ "learning_rate": 0.0005409348441926345,
4551
+ "loss": 1.89,
4552
+ "step": 756
4553
+ },
4554
+ {
4555
+ "epoch": 0.52,
4556
+ "learning_rate": 0.0005408286118980169,
4557
+ "loss": 1.9923,
4558
+ "step": 757
4559
+ },
4560
+ {
4561
+ "epoch": 0.52,
4562
+ "learning_rate": 0.0005407223796033994,
4563
+ "loss": 1.8325,
4564
+ "step": 758
4565
+ },
4566
+ {
4567
+ "epoch": 0.52,
4568
+ "learning_rate": 0.0005406161473087819,
4569
+ "loss": 2.023,
4570
+ "step": 759
4571
+ },
4572
+ {
4573
+ "epoch": 0.52,
4574
+ "learning_rate": 0.0005405099150141643,
4575
+ "loss": 1.8279,
4576
+ "step": 760
4577
+ },
4578
+ {
4579
+ "epoch": 0.52,
4580
+ "learning_rate": 0.0005404036827195467,
4581
+ "loss": 1.9296,
4582
+ "step": 761
4583
+ },
4584
+ {
4585
+ "epoch": 0.52,
4586
+ "learning_rate": 0.0005402974504249291,
4587
+ "loss": 1.8728,
4588
+ "step": 762
4589
+ },
4590
+ {
4591
+ "epoch": 0.52,
4592
+ "learning_rate": 0.0005401912181303115,
4593
+ "loss": 1.9336,
4594
+ "step": 763
4595
+ },
4596
+ {
4597
+ "epoch": 0.52,
4598
+ "learning_rate": 0.000540084985835694,
4599
+ "loss": 1.8554,
4600
+ "step": 764
4601
+ },
4602
+ {
4603
+ "epoch": 0.52,
4604
+ "learning_rate": 0.0005399787535410764,
4605
+ "loss": 1.9388,
4606
+ "step": 765
4607
+ },
4608
+ {
4609
+ "epoch": 0.52,
4610
+ "learning_rate": 0.0005398725212464588,
4611
+ "loss": 1.9101,
4612
+ "step": 766
4613
+ },
4614
+ {
4615
+ "epoch": 0.52,
4616
+ "learning_rate": 0.0005397662889518413,
4617
+ "loss": 1.866,
4618
+ "step": 767
4619
+ },
4620
+ {
4621
+ "epoch": 0.53,
4622
+ "learning_rate": 0.0005396600566572237,
4623
+ "loss": 1.9731,
4624
+ "step": 768
4625
+ },
4626
+ {
4627
+ "epoch": 0.53,
4628
+ "learning_rate": 0.0005395538243626062,
4629
+ "loss": 1.9263,
4630
+ "step": 769
4631
+ },
4632
+ {
4633
+ "epoch": 0.53,
4634
+ "learning_rate": 0.0005394475920679887,
4635
+ "loss": 1.9337,
4636
+ "step": 770
4637
+ },
4638
+ {
4639
+ "epoch": 0.53,
4640
+ "learning_rate": 0.0005393413597733711,
4641
+ "loss": 1.896,
4642
+ "step": 771
4643
+ },
4644
+ {
4645
+ "epoch": 0.53,
4646
+ "learning_rate": 0.0005392351274787535,
4647
+ "loss": 1.9376,
4648
+ "step": 772
4649
+ },
4650
+ {
4651
+ "epoch": 0.53,
4652
+ "learning_rate": 0.000539128895184136,
4653
+ "loss": 1.9601,
4654
+ "step": 773
4655
+ },
4656
+ {
4657
+ "epoch": 0.53,
4658
+ "learning_rate": 0.0005390226628895183,
4659
+ "loss": 1.9177,
4660
+ "step": 774
4661
+ },
4662
+ {
4663
+ "epoch": 0.53,
4664
+ "learning_rate": 0.0005389164305949008,
4665
+ "loss": 1.9044,
4666
+ "step": 775
4667
+ },
4668
+ {
4669
+ "epoch": 0.53,
4670
+ "learning_rate": 0.0005388101983002832,
4671
+ "loss": 1.9378,
4672
+ "step": 776
4673
+ },
4674
+ {
4675
+ "epoch": 0.53,
4676
+ "learning_rate": 0.0005387039660056656,
4677
+ "loss": 1.8153,
4678
+ "step": 777
4679
+ },
4680
+ {
4681
+ "epoch": 0.53,
4682
+ "learning_rate": 0.0005385977337110481,
4683
+ "loss": 1.9445,
4684
+ "step": 778
4685
+ },
4686
+ {
4687
+ "epoch": 0.53,
4688
+ "learning_rate": 0.0005384915014164305,
4689
+ "loss": 1.8467,
4690
+ "step": 779
4691
+ },
4692
+ {
4693
+ "epoch": 0.53,
4694
+ "learning_rate": 0.000538385269121813,
4695
+ "loss": 1.8743,
4696
+ "step": 780
4697
+ },
4698
+ {
4699
+ "epoch": 0.53,
4700
+ "learning_rate": 0.0005382790368271955,
4701
+ "loss": 1.8839,
4702
+ "step": 781
4703
+ },
4704
+ {
4705
+ "epoch": 0.53,
4706
+ "learning_rate": 0.0005381728045325779,
4707
+ "loss": 1.8023,
4708
+ "step": 782
4709
+ },
4710
+ {
4711
+ "epoch": 0.54,
4712
+ "learning_rate": 0.0005380665722379603,
4713
+ "loss": 1.9814,
4714
+ "step": 783
4715
+ },
4716
+ {
4717
+ "epoch": 0.54,
4718
+ "learning_rate": 0.0005379603399433428,
4719
+ "loss": 1.8621,
4720
+ "step": 784
4721
+ },
4722
+ {
4723
+ "epoch": 0.54,
4724
+ "learning_rate": 0.0005378541076487252,
4725
+ "loss": 1.9371,
4726
+ "step": 785
4727
+ },
4728
+ {
4729
+ "epoch": 0.54,
4730
+ "learning_rate": 0.0005377478753541075,
4731
+ "loss": 1.9692,
4732
+ "step": 786
4733
+ },
4734
+ {
4735
+ "epoch": 0.54,
4736
+ "learning_rate": 0.00053764164305949,
4737
+ "loss": 1.9392,
4738
+ "step": 787
4739
+ },
4740
+ {
4741
+ "epoch": 0.54,
4742
+ "learning_rate": 0.0005375354107648724,
4743
+ "loss": 2.0,
4744
+ "step": 788
4745
+ },
4746
+ {
4747
+ "epoch": 0.54,
4748
+ "learning_rate": 0.0005374291784702549,
4749
+ "loss": 1.9281,
4750
+ "step": 789
4751
+ },
4752
+ {
4753
+ "epoch": 0.54,
4754
+ "learning_rate": 0.0005373229461756373,
4755
+ "loss": 1.9512,
4756
+ "step": 790
4757
+ },
4758
+ {
4759
+ "epoch": 0.54,
4760
+ "learning_rate": 0.0005372167138810198,
4761
+ "loss": 1.9222,
4762
+ "step": 791
4763
+ },
4764
+ {
4765
+ "epoch": 0.54,
4766
+ "learning_rate": 0.0005371104815864022,
4767
+ "loss": 1.9698,
4768
+ "step": 792
4769
+ },
4770
+ {
4771
+ "epoch": 0.54,
4772
+ "learning_rate": 0.0005370042492917847,
4773
+ "loss": 1.8392,
4774
+ "step": 793
4775
+ },
4776
+ {
4777
+ "epoch": 0.54,
4778
+ "learning_rate": 0.0005368980169971671,
4779
+ "loss": 1.9714,
4780
+ "step": 794
4781
+ },
4782
+ {
4783
+ "epoch": 0.54,
4784
+ "learning_rate": 0.0005367917847025496,
4785
+ "loss": 1.8521,
4786
+ "step": 795
4787
+ },
4788
+ {
4789
+ "epoch": 0.54,
4790
+ "learning_rate": 0.000536685552407932,
4791
+ "loss": 1.9495,
4792
+ "step": 796
4793
+ },
4794
+ {
4795
+ "epoch": 0.54,
4796
+ "learning_rate": 0.0005365793201133144,
4797
+ "loss": 1.9401,
4798
+ "step": 797
4799
+ },
4800
+ {
4801
+ "epoch": 0.55,
4802
+ "learning_rate": 0.0005364730878186969,
4803
+ "loss": 1.8449,
4804
+ "step": 798
4805
+ },
4806
+ {
4807
+ "epoch": 0.55,
4808
+ "learning_rate": 0.0005363668555240792,
4809
+ "loss": 1.9425,
4810
+ "step": 799
4811
+ },
4812
+ {
4813
+ "epoch": 0.55,
4814
+ "learning_rate": 0.0005362606232294617,
4815
+ "loss": 1.8826,
4816
+ "step": 800
4817
+ },
4818
+ {
4819
+ "epoch": 0.55,
4820
+ "eval_loss": 1.9985228776931763,
4821
+ "eval_runtime": 1468.32,
4822
+ "eval_samples_per_second": 9.904,
4823
+ "eval_steps_per_second": 9.904,
4824
+ "step": 800
4825
  }
4826
  ],
4827
  "max_steps": 5848,
4828
  "num_train_epochs": 4,
4829
+ "total_flos": 9.813169375148851e+17,
4830
  "trial_name": null,
4831
  "trial_params": null
4832
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4922243f88d9ee4e31bb04c28db1b2ab1db2f80c964b73fff4a5e4391dbf52b8
3
  size 2368281769
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:18d064e377257d8b4962000d02ab7ff4990aabdaf29ea0066e9c7e06266ac53f
3
  size 2368281769