AlekseyKorshuk committed
Commit c4cb4d3 · 1 Parent(s): e2275aa

huggingartists

README.md CHANGED
@@ -45,15 +45,15 @@ from datasets import load_dataset
 dataset = load_dataset("huggingartists/eminem")
 ```
 
-[Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/29aftir4/artifacts), which is tracked with [W&B artifacts](https://docs.wandb.com/artifacts) at every step of the pipeline.
+[Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/391kfg7f/artifacts), which is tracked with [W&B artifacts](https://docs.wandb.com/artifacts) at every step of the pipeline.
 
 ## Training procedure
 
 The model is based on a pre-trained [GPT-2](https://huggingface.co/gpt2) which is fine-tuned on Eminem's lyrics.
 
-Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/1yj0yyz9) for full transparency and reproducibility.
+Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/1361uz9o) for full transparency and reproducibility.
 
-At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/1yj0yyz9/artifacts) is logged and versioned.
+At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/1361uz9o/artifacts) is logged and versioned.
 
 ## How to use
 
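The model card's "How to use" flow stays the same in this commit; below is a minimal sketch of loading the dataset and generating lyrics from this checkpoint, assuming the standard `datasets`/`transformers` APIs (the prompt and sampling settings are illustrative, not taken from the card):

```python
from datasets import load_dataset
from transformers import pipeline

# Lyrics dataset the checkpoint was fine-tuned on
dataset = load_dataset("huggingartists/eminem")

# Text-generation pipeline on top of the fine-tuned GPT-2 checkpoint
generator = pipeline("text-generation", model="huggingartists/eminem")

# Illustrative prompt and sampling settings
for out in generator("I am", max_length=50, do_sample=True, num_return_sequences=3):
    print(out["generated_text"])
```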
evaluation.txt CHANGED
@@ -1 +1 @@
-{"eval_loss": 0.4826279878616333, "eval_runtime": 14.6559, "eval_samples_per_second": 44.897, "eval_steps_per_second": 5.663, "epoch": 4.0}
+{"eval_loss": 0.3943726122379303, "eval_runtime": 15.416, "eval_samples_per_second": 44.759, "eval_steps_per_second": 5.643, "epoch": 5.0}
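evaluation.txt is a single-line JSON object, so the new metrics (eval_loss drops from about 0.483 after 4 epochs to about 0.394 after 5) can be read back directly; a small sketch, assuming the file sits at the repository root:

```python
import json

# Parse the one-line JSON metrics file written at the end of training
with open("evaluation.txt") as f:
    metrics = json.load(f)

print(f"eval_loss after epoch {metrics['epoch']}: {metrics['eval_loss']:.4f}")
```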
flax_model.msgpack CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:428633d1f6458ab43ed5eee44c1ef0505fe223f57c99426fe21435d1aa3b434b
+oid sha256:02edf25c965a60bf14d297e2e1b0c081720b9e8680d800634688a3fc4a2fb6e3
 size 497764120
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f52b4dcb8476f623a272e10ac84ac049de0f1003081728e8884c6d2f759310f9
+oid sha256:3549806e8df4af4ca7246f4b76fb32c1636959e4dcfdd8e7ae18219bcecf618d
 size 995604017
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:753a59950d6b66574f6214a57384f99ae242cd291bd19d5d9e54977a42f5f557
+oid sha256:74cc2a5b07365c0d471dcdc1b01d1d3b1079d6849be25c14f96e07d25c27d9e7
 size 510396521
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:32f46f1e2250c32c3ee6983a1c0513e245212fb9a74b0dd129330fa6abe719c7
+oid sha256:a4270d9bd9f11421914c6de82fdc91926e8d5b6956546e23f778f5df9e81d41a
 size 14567
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:33680f79177570c60fb9a75223c44dc245db8eeb97bbb366911a9c63f50a44d8
+oid sha256:7478d4d8fd00284d9c6da95af37a13b23cb40ae04238067c0707b07c0d165090
 size 623
trainer_state.json CHANGED
@@ -1,8 +1,8 @@
 {
-  "best_metric": 0.4826279878616333,
-  "best_model_checkpoint": "output/eminem/checkpoint-1936",
-  "epoch": 4.0,
-  "global_step": 1936,
+  "best_metric": 0.3943726122379303,
+  "best_model_checkpoint": "output/eminem/checkpoint-2400",
+  "epoch": 5.0,
+  "global_step": 2400,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -2360,11 +2360,577 @@
     "eval_samples_per_second": 44.716,
     "eval_steps_per_second": 5.64,
     "step": 1936
2363
+ },
2364
+ {
2365
+ "epoch": 4.04,
2366
+ "learning_rate": 0.00011450235959621898,
2367
+ "loss": 0.3751,
2368
+ "step": 1940
2369
+ },
2370
+ {
2371
+ "epoch": 4.05,
2372
+ "learning_rate": 0.00011280977272542054,
2373
+ "loss": 0.7741,
2374
+ "step": 1945
2375
+ },
2376
+ {
2377
+ "epoch": 4.06,
2378
+ "learning_rate": 0.00011106984492265664,
2379
+ "loss": 0.6279,
2380
+ "step": 1950
2381
+ },
2382
+ {
2383
+ "epoch": 4.07,
2384
+ "learning_rate": 0.00010928443934619291,
2385
+ "loss": 0.4172,
2386
+ "step": 1955
2387
+ },
2388
+ {
2389
+ "epoch": 4.08,
2390
+ "learning_rate": 0.00010745546785304314,
2391
+ "loss": 0.8202,
2392
+ "step": 1960
2393
+ },
2394
+ {
2395
+ "epoch": 4.09,
2396
+ "learning_rate": 0.0001055848889516962,
2397
+ "loss": 0.7253,
2398
+ "step": 1965
2399
+ },
2400
+ {
2401
+ "epoch": 4.1,
2402
+ "learning_rate": 0.00010367470570488607,
2403
+ "loss": 0.6737,
2404
+ "step": 1970
2405
+ },
2406
+ {
2407
+ "epoch": 4.11,
2408
+ "learning_rate": 0.00010172696358468322,
2409
+ "loss": 0.602,
2410
+ "step": 1975
2411
+ },
2412
+ {
2413
+ "epoch": 4.12,
2414
+ "learning_rate": 9.974374828213395e-05,
2415
+ "loss": 0.7079,
2416
+ "step": 1980
2417
+ },
2418
+ {
2419
+ "epoch": 4.14,
2420
+ "learning_rate": 9.772718347385836e-05,
2421
+ "loss": 0.8964,
2422
+ "step": 1985
2423
+ },
2424
+ {
2425
+ "epoch": 4.15,
2426
+ "learning_rate": 9.567942854796078e-05,
2427
+ "loss": 0.4818,
2428
+ "step": 1990
2429
+ },
2430
+ {
2431
+ "epoch": 4.16,
2432
+ "learning_rate": 9.360267629171191e-05,
2433
+ "loss": 0.7278,
2434
+ "step": 1995
2435
+ },
2436
+ {
2437
+ "epoch": 4.17,
2438
+ "learning_rate": 9.149915054343752e-05,
2439
+ "loss": 0.6072,
2440
+ "step": 2000
2441
+ },
2442
+ {
2443
+ "epoch": 4.18,
2444
+ "learning_rate": 8.937110381119999e-05,
2445
+ "loss": 0.8248,
2446
+ "step": 2005
2447
+ },
2448
+ {
2449
+ "epoch": 4.19,
2450
+ "learning_rate": 8.722081486074574e-05,
2451
+ "loss": 0.8012,
2452
+ "step": 2010
2453
+ },
2454
+ {
2455
+ "epoch": 4.2,
2456
+ "learning_rate": 8.50505862753343e-05,
2457
+ "loss": 0.542,
2458
+ "step": 2015
2459
+ },
2460
+ {
2461
+ "epoch": 4.21,
2462
+ "learning_rate": 8.286274199009886e-05,
2463
+ "loss": 0.5911,
2464
+ "step": 2020
2465
+ },
2466
+ {
2467
+ "epoch": 4.22,
2468
+ "learning_rate": 8.06596248034977e-05,
2469
+ "loss": 0.6034,
2470
+ "step": 2025
2471
+ },
2472
+ {
2473
+ "epoch": 4.23,
2474
+ "learning_rate": 7.844359386859629e-05,
2475
+ "loss": 0.6177,
2476
+ "step": 2030
2477
+ },
2478
+ {
2479
+ "epoch": 4.24,
2480
+ "learning_rate": 7.621702216682333e-05,
2481
+ "loss": 0.7298,
2482
+ "step": 2035
2483
+ },
2484
+ {
2485
+ "epoch": 4.25,
2486
+ "learning_rate": 7.398229396693239e-05,
2487
+ "loss": 0.51,
2488
+ "step": 2040
2489
+ },
2490
+ {
2491
+ "epoch": 4.26,
2492
+ "learning_rate": 7.174180227184603e-05,
2493
+ "loss": 0.8855,
2494
+ "step": 2045
2495
+ },
2496
+ {
2497
+ "epoch": 4.27,
2498
+ "learning_rate": 6.94979462561934e-05,
2499
+ "loss": 0.6728,
2500
+ "step": 2050
2501
+ },
2502
+ {
2503
+ "epoch": 4.28,
2504
+ "learning_rate": 6.725312869720243e-05,
2505
+ "loss": 0.4899,
2506
+ "step": 2055
2507
+ },
2508
+ {
2509
+ "epoch": 4.29,
2510
+ "learning_rate": 6.500975340173405e-05,
2511
+ "loss": 0.4827,
2512
+ "step": 2060
2513
+ },
2514
+ {
2515
+ "epoch": 4.3,
2516
+ "learning_rate": 6.27702226322509e-05,
2517
+ "loss": 0.6521,
2518
+ "step": 2065
2519
+ },
2520
+ {
2521
+ "epoch": 4.31,
2522
+ "learning_rate": 6.05369345343951e-05,
2523
+ "loss": 0.5115,
2524
+ "step": 2070
2525
+ },
2526
+ {
2527
+ "epoch": 4.32,
2528
+ "learning_rate": 5.831228056899374e-05,
2529
+ "loss": 0.3792,
2530
+ "step": 2075
2531
+ },
2532
+ {
2533
+ "epoch": 4.33,
2534
+ "learning_rate": 5.609864295123839e-05,
2535
+ "loss": 0.5564,
2536
+ "step": 2080
2537
+ },
2538
+ {
2539
+ "epoch": 4.34,
2540
+ "learning_rate": 5.389839209973954e-05,
2541
+ "loss": 0.8714,
2542
+ "step": 2085
2543
+ },
2544
+ {
2545
+ "epoch": 4.35,
2546
+ "learning_rate": 5.171388409821159e-05,
2547
+ "loss": 0.4843,
2548
+ "step": 2090
2549
+ },
2550
+ {
2551
+ "epoch": 4.36,
2552
+ "learning_rate": 4.9547458172542385e-05,
2553
+ "loss": 0.7155,
2554
+ "step": 2095
2555
+ },
2556
+ {
2557
+ "epoch": 4.38,
2558
+ "learning_rate": 4.74014341858799e-05,
2559
+ "loss": 0.4362,
2560
+ "step": 2100
2561
+ },
2562
+ {
2563
+ "epoch": 4.39,
2564
+ "learning_rate": 4.5278110154455465e-05,
2565
+ "loss": 0.4663,
2566
+ "step": 2105
2567
+ },
2568
+ {
2569
+ "epoch": 4.4,
2570
+ "learning_rate": 4.317975978682545e-05,
2571
+ "loss": 0.5994,
2572
+ "step": 2110
2573
+ },
2574
+ {
2575
+ "epoch": 4.41,
2576
+ "learning_rate": 4.110863004912692e-05,
2577
+ "loss": 0.4463,
2578
+ "step": 2115
2579
+ },
2580
+ {
2581
+ "epoch": 4.42,
2582
+ "learning_rate": 3.9066938758951916e-05,
2583
+ "loss": 0.6226,
2584
+ "step": 2120
2585
+ },
2586
+ {
2587
+ "epoch": 4.43,
2588
+ "learning_rate": 3.7056872210476396e-05,
2589
+ "loss": 0.5947,
2590
+ "step": 2125
2591
+ },
2592
+ {
2593
+ "epoch": 4.44,
2594
+ "learning_rate": 3.508058283331054e-05,
2595
+ "loss": 0.5276,
2596
+ "step": 2130
2597
+ },
2598
+ {
2599
+ "epoch": 4.45,
2600
+ "learning_rate": 3.314018688761322e-05,
2601
+ "loss": 0.5118,
2602
+ "step": 2135
2603
+ },
2604
+ {
2605
+ "epoch": 4.46,
2606
+ "learning_rate": 3.123776219796979e-05,
2607
+ "loss": 0.6581,
2608
+ "step": 2140
2609
+ },
2610
+ {
2611
+ "epoch": 4.47,
2612
+ "learning_rate": 2.937534592838677e-05,
2613
+ "loss": 0.6129,
2614
+ "step": 2145
2615
+ },
2616
+ {
2617
+ "epoch": 4.48,
2618
+ "learning_rate": 2.7554932400854067e-05,
2619
+ "loss": 0.6305,
2620
+ "step": 2150
2621
+ },
2622
+ {
2623
+ "epoch": 4.49,
2624
+ "learning_rate": 2.577847095977183e-05,
2625
+ "loss": 0.6368,
2626
+ "step": 2155
2627
+ },
2628
+ {
2629
+ "epoch": 4.5,
2630
+ "learning_rate": 2.4047863884551244e-05,
2631
+ "loss": 0.6684,
2632
+ "step": 2160
2633
+ },
2634
+ {
2635
+ "epoch": 4.51,
2636
+ "learning_rate": 2.2364964352589423e-05,
2637
+ "loss": 0.7136,
2638
+ "step": 2165
2639
+ },
2640
+ {
2641
+ "epoch": 4.52,
2642
+ "learning_rate": 2.0731574454860283e-05,
2643
+ "loss": 0.5243,
2644
+ "step": 2170
2645
+ },
2646
+ {
2647
+ "epoch": 4.53,
2648
+ "learning_rate": 1.914944326618096e-05,
2649
+ "loss": 0.665,
2650
+ "step": 2175
2651
+ },
2652
+ {
2653
+ "epoch": 4.54,
2654
+ "learning_rate": 1.7620264972250878e-05,
2655
+ "loss": 0.5977,
2656
+ "step": 2180
2657
+ },
2658
+ {
2659
+ "epoch": 4.55,
2660
+ "learning_rate": 1.6145677055492958e-05,
2661
+ "loss": 0.7612,
2662
+ "step": 2185
2663
+ },
2664
+ {
2665
+ "epoch": 4.56,
2666
+ "learning_rate": 1.4727258541581522e-05,
2667
+ "loss": 0.7172,
2668
+ "step": 2190
2669
+ },
2670
+ {
2671
+ "epoch": 4.57,
2672
+ "learning_rate": 1.3366528308588766e-05,
2673
+ "loss": 0.6571,
2674
+ "step": 2195
2675
+ },
2676
+ {
2677
+ "epoch": 4.58,
2678
+ "learning_rate": 1.2064943460529662e-05,
2679
+ "loss": 0.664,
2680
+ "step": 2200
2681
+ },
2682
+ {
2683
+ "epoch": 4.59,
2684
+ "learning_rate": 1.0823897767063955e-05,
2685
+ "loss": 1.1318,
2686
+ "step": 2205
2687
+ },
2688
+ {
2689
+ "epoch": 4.6,
2690
+ "learning_rate": 9.644720171000711e-06,
2691
+ "loss": 0.5895,
2692
+ "step": 2210
2693
+ },
2694
+ {
2695
+ "epoch": 4.61,
2696
+ "learning_rate": 8.52867336524718e-06,
2697
+ "loss": 0.5683,
2698
+ "step": 2215
2699
+ },
2700
+ {
2701
+ "epoch": 4.62,
2702
+ "learning_rate": 7.476952440678663e-06,
2703
+ "loss": 0.8862,
2704
+ "step": 2220
2705
+ },
2706
+ {
2707
+ "epoch": 4.64,
2708
+ "learning_rate": 6.490683606400273e-06,
2709
+ "loss": 0.5336,
2710
+ "step": 2225
2711
+ },
2712
+ {
2713
+ "epoch": 4.65,
2714
+ "learning_rate": 5.570922983785744e-06,
2715
+ "loss": 0.7185,
2716
+ "step": 2230
2717
+ },
2718
+ {
2719
+ "epoch": 4.66,
2720
+ "learning_rate": 4.718655475547088e-06,
2721
+ "loss": 0.6121,
2722
+ "step": 2235
2723
+ },
2724
+ {
2725
+ "epoch": 4.67,
2726
+ "learning_rate": 3.934793711076979e-06,
2727
+ "loss": 0.4995,
2728
+ "step": 2240
2729
+ },
2730
+ {
2731
+ "epoch": 4.68,
2732
+ "learning_rate": 3.2201770691879365e-06,
2733
+ "loss": 0.639,
2734
+ "step": 2245
2735
+ },
2736
+ {
2737
+ "epoch": 4.69,
2738
+ "learning_rate": 2.5755707792803602e-06,
2739
+ "loss": 0.6703,
2740
+ "step": 2250
2741
+ },
2742
+ {
2743
+ "epoch": 4.7,
2744
+ "learning_rate": 2.001665101912862e-06,
2745
+ "loss": 0.5835,
2746
+ "step": 2255
2747
+ },
2748
+ {
2749
+ "epoch": 4.71,
2750
+ "learning_rate": 1.499074589660724e-06,
2751
+ "loss": 0.8827,
2752
+ "step": 2260
2753
+ },
2754
+ {
2755
+ "epoch": 4.72,
2756
+ "learning_rate": 1.0683374290336727e-06,
2757
+ "loss": 0.521,
2758
+ "step": 2265
2759
+ },
2760
+ {
2761
+ "epoch": 4.73,
2762
+ "learning_rate": 7.099148641708803e-07,
2763
+ "loss": 0.5759,
2764
+ "step": 2270
2765
+ },
2766
+ {
2767
+ "epoch": 4.74,
2768
+ "learning_rate": 4.2419070293249417e-07,
2769
+ "loss": 0.4576,
2770
+ "step": 2275
2771
+ },
2772
+ {
2773
+ "epoch": 4.75,
2774
+ "learning_rate": 2.1147090590762733e-07,
2775
+ "loss": 0.9183,
2776
+ "step": 2280
2777
+ },
2778
+ {
2779
+ "epoch": 4.76,
2780
+ "learning_rate": 7.19832587822494e-08,
2781
+ "loss": 0.6811,
2782
+ "step": 2285
2783
+ },
2784
+ {
2785
+ "epoch": 4.77,
2786
+ "learning_rate": 5.877128423112166e-09,
2787
+ "loss": 0.6599,
2788
+ "step": 2290
2789
+ },
2790
+ {
2791
+ "epoch": 4.78,
2792
+ "learning_rate": 1.3223302930315373e-08,
2793
+ "loss": 0.8066,
2794
+ "step": 2295
2795
+ },
2796
+ {
2797
+ "epoch": 4.79,
2798
+ "learning_rate": 9.401391583621219e-08,
2799
+ "loss": 0.93,
2800
+ "step": 2300
2801
+ },
2802
+ {
2803
+ "epoch": 4.8,
2804
+ "learning_rate": 2.4816245452881085e-07,
2805
+ "loss": 0.7526,
2806
+ "step": 2305
2807
+ },
2808
+ {
2809
+ "epoch": 4.81,
2810
+ "learning_rate": 4.755038528919192e-07,
2811
+ "loss": 0.6209,
2812
+ "step": 2310
2813
+ },
2814
+ {
2815
+ "epoch": 4.82,
2816
+ "learning_rate": 7.757946680618806e-07,
2817
+ "loss": 0.5472,
2818
+ "step": 2315
2819
+ },
2820
+ {
2821
+ "epoch": 4.83,
2822
+ "learning_rate": 1.1487133411127057e-06,
2823
+ "loss": 0.5128,
2824
+ "step": 2320
2825
+ },
2826
+ {
2827
+ "epoch": 4.84,
2828
+ "learning_rate": 1.5938605413876288e-06,
2829
+ "loss": 0.5971,
2830
+ "step": 2325
2831
+ },
2832
+ {
2833
+ "epoch": 4.85,
2834
+ "learning_rate": 2.1107595941164267e-06,
2835
+ "loss": 0.9001,
2836
+ "step": 2330
2837
+ },
2838
+ {
2839
+ "epoch": 4.86,
2840
+ "learning_rate": 2.698856990845814e-06,
2841
+ "loss": 0.624,
2842
+ "step": 2335
2843
+ },
2844
+ {
2845
+ "epoch": 4.88,
2846
+ "learning_rate": 3.3575229821519346e-06,
2847
+ "loss": 0.469,
2848
+ "step": 2340
2849
+ },
2850
+ {
2851
+ "epoch": 4.89,
2852
+ "learning_rate": 4.086052251995796e-06,
2853
+ "loss": 0.7627,
2854
+ "step": 2345
2855
+ },
2856
+ {
2857
+ "epoch": 4.9,
2858
+ "learning_rate": 4.883664672985776e-06,
2859
+ "loss": 0.5373,
2860
+ "step": 2350
2861
+ },
2862
+ {
2863
+ "epoch": 4.91,
2864
+ "learning_rate": 5.749506141764069e-06,
2865
+ "loss": 0.684,
2866
+ "step": 2355
2867
+ },
2868
+ {
2869
+ "epoch": 4.92,
2870
+ "learning_rate": 6.682649493598944e-06,
2871
+ "loss": 0.6701,
2872
+ "step": 2360
2873
+ },
2874
+ {
2875
+ "epoch": 4.93,
2876
+ "learning_rate": 7.682095495217462e-06,
2877
+ "loss": 0.5721,
2878
+ "step": 2365
2879
+ },
2880
+ {
2881
+ "epoch": 4.94,
2882
+ "learning_rate": 8.746773914805009e-06,
2883
+ "loss": 0.5432,
2884
+ "step": 2370
2885
+ },
2886
+ {
2887
+ "epoch": 4.95,
2888
+ "learning_rate": 9.875544668046303e-06,
2889
+ "loss": 0.6582,
2890
+ "step": 2375
2891
+ },
2892
+ {
2893
+ "epoch": 4.96,
2894
+ "learning_rate": 1.1067199038944353e-05,
2895
+ "loss": 0.5341,
2896
+ "step": 2380
2897
+ },
2898
+ {
2899
+ "epoch": 4.97,
2900
+ "learning_rate": 1.2320460974148876e-05,
2901
+ "loss": 0.677,
2902
+ "step": 2385
2903
+ },
2904
+ {
2905
+ "epoch": 4.98,
2906
+ "learning_rate": 1.3633988449393231e-05,
2907
+ "loss": 0.5661,
2908
+ "step": 2390
2909
+ },
2910
+ {
2911
+ "epoch": 4.99,
2912
+ "learning_rate": 1.5006374906553291e-05,
2913
+ "loss": 0.4745,
2914
+ "step": 2395
2915
+ },
2916
+ {
2917
+ "epoch": 5.0,
2918
+ "learning_rate": 1.6436150759836066e-05,
2919
+ "loss": 0.746,
2920
+ "step": 2400
2921
+ },
2922
+ {
2923
+ "epoch": 5.0,
2924
+ "eval_loss": 0.3943726122379303,
2925
+ "eval_runtime": 15.4496,
2926
+ "eval_samples_per_second": 44.661,
2927
+ "eval_steps_per_second": 5.631,
2928
+ "step": 2400
    }
  ],
-  "max_steps": 1936,
-  "num_train_epochs": 4,
-  "total_flos": 2021355159552000.0,
+  "max_steps": 2400,
+  "num_train_epochs": 5,
+  "total_flos": 2505790586880000.0,
   "trial_name": null,
   "trial_params": null
 }
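The trainer_state.json change appends the epoch-5 log history (steps 1940-2400) and updates the summary fields; a hedged sketch of inspecting that file, assuming the usual `Trainer` state layout with a top-level `log_history` list:

```python
import json

with open("trainer_state.json") as f:
    state = json.load(f)

# Training entries carry "loss"; evaluation entries carry "eval_loss" instead
train_log = [e for e in state["log_history"] if "loss" in e]

print("best eval_loss:", state["best_metric"])   # 0.3943726122379303
print("total steps:", state["global_step"])      # 2400
print("last logged loss:", train_log[-1]["loss"], "at step", train_log[-1]["step"])
```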
training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:28f78ff1353e48b556a427f64b800f3a4cfdb95bdf8553eb6558c9abb866bf9e
+oid sha256:16468c38c400c76b00da680c27696240fef059a3ea4837c5eeffa1b293611cac
 size 3375