Training in progress, step 4000, checkpoint
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:a45987252e54dc35108e11e93cd15c2f7eff117407dadcc866c536c9fe38d549
 size 328277848
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:1032541ab3e6eca2a68a25836b91b06b42d61052c34cec2e6dfe0544f185dcf0
 size 318646859
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:329a377c90ca49d3bcb8c01bcb7bdf9bc769af05915d36720b3201a9c222f867
 size 14645
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:8f34721a2fd924d02bdad3691f09e25bcb5ed140f7982be7b710c4ccbd2538c0
 size 1465
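Each of the four checkpoint files above is tracked with Git LFS, so the commit rewrites only a small pointer file: a version line, the SHA-256 oid of the stored blob, and its size in bytes. As a minimal sketch (not part of this commit), the following Python verifies a downloaded blob against its pointer; the paths and helper names are hypothetical, and the parser assumes the standard three-line pointer layout shown in the diffs.

import hashlib

def parse_lfs_pointer(pointer_path):
    # Each pointer line is "key value", e.g. "oid sha256:<hex>" or "size 328277848".
    fields = {}
    with open(pointer_path) as f:
        for line in f:
            key, _, value = line.strip().partition(" ")
            fields[key] = value
    return fields

def verify_blob(pointer_path, blob_path):
    # Recompute the blob's SHA-256 and compare oid and byte count to the pointer.
    fields = parse_lfs_pointer(pointer_path)
    expected_oid = fields["oid"].split(":", 1)[1]
    expected_size = int(fields["size"])
    digest, size = hashlib.sha256(), 0
    with open(blob_path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            digest.update(chunk)
            size += len(chunk)
    return digest.hexdigest() == expected_oid and size == expected_size

# e.g. verify_blob("model.safetensors.pointer", "last-checkpoint/model.safetensors")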
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.
+  "epoch": 0.6757898293630681,
   "eval_steps": 500,
-  "global_step":
+  "global_step": 4000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -2521,6 +2521,364 @@
       "eval_samples_per_second": 266.992,
       "eval_steps_per_second": 5.607,
       "step": 3500
+    },
+    {
+      "epoch": 0.5930055752660922,
+      "grad_norm": 0.6172225475311279,
+      "learning_rate": 0.00028291929128998293,
+      "loss": 4.818932342529297,
+      "step": 3510
+    },
+    {
+      "epoch": 0.5946950498395,
+      "grad_norm": 0.6093600392341614,
+      "learning_rate": 0.00028269662709221635,
+      "loss": 4.821521759033203,
+      "step": 3520
+    },
+    {
+      "epoch": 0.5963845244129076,
+      "grad_norm": 0.5978082418441772,
+      "learning_rate": 0.00028247260974544037,
+      "loss": 4.8287818908691404,
+      "step": 3530
+    },
+    {
+      "epoch": 0.5980739989863153,
+      "grad_norm": 0.6068260073661804,
+      "learning_rate": 0.00028224724153403015,
+      "loss": 4.830426025390625,
+      "step": 3540
+    },
+    {
+      "epoch": 0.5997634735597229,
+      "grad_norm": 0.5650802850723267,
+      "learning_rate": 0.0002820205247561356,
+      "loss": 4.82833251953125,
+      "step": 3550
+    },
+    {
+      "epoch": 0.6014529481331305,
+      "grad_norm": 0.5627108812332153,
+      "learning_rate": 0.0002817924617236587,
+      "loss": 4.835605239868164,
+      "step": 3560
+    },
+    {
+      "epoch": 0.6031424227065383,
+      "grad_norm": 0.5983113050460815,
+      "learning_rate": 0.00028156305476222966,
+      "loss": 4.8316596984863285,
+      "step": 3570
+    },
+    {
+      "epoch": 0.6048318972799459,
+      "grad_norm": 0.6292333006858826,
+      "learning_rate": 0.0002813323062111828,
+      "loss": 4.8177978515625,
+      "step": 3580
+    },
+    {
+      "epoch": 0.6065213718533536,
+      "grad_norm": 0.5784122943878174,
+      "learning_rate": 0.0002811002184235334,
+      "loss": 4.801444625854492,
+      "step": 3590
+    },
+    {
+      "epoch": 0.6082108464267613,
+      "grad_norm": 0.598548173904419,
+      "learning_rate": 0.00028086679376595314,
+      "loss": 4.825639343261718,
+      "step": 3600
+    },
+    {
+      "epoch": 0.609900321000169,
+      "grad_norm": 0.6459059715270996,
+      "learning_rate": 0.00028063203461874635,
+      "loss": 4.819043350219727,
+      "step": 3610
+    },
+    {
+      "epoch": 0.6115897955735766,
+      "grad_norm": 0.6150253415107727,
+      "learning_rate": 0.0002803959433758254,
+      "loss": 4.791939926147461,
+      "step": 3620
+    },
+    {
+      "epoch": 0.6132792701469842,
+      "grad_norm": 0.6212024092674255,
+      "learning_rate": 0.0002801585224446866,
+      "loss": 4.81633415222168,
+      "step": 3630
+    },
+    {
+      "epoch": 0.614968744720392,
+      "grad_norm": 0.677480936050415,
+      "learning_rate": 0.0002799197742463854,
+      "loss": 4.800606918334961,
+      "step": 3640
+    },
+    {
+      "epoch": 0.6166582192937996,
+      "grad_norm": 0.5394392609596252,
+      "learning_rate": 0.0002796797012155118,
+      "loss": 4.792086791992188,
+      "step": 3650
+    },
+    {
+      "epoch": 0.6183476938672073,
+      "grad_norm": 0.6263740658760071,
+      "learning_rate": 0.0002794383058001657,
+      "loss": 4.8161579132080075,
+      "step": 3660
+    },
+    {
+      "epoch": 0.620037168440615,
+      "grad_norm": 0.5984665751457214,
+      "learning_rate": 0.00027919559046193156,
+      "loss": 4.80903434753418,
+      "step": 3670
+    },
+    {
+      "epoch": 0.6217266430140226,
+      "grad_norm": 0.570600688457489,
+      "learning_rate": 0.0002789515576758536,
+      "loss": 4.814385986328125,
+      "step": 3680
+    },
+    {
+      "epoch": 0.6234161175874303,
+      "grad_norm": 0.6285440325737,
+      "learning_rate": 0.00027870620993041055,
+      "loss": 4.766680908203125,
+      "step": 3690
+    },
+    {
+      "epoch": 0.6251055921608379,
+      "grad_norm": 0.6015235185623169,
+      "learning_rate": 0.00027845954972749004,
+      "loss": 4.784580230712891,
+      "step": 3700
+    },
+    {
+      "epoch": 0.6267950667342457,
+      "grad_norm": 0.5941788554191589,
+      "learning_rate": 0.0002782115795823633,
+      "loss": 4.8005828857421875,
+      "step": 3710
+    },
+    {
+      "epoch": 0.6284845413076533,
+      "grad_norm": 0.5614147782325745,
+      "learning_rate": 0.0002779623020236594,
+      "loss": 4.785182952880859,
+      "step": 3720
+    },
+    {
+      "epoch": 0.630174015881061,
+      "grad_norm": 0.6022568941116333,
+      "learning_rate": 0.00027771171959333976,
+      "loss": 4.825439453125,
+      "step": 3730
+    },
+    {
+      "epoch": 0.6318634904544687,
+      "grad_norm": 0.5763116478919983,
+      "learning_rate": 0.00027745983484667164,
+      "loss": 4.781736373901367,
+      "step": 3740
+    },
+    {
+      "epoch": 0.6335529650278763,
+      "grad_norm": 0.5323732495307922,
+      "learning_rate": 0.0002772066503522026,
+      "loss": 4.793933868408203,
+      "step": 3750
+    },
+    {
+      "epoch": 0.635242439601284,
+      "grad_norm": 0.6131083369255066,
+      "learning_rate": 0.00027695216869173415,
+      "loss": 4.761587905883789,
+      "step": 3760
+    },
+    {
+      "epoch": 0.6369319141746916,
+      "grad_norm": 0.6051084399223328,
+      "learning_rate": 0.0002766963924602953,
+      "loss": 4.781452941894531,
+      "step": 3770
+    },
+    {
+      "epoch": 0.6386213887480994,
+      "grad_norm": 0.5932102203369141,
+      "learning_rate": 0.00027643932426611647,
+      "loss": 4.7832183837890625,
+      "step": 3780
+    },
+    {
+      "epoch": 0.640310863321507,
+      "grad_norm": 0.5552225708961487,
+      "learning_rate": 0.0002761809667306022,
+      "loss": 4.767021560668946,
+      "step": 3790
+    },
+    {
+      "epoch": 0.6420003378949147,
+      "grad_norm": 0.6563674211502075,
+      "learning_rate": 0.00027592132248830526,
+      "loss": 4.7745105743408205,
+      "step": 3800
+    },
+    {
+      "epoch": 0.6436898124683224,
+      "grad_norm": 0.5460559725761414,
+      "learning_rate": 0.00027566039418689905,
+      "loss": 4.758267593383789,
+      "step": 3810
+    },
+    {
+      "epoch": 0.64537928704173,
+      "grad_norm": 0.5483755469322205,
+      "learning_rate": 0.00027539818448715124,
+      "loss": 4.763653182983399,
+      "step": 3820
+    },
+    {
+      "epoch": 0.6470687616151377,
+      "grad_norm": 0.5985057950019836,
+      "learning_rate": 0.000275134696062896,
+      "loss": 4.773738098144531,
+      "step": 3830
+    },
+    {
+      "epoch": 0.6487582361885453,
+      "grad_norm": 0.5881696939468384,
+      "learning_rate": 0.0002748699316010073,
+      "loss": 4.750997161865234,
+      "step": 3840
+    },
+    {
+      "epoch": 0.6504477107619531,
+      "grad_norm": 0.5993970632553101,
+      "learning_rate": 0.000274603893801371,
+      "loss": 4.757858657836914,
+      "step": 3850
+    },
+    {
+      "epoch": 0.6521371853353607,
+      "grad_norm": 0.5355719327926636,
+      "learning_rate": 0.000274336585376858,
+      "loss": 4.756174468994141,
+      "step": 3860
+    },
+    {
+      "epoch": 0.6538266599087684,
+      "grad_norm": 0.5604658722877502,
+      "learning_rate": 0.0002740680090532958,
+      "loss": 4.771471786499023,
+      "step": 3870
+    },
+    {
+      "epoch": 0.655516134482176,
+      "grad_norm": 0.5580816268920898,
+      "learning_rate": 0.0002737981675694411,
+      "loss": 4.767171859741211,
+      "step": 3880
+    },
+    {
+      "epoch": 0.6572056090555837,
+      "grad_norm": 0.5741564035415649,
+      "learning_rate": 0.00027352706367695203,
+      "loss": 4.755613708496094,
+      "step": 3890
+    },
+    {
+      "epoch": 0.6588950836289914,
+      "grad_norm": 0.5679869055747986,
+      "learning_rate": 0.00027325470014035965,
+      "loss": 4.768374252319336,
+      "step": 3900
+    },
+    {
+      "epoch": 0.660584558202399,
+      "grad_norm": 0.5516665577888489,
+      "learning_rate": 0.0002729810797370402,
+      "loss": 4.754274368286133,
+      "step": 3910
+    },
+    {
+      "epoch": 0.6622740327758068,
+      "grad_norm": 0.572651743888855,
+      "learning_rate": 0.00027270620525718647,
+      "loss": 4.740098190307617,
+      "step": 3920
+    },
+    {
+      "epoch": 0.6639635073492144,
+      "grad_norm": 0.5252317190170288,
+      "learning_rate": 0.0002724300795037796,
+      "loss": 4.781079864501953,
+      "step": 3930
+    },
+    {
+      "epoch": 0.665652981922622,
+      "grad_norm": 0.578183114528656,
+      "learning_rate": 0.00027215270529256015,
+      "loss": 4.738787460327148,
+      "step": 3940
+    },
+    {
+      "epoch": 0.6673424564960297,
+      "grad_norm": 0.6071267127990723,
+      "learning_rate": 0.00027187408545199977,
+      "loss": 4.73607177734375,
+      "step": 3950
+    },
+    {
+      "epoch": 0.6690319310694374,
+      "grad_norm": 0.5916706919670105,
+      "learning_rate": 0.00027159422282327204,
+      "loss": 4.747200775146484,
+      "step": 3960
+    },
+    {
+      "epoch": 0.6707214056428451,
+      "grad_norm": 0.5538118481636047,
+      "learning_rate": 0.0002713131202602238,
+      "loss": 4.765713119506836,
+      "step": 3970
+    },
+    {
+      "epoch": 0.6724108802162527,
+      "grad_norm": 0.5321049094200134,
+      "learning_rate": 0.0002710307806293458,
+      "loss": 4.7143207550048825,
+      "step": 3980
+    },
+    {
+      "epoch": 0.6741003547896605,
+      "grad_norm": 0.5859444737434387,
+      "learning_rate": 0.0002707472068097435,
+      "loss": 4.749985122680664,
+      "step": 3990
+    },
+    {
+      "epoch": 0.6757898293630681,
+      "grad_norm": 0.520622730255127,
+      "learning_rate": 0.0002704624016931079,
+      "loss": 4.7440532684326175,
+      "step": 4000
+    },
+    {
+      "epoch": 0.6757898293630681,
+      "eval_loss": 4.7194108963012695,
+      "eval_runtime": 3.7971,
+      "eval_samples_per_second": 263.356,
+      "eval_steps_per_second": 5.53,
+      "step": 4000
     }
   ],
   "logging_steps": 10,
@@ -2540,7 +2898,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.
+  "total_flos": 1.33782728343552e+17,
   "train_batch_size": 48,
   "trial_name": null,
   "trial_params": null
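The entries added above live in the log_history array of last-checkpoint/trainer_state.json, so the loss curve up to step 4000 can be read straight out of the checkpoint. A minimal sketch (not part of this commit), assuming the checkpoint directory is local and keyed exactly as shown in the diff:

import json

with open("last-checkpoint/trainer_state.json") as f:
    state = json.load(f)

# Training-step entries carry "loss"; evaluation entries carry "eval_loss" instead.
train_log = [e for e in state["log_history"] if "loss" in e]
eval_log = [e for e in state["log_history"] if "eval_loss" in e]

for entry in train_log[-3:]:
    print(f"step {entry['step']}: loss={entry['loss']:.4f}, lr={entry['learning_rate']:.2e}")
if eval_log:
    last = eval_log[-1]
    print(f"eval at step {last['step']}: eval_loss={last['eval_loss']:.4f}")

To continue training from this state rather than inspect it, transformers' Trainer accepts trainer.train(resume_from_checkpoint="last-checkpoint"), which restores the optimizer.pt, scheduler.pt, and rng_state.pth captured in this commit alongside the model weights.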