deepmoney-34b-200k-chat-evaluator / trainer_state.json
TriadParty's picture
Upload 4 files
f87d227
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.9907192575406034,
"eval_steps": 500,
"global_step": 860,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.05,
"learning_rate": 2.998999272555359e-05,
"loss": 1.0627,
"step": 10
},
{
"epoch": 0.09,
"learning_rate": 2.9959984254953276e-05,
"loss": 0.9782,
"step": 20
},
{
"epoch": 0.14,
"learning_rate": 2.9910014628599188e-05,
"loss": 0.9571,
"step": 30
},
{
"epoch": 0.19,
"learning_rate": 2.9840150521126656e-05,
"loss": 0.9545,
"step": 40
},
{
"epoch": 0.23,
"learning_rate": 2.975048515244199e-05,
"loss": 0.9367,
"step": 50
},
{
"epoch": 0.28,
"learning_rate": 2.964113816333891e-05,
"loss": 0.9475,
"step": 60
},
{
"epoch": 0.32,
"learning_rate": 2.9512255455861378e-05,
"loss": 0.9201,
"step": 70
},
{
"epoch": 0.37,
"learning_rate": 2.936400899862609e-05,
"loss": 0.9139,
"step": 80
},
{
"epoch": 0.42,
"learning_rate": 2.919659659736414e-05,
"loss": 0.9289,
"step": 90
},
{
"epoch": 0.46,
"learning_rate": 2.901024163098822e-05,
"loss": 0.9073,
"step": 100
},
{
"epoch": 0.51,
"learning_rate": 2.880519275353739e-05,
"loss": 0.9186,
"step": 110
},
{
"epoch": 0.56,
"learning_rate": 2.8581723562397203e-05,
"loss": 0.88,
"step": 120
},
{
"epoch": 0.6,
"learning_rate": 2.8340132233237784e-05,
"loss": 0.8877,
"step": 130
},
{
"epoch": 0.65,
"learning_rate": 2.8080741122157116e-05,
"loss": 0.9036,
"step": 140
},
{
"epoch": 0.7,
"learning_rate": 2.78038963355602e-05,
"loss": 0.8827,
"step": 150
},
{
"epoch": 0.74,
"learning_rate": 2.7509967268348168e-05,
"loss": 0.8985,
"step": 160
},
{
"epoch": 0.79,
"learning_rate": 2.719934611103348e-05,
"loss": 0.8883,
"step": 170
},
{
"epoch": 0.84,
"learning_rate": 2.6872447326438813e-05,
"loss": 0.8711,
"step": 180
},
{
"epoch": 0.88,
"learning_rate": 2.652970709667798e-05,
"loss": 0.8941,
"step": 190
},
{
"epoch": 0.93,
"learning_rate": 2.617158274115673e-05,
"loss": 0.8909,
"step": 200
},
{
"epoch": 0.97,
"learning_rate": 2.579855210636994e-05,
"loss": 0.8901,
"step": 210
},
{
"epoch": 1.02,
"learning_rate": 2.541111292830951e-05,
"loss": 0.8664,
"step": 220
},
{
"epoch": 1.07,
"learning_rate": 2.500978216833359e-05,
"loss": 0.8718,
"step": 230
},
{
"epoch": 1.11,
"learning_rate": 2.459509532338337e-05,
"loss": 0.8489,
"step": 240
},
{
"epoch": 1.16,
"learning_rate": 2.416760571146774e-05,
"loss": 0.851,
"step": 250
},
{
"epoch": 1.21,
"learning_rate": 2.3727883733369295e-05,
"loss": 0.8481,
"step": 260
},
{
"epoch": 1.25,
"learning_rate": 2.327651611155669e-05,
"loss": 0.8605,
"step": 270
},
{
"epoch": 1.3,
"learning_rate": 2.2814105107318955e-05,
"loss": 0.8586,
"step": 280
},
{
"epoch": 1.35,
"learning_rate": 2.2341267717166285e-05,
"loss": 0.8642,
"step": 290
},
{
"epoch": 1.39,
"learning_rate": 2.1858634849569578e-05,
"loss": 0.8676,
"step": 300
},
{
"epoch": 1.44,
"learning_rate": 2.136685048313723e-05,
"loss": 0.8443,
"step": 310
},
{
"epoch": 1.48,
"learning_rate": 2.086657080735234e-05,
"loss": 0.842,
"step": 320
},
{
"epoch": 1.53,
"learning_rate": 2.035846334701699e-05,
"loss": 0.8432,
"step": 330
},
{
"epoch": 1.58,
"learning_rate": 1.9843206071571692e-05,
"loss": 0.8451,
"step": 340
},
{
"epoch": 1.62,
"learning_rate": 1.9321486490478565e-05,
"loss": 0.8645,
"step": 350
},
{
"epoch": 1.67,
"learning_rate": 1.879400073587521e-05,
"loss": 0.8377,
"step": 360
},
{
"epoch": 1.72,
"learning_rate": 1.8261452633723356e-05,
"loss": 0.8374,
"step": 370
},
{
"epoch": 1.76,
"learning_rate": 1.7724552764691545e-05,
"loss": 0.8491,
"step": 380
},
{
"epoch": 1.81,
"learning_rate": 1.7184017516025075e-05,
"loss": 0.8391,
"step": 390
},
{
"epoch": 1.86,
"learning_rate": 1.664056812566812e-05,
"loss": 0.8521,
"step": 400
},
{
"epoch": 1.9,
"learning_rate": 1.6094929719913614e-05,
"loss": 0.846,
"step": 410
},
{
"epoch": 1.95,
"learning_rate": 1.5547830345864887e-05,
"loss": 0.8759,
"step": 420
},
{
"epoch": 2.0,
"learning_rate": 1.5e-05,
"loss": 0.8411,
"step": 430
},
{
"epoch": 2.04,
"learning_rate": 1.4452169654135116e-05,
"loss": 0.8106,
"step": 440
},
{
"epoch": 2.09,
"learning_rate": 1.3905070280086387e-05,
"loss": 0.8265,
"step": 450
},
{
"epoch": 2.13,
"learning_rate": 1.3359431874331888e-05,
"loss": 0.8261,
"step": 460
},
{
"epoch": 2.18,
"learning_rate": 1.281598248397493e-05,
"loss": 0.8107,
"step": 470
},
{
"epoch": 2.23,
"learning_rate": 1.2275447235308454e-05,
"loss": 0.7914,
"step": 480
},
{
"epoch": 2.27,
"learning_rate": 1.1738547366276648e-05,
"loss": 0.7966,
"step": 490
},
{
"epoch": 2.32,
"learning_rate": 1.1205999264124788e-05,
"loss": 0.8102,
"step": 500
},
{
"epoch": 2.37,
"learning_rate": 1.0678513509521436e-05,
"loss": 0.794,
"step": 510
},
{
"epoch": 2.41,
"learning_rate": 1.015679392842831e-05,
"loss": 0.7862,
"step": 520
},
{
"epoch": 2.46,
"learning_rate": 9.64153665298301e-06,
"loss": 0.8126,
"step": 530
},
{
"epoch": 2.51,
"learning_rate": 9.133429192647661e-06,
"loss": 0.8134,
"step": 540
},
{
"epoch": 2.55,
"learning_rate": 8.633149516862777e-06,
"loss": 0.8136,
"step": 550
},
{
"epoch": 2.6,
"learning_rate": 8.141365150430423e-06,
"loss": 0.7995,
"step": 560
},
{
"epoch": 2.65,
"learning_rate": 7.658732282833721e-06,
"loss": 0.7874,
"step": 570
},
{
"epoch": 2.69,
"learning_rate": 7.185894892681048e-06,
"loss": 0.7907,
"step": 580
},
{
"epoch": 2.74,
"learning_rate": 6.72348388844331e-06,
"loss": 0.7957,
"step": 590
},
{
"epoch": 2.78,
"learning_rate": 6.27211626663071e-06,
"loss": 0.8031,
"step": 600
},
{
"epoch": 2.83,
"learning_rate": 5.832394288532261e-06,
"loss": 0.8071,
"step": 610
},
{
"epoch": 2.88,
"learning_rate": 5.404904676616634e-06,
"loss": 0.7953,
"step": 620
},
{
"epoch": 2.92,
"learning_rate": 4.99021783166641e-06,
"loss": 0.8028,
"step": 630
},
{
"epoch": 2.97,
"learning_rate": 4.588887071690491e-06,
"loss": 0.8043,
"step": 640
},
{
"epoch": 3.02,
"learning_rate": 4.201447893630065e-06,
"loss": 0.7826,
"step": 650
},
{
"epoch": 3.06,
"learning_rate": 3.828417258843272e-06,
"loss": 0.7723,
"step": 660
},
{
"epoch": 3.11,
"learning_rate": 3.4702929033220176e-06,
"loss": 0.7734,
"step": 670
},
{
"epoch": 3.16,
"learning_rate": 3.12755267356119e-06,
"loss": 0.7834,
"step": 680
},
{
"epoch": 3.2,
"learning_rate": 2.8006538889665194e-06,
"loss": 0.7712,
"step": 690
},
{
"epoch": 3.25,
"learning_rate": 2.490032731651833e-06,
"loss": 0.7634,
"step": 700
},
{
"epoch": 3.29,
"learning_rate": 2.1961036644398034e-06,
"loss": 0.754,
"step": 710
},
{
"epoch": 3.34,
"learning_rate": 1.9192588778428844e-06,
"loss": 0.7738,
"step": 720
},
{
"epoch": 3.39,
"learning_rate": 1.6598677667622176e-06,
"loss": 0.7776,
"step": 730
},
{
"epoch": 3.43,
"learning_rate": 1.4182764376028007e-06,
"loss": 0.7706,
"step": 740
},
{
"epoch": 3.48,
"learning_rate": 1.1948072464626102e-06,
"loss": 0.7699,
"step": 750
},
{
"epoch": 3.53,
"learning_rate": 9.897583690117813e-07,
"loss": 0.7867,
"step": 760
},
{
"epoch": 3.57,
"learning_rate": 8.034034026358589e-07,
"loss": 0.766,
"step": 770
},
{
"epoch": 3.62,
"learning_rate": 6.359910013739123e-07,
"loss": 0.7704,
"step": 780
},
{
"epoch": 3.67,
"learning_rate": 4.877445441386219e-07,
"loss": 0.7601,
"step": 790
},
{
"epoch": 3.71,
"learning_rate": 3.588618366610941e-07,
"loss": 0.7838,
"step": 800
},
{
"epoch": 3.76,
"learning_rate": 2.495148475580089e-07,
"loss": 0.7715,
"step": 810
},
{
"epoch": 3.81,
"learning_rate": 1.5984947887334622e-07,
"loss": 0.7726,
"step": 820
},
{
"epoch": 3.85,
"learning_rate": 8.998537140081143e-08,
"loss": 0.7796,
"step": 830
},
{
"epoch": 3.9,
"learning_rate": 4.0015745046725336e-08,
"loss": 0.786,
"step": 840
},
{
"epoch": 3.94,
"learning_rate": 1.0007274446409143e-08,
"loss": 0.7874,
"step": 850
},
{
"epoch": 3.99,
"learning_rate": 0.0,
"loss": 0.7847,
"step": 860
},
{
"epoch": 3.99,
"step": 860,
"total_flos": 1017528113430528.0,
"train_loss": 0.8370421792185584,
"train_runtime": 34197.088,
"train_samples_per_second": 5.644,
"train_steps_per_second": 0.025
}
],
"logging_steps": 10,
"max_steps": 860,
"num_input_tokens_seen": 0,
"num_train_epochs": 4,
"save_steps": 1000,
"total_flos": 1017528113430528.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}