nisten's picture
Upload folder using huggingface_hub
ad1a955 verified
{
"best_metric": 1.1021808385849,
"best_model_checkpoint": "./0.4b_finetuned_results/checkpoint-500",
"epoch": 0.7485029940119761,
"eval_steps": 500,
"global_step": 500,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.014970059880239521,
"grad_norm": 4.375,
"learning_rate": 6.666666666666667e-05,
"loss": 3.8882,
"step": 10
},
{
"epoch": 0.029940119760479042,
"grad_norm": 6.0625,
"learning_rate": 0.00013333333333333334,
"loss": 3.2257,
"step": 20
},
{
"epoch": 0.04491017964071856,
"grad_norm": 6.28125,
"learning_rate": 0.0002,
"loss": 2.92,
"step": 30
},
{
"epoch": 0.059880239520958084,
"grad_norm": 1.9765625,
"learning_rate": 0.00019999938668382333,
"loss": 2.3984,
"step": 40
},
{
"epoch": 0.0748502994011976,
"grad_norm": 1.484375,
"learning_rate": 0.00019999754674281632,
"loss": 2.1626,
"step": 50
},
{
"epoch": 0.08982035928143713,
"grad_norm": 3.375,
"learning_rate": 0.0001999944801995484,
"loss": 2.0388,
"step": 60
},
{
"epoch": 0.10479041916167664,
"grad_norm": 1.7890625,
"learning_rate": 0.0001999901870916347,
"loss": 2.0121,
"step": 70
},
{
"epoch": 0.11976047904191617,
"grad_norm": 1.609375,
"learning_rate": 0.00019998466747173592,
"loss": 1.8579,
"step": 80
},
{
"epoch": 0.1347305389221557,
"grad_norm": 0.81640625,
"learning_rate": 0.00019997792140755746,
"loss": 1.8254,
"step": 90
},
{
"epoch": 0.1497005988023952,
"grad_norm": 1.515625,
"learning_rate": 0.0001999699489818488,
"loss": 1.7037,
"step": 100
},
{
"epoch": 0.16467065868263472,
"grad_norm": 0.94140625,
"learning_rate": 0.00019996075029240219,
"loss": 1.6647,
"step": 110
},
{
"epoch": 0.17964071856287425,
"grad_norm": 0.61328125,
"learning_rate": 0.0001999503254520518,
"loss": 1.5988,
"step": 120
},
{
"epoch": 0.19461077844311378,
"grad_norm": 0.337890625,
"learning_rate": 0.00019993867458867207,
"loss": 1.6197,
"step": 130
},
{
"epoch": 0.20958083832335328,
"grad_norm": 0.47265625,
"learning_rate": 0.00019992579784517626,
"loss": 1.5954,
"step": 140
},
{
"epoch": 0.2245508982035928,
"grad_norm": 0.33203125,
"learning_rate": 0.00019991169537951468,
"loss": 1.5666,
"step": 150
},
{
"epoch": 0.23952095808383234,
"grad_norm": 0.52734375,
"learning_rate": 0.00019989636736467278,
"loss": 1.5227,
"step": 160
},
{
"epoch": 0.25449101796407186,
"grad_norm": 0.34375,
"learning_rate": 0.00019987981398866887,
"loss": 1.5048,
"step": 170
},
{
"epoch": 0.2694610778443114,
"grad_norm": 0.46875,
"learning_rate": 0.00019986203545455203,
"loss": 1.4755,
"step": 180
},
{
"epoch": 0.2844311377245509,
"grad_norm": 0.51953125,
"learning_rate": 0.0001998430319803996,
"loss": 1.4505,
"step": 190
},
{
"epoch": 0.2994011976047904,
"grad_norm": 0.38671875,
"learning_rate": 0.00019982280379931422,
"loss": 1.4295,
"step": 200
},
{
"epoch": 0.3143712574850299,
"grad_norm": 0.34765625,
"learning_rate": 0.00019980135115942136,
"loss": 1.4683,
"step": 210
},
{
"epoch": 0.32934131736526945,
"grad_norm": 0.306640625,
"learning_rate": 0.00019977867432386604,
"loss": 1.4427,
"step": 220
},
{
"epoch": 0.344311377245509,
"grad_norm": 0.357421875,
"learning_rate": 0.00019975477357080966,
"loss": 1.3852,
"step": 230
},
{
"epoch": 0.3592814371257485,
"grad_norm": 0.361328125,
"learning_rate": 0.00019972964919342663,
"loss": 1.427,
"step": 240
},
{
"epoch": 0.37425149700598803,
"grad_norm": 0.306640625,
"learning_rate": 0.00019970330149990062,
"loss": 1.3759,
"step": 250
},
{
"epoch": 0.38922155688622756,
"grad_norm": 0.3515625,
"learning_rate": 0.00019967573081342103,
"loss": 1.3559,
"step": 260
},
{
"epoch": 0.4041916167664671,
"grad_norm": 0.28515625,
"learning_rate": 0.00019964693747217874,
"loss": 1.3715,
"step": 270
},
{
"epoch": 0.41916167664670656,
"grad_norm": 0.30859375,
"learning_rate": 0.00019961692182936225,
"loss": 1.2932,
"step": 280
},
{
"epoch": 0.4341317365269461,
"grad_norm": 0.306640625,
"learning_rate": 0.00019958568425315314,
"loss": 1.3086,
"step": 290
},
{
"epoch": 0.4491017964071856,
"grad_norm": 0.291015625,
"learning_rate": 0.00019955322512672162,
"loss": 1.3091,
"step": 300
},
{
"epoch": 0.46407185628742514,
"grad_norm": 0.248046875,
"learning_rate": 0.00019951954484822182,
"loss": 1.3196,
"step": 310
},
{
"epoch": 0.47904191616766467,
"grad_norm": 0.267578125,
"learning_rate": 0.00019948464383078696,
"loss": 1.2944,
"step": 320
},
{
"epoch": 0.4940119760479042,
"grad_norm": 0.375,
"learning_rate": 0.00019944852250252418,
"loss": 1.3461,
"step": 330
},
{
"epoch": 0.5089820359281437,
"grad_norm": 0.275390625,
"learning_rate": 0.00019941118130650942,
"loss": 1.3221,
"step": 340
},
{
"epoch": 0.5239520958083832,
"grad_norm": 0.23828125,
"learning_rate": 0.00019937262070078183,
"loss": 1.3111,
"step": 350
},
{
"epoch": 0.5389221556886228,
"grad_norm": 0.2578125,
"learning_rate": 0.0001993328411583383,
"loss": 1.3128,
"step": 360
},
{
"epoch": 0.5538922155688623,
"grad_norm": 0.2578125,
"learning_rate": 0.00019929184316712758,
"loss": 1.2618,
"step": 370
},
{
"epoch": 0.5688622754491018,
"grad_norm": 0.29296875,
"learning_rate": 0.00019924962723004425,
"loss": 1.2893,
"step": 380
},
{
"epoch": 0.5838323353293413,
"grad_norm": 0.30859375,
"learning_rate": 0.0001992061938649227,
"loss": 1.2727,
"step": 390
},
{
"epoch": 0.5988023952095808,
"grad_norm": 0.3359375,
"learning_rate": 0.0001991615436045306,
"loss": 1.293,
"step": 400
},
{
"epoch": 0.6137724550898204,
"grad_norm": 0.314453125,
"learning_rate": 0.0001991156769965625,
"loss": 1.2692,
"step": 410
},
{
"epoch": 0.6287425149700598,
"grad_norm": 0.326171875,
"learning_rate": 0.00019906859460363307,
"loss": 1.2588,
"step": 420
},
{
"epoch": 0.6437125748502994,
"grad_norm": 0.26953125,
"learning_rate": 0.00019902029700327018,
"loss": 1.2576,
"step": 430
},
{
"epoch": 0.6586826347305389,
"grad_norm": 0.2890625,
"learning_rate": 0.0001989707847879078,
"loss": 1.2595,
"step": 440
},
{
"epoch": 0.6736526946107785,
"grad_norm": 0.337890625,
"learning_rate": 0.00019892005856487878,
"loss": 1.2331,
"step": 450
},
{
"epoch": 0.688622754491018,
"grad_norm": 0.28515625,
"learning_rate": 0.0001988681189564074,
"loss": 1.2161,
"step": 460
},
{
"epoch": 0.7035928143712575,
"grad_norm": 0.25390625,
"learning_rate": 0.0001988149665996017,
"loss": 1.2675,
"step": 470
},
{
"epoch": 0.718562874251497,
"grad_norm": 0.26953125,
"learning_rate": 0.00019876060214644566,
"loss": 1.269,
"step": 480
},
{
"epoch": 0.7335329341317365,
"grad_norm": 0.40625,
"learning_rate": 0.00019870502626379127,
"loss": 1.2342,
"step": 490
},
{
"epoch": 0.7485029940119761,
"grad_norm": 0.298828125,
"learning_rate": 0.00019864823963335033,
"loss": 1.2351,
"step": 500
},
{
"epoch": 0.7485029940119761,
"eval_loss": 1.1021808385849,
"eval_runtime": 109.4058,
"eval_samples_per_second": 9.14,
"eval_steps_per_second": 1.143,
"step": 500
}
],
"logging_steps": 10,
"max_steps": 9000,
"num_input_tokens_seen": 0,
"num_train_epochs": 14,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 3.479612424192e+16,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}