llama-2-13b-22sep / checkpoint-600 / trainer_state.json
{
"best_metric": 1.2669302225112915,
"best_model_checkpoint": "../llama2-9439-21sept/checkpoint-600",
"epoch": 1.0419681620839363,
"eval_steps": 200,
"global_step": 600,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.03,
"learning_rate": 5.9999999999999995e-05,
"loss": 3.4609,
"step": 20
},
{
"epoch": 0.07,
"learning_rate": 0.00011999999999999999,
"loss": 2.8346,
"step": 40
},
{
"epoch": 0.1,
"learning_rate": 0.00017999999999999998,
"loss": 1.7597,
"step": 60
},
{
"epoch": 0.14,
"learning_rate": 0.00023999999999999998,
"loss": 1.542,
"step": 80
},
{
"epoch": 0.17,
"learning_rate": 0.0003,
"loss": 1.4567,
"step": 100
},
{
"epoch": 0.21,
"learning_rate": 0.0002963076923076923,
"loss": 1.4081,
"step": 120
},
{
"epoch": 0.24,
"learning_rate": 0.0002926153846153846,
"loss": 1.3805,
"step": 140
},
{
"epoch": 0.28,
"learning_rate": 0.0002889230769230769,
"loss": 1.3605,
"step": 160
},
{
"epoch": 0.31,
"learning_rate": 0.00028523076923076923,
"loss": 1.3329,
"step": 180
},
{
"epoch": 0.35,
"learning_rate": 0.0002815384615384615,
"loss": 1.3367,
"step": 200
},
{
"epoch": 0.35,
"eval_loss": 1.3450770378112793,
"eval_runtime": 6.306,
"eval_samples_per_second": 47.574,
"eval_steps_per_second": 1.269,
"step": 200
},
{
"epoch": 0.38,
"learning_rate": 0.0002778461538461538,
"loss": 1.3152,
"step": 220
},
{
"epoch": 0.42,
"learning_rate": 0.0002741538461538461,
"loss": 1.312,
"step": 240
},
{
"epoch": 0.45,
"learning_rate": 0.00027046153846153843,
"loss": 1.2883,
"step": 260
},
{
"epoch": 0.49,
"learning_rate": 0.00026676923076923074,
"loss": 1.2843,
"step": 280
},
{
"epoch": 0.52,
"learning_rate": 0.00026307692307692306,
"loss": 1.2609,
"step": 300
},
{
"epoch": 0.56,
"learning_rate": 0.00025938461538461537,
"loss": 1.2707,
"step": 320
},
{
"epoch": 0.59,
"learning_rate": 0.0002556923076923077,
"loss": 1.2624,
"step": 340
},
{
"epoch": 0.63,
"learning_rate": 0.00025199999999999995,
"loss": 1.273,
"step": 360
},
{
"epoch": 0.66,
"learning_rate": 0.0002483076923076923,
"loss": 1.251,
"step": 380
},
{
"epoch": 0.69,
"learning_rate": 0.0002446153846153846,
"loss": 1.2648,
"step": 400
},
{
"epoch": 0.69,
"eval_loss": 1.292348861694336,
"eval_runtime": 6.304,
"eval_samples_per_second": 47.589,
"eval_steps_per_second": 1.269,
"step": 400
},
{
"epoch": 0.73,
"learning_rate": 0.0002409230769230769,
"loss": 1.265,
"step": 420
},
{
"epoch": 0.76,
"learning_rate": 0.0002372307692307692,
"loss": 1.2516,
"step": 440
},
{
"epoch": 0.8,
"learning_rate": 0.00023353846153846151,
"loss": 1.2441,
"step": 460
},
{
"epoch": 0.83,
"learning_rate": 0.00022984615384615383,
"loss": 1.2204,
"step": 480
},
{
"epoch": 0.87,
"learning_rate": 0.00022615384615384614,
"loss": 1.2221,
"step": 500
},
{
"epoch": 0.9,
"learning_rate": 0.00022246153846153846,
"loss": 1.2246,
"step": 520
},
{
"epoch": 0.94,
"learning_rate": 0.00021876923076923074,
"loss": 1.2227,
"step": 540
},
{
"epoch": 0.97,
"learning_rate": 0.00021507692307692306,
"loss": 1.2124,
"step": 560
},
{
"epoch": 1.01,
"learning_rate": 0.00021138461538461537,
"loss": 1.2065,
"step": 580
},
{
"epoch": 1.04,
"learning_rate": 0.00020769230769230766,
"loss": 1.2106,
"step": 600
},
{
"epoch": 1.04,
"eval_loss": 1.2669302225112915,
"eval_runtime": 6.3128,
"eval_samples_per_second": 47.523,
"eval_steps_per_second": 1.267,
"step": 600
}
],
"logging_steps": 20,
"max_steps": 1725,
"num_train_epochs": 3,
"save_steps": 200,
"total_flos": 1.4352469020219802e+18,
"trial_name": null,
"trial_params": null
}
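
The state above can be inspected programmatically. A minimal sketch, assuming the file is saved locally as `trainer_state.json` (the path is illustrative), that loads it and separates the training-loss entries (logged every `logging_steps` = 20) from the eval-loss entries (logged every `eval_steps` = 200) in `log_history`:

```python
import json

# Load the trainer state exported by the Hugging Face Trainer.
# The filename is an assumption; adjust to wherever this file lives.
with open("trainer_state.json") as f:
    state = json.load(f)

train_points = []  # (step, training loss) pairs
eval_points = []   # (step, eval loss) pairs

for entry in state["log_history"]:
    if "loss" in entry:
        train_points.append((entry["step"], entry["loss"]))
    elif "eval_loss" in entry:
        eval_points.append((entry["step"], entry["eval_loss"]))

print(f"best eval loss: {state['best_metric']:.4f} at {state['best_model_checkpoint']}")
print("last train points:", train_points[-3:])
print("eval points:      ", eval_points)
```

For this checkpoint the script would report the best eval loss of 1.2669 at step 600, matching `best_metric` and `best_model_checkpoint` above.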
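The logged learning rates are consistent with a linear schedule with warmup: the rate climbs to 3e-4 by step 100 and then decays linearly toward zero at `max_steps` (1725). The warmup length of 100 steps is inferred from the log, not stated in the file, so the sketch below is a reconstruction under that assumption rather than the training script's actual configuration:

```python
MAX_STEPS = 1725      # from "max_steps" in the state
PEAK_LR = 3e-4        # learning rate logged at step 100
WARMUP_STEPS = 100    # assumption inferred from the warmup pattern in the log

def linear_schedule(step: int) -> float:
    """Linear warmup to PEAK_LR, then linear decay to 0 at MAX_STEPS."""
    if step < WARMUP_STEPS:
        return PEAK_LR * step / WARMUP_STEPS
    return PEAK_LR * (MAX_STEPS - step) / (MAX_STEPS - WARMUP_STEPS)

# Spot-check the reconstruction against a few values from log_history.
for step, logged in [(20, 5.9999999999999995e-05),
                     (120, 0.0002963076923076923),
                     (600, 0.00020769230769230766)]:
    print(step, linear_schedule(step), logged)
```

Running the spot-check reproduces the logged values, e.g. 2.0769e-4 at step 600, which supports the warmup/decay reading of the schedule.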