RedPajama-INCITE-Base-3B-v1-wikipedia-8bit/flyte7z9neqk5/local_flytekit/2002aa999dad15fe8dfd5901c73c9d2a/trainer_state.json
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 333.3333333333333,
  "global_step": 500,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 6.67,
      "learning_rate": 1.5357481488588927e-05,
      "loss": 1.9835,
      "step": 10
    },
    {
      "epoch": 13.33,
      "learning_rate": 2e-05,
      "loss": 1.0763,
      "step": 20
    },
    {
      "epoch": 20.0,
      "learning_rate": 2e-05,
      "loss": 0.2284,
      "step": 30
    },
    {
      "epoch": 26.67,
      "learning_rate": 2e-05,
      "loss": 0.0691,
      "step": 40
    },
    {
      "epoch": 33.33,
      "learning_rate": 2e-05,
      "loss": 0.0442,
      "step": 50
    },
    {
      "epoch": 40.0,
      "learning_rate": 2e-05,
      "loss": 0.0319,
      "step": 60
    },
    {
      "epoch": 46.67,
      "learning_rate": 2e-05,
      "loss": 0.0243,
      "step": 70
    },
    {
      "epoch": 53.33,
      "learning_rate": 2e-05,
      "loss": 0.0186,
      "step": 80
    },
    {
      "epoch": 60.0,
      "learning_rate": 2e-05,
      "loss": 0.0149,
      "step": 90
    },
    {
      "epoch": 66.67,
      "learning_rate": 2e-05,
      "loss": 0.0119,
      "step": 100
    },
    {
      "epoch": 73.33,
      "learning_rate": 2e-05,
      "loss": 0.0103,
      "step": 110
    },
    {
      "epoch": 80.0,
      "learning_rate": 2e-05,
      "loss": 0.0091,
      "step": 120
    },
    {
      "epoch": 86.67,
      "learning_rate": 2e-05,
      "loss": 0.0081,
      "step": 130
    },
    {
      "epoch": 93.33,
      "learning_rate": 2e-05,
      "loss": 0.0073,
      "step": 140
    },
    {
      "epoch": 100.0,
      "learning_rate": 2e-05,
      "loss": 0.0064,
      "step": 150
    },
    {
      "epoch": 106.67,
      "learning_rate": 2e-05,
      "loss": 0.0064,
      "step": 160
    },
    {
      "epoch": 113.33,
      "learning_rate": 2e-05,
      "loss": 0.0059,
      "step": 170
    },
    {
      "epoch": 120.0,
      "learning_rate": 2e-05,
      "loss": 0.0057,
      "step": 180
    },
    {
      "epoch": 126.67,
      "learning_rate": 2e-05,
      "loss": 0.0054,
      "step": 190
    },
    {
      "epoch": 133.33,
      "learning_rate": 2e-05,
      "loss": 0.0052,
      "step": 200
    },
    {
      "epoch": 140.0,
      "learning_rate": 2e-05,
      "loss": 0.0049,
      "step": 210
    },
    {
      "epoch": 146.67,
      "learning_rate": 2e-05,
      "loss": 0.0046,
      "step": 220
    },
    {
      "epoch": 153.33,
      "learning_rate": 2e-05,
      "loss": 0.0045,
      "step": 230
    },
    {
      "epoch": 160.0,
      "learning_rate": 2e-05,
      "loss": 0.0044,
      "step": 240
    },
    {
      "epoch": 166.67,
      "learning_rate": 2e-05,
      "loss": 0.0043,
      "step": 250
    },
    {
      "epoch": 173.33,
      "learning_rate": 2e-05,
      "loss": 0.0044,
      "step": 260
    },
    {
      "epoch": 180.0,
      "learning_rate": 2e-05,
      "loss": 0.0043,
      "step": 270
    },
    {
      "epoch": 186.67,
      "learning_rate": 2e-05,
      "loss": 0.0044,
      "step": 280
    },
    {
      "epoch": 193.33,
      "learning_rate": 2e-05,
      "loss": 0.0041,
      "step": 290
    },
    {
      "epoch": 200.0,
      "learning_rate": 2e-05,
      "loss": 0.0042,
      "step": 300
    },
    {
      "epoch": 206.67,
      "learning_rate": 2e-05,
      "loss": 0.0043,
      "step": 310
    },
    {
      "epoch": 213.33,
      "learning_rate": 2e-05,
      "loss": 0.004,
      "step": 320
    },
    {
      "epoch": 220.0,
      "learning_rate": 2e-05,
      "loss": 0.004,
      "step": 330
    },
    {
      "epoch": 226.67,
      "learning_rate": 2e-05,
      "loss": 0.0041,
      "step": 340
    },
    {
      "epoch": 233.33,
      "learning_rate": 2e-05,
      "loss": 0.0039,
      "step": 350
    },
    {
      "epoch": 240.0,
      "learning_rate": 2e-05,
      "loss": 0.0039,
      "step": 360
    },
    {
      "epoch": 246.67,
      "learning_rate": 2e-05,
      "loss": 0.0038,
      "step": 370
    },
    {
      "epoch": 253.33,
      "learning_rate": 2e-05,
      "loss": 0.0039,
      "step": 380
    },
    {
      "epoch": 260.0,
      "learning_rate": 2e-05,
      "loss": 0.0039,
      "step": 390
    },
    {
      "epoch": 266.67,
      "learning_rate": 2e-05,
      "loss": 0.0038,
      "step": 400
    },
    {
      "epoch": 273.33,
      "learning_rate": 2e-05,
      "loss": 0.0038,
      "step": 410
    },
    {
      "epoch": 280.0,
      "learning_rate": 2e-05,
      "loss": 0.0038,
      "step": 420
    },
    {
      "epoch": 286.67,
      "learning_rate": 2e-05,
      "loss": 0.0037,
      "step": 430
    },
    {
      "epoch": 293.33,
      "learning_rate": 2e-05,
      "loss": 0.0038,
      "step": 440
    },
    {
      "epoch": 300.0,
      "learning_rate": 2e-05,
      "loss": 0.0039,
      "step": 450
    },
    {
      "epoch": 306.67,
      "learning_rate": 2e-05,
      "loss": 0.0037,
      "step": 460
    },
    {
      "epoch": 313.33,
      "learning_rate": 2e-05,
      "loss": 0.0038,
      "step": 470
    },
    {
      "epoch": 320.0,
      "learning_rate": 2e-05,
      "loss": 0.0036,
      "step": 480
    },
    {
      "epoch": 326.67,
      "learning_rate": 2e-05,
      "loss": 0.0037,
      "step": 490
    },
    {
      "epoch": 333.33,
      "learning_rate": 2e-05,
      "loss": 0.0037,
      "step": 500
    },
    {
      "epoch": 333.33,
      "step": 500,
      "total_flos": 210359990353920.0,
      "train_loss": 0.07388268795609475,
      "train_runtime": 21319.5402,
      "train_samples_per_second": 3.002,
      "train_steps_per_second": 0.023
    }
  ],
  "max_steps": 500,
  "num_train_epochs": 500,
  "total_flos": 210359990353920.0,
  "trial_name": null,
  "trial_params": null
}
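The file above follows the standard Hugging Face `transformers` Trainer state layout: per-logging-step records live under `log_history` (each with `epoch`, `learning_rate`, `loss`, `step`), and the final entry carries the run summary (`train_loss`, `train_runtime`, throughput). Below is a minimal sketch of reading the loss trajectory back out of the file with only the Python standard library; the relative path is an assumption based on the listing above and should be adjusted to wherever the file actually lives.

```python
import json

# Assumed path, taken from the directory listing above; adjust as needed.
PATH = "2002aa999dad15fe8dfd5901c73c9d2a/trainer_state.json"

with open(PATH) as f:
    state = json.load(f)

# Training log entries contain "loss"; the last entry instead holds the
# aggregate summary fields such as "train_loss" and "train_runtime".
for entry in state["log_history"]:
    if "loss" in entry:
        print(f"step {entry['step']:>4}  epoch {entry['epoch']:>7.2f}  loss {entry['loss']:.4f}")
    else:
        print(f"summary: train_loss={entry['train_loss']:.4f}, "
              f"runtime={entry['train_runtime']:.1f}s, "
              f"samples/s={entry['train_samples_per_second']:.3f}")
```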