|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 20.0, |
|
"eval_steps": 500, |
|
"global_step": 2440, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 4.217932224273682, |
|
"learning_rate": 4.75e-05, |
|
"loss": 0.5643, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.7042606516290727, |
|
"eval_f1": 0.5981941694544958, |
|
"eval_loss": 0.5218127369880676, |
|
"eval_precision": 0.6240824392998306, |
|
"eval_recall": 0.5932442262229496, |
|
"eval_runtime": 1.7249, |
|
"eval_samples_per_second": 231.323, |
|
"eval_steps_per_second": 28.988, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 3.5043983459472656, |
|
"learning_rate": 4.5e-05, |
|
"loss": 0.5086, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.7293233082706767, |
|
"eval_f1": 0.6868131868131868, |
|
"eval_loss": 0.5021316409111023, |
|
"eval_precision": 0.6814393939393939, |
|
"eval_recall": 0.6959901800327333, |
|
"eval_runtime": 1.7316, |
|
"eval_samples_per_second": 230.424, |
|
"eval_steps_per_second": 28.875, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 3.2947137355804443, |
|
"learning_rate": 4.25e-05, |
|
"loss": 0.4652, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.7894736842105263, |
|
"eval_f1": 0.7407327186088248, |
|
"eval_loss": 0.444990336894989, |
|
"eval_precision": 0.7463777285669092, |
|
"eval_recall": 0.7360429168939807, |
|
"eval_runtime": 1.7474, |
|
"eval_samples_per_second": 228.342, |
|
"eval_steps_per_second": 28.614, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 4.659485816955566, |
|
"learning_rate": 4e-05, |
|
"loss": 0.4248, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.8345864661654135, |
|
"eval_f1": 0.7917273014868713, |
|
"eval_loss": 0.39323920011520386, |
|
"eval_precision": 0.8074456774536514, |
|
"eval_recall": 0.780460083651573, |
|
"eval_runtime": 1.7487, |
|
"eval_samples_per_second": 228.174, |
|
"eval_steps_per_second": 28.593, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 5.222157955169678, |
|
"learning_rate": 3.7500000000000003e-05, |
|
"loss": 0.3812, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.8421052631578947, |
|
"eval_f1": 0.8118502107020158, |
|
"eval_loss": 0.37043145298957825, |
|
"eval_precision": 0.8083091673078061, |
|
"eval_recall": 0.8157846881251136, |
|
"eval_runtime": 1.756, |
|
"eval_samples_per_second": 227.224, |
|
"eval_steps_per_second": 28.474, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 6.741718769073486, |
|
"learning_rate": 3.5e-05, |
|
"loss": 0.3506, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.8571428571428571, |
|
"eval_f1": 0.8289446964056049, |
|
"eval_loss": 0.35664382576942444, |
|
"eval_precision": 0.8266129032258065, |
|
"eval_recall": 0.8314238952536825, |
|
"eval_runtime": 1.751, |
|
"eval_samples_per_second": 227.875, |
|
"eval_steps_per_second": 28.556, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 3.737555503845215, |
|
"learning_rate": 3.2500000000000004e-05, |
|
"loss": 0.3323, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.8571428571428571, |
|
"eval_f1": 0.820640215771169, |
|
"eval_loss": 0.3437691330909729, |
|
"eval_precision": 0.8364527629233511, |
|
"eval_recall": 0.8089198036006546, |
|
"eval_runtime": 1.756, |
|
"eval_samples_per_second": 227.216, |
|
"eval_steps_per_second": 28.473, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 6.206509113311768, |
|
"learning_rate": 3e-05, |
|
"loss": 0.3108, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.8621553884711779, |
|
"eval_f1": 0.8279052989013229, |
|
"eval_loss": 0.33262690901756287, |
|
"eval_precision": 0.8414113428943938, |
|
"eval_recall": 0.8174668121476631, |
|
"eval_runtime": 1.7517, |
|
"eval_samples_per_second": 227.78, |
|
"eval_steps_per_second": 28.544, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"grad_norm": 3.5029311180114746, |
|
"learning_rate": 2.7500000000000004e-05, |
|
"loss": 0.2998, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.8671679197994987, |
|
"eval_f1": 0.8385441718775052, |
|
"eval_loss": 0.32504233717918396, |
|
"eval_precision": 0.8412280701754387, |
|
"eval_recall": 0.8360156392071285, |
|
"eval_runtime": 1.7542, |
|
"eval_samples_per_second": 227.46, |
|
"eval_steps_per_second": 28.504, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 5.729393005371094, |
|
"learning_rate": 2.5e-05, |
|
"loss": 0.2923, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.8571428571428571, |
|
"eval_f1": 0.8263588263588264, |
|
"eval_loss": 0.31815892457962036, |
|
"eval_precision": 0.8289473684210527, |
|
"eval_recall": 0.8239225313693399, |
|
"eval_runtime": 1.7579, |
|
"eval_samples_per_second": 226.98, |
|
"eval_steps_per_second": 28.444, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"grad_norm": 11.397634506225586, |
|
"learning_rate": 2.25e-05, |
|
"loss": 0.2887, |
|
"step": 1342 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.8721804511278195, |
|
"eval_f1": 0.8438308224802573, |
|
"eval_loss": 0.31445056200027466, |
|
"eval_precision": 0.8485409407665505, |
|
"eval_recall": 0.8395617384979087, |
|
"eval_runtime": 1.7554, |
|
"eval_samples_per_second": 227.301, |
|
"eval_steps_per_second": 28.484, |
|
"step": 1342 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"grad_norm": 10.077719688415527, |
|
"learning_rate": 2e-05, |
|
"loss": 0.2716, |
|
"step": 1464 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.8721804511278195, |
|
"eval_f1": 0.8430067043674827, |
|
"eval_loss": 0.30924713611602783, |
|
"eval_precision": 0.8498269896193772, |
|
"eval_recall": 0.8370612838697945, |
|
"eval_runtime": 1.7521, |
|
"eval_samples_per_second": 227.724, |
|
"eval_steps_per_second": 28.537, |
|
"step": 1464 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"grad_norm": 3.046454906463623, |
|
"learning_rate": 1.75e-05, |
|
"loss": 0.2598, |
|
"step": 1586 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.8771929824561403, |
|
"eval_f1": 0.8458135188208294, |
|
"eval_loss": 0.30985820293426514, |
|
"eval_precision": 0.8627946127946129, |
|
"eval_recall": 0.8331060192762321, |
|
"eval_runtime": 1.754, |
|
"eval_samples_per_second": 227.479, |
|
"eval_steps_per_second": 28.506, |
|
"step": 1586 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"grad_norm": 7.8712358474731445, |
|
"learning_rate": 1.5e-05, |
|
"loss": 0.2722, |
|
"step": 1708 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.8771929824561403, |
|
"eval_f1": 0.8491633041962088, |
|
"eval_loss": 0.3003251254558563, |
|
"eval_precision": 0.85610254797106, |
|
"eval_recall": 0.8431078377886888, |
|
"eval_runtime": 1.7541, |
|
"eval_samples_per_second": 227.468, |
|
"eval_steps_per_second": 28.505, |
|
"step": 1708 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"grad_norm": 2.7105746269226074, |
|
"learning_rate": 1.25e-05, |
|
"loss": 0.2536, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.8771929824561403, |
|
"eval_f1": 0.8491633041962088, |
|
"eval_loss": 0.29776889085769653, |
|
"eval_precision": 0.85610254797106, |
|
"eval_recall": 0.8431078377886888, |
|
"eval_runtime": 1.7521, |
|
"eval_samples_per_second": 227.726, |
|
"eval_steps_per_second": 28.537, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"grad_norm": 3.3939132690429688, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2536, |
|
"step": 1952 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.8822055137844611, |
|
"eval_f1": 0.8568221901555235, |
|
"eval_loss": 0.2970341145992279, |
|
"eval_precision": 0.8596491228070176, |
|
"eval_recall": 0.8541553009638116, |
|
"eval_runtime": 1.757, |
|
"eval_samples_per_second": 227.091, |
|
"eval_steps_per_second": 28.457, |
|
"step": 1952 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"grad_norm": 2.2166881561279297, |
|
"learning_rate": 7.5e-06, |
|
"loss": 0.2479, |
|
"step": 2074 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.8822055137844611, |
|
"eval_f1": 0.8545433391506689, |
|
"eval_loss": 0.29783862829208374, |
|
"eval_precision": 0.8639270714012982, |
|
"eval_recall": 0.846653937079469, |
|
"eval_runtime": 1.7549, |
|
"eval_samples_per_second": 227.365, |
|
"eval_steps_per_second": 28.492, |
|
"step": 2074 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"grad_norm": 7.031806945800781, |
|
"learning_rate": 5e-06, |
|
"loss": 0.2487, |
|
"step": 2196 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.8771929824561403, |
|
"eval_f1": 0.8491633041962088, |
|
"eval_loss": 0.29700446128845215, |
|
"eval_precision": 0.85610254797106, |
|
"eval_recall": 0.8431078377886888, |
|
"eval_runtime": 1.7527, |
|
"eval_samples_per_second": 227.643, |
|
"eval_steps_per_second": 28.527, |
|
"step": 2196 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"grad_norm": 5.178986549377441, |
|
"learning_rate": 2.5e-06, |
|
"loss": 0.2457, |
|
"step": 2318 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.8721804511278195, |
|
"eval_f1": 0.8421640488656195, |
|
"eval_loss": 0.2947324812412262, |
|
"eval_precision": 0.8512313860252005, |
|
"eval_recall": 0.8345608292416803, |
|
"eval_runtime": 1.7547, |
|
"eval_samples_per_second": 227.393, |
|
"eval_steps_per_second": 28.495, |
|
"step": 2318 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"grad_norm": 3.935147285461426, |
|
"learning_rate": 0.0, |
|
"loss": 0.2499, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.8721804511278195, |
|
"eval_f1": 0.8421640488656195, |
|
"eval_loss": 0.29513633251190186, |
|
"eval_precision": 0.8512313860252005, |
|
"eval_recall": 0.8345608292416803, |
|
"eval_runtime": 1.7593, |
|
"eval_samples_per_second": 226.794, |
|
"eval_steps_per_second": 28.42, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"step": 2440, |
|
"total_flos": 7609911792720000.0, |
|
"train_loss": 0.32608639607664014, |
|
"train_runtime": 629.3107, |
|
"train_samples_per_second": 115.619, |
|
"train_steps_per_second": 3.877 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 2440, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 20, |
|
"save_steps": 500, |
|
"total_flos": 7609911792720000.0, |
|
"train_batch_size": 30, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|