|
{ |
|
"best_metric": 0.8714285714285714, |
|
"best_model_checkpoint": "CP2_HAR_vit-base-patch16-224/checkpoint-908", |
|
"epoch": 29.620253164556964, |
|
"eval_steps": 500, |
|
"global_step": 1170, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.9873417721518988, |
|
"grad_norm": 2.9084372520446777, |
|
"learning_rate": 1.6666666666666667e-05, |
|
"loss": 2.7032, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.9873417721518988, |
|
"eval_accuracy": 0.3388888888888889, |
|
"eval_loss": 2.304168224334717, |
|
"eval_runtime": 10.2204, |
|
"eval_samples_per_second": 123.283, |
|
"eval_steps_per_second": 1.957, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 2.045494318008423, |
|
"learning_rate": 3.376068376068376e-05, |
|
"loss": 1.7639, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.7515873015873016, |
|
"eval_loss": 1.0595871210098267, |
|
"eval_runtime": 10.2792, |
|
"eval_samples_per_second": 122.577, |
|
"eval_steps_per_second": 1.946, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 2.9873417721518987, |
|
"grad_norm": 1.7503687143325806, |
|
"learning_rate": 4.995251661918329e-05, |
|
"loss": 0.974, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 2.9873417721518987, |
|
"eval_accuracy": 0.8134920634920635, |
|
"eval_loss": 0.6007124781608582, |
|
"eval_runtime": 10.208, |
|
"eval_samples_per_second": 123.432, |
|
"eval_steps_per_second": 1.959, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 2.026381492614746, |
|
"learning_rate": 4.8053181386514724e-05, |
|
"loss": 0.7207, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.8412698412698413, |
|
"eval_loss": 0.49882617592811584, |
|
"eval_runtime": 10.3437, |
|
"eval_samples_per_second": 121.813, |
|
"eval_steps_per_second": 1.934, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 4.987341772151899, |
|
"grad_norm": 1.8753575086593628, |
|
"learning_rate": 4.620132953466287e-05, |
|
"loss": 0.6285, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 4.987341772151899, |
|
"eval_accuracy": 0.8507936507936508, |
|
"eval_loss": 0.4587480425834656, |
|
"eval_runtime": 10.2618, |
|
"eval_samples_per_second": 122.785, |
|
"eval_steps_per_second": 1.949, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 2.0627048015594482, |
|
"learning_rate": 4.4301994301994304e-05, |
|
"loss": 0.562, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.8531746031746031, |
|
"eval_loss": 0.4662785232067108, |
|
"eval_runtime": 10.2093, |
|
"eval_samples_per_second": 123.417, |
|
"eval_steps_per_second": 1.959, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 6.987341772151899, |
|
"grad_norm": 1.78373122215271, |
|
"learning_rate": 4.2450142450142457e-05, |
|
"loss": 0.5258, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 6.987341772151899, |
|
"eval_accuracy": 0.8468253968253968, |
|
"eval_loss": 0.45184874534606934, |
|
"eval_runtime": 10.2263, |
|
"eval_samples_per_second": 123.211, |
|
"eval_steps_per_second": 1.956, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 2.4166259765625, |
|
"learning_rate": 4.0550807217473884e-05, |
|
"loss": 0.4843, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.8603174603174604, |
|
"eval_loss": 0.4466171860694885, |
|
"eval_runtime": 10.1905, |
|
"eval_samples_per_second": 123.645, |
|
"eval_steps_per_second": 1.963, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 8.987341772151899, |
|
"grad_norm": 2.37298321723938, |
|
"learning_rate": 3.8698955365622036e-05, |
|
"loss": 0.4491, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 8.987341772151899, |
|
"eval_accuracy": 0.8523809523809524, |
|
"eval_loss": 0.43793126940727234, |
|
"eval_runtime": 10.2956, |
|
"eval_samples_per_second": 122.382, |
|
"eval_steps_per_second": 1.943, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 2.5339529514312744, |
|
"learning_rate": 3.679962013295346e-05, |
|
"loss": 0.4288, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.8674603174603175, |
|
"eval_loss": 0.432355135679245, |
|
"eval_runtime": 10.268, |
|
"eval_samples_per_second": 122.712, |
|
"eval_steps_per_second": 1.948, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 10.987341772151899, |
|
"grad_norm": 1.7715898752212524, |
|
"learning_rate": 3.4947768281101616e-05, |
|
"loss": 0.4183, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 10.987341772151899, |
|
"eval_accuracy": 0.8642857142857143, |
|
"eval_loss": 0.44705930352211, |
|
"eval_runtime": 10.2591, |
|
"eval_samples_per_second": 122.817, |
|
"eval_steps_per_second": 1.949, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"grad_norm": 2.3940932750701904, |
|
"learning_rate": 3.304843304843305e-05, |
|
"loss": 0.3882, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.8579365079365079, |
|
"eval_loss": 0.43507805466651917, |
|
"eval_runtime": 10.2613, |
|
"eval_samples_per_second": 122.792, |
|
"eval_steps_per_second": 1.949, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 12.987341772151899, |
|
"grad_norm": 2.104583501815796, |
|
"learning_rate": 3.1196581196581195e-05, |
|
"loss": 0.3777, |
|
"step": 513 |
|
}, |
|
{ |
|
"epoch": 12.987341772151899, |
|
"eval_accuracy": 0.8611111111111112, |
|
"eval_loss": 0.432034432888031, |
|
"eval_runtime": 10.2065, |
|
"eval_samples_per_second": 123.451, |
|
"eval_steps_per_second": 1.96, |
|
"step": 513 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"grad_norm": 2.3956658840179443, |
|
"learning_rate": 2.9297245963912633e-05, |
|
"loss": 0.3497, |
|
"step": 553 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.8642857142857143, |
|
"eval_loss": 0.4432290196418762, |
|
"eval_runtime": 10.2012, |
|
"eval_samples_per_second": 123.515, |
|
"eval_steps_per_second": 1.961, |
|
"step": 553 |
|
}, |
|
{ |
|
"epoch": 14.987341772151899, |
|
"grad_norm": 2.0552070140838623, |
|
"learning_rate": 2.744539411206078e-05, |
|
"loss": 0.347, |
|
"step": 592 |
|
}, |
|
{ |
|
"epoch": 14.987341772151899, |
|
"eval_accuracy": 0.8690476190476191, |
|
"eval_loss": 0.4347086548805237, |
|
"eval_runtime": 10.2609, |
|
"eval_samples_per_second": 122.797, |
|
"eval_steps_per_second": 1.949, |
|
"step": 592 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"grad_norm": 2.1157126426696777, |
|
"learning_rate": 2.5546058879392216e-05, |
|
"loss": 0.3331, |
|
"step": 632 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.8603174603174604, |
|
"eval_loss": 0.4517436623573303, |
|
"eval_runtime": 10.3042, |
|
"eval_samples_per_second": 122.28, |
|
"eval_steps_per_second": 1.941, |
|
"step": 632 |
|
}, |
|
{ |
|
"epoch": 16.9873417721519, |
|
"grad_norm": 1.8309712409973145, |
|
"learning_rate": 2.3694207027540365e-05, |
|
"loss": 0.3219, |
|
"step": 671 |
|
}, |
|
{ |
|
"epoch": 16.9873417721519, |
|
"eval_accuracy": 0.8666666666666667, |
|
"eval_loss": 0.44011229276657104, |
|
"eval_runtime": 10.2371, |
|
"eval_samples_per_second": 123.082, |
|
"eval_steps_per_second": 1.954, |
|
"step": 671 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"grad_norm": 2.178051710128784, |
|
"learning_rate": 2.1794871794871795e-05, |
|
"loss": 0.3081, |
|
"step": 711 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.8690476190476191, |
|
"eval_loss": 0.4321274161338806, |
|
"eval_runtime": 10.2691, |
|
"eval_samples_per_second": 122.699, |
|
"eval_steps_per_second": 1.948, |
|
"step": 711 |
|
}, |
|
{ |
|
"epoch": 18.9873417721519, |
|
"grad_norm": 2.0867300033569336, |
|
"learning_rate": 1.9943019943019945e-05, |
|
"loss": 0.3194, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 18.9873417721519, |
|
"eval_accuracy": 0.8690476190476191, |
|
"eval_loss": 0.4421131908893585, |
|
"eval_runtime": 10.2636, |
|
"eval_samples_per_second": 122.764, |
|
"eval_steps_per_second": 1.949, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"grad_norm": 2.312155246734619, |
|
"learning_rate": 1.804368471035138e-05, |
|
"loss": 0.3102, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.8682539682539683, |
|
"eval_loss": 0.4470122754573822, |
|
"eval_runtime": 10.428, |
|
"eval_samples_per_second": 120.829, |
|
"eval_steps_per_second": 1.918, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 20.9873417721519, |
|
"grad_norm": 1.674055814743042, |
|
"learning_rate": 1.6191832858499524e-05, |
|
"loss": 0.2908, |
|
"step": 829 |
|
}, |
|
{ |
|
"epoch": 20.9873417721519, |
|
"eval_accuracy": 0.8666666666666667, |
|
"eval_loss": 0.4368663430213928, |
|
"eval_runtime": 10.304, |
|
"eval_samples_per_second": 122.282, |
|
"eval_steps_per_second": 1.941, |
|
"step": 829 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"grad_norm": 1.8067846298217773, |
|
"learning_rate": 1.4292497625830961e-05, |
|
"loss": 0.2794, |
|
"step": 869 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_accuracy": 0.8642857142857143, |
|
"eval_loss": 0.4426242411136627, |
|
"eval_runtime": 10.2667, |
|
"eval_samples_per_second": 122.726, |
|
"eval_steps_per_second": 1.948, |
|
"step": 869 |
|
}, |
|
{ |
|
"epoch": 22.9873417721519, |
|
"grad_norm": 2.093015193939209, |
|
"learning_rate": 1.2440645773979107e-05, |
|
"loss": 0.2684, |
|
"step": 908 |
|
}, |
|
{ |
|
"epoch": 22.9873417721519, |
|
"eval_accuracy": 0.8714285714285714, |
|
"eval_loss": 0.4378375709056854, |
|
"eval_runtime": 10.2839, |
|
"eval_samples_per_second": 122.522, |
|
"eval_steps_per_second": 1.945, |
|
"step": 908 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"grad_norm": 1.7382984161376953, |
|
"learning_rate": 1.0541310541310543e-05, |
|
"loss": 0.2635, |
|
"step": 948 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.8698412698412699, |
|
"eval_loss": 0.44393062591552734, |
|
"eval_runtime": 10.2502, |
|
"eval_samples_per_second": 122.924, |
|
"eval_steps_per_second": 1.951, |
|
"step": 948 |
|
}, |
|
{ |
|
"epoch": 24.9873417721519, |
|
"grad_norm": 1.4845259189605713, |
|
"learning_rate": 8.68945868945869e-06, |
|
"loss": 0.2754, |
|
"step": 987 |
|
}, |
|
{ |
|
"epoch": 24.9873417721519, |
|
"eval_accuracy": 0.8642857142857143, |
|
"eval_loss": 0.45485520362854004, |
|
"eval_runtime": 10.2015, |
|
"eval_samples_per_second": 123.511, |
|
"eval_steps_per_second": 1.96, |
|
"step": 987 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"grad_norm": 1.6323109865188599, |
|
"learning_rate": 6.790123456790123e-06, |
|
"loss": 0.2669, |
|
"step": 1027 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_accuracy": 0.8674603174603175, |
|
"eval_loss": 0.44393137097358704, |
|
"eval_runtime": 10.2535, |
|
"eval_samples_per_second": 122.885, |
|
"eval_steps_per_second": 1.951, |
|
"step": 1027 |
|
}, |
|
{ |
|
"epoch": 26.9873417721519, |
|
"grad_norm": 1.589407205581665, |
|
"learning_rate": 4.938271604938272e-06, |
|
"loss": 0.2616, |
|
"step": 1066 |
|
}, |
|
{ |
|
"epoch": 26.9873417721519, |
|
"eval_accuracy": 0.8714285714285714, |
|
"eval_loss": 0.4428676962852478, |
|
"eval_runtime": 10.2429, |
|
"eval_samples_per_second": 123.012, |
|
"eval_steps_per_second": 1.953, |
|
"step": 1066 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"grad_norm": 1.8043303489685059, |
|
"learning_rate": 3.038936372269706e-06, |
|
"loss": 0.2501, |
|
"step": 1106 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.8698412698412699, |
|
"eval_loss": 0.4408431649208069, |
|
"eval_runtime": 10.1648, |
|
"eval_samples_per_second": 123.957, |
|
"eval_steps_per_second": 1.968, |
|
"step": 1106 |
|
}, |
|
{ |
|
"epoch": 28.9873417721519, |
|
"grad_norm": 2.025970935821533, |
|
"learning_rate": 1.1870845204178538e-06, |
|
"loss": 0.2622, |
|
"step": 1145 |
|
}, |
|
{ |
|
"epoch": 28.9873417721519, |
|
"eval_accuracy": 0.8682539682539683, |
|
"eval_loss": 0.4434352219104767, |
|
"eval_runtime": 10.25, |
|
"eval_samples_per_second": 122.927, |
|
"eval_steps_per_second": 1.951, |
|
"step": 1145 |
|
}, |
|
{ |
|
"epoch": 29.620253164556964, |
|
"grad_norm": 1.5968279838562012, |
|
"learning_rate": 0.0, |
|
"loss": 0.2511, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 29.620253164556964, |
|
"eval_accuracy": 0.8682539682539683, |
|
"eval_loss": 0.44374439120292664, |
|
"eval_runtime": 10.1581, |
|
"eval_samples_per_second": 124.038, |
|
"eval_steps_per_second": 1.969, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 29.620253164556964, |
|
"step": 1170, |
|
"total_flos": 2.3141184141358596e+19, |
|
"train_loss": 0.5155148339067769, |
|
"train_runtime": 6443.0087, |
|
"train_samples_per_second": 46.935, |
|
"train_steps_per_second": 0.182 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 1170, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 30, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2.3141184141358596e+19, |
|
"train_batch_size": 64, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|