{
  "best_metric": 0.7794117647058824,
  "best_model_checkpoint": "convnext-tiny-224-finetuned-papsmear/checkpoint-269",
  "epoch": 14.805194805194805,
  "eval_steps": 500,
  "global_step": 285,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.5194805194805194,
      "grad_norm": 2.7057783603668213,
      "learning_rate": 1.7241379310344828e-05,
      "loss": 1.8151,
      "step": 10
    },
    {
      "epoch": 0.987012987012987,
      "eval_accuracy": 0.34558823529411764,
      "eval_loss": 1.6491025686264038,
      "eval_runtime": 28.2457,
      "eval_samples_per_second": 4.815,
      "eval_steps_per_second": 0.319,
      "step": 19
    },
    {
      "epoch": 1.0389610389610389,
      "grad_norm": 2.921726703643799,
      "learning_rate": 3.4482758620689657e-05,
      "loss": 1.7253,
      "step": 20
    },
    {
      "epoch": 1.5584415584415585,
      "grad_norm": 2.692876100540161,
      "learning_rate": 4.9804687500000004e-05,
      "loss": 1.6104,
      "step": 30
    },
    {
      "epoch": 1.974025974025974,
      "eval_accuracy": 0.4264705882352941,
      "eval_loss": 1.4321942329406738,
      "eval_runtime": 28.4975,
      "eval_samples_per_second": 4.772,
      "eval_steps_per_second": 0.316,
      "step": 38
    },
    {
      "epoch": 2.0779220779220777,
      "grad_norm": 4.681018829345703,
      "learning_rate": 4.78515625e-05,
      "loss": 1.4815,
      "step": 40
    },
    {
      "epoch": 2.5974025974025974,
      "grad_norm": 6.916051864624023,
      "learning_rate": 4.58984375e-05,
      "loss": 1.4002,
      "step": 50
    },
    {
      "epoch": 2.961038961038961,
      "eval_accuracy": 0.5882352941176471,
      "eval_loss": 1.2286293506622314,
      "eval_runtime": 28.7786,
      "eval_samples_per_second": 4.726,
      "eval_steps_per_second": 0.313,
      "step": 57
    },
    {
      "epoch": 3.116883116883117,
      "grad_norm": 6.231860637664795,
      "learning_rate": 4.3945312500000005e-05,
      "loss": 1.2972,
      "step": 60
    },
    {
      "epoch": 3.6363636363636362,
      "grad_norm": 5.155839443206787,
      "learning_rate": 4.1992187500000003e-05,
      "loss": 1.203,
      "step": 70
    },
    {
      "epoch": 4.0,
      "eval_accuracy": 0.6544117647058824,
      "eval_loss": 1.0558613538742065,
      "eval_runtime": 29.1144,
      "eval_samples_per_second": 4.671,
      "eval_steps_per_second": 0.309,
      "step": 77
    },
    {
      "epoch": 4.1558441558441555,
      "grad_norm": 4.061820983886719,
      "learning_rate": 4.00390625e-05,
      "loss": 1.1157,
      "step": 80
    },
    {
      "epoch": 4.675324675324675,
      "grad_norm": 6.426987171173096,
      "learning_rate": 3.80859375e-05,
      "loss": 1.047,
      "step": 90
    },
    {
      "epoch": 4.987012987012987,
      "eval_accuracy": 0.6764705882352942,
      "eval_loss": 0.9356656074523926,
      "eval_runtime": 28.6745,
      "eval_samples_per_second": 4.743,
      "eval_steps_per_second": 0.314,
      "step": 96
    },
    {
      "epoch": 5.194805194805195,
      "grad_norm": 8.75316333770752,
      "learning_rate": 3.6132812500000005e-05,
      "loss": 0.9786,
      "step": 100
    },
    {
      "epoch": 5.714285714285714,
      "grad_norm": 18.9505615234375,
      "learning_rate": 3.41796875e-05,
      "loss": 0.9083,
      "step": 110
    },
    {
      "epoch": 5.974025974025974,
      "eval_accuracy": 0.7279411764705882,
      "eval_loss": 0.8477045893669128,
      "eval_runtime": 28.1374,
      "eval_samples_per_second": 4.833,
      "eval_steps_per_second": 0.32,
      "step": 115
    },
    {
      "epoch": 6.233766233766234,
      "grad_norm": 10.591647148132324,
      "learning_rate": 3.22265625e-05,
      "loss": 0.8763,
      "step": 120
    },
    {
      "epoch": 6.753246753246753,
      "grad_norm": 22.407350540161133,
      "learning_rate": 3.02734375e-05,
      "loss": 0.8756,
      "step": 130
    },
    {
      "epoch": 6.961038961038961,
      "eval_accuracy": 0.75,
      "eval_loss": 0.7761698365211487,
      "eval_runtime": 28.746,
      "eval_samples_per_second": 4.731,
      "eval_steps_per_second": 0.313,
      "step": 134
    },
    {
      "epoch": 7.2727272727272725,
      "grad_norm": 14.485121726989746,
      "learning_rate": 2.83203125e-05,
      "loss": 0.7762,
      "step": 140
    },
    {
      "epoch": 7.792207792207792,
      "grad_norm": 12.85451889038086,
      "learning_rate": 2.63671875e-05,
      "loss": 0.7853,
      "step": 150
    },
    {
      "epoch": 8.0,
      "eval_accuracy": 0.7647058823529411,
      "eval_loss": 0.7257648706436157,
      "eval_runtime": 28.5887,
      "eval_samples_per_second": 4.757,
      "eval_steps_per_second": 0.315,
      "step": 154
    },
    {
      "epoch": 8.311688311688311,
      "grad_norm": 15.272162437438965,
      "learning_rate": 2.44140625e-05,
      "loss": 0.7991,
      "step": 160
    },
    {
      "epoch": 8.831168831168831,
      "grad_norm": 23.24527931213379,
      "learning_rate": 2.24609375e-05,
      "loss": 0.7198,
      "step": 170
    },
    {
      "epoch": 8.987012987012987,
      "eval_accuracy": 0.7573529411764706,
      "eval_loss": 0.7022554874420166,
      "eval_runtime": 28.5238,
      "eval_samples_per_second": 4.768,
      "eval_steps_per_second": 0.316,
      "step": 173
    },
    {
      "epoch": 9.35064935064935,
      "grad_norm": 41.553192138671875,
      "learning_rate": 2.05078125e-05,
      "loss": 0.7277,
      "step": 180
    },
    {
      "epoch": 9.87012987012987,
      "grad_norm": 17.23739242553711,
      "learning_rate": 1.85546875e-05,
      "loss": 0.7151,
      "step": 190
    },
    {
      "epoch": 9.974025974025974,
      "eval_accuracy": 0.7573529411764706,
      "eval_loss": 0.6756320595741272,
      "eval_runtime": 28.3293,
      "eval_samples_per_second": 4.801,
      "eval_steps_per_second": 0.318,
      "step": 192
    },
    {
      "epoch": 10.38961038961039,
      "grad_norm": 38.664608001708984,
      "learning_rate": 1.66015625e-05,
      "loss": 0.7012,
      "step": 200
    },
    {
      "epoch": 10.909090909090908,
      "grad_norm": 16.148284912109375,
      "learning_rate": 1.4648437500000001e-05,
      "loss": 0.7049,
      "step": 210
    },
    {
      "epoch": 10.96103896103896,
      "eval_accuracy": 0.7573529411764706,
      "eval_loss": 0.6492503881454468,
      "eval_runtime": 28.3206,
      "eval_samples_per_second": 4.802,
      "eval_steps_per_second": 0.318,
      "step": 211
    },
    {
      "epoch": 11.428571428571429,
      "grad_norm": 11.50155258178711,
      "learning_rate": 1.2695312500000001e-05,
      "loss": 0.6742,
      "step": 220
    },
    {
      "epoch": 11.948051948051948,
      "grad_norm": 7.969295978546143,
      "learning_rate": 1.0742187500000001e-05,
      "loss": 0.6387,
      "step": 230
    },
    {
      "epoch": 12.0,
      "eval_accuracy": 0.7720588235294118,
      "eval_loss": 0.6256288290023804,
      "eval_runtime": 28.2852,
      "eval_samples_per_second": 4.808,
      "eval_steps_per_second": 0.318,
      "step": 231
    },
    {
      "epoch": 12.467532467532468,
      "grad_norm": 23.28668975830078,
      "learning_rate": 8.789062500000001e-06,
      "loss": 0.6393,
      "step": 240
    },
    {
      "epoch": 12.987012987012987,
      "grad_norm": 20.532899856567383,
      "learning_rate": 6.8359375e-06,
      "loss": 0.6387,
      "step": 250
    },
    {
      "epoch": 12.987012987012987,
      "eval_accuracy": 0.7720588235294118,
      "eval_loss": 0.6295046806335449,
      "eval_runtime": 27.9632,
      "eval_samples_per_second": 4.864,
      "eval_steps_per_second": 0.322,
      "step": 250
    },
    {
      "epoch": 13.506493506493506,
      "grad_norm": 17.684839248657227,
      "learning_rate": 4.8828125e-06,
      "loss": 0.6233,
      "step": 260
    },
    {
      "epoch": 13.974025974025974,
      "eval_accuracy": 0.7794117647058824,
      "eval_loss": 0.6032511591911316,
      "eval_runtime": 28.638,
      "eval_samples_per_second": 4.749,
      "eval_steps_per_second": 0.314,
      "step": 269
    },
    {
      "epoch": 14.025974025974026,
      "grad_norm": 7.872328281402588,
      "learning_rate": 2.9296875e-06,
      "loss": 0.6251,
      "step": 270
    },
    {
      "epoch": 14.545454545454545,
      "grad_norm": 14.434428215026855,
      "learning_rate": 9.765625e-07,
      "loss": 0.632,
      "step": 280
    },
    {
      "epoch": 14.805194805194805,
      "eval_accuracy": 0.7794117647058824,
      "eval_loss": 0.6009563207626343,
      "eval_runtime": 28.5598,
      "eval_samples_per_second": 4.762,
      "eval_steps_per_second": 0.315,
      "step": 285
    },
    {
      "epoch": 14.805194805194805,
      "step": 285,
      "total_flos": 4.55564191810388e+17,
      "train_loss": 0.9492872991059956,
      "train_runtime": 4189.5915,
      "train_samples_per_second": 4.382,
      "train_steps_per_second": 0.068
    }
  ],
  "logging_steps": 10,
  "max_steps": 285,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 15,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 4.55564191810388e+17,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}