|
{ |
|
"best_metric": 0.5122641324996948, |
|
"best_model_checkpoint": "vit-msn-small-beta-fia-manually-enhanced-HSV_test_3/checkpoint-1", |
|
"epoch": 28.571428571428573, |
|
"eval_steps": 500, |
|
"global_step": 50, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.5714285714285714, |
|
"eval_accuracy": 0.8873239436619719, |
|
"eval_loss": 0.5122641324996948, |
|
"eval_runtime": 0.5861, |
|
"eval_samples_per_second": 242.264, |
|
"eval_steps_per_second": 5.118, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 1.7142857142857144, |
|
"eval_accuracy": 0.8661971830985915, |
|
"eval_loss": 0.5494502186775208, |
|
"eval_runtime": 0.5821, |
|
"eval_samples_per_second": 243.962, |
|
"eval_steps_per_second": 5.154, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 2.857142857142857, |
|
"eval_accuracy": 0.8591549295774648, |
|
"eval_loss": 0.6079620122909546, |
|
"eval_runtime": 0.5831, |
|
"eval_samples_per_second": 243.534, |
|
"eval_steps_per_second": 5.145, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.8732394366197183, |
|
"eval_loss": 0.5590734481811523, |
|
"eval_runtime": 0.6248, |
|
"eval_samples_per_second": 227.269, |
|
"eval_steps_per_second": 4.801, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 4.571428571428571, |
|
"eval_accuracy": 0.8732394366197183, |
|
"eval_loss": 0.5464029312133789, |
|
"eval_runtime": 0.6237, |
|
"eval_samples_per_second": 227.68, |
|
"eval_steps_per_second": 4.81, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 5.714285714285714, |
|
"grad_norm": 4.793102264404297, |
|
"learning_rate": 9.523809523809525e-06, |
|
"loss": 0.4241, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 5.714285714285714, |
|
"eval_accuracy": 0.8450704225352113, |
|
"eval_loss": 0.5981650352478027, |
|
"eval_runtime": 0.6008, |
|
"eval_samples_per_second": 236.347, |
|
"eval_steps_per_second": 4.993, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 6.857142857142857, |
|
"eval_accuracy": 0.8169014084507042, |
|
"eval_loss": 0.6497244238853455, |
|
"eval_runtime": 0.6066, |
|
"eval_samples_per_second": 234.109, |
|
"eval_steps_per_second": 4.946, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.852112676056338, |
|
"eval_loss": 0.5927726030349731, |
|
"eval_runtime": 0.627, |
|
"eval_samples_per_second": 226.478, |
|
"eval_steps_per_second": 4.785, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 8.571428571428571, |
|
"eval_accuracy": 0.852112676056338, |
|
"eval_loss": 0.5711137056350708, |
|
"eval_runtime": 0.6061, |
|
"eval_samples_per_second": 234.301, |
|
"eval_steps_per_second": 4.95, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 9.714285714285714, |
|
"eval_accuracy": 0.8732394366197183, |
|
"eval_loss": 0.5468315482139587, |
|
"eval_runtime": 0.5742, |
|
"eval_samples_per_second": 247.304, |
|
"eval_steps_per_second": 5.225, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 10.857142857142858, |
|
"eval_accuracy": 0.852112676056338, |
|
"eval_loss": 0.5482771992683411, |
|
"eval_runtime": 0.5841, |
|
"eval_samples_per_second": 243.112, |
|
"eval_steps_per_second": 5.136, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 11.428571428571429, |
|
"grad_norm": 5.585962772369385, |
|
"learning_rate": 7.1428571428571436e-06, |
|
"loss": 0.4152, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.8450704225352113, |
|
"eval_loss": 0.5783097147941589, |
|
"eval_runtime": 0.5721, |
|
"eval_samples_per_second": 248.198, |
|
"eval_steps_per_second": 5.244, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 12.571428571428571, |
|
"eval_accuracy": 0.8450704225352113, |
|
"eval_loss": 0.5835375785827637, |
|
"eval_runtime": 0.6296, |
|
"eval_samples_per_second": 225.543, |
|
"eval_steps_per_second": 4.765, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 13.714285714285714, |
|
"eval_accuracy": 0.8450704225352113, |
|
"eval_loss": 0.5668258666992188, |
|
"eval_runtime": 0.5869, |
|
"eval_samples_per_second": 241.934, |
|
"eval_steps_per_second": 5.111, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 14.857142857142858, |
|
"eval_accuracy": 0.8450704225352113, |
|
"eval_loss": 0.555620014667511, |
|
"eval_runtime": 0.5858, |
|
"eval_samples_per_second": 242.413, |
|
"eval_steps_per_second": 5.121, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.8450704225352113, |
|
"eval_loss": 0.5564189553260803, |
|
"eval_runtime": 0.5987, |
|
"eval_samples_per_second": 237.192, |
|
"eval_steps_per_second": 5.011, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 16.571428571428573, |
|
"eval_accuracy": 0.8450704225352113, |
|
"eval_loss": 0.5590547919273376, |
|
"eval_runtime": 0.6483, |
|
"eval_samples_per_second": 219.036, |
|
"eval_steps_per_second": 4.628, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 17.142857142857142, |
|
"grad_norm": 5.258753299713135, |
|
"learning_rate": 4.761904761904762e-06, |
|
"loss": 0.4367, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 17.714285714285715, |
|
"eval_accuracy": 0.8591549295774648, |
|
"eval_loss": 0.5619198679924011, |
|
"eval_runtime": 0.6281, |
|
"eval_samples_per_second": 226.062, |
|
"eval_steps_per_second": 4.776, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 18.857142857142858, |
|
"eval_accuracy": 0.8591549295774648, |
|
"eval_loss": 0.5809253454208374, |
|
"eval_runtime": 0.5953, |
|
"eval_samples_per_second": 238.516, |
|
"eval_steps_per_second": 5.039, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.8661971830985915, |
|
"eval_loss": 0.5810067057609558, |
|
"eval_runtime": 0.626, |
|
"eval_samples_per_second": 226.835, |
|
"eval_steps_per_second": 4.792, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 20.571428571428573, |
|
"eval_accuracy": 0.8661971830985915, |
|
"eval_loss": 0.5768489837646484, |
|
"eval_runtime": 0.682, |
|
"eval_samples_per_second": 208.199, |
|
"eval_steps_per_second": 4.399, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 21.714285714285715, |
|
"eval_accuracy": 0.8732394366197183, |
|
"eval_loss": 0.5590782761573792, |
|
"eval_runtime": 0.665, |
|
"eval_samples_per_second": 213.54, |
|
"eval_steps_per_second": 4.511, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 22.857142857142858, |
|
"grad_norm": 4.620666027069092, |
|
"learning_rate": 2.380952380952381e-06, |
|
"loss": 0.4241, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 22.857142857142858, |
|
"eval_accuracy": 0.8732394366197183, |
|
"eval_loss": 0.5452097654342651, |
|
"eval_runtime": 0.5858, |
|
"eval_samples_per_second": 242.391, |
|
"eval_steps_per_second": 5.121, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.8732394366197183, |
|
"eval_loss": 0.5387392640113831, |
|
"eval_runtime": 0.5833, |
|
"eval_samples_per_second": 243.447, |
|
"eval_steps_per_second": 5.143, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 24.571428571428573, |
|
"eval_accuracy": 0.8732394366197183, |
|
"eval_loss": 0.5397770404815674, |
|
"eval_runtime": 0.6666, |
|
"eval_samples_per_second": 213.023, |
|
"eval_steps_per_second": 4.5, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 25.714285714285715, |
|
"eval_accuracy": 0.8732394366197183, |
|
"eval_loss": 0.5457538962364197, |
|
"eval_runtime": 0.5797, |
|
"eval_samples_per_second": 244.962, |
|
"eval_steps_per_second": 5.175, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 26.857142857142858, |
|
"eval_accuracy": 0.8732394366197183, |
|
"eval_loss": 0.5509300827980042, |
|
"eval_runtime": 0.6395, |
|
"eval_samples_per_second": 222.035, |
|
"eval_steps_per_second": 4.691, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.8732394366197183, |
|
"eval_loss": 0.5549753904342651, |
|
"eval_runtime": 0.5878, |
|
"eval_samples_per_second": 241.566, |
|
"eval_steps_per_second": 5.103, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 28.571428571428573, |
|
"grad_norm": 4.701329708099365, |
|
"learning_rate": 0.0, |
|
"loss": 0.4171, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 28.571428571428573, |
|
"eval_accuracy": 0.8732394366197183, |
|
"eval_loss": 0.5557973980903625, |
|
"eval_runtime": 0.5942, |
|
"eval_samples_per_second": 238.984, |
|
"eval_steps_per_second": 5.049, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 28.571428571428573, |
|
"step": 50, |
|
"total_flos": 2.3842598606630093e+17, |
|
"train_loss": 0.4234132957458496, |
|
"train_runtime": 127.7102, |
|
"train_samples_per_second": 166.784, |
|
"train_steps_per_second": 0.392 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 50, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 50, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2.3842598606630093e+17, |
|
"train_batch_size": 64, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|