|
{ |
|
"best_metric": 0.1927209496498108, |
|
"best_model_checkpoint": "./vit-base-brain-tumor-detection3/checkpoint-1500", |
|
"epoch": 60.0, |
|
"eval_steps": 500, |
|
"global_step": 3840, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.78125, |
|
"grad_norm": 0.014016176573932171, |
|
"learning_rate": 5.000000000000001e-07, |
|
"loss": 0.0028, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 1.5625, |
|
"grad_norm": 0.013060510158538818, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.0028, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 2.34375, |
|
"grad_norm": 0.013337934389710426, |
|
"learning_rate": 1.5e-06, |
|
"loss": 0.0028, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 3.125, |
|
"grad_norm": 0.013658256269991398, |
|
"learning_rate": 2.0000000000000003e-06, |
|
"loss": 0.0027, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 3.90625, |
|
"grad_norm": 0.012454299256205559, |
|
"learning_rate": 2.5e-06, |
|
"loss": 0.0026, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 4.6875, |
|
"grad_norm": 0.013191607780754566, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0026, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 5.46875, |
|
"grad_norm": 0.0118486937135458, |
|
"learning_rate": 3.5e-06, |
|
"loss": 0.0025, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 6.25, |
|
"grad_norm": 0.0345335379242897, |
|
"learning_rate": 4.000000000000001e-06, |
|
"loss": 0.0024, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 7.03125, |
|
"grad_norm": 0.013763554394245148, |
|
"learning_rate": 4.5e-06, |
|
"loss": 0.0137, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 7.8125, |
|
"grad_norm": 0.014735482633113861, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0048, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 7.8125, |
|
"eval_accuracy": 0.947265625, |
|
"eval_loss": 0.2336536943912506, |
|
"eval_runtime": 5.8997, |
|
"eval_samples_per_second": 173.569, |
|
"eval_steps_per_second": 21.696, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 8.59375, |
|
"grad_norm": 0.013056355528533459, |
|
"learning_rate": 5.500000000000001e-06, |
|
"loss": 0.0022, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 9.375, |
|
"grad_norm": 0.009187333285808563, |
|
"learning_rate": 6e-06, |
|
"loss": 0.0021, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 10.15625, |
|
"grad_norm": 0.0087556978687644, |
|
"learning_rate": 6.5000000000000004e-06, |
|
"loss": 0.0019, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 10.9375, |
|
"grad_norm": 0.008410913869738579, |
|
"learning_rate": 7e-06, |
|
"loss": 0.0018, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 11.71875, |
|
"grad_norm": 0.008203917182981968, |
|
"learning_rate": 7.500000000000001e-06, |
|
"loss": 0.0017, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 12.5, |
|
"grad_norm": 0.007246215827763081, |
|
"learning_rate": 8.000000000000001e-06, |
|
"loss": 0.0016, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 13.28125, |
|
"grad_norm": 0.006727874744683504, |
|
"learning_rate": 8.5e-06, |
|
"loss": 0.0015, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 14.0625, |
|
"grad_norm": 0.007697463966906071, |
|
"learning_rate": 9e-06, |
|
"loss": 0.0014, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 14.84375, |
|
"grad_norm": 0.005949131678789854, |
|
"learning_rate": 9.5e-06, |
|
"loss": 0.0013, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 15.625, |
|
"grad_norm": 0.0054717655293643475, |
|
"learning_rate": 1e-05, |
|
"loss": 0.0012, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 15.625, |
|
"eval_accuracy": 0.953125, |
|
"eval_loss": 0.19501826167106628, |
|
"eval_runtime": 5.9147, |
|
"eval_samples_per_second": 173.128, |
|
"eval_steps_per_second": 21.641, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 16.40625, |
|
"grad_norm": 0.005219893530011177, |
|
"learning_rate": 9.965181058495823e-06, |
|
"loss": 0.0011, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 17.1875, |
|
"grad_norm": 0.004757468122988939, |
|
"learning_rate": 9.930362116991644e-06, |
|
"loss": 0.0011, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 17.96875, |
|
"grad_norm": 0.004971610382199287, |
|
"learning_rate": 9.895543175487466e-06, |
|
"loss": 0.001, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 18.75, |
|
"grad_norm": 0.0046828743070364, |
|
"learning_rate": 9.860724233983288e-06, |
|
"loss": 0.0009, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 19.53125, |
|
"grad_norm": 0.004280711989849806, |
|
"learning_rate": 9.82590529247911e-06, |
|
"loss": 0.0009, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 20.3125, |
|
"grad_norm": 0.004425444174557924, |
|
"learning_rate": 9.79108635097493e-06, |
|
"loss": 0.0008, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 21.09375, |
|
"grad_norm": 0.0037732652854174376, |
|
"learning_rate": 9.756267409470753e-06, |
|
"loss": 0.0008, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 21.875, |
|
"grad_norm": 0.0033754699397832155, |
|
"learning_rate": 9.721448467966575e-06, |
|
"loss": 0.0007, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 22.65625, |
|
"grad_norm": 0.003637350630015135, |
|
"learning_rate": 9.686629526462397e-06, |
|
"loss": 0.0007, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 23.4375, |
|
"grad_norm": 0.003412399208173156, |
|
"learning_rate": 9.651810584958218e-06, |
|
"loss": 0.0007, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 23.4375, |
|
"eval_accuracy": 0.9580078125, |
|
"eval_loss": 0.1927209496498108, |
|
"eval_runtime": 5.2401, |
|
"eval_samples_per_second": 195.416, |
|
"eval_steps_per_second": 24.427, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 24.21875, |
|
"grad_norm": 0.002839893801137805, |
|
"learning_rate": 9.61699164345404e-06, |
|
"loss": 0.0006, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"grad_norm": 0.0031008291989564896, |
|
"learning_rate": 9.58217270194986e-06, |
|
"loss": 0.0006, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 25.78125, |
|
"grad_norm": 0.002541514113545418, |
|
"learning_rate": 9.547353760445683e-06, |
|
"loss": 0.0006, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 26.5625, |
|
"grad_norm": 0.0025104843080043793, |
|
"learning_rate": 9.512534818941505e-06, |
|
"loss": 0.0005, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 27.34375, |
|
"grad_norm": 0.0023143806029111147, |
|
"learning_rate": 9.477715877437327e-06, |
|
"loss": 0.0005, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 28.125, |
|
"grad_norm": 0.0023780674673616886, |
|
"learning_rate": 9.442896935933148e-06, |
|
"loss": 0.0005, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 28.90625, |
|
"grad_norm": 0.002274406375363469, |
|
"learning_rate": 9.40807799442897e-06, |
|
"loss": 0.0005, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 29.6875, |
|
"grad_norm": 0.002076026052236557, |
|
"learning_rate": 9.373259052924792e-06, |
|
"loss": 0.0005, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 30.46875, |
|
"grad_norm": 0.0024436817038804293, |
|
"learning_rate": 9.338440111420614e-06, |
|
"loss": 0.0004, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 31.25, |
|
"grad_norm": 0.0018446892499923706, |
|
"learning_rate": 9.303621169916436e-06, |
|
"loss": 0.0004, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 31.25, |
|
"eval_accuracy": 0.962890625, |
|
"eval_loss": 0.1969820261001587, |
|
"eval_runtime": 5.2387, |
|
"eval_samples_per_second": 195.469, |
|
"eval_steps_per_second": 24.434, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 32.03125, |
|
"grad_norm": 0.0020159403793513775, |
|
"learning_rate": 9.268802228412257e-06, |
|
"loss": 0.0004, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 32.8125, |
|
"grad_norm": 0.0019202978583052754, |
|
"learning_rate": 9.23398328690808e-06, |
|
"loss": 0.0004, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 33.59375, |
|
"grad_norm": 0.0030681404750794172, |
|
"learning_rate": 9.1991643454039e-06, |
|
"loss": 0.0004, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 34.375, |
|
"grad_norm": 0.0016341815935447812, |
|
"learning_rate": 9.164345403899722e-06, |
|
"loss": 0.0004, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 35.15625, |
|
"grad_norm": 0.0016691142227500677, |
|
"learning_rate": 9.129526462395544e-06, |
|
"loss": 0.0003, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 35.9375, |
|
"grad_norm": 0.0017921621911227703, |
|
"learning_rate": 9.094707520891366e-06, |
|
"loss": 0.0003, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 36.71875, |
|
"grad_norm": 0.00160547427367419, |
|
"learning_rate": 9.059888579387187e-06, |
|
"loss": 0.0003, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 37.5, |
|
"grad_norm": 0.0014217059360817075, |
|
"learning_rate": 9.025069637883009e-06, |
|
"loss": 0.0003, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 38.28125, |
|
"grad_norm": 0.001448018359951675, |
|
"learning_rate": 8.990250696378831e-06, |
|
"loss": 0.0003, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 39.0625, |
|
"grad_norm": 0.0017675248673185706, |
|
"learning_rate": 8.955431754874653e-06, |
|
"loss": 0.0003, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 39.0625, |
|
"eval_accuracy": 0.962890625, |
|
"eval_loss": 0.20403626561164856, |
|
"eval_runtime": 5.1962, |
|
"eval_samples_per_second": 197.067, |
|
"eval_steps_per_second": 24.633, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 39.84375, |
|
"grad_norm": 0.0017623680178076029, |
|
"learning_rate": 8.920612813370474e-06, |
|
"loss": 0.0003, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 40.625, |
|
"grad_norm": 0.0011810092255473137, |
|
"learning_rate": 8.885793871866296e-06, |
|
"loss": 0.0003, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 41.40625, |
|
"grad_norm": 0.001152553828433156, |
|
"learning_rate": 8.850974930362117e-06, |
|
"loss": 0.0003, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 42.1875, |
|
"grad_norm": 0.0012170104309916496, |
|
"learning_rate": 8.816155988857939e-06, |
|
"loss": 0.0003, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 42.96875, |
|
"grad_norm": 0.0010642099659889936, |
|
"learning_rate": 8.781337047353761e-06, |
|
"loss": 0.0002, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 43.75, |
|
"grad_norm": 0.0010462955106049776, |
|
"learning_rate": 8.746518105849583e-06, |
|
"loss": 0.0002, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 44.53125, |
|
"grad_norm": 0.0010893407743424177, |
|
"learning_rate": 8.711699164345404e-06, |
|
"loss": 0.0002, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 45.3125, |
|
"grad_norm": 0.0010920371860265732, |
|
"learning_rate": 8.676880222841226e-06, |
|
"loss": 0.0002, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 46.09375, |
|
"grad_norm": 0.0010040885536000133, |
|
"learning_rate": 8.642061281337048e-06, |
|
"loss": 0.0002, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 46.875, |
|
"grad_norm": 0.0009422469302080572, |
|
"learning_rate": 8.60724233983287e-06, |
|
"loss": 0.0002, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 46.875, |
|
"eval_accuracy": 0.962890625, |
|
"eval_loss": 0.21138769388198853, |
|
"eval_runtime": 5.8076, |
|
"eval_samples_per_second": 176.32, |
|
"eval_steps_per_second": 22.04, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 47.65625, |
|
"grad_norm": 0.0011073002824559808, |
|
"learning_rate": 8.572423398328693e-06, |
|
"loss": 0.0002, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 48.4375, |
|
"grad_norm": 0.000991741195321083, |
|
"learning_rate": 8.537604456824513e-06, |
|
"loss": 0.0002, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 49.21875, |
|
"grad_norm": 0.0008712337585166097, |
|
"learning_rate": 8.502785515320335e-06, |
|
"loss": 0.0002, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"grad_norm": 0.0008826220873743296, |
|
"learning_rate": 8.467966573816156e-06, |
|
"loss": 0.0002, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 50.78125, |
|
"grad_norm": 0.0009179635089822114, |
|
"learning_rate": 8.433147632311978e-06, |
|
"loss": 0.0002, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 51.5625, |
|
"grad_norm": 0.0008320676279254258, |
|
"learning_rate": 8.3983286908078e-06, |
|
"loss": 0.0002, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 52.34375, |
|
"grad_norm": 0.0007437244057655334, |
|
"learning_rate": 8.363509749303623e-06, |
|
"loss": 0.0002, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 53.125, |
|
"grad_norm": 0.0007439731853082776, |
|
"learning_rate": 8.328690807799443e-06, |
|
"loss": 0.0002, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 53.90625, |
|
"grad_norm": 0.0007023093639872968, |
|
"learning_rate": 8.293871866295265e-06, |
|
"loss": 0.0002, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 54.6875, |
|
"grad_norm": 0.0011785700917243958, |
|
"learning_rate": 8.259052924791087e-06, |
|
"loss": 0.0002, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 54.6875, |
|
"eval_accuracy": 0.96484375, |
|
"eval_loss": 0.217063769698143, |
|
"eval_runtime": 5.3451, |
|
"eval_samples_per_second": 191.577, |
|
"eval_steps_per_second": 23.947, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 55.46875, |
|
"grad_norm": 0.0007988162687979639, |
|
"learning_rate": 8.22423398328691e-06, |
|
"loss": 0.0001, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 56.25, |
|
"grad_norm": 0.0009737128275446594, |
|
"learning_rate": 8.18941504178273e-06, |
|
"loss": 0.0001, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 57.03125, |
|
"grad_norm": 0.0006344786379486322, |
|
"learning_rate": 8.154596100278552e-06, |
|
"loss": 0.0001, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 57.8125, |
|
"grad_norm": 0.0009238629718311131, |
|
"learning_rate": 8.119777158774373e-06, |
|
"loss": 0.0001, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 58.59375, |
|
"grad_norm": 0.000863746739923954, |
|
"learning_rate": 8.084958217270195e-06, |
|
"loss": 0.0001, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 59.375, |
|
"grad_norm": 0.0005797584308311343, |
|
"learning_rate": 8.050139275766017e-06, |
|
"loss": 0.0001, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"step": 3840, |
|
"total_flos": 1.904477274611122e+19, |
|
"train_loss": 0.0010260362852325974, |
|
"train_runtime": 2806.5312, |
|
"train_samples_per_second": 87.567, |
|
"train_steps_per_second": 1.368 |
|
} |
|
], |
|
"logging_steps": 50, |
|
"max_steps": 3840, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 60, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 5, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.904477274611122e+19, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|