|
{ |
|
"best_metric": 3.2635854592980795, |
|
"best_model_checkpoint": "OUTCOMESAI/whisper-large-v3-medical/checkpoint-5000", |
|
"epoch": 3.8255547054322876, |
|
"eval_steps": 200, |
|
"global_step": 5000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.07651109410864575, |
|
"grad_norm": 4.125481605529785, |
|
"learning_rate": 4.2874883363489934e-07, |
|
"loss": 6.0929, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.1530221882172915, |
|
"grad_norm": 4.159045696258545, |
|
"learning_rate": 4.971255772939331e-07, |
|
"loss": 4.2439, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.1530221882172915, |
|
"eval_loss": 0.29345703125, |
|
"eval_runtime": 988.7646, |
|
"eval_samples_per_second": 1.131, |
|
"eval_steps_per_second": 0.035, |
|
"eval_wer": 4.507771356816114, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.22953328232593725, |
|
"grad_norm": 2.488393545150757, |
|
"learning_rate": 4.903124999999999e-07, |
|
"loss": 3.5952, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.306044376434583, |
|
"grad_norm": 2.386241912841797, |
|
"learning_rate": 4.798958333333334e-07, |
|
"loss": 3.3374, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.306044376434583, |
|
"eval_loss": 0.2734375, |
|
"eval_runtime": 1214.5433, |
|
"eval_samples_per_second": 0.921, |
|
"eval_steps_per_second": 0.029, |
|
"eval_wer": 4.696129057058288, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.38255547054322875, |
|
"grad_norm": 2.348032236099243, |
|
"learning_rate": 4.6947916666666664e-07, |
|
"loss": 3.1968, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.4590665646518745, |
|
"grad_norm": 2.6516611576080322, |
|
"learning_rate": 4.5906249999999995e-07, |
|
"loss": 3.0833, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.4590665646518745, |
|
"eval_loss": 0.267333984375, |
|
"eval_runtime": 991.4442, |
|
"eval_samples_per_second": 1.128, |
|
"eval_steps_per_second": 0.035, |
|
"eval_wer": 4.273285240188102, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.5355776587605203, |
|
"grad_norm": 3.4489781856536865, |
|
"learning_rate": 4.486458333333333e-07, |
|
"loss": 2.6306, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.612088752869166, |
|
"grad_norm": 2.9474337100982666, |
|
"learning_rate": 4.3822916666666667e-07, |
|
"loss": 1.8243, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.612088752869166, |
|
"eval_loss": 0.26806640625, |
|
"eval_runtime": 991.8395, |
|
"eval_samples_per_second": 1.127, |
|
"eval_steps_per_second": 0.035, |
|
"eval_wer": 4.4372973873377495, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.6885998469778117, |
|
"grad_norm": 2.7142157554626465, |
|
"learning_rate": 4.278125e-07, |
|
"loss": 1.382, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.7651109410864575, |
|
"grad_norm": 2.386582612991333, |
|
"learning_rate": 4.1739583333333334e-07, |
|
"loss": 1.1288, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.7651109410864575, |
|
"eval_loss": 0.2548828125, |
|
"eval_runtime": 991.8205, |
|
"eval_samples_per_second": 1.127, |
|
"eval_steps_per_second": 0.035, |
|
"eval_wer": 4.277129274886922, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.8416220351951033, |
|
"grad_norm": 2.2802257537841797, |
|
"learning_rate": 4.0697916666666665e-07, |
|
"loss": 0.9419, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.918133129303749, |
|
"grad_norm": 2.0405514240264893, |
|
"learning_rate": 3.9656249999999996e-07, |
|
"loss": 0.8199, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.918133129303749, |
|
"eval_loss": 0.2412109375, |
|
"eval_runtime": 994.3546, |
|
"eval_samples_per_second": 1.124, |
|
"eval_steps_per_second": 0.035, |
|
"eval_wer": 4.204092615609343, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.9946442234123948, |
|
"grad_norm": 1.88164222240448, |
|
"learning_rate": 3.861458333333333e-07, |
|
"loss": 0.7493, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 1.0711553175210407, |
|
"grad_norm": 1.8802990913391113, |
|
"learning_rate": 3.7572916666666663e-07, |
|
"loss": 0.681, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1.0711553175210407, |
|
"eval_loss": 0.2310791015625, |
|
"eval_runtime": 990.487, |
|
"eval_samples_per_second": 1.129, |
|
"eval_steps_per_second": 0.035, |
|
"eval_wer": 4.105429058339634, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1.1476664116296864, |
|
"grad_norm": 1.6826539039611816, |
|
"learning_rate": 3.6531249999999994e-07, |
|
"loss": 0.6296, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.2241775057383322, |
|
"grad_norm": 1.7766120433807373, |
|
"learning_rate": 3.5489583333333336e-07, |
|
"loss": 0.5798, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 1.2241775057383322, |
|
"eval_loss": 0.21923828125, |
|
"eval_runtime": 989.3653, |
|
"eval_samples_per_second": 1.13, |
|
"eval_steps_per_second": 0.035, |
|
"eval_wer": 4.009328190869136, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 1.300688599846978, |
|
"grad_norm": 1.597964882850647, |
|
"learning_rate": 3.4447916666666667e-07, |
|
"loss": 0.5603, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 1.3771996939556237, |
|
"grad_norm": 1.5654646158218384, |
|
"learning_rate": 3.340625e-07, |
|
"loss": 0.5233, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 1.3771996939556237, |
|
"eval_loss": 0.2071533203125, |
|
"eval_runtime": 987.6154, |
|
"eval_samples_per_second": 1.132, |
|
"eval_steps_per_second": 0.035, |
|
"eval_wer": 3.8927258050049334, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 1.4537107880642695, |
|
"grad_norm": 1.5115400552749634, |
|
"learning_rate": 3.2364583333333334e-07, |
|
"loss": 0.4919, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 1.5302218821729152, |
|
"grad_norm": 1.5361956357955933, |
|
"learning_rate": 3.1322916666666665e-07, |
|
"loss": 0.463, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.5302218821729152, |
|
"eval_loss": 0.19921875, |
|
"eval_runtime": 984.868, |
|
"eval_samples_per_second": 1.135, |
|
"eval_steps_per_second": 0.036, |
|
"eval_wer": 3.8196891457273554, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.606732976281561, |
|
"grad_norm": 1.4659453630447388, |
|
"learning_rate": 3.0281249999999996e-07, |
|
"loss": 0.4509, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 1.6832440703902067, |
|
"grad_norm": 1.384372591972351, |
|
"learning_rate": 2.923958333333333e-07, |
|
"loss": 0.428, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 1.6832440703902067, |
|
"eval_loss": 0.195068359375, |
|
"eval_runtime": 985.7714, |
|
"eval_samples_per_second": 1.134, |
|
"eval_steps_per_second": 0.036, |
|
"eval_wer": 3.7748420742411235, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 1.7597551644988525, |
|
"grad_norm": 1.451076865196228, |
|
"learning_rate": 2.8197916666666663e-07, |
|
"loss": 0.4161, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 1.836266258607498, |
|
"grad_norm": 1.5242278575897217, |
|
"learning_rate": 2.715625e-07, |
|
"loss": 0.3944, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 1.836266258607498, |
|
"eval_loss": 0.1866455078125, |
|
"eval_runtime": 987.2353, |
|
"eval_samples_per_second": 1.132, |
|
"eval_steps_per_second": 0.035, |
|
"eval_wer": 3.67745986187102, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 1.9127773527161438, |
|
"grad_norm": 1.31293785572052, |
|
"learning_rate": 2.6114583333333336e-07, |
|
"loss": 0.38, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.9892884468247896, |
|
"grad_norm": 1.3785984516143799, |
|
"learning_rate": 2.5072916666666667e-07, |
|
"loss": 0.3682, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 1.9892884468247896, |
|
"eval_loss": 0.17919921875, |
|
"eval_runtime": 989.9094, |
|
"eval_samples_per_second": 1.129, |
|
"eval_steps_per_second": 0.035, |
|
"eval_wer": 3.604423202593442, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 2.0657995409334355, |
|
"grad_norm": 1.3718817234039307, |
|
"learning_rate": 2.403125e-07, |
|
"loss": 0.36, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 2.1423106350420813, |
|
"grad_norm": 1.3219904899597168, |
|
"learning_rate": 2.298958333333333e-07, |
|
"loss": 0.3543, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 2.1423106350420813, |
|
"eval_loss": 0.1724853515625, |
|
"eval_runtime": 995.0367, |
|
"eval_samples_per_second": 1.124, |
|
"eval_steps_per_second": 0.035, |
|
"eval_wer": 3.530105198416258, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 2.218821729150727, |
|
"grad_norm": 1.2633302211761475, |
|
"learning_rate": 2.1947916666666665e-07, |
|
"loss": 0.3438, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 2.295332823259373, |
|
"grad_norm": 1.2741142511367798, |
|
"learning_rate": 2.090625e-07, |
|
"loss": 0.3368, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 2.295332823259373, |
|
"eval_loss": 0.17138671875, |
|
"eval_runtime": 991.227, |
|
"eval_samples_per_second": 1.128, |
|
"eval_steps_per_second": 0.035, |
|
"eval_wer": 3.490383506528452, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 2.371843917368018, |
|
"grad_norm": 1.2177200317382812, |
|
"learning_rate": 1.9864583333333332e-07, |
|
"loss": 0.3218, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 2.4483550114766643, |
|
"grad_norm": 1.24095618724823, |
|
"learning_rate": 1.8822916666666666e-07, |
|
"loss": 0.3136, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 2.4483550114766643, |
|
"eval_loss": 0.164794921875, |
|
"eval_runtime": 995.8265, |
|
"eval_samples_per_second": 1.123, |
|
"eval_steps_per_second": 0.035, |
|
"eval_wer": 3.45706853913868, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 2.5248661055853097, |
|
"grad_norm": 1.146142840385437, |
|
"learning_rate": 1.778125e-07, |
|
"loss": 0.3269, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 2.601377199693956, |
|
"grad_norm": 1.1721028089523315, |
|
"learning_rate": 1.6739583333333333e-07, |
|
"loss": 0.3121, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 2.601377199693956, |
|
"eval_loss": 0.160400390625, |
|
"eval_runtime": 990.5861, |
|
"eval_samples_per_second": 1.129, |
|
"eval_steps_per_second": 0.035, |
|
"eval_wer": 3.4237535717489074, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 2.677888293802601, |
|
"grad_norm": 1.5002176761627197, |
|
"learning_rate": 1.5697916666666666e-07, |
|
"loss": 0.2999, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 2.7543993879112474, |
|
"grad_norm": 1.250117301940918, |
|
"learning_rate": 1.465625e-07, |
|
"loss": 0.2959, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 2.7543993879112474, |
|
"eval_loss": 0.1561279296875, |
|
"eval_runtime": 997.6176, |
|
"eval_samples_per_second": 1.121, |
|
"eval_steps_per_second": 0.035, |
|
"eval_wer": 3.395563983957562, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 2.8309104820198927, |
|
"grad_norm": 1.2006161212921143, |
|
"learning_rate": 1.361458333333333e-07, |
|
"loss": 0.2942, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 2.907421576128539, |
|
"grad_norm": 1.2639755010604858, |
|
"learning_rate": 1.2572916666666667e-07, |
|
"loss": 0.2912, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 2.907421576128539, |
|
"eval_loss": 0.15380859375, |
|
"eval_runtime": 989.6556, |
|
"eval_samples_per_second": 1.13, |
|
"eval_steps_per_second": 0.035, |
|
"eval_wer": 3.3737811206642494, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 2.9839326702371842, |
|
"grad_norm": 1.1383545398712158, |
|
"learning_rate": 1.153125e-07, |
|
"loss": 0.2921, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 3.06044376434583, |
|
"grad_norm": 1.1553541421890259, |
|
"learning_rate": 1.0489583333333332e-07, |
|
"loss": 0.2767, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 3.06044376434583, |
|
"eval_loss": 0.151123046875, |
|
"eval_runtime": 994.8808, |
|
"eval_samples_per_second": 1.124, |
|
"eval_steps_per_second": 0.035, |
|
"eval_wer": 3.3455915328729033, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 3.1369548584544757, |
|
"grad_norm": 14.852476119995117, |
|
"learning_rate": 9.447916666666667e-08, |
|
"loss": 0.2857, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 3.2134659525631215, |
|
"grad_norm": 3.1388800144195557, |
|
"learning_rate": 8.406249999999999e-08, |
|
"loss": 0.2848, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 3.2134659525631215, |
|
"eval_loss": 0.148681640625, |
|
"eval_runtime": 1017.2976, |
|
"eval_samples_per_second": 1.099, |
|
"eval_steps_per_second": 0.034, |
|
"eval_wer": 3.319964634880771, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 3.2899770466717673, |
|
"grad_norm": 1.152143120765686, |
|
"learning_rate": 7.364583333333333e-08, |
|
"loss": 0.2756, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 3.366488140780413, |
|
"grad_norm": 1.1069058179855347, |
|
"learning_rate": 6.322916666666666e-08, |
|
"loss": 0.274, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 3.366488140780413, |
|
"eval_loss": 0.1474609375, |
|
"eval_runtime": 993.2425, |
|
"eval_samples_per_second": 1.126, |
|
"eval_steps_per_second": 0.035, |
|
"eval_wer": 3.284086977691785, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 3.442999234889059, |
|
"grad_norm": 1.054897427558899, |
|
"learning_rate": 5.291666666666667e-08, |
|
"loss": 0.2729, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 3.5195103289977046, |
|
"grad_norm": 1.0845283269882202, |
|
"learning_rate": 4.2500000000000003e-08, |
|
"loss": 0.2694, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 3.5195103289977046, |
|
"eval_loss": 0.1463623046875, |
|
"eval_runtime": 1016.9332, |
|
"eval_samples_per_second": 1.099, |
|
"eval_steps_per_second": 0.034, |
|
"eval_wer": 3.282805632792179, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 3.5960214231063503, |
|
"grad_norm": 1.0754883289337158, |
|
"learning_rate": 3.208333333333333e-08, |
|
"loss": 0.2635, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 3.672532517214996, |
|
"grad_norm": 1.3197473287582397, |
|
"learning_rate": 2.1770833333333332e-08, |
|
"loss": 0.2731, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 3.672532517214996, |
|
"eval_loss": 0.1455078125, |
|
"eval_runtime": 989.4621, |
|
"eval_samples_per_second": 1.13, |
|
"eval_steps_per_second": 0.035, |
|
"eval_wer": 3.268710838896506, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 3.749043611323642, |
|
"grad_norm": 1.156948208808899, |
|
"learning_rate": 1.1354166666666667e-08, |
|
"loss": 0.2589, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 3.8255547054322876, |
|
"grad_norm": 1.1573657989501953, |
|
"learning_rate": 9.375e-10, |
|
"loss": 0.2677, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 3.8255547054322876, |
|
"eval_loss": 0.145263671875, |
|
"eval_runtime": 992.9783, |
|
"eval_samples_per_second": 1.126, |
|
"eval_steps_per_second": 0.035, |
|
"eval_wer": 3.2635854592980795, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 3.8255547054322876, |
|
"step": 5000, |
|
"total_flos": 1.0871892140465376e+21, |
|
"train_loss": 0.9307616455078125, |
|
"train_runtime": 97022.7442, |
|
"train_samples_per_second": 3.298, |
|
"train_steps_per_second": 0.052 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 5000, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 4, |
|
"save_steps": 200, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.0871892140465376e+21, |
|
"train_batch_size": 64, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|