{
  "best_metric": 0.9040894380700206,
  "best_model_checkpoint": "trillsson3-ft-keyword-spotting/checkpoint-3192",
  "epoch": 9.999373825923607,
  "global_step": 7980,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.13,
      "learning_rate": 3.533834586466165e-05,
      "loss": 7.4799,
      "step": 100
    },
    {
      "epoch": 0.25,
      "learning_rate": 7.293233082706766e-05,
      "loss": 4.7288,
      "step": 200
    },
    {
      "epoch": 0.38,
      "learning_rate": 0.00011052631578947366,
      "loss": 3.0455,
      "step": 300
    },
    {
      "epoch": 0.5,
      "learning_rate": 0.00014812030075187968,
      "loss": 2.3495,
      "step": 400
    },
    {
      "epoch": 0.63,
      "learning_rate": 0.00018571428571428572,
      "loss": 1.836,
      "step": 500
    },
    {
      "epoch": 0.75,
      "learning_rate": 0.00022330827067669172,
      "loss": 1.4179,
      "step": 600
    },
    {
      "epoch": 0.88,
      "learning_rate": 0.0002609022556390977,
      "loss": 1.1824,
      "step": 700
    },
    {
      "epoch": 1.0,
      "eval_accuracy": 0.7488967343336276,
      "eval_loss": 0.6477929949760437,
      "eval_runtime": 42.5234,
      "eval_samples_per_second": 159.865,
      "eval_steps_per_second": 2.516,
      "step": 798
    },
    {
      "epoch": 1.0,
      "learning_rate": 0.0002984962406015037,
      "loss": 1.0261,
      "step": 800
    },
    {
      "epoch": 1.13,
      "learning_rate": 0.00029598997493734334,
      "loss": 0.9596,
      "step": 900
    },
    {
      "epoch": 1.25,
      "learning_rate": 0.000291812865497076,
      "loss": 0.8732,
      "step": 1000
    },
    {
      "epoch": 1.38,
      "learning_rate": 0.00028763575605680864,
      "loss": 0.8509,
      "step": 1100
    },
    {
      "epoch": 1.5,
      "learning_rate": 0.00028345864661654134,
      "loss": 0.8162,
      "step": 1200
    },
    {
      "epoch": 1.63,
      "learning_rate": 0.000279281537176274,
      "loss": 0.7603,
      "step": 1300
    },
    {
      "epoch": 1.75,
      "learning_rate": 0.00027510442773600664,
      "loss": 0.7638,
      "step": 1400
    },
    {
      "epoch": 1.88,
      "learning_rate": 0.00027092731829573934,
      "loss": 0.7448,
      "step": 1500
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.8727566931450427,
      "eval_loss": 0.4273872375488281,
      "eval_runtime": 42.1254,
      "eval_samples_per_second": 161.375,
      "eval_steps_per_second": 2.54,
      "step": 1596
    },
    {
      "epoch": 2.01,
      "learning_rate": 0.000266750208855472,
      "loss": 0.7268,
      "step": 1600
    },
    {
      "epoch": 2.13,
      "learning_rate": 0.00026257309941520463,
      "loss": 0.7503,
      "step": 1700
    },
    {
      "epoch": 2.26,
      "learning_rate": 0.00025839598997493734,
      "loss": 0.719,
      "step": 1800
    },
    {
      "epoch": 2.38,
      "learning_rate": 0.00025421888053467,
      "loss": 0.7174,
      "step": 1900
    },
    {
      "epoch": 2.51,
      "learning_rate": 0.00025004177109440263,
      "loss": 0.6869,
      "step": 2000
    },
    {
      "epoch": 2.63,
      "learning_rate": 0.00024586466165413533,
      "loss": 0.7113,
      "step": 2100
    },
    {
      "epoch": 2.76,
      "learning_rate": 0.00024168755221386798,
      "loss": 0.7011,
      "step": 2200
    },
    {
      "epoch": 2.88,
      "learning_rate": 0.00023751044277360066,
      "loss": 0.7089,
      "step": 2300
    },
    {
      "epoch": 3.0,
      "eval_accuracy": 0.8949691085613416,
      "eval_loss": 0.3723289370536804,
      "eval_runtime": 41.1393,
      "eval_samples_per_second": 165.243,
      "eval_steps_per_second": 2.601,
      "step": 2394
    },
    {
      "epoch": 3.01,
      "learning_rate": 0.0002333333333333333,
      "loss": 0.6834,
      "step": 2400
    },
    {
      "epoch": 3.13,
      "learning_rate": 0.00022915622389306598,
      "loss": 0.6804,
      "step": 2500
    },
    {
      "epoch": 3.26,
      "learning_rate": 0.00022497911445279865,
      "loss": 0.6771,
      "step": 2600
    },
    {
      "epoch": 3.38,
      "learning_rate": 0.0002208020050125313,
      "loss": 0.684,
      "step": 2700
    },
    {
      "epoch": 3.51,
      "learning_rate": 0.00021662489557226398,
      "loss": 0.6636,
      "step": 2800
    },
    {
      "epoch": 3.63,
      "learning_rate": 0.00021244778613199665,
      "loss": 0.7003,
      "step": 2900
    },
    {
      "epoch": 3.76,
      "learning_rate": 0.0002082706766917293,
      "loss": 0.6594,
      "step": 3000
    },
    {
      "epoch": 3.88,
      "learning_rate": 0.00020409356725146197,
      "loss": 0.6781,
      "step": 3100
    },
    {
      "epoch": 4.0,
      "eval_accuracy": 0.9040894380700206,
      "eval_loss": 0.3562973439693451,
      "eval_runtime": 42.576,
      "eval_samples_per_second": 159.667,
      "eval_steps_per_second": 2.513,
      "step": 3192
    },
    {
      "epoch": 4.01,
      "learning_rate": 0.00019991645781119465,
      "loss": 0.6921,
      "step": 3200
    },
    {
      "epoch": 4.14,
      "learning_rate": 0.0001957393483709273,
      "loss": 0.6615,
      "step": 3300
    },
    {
      "epoch": 4.26,
      "learning_rate": 0.00019156223893065997,
      "loss": 0.6707,
      "step": 3400
    },
    {
      "epoch": 4.39,
      "learning_rate": 0.00018738512949039265,
      "loss": 0.6511,
      "step": 3500
    },
    {
      "epoch": 4.51,
      "learning_rate": 0.0001832080200501253,
      "loss": 0.6974,
      "step": 3600
    },
    {
      "epoch": 4.64,
      "learning_rate": 0.00017903091060985797,
      "loss": 0.6447,
      "step": 3700
    },
    {
      "epoch": 4.76,
      "learning_rate": 0.00017485380116959065,
      "loss": 0.643,
      "step": 3800
    },
    {
      "epoch": 4.89,
      "learning_rate": 0.0001706766917293233,
      "loss": 0.6386,
      "step": 3900
    },
    {
      "epoch": 5.0,
      "eval_accuracy": 0.8986466607825832,
      "eval_loss": 0.3440994322299957,
      "eval_runtime": 42.3928,
      "eval_samples_per_second": 160.358,
      "eval_steps_per_second": 2.524,
      "step": 3990
    },
    {
      "epoch": 5.01,
      "learning_rate": 0.00016649958228905597,
      "loss": 0.6611,
      "step": 4000
    },
    {
      "epoch": 5.14,
      "learning_rate": 0.0001623642439431913,
      "loss": 0.6634,
      "step": 4100
    },
    {
      "epoch": 5.26,
      "learning_rate": 0.00015818713450292397,
      "loss": 0.6419,
      "step": 4200
    },
    {
      "epoch": 5.39,
      "learning_rate": 0.00015401002506265662,
      "loss": 0.6341,
      "step": 4300
    },
    {
      "epoch": 5.51,
      "learning_rate": 0.0001498329156223893,
      "loss": 0.6283,
      "step": 4400
    },
    {
      "epoch": 5.64,
      "learning_rate": 0.00014565580618212197,
      "loss": 0.6551,
      "step": 4500
    },
    {
      "epoch": 5.76,
      "learning_rate": 0.00014147869674185462,
      "loss": 0.6318,
      "step": 4600
    },
    {
      "epoch": 5.89,
      "learning_rate": 0.0001373015873015873,
      "loss": 0.6342,
      "step": 4700
    },
    {
      "epoch": 6.0,
      "eval_accuracy": 0.8993821712268314,
      "eval_loss": 0.3380272388458252,
      "eval_runtime": 42.8693,
      "eval_samples_per_second": 158.575,
      "eval_steps_per_second": 2.496,
      "step": 4788
    },
    {
      "epoch": 6.02,
      "learning_rate": 0.00013312447786131997,
      "loss": 0.6371,
      "step": 4800
    },
    {
      "epoch": 6.14,
      "learning_rate": 0.00012894736842105261,
      "loss": 0.6484,
      "step": 4900
    },
    {
      "epoch": 6.27,
      "learning_rate": 0.0001247702589807853,
      "loss": 0.6331,
      "step": 5000
    },
    {
      "epoch": 6.39,
      "learning_rate": 0.00012059314954051795,
      "loss": 0.6201,
      "step": 5100
    },
    {
      "epoch": 6.52,
      "learning_rate": 0.00011641604010025061,
      "loss": 0.6619,
      "step": 5200
    },
    {
      "epoch": 6.64,
      "learning_rate": 0.00011223893065998329,
      "loss": 0.6337,
      "step": 5300
    },
    {
      "epoch": 6.77,
      "learning_rate": 0.00010806182121971595,
      "loss": 0.6356,
      "step": 5400
    },
    {
      "epoch": 6.89,
      "learning_rate": 0.00010388471177944861,
      "loss": 0.6275,
      "step": 5500
    },
    {
      "epoch": 7.0,
      "eval_accuracy": 0.8982053545160341,
      "eval_loss": 0.33755984902381897,
      "eval_runtime": 42.4768,
      "eval_samples_per_second": 160.04,
      "eval_steps_per_second": 2.519,
      "step": 5586
    },
    {
      "epoch": 7.02,
      "learning_rate": 9.970760233918129e-05,
      "loss": 0.6359,
      "step": 5600
    },
    {
      "epoch": 7.14,
      "learning_rate": 9.553049289891395e-05,
      "loss": 0.6502,
      "step": 5700
    },
    {
      "epoch": 7.27,
      "learning_rate": 9.135338345864661e-05,
      "loss": 0.6138,
      "step": 5800
    },
    {
      "epoch": 7.39,
      "learning_rate": 8.717627401837928e-05,
      "loss": 0.6309,
      "step": 5900
    },
    {
      "epoch": 7.52,
      "learning_rate": 8.299916457811194e-05,
      "loss": 0.6386,
      "step": 6000
    },
    {
      "epoch": 7.64,
      "learning_rate": 7.88220551378446e-05,
      "loss": 0.6149,
      "step": 6100
    },
    {
      "epoch": 7.77,
      "learning_rate": 7.464494569757727e-05,
      "loss": 0.6218,
      "step": 6200
    },
    {
      "epoch": 7.89,
      "learning_rate": 7.050960735171261e-05,
      "loss": 0.6349,
      "step": 6300
    },
    {
      "epoch": 8.0,
      "eval_accuracy": 0.9014416004707266,
      "eval_loss": 0.3333294689655304,
      "eval_runtime": 43.296,
      "eval_samples_per_second": 157.012,
      "eval_steps_per_second": 2.471,
      "step": 6384
    },
    {
      "epoch": 8.02,
      "learning_rate": 6.633249791144527e-05,
      "loss": 0.6115,
      "step": 6400
    },
    {
      "epoch": 8.15,
      "learning_rate": 6.215538847117793e-05,
      "loss": 0.6463,
      "step": 6500
    },
    {
      "epoch": 8.27,
      "learning_rate": 5.79782790309106e-05,
      "loss": 0.6183,
      "step": 6600
    },
    {
      "epoch": 8.4,
      "learning_rate": 5.380116959064327e-05,
      "loss": 0.6475,
      "step": 6700
    },
    {
      "epoch": 8.52,
      "learning_rate": 4.962406015037593e-05,
      "loss": 0.6437,
      "step": 6800
    },
    {
      "epoch": 8.65,
      "learning_rate": 4.54469507101086e-05,
      "loss": 0.6265,
      "step": 6900
    },
    {
      "epoch": 8.77,
      "learning_rate": 4.1269841269841266e-05,
      "loss": 0.6326,
      "step": 7000
    },
    {
      "epoch": 8.9,
      "learning_rate": 3.709273182957393e-05,
      "loss": 0.6261,
      "step": 7100
    },
    {
      "epoch": 9.0,
      "eval_accuracy": 0.9024713150926743,
      "eval_loss": 0.32949280738830566,
      "eval_runtime": 42.5233,
      "eval_samples_per_second": 159.865,
      "eval_steps_per_second": 2.516,
      "step": 7182
    },
    {
      "epoch": 9.02,
      "learning_rate": 3.2915622389306596e-05,
      "loss": 0.6306,
      "step": 7200
    },
    {
      "epoch": 9.15,
      "learning_rate": 2.873851294903926e-05,
      "loss": 0.6362,
      "step": 7300
    },
    {
      "epoch": 9.27,
      "learning_rate": 2.4561403508771925e-05,
      "loss": 0.6318,
      "step": 7400
    },
    {
      "epoch": 9.4,
      "learning_rate": 2.0384294068504594e-05,
      "loss": 0.6506,
      "step": 7500
    },
    {
      "epoch": 9.52,
      "learning_rate": 1.620718462823726e-05,
      "loss": 0.6202,
      "step": 7600
    },
    {
      "epoch": 9.65,
      "learning_rate": 1.2030075187969923e-05,
      "loss": 0.6217,
      "step": 7700
    },
    {
      "epoch": 9.77,
      "learning_rate": 7.852965747702588e-06,
      "loss": 0.6307,
      "step": 7800
    },
    {
      "epoch": 9.9,
      "learning_rate": 3.6758563074352545e-06,
      "loss": 0.6188,
      "step": 7900
    },
    {
      "epoch": 10.0,
      "eval_accuracy": 0.9024713150926743,
      "eval_loss": 0.33220550417900085,
      "eval_runtime": 42.0746,
      "eval_samples_per_second": 161.57,
      "eval_steps_per_second": 2.543,
      "step": 7980
    },
    {
      "epoch": 10.0,
      "step": 7980,
      "total_flos": 0.0,
      "train_loss": 0.8915880279732229,
      "train_runtime": 4529.6112,
      "train_samples_per_second": 112.8,
      "train_steps_per_second": 1.762
    }
  ],
  "max_steps": 7980,
  "num_train_epochs": 10,
  "total_flos": 0.0,
  "trial_name": null,
  "trial_params": null
}