{ "best_metric": 0.9152691968225949, "best_model_checkpoint": "trillsson3-ft-keyword-spotting-13/checkpoint-11970", "epoch": 19.999373825923605, "global_step": 15960, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.13, "learning_rate": 1.7669172932330825e-05, "loss": 7.8872, "step": 100 }, { "epoch": 0.25, "learning_rate": 3.646616541353383e-05, "loss": 6.0261, "step": 200 }, { "epoch": 0.38, "learning_rate": 5.526315789473683e-05, "loss": 3.8766, "step": 300 }, { "epoch": 0.5, "learning_rate": 7.406015037593984e-05, "loss": 3.1036, "step": 400 }, { "epoch": 0.63, "learning_rate": 9.285714285714286e-05, "loss": 2.5949, "step": 500 }, { "epoch": 0.75, "learning_rate": 0.00011165413533834586, "loss": 2.158, "step": 600 }, { "epoch": 0.88, "learning_rate": 0.00013045112781954885, "loss": 1.7756, "step": 700 }, { "epoch": 1.0, "eval_accuracy": 0.6395998823183289, "eval_loss": 0.9283319711685181, "eval_runtime": 153.4542, "eval_samples_per_second": 44.3, "eval_steps_per_second": 1.388, "step": 798 }, { "epoch": 1.0, "learning_rate": 0.00014924812030075185, "loss": 1.4696, "step": 800 }, { "epoch": 1.13, "learning_rate": 0.00016804511278195486, "loss": 1.2914, "step": 900 }, { "epoch": 1.25, "learning_rate": 0.0001868421052631579, "loss": 1.1715, "step": 1000 }, { "epoch": 1.38, "learning_rate": 0.0002056390977443609, "loss": 1.087, "step": 1100 }, { "epoch": 1.5, "learning_rate": 0.0002244360902255639, "loss": 0.978, "step": 1200 }, { "epoch": 1.63, "learning_rate": 0.0002432330827067669, "loss": 0.92, "step": 1300 }, { "epoch": 1.75, "learning_rate": 0.0002620300751879699, "loss": 0.8856, "step": 1400 }, { "epoch": 1.88, "learning_rate": 0.0002808270676691729, "loss": 0.8631, "step": 1500 }, { "epoch": 2.0, "eval_accuracy": 0.8573109738158282, "eval_loss": 0.4884476065635681, "eval_runtime": 147.3242, "eval_samples_per_second": 46.143, "eval_steps_per_second": 1.446, "step": 1596 }, { "epoch": 2.01, "learning_rate": 0.0002996240601503759, "loss": 0.837, "step": 1600 }, { "epoch": 2.13, "learning_rate": 0.000297953216374269, "loss": 0.8428, "step": 1700 }, { "epoch": 2.26, "learning_rate": 0.0002958646616541353, "loss": 0.7937, "step": 1800 }, { "epoch": 2.38, "learning_rate": 0.0002937761069340017, "loss": 0.7866, "step": 1900 }, { "epoch": 2.51, "learning_rate": 0.00029168755221386795, "loss": 0.7648, "step": 2000 }, { "epoch": 2.63, "learning_rate": 0.00028959899749373433, "loss": 0.7622, "step": 2100 }, { "epoch": 2.76, "learning_rate": 0.00028751044277360065, "loss": 0.7636, "step": 2200 }, { "epoch": 2.88, "learning_rate": 0.000285421888053467, "loss": 0.7551, "step": 2300 }, { "epoch": 3.0, "eval_accuracy": 0.8832009414533687, "eval_loss": 0.3967166543006897, "eval_runtime": 146.488, "eval_samples_per_second": 46.407, "eval_steps_per_second": 1.454, "step": 2394 }, { "epoch": 3.01, "learning_rate": 0.0002833333333333333, "loss": 0.7545, "step": 2400 }, { "epoch": 3.13, "learning_rate": 0.0002812447786131997, "loss": 0.7217, "step": 2500 }, { "epoch": 3.26, "learning_rate": 0.00027915622389306595, "loss": 0.7158, "step": 2600 }, { "epoch": 3.38, "learning_rate": 0.0002770676691729323, "loss": 0.7164, "step": 2700 }, { "epoch": 3.51, "learning_rate": 0.00027497911445279865, "loss": 0.6992, "step": 2800 }, { "epoch": 3.63, "learning_rate": 0.000272890559732665, "loss": 0.7291, "step": 2900 }, { "epoch": 3.76, "learning_rate": 0.0002708020050125313, "loss": 0.6976, "step": 3000 }, { "epoch": 3.88, "learning_rate": 0.0002687134502923976, "loss": 0.6968, "step": 3100 }, { "epoch": 4.0, "eval_accuracy": 0.8989408649602825, "eval_loss": 0.36444538831710815, "eval_runtime": 146.8663, "eval_samples_per_second": 46.287, "eval_steps_per_second": 1.45, "step": 3192 }, { "epoch": 4.01, "learning_rate": 0.00026662489557226395, "loss": 0.7219, "step": 3200 }, { "epoch": 4.14, "learning_rate": 0.0002645363408521303, "loss": 0.6859, "step": 3300 }, { "epoch": 4.26, "learning_rate": 0.00026244778613199665, "loss": 0.7076, "step": 3400 }, { "epoch": 4.39, "learning_rate": 0.00026035923141186297, "loss": 0.6935, "step": 3500 }, { "epoch": 4.51, "learning_rate": 0.0002582706766917293, "loss": 0.7098, "step": 3600 }, { "epoch": 4.64, "learning_rate": 0.0002561821219715956, "loss": 0.6902, "step": 3700 }, { "epoch": 4.76, "learning_rate": 0.00025409356725146194, "loss": 0.6824, "step": 3800 }, { "epoch": 4.89, "learning_rate": 0.0002520050125313283, "loss": 0.67, "step": 3900 }, { "epoch": 5.0, "eval_accuracy": 0.9057075610473668, "eval_loss": 0.342781126499176, "eval_runtime": 148.4976, "eval_samples_per_second": 45.779, "eval_steps_per_second": 1.434, "step": 3990 }, { "epoch": 5.01, "learning_rate": 0.00024991645781119465, "loss": 0.6909, "step": 4000 }, { "epoch": 5.14, "learning_rate": 0.00024782790309106097, "loss": 0.7105, "step": 4100 }, { "epoch": 5.26, "learning_rate": 0.0002457393483709273, "loss": 0.7022, "step": 4200 }, { "epoch": 5.39, "learning_rate": 0.00024365079365079364, "loss": 0.6867, "step": 4300 }, { "epoch": 5.51, "learning_rate": 0.00024156223893065997, "loss": 0.6881, "step": 4400 }, { "epoch": 5.64, "learning_rate": 0.00023947368421052632, "loss": 0.6882, "step": 4500 }, { "epoch": 5.76, "learning_rate": 0.00023738512949039262, "loss": 0.6711, "step": 4600 }, { "epoch": 5.89, "learning_rate": 0.00023529657477025897, "loss": 0.6854, "step": 4700 }, { "epoch": 6.0, "eval_accuracy": 0.902618417181524, "eval_loss": 0.34081852436065674, "eval_runtime": 149.584, "eval_samples_per_second": 45.446, "eval_steps_per_second": 1.424, "step": 4788 }, { "epoch": 6.02, "learning_rate": 0.0002332080200501253, "loss": 0.6815, "step": 4800 }, { "epoch": 6.14, "learning_rate": 0.00023111946532999164, "loss": 0.6863, "step": 4900 }, { "epoch": 6.27, "learning_rate": 0.00022903091060985797, "loss": 0.6843, "step": 5000 }, { "epoch": 6.39, "learning_rate": 0.0002269423558897243, "loss": 0.6677, "step": 5100 }, { "epoch": 6.52, "learning_rate": 0.00022485380116959061, "loss": 0.7096, "step": 5200 }, { "epoch": 6.64, "learning_rate": 0.00022276524644945696, "loss": 0.6534, "step": 5300 }, { "epoch": 6.77, "learning_rate": 0.0002206766917293233, "loss": 0.6773, "step": 5400 }, { "epoch": 6.89, "learning_rate": 0.00021858813700918964, "loss": 0.6701, "step": 5500 }, { "epoch": 7.0, "eval_accuracy": 0.901294498381877, "eval_loss": 0.3358515202999115, "eval_runtime": 149.9158, "eval_samples_per_second": 45.345, "eval_steps_per_second": 1.421, "step": 5586 }, { "epoch": 7.02, "learning_rate": 0.00021649958228905596, "loss": 0.6695, "step": 5600 }, { "epoch": 7.14, "learning_rate": 0.00021443191311612363, "loss": 0.7058, "step": 5700 }, { "epoch": 7.27, "learning_rate": 0.00021234335839598995, "loss": 0.6577, "step": 5800 }, { "epoch": 7.39, "learning_rate": 0.0002102548036758563, "loss": 0.6799, "step": 5900 }, { "epoch": 7.52, "learning_rate": 0.00020816624895572263, "loss": 0.6727, "step": 6000 }, { "epoch": 7.64, "learning_rate": 0.00020607769423558895, "loss": 0.6397, "step": 6100 }, { "epoch": 7.77, "learning_rate": 0.00020398913951545527, "loss": 0.6845, "step": 6200 }, { "epoch": 7.89, "learning_rate": 0.00020190058479532163, "loss": 0.6734, "step": 6300 }, { "epoch": 8.0, "eval_accuracy": 0.9058546631362165, "eval_loss": 0.3285204768180847, "eval_runtime": 149.6806, "eval_samples_per_second": 45.417, "eval_steps_per_second": 1.423, "step": 6384 }, { "epoch": 8.02, "learning_rate": 0.00019981203007518795, "loss": 0.6753, "step": 6400 }, { "epoch": 8.15, "learning_rate": 0.0001977234753550543, "loss": 0.6674, "step": 6500 }, { "epoch": 8.27, "learning_rate": 0.00019563492063492062, "loss": 0.6704, "step": 6600 }, { "epoch": 8.4, "learning_rate": 0.00019354636591478695, "loss": 0.686, "step": 6700 }, { "epoch": 8.52, "learning_rate": 0.00019145781119465327, "loss": 0.6792, "step": 6800 }, { "epoch": 8.65, "learning_rate": 0.00018936925647451962, "loss": 0.6698, "step": 6900 }, { "epoch": 8.77, "learning_rate": 0.00018728070175438595, "loss": 0.6755, "step": 7000 }, { "epoch": 8.9, "learning_rate": 0.0001851921470342523, "loss": 0.6581, "step": 7100 }, { "epoch": 9.0, "eval_accuracy": 0.9095322153574581, "eval_loss": 0.31989586353302, "eval_runtime": 148.5082, "eval_samples_per_second": 45.775, "eval_steps_per_second": 1.434, "step": 7182 }, { "epoch": 9.02, "learning_rate": 0.0001831035923141186, "loss": 0.6601, "step": 7200 }, { "epoch": 9.15, "learning_rate": 0.00018101503759398495, "loss": 0.6746, "step": 7300 }, { "epoch": 9.27, "learning_rate": 0.00017892648287385127, "loss": 0.6805, "step": 7400 }, { "epoch": 9.4, "learning_rate": 0.00017683792815371762, "loss": 0.6777, "step": 7500 }, { "epoch": 9.52, "learning_rate": 0.00017474937343358395, "loss": 0.6619, "step": 7600 }, { "epoch": 9.65, "learning_rate": 0.0001726608187134503, "loss": 0.6625, "step": 7700 }, { "epoch": 9.77, "learning_rate": 0.0001705722639933166, "loss": 0.665, "step": 7800 }, { "epoch": 9.9, "learning_rate": 0.00016848370927318294, "loss": 0.6557, "step": 7900 }, { "epoch": 10.0, "eval_accuracy": 0.8986466607825832, "eval_loss": 0.3300594091415405, "eval_runtime": 148.7316, "eval_samples_per_second": 45.706, "eval_steps_per_second": 1.432, "step": 7980 }, { "epoch": 10.03, "learning_rate": 0.00016639515455304927, "loss": 0.6688, "step": 8000 }, { "epoch": 10.15, "learning_rate": 0.00016432748538011696, "loss": 0.6642, "step": 8100 }, { "epoch": 10.28, "learning_rate": 0.00016223893065998326, "loss": 0.6542, "step": 8200 }, { "epoch": 10.4, "learning_rate": 0.0001601503759398496, "loss": 0.6461, "step": 8300 }, { "epoch": 10.53, "learning_rate": 0.00015806182121971593, "loss": 0.6721, "step": 8400 }, { "epoch": 10.65, "learning_rate": 0.00015597326649958228, "loss": 0.6642, "step": 8500 }, { "epoch": 10.78, "learning_rate": 0.0001538847117794486, "loss": 0.6529, "step": 8600 }, { "epoch": 10.9, "learning_rate": 0.00015179615705931496, "loss": 0.6768, "step": 8700 }, { "epoch": 11.0, "eval_accuracy": 0.9046778464254193, "eval_loss": 0.31744641065597534, "eval_runtime": 148.9285, "eval_samples_per_second": 45.646, "eval_steps_per_second": 1.43, "step": 8778 }, { "epoch": 11.03, "learning_rate": 0.00014970760233918125, "loss": 0.7036, "step": 8800 }, { "epoch": 11.15, "learning_rate": 0.0001476190476190476, "loss": 0.6825, "step": 8900 }, { "epoch": 11.28, "learning_rate": 0.00014553049289891393, "loss": 0.6693, "step": 9000 }, { "epoch": 11.4, "learning_rate": 0.00014344193817878025, "loss": 0.661, "step": 9100 }, { "epoch": 11.53, "learning_rate": 0.0001413533834586466, "loss": 0.6625, "step": 9200 }, { "epoch": 11.65, "learning_rate": 0.00013926482873851293, "loss": 0.6523, "step": 9300 }, { "epoch": 11.78, "learning_rate": 0.00013717627401837925, "loss": 0.6765, "step": 9400 }, { "epoch": 11.9, "learning_rate": 0.0001350877192982456, "loss": 0.6459, "step": 9500 }, { "epoch": 12.0, "eval_accuracy": 0.903059723448073, "eval_loss": 0.3192310631275177, "eval_runtime": 149.2235, "eval_samples_per_second": 45.556, "eval_steps_per_second": 1.427, "step": 9576 }, { "epoch": 12.03, "learning_rate": 0.00013299916457811193, "loss": 0.6518, "step": 9600 }, { "epoch": 12.16, "learning_rate": 0.00013091060985797825, "loss": 0.6644, "step": 9700 }, { "epoch": 12.28, "learning_rate": 0.0001288220551378446, "loss": 0.6565, "step": 9800 }, { "epoch": 12.41, "learning_rate": 0.00012673350041771093, "loss": 0.6647, "step": 9900 }, { "epoch": 12.53, "learning_rate": 0.00012464494569757725, "loss": 0.6651, "step": 10000 }, { "epoch": 12.66, "learning_rate": 0.0001225563909774436, "loss": 0.6697, "step": 10100 }, { "epoch": 12.78, "learning_rate": 0.00012046783625730992, "loss": 0.6559, "step": 10200 }, { "epoch": 12.91, "learning_rate": 0.00011837928153717626, "loss": 0.6607, "step": 10300 }, { "epoch": 13.0, "eval_accuracy": 0.9065901735804649, "eval_loss": 0.3172565996646881, "eval_runtime": 149.6048, "eval_samples_per_second": 45.44, "eval_steps_per_second": 1.424, "step": 10374 }, { "epoch": 13.03, "learning_rate": 0.00011629072681704259, "loss": 0.6435, "step": 10400 }, { "epoch": 13.16, "learning_rate": 0.00011420217209690892, "loss": 0.6727, "step": 10500 }, { "epoch": 13.28, "learning_rate": 0.00011211361737677526, "loss": 0.6701, "step": 10600 }, { "epoch": 13.41, "learning_rate": 0.00011004594820384294, "loss": 0.6474, "step": 10700 }, { "epoch": 13.53, "learning_rate": 0.00010795739348370928, "loss": 0.6307, "step": 10800 }, { "epoch": 13.66, "learning_rate": 0.0001058688387635756, "loss": 0.6748, "step": 10900 }, { "epoch": 13.78, "learning_rate": 0.00010378028404344194, "loss": 0.6382, "step": 11000 }, { "epoch": 13.91, "learning_rate": 0.00010169172932330827, "loss": 0.656, "step": 11100 }, { "epoch": 14.0, "eval_accuracy": 0.9093851132686084, "eval_loss": 0.3141985833644867, "eval_runtime": 152.4058, "eval_samples_per_second": 44.605, "eval_steps_per_second": 1.398, "step": 11172 }, { "epoch": 14.04, "learning_rate": 9.96031746031746e-05, "loss": 0.6329, "step": 11200 }, { "epoch": 14.16, "learning_rate": 9.751461988304094e-05, "loss": 0.6499, "step": 11300 }, { "epoch": 14.29, "learning_rate": 9.542606516290727e-05, "loss": 0.6651, "step": 11400 }, { "epoch": 14.41, "learning_rate": 9.33375104427736e-05, "loss": 0.6593, "step": 11500 }, { "epoch": 14.54, "learning_rate": 9.124895572263994e-05, "loss": 0.6637, "step": 11600 }, { "epoch": 14.66, "learning_rate": 8.916040100250626e-05, "loss": 0.6527, "step": 11700 }, { "epoch": 14.79, "learning_rate": 8.70718462823726e-05, "loss": 0.6358, "step": 11800 }, { "epoch": 14.91, "learning_rate": 8.498329156223893e-05, "loss": 0.6302, "step": 11900 }, { "epoch": 15.0, "eval_accuracy": 0.9152691968225949, "eval_loss": 0.30927804112434387, "eval_runtime": 149.4384, "eval_samples_per_second": 45.49, "eval_steps_per_second": 1.425, "step": 11970 }, { "epoch": 15.04, "learning_rate": 8.289473684210526e-05, "loss": 0.6972, "step": 12000 }, { "epoch": 15.16, "learning_rate": 8.08061821219716e-05, "loss": 0.6475, "step": 12100 }, { "epoch": 15.29, "learning_rate": 7.871762740183793e-05, "loss": 0.6497, "step": 12200 }, { "epoch": 15.41, "learning_rate": 7.662907268170426e-05, "loss": 0.6496, "step": 12300 }, { "epoch": 15.54, "learning_rate": 7.454051796157058e-05, "loss": 0.6397, "step": 12400 }, { "epoch": 15.66, "learning_rate": 7.245196324143692e-05, "loss": 0.6485, "step": 12500 }, { "epoch": 15.79, "learning_rate": 7.036340852130326e-05, "loss": 0.6785, "step": 12600 }, { "epoch": 15.91, "learning_rate": 6.827485380116958e-05, "loss": 0.636, "step": 12700 }, { "epoch": 16.0, "eval_accuracy": 0.9043836422477199, "eval_loss": 0.3184495270252228, "eval_runtime": 150.904, "eval_samples_per_second": 45.049, "eval_steps_per_second": 1.411, "step": 12768 }, { "epoch": 16.04, "learning_rate": 6.618629908103592e-05, "loss": 0.6455, "step": 12800 }, { "epoch": 16.17, "learning_rate": 6.409774436090225e-05, "loss": 0.629, "step": 12900 }, { "epoch": 16.29, "learning_rate": 6.200918964076858e-05, "loss": 0.6345, "step": 13000 }, { "epoch": 16.42, "learning_rate": 5.9920634920634916e-05, "loss": 0.6492, "step": 13100 }, { "epoch": 16.54, "learning_rate": 5.783208020050125e-05, "loss": 0.6486, "step": 13200 }, { "epoch": 16.67, "learning_rate": 5.5743525480367584e-05, "loss": 0.6422, "step": 13300 }, { "epoch": 16.79, "learning_rate": 5.367585630743525e-05, "loss": 0.6606, "step": 13400 }, { "epoch": 16.92, "learning_rate": 5.158730158730158e-05, "loss": 0.6327, "step": 13500 }, { "epoch": 17.0, "eval_accuracy": 0.911738746690203, "eval_loss": 0.3104247748851776, "eval_runtime": 150.9624, "eval_samples_per_second": 45.031, "eval_steps_per_second": 1.411, "step": 13566 }, { "epoch": 17.04, "learning_rate": 4.949874686716791e-05, "loss": 0.6315, "step": 13600 }, { "epoch": 17.17, "learning_rate": 4.741019214703425e-05, "loss": 0.6487, "step": 13700 }, { "epoch": 17.29, "learning_rate": 4.532163742690058e-05, "loss": 0.6465, "step": 13800 }, { "epoch": 17.42, "learning_rate": 4.323308270676691e-05, "loss": 0.6309, "step": 13900 }, { "epoch": 17.54, "learning_rate": 4.1144527986633246e-05, "loss": 0.6369, "step": 14000 }, { "epoch": 17.67, "learning_rate": 3.905597326649958e-05, "loss": 0.655, "step": 14100 }, { "epoch": 17.79, "learning_rate": 3.696741854636591e-05, "loss": 0.6558, "step": 14200 }, { "epoch": 17.92, "learning_rate": 3.4878863826232245e-05, "loss": 0.6428, "step": 14300 }, { "epoch": 18.0, "eval_accuracy": 0.9083553986466608, "eval_loss": 0.315799742937088, "eval_runtime": 150.6062, "eval_samples_per_second": 45.138, "eval_steps_per_second": 1.414, "step": 14364 }, { "epoch": 18.05, "learning_rate": 3.2790309106098576e-05, "loss": 0.6466, "step": 14400 }, { "epoch": 18.17, "learning_rate": 3.070175438596491e-05, "loss": 0.6219, "step": 14500 }, { "epoch": 18.3, "learning_rate": 2.861319966583124e-05, "loss": 0.6351, "step": 14600 }, { "epoch": 18.42, "learning_rate": 2.6524644945697575e-05, "loss": 0.6638, "step": 14700 }, { "epoch": 18.55, "learning_rate": 2.4436090225563906e-05, "loss": 0.6414, "step": 14800 }, { "epoch": 18.67, "learning_rate": 2.234753550543024e-05, "loss": 0.6617, "step": 14900 }, { "epoch": 18.8, "learning_rate": 2.025898078529657e-05, "loss": 0.6359, "step": 15000 }, { "epoch": 18.92, "learning_rate": 1.8170426065162904e-05, "loss": 0.6515, "step": 15100 }, { "epoch": 19.0, "eval_accuracy": 0.9096793174463077, "eval_loss": 0.312863290309906, "eval_runtime": 150.6838, "eval_samples_per_second": 45.114, "eval_steps_per_second": 1.414, "step": 15162 }, { "epoch": 19.05, "learning_rate": 1.608187134502924e-05, "loss": 0.6385, "step": 15200 }, { "epoch": 19.17, "learning_rate": 1.3993316624895571e-05, "loss": 0.634, "step": 15300 }, { "epoch": 19.3, "learning_rate": 1.1904761904761903e-05, "loss": 0.6509, "step": 15400 }, { "epoch": 19.42, "learning_rate": 9.816207184628236e-06, "loss": 0.6388, "step": 15500 }, { "epoch": 19.55, "learning_rate": 7.727652464494568e-06, "loss": 0.6493, "step": 15600 }, { "epoch": 19.67, "learning_rate": 5.6390977443609015e-06, "loss": 0.6526, "step": 15700 }, { "epoch": 19.8, "learning_rate": 3.5505430242272343e-06, "loss": 0.6309, "step": 15800 }, { "epoch": 19.92, "learning_rate": 1.4619883040935671e-06, "loss": 0.6441, "step": 15900 }, { "epoch": 20.0, "eval_accuracy": 0.909973521624007, "eval_loss": 0.31152603030204773, "eval_runtime": 152.3497, "eval_samples_per_second": 44.621, "eval_steps_per_second": 1.398, "step": 15960 }, { "epoch": 20.0, "step": 15960, "total_flos": 0.0, "train_loss": 0.8353394886007285, "train_runtime": 26863.0447, "train_samples_per_second": 38.04, "train_steps_per_second": 0.594 } ], "max_steps": 15960, "num_train_epochs": 20, "total_flos": 0.0, "trial_name": null, "trial_params": null }