{
  "best_metric": 0.7119987566055331,
  "best_model_checkpoint": "xls-r-greek-aivaliot/checkpoint-14196",
  "epoch": 35.0,
  "eval_steps": 500,
  "global_step": 19110,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {"epoch": 0.37, "learning_rate": 0.00011999999999999999, "loss": 6.1718, "step": 200},
    {"epoch": 0.73, "learning_rate": 0.00023999999999999998, "loss": 3.1837, "step": 400},
    {"epoch": 1.0, "eval_cer": 0.6650782444993528, "eval_loss": 2.3546931743621826, "eval_runtime": 42.745, "eval_samples_per_second": 25.547, "eval_steps_per_second": 3.205, "eval_wer": 0.972023624494871, "step": 546},
    {"epoch": 1.1, "learning_rate": 0.00029838796346050506, "loss": 2.2974, "step": 600},
    {"epoch": 1.47, "learning_rate": 0.0002951638903815153, "loss": 2.0321, "step": 800},
    {"epoch": 1.83, "learning_rate": 0.00029193981730252553, "loss": 1.9583, "step": 1000},
    {"epoch": 2.0, "eval_cer": 0.45720084715848924, "eval_loss": 1.9236657619476318, "eval_runtime": 44.6726, "eval_samples_per_second": 24.444, "eval_steps_per_second": 3.067, "eval_wer": 0.8734846129934721, "step": 1092},
    {"epoch": 2.2, "learning_rate": 0.0002887157442235357, "loss": 1.7559, "step": 1200},
    {"epoch": 2.56, "learning_rate": 0.00028549167114454594, "loss": 1.6492, "step": 1400},
    {"epoch": 2.93, "learning_rate": 0.0002822675980655561, "loss": 1.6148, "step": 1600},
    {"epoch": 3.0, "eval_cer": 0.450847158489234, "eval_loss": 1.817557692527771, "eval_runtime": 41.1726, "eval_samples_per_second": 26.522, "eval_steps_per_second": 3.327, "eval_wer": 0.8234379857009636, "step": 1638},
    {"epoch": 3.3, "learning_rate": 0.00027904352498656635, "loss": 1.421, "step": 1800},
    {"epoch": 3.66, "learning_rate": 0.0002758194519075766, "loss": 1.423, "step": 2000},
    {"epoch": 4.0, "eval_cer": 0.4232556771384869, "eval_loss": 1.8342238664627075, "eval_runtime": 39.2094, "eval_samples_per_second": 27.85, "eval_steps_per_second": 3.494, "eval_wer": 0.820329499533727, "step": 2184},
    {"epoch": 4.03, "learning_rate": 0.00027259537882858676, "loss": 1.4186, "step": 2200},
    {"epoch": 4.4, "learning_rate": 0.000269371305749597, "loss": 1.2185, "step": 2400},
    {"epoch": 4.76, "learning_rate": 0.00026614723267060717, "loss": 1.2324, "step": 2600},
    {"epoch": 5.0, "eval_cer": 0.4242852100247088, "eval_loss": 1.7897675037384033, "eval_runtime": 45.221, "eval_samples_per_second": 24.148, "eval_steps_per_second": 3.03, "eval_wer": 0.8047870686975443, "step": 2730},
    {"epoch": 5.13, "learning_rate": 0.00026292315959161735, "loss": 1.135, "step": 2800},
    {"epoch": 5.49, "learning_rate": 0.0002596990865126276, "loss": 1.0774, "step": 3000},
    {"epoch": 5.86, "learning_rate": 0.0002564750134336378, "loss": 1.0871, "step": 3200},
    {"epoch": 6.0, "eval_cer": 0.40566537239675254, "eval_loss": 1.783974051475525, "eval_runtime": 39.7961, "eval_samples_per_second": 27.44, "eval_steps_per_second": 3.443, "eval_wer": 0.7671743860739819, "step": 3276},
    {"epoch": 6.23, "learning_rate": 0.000253250940354648, "loss": 1.0111, "step": 3400},
    {"epoch": 6.59, "learning_rate": 0.0002500268672756582, "loss": 0.9552, "step": 3600},
    {"epoch": 6.96, "learning_rate": 0.00024680279419666846, "loss": 0.9449, "step": 3800},
    {"epoch": 7.0, "eval_cer": 0.3969584657018473, "eval_loss": 1.9680403470993042, "eval_runtime": 41.0562, "eval_samples_per_second": 26.598, "eval_steps_per_second": 3.337, "eval_wer": 0.7777432390425862, "step": 3822},
    {"epoch": 7.33, "learning_rate": 0.00024357872111767866, "loss": 0.8435, "step": 4000},
    {"epoch": 7.69, "learning_rate": 0.00024035464803868887, "loss": 0.8168, "step": 4200},
    {"epoch": 8.0, "eval_cer": 0.4033415695964231, "eval_loss": 1.956693172454834, "eval_runtime": 38.8124, "eval_samples_per_second": 28.135, "eval_steps_per_second": 3.53, "eval_wer": 0.7690394777743239, "step": 4368},
    {"epoch": 8.06, "learning_rate": 0.00023713057495969905, "loss": 0.8366, "step": 4400},
    {"epoch": 8.42, "learning_rate": 0.00023390650188070925, "loss": 0.728, "step": 4600},
    {"epoch": 8.79, "learning_rate": 0.00023068242880171949, "loss": 0.7254, "step": 4800},
    {"epoch": 9.0, "eval_cer": 0.408224496999647, "eval_loss": 2.184379816055298, "eval_runtime": 39.1148, "eval_samples_per_second": 27.918, "eval_steps_per_second": 3.503, "eval_wer": 0.7878458190861051, "step": 4914},
    {"epoch": 9.16, "learning_rate": 0.0002274583557227297, "loss": 0.7016, "step": 5000},
    {"epoch": 9.52, "learning_rate": 0.0002242342826437399, "loss": 0.6563, "step": 5200},
    {"epoch": 9.89, "learning_rate": 0.0002210102095647501, "loss": 0.6584, "step": 5400},
    {"epoch": 10.0, "eval_cer": 0.3924579362277915, "eval_loss": 2.313096046447754, "eval_runtime": 38.9682, "eval_samples_per_second": 28.023, "eval_steps_per_second": 3.516, "eval_wer": 0.7681069319241529, "step": 5460},
    {"epoch": 10.26, "learning_rate": 0.0002177861364857603, "loss": 0.6053, "step": 5600},
    {"epoch": 10.62, "learning_rate": 0.00021456206340677054, "loss": 0.5844, "step": 5800},
    {"epoch": 10.99, "learning_rate": 0.00021133799032778075, "loss": 0.5825, "step": 6000},
    {"epoch": 11.0, "eval_cer": 0.39419343452170846, "eval_loss": 2.4024157524108887, "eval_runtime": 38.7244, "eval_samples_per_second": 28.199, "eval_steps_per_second": 3.538, "eval_wer": 0.7614236866645944, "step": 6006},
    {"epoch": 11.36, "learning_rate": 0.00020811391724879095, "loss": 0.4944, "step": 6200},
    {"epoch": 11.72, "learning_rate": 0.00020488984416980116, "loss": 0.5214, "step": 6400},
    {"epoch": 12.0, "eval_cer": 0.37542652076714905, "eval_loss": 2.2336461544036865, "eval_runtime": 39.0775, "eval_samples_per_second": 27.944, "eval_steps_per_second": 3.506, "eval_wer": 0.7415293751942804, "step": 6552},
    {"epoch": 12.09, "learning_rate": 0.0002016657710908114, "loss": 0.4977, "step": 6600},
    {"epoch": 12.45, "learning_rate": 0.0001984416980118216, "loss": 0.4326, "step": 6800},
    {"epoch": 12.82, "learning_rate": 0.0001952176249328318, "loss": 0.4618, "step": 7000},
    {"epoch": 13.0, "eval_cer": 0.38816331333098014, "eval_loss": 2.4235401153564453, "eval_runtime": 38.6611, "eval_samples_per_second": 28.245, "eval_steps_per_second": 3.544, "eval_wer": 0.7475909232203917, "step": 7098},
    {"epoch": 13.19, "learning_rate": 0.000191993551853842, "loss": 0.4269, "step": 7200},
    {"epoch": 13.55, "learning_rate": 0.0001887694787748522, "loss": 0.3915, "step": 7400},
    {"epoch": 13.92, "learning_rate": 0.00018554540569586244, "loss": 0.4034, "step": 7600},
    {"epoch": 14.0, "eval_cer": 0.38966348982233207, "eval_loss": 2.432621479034424, "eval_runtime": 39.0269, "eval_samples_per_second": 27.981, "eval_steps_per_second": 3.51, "eval_wer": 0.7384208890270438, "step": 7644},
    {"epoch": 14.29, "learning_rate": 0.00018232133261687265, "loss": 0.3616, "step": 7800},
    {"epoch": 14.65, "learning_rate": 0.00017909725953788285, "loss": 0.3638, "step": 8000},
    {"epoch": 15.0, "eval_cer": 0.38351570773032123, "eval_loss": 2.5917038917541504, "eval_runtime": 38.9489, "eval_samples_per_second": 28.037, "eval_steps_per_second": 3.517, "eval_wer": 0.7409076779608331, "step": 8190},
    {"epoch": 15.02, "learning_rate": 0.00017587318645889303, "loss": 0.3469, "step": 8200},
    {"epoch": 15.38, "learning_rate": 0.00017264911337990324, "loss": 0.2989, "step": 8400},
    {"epoch": 15.75, "learning_rate": 0.0001694250403009135, "loss": 0.307, "step": 8600},
    {"epoch": 16.0, "eval_cer": 0.38233909871749616, "eval_loss": 2.653606414794922, "eval_runtime": 38.6877, "eval_samples_per_second": 28.226, "eval_steps_per_second": 3.541, "eval_wer": 0.7483680447622008, "step": 8736},
    {"epoch": 16.12, "learning_rate": 0.00016620096722192368, "loss": 0.3028, "step": 8800},
    {"epoch": 16.48, "learning_rate": 0.00016297689414293388, "loss": 0.2857, "step": 9000},
    {"epoch": 16.85, "learning_rate": 0.00015975282106394409, "loss": 0.2794, "step": 9200},
    {"epoch": 17.0, "eval_cer": 0.3855159430521238, "eval_loss": 2.7668089866638184, "eval_runtime": 38.9844, "eval_samples_per_second": 28.011, "eval_steps_per_second": 3.514, "eval_wer": 0.7413739508859185, "step": 9282},
    {"epoch": 17.22, "learning_rate": 0.00015652874798495432, "loss": 0.2484, "step": 9400},
    {"epoch": 17.58, "learning_rate": 0.00015330467490596452, "loss": 0.245, "step": 9600},
    {"epoch": 17.95, "learning_rate": 0.00015008060182697473, "loss": 0.2445, "step": 9800},
    {"epoch": 18.0, "eval_cer": 0.3990469466996117, "eval_loss": 2.955599069595337, "eval_runtime": 38.9844, "eval_samples_per_second": 28.011, "eval_steps_per_second": 3.514, "eval_wer": 0.7597140192726143, "step": 9828},
    {"epoch": 18.32, "learning_rate": 0.00014685652874798494, "loss": 0.208, "step": 10000},
    {"epoch": 18.68, "learning_rate": 0.00014363245566899517, "loss": 0.2209, "step": 10200},
    {"epoch": 19.0, "eval_cer": 0.38439816448994, "eval_loss": 2.9723663330078125, "eval_runtime": 38.8984, "eval_samples_per_second": 28.073, "eval_steps_per_second": 3.522, "eval_wer": 0.7396642834939384, "step": 10374},
    {"epoch": 19.05, "learning_rate": 0.00014040838259000535, "loss": 0.2107, "step": 10400},
    {"epoch": 19.41, "learning_rate": 0.00013718430951101558, "loss": 0.1935, "step": 10600},
    {"epoch": 19.78, "learning_rate": 0.00013396023643202578, "loss": 0.193, "step": 10800},
    {"epoch": 20.0, "eval_cer": 0.3829568184492293, "eval_loss": 3.105555534362793, "eval_runtime": 38.7297, "eval_samples_per_second": 28.195, "eval_steps_per_second": 3.537, "eval_wer": 0.7412185265775567, "step": 10920},
    {"epoch": 20.15, "learning_rate": 0.000130736163353036, "loss": 0.1764, "step": 11000},
    {"epoch": 20.51, "learning_rate": 0.0001275120902740462, "loss": 0.1683, "step": 11200},
    {"epoch": 20.88, "learning_rate": 0.0001242880171950564, "loss": 0.1608, "step": 11400},
    {"epoch": 21.0, "eval_cer": 0.3850452994469938, "eval_loss": 3.2178432941436768, "eval_runtime": 38.643, "eval_samples_per_second": 28.259, "eval_steps_per_second": 3.545, "eval_wer": 0.7332918868511035, "step": 11466},
    {"epoch": 21.25, "learning_rate": 0.00012106394411606662, "loss": 0.1514, "step": 11600},
    {"epoch": 21.61, "learning_rate": 0.00011783987103707682, "loss": 0.1421, "step": 11800},
    {"epoch": 21.98, "learning_rate": 0.00011461579795808704, "loss": 0.1454, "step": 12000},
    {"epoch": 22.0, "eval_cer": 0.3789563478056242, "eval_loss": 3.3512697219848633, "eval_runtime": 38.841, "eval_samples_per_second": 28.115, "eval_steps_per_second": 3.527, "eval_wer": 0.7322039166925707, "step": 12012},
    {"epoch": 22.34, "learning_rate": 0.00011139172487909725, "loss": 0.1378, "step": 12200},
    {"epoch": 22.71, "learning_rate": 0.00010816765180010745, "loss": 0.1269, "step": 12400},
    {"epoch": 23.0, "eval_cer": 0.38342746205435935, "eval_loss": 3.4092280864715576, "eval_runtime": 39.0183, "eval_samples_per_second": 27.987, "eval_steps_per_second": 3.511, "eval_wer": 0.7311159465340379, "step": 12558},
    {"epoch": 23.08, "learning_rate": 0.00010494357872111767, "loss": 0.1334, "step": 12600},
    {"epoch": 23.44, "learning_rate": 0.00010171950564212788, "loss": 0.1069, "step": 12800},
    {"epoch": 23.81, "learning_rate": 9.84954325631381e-05, "loss": 0.114, "step": 13000},
    {"epoch": 24.0, "eval_cer": 0.3863101541357807, "eval_loss": 3.3670547008514404, "eval_runtime": 38.7589, "eval_samples_per_second": 28.174, "eval_steps_per_second": 3.535, "eval_wer": 0.7300279763755051, "step": 13104},
    {"epoch": 24.18, "learning_rate": 9.527135948414829e-05, "loss": 0.1064, "step": 13200},
    {"epoch": 24.54, "learning_rate": 9.204728640515852e-05, "loss": 0.1056, "step": 13400},
    {"epoch": 24.91, "learning_rate": 8.882321332616871e-05, "loss": 0.1086, "step": 13600},
    {"epoch": 25.0, "eval_cer": 0.38175079421108365, "eval_loss": 3.6167728900909424, "eval_runtime": 38.7877, "eval_samples_per_second": 28.153, "eval_steps_per_second": 3.532, "eval_wer": 0.7179048803232826, "step": 13650},
    {"epoch": 25.27, "learning_rate": 8.559914024717892e-05, "loss": 0.0853, "step": 13800},
    {"epoch": 25.64, "learning_rate": 8.237506716818914e-05, "loss": 0.0877, "step": 14000},
    {"epoch": 26.0, "eval_cer": 0.3746617249088128, "eval_loss": 3.5180928707122803, "eval_runtime": 38.7044, "eval_samples_per_second": 28.214, "eval_steps_per_second": 3.54, "eval_wer": 0.7119987566055331, "step": 14196},
    {"epoch": 26.01, "learning_rate": 7.915099408919934e-05, "loss": 0.0966, "step": 14200},
    {"epoch": 26.37, "learning_rate": 7.592692101020956e-05, "loss": 0.0855, "step": 14400},
    {"epoch": 26.74, "learning_rate": 7.270284793121977e-05, "loss": 0.0856, "step": 14600},
    {"epoch": 27.0, "eval_cer": 0.39136957289092833, "eval_loss": 3.875309467315674, "eval_runtime": 38.7918, "eval_samples_per_second": 28.15, "eval_steps_per_second": 3.532, "eval_wer": 0.7429281939695368, "step": 14742},
    {"epoch": 27.11, "learning_rate": 6.947877485222997e-05, "loss": 0.081, "step": 14800},
    {"epoch": 27.47, "learning_rate": 6.625470177324019e-05, "loss": 0.0708, "step": 15000},
    {"epoch": 27.84, "learning_rate": 6.30306286942504e-05, "loss": 0.0734, "step": 15200},
    {"epoch": 28.0, "eval_cer": 0.37516178373926345, "eval_loss": 3.8759613037109375, "eval_runtime": 38.881, "eval_samples_per_second": 28.086, "eval_steps_per_second": 3.524, "eval_wer": 0.7227230338824993, "step": 15288},
    {"epoch": 28.21, "learning_rate": 5.980655561526061e-05, "loss": 0.0734, "step": 15400},
    {"epoch": 28.57, "learning_rate": 5.658248253627082e-05, "loss": 0.0642, "step": 15600},
    {"epoch": 28.94, "learning_rate": 5.335840945728102e-05, "loss": 0.0702, "step": 15800},
    {"epoch": 29.0, "eval_cer": 0.37551476644311094, "eval_loss": 3.9142181873321533, "eval_runtime": 38.8332, "eval_samples_per_second": 28.12, "eval_steps_per_second": 3.528, "eval_wer": 0.723811004041032, "step": 15834},
    {"epoch": 29.3, "learning_rate": 5.0134336378291234e-05, "loss": 0.0617, "step": 16000},
    {"epoch": 29.67, "learning_rate": 4.6910263299301446e-05, "loss": 0.0661, "step": 16200},
    {"epoch": 30.0, "eval_cer": 0.3737498529238734, "eval_loss": 4.010254859924316, "eval_runtime": 38.9811, "eval_samples_per_second": 28.014, "eval_steps_per_second": 3.515, "eval_wer": 0.7196145477152627, "step": 16380},
    {"epoch": 30.04, "learning_rate": 4.368619022031166e-05, "loss": 0.0559, "step": 16400},
    {"epoch": 30.4, "learning_rate": 4.046211714132187e-05, "loss": 0.0575, "step": 16600},
    {"epoch": 30.77, "learning_rate": 3.7238044062332076e-05, "loss": 0.0569, "step": 16800},
    {"epoch": 31.0, "eval_cer": 0.3768090363572185, "eval_loss": 4.117391109466553, "eval_runtime": 39.1729, "eval_samples_per_second": 27.876, "eval_steps_per_second": 3.497, "eval_wer": 0.729250854833696, "step": 16926},
    {"epoch": 31.14, "learning_rate": 3.401397098334229e-05, "loss": 0.0559, "step": 17000},
    {"epoch": 31.5, "learning_rate": 3.078989790435249e-05, "loss": 0.0501, "step": 17200},
    {"epoch": 31.87, "learning_rate": 2.756582482536271e-05, "loss": 0.0494, "step": 17400},
    {"epoch": 32.0, "eval_cer": 0.37404400517707964, "eval_loss": 4.161037921905518, "eval_runtime": 38.9916, "eval_samples_per_second": 28.006, "eval_steps_per_second": 3.514, "eval_wer": 0.7207025178737955, "step": 17472},
    {"epoch": 32.23, "learning_rate": 2.4341751746372914e-05, "loss": 0.048, "step": 17600},
    {"epoch": 32.6, "learning_rate": 2.1117678667383127e-05, "loss": 0.0509, "step": 17800},
    {"epoch": 32.97, "learning_rate": 1.7893605588393335e-05, "loss": 0.0493, "step": 18000},
    {"epoch": 33.0, "eval_cer": 0.37219084598188024, "eval_loss": 4.1340861320495605, "eval_runtime": 38.9947, "eval_samples_per_second": 28.004, "eval_steps_per_second": 3.513, "eval_wer": 0.7165060615480261, "step": 18018},
    {"epoch": 33.33, "learning_rate": 1.4669532509403546e-05, "loss": 0.0415, "step": 18200},
    {"epoch": 33.7, "learning_rate": 1.1445459430413755e-05, "loss": 0.0395, "step": 18400},
    {"epoch": 34.0, "eval_cer": 0.372132015531239, "eval_loss": 4.2251057624816895, "eval_runtime": 38.8607, "eval_samples_per_second": 28.1, "eval_steps_per_second": 3.525, "eval_wer": 0.7135529996891514, "step": 18564},
    {"epoch": 34.07, "learning_rate": 8.221386351423965e-06, "loss": 0.0472, "step": 18600},
    {"epoch": 34.43, "learning_rate": 4.997313272434174e-06, "loss": 0.037, "step": 18800},
    {"epoch": 34.8, "learning_rate": 1.7732401934443846e-06, "loss": 0.0377, "step": 19000},
    {"epoch": 35.0, "eval_cer": 0.3725438286857277, "eval_loss": 4.262682914733887, "eval_runtime": 38.6349, "eval_samples_per_second": 28.265, "eval_steps_per_second": 3.546, "eval_wer": 0.715728940006217, "step": 19110}
  ],
  "logging_steps": 200,
  "max_steps": 19110,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 35,
  "save_steps": 500,
  "total_flos": 2.6904981744760697e+19,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}