|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 5.662514156285391, |
|
"eval_steps": 100, |
|
"global_step": 10000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.056625141562853906, |
|
"eval_loss": 4.158196449279785, |
|
"eval_runtime": 151.6577, |
|
"eval_samples_per_second": 37.295, |
|
"eval_steps_per_second": 4.662, |
|
"eval_wer": 1.0, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.11325028312570781, |
|
"eval_loss": 3.127609968185425, |
|
"eval_runtime": 148.7785, |
|
"eval_samples_per_second": 38.016, |
|
"eval_steps_per_second": 4.752, |
|
"eval_wer": 1.0, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.16987542468856173, |
|
"eval_loss": 3.407195568084717, |
|
"eval_runtime": 152.9346, |
|
"eval_samples_per_second": 36.983, |
|
"eval_steps_per_second": 4.623, |
|
"eval_wer": 1.0, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.22650056625141562, |
|
"eval_loss": 2.1966774463653564, |
|
"eval_runtime": 150.1881, |
|
"eval_samples_per_second": 37.659, |
|
"eval_steps_per_second": 4.707, |
|
"eval_wer": 0.9898733770923271, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.28312570781426954, |
|
"grad_norm": 1.805786371231079, |
|
"learning_rate": 0.0001491, |
|
"loss": 4.6703, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.28312570781426954, |
|
"eval_loss": 1.0575733184814453, |
|
"eval_runtime": 149.8633, |
|
"eval_samples_per_second": 37.741, |
|
"eval_steps_per_second": 4.718, |
|
"eval_wer": 0.7174335189613391, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.33975084937712347, |
|
"eval_loss": 0.8877128958702087, |
|
"eval_runtime": 152.3639, |
|
"eval_samples_per_second": 37.122, |
|
"eval_steps_per_second": 4.64, |
|
"eval_wer": 0.640464765450723, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.39637599093997733, |
|
"eval_loss": 0.7894217371940613, |
|
"eval_runtime": 156.1616, |
|
"eval_samples_per_second": 36.219, |
|
"eval_steps_per_second": 4.527, |
|
"eval_wer": 0.5861244403074899, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.45300113250283125, |
|
"eval_loss": 0.7698957324028015, |
|
"eval_runtime": 153.3515, |
|
"eval_samples_per_second": 36.883, |
|
"eval_steps_per_second": 4.61, |
|
"eval_wer": 0.5876650992601627, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.5096262740656852, |
|
"eval_loss": 0.7604610919952393, |
|
"eval_runtime": 153.0775, |
|
"eval_samples_per_second": 36.949, |
|
"eval_steps_per_second": 4.619, |
|
"eval_wer": 0.5402577394039576, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.5662514156285391, |
|
"grad_norm": 1.723030686378479, |
|
"learning_rate": 0.0002988, |
|
"loss": 0.5242, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.5662514156285391, |
|
"eval_loss": 0.742854654788971, |
|
"eval_runtime": 152.8719, |
|
"eval_samples_per_second": 36.998, |
|
"eval_steps_per_second": 4.625, |
|
"eval_wer": 0.5494856445892379, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.622876557191393, |
|
"eval_loss": 0.6761639714241028, |
|
"eval_runtime": 152.3166, |
|
"eval_samples_per_second": 37.133, |
|
"eval_steps_per_second": 4.642, |
|
"eval_wer": 0.5081125322976682, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.6795016987542469, |
|
"eval_loss": 0.67025226354599, |
|
"eval_runtime": 153.9119, |
|
"eval_samples_per_second": 36.748, |
|
"eval_steps_per_second": 4.594, |
|
"eval_wer": 0.5071977660445186, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.7361268403171007, |
|
"eval_loss": 0.6186646223068237, |
|
"eval_runtime": 155.1162, |
|
"eval_samples_per_second": 36.463, |
|
"eval_steps_per_second": 4.558, |
|
"eval_wer": 0.4623421225786779, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.7927519818799547, |
|
"eval_loss": 0.6205167174339294, |
|
"eval_runtime": 155.3219, |
|
"eval_samples_per_second": 36.415, |
|
"eval_steps_per_second": 4.552, |
|
"eval_wer": 0.4741698897465937, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.8493771234428086, |
|
"grad_norm": 1.6596413850784302, |
|
"learning_rate": 0.00028349999999999995, |
|
"loss": 0.4093, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.8493771234428086, |
|
"eval_loss": 0.6089454889297485, |
|
"eval_runtime": 156.2384, |
|
"eval_samples_per_second": 36.201, |
|
"eval_steps_per_second": 4.525, |
|
"eval_wer": 0.46065702684919196, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.9060022650056625, |
|
"eval_loss": 0.6079407334327698, |
|
"eval_runtime": 151.5221, |
|
"eval_samples_per_second": 37.328, |
|
"eval_steps_per_second": 4.666, |
|
"eval_wer": 0.4563720691370705, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.9626274065685164, |
|
"eval_loss": 0.5751839280128479, |
|
"eval_runtime": 157.3729, |
|
"eval_samples_per_second": 35.94, |
|
"eval_steps_per_second": 4.493, |
|
"eval_wer": 0.4487169199659771, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 1.0192525481313703, |
|
"eval_loss": 0.5519313812255859, |
|
"eval_runtime": 157.5034, |
|
"eval_samples_per_second": 35.91, |
|
"eval_steps_per_second": 4.489, |
|
"eval_wer": 0.41742228498980916, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 1.0758776896942241, |
|
"eval_loss": 0.5467554926872253, |
|
"eval_runtime": 156.3131, |
|
"eval_samples_per_second": 36.184, |
|
"eval_steps_per_second": 4.523, |
|
"eval_wer": 0.41071399913337936, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 1.1325028312570782, |
|
"grad_norm": 0.5150347352027893, |
|
"learning_rate": 0.0002669, |
|
"loss": 0.3366, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.1325028312570782, |
|
"eval_loss": 0.5371935963630676, |
|
"eval_runtime": 153.7972, |
|
"eval_samples_per_second": 36.776, |
|
"eval_steps_per_second": 4.597, |
|
"eval_wer": 0.4080338944969588, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.189127972819932, |
|
"eval_loss": 0.53590327501297, |
|
"eval_runtime": 153.8993, |
|
"eval_samples_per_second": 36.751, |
|
"eval_steps_per_second": 4.594, |
|
"eval_wer": 0.40715122530532327, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 1.245753114382786, |
|
"eval_loss": 0.530412495136261, |
|
"eval_runtime": 152.2809, |
|
"eval_samples_per_second": 37.142, |
|
"eval_steps_per_second": 4.643, |
|
"eval_wer": 0.4022885204859495, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 1.3023782559456398, |
|
"eval_loss": 0.5311455130577087, |
|
"eval_runtime": 152.7946, |
|
"eval_samples_per_second": 37.017, |
|
"eval_steps_per_second": 4.627, |
|
"eval_wer": 0.40111697774068783, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 1.3590033975084936, |
|
"eval_loss": 0.5186213254928589, |
|
"eval_runtime": 152.2204, |
|
"eval_samples_per_second": 37.157, |
|
"eval_steps_per_second": 4.645, |
|
"eval_wer": 0.38644862062878144, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 1.4156285390713477, |
|
"grad_norm": 0.7118180394172668, |
|
"learning_rate": 0.00025026666666666666, |
|
"loss": 0.2939, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.4156285390713477, |
|
"eval_loss": 0.5234143733978271, |
|
"eval_runtime": 153.4973, |
|
"eval_samples_per_second": 36.848, |
|
"eval_steps_per_second": 4.606, |
|
"eval_wer": 0.3934297315080804, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.4722536806342017, |
|
"eval_loss": 0.5212889909744263, |
|
"eval_runtime": 153.2132, |
|
"eval_samples_per_second": 36.916, |
|
"eval_steps_per_second": 4.614, |
|
"eval_wer": 0.39726533035900563, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 1.5288788221970555, |
|
"eval_loss": 0.5155624151229858, |
|
"eval_runtime": 153.779, |
|
"eval_samples_per_second": 36.78, |
|
"eval_steps_per_second": 4.598, |
|
"eval_wer": 0.3876522604355571, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 1.5855039637599093, |
|
"eval_loss": 0.5052253007888794, |
|
"eval_runtime": 153.3545, |
|
"eval_samples_per_second": 36.882, |
|
"eval_steps_per_second": 4.61, |
|
"eval_wer": 0.3897546179647253, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 1.6421291053227633, |
|
"eval_loss": 0.49809539318084717, |
|
"eval_runtime": 154.1154, |
|
"eval_samples_per_second": 36.7, |
|
"eval_steps_per_second": 4.587, |
|
"eval_wer": 0.38331915713116466, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 1.6987542468856174, |
|
"grad_norm": 0.7669665217399597, |
|
"learning_rate": 0.00023359999999999996, |
|
"loss": 0.2838, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.6987542468856174, |
|
"eval_loss": 0.49897971749305725, |
|
"eval_runtime": 152.4639, |
|
"eval_samples_per_second": 37.097, |
|
"eval_steps_per_second": 4.637, |
|
"eval_wer": 0.380414373064146, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.7553793884484712, |
|
"eval_loss": 0.49996882677078247, |
|
"eval_runtime": 151.3476, |
|
"eval_samples_per_second": 37.371, |
|
"eval_steps_per_second": 4.671, |
|
"eval_wer": 0.38070324661777216, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 1.812004530011325, |
|
"eval_loss": 0.49606603384017944, |
|
"eval_runtime": 151.8767, |
|
"eval_samples_per_second": 37.241, |
|
"eval_steps_per_second": 4.655, |
|
"eval_wer": 0.37513440644508994, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 1.8686296715741788, |
|
"eval_loss": 0.48593518137931824, |
|
"eval_runtime": 150.9613, |
|
"eval_samples_per_second": 37.467, |
|
"eval_steps_per_second": 4.683, |
|
"eval_wer": 0.3730962430389498, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 1.9252548131370328, |
|
"eval_loss": 0.481240451335907, |
|
"eval_runtime": 151.7124, |
|
"eval_samples_per_second": 37.281, |
|
"eval_steps_per_second": 4.66, |
|
"eval_wer": 0.3657139188907256, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 1.9818799546998869, |
|
"grad_norm": 1.395547866821289, |
|
"learning_rate": 0.00021696666666666664, |
|
"loss": 0.2694, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.9818799546998869, |
|
"eval_loss": 0.47789159417152405, |
|
"eval_runtime": 151.9051, |
|
"eval_samples_per_second": 37.234, |
|
"eval_steps_per_second": 4.654, |
|
"eval_wer": 0.36197461122434244, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 2.0385050962627407, |
|
"eval_loss": 0.4943128526210785, |
|
"eval_runtime": 152.1624, |
|
"eval_samples_per_second": 37.171, |
|
"eval_steps_per_second": 4.646, |
|
"eval_wer": 0.36325849368490315, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 2.0951302378255945, |
|
"eval_loss": 0.48801928758621216, |
|
"eval_runtime": 151.6345, |
|
"eval_samples_per_second": 37.3, |
|
"eval_steps_per_second": 4.663, |
|
"eval_wer": 0.3677360337661087, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 2.1517553793884483, |
|
"eval_loss": 0.49899762868881226, |
|
"eval_runtime": 151.9237, |
|
"eval_samples_per_second": 37.229, |
|
"eval_steps_per_second": 4.654, |
|
"eval_wer": 0.3661632777519218, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 2.2083805209513026, |
|
"eval_loss": 0.5101335644721985, |
|
"eval_runtime": 151.3463, |
|
"eval_samples_per_second": 37.371, |
|
"eval_steps_per_second": 4.671, |
|
"eval_wer": 0.36991863394906194, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 2.2650056625141564, |
|
"grad_norm": 1.0641744136810303, |
|
"learning_rate": 0.00020036666666666664, |
|
"loss": 0.2419, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 2.2650056625141564, |
|
"eval_loss": 0.5392731428146362, |
|
"eval_runtime": 151.0047, |
|
"eval_samples_per_second": 37.456, |
|
"eval_steps_per_second": 4.682, |
|
"eval_wer": 0.3901718797644076, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 2.32163080407701, |
|
"eval_loss": 0.6454418301582336, |
|
"eval_runtime": 151.5484, |
|
"eval_samples_per_second": 37.321, |
|
"eval_steps_per_second": 4.665, |
|
"eval_wer": 0.4513328304793696, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 2.378255945639864, |
|
"eval_loss": 0.989225447177887, |
|
"eval_runtime": 151.0915, |
|
"eval_samples_per_second": 37.434, |
|
"eval_steps_per_second": 4.679, |
|
"eval_wer": 0.5936672497632842, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 2.434881087202718, |
|
"eval_loss": 0.7711612582206726, |
|
"eval_runtime": 151.4711, |
|
"eval_samples_per_second": 37.34, |
|
"eval_steps_per_second": 4.668, |
|
"eval_wer": 0.5166984962526681, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 2.491506228765572, |
|
"eval_loss": 0.6337701678276062, |
|
"eval_runtime": 152.602, |
|
"eval_samples_per_second": 37.064, |
|
"eval_steps_per_second": 4.633, |
|
"eval_wer": 0.47877581807385533, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 2.548131370328426, |
|
"grad_norm": 1.0334250926971436, |
|
"learning_rate": 0.00018373333333333332, |
|
"loss": 0.46, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 2.548131370328426, |
|
"eval_loss": 0.5562150478363037, |
|
"eval_runtime": 152.8682, |
|
"eval_samples_per_second": 36.999, |
|
"eval_steps_per_second": 4.625, |
|
"eval_wer": 0.4155606554219961, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 2.6047565118912797, |
|
"eval_loss": 0.5376870036125183, |
|
"eval_runtime": 151.9955, |
|
"eval_samples_per_second": 37.212, |
|
"eval_steps_per_second": 4.651, |
|
"eval_wer": 0.3905730930333328, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 2.661381653454134, |
|
"eval_loss": 0.5686676502227783, |
|
"eval_runtime": 151.879, |
|
"eval_samples_per_second": 37.24, |
|
"eval_steps_per_second": 4.655, |
|
"eval_wer": 0.4008120556563047, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 2.7180067950169873, |
|
"eval_loss": 0.6321017742156982, |
|
"eval_runtime": 152.7183, |
|
"eval_samples_per_second": 37.036, |
|
"eval_steps_per_second": 4.629, |
|
"eval_wer": 0.42905746978864084, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 2.7746319365798415, |
|
"eval_loss": 0.5834416151046753, |
|
"eval_runtime": 152.1246, |
|
"eval_samples_per_second": 37.18, |
|
"eval_steps_per_second": 4.648, |
|
"eval_wer": 0.4202789234645568, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 2.8312570781426953, |
|
"grad_norm": 0.5010664463043213, |
|
"learning_rate": 0.00016706666666666664, |
|
"loss": 0.299, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 2.8312570781426953, |
|
"eval_loss": 0.5302273631095886, |
|
"eval_runtime": 152.1036, |
|
"eval_samples_per_second": 37.185, |
|
"eval_steps_per_second": 4.648, |
|
"eval_wer": 0.3929643241161272, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 2.887882219705549, |
|
"eval_loss": 0.5315878987312317, |
|
"eval_runtime": 151.9165, |
|
"eval_samples_per_second": 37.231, |
|
"eval_steps_per_second": 4.654, |
|
"eval_wer": 0.3860153102983422, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 2.9445073612684034, |
|
"eval_loss": 0.5343597531318665, |
|
"eval_runtime": 163.0683, |
|
"eval_samples_per_second": 34.685, |
|
"eval_steps_per_second": 4.336, |
|
"eval_wer": 0.38002920832597775, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 3.001132502831257, |
|
"eval_loss": 0.534857451915741, |
|
"eval_runtime": 153.2271, |
|
"eval_samples_per_second": 36.913, |
|
"eval_steps_per_second": 4.614, |
|
"eval_wer": 0.38415368073052913, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 3.057757644394111, |
|
"eval_loss": 0.5775672793388367, |
|
"eval_runtime": 151.3492, |
|
"eval_samples_per_second": 37.371, |
|
"eval_steps_per_second": 4.671, |
|
"eval_wer": 0.4182728571199307, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 3.114382785956965, |
|
"grad_norm": 1.614545226097107, |
|
"learning_rate": 0.00015039999999999997, |
|
"loss": 0.2839, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 3.114382785956965, |
|
"eval_loss": 0.5882839560508728, |
|
"eval_runtime": 150.3863, |
|
"eval_samples_per_second": 37.61, |
|
"eval_steps_per_second": 4.701, |
|
"eval_wer": 0.41002391231082796, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 3.1710079275198186, |
|
"eval_loss": 0.5722731947898865, |
|
"eval_runtime": 154.6093, |
|
"eval_samples_per_second": 36.583, |
|
"eval_steps_per_second": 4.573, |
|
"eval_wer": 0.4043587809536037, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 3.227633069082673, |
|
"eval_loss": 0.5630057454109192, |
|
"eval_runtime": 153.3174, |
|
"eval_samples_per_second": 36.891, |
|
"eval_steps_per_second": 4.611, |
|
"eval_wer": 0.40779316653560366, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 3.2842582106455267, |
|
"eval_loss": 0.5810334086418152, |
|
"eval_runtime": 153.0372, |
|
"eval_samples_per_second": 36.958, |
|
"eval_steps_per_second": 4.62, |
|
"eval_wer": 0.4191394777808092, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 3.3408833522083805, |
|
"eval_loss": 0.5995615720748901, |
|
"eval_runtime": 152.3384, |
|
"eval_samples_per_second": 37.128, |
|
"eval_steps_per_second": 4.641, |
|
"eval_wer": 0.4228306398549213, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 3.3975084937712343, |
|
"grad_norm": 9.246959686279297, |
|
"learning_rate": 0.00013373333333333332, |
|
"loss": 0.3019, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 3.3975084937712343, |
|
"eval_loss": 0.5681526064872742, |
|
"eval_runtime": 153.1128, |
|
"eval_samples_per_second": 36.94, |
|
"eval_steps_per_second": 4.618, |
|
"eval_wer": 0.4015502880711271, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 3.454133635334088, |
|
"eval_loss": 0.5560505390167236, |
|
"eval_runtime": 152.0848, |
|
"eval_samples_per_second": 37.19, |
|
"eval_steps_per_second": 4.649, |
|
"eval_wer": 0.40569080900643545, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 3.5107587768969424, |
|
"eval_loss": 0.5905264616012573, |
|
"eval_runtime": 152.0038, |
|
"eval_samples_per_second": 37.21, |
|
"eval_steps_per_second": 4.651, |
|
"eval_wer": 0.41458169504581854, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 3.567383918459796, |
|
"eval_loss": 0.5875168442726135, |
|
"eval_runtime": 151.4059, |
|
"eval_samples_per_second": 37.357, |
|
"eval_steps_per_second": 4.67, |
|
"eval_wer": 0.4190271380655101, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 3.62400906002265, |
|
"eval_loss": 0.5877885818481445, |
|
"eval_runtime": 150.4489, |
|
"eval_samples_per_second": 37.594, |
|
"eval_steps_per_second": 4.699, |
|
"eval_wer": 0.44462454462293977, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 3.680634201585504, |
|
"grad_norm": 1.084346890449524, |
|
"learning_rate": 0.00011709999999999999, |
|
"loss": 0.2944, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 3.680634201585504, |
|
"eval_loss": 0.5938708782196045, |
|
"eval_runtime": 150.586, |
|
"eval_samples_per_second": 37.56, |
|
"eval_steps_per_second": 4.695, |
|
"eval_wer": 0.4403877325030893, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 3.7372593431483576, |
|
"eval_loss": 0.590270459651947, |
|
"eval_runtime": 150.087, |
|
"eval_samples_per_second": 37.685, |
|
"eval_steps_per_second": 4.711, |
|
"eval_wer": 0.4183049541814447, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 3.793884484711212, |
|
"eval_loss": 0.5807533264160156, |
|
"eval_runtime": 149.6534, |
|
"eval_samples_per_second": 37.794, |
|
"eval_steps_per_second": 4.724, |
|
"eval_wer": 0.4059475854985476, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 3.8505096262740657, |
|
"eval_loss": 0.6154611706733704, |
|
"eval_runtime": 150.8194, |
|
"eval_samples_per_second": 37.502, |
|
"eval_steps_per_second": 4.688, |
|
"eval_wer": 0.410056009372342, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 3.9071347678369195, |
|
"eval_loss": 0.7987228631973267, |
|
"eval_runtime": 151.5296, |
|
"eval_samples_per_second": 37.326, |
|
"eval_steps_per_second": 4.666, |
|
"eval_wer": 0.5822727929258077, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 3.9637599093997737, |
|
"grad_norm": 9.350592613220215, |
|
"learning_rate": 0.0001005, |
|
"loss": 0.3918, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 3.9637599093997737, |
|
"eval_loss": 0.9750258326530457, |
|
"eval_runtime": 151.1967, |
|
"eval_samples_per_second": 37.408, |
|
"eval_steps_per_second": 4.676, |
|
"eval_wer": 0.5545409317776957, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 4.020385050962627, |
|
"eval_loss": 1.0540127754211426, |
|
"eval_runtime": 150.2681, |
|
"eval_samples_per_second": 37.639, |
|
"eval_steps_per_second": 4.705, |
|
"eval_wer": 0.5689043668052189, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 4.077010192525481, |
|
"eval_loss": 0.6850923299789429, |
|
"eval_runtime": 150.3993, |
|
"eval_samples_per_second": 37.607, |
|
"eval_steps_per_second": 4.701, |
|
"eval_wer": 0.4396013544959959, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 4.133635334088336, |
|
"eval_loss": 0.7331786155700684, |
|
"eval_runtime": 151.5017, |
|
"eval_samples_per_second": 37.333, |
|
"eval_steps_per_second": 4.667, |
|
"eval_wer": 0.49731187109820096, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 4.190260475651189, |
|
"eval_loss": 0.9466453194618225, |
|
"eval_runtime": 150.2633, |
|
"eval_samples_per_second": 37.641, |
|
"eval_steps_per_second": 4.705, |
|
"eval_wer": 0.6394697565437885, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 4.246885617214043, |
|
"grad_norm": 4.335416793823242, |
|
"learning_rate": 8.386666666666665e-05, |
|
"loss": 0.5378, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 4.246885617214043, |
|
"eval_loss": 0.8257068991661072, |
|
"eval_runtime": 151.0198, |
|
"eval_samples_per_second": 37.452, |
|
"eval_steps_per_second": 4.682, |
|
"eval_wer": 0.485147084784388, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 4.303510758776897, |
|
"eval_loss": 0.8490071296691895, |
|
"eval_runtime": 150.818, |
|
"eval_samples_per_second": 37.502, |
|
"eval_steps_per_second": 4.688, |
|
"eval_wer": 0.4867037922678179, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 4.360135900339751, |
|
"eval_loss": 0.8716742396354675, |
|
"eval_runtime": 150.797, |
|
"eval_samples_per_second": 37.507, |
|
"eval_steps_per_second": 4.688, |
|
"eval_wer": 0.47112066890276194, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 4.416761041902605, |
|
"eval_loss": 0.883883535861969, |
|
"eval_runtime": 150.6157, |
|
"eval_samples_per_second": 37.553, |
|
"eval_steps_per_second": 4.694, |
|
"eval_wer": 0.5860281491229478, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 4.4733861834654585, |
|
"eval_loss": 2.911261558532715, |
|
"eval_runtime": 150.8548, |
|
"eval_samples_per_second": 37.493, |
|
"eval_steps_per_second": 4.687, |
|
"eval_wer": 1.0, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 4.530011325028313, |
|
"grad_norm": 0.2805318534374237, |
|
"learning_rate": 6.723333333333333e-05, |
|
"loss": 1.3847, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 4.530011325028313, |
|
"eval_loss": 2.8575596809387207, |
|
"eval_runtime": 151.2637, |
|
"eval_samples_per_second": 37.392, |
|
"eval_steps_per_second": 4.674, |
|
"eval_wer": 1.0, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 4.586636466591166, |
|
"eval_loss": 2.83913516998291, |
|
"eval_runtime": 151.4914, |
|
"eval_samples_per_second": 37.335, |
|
"eval_steps_per_second": 4.667, |
|
"eval_wer": 1.0, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 4.64326160815402, |
|
"eval_loss": 2.840644121170044, |
|
"eval_runtime": 150.8361, |
|
"eval_samples_per_second": 37.498, |
|
"eval_steps_per_second": 4.687, |
|
"eval_wer": 1.0, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 4.699886749716875, |
|
"eval_loss": 2.856635093688965, |
|
"eval_runtime": 150.3023, |
|
"eval_samples_per_second": 37.631, |
|
"eval_steps_per_second": 4.704, |
|
"eval_wer": 1.0, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 4.756511891279728, |
|
"eval_loss": 2.8454244136810303, |
|
"eval_runtime": 150.6924, |
|
"eval_samples_per_second": 37.533, |
|
"eval_steps_per_second": 4.692, |
|
"eval_wer": 0.999823466161673, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 4.813137032842582, |
|
"grad_norm": 0.4468824863433838, |
|
"learning_rate": 5.06e-05, |
|
"loss": 2.8136, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 4.813137032842582, |
|
"eval_loss": 2.8339831829071045, |
|
"eval_runtime": 151.2056, |
|
"eval_samples_per_second": 37.406, |
|
"eval_steps_per_second": 4.676, |
|
"eval_wer": 0.999919757346215, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 4.869762174405436, |
|
"eval_loss": 2.836662530899048, |
|
"eval_runtime": 152.389, |
|
"eval_samples_per_second": 37.116, |
|
"eval_steps_per_second": 4.639, |
|
"eval_wer": 0.999935805876972, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 4.92638731596829, |
|
"eval_loss": 2.833369016647339, |
|
"eval_runtime": 151.3614, |
|
"eval_samples_per_second": 37.368, |
|
"eval_steps_per_second": 4.671, |
|
"eval_wer": 0.9998876602847009, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 4.983012457531144, |
|
"eval_loss": 2.8321285247802734, |
|
"eval_runtime": 157.5502, |
|
"eval_samples_per_second": 35.9, |
|
"eval_steps_per_second": 4.487, |
|
"eval_wer": 0.999935805876972, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 5.0396375990939974, |
|
"eval_loss": 2.802509307861328, |
|
"eval_runtime": 151.5865, |
|
"eval_samples_per_second": 37.312, |
|
"eval_steps_per_second": 4.664, |
|
"eval_wer": 0.9982186130859719, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 5.096262740656852, |
|
"grad_norm": 0.3411979377269745, |
|
"learning_rate": 3.393333333333333e-05, |
|
"loss": 2.8007, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 5.096262740656852, |
|
"eval_loss": 2.8024423122406006, |
|
"eval_runtime": 151.6682, |
|
"eval_samples_per_second": 37.292, |
|
"eval_steps_per_second": 4.661, |
|
"eval_wer": 0.9974964292019066, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 5.152887882219706, |
|
"eval_loss": 2.8043477535247803, |
|
"eval_runtime": 153.1811, |
|
"eval_samples_per_second": 36.924, |
|
"eval_steps_per_second": 4.615, |
|
"eval_wer": 0.9981062733706729, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 5.209513023782559, |
|
"eval_loss": 2.810584783554077, |
|
"eval_runtime": 151.6694, |
|
"eval_samples_per_second": 37.292, |
|
"eval_steps_per_second": 4.661, |
|
"eval_wer": 0.9992457190544206, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 5.266138165345414, |
|
"eval_loss": 2.8066723346710205, |
|
"eval_runtime": 151.6427, |
|
"eval_samples_per_second": 37.298, |
|
"eval_steps_per_second": 4.662, |
|
"eval_wer": 0.9993259617082056, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 5.322763306908267, |
|
"eval_loss": 2.805284023284912, |
|
"eval_runtime": 151.8566, |
|
"eval_samples_per_second": 37.246, |
|
"eval_steps_per_second": 4.656, |
|
"eval_wer": 0.9985877292933832, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 5.379388448471121, |
|
"grad_norm": 0.15991327166557312, |
|
"learning_rate": 1.7299999999999997e-05, |
|
"loss": 2.7935, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 5.379388448471121, |
|
"eval_loss": 2.807739734649658, |
|
"eval_runtime": 152.8905, |
|
"eval_samples_per_second": 36.994, |
|
"eval_steps_per_second": 4.624, |
|
"eval_wer": 0.9978173998170468, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 5.436013590033975, |
|
"eval_loss": 2.8082854747772217, |
|
"eval_runtime": 151.7494, |
|
"eval_samples_per_second": 37.272, |
|
"eval_steps_per_second": 4.659, |
|
"eval_wer": 0.9987161175394392, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 5.492638731596829, |
|
"eval_loss": 2.8080484867095947, |
|
"eval_runtime": 151.6292, |
|
"eval_samples_per_second": 37.302, |
|
"eval_steps_per_second": 4.663, |
|
"eval_wer": 0.9988926513777664, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 5.549263873159683, |
|
"eval_loss": 2.808607578277588, |
|
"eval_runtime": 153.2376, |
|
"eval_samples_per_second": 36.91, |
|
"eval_steps_per_second": 4.614, |
|
"eval_wer": 0.9986037778241402, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 5.605889014722536, |
|
"eval_loss": 2.8079216480255127, |
|
"eval_runtime": 151.6181, |
|
"eval_samples_per_second": 37.304, |
|
"eval_steps_per_second": 4.663, |
|
"eval_wer": 0.998186516024458, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 5.662514156285391, |
|
"grad_norm": 0.3985973000526428, |
|
"learning_rate": 7e-07, |
|
"loss": 2.7861, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 5.662514156285391, |
|
"eval_loss": 2.807446241378784, |
|
"eval_runtime": 151.2977, |
|
"eval_samples_per_second": 37.383, |
|
"eval_steps_per_second": 4.673, |
|
"eval_wer": 0.998266758678243, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 5.662514156285391, |
|
"step": 10000, |
|
"total_flos": 9.138710330328565e+19, |
|
"train_loss": 1.1088516311645509, |
|
"train_runtime": 39423.4073, |
|
"train_samples_per_second": 16.234, |
|
"train_steps_per_second": 0.254 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 10000, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 6, |
|
"save_steps": 400, |
|
"total_flos": 9.138710330328565e+19, |
|
"train_batch_size": 64, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|