{ "best_metric": 10.414333706606943, "best_model_checkpoint": "/cluster/home/torstefl/Master/saved_model/Whisper/BB-02.06/checkpoint-7930", "epoch": 6.4946764946764945, "eval_steps": 305, "global_step": 7930, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.08, "grad_norm": 22.543981552124023, "learning_rate": 1.0000000000000002e-06, "loss": 1.3299, "step": 100 }, { "epoch": 0.16, "grad_norm": 4.610039234161377, "learning_rate": 2.0000000000000003e-06, "loss": 0.5782, "step": 200 }, { "epoch": 0.25, "grad_norm": 3.5814695358276367, "learning_rate": 3e-06, "loss": 0.3116, "step": 300 }, { "epoch": 0.25, "eval_rundkast_loss": 0.26998892426490784, "eval_rundkast_runtime": 272.1953, "eval_rundkast_samples_per_second": 4.919, "eval_rundkast_steps_per_second": 0.103, "eval_rundkast_wer": 13.821788513837785, "step": 305 }, { "epoch": 0.25, "eval_nb_samtale_loss": 0.44716086983680725, "eval_nb_samtale_runtime": 133.4016, "eval_nb_samtale_samples_per_second": 3.995, "eval_nb_samtale_steps_per_second": 0.09, "eval_nb_samtale_wer": 15.91796875, "step": 305 }, { "epoch": 0.25, "eval_bigbrother_loss": 0.7930576801300049, "eval_bigbrother_runtime": 277.9737, "eval_bigbrother_samples_per_second": 4.929, "eval_bigbrother_steps_per_second": 0.104, "eval_bigbrother_wer": 37.10625956658342, "step": 305 }, { "epoch": 0.33, "grad_norm": 3.668013572692871, "learning_rate": 4.000000000000001e-06, "loss": 0.2436, "step": 400 }, { "epoch": 0.41, "grad_norm": 2.932541847229004, "learning_rate": 5e-06, "loss": 0.2342, "step": 500 }, { "epoch": 0.49, "grad_norm": 4.445041656494141, "learning_rate": 6e-06, "loss": 0.2278, "step": 600 }, { "epoch": 0.5, "eval_rundkast_loss": 0.21992045640945435, "eval_rundkast_runtime": 267.1037, "eval_rundkast_samples_per_second": 5.013, "eval_rundkast_steps_per_second": 0.105, "eval_rundkast_wer": 12.150055991041434, "step": 610 }, { "epoch": 0.5, "eval_nb_samtale_loss": 0.4093947410583496, "eval_nb_samtale_runtime": 130.2042, "eval_nb_samtale_samples_per_second": 4.094, "eval_nb_samtale_steps_per_second": 0.092, "eval_nb_samtale_wer": 15.592447916666666, "step": 610 }, { "epoch": 0.5, "eval_bigbrother_loss": 0.727997362613678, "eval_bigbrother_runtime": 277.0718, "eval_bigbrother_samples_per_second": 4.945, "eval_bigbrother_steps_per_second": 0.105, "eval_bigbrother_wer": 36.22009183920083, "step": 610 }, { "epoch": 0.57, "grad_norm": 3.1760706901550293, "learning_rate": 7e-06, "loss": 0.2155, "step": 700 }, { "epoch": 0.66, "grad_norm": 3.6593875885009766, "learning_rate": 8.000000000000001e-06, "loss": 0.206, "step": 800 }, { "epoch": 0.74, "grad_norm": 2.753551721572876, "learning_rate": 9e-06, "loss": 0.1962, "step": 900 }, { "epoch": 0.75, "eval_rundkast_loss": 0.19598311185836792, "eval_rundkast_runtime": 268.1054, "eval_rundkast_samples_per_second": 4.994, "eval_rundkast_steps_per_second": 0.104, "eval_rundkast_wer": 11.590145576707727, "step": 915 }, { "epoch": 0.75, "eval_nb_samtale_loss": 0.39330345392227173, "eval_nb_samtale_runtime": 131.8338, "eval_nb_samtale_samples_per_second": 4.043, "eval_nb_samtale_steps_per_second": 0.091, "eval_nb_samtale_wer": 15.462239583333334, "step": 915 }, { "epoch": 0.75, "eval_bigbrother_loss": 0.6920289397239685, "eval_bigbrother_runtime": 278.6111, "eval_bigbrother_samples_per_second": 4.917, "eval_bigbrother_steps_per_second": 0.104, "eval_bigbrother_wer": 35.52726979779264, "step": 915 }, { "epoch": 0.82, "grad_norm": 3.255183219909668, "learning_rate": 1e-05, "loss": 0.1939, "step": 1000 }, { "epoch": 0.9, "grad_norm": 3.198969841003418, "learning_rate": 9.942246606988162e-06, "loss": 0.1827, "step": 1100 }, { "epoch": 0.98, "grad_norm": 2.9116554260253906, "learning_rate": 9.884493213976322e-06, "loss": 0.1865, "step": 1200 }, { "epoch": 1.0, "eval_rundkast_loss": 0.18417133390903473, "eval_rundkast_runtime": 268.3998, "eval_rundkast_samples_per_second": 4.989, "eval_rundkast_steps_per_second": 0.104, "eval_rundkast_wer": 11.558150695888658, "step": 1220 }, { "epoch": 1.0, "eval_nb_samtale_loss": 0.386161744594574, "eval_nb_samtale_runtime": 131.0802, "eval_nb_samtale_samples_per_second": 4.066, "eval_nb_samtale_steps_per_second": 0.092, "eval_nb_samtale_wer": 15.836588541666666, "step": 1220 }, { "epoch": 1.0, "eval_bigbrother_loss": 0.6852795481681824, "eval_bigbrother_runtime": 276.4457, "eval_bigbrother_samples_per_second": 4.956, "eval_bigbrother_steps_per_second": 0.105, "eval_bigbrother_wer": 35.865624748247804, "step": 1220 }, { "epoch": 1.06, "grad_norm": 2.3978002071380615, "learning_rate": 9.826739820964482e-06, "loss": 0.1426, "step": 1300 }, { "epoch": 1.15, "grad_norm": 3.1108155250549316, "learning_rate": 9.768986427952642e-06, "loss": 0.127, "step": 1400 }, { "epoch": 1.23, "grad_norm": 3.221513509750366, "learning_rate": 9.711233034940803e-06, "loss": 0.134, "step": 1500 }, { "epoch": 1.25, "eval_rundkast_loss": 0.18123188614845276, "eval_rundkast_runtime": 267.2108, "eval_rundkast_samples_per_second": 5.011, "eval_rundkast_steps_per_second": 0.105, "eval_rundkast_wer": 11.198208286674133, "step": 1525 }, { "epoch": 1.25, "eval_nb_samtale_loss": 0.3853602409362793, "eval_nb_samtale_runtime": 132.1454, "eval_nb_samtale_samples_per_second": 4.033, "eval_nb_samtale_steps_per_second": 0.091, "eval_nb_samtale_wer": 15.885416666666666, "step": 1525 }, { "epoch": 1.25, "eval_bigbrother_loss": 0.710231363773346, "eval_bigbrother_runtime": 275.285, "eval_bigbrother_samples_per_second": 4.977, "eval_bigbrother_steps_per_second": 0.105, "eval_bigbrother_wer": 35.81728832675421, "step": 1525 }, { "epoch": 1.31, "grad_norm": 2.821453332901001, "learning_rate": 9.653479641928965e-06, "loss": 0.1277, "step": 1600 }, { "epoch": 1.39, "grad_norm": 2.519831895828247, "learning_rate": 9.595726248917125e-06, "loss": 0.1268, "step": 1700 }, { "epoch": 1.47, "grad_norm": 2.9331092834472656, "learning_rate": 9.537972855905285e-06, "loss": 0.1349, "step": 1800 }, { "epoch": 1.5, "eval_rundkast_loss": 0.172105610370636, "eval_rundkast_runtime": 265.508, "eval_rundkast_samples_per_second": 5.043, "eval_rundkast_steps_per_second": 0.105, "eval_rundkast_wer": 10.686290193569029, "step": 1830 }, { "epoch": 1.5, "eval_nb_samtale_loss": 0.38620564341545105, "eval_nb_samtale_runtime": 132.9173, "eval_nb_samtale_samples_per_second": 4.01, "eval_nb_samtale_steps_per_second": 0.09, "eval_nb_samtale_wer": 16.2109375, "step": 1830 }, { "epoch": 1.5, "eval_bigbrother_loss": 0.7046501040458679, "eval_bigbrother_runtime": 277.4284, "eval_bigbrother_samples_per_second": 4.938, "eval_bigbrother_steps_per_second": 0.105, "eval_bigbrother_wer": 35.72061548376702, "step": 1830 }, { "epoch": 1.56, "grad_norm": 2.715769052505493, "learning_rate": 9.480219462893446e-06, "loss": 0.1264, "step": 1900 }, { "epoch": 1.64, "grad_norm": 3.3156239986419678, "learning_rate": 9.422466069881606e-06, "loss": 0.1278, "step": 2000 }, { "epoch": 1.72, "grad_norm": 2.747093915939331, "learning_rate": 9.364712676869767e-06, "loss": 0.1275, "step": 2100 }, { "epoch": 1.75, "eval_rundkast_loss": 0.16917824745178223, "eval_rundkast_runtime": 265.6106, "eval_rundkast_samples_per_second": 5.041, "eval_rundkast_steps_per_second": 0.105, "eval_rundkast_wer": 10.902255639097744, "step": 2135 }, { "epoch": 1.75, "eval_nb_samtale_loss": 0.37596940994262695, "eval_nb_samtale_runtime": 131.3056, "eval_nb_samtale_samples_per_second": 4.059, "eval_nb_samtale_steps_per_second": 0.091, "eval_nb_samtale_wer": 15.494791666666666, "step": 2135 }, { "epoch": 1.75, "eval_bigbrother_loss": 0.6828669309616089, "eval_bigbrother_runtime": 283.3934, "eval_bigbrother_samples_per_second": 4.834, "eval_bigbrother_steps_per_second": 0.102, "eval_bigbrother_wer": 38.814146459357126, "step": 2135 }, { "epoch": 1.8, "grad_norm": 2.3120596408843994, "learning_rate": 9.306959283857927e-06, "loss": 0.1298, "step": 2200 }, { "epoch": 1.88, "grad_norm": 3.637758731842041, "learning_rate": 9.249205890846089e-06, "loss": 0.1211, "step": 2300 }, { "epoch": 1.97, "grad_norm": 2.884577751159668, "learning_rate": 9.191452497834249e-06, "loss": 0.123, "step": 2400 }, { "epoch": 2.0, "eval_rundkast_loss": 0.16326722502708435, "eval_rundkast_runtime": 267.3107, "eval_rundkast_samples_per_second": 5.009, "eval_rundkast_steps_per_second": 0.105, "eval_rundkast_wer": 10.726283794592865, "step": 2440 }, { "epoch": 2.0, "eval_nb_samtale_loss": 0.3877582550048828, "eval_nb_samtale_runtime": 131.6096, "eval_nb_samtale_samples_per_second": 4.05, "eval_nb_samtale_steps_per_second": 0.091, "eval_nb_samtale_wer": 16.11328125, "step": 2440 }, { "epoch": 2.0, "eval_bigbrother_loss": 0.6807565689086914, "eval_bigbrother_runtime": 276.8009, "eval_bigbrother_samples_per_second": 4.949, "eval_bigbrother_steps_per_second": 0.105, "eval_bigbrother_wer": 35.32586804156932, "step": 2440 }, { "epoch": 2.05, "grad_norm": 1.7449294328689575, "learning_rate": 9.133699104822408e-06, "loss": 0.0954, "step": 2500 }, { "epoch": 2.13, "grad_norm": 2.5497448444366455, "learning_rate": 9.07594571181057e-06, "loss": 0.074, "step": 2600 }, { "epoch": 2.21, "grad_norm": 2.048872947692871, "learning_rate": 9.01819231879873e-06, "loss": 0.0716, "step": 2700 }, { "epoch": 2.25, "eval_rundkast_loss": 0.171921044588089, "eval_rundkast_runtime": 267.4322, "eval_rundkast_samples_per_second": 5.007, "eval_rundkast_steps_per_second": 0.105, "eval_rundkast_wer": 10.53431450967845, "step": 2745 }, { "epoch": 2.25, "eval_nb_samtale_loss": 0.40560728311538696, "eval_nb_samtale_runtime": 132.0733, "eval_nb_samtale_samples_per_second": 4.036, "eval_nb_samtale_steps_per_second": 0.091, "eval_nb_samtale_wer": 16.072591145833336, "step": 2745 }, { "epoch": 2.25, "eval_bigbrother_loss": 0.7426937818527222, "eval_bigbrother_runtime": 282.5107, "eval_bigbrother_samples_per_second": 4.849, "eval_bigbrother_steps_per_second": 0.103, "eval_bigbrother_wer": 37.31571739305567, "step": 2745 }, { "epoch": 2.29, "grad_norm": 2.1722655296325684, "learning_rate": 8.960438925786891e-06, "loss": 0.0726, "step": 2800 }, { "epoch": 2.38, "grad_norm": 2.1591341495513916, "learning_rate": 8.902685532775051e-06, "loss": 0.0731, "step": 2900 }, { "epoch": 2.46, "grad_norm": 2.1594812870025635, "learning_rate": 8.844932139763211e-06, "loss": 0.0741, "step": 3000 }, { "epoch": 2.5, "eval_rundkast_loss": 0.17113752663135529, "eval_rundkast_runtime": 267.3902, "eval_rundkast_samples_per_second": 5.008, "eval_rundkast_steps_per_second": 0.105, "eval_rundkast_wer": 10.758278675411933, "step": 3050 }, { "epoch": 2.5, "eval_nb_samtale_loss": 0.4135577976703644, "eval_nb_samtale_runtime": 132.7315, "eval_nb_samtale_samples_per_second": 4.016, "eval_nb_samtale_steps_per_second": 0.09, "eval_nb_samtale_wer": 15.690104166666666, "step": 3050 }, { "epoch": 2.5, "eval_bigbrother_loss": 0.7614437341690063, "eval_bigbrother_runtime": 277.5651, "eval_bigbrother_samples_per_second": 4.936, "eval_bigbrother_steps_per_second": 0.104, "eval_bigbrother_wer": 37.30766132280674, "step": 3050 }, { "epoch": 2.54, "grad_norm": 2.3434081077575684, "learning_rate": 8.787178746751373e-06, "loss": 0.0757, "step": 3100 }, { "epoch": 2.62, "grad_norm": 1.7652161121368408, "learning_rate": 8.729425353739532e-06, "loss": 0.0754, "step": 3200 }, { "epoch": 2.7, "grad_norm": 1.9667662382125854, "learning_rate": 8.671671960727694e-06, "loss": 0.0771, "step": 3300 }, { "epoch": 2.75, "eval_rundkast_loss": 0.16951075196266174, "eval_rundkast_runtime": 265.7942, "eval_rundkast_samples_per_second": 5.038, "eval_rundkast_steps_per_second": 0.105, "eval_rundkast_wer": 10.582306830907056, "step": 3355 }, { "epoch": 2.75, "eval_nb_samtale_loss": 0.4036692678928375, "eval_nb_samtale_runtime": 130.7522, "eval_nb_samtale_samples_per_second": 4.076, "eval_nb_samtale_steps_per_second": 0.092, "eval_nb_samtale_wer": 16.2109375, "step": 3355 }, { "epoch": 2.75, "eval_bigbrother_loss": 0.7462433576583862, "eval_bigbrother_runtime": 281.4612, "eval_bigbrother_samples_per_second": 4.867, "eval_bigbrother_steps_per_second": 0.103, "eval_bigbrother_wer": 39.410295657778136, "step": 3355 }, { "epoch": 2.78, "grad_norm": 1.771730661392212, "learning_rate": 8.613918567715854e-06, "loss": 0.0739, "step": 3400 }, { "epoch": 2.87, "grad_norm": 1.9539287090301514, "learning_rate": 8.556165174704014e-06, "loss": 0.0749, "step": 3500 }, { "epoch": 2.95, "grad_norm": 1.8422223329544067, "learning_rate": 8.498411781692175e-06, "loss": 0.0761, "step": 3600 }, { "epoch": 3.0, "eval_rundkast_loss": 0.16748666763305664, "eval_rundkast_runtime": 263.9708, "eval_rundkast_samples_per_second": 5.073, "eval_rundkast_steps_per_second": 0.106, "eval_rundkast_wer": 10.51031834906415, "step": 3660 }, { "epoch": 3.0, "eval_nb_samtale_loss": 0.4049259126186371, "eval_nb_samtale_runtime": 131.8245, "eval_nb_samtale_samples_per_second": 4.043, "eval_nb_samtale_steps_per_second": 0.091, "eval_nb_samtale_wer": 16.520182291666664, "step": 3660 }, { "epoch": 3.0, "eval_bigbrother_loss": 0.7517970204353333, "eval_bigbrother_runtime": 275.4865, "eval_bigbrother_samples_per_second": 4.973, "eval_bigbrother_steps_per_second": 0.105, "eval_bigbrother_wer": 36.45371787641988, "step": 3660 }, { "epoch": 3.03, "grad_norm": 1.9308370351791382, "learning_rate": 8.440658388680337e-06, "loss": 0.0672, "step": 3700 }, { "epoch": 3.11, "grad_norm": 1.7733228206634521, "learning_rate": 8.382904995668496e-06, "loss": 0.0426, "step": 3800 }, { "epoch": 3.19, "grad_norm": 1.576960802078247, "learning_rate": 8.325151602656656e-06, "loss": 0.0425, "step": 3900 }, { "epoch": 3.25, "eval_rundkast_loss": 0.18175919353961945, "eval_rundkast_runtime": 270.4272, "eval_rundkast_samples_per_second": 4.951, "eval_rundkast_steps_per_second": 0.104, "eval_rundkast_wer": 10.518317069268917, "step": 3965 }, { "epoch": 3.25, "eval_nb_samtale_loss": 0.4434772729873657, "eval_nb_samtale_runtime": 132.6807, "eval_nb_samtale_samples_per_second": 4.017, "eval_nb_samtale_steps_per_second": 0.09, "eval_nb_samtale_wer": 16.015625, "step": 3965 }, { "epoch": 3.25, "eval_bigbrother_loss": 0.8168217539787292, "eval_bigbrother_runtime": 276.801, "eval_bigbrother_samples_per_second": 4.949, "eval_bigbrother_steps_per_second": 0.105, "eval_bigbrother_wer": 36.429549665673086, "step": 3965 }, { "epoch": 3.28, "grad_norm": 2.2763025760650635, "learning_rate": 8.267398209644816e-06, "loss": 0.0428, "step": 4000 }, { "epoch": 3.36, "grad_norm": 1.510209560394287, "learning_rate": 8.209644816632978e-06, "loss": 0.0459, "step": 4100 }, { "epoch": 3.44, "grad_norm": 1.8989923000335693, "learning_rate": 8.15189142362114e-06, "loss": 0.046, "step": 4200 }, { "epoch": 3.5, "eval_rundkast_loss": 0.18418411910533905, "eval_rundkast_runtime": 266.3145, "eval_rundkast_samples_per_second": 5.028, "eval_rundkast_steps_per_second": 0.105, "eval_rundkast_wer": 10.62230043193089, "step": 4270 }, { "epoch": 3.5, "eval_nb_samtale_loss": 0.44499385356903076, "eval_nb_samtale_runtime": 132.1121, "eval_nb_samtale_samples_per_second": 4.034, "eval_nb_samtale_steps_per_second": 0.091, "eval_nb_samtale_wer": 15.958658854166666, "step": 4270 }, { "epoch": 3.5, "eval_bigbrother_loss": 0.8417752981185913, "eval_bigbrother_runtime": 274.2381, "eval_bigbrother_samples_per_second": 4.996, "eval_bigbrother_steps_per_second": 0.106, "eval_bigbrother_wer": 36.767904616128256, "step": 4270 }, { "epoch": 3.52, "grad_norm": 2.620262384414673, "learning_rate": 8.094138030609299e-06, "loss": 0.0436, "step": 4300 }, { "epoch": 3.6, "grad_norm": 2.0660297870635986, "learning_rate": 8.03638463759746e-06, "loss": 0.0474, "step": 4400 }, { "epoch": 3.69, "grad_norm": 2.1913418769836426, "learning_rate": 7.97863124458562e-06, "loss": 0.0463, "step": 4500 }, { "epoch": 3.75, "eval_rundkast_loss": 0.18389423191547394, "eval_rundkast_runtime": 267.3325, "eval_rundkast_samples_per_second": 5.009, "eval_rundkast_steps_per_second": 0.105, "eval_rundkast_wer": 10.774276115821468, "step": 4575 }, { "epoch": 3.75, "eval_nb_samtale_loss": 0.44804662466049194, "eval_nb_samtale_runtime": 130.8487, "eval_nb_samtale_samples_per_second": 4.073, "eval_nb_samtale_steps_per_second": 0.092, "eval_nb_samtale_wer": 16.324869791666664, "step": 4575 }, { "epoch": 3.75, "eval_bigbrother_loss": 0.8276214003562927, "eval_bigbrother_runtime": 274.7762, "eval_bigbrother_samples_per_second": 4.986, "eval_bigbrother_steps_per_second": 0.106, "eval_bigbrother_wer": 36.622895351647465, "step": 4575 }, { "epoch": 3.77, "grad_norm": 2.3918960094451904, "learning_rate": 7.92087785157378e-06, "loss": 0.0463, "step": 4600 }, { "epoch": 3.85, "grad_norm": 1.64975905418396, "learning_rate": 7.863124458561942e-06, "loss": 0.0479, "step": 4700 }, { "epoch": 3.93, "grad_norm": 1.6684168577194214, "learning_rate": 7.805371065550102e-06, "loss": 0.0462, "step": 4800 }, { "epoch": 4.0, "eval_rundkast_loss": 0.18402020633220673, "eval_rundkast_runtime": 266.1373, "eval_rundkast_samples_per_second": 5.031, "eval_rundkast_steps_per_second": 0.105, "eval_rundkast_wer": 10.702287633978564, "step": 4880 }, { "epoch": 4.0, "eval_nb_samtale_loss": 0.4544055163860321, "eval_nb_samtale_runtime": 131.9332, "eval_nb_samtale_samples_per_second": 4.04, "eval_nb_samtale_steps_per_second": 0.091, "eval_nb_samtale_wer": 16.2109375, "step": 4880 }, { "epoch": 4.0, "eval_bigbrother_loss": 0.8441748023033142, "eval_bigbrother_runtime": 273.29, "eval_bigbrother_samples_per_second": 5.013, "eval_bigbrother_steps_per_second": 0.106, "eval_bigbrother_wer": 37.025698864094096, "step": 4880 }, { "epoch": 4.01, "grad_norm": 1.9604337215423584, "learning_rate": 7.747617672538263e-06, "loss": 0.0422, "step": 4900 }, { "epoch": 4.1, "grad_norm": 1.223702311515808, "learning_rate": 7.689864279526423e-06, "loss": 0.0245, "step": 5000 }, { "epoch": 4.18, "grad_norm": 1.5145870447158813, "learning_rate": 7.632110886514583e-06, "loss": 0.025, "step": 5100 }, { "epoch": 4.25, "eval_rundkast_loss": 0.1977960765361786, "eval_rundkast_runtime": 269.176, "eval_rundkast_samples_per_second": 4.974, "eval_rundkast_steps_per_second": 0.104, "eval_rundkast_wer": 10.590305551111822, "step": 5185 }, { "epoch": 4.25, "eval_nb_samtale_loss": 0.5040209293365479, "eval_nb_samtale_runtime": 132.1986, "eval_nb_samtale_samples_per_second": 4.032, "eval_nb_samtale_steps_per_second": 0.091, "eval_nb_samtale_wer": 16.267903645833336, "step": 5185 }, { "epoch": 4.25, "eval_bigbrother_loss": 0.903715968132019, "eval_bigbrother_runtime": 275.881, "eval_bigbrother_samples_per_second": 4.966, "eval_bigbrother_steps_per_second": 0.105, "eval_bigbrother_wer": 36.88874566986224, "step": 5185 }, { "epoch": 4.26, "grad_norm": 1.4866691827774048, "learning_rate": 7.574357493502744e-06, "loss": 0.0264, "step": 5200 }, { "epoch": 4.34, "grad_norm": 1.4074543714523315, "learning_rate": 7.516604100490904e-06, "loss": 0.0259, "step": 5300 }, { "epoch": 4.42, "grad_norm": 1.8785744905471802, "learning_rate": 7.458850707479065e-06, "loss": 0.0261, "step": 5400 }, { "epoch": 4.5, "eval_rundkast_loss": 0.20126894116401672, "eval_rundkast_runtime": 265.3149, "eval_rundkast_samples_per_second": 5.047, "eval_rundkast_steps_per_second": 0.106, "eval_rundkast_wer": 10.83026715725484, "step": 5490 }, { "epoch": 4.5, "eval_nb_samtale_loss": 0.5124031901359558, "eval_nb_samtale_runtime": 130.2241, "eval_nb_samtale_samples_per_second": 4.093, "eval_nb_samtale_steps_per_second": 0.092, "eval_nb_samtale_wer": 16.7724609375, "step": 5490 }, { "epoch": 4.5, "eval_bigbrother_loss": 0.9185855984687805, "eval_bigbrother_runtime": 274.0595, "eval_bigbrother_samples_per_second": 4.999, "eval_bigbrother_steps_per_second": 0.106, "eval_bigbrother_wer": 37.01764279384516, "step": 5490 }, { "epoch": 4.5, "grad_norm": 0.9663652181625366, "learning_rate": 7.401097314467226e-06, "loss": 0.0266, "step": 5500 }, { "epoch": 4.59, "grad_norm": 1.0480437278747559, "learning_rate": 7.343343921455386e-06, "loss": 0.0281, "step": 5600 }, { "epoch": 4.67, "grad_norm": 1.6226842403411865, "learning_rate": 7.285590528443547e-06, "loss": 0.0279, "step": 5700 }, { "epoch": 4.75, "eval_rundkast_loss": 0.19958865642547607, "eval_rundkast_runtime": 264.6785, "eval_rundkast_samples_per_second": 5.059, "eval_rundkast_steps_per_second": 0.106, "eval_rundkast_wer": 10.734282514797632, "step": 5795 }, { "epoch": 4.75, "eval_nb_samtale_loss": 0.5009992122650146, "eval_nb_samtale_runtime": 131.2705, "eval_nb_samtale_samples_per_second": 4.06, "eval_nb_samtale_steps_per_second": 0.091, "eval_nb_samtale_wer": 16.471354166666664, "step": 5795 }, { "epoch": 4.75, "eval_bigbrother_loss": 0.9256457686424255, "eval_bigbrother_runtime": 273.4177, "eval_bigbrother_samples_per_second": 5.011, "eval_bigbrother_steps_per_second": 0.106, "eval_bigbrother_wer": 37.364053814549266, "step": 5795 }, { "epoch": 4.75, "grad_norm": 1.7448477745056152, "learning_rate": 7.227837135431707e-06, "loss": 0.0282, "step": 5800 }, { "epoch": 4.83, "grad_norm": 1.4728354215621948, "learning_rate": 7.170083742419867e-06, "loss": 0.028, "step": 5900 }, { "epoch": 4.91, "grad_norm": 1.2925212383270264, "learning_rate": 7.112330349408029e-06, "loss": 0.0283, "step": 6000 }, { "epoch": 5.0, "grad_norm": 1.487881064414978, "learning_rate": 7.054576956396189e-06, "loss": 0.0267, "step": 6100 }, { "epoch": 5.0, "eval_rundkast_loss": 0.2028118520975113, "eval_rundkast_runtime": 263.5903, "eval_rundkast_samples_per_second": 5.08, "eval_rundkast_steps_per_second": 0.106, "eval_rundkast_wer": 10.59830427131659, "step": 6100 }, { "epoch": 5.0, "eval_nb_samtale_loss": 0.5003817081451416, "eval_nb_samtale_runtime": 139.8456, "eval_nb_samtale_samples_per_second": 3.811, "eval_nb_samtale_steps_per_second": 0.086, "eval_nb_samtale_wer": 16.731770833333336, "step": 6100 }, { "epoch": 5.0, "eval_bigbrother_loss": 0.9243764877319336, "eval_bigbrother_runtime": 297.3516, "eval_bigbrother_samples_per_second": 4.607, "eval_bigbrother_steps_per_second": 0.098, "eval_bigbrother_wer": 38.0407637154596, "step": 6100 }, { "epoch": 5.08, "grad_norm": 1.4894100427627563, "learning_rate": 6.996823563384349e-06, "loss": 0.0162, "step": 6200 }, { "epoch": 5.16, "grad_norm": 1.447346568107605, "learning_rate": 6.939070170372509e-06, "loss": 0.0155, "step": 6300 }, { "epoch": 5.24, "grad_norm": 1.4276291131973267, "learning_rate": 6.881316777360671e-06, "loss": 0.0153, "step": 6400 }, { "epoch": 5.25, "eval_rundkast_loss": 0.21709373593330383, "eval_rundkast_runtime": 266.3879, "eval_rundkast_samples_per_second": 5.027, "eval_rundkast_steps_per_second": 0.105, "eval_rundkast_wer": 10.902255639097744, "step": 6405 }, { "epoch": 5.25, "eval_nb_samtale_loss": 0.5387881994247437, "eval_nb_samtale_runtime": 133.5414, "eval_nb_samtale_samples_per_second": 3.991, "eval_nb_samtale_steps_per_second": 0.09, "eval_nb_samtale_wer": 16.446940104166664, "step": 6405 }, { "epoch": 5.25, "eval_bigbrother_loss": 0.9813001155853271, "eval_bigbrother_runtime": 275.2598, "eval_bigbrother_samples_per_second": 4.977, "eval_bigbrother_steps_per_second": 0.105, "eval_bigbrother_wer": 37.025698864094096, "step": 6405 }, { "epoch": 5.32, "grad_norm": 1.255480170249939, "learning_rate": 6.823563384348831e-06, "loss": 0.016, "step": 6500 }, { "epoch": 5.41, "grad_norm": 0.8070683479309082, "learning_rate": 6.765809991336991e-06, "loss": 0.0166, "step": 6600 }, { "epoch": 5.49, "grad_norm": 2.28149151802063, "learning_rate": 6.708056598325152e-06, "loss": 0.0165, "step": 6700 }, { "epoch": 5.5, "eval_rundkast_loss": 0.21103337407112122, "eval_rundkast_runtime": 264.5865, "eval_rundkast_samples_per_second": 5.061, "eval_rundkast_steps_per_second": 0.106, "eval_rundkast_wer": 10.614301711726123, "step": 6710 }, { "epoch": 5.5, "eval_nb_samtale_loss": 0.5321828722953796, "eval_nb_samtale_runtime": 131.9806, "eval_nb_samtale_samples_per_second": 4.038, "eval_nb_samtale_steps_per_second": 0.091, "eval_nb_samtale_wer": 16.813151041666664, "step": 6710 }, { "epoch": 5.5, "eval_bigbrother_loss": 0.9772281050682068, "eval_bigbrother_runtime": 275.4923, "eval_bigbrother_samples_per_second": 4.973, "eval_bigbrother_steps_per_second": 0.105, "eval_bigbrother_wer": 37.23515669056634, "step": 6710 }, { "epoch": 5.57, "grad_norm": 1.1977729797363281, "learning_rate": 6.6503032053133134e-06, "loss": 0.0181, "step": 6800 }, { "epoch": 5.65, "grad_norm": 2.1766629219055176, "learning_rate": 6.592549812301473e-06, "loss": 0.0156, "step": 6900 }, { "epoch": 5.73, "grad_norm": 1.58673095703125, "learning_rate": 6.534796419289634e-06, "loss": 0.0161, "step": 7000 }, { "epoch": 5.75, "eval_rundkast_loss": 0.21978364884853363, "eval_rundkast_runtime": 263.5088, "eval_rundkast_samples_per_second": 5.081, "eval_rundkast_steps_per_second": 0.106, "eval_rundkast_wer": 10.85426331786914, "step": 7015 }, { "epoch": 5.75, "eval_nb_samtale_loss": 0.54365473985672, "eval_nb_samtale_runtime": 129.1766, "eval_nb_samtale_samples_per_second": 4.126, "eval_nb_samtale_steps_per_second": 0.093, "eval_nb_samtale_wer": 16.536458333333336, "step": 7015 }, { "epoch": 5.75, "eval_bigbrother_loss": 1.0064964294433594, "eval_bigbrother_runtime": 282.2247, "eval_bigbrother_samples_per_second": 4.854, "eval_bigbrother_steps_per_second": 0.103, "eval_bigbrother_wer": 39.13638926931443, "step": 7015 }, { "epoch": 5.81, "grad_norm": 1.2069010734558105, "learning_rate": 6.477043026277794e-06, "loss": 0.0162, "step": 7100 }, { "epoch": 5.9, "grad_norm": 1.3795907497406006, "learning_rate": 6.419289633265955e-06, "loss": 0.0175, "step": 7200 }, { "epoch": 5.98, "grad_norm": 1.433837890625, "learning_rate": 6.361536240254116e-06, "loss": 0.0163, "step": 7300 }, { "epoch": 6.0, "eval_rundkast_loss": 0.21738122403621674, "eval_rundkast_runtime": 266.2552, "eval_rundkast_samples_per_second": 5.029, "eval_rundkast_steps_per_second": 0.105, "eval_rundkast_wer": 10.662294032954728, "step": 7320 }, { "epoch": 6.0, "eval_nb_samtale_loss": 0.5511901378631592, "eval_nb_samtale_runtime": 132.1334, "eval_nb_samtale_samples_per_second": 4.034, "eval_nb_samtale_steps_per_second": 0.091, "eval_nb_samtale_wer": 16.3330078125, "step": 7320 }, { "epoch": 6.0, "eval_bigbrother_loss": 1.019961953163147, "eval_bigbrother_runtime": 273.9638, "eval_bigbrother_samples_per_second": 5.001, "eval_bigbrother_steps_per_second": 0.106, "eval_bigbrother_wer": 36.80012889712398, "step": 7320 }, { "epoch": 6.06, "grad_norm": 0.8656061887741089, "learning_rate": 6.303782847242276e-06, "loss": 0.0113, "step": 7400 }, { "epoch": 6.14, "grad_norm": 0.8669420480728149, "learning_rate": 6.2460294542304365e-06, "loss": 0.0107, "step": 7500 }, { "epoch": 6.22, "grad_norm": 1.463745355606079, "learning_rate": 6.188276061218596e-06, "loss": 0.0098, "step": 7600 }, { "epoch": 6.24, "eval_rundkast_loss": 0.22738882899284363, "eval_rundkast_runtime": 265.2657, "eval_rundkast_samples_per_second": 5.048, "eval_rundkast_steps_per_second": 0.106, "eval_rundkast_wer": 10.646296592545193, "step": 7625 }, { "epoch": 6.24, "eval_nb_samtale_loss": 0.5670144557952881, "eval_nb_samtale_runtime": 132.5232, "eval_nb_samtale_samples_per_second": 4.022, "eval_nb_samtale_steps_per_second": 0.091, "eval_nb_samtale_wer": 16.3330078125, "step": 7625 }, { "epoch": 6.24, "eval_bigbrother_loss": 1.0504056215286255, "eval_bigbrother_runtime": 273.4437, "eval_bigbrother_samples_per_second": 5.01, "eval_bigbrother_steps_per_second": 0.106, "eval_bigbrother_wer": 36.72762426488359, "step": 7625 }, { "epoch": 6.31, "grad_norm": 0.5193389654159546, "learning_rate": 6.130522668206758e-06, "loss": 0.0114, "step": 7700 }, { "epoch": 6.39, "grad_norm": 1.134825348854065, "learning_rate": 6.0727692751949185e-06, "loss": 0.0104, "step": 7800 }, { "epoch": 6.47, "grad_norm": 1.1079447269439697, "learning_rate": 6.015015882183078e-06, "loss": 0.0105, "step": 7900 }, { "epoch": 6.49, "eval_rundkast_loss": 0.2256346195936203, "eval_rundkast_runtime": 266.2513, "eval_rundkast_samples_per_second": 5.029, "eval_rundkast_steps_per_second": 0.105, "eval_rundkast_wer": 10.414333706606943, "step": 7930 }, { "epoch": 6.49, "eval_nb_samtale_loss": 0.5754652619361877, "eval_nb_samtale_runtime": 131.2821, "eval_nb_samtale_samples_per_second": 4.06, "eval_nb_samtale_steps_per_second": 0.091, "eval_nb_samtale_wer": 16.764322916666664, "step": 7930 }, { "epoch": 6.49, "eval_bigbrother_loss": 1.0475353002548218, "eval_bigbrother_runtime": 276.5664, "eval_bigbrother_samples_per_second": 4.954, "eval_bigbrother_steps_per_second": 0.105, "eval_bigbrother_wer": 37.11431563683235, "step": 7930 } ], "logging_steps": 100, "max_steps": 18315, "num_input_tokens_seen": 0, "num_train_epochs": 15, "save_steps": 305, "total_flos": 1.098471467778048e+20, "train_batch_size": 48, "trial_name": null, "trial_params": null }