{ "best_metric": Infinity, "best_model_checkpoint": "./checkpoint-1000", "epoch": 4.999928330824912, "global_step": 17440, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03, "learning_rate": 3.7499999999999997e-06, "loss": 12.8121, "step": 100 }, { "epoch": 0.06, "learning_rate": 7.499999999999999e-06, "loss": 6.7805, "step": 200 }, { "epoch": 0.09, "learning_rate": 1.1249999999999999e-05, "loss": 4.6147, "step": 300 }, { "epoch": 0.11, "learning_rate": 1.4999999999999999e-05, "loss": 3.955, "step": 400 }, { "epoch": 0.14, "learning_rate": 1.875e-05, "loss": 3.5002, "step": 500 }, { "epoch": 0.17, "learning_rate": 2.2499999999999998e-05, "loss": 3.1912, "step": 600 }, { "epoch": 0.2, "learning_rate": 2.6249999999999998e-05, "loss": 3.0349, "step": 700 }, { "epoch": 0.23, "learning_rate": 2.99625e-05, "loss": 3.0058, "step": 800 }, { "epoch": 0.26, "learning_rate": 3.37125e-05, "loss": 2.9392, "step": 900 }, { "epoch": 0.29, "learning_rate": 3.7462499999999996e-05, "loss": 2.9114, "step": 1000 }, { "epoch": 0.29, "eval_loss": Infinity, "eval_runtime": 711.5482, "eval_samples_per_second": 22.516, "eval_steps_per_second": 1.408, "eval_wer": 0.9997048122028068, "step": 1000 }, { "epoch": 0.32, "learning_rate": 4.12125e-05, "loss": 2.8881, "step": 1100 }, { "epoch": 0.34, "learning_rate": 4.4962499999999995e-05, "loss": 2.7567, "step": 1200 }, { "epoch": 0.37, "learning_rate": 4.871249999999999e-05, "loss": 2.2193, "step": 1300 }, { "epoch": 0.4, "learning_rate": 5.2462499999999994e-05, "loss": 1.8226, "step": 1400 }, { "epoch": 0.43, "learning_rate": 5.62125e-05, "loss": 1.6558, "step": 1500 }, { "epoch": 0.46, "learning_rate": 5.9962499999999994e-05, "loss": 1.5479, "step": 1600 }, { "epoch": 0.49, "learning_rate": 6.37125e-05, "loss": 1.4422, "step": 1700 }, { "epoch": 0.52, "learning_rate": 6.746249999999999e-05, "loss": 1.3521, "step": 1800 }, { "epoch": 0.54, "learning_rate": 7.121249999999999e-05, "loss": 1.2988, "step": 1900 }, { "epoch": 0.57, "learning_rate": 7.49625e-05, "loss": 1.2436, "step": 2000 }, { "epoch": 0.57, "eval_loss": Infinity, "eval_runtime": 723.1036, "eval_samples_per_second": 22.156, "eval_steps_per_second": 1.386, "eval_wer": 0.43096134964994576, "step": 2000 }, { "epoch": 0.6, "learning_rate": 7.451910621761658e-05, "loss": 1.2041, "step": 2100 }, { "epoch": 0.63, "learning_rate": 7.403335492227979e-05, "loss": 1.1714, "step": 2200 }, { "epoch": 0.66, "learning_rate": 7.354760362694299e-05, "loss": 1.1481, "step": 2300 }, { "epoch": 0.69, "learning_rate": 7.306185233160621e-05, "loss": 1.1268, "step": 2400 }, { "epoch": 0.72, "learning_rate": 7.258095854922278e-05, "loss": 1.1271, "step": 2500 }, { "epoch": 0.75, "learning_rate": 7.2095207253886e-05, "loss": 1.0927, "step": 2600 }, { "epoch": 0.77, "learning_rate": 7.160945595854922e-05, "loss": 1.0879, "step": 2700 }, { "epoch": 0.8, "learning_rate": 7.112370466321243e-05, "loss": 1.0699, "step": 2800 }, { "epoch": 0.83, "learning_rate": 7.063795336787565e-05, "loss": 1.0669, "step": 2900 }, { "epoch": 0.86, "learning_rate": 7.015220207253885e-05, "loss": 1.0552, "step": 3000 }, { "epoch": 0.86, "eval_loss": Infinity, "eval_runtime": 706.7689, "eval_samples_per_second": 22.668, "eval_steps_per_second": 1.418, "eval_wer": 0.3144455923969891, "step": 3000 }, { "epoch": 0.89, "learning_rate": 6.966645077720206e-05, "loss": 1.0525, "step": 3100 }, { "epoch": 0.92, "learning_rate": 6.918069948186528e-05, "loss": 1.0475, "step": 3200 }, { "epoch": 0.95, "learning_rate": 6.86949481865285e-05, "loss": 1.0333, "step": 3300 }, { "epoch": 0.97, "learning_rate": 6.82091968911917e-05, "loss": 1.0227, "step": 3400 }, { "epoch": 1.0, "learning_rate": 6.772344559585491e-05, "loss": 1.0154, "step": 3500 }, { "epoch": 1.03, "learning_rate": 6.723769430051813e-05, "loss": 1.0084, "step": 3600 }, { "epoch": 1.06, "learning_rate": 6.675194300518135e-05, "loss": 1.0153, "step": 3700 }, { "epoch": 1.09, "learning_rate": 6.626619170984455e-05, "loss": 1.0059, "step": 3800 }, { "epoch": 1.12, "learning_rate": 6.578529792746114e-05, "loss": 1.0069, "step": 3900 }, { "epoch": 1.15, "learning_rate": 6.529954663212434e-05, "loss": 1.0044, "step": 4000 }, { "epoch": 1.15, "eval_loss": Infinity, "eval_runtime": 720.8047, "eval_samples_per_second": 22.227, "eval_steps_per_second": 1.39, "eval_wer": 0.2814038104894342, "step": 4000 }, { "epoch": 1.18, "learning_rate": 6.481379533678756e-05, "loss": 0.9918, "step": 4100 }, { "epoch": 1.2, "learning_rate": 6.432804404145077e-05, "loss": 0.9913, "step": 4200 }, { "epoch": 1.23, "learning_rate": 6.384229274611399e-05, "loss": 0.9955, "step": 4300 }, { "epoch": 1.26, "learning_rate": 6.33565414507772e-05, "loss": 0.989, "step": 4400 }, { "epoch": 1.29, "learning_rate": 6.287079015544041e-05, "loss": 0.9825, "step": 4500 }, { "epoch": 1.32, "learning_rate": 6.238503886010362e-05, "loss": 0.9793, "step": 4600 }, { "epoch": 1.35, "learning_rate": 6.189928756476684e-05, "loss": 0.9768, "step": 4700 }, { "epoch": 1.38, "learning_rate": 6.141353626943006e-05, "loss": 0.9712, "step": 4800 }, { "epoch": 1.4, "learning_rate": 6.092778497409326e-05, "loss": 0.9777, "step": 4900 }, { "epoch": 1.43, "learning_rate": 6.0442033678756475e-05, "loss": 0.9718, "step": 5000 }, { "epoch": 1.43, "eval_loss": Infinity, "eval_runtime": 706.8206, "eval_samples_per_second": 22.666, "eval_steps_per_second": 1.418, "eval_wer": 0.26581661137243073, "step": 5000 }, { "epoch": 1.46, "learning_rate": 5.995628238341968e-05, "loss": 0.967, "step": 5100 }, { "epoch": 1.49, "learning_rate": 5.947053108808289e-05, "loss": 0.9688, "step": 5200 }, { "epoch": 1.52, "learning_rate": 5.898477979274611e-05, "loss": 0.9677, "step": 5300 }, { "epoch": 1.55, "learning_rate": 5.8499028497409324e-05, "loss": 0.961, "step": 5400 }, { "epoch": 1.58, "learning_rate": 5.8013277202072536e-05, "loss": 0.9528, "step": 5500 }, { "epoch": 1.61, "learning_rate": 5.752752590673574e-05, "loss": 0.959, "step": 5600 }, { "epoch": 1.63, "learning_rate": 5.7041774611398954e-05, "loss": 0.9615, "step": 5700 }, { "epoch": 1.66, "learning_rate": 5.6556023316062174e-05, "loss": 0.9587, "step": 5800 }, { "epoch": 1.69, "learning_rate": 5.607512953367875e-05, "loss": 0.9573, "step": 5900 }, { "epoch": 1.72, "learning_rate": 5.5589378238341966e-05, "loss": 0.9502, "step": 6000 }, { "epoch": 1.72, "eval_loss": Infinity, "eval_runtime": 705.7107, "eval_samples_per_second": 22.702, "eval_steps_per_second": 1.42, "eval_wer": 0.2566016183991837, "step": 6000 }, { "epoch": 1.75, "learning_rate": 5.510362694300518e-05, "loss": 0.9515, "step": 6100 }, { "epoch": 1.78, "learning_rate": 5.461787564766839e-05, "loss": 0.9479, "step": 6200 }, { "epoch": 1.81, "learning_rate": 5.4132124352331596e-05, "loss": 0.94, "step": 6300 }, { "epoch": 1.83, "learning_rate": 5.3646373056994816e-05, "loss": 0.9363, "step": 6400 }, { "epoch": 1.86, "learning_rate": 5.316062176165803e-05, "loss": 0.9419, "step": 6500 }, { "epoch": 1.89, "learning_rate": 5.267487046632124e-05, "loss": 0.9435, "step": 6600 }, { "epoch": 1.92, "learning_rate": 5.218911917098445e-05, "loss": 0.9406, "step": 6700 }, { "epoch": 1.95, "learning_rate": 5.170336787564766e-05, "loss": 0.9457, "step": 6800 }, { "epoch": 1.98, "learning_rate": 5.121761658031088e-05, "loss": 0.9343, "step": 6900 }, { "epoch": 2.01, "learning_rate": 5.073186528497409e-05, "loss": 0.9418, "step": 7000 }, { "epoch": 2.01, "eval_loss": Infinity, "eval_runtime": 712.8454, "eval_samples_per_second": 22.475, "eval_steps_per_second": 1.406, "eval_wer": 0.24762405908889645, "step": 7000 }, { "epoch": 2.04, "learning_rate": 5.02461139896373e-05, "loss": 0.9401, "step": 7100 }, { "epoch": 2.06, "learning_rate": 4.9760362694300515e-05, "loss": 0.9375, "step": 7200 }, { "epoch": 2.09, "learning_rate": 4.927461139896372e-05, "loss": 0.9362, "step": 7300 }, { "epoch": 2.12, "learning_rate": 4.878886010362694e-05, "loss": 0.9249, "step": 7400 }, { "epoch": 2.15, "learning_rate": 4.830310880829015e-05, "loss": 0.9283, "step": 7500 }, { "epoch": 2.18, "learning_rate": 4.782221502590673e-05, "loss": 0.9371, "step": 7600 }, { "epoch": 2.21, "learning_rate": 4.7336463730569944e-05, "loss": 0.9346, "step": 7700 }, { "epoch": 2.24, "learning_rate": 4.6850712435233157e-05, "loss": 0.9266, "step": 7800 }, { "epoch": 2.26, "learning_rate": 4.636496113989637e-05, "loss": 0.9285, "step": 7900 }, { "epoch": 2.29, "learning_rate": 4.587920984455958e-05, "loss": 0.9215, "step": 8000 }, { "epoch": 2.29, "eval_loss": Infinity, "eval_runtime": 699.6113, "eval_samples_per_second": 22.9, "eval_steps_per_second": 1.432, "eval_wer": 0.24195131968196723, "step": 8000 }, { "epoch": 2.32, "learning_rate": 4.5393458549222794e-05, "loss": 0.9211, "step": 8100 }, { "epoch": 2.35, "learning_rate": 4.4907707253886006e-05, "loss": 0.9198, "step": 8200 }, { "epoch": 2.38, "learning_rate": 4.442195595854922e-05, "loss": 0.9257, "step": 8300 }, { "epoch": 2.41, "learning_rate": 4.393620466321243e-05, "loss": 0.9279, "step": 8400 }, { "epoch": 2.44, "learning_rate": 4.345045336787564e-05, "loss": 0.9111, "step": 8500 }, { "epoch": 2.47, "learning_rate": 4.296470207253886e-05, "loss": 0.9176, "step": 8600 }, { "epoch": 2.49, "learning_rate": 4.247895077720207e-05, "loss": 0.9169, "step": 8700 }, { "epoch": 2.52, "learning_rate": 4.199319948186528e-05, "loss": 0.9103, "step": 8800 }, { "epoch": 2.55, "learning_rate": 4.150744818652849e-05, "loss": 0.9102, "step": 8900 }, { "epoch": 2.58, "learning_rate": 4.102655440414507e-05, "loss": 0.9236, "step": 9000 }, { "epoch": 2.58, "eval_loss": Infinity, "eval_runtime": 704.4849, "eval_samples_per_second": 22.741, "eval_steps_per_second": 1.422, "eval_wer": 0.23878125942515385, "step": 9000 }, { "epoch": 2.61, "learning_rate": 4.0540803108808285e-05, "loss": 0.9129, "step": 9100 }, { "epoch": 2.64, "learning_rate": 4.0055051813471504e-05, "loss": 0.9117, "step": 9200 }, { "epoch": 2.67, "learning_rate": 3.956930051813472e-05, "loss": 0.912, "step": 9300 }, { "epoch": 2.69, "learning_rate": 3.908354922279792e-05, "loss": 0.9013, "step": 9400 }, { "epoch": 2.72, "learning_rate": 3.8597797927461135e-05, "loss": 0.9128, "step": 9500 }, { "epoch": 2.75, "learning_rate": 3.811204663212435e-05, "loss": 0.9061, "step": 9600 }, { "epoch": 2.78, "learning_rate": 3.7626295336787566e-05, "loss": 0.9055, "step": 9700 }, { "epoch": 2.81, "learning_rate": 3.714054404145078e-05, "loss": 0.9019, "step": 9800 }, { "epoch": 2.84, "learning_rate": 3.6654792746113984e-05, "loss": 0.9, "step": 9900 }, { "epoch": 2.87, "learning_rate": 3.61690414507772e-05, "loss": 0.9014, "step": 10000 }, { "epoch": 2.87, "eval_loss": Infinity, "eval_runtime": 702.0047, "eval_samples_per_second": 22.822, "eval_steps_per_second": 1.427, "eval_wer": 0.2353801826314067, "step": 10000 }, { "epoch": 2.9, "learning_rate": 3.5683290155440416e-05, "loss": 0.9045, "step": 10100 }, { "epoch": 2.92, "learning_rate": 3.519753886010362e-05, "loss": 0.8981, "step": 10200 }, { "epoch": 2.95, "learning_rate": 3.471178756476684e-05, "loss": 0.898, "step": 10300 }, { "epoch": 2.98, "learning_rate": 3.4226036269430046e-05, "loss": 0.8926, "step": 10400 }, { "epoch": 3.01, "learning_rate": 3.374028497409326e-05, "loss": 0.9, "step": 10500 }, { "epoch": 3.04, "learning_rate": 3.325453367875648e-05, "loss": 0.8928, "step": 10600 }, { "epoch": 3.07, "learning_rate": 3.276878238341968e-05, "loss": 0.8844, "step": 10700 }, { "epoch": 3.1, "learning_rate": 3.22830310880829e-05, "loss": 0.8844, "step": 10800 }, { "epoch": 3.12, "learning_rate": 3.179727979274611e-05, "loss": 0.89, "step": 10900 }, { "epoch": 3.15, "learning_rate": 3.131152849740932e-05, "loss": 0.8814, "step": 11000 }, { "epoch": 3.15, "eval_loss": Infinity, "eval_runtime": 712.5543, "eval_samples_per_second": 22.484, "eval_steps_per_second": 1.406, "eval_wer": 0.23118979933646916, "step": 11000 }, { "epoch": 3.18, "learning_rate": 3.082577720207254e-05, "loss": 0.8912, "step": 11100 }, { "epoch": 3.21, "learning_rate": 3.034002590673575e-05, "loss": 0.8956, "step": 11200 }, { "epoch": 3.24, "learning_rate": 2.985427461139896e-05, "loss": 0.8866, "step": 11300 }, { "epoch": 3.27, "learning_rate": 2.9368523316062173e-05, "loss": 0.8851, "step": 11400 }, { "epoch": 3.3, "learning_rate": 2.888277202072539e-05, "loss": 0.8897, "step": 11500 }, { "epoch": 3.33, "learning_rate": 2.8397020725388598e-05, "loss": 0.8832, "step": 11600 }, { "epoch": 3.35, "learning_rate": 2.791126943005181e-05, "loss": 0.8887, "step": 11700 }, { "epoch": 3.38, "learning_rate": 2.7425518134715023e-05, "loss": 0.8847, "step": 11800 }, { "epoch": 3.41, "learning_rate": 2.6939766839378235e-05, "loss": 0.8754, "step": 11900 }, { "epoch": 3.44, "learning_rate": 2.645401554404145e-05, "loss": 0.8809, "step": 12000 }, { "epoch": 3.44, "eval_loss": Infinity, "eval_runtime": 721.0245, "eval_samples_per_second": 22.22, "eval_steps_per_second": 1.39, "eval_wer": 0.22846893790147144, "step": 12000 }, { "epoch": 3.47, "learning_rate": 2.596826424870466e-05, "loss": 0.8739, "step": 12100 }, { "epoch": 3.5, "learning_rate": 2.5482512953367873e-05, "loss": 0.8713, "step": 12200 }, { "epoch": 3.53, "learning_rate": 2.4996761658031085e-05, "loss": 0.8833, "step": 12300 }, { "epoch": 3.56, "learning_rate": 2.4511010362694297e-05, "loss": 0.8756, "step": 12400 }, { "epoch": 3.58, "learning_rate": 2.4025259067357513e-05, "loss": 0.8705, "step": 12500 }, { "epoch": 3.61, "learning_rate": 2.3539507772020722e-05, "loss": 0.874, "step": 12600 }, { "epoch": 3.64, "learning_rate": 2.3053756476683938e-05, "loss": 0.8681, "step": 12700 }, { "epoch": 3.67, "learning_rate": 2.2568005181347147e-05, "loss": 0.8699, "step": 12800 }, { "epoch": 3.7, "learning_rate": 2.208225388601036e-05, "loss": 0.8749, "step": 12900 }, { "epoch": 3.73, "learning_rate": 2.1596502590673575e-05, "loss": 0.8717, "step": 13000 }, { "epoch": 3.73, "eval_loss": Infinity, "eval_runtime": 704.6479, "eval_samples_per_second": 22.736, "eval_steps_per_second": 1.422, "eval_wer": 0.22629353217867845, "step": 13000 }, { "epoch": 3.76, "learning_rate": 2.1110751295336784e-05, "loss": 0.8725, "step": 13100 }, { "epoch": 3.78, "learning_rate": 2.0625e-05, "loss": 0.8678, "step": 13200 }, { "epoch": 3.81, "learning_rate": 2.0139248704663212e-05, "loss": 0.8618, "step": 13300 }, { "epoch": 3.84, "learning_rate": 1.9658354922279792e-05, "loss": 0.8646, "step": 13400 }, { "epoch": 3.87, "learning_rate": 1.9172603626943005e-05, "loss": 0.8746, "step": 13500 }, { "epoch": 3.9, "learning_rate": 1.8686852331606217e-05, "loss": 0.863, "step": 13600 }, { "epoch": 3.93, "learning_rate": 1.8205958549222797e-05, "loss": 0.862, "step": 13700 }, { "epoch": 3.96, "learning_rate": 1.772020725388601e-05, "loss": 0.8678, "step": 13800 }, { "epoch": 3.99, "learning_rate": 1.723445595854922e-05, "loss": 0.8694, "step": 13900 }, { "epoch": 4.01, "learning_rate": 1.6748704663212434e-05, "loss": 0.8787, "step": 14000 }, { "epoch": 4.01, "eval_loss": Infinity, "eval_runtime": 708.1909, "eval_samples_per_second": 22.622, "eval_steps_per_second": 1.415, "eval_wer": 0.2217630412043662, "step": 14000 }, { "epoch": 4.04, "learning_rate": 1.6262953367875646e-05, "loss": 0.8657, "step": 14100 }, { "epoch": 4.07, "learning_rate": 1.577720207253886e-05, "loss": 0.8631, "step": 14200 }, { "epoch": 4.1, "learning_rate": 1.529630829015544e-05, "loss": 0.8658, "step": 14300 }, { "epoch": 4.13, "learning_rate": 1.4810556994818651e-05, "loss": 0.855, "step": 14400 }, { "epoch": 4.16, "learning_rate": 1.4324805699481864e-05, "loss": 0.8502, "step": 14500 }, { "epoch": 4.19, "learning_rate": 1.3839054404145076e-05, "loss": 0.8625, "step": 14600 }, { "epoch": 4.21, "learning_rate": 1.335330310880829e-05, "loss": 0.8598, "step": 14700 }, { "epoch": 4.24, "learning_rate": 1.2867551813471502e-05, "loss": 0.8574, "step": 14800 }, { "epoch": 4.27, "learning_rate": 1.2381800518134713e-05, "loss": 0.8575, "step": 14900 }, { "epoch": 4.3, "learning_rate": 1.1896049222797926e-05, "loss": 0.8567, "step": 15000 }, { "epoch": 4.3, "eval_loss": Infinity, "eval_runtime": 705.2463, "eval_samples_per_second": 22.717, "eval_steps_per_second": 1.421, "eval_wer": 0.21933736756656164, "step": 15000 }, { "epoch": 4.33, "learning_rate": 1.1410297927461138e-05, "loss": 0.8474, "step": 15100 }, { "epoch": 4.36, "learning_rate": 1.0924546632124352e-05, "loss": 0.8519, "step": 15200 }, { "epoch": 4.39, "learning_rate": 1.0438795336787564e-05, "loss": 0.8487, "step": 15300 }, { "epoch": 4.42, "learning_rate": 9.953044041450777e-06, "loss": 0.8479, "step": 15400 }, { "epoch": 4.44, "learning_rate": 9.467292746113987e-06, "loss": 0.8463, "step": 15500 }, { "epoch": 4.47, "learning_rate": 8.981541450777202e-06, "loss": 0.8459, "step": 15600 }, { "epoch": 4.5, "learning_rate": 8.500647668393781e-06, "loss": 0.853, "step": 15700 }, { "epoch": 4.53, "learning_rate": 8.014896373056994e-06, "loss": 0.8456, "step": 15800 }, { "epoch": 4.56, "learning_rate": 7.529145077720206e-06, "loss": 0.8501, "step": 15900 }, { "epoch": 4.59, "learning_rate": 7.0433937823834195e-06, "loss": 0.8488, "step": 16000 }, { "epoch": 4.59, "eval_loss": Infinity, "eval_runtime": 702.6461, "eval_samples_per_second": 22.801, "eval_steps_per_second": 1.426, "eval_wer": 0.2187084892160197, "step": 16000 }, { "epoch": 4.62, "learning_rate": 6.557642487046632e-06, "loss": 0.8491, "step": 16100 }, { "epoch": 4.64, "learning_rate": 6.0718911917098434e-06, "loss": 0.8415, "step": 16200 }, { "epoch": 4.67, "learning_rate": 5.586139896373057e-06, "loss": 0.8467, "step": 16300 }, { "epoch": 4.7, "learning_rate": 5.100388601036269e-06, "loss": 0.8356, "step": 16400 }, { "epoch": 4.73, "learning_rate": 4.6146373056994814e-06, "loss": 0.8444, "step": 16500 }, { "epoch": 4.76, "learning_rate": 4.128886010362694e-06, "loss": 0.8485, "step": 16600 }, { "epoch": 4.79, "learning_rate": 3.6431347150259062e-06, "loss": 0.8424, "step": 16700 }, { "epoch": 4.82, "learning_rate": 3.1573834196891186e-06, "loss": 0.8435, "step": 16800 }, { "epoch": 4.85, "learning_rate": 2.6716321243523314e-06, "loss": 0.8444, "step": 16900 }, { "epoch": 4.87, "learning_rate": 2.185880829015544e-06, "loss": 0.8359, "step": 17000 }, { "epoch": 4.87, "eval_loss": Infinity, "eval_runtime": 698.574, "eval_samples_per_second": 22.934, "eval_steps_per_second": 1.434, "eval_wer": 0.2171683789697946, "step": 17000 }, { "epoch": 4.9, "learning_rate": 1.7001295336787564e-06, "loss": 0.8419, "step": 17100 }, { "epoch": 4.93, "learning_rate": 1.2143782383419686e-06, "loss": 0.8392, "step": 17200 }, { "epoch": 4.96, "learning_rate": 7.286269430051813e-07, "loss": 0.8369, "step": 17300 }, { "epoch": 4.99, "learning_rate": 2.4287564766839375e-07, "loss": 0.8428, "step": 17400 }, { "epoch": 5.0, "step": 17440, "total_flos": 3.487279250614594e+20, "train_loss": 1.187884036554109, "train_runtime": 143109.8834, "train_samples_per_second": 15.599, "train_steps_per_second": 0.122 } ], "max_steps": 17440, "num_train_epochs": 5, "total_flos": 3.487279250614594e+20, "trial_name": null, "trial_params": null }