{ "best_metric": 43.218, "best_model_checkpoint": "opus_big_adapt_wce_gloss_unsampled_precision_2_ubweight_1.5/checkpoint-80000", "epoch": 1.9716574245224892, "global_step": 80000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.2, "learning_rate": 1.9753635243376465e-05, "loss": 0.1832, "step": 8000 }, { "epoch": 0.2, "eval_bleu": 42.311, "eval_gen_len": 33.3634, "eval_loss": 0.0981452614068985, "eval_runtime": 129.4343, "eval_samples_per_second": 8.058, "eval_steps_per_second": 0.51, "step": 8000 }, { "epoch": 0.39, "learning_rate": 1.950723967960567e-05, "loss": 0.152, "step": 16000 }, { "epoch": 0.39, "eval_bleu": 42.5149, "eval_gen_len": 33.8552, "eval_loss": 0.09721900522708893, "eval_runtime": 121.8178, "eval_samples_per_second": 8.562, "eval_steps_per_second": 0.542, "step": 16000 }, { "epoch": 0.59, "learning_rate": 1.9260844115834877e-05, "loss": 0.1477, "step": 24000 }, { "epoch": 0.59, "eval_bleu": 42.3053, "eval_gen_len": 33.1083, "eval_loss": 0.09773427993059158, "eval_runtime": 113.6446, "eval_samples_per_second": 9.178, "eval_steps_per_second": 0.581, "step": 24000 }, { "epoch": 0.79, "learning_rate": 1.901447935921134e-05, "loss": 0.1437, "step": 32000 }, { "epoch": 0.79, "eval_bleu": 42.7689, "eval_gen_len": 34.2637, "eval_loss": 0.09667054563760757, "eval_runtime": 127.749, "eval_samples_per_second": 8.164, "eval_steps_per_second": 0.517, "step": 32000 }, { "epoch": 0.99, "learning_rate": 1.8768114602587803e-05, "loss": 0.1422, "step": 40000 }, { "epoch": 0.99, "eval_bleu": 42.1023, "eval_gen_len": 34.2656, "eval_loss": 0.09601164609193802, "eval_runtime": 127.2919, "eval_samples_per_second": 8.194, "eval_steps_per_second": 0.518, "step": 40000 }, { "epoch": 1.18, "learning_rate": 1.8521749845964266e-05, "loss": 0.1265, "step": 48000 }, { "epoch": 1.18, "eval_bleu": 42.4637, "eval_gen_len": 34.1064, "eval_loss": 0.09702879190444946, "eval_runtime": 125.1346, "eval_samples_per_second": 8.335, "eval_steps_per_second": 0.527, "step": 48000 }, { "epoch": 1.38, "learning_rate": 1.8275415896487988e-05, "loss": 0.1269, "step": 56000 }, { "epoch": 1.38, "eval_bleu": 42.9093, "eval_gen_len": 33.7507, "eval_loss": 0.09716643393039703, "eval_runtime": 127.4089, "eval_samples_per_second": 8.186, "eval_steps_per_second": 0.518, "step": 56000 }, { "epoch": 1.58, "learning_rate": 1.8029020332717192e-05, "loss": 0.1275, "step": 64000 }, { "epoch": 1.58, "eval_bleu": 42.6221, "eval_gen_len": 34.4372, "eval_loss": 0.09658621996641159, "eval_runtime": 249.3691, "eval_samples_per_second": 4.183, "eval_steps_per_second": 0.265, "step": 64000 }, { "epoch": 1.77, "learning_rate": 1.7782655576093655e-05, "loss": 0.127, "step": 72000 }, { "epoch": 1.77, "eval_bleu": 42.3284, "eval_gen_len": 34.0441, "eval_loss": 0.09654054045677185, "eval_runtime": 197.536, "eval_samples_per_second": 5.28, "eval_steps_per_second": 0.334, "step": 72000 }, { "epoch": 1.97, "learning_rate": 1.7536321626617377e-05, "loss": 0.1266, "step": 80000 }, { "epoch": 1.97, "eval_bleu": 43.218, "eval_gen_len": 34.1035, "eval_loss": 0.09614226967096329, "eval_runtime": 124.4592, "eval_samples_per_second": 8.38, "eval_steps_per_second": 0.53, "step": 80000 } ], "max_steps": 649200, "num_train_epochs": 16, "total_flos": 2.5844916958632346e+17, "trial_name": null, "trial_params": null }