{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0321027287319422, "global_step": 160, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03, "learning_rate": 4.9193548387096775e-05, "loss": 2.714, "step": 5 }, { "epoch": 0.06, "learning_rate": 4.8387096774193554e-05, "loss": 1.6354, "step": 10 }, { "epoch": 0.06, "eval_loss": 1.7578704357147217, "eval_rouge2_fmeasure": 0.0827, "eval_rouge2_precision": 0.0554, "eval_rouge2_recall": 0.1855, "eval_runtime": 1288.6002, "eval_samples_per_second": 0.388, "eval_steps_per_second": 0.039, "step": 10 }, { "epoch": 0.1, "learning_rate": 4.7580645161290326e-05, "loss": 1.5202, "step": 15 }, { "epoch": 0.13, "learning_rate": 4.67741935483871e-05, "loss": 1.5048, "step": 20 }, { "epoch": 0.13, "eval_loss": 1.6580469608306885, "eval_rouge2_fmeasure": 0.0917, "eval_rouge2_precision": 0.0625, "eval_rouge2_recall": 0.1921, "eval_runtime": 1296.1289, "eval_samples_per_second": 0.386, "eval_steps_per_second": 0.039, "step": 20 }, { "epoch": 0.16, "learning_rate": 4.596774193548387e-05, "loss": 1.4322, "step": 25 }, { "epoch": 0.19, "learning_rate": 4.516129032258064e-05, "loss": 1.4194, "step": 30 }, { "epoch": 0.19, "eval_loss": 1.5461347103118896, "eval_rouge2_fmeasure": 0.0919, "eval_rouge2_precision": 0.0613, "eval_rouge2_recall": 0.2071, "eval_runtime": 1357.8126, "eval_samples_per_second": 0.368, "eval_steps_per_second": 0.037, "step": 30 }, { "epoch": 0.22, "learning_rate": 4.435483870967742e-05, "loss": 1.3347, "step": 35 }, { "epoch": 0.26, "learning_rate": 4.3548387096774194e-05, "loss": 1.3539, "step": 40 }, { "epoch": 0.26, "eval_loss": 1.5223164558410645, "eval_rouge2_fmeasure": 0.0909, "eval_rouge2_precision": 0.0606, "eval_rouge2_recall": 0.2068, "eval_runtime": 1335.0407, "eval_samples_per_second": 0.375, "eval_steps_per_second": 0.037, "step": 40 }, { "epoch": 0.29, "learning_rate": 4.2741935483870973e-05, "loss": 1.351, "step": 45 }, { "epoch": 0.32, "learning_rate": 4.1935483870967746e-05, "loss": 1.3326, "step": 50 }, { "epoch": 0.32, "eval_loss": 1.483912706375122, "eval_rouge2_fmeasure": 0.0902, "eval_rouge2_precision": 0.0594, "eval_rouge2_recall": 0.2113, "eval_runtime": 1337.6258, "eval_samples_per_second": 0.374, "eval_steps_per_second": 0.037, "step": 50 }, { "epoch": 0.35, "learning_rate": 4.112903225806452e-05, "loss": 1.2886, "step": 55 }, { "epoch": 0.39, "learning_rate": 4.032258064516129e-05, "loss": 1.2789, "step": 60 }, { "epoch": 0.39, "eval_loss": 1.5512304306030273, "eval_rouge2_fmeasure": 0.0954, "eval_rouge2_precision": 0.0639, "eval_rouge2_recall": 0.2123, "eval_runtime": 1377.3491, "eval_samples_per_second": 0.363, "eval_steps_per_second": 0.036, "step": 60 }, { "epoch": 0.42, "learning_rate": 3.951612903225806e-05, "loss": 1.2981, "step": 65 }, { "epoch": 0.45, "learning_rate": 3.870967741935484e-05, "loss": 1.2861, "step": 70 }, { "epoch": 0.45, "eval_loss": 1.461083173751831, "eval_rouge2_fmeasure": 0.098, "eval_rouge2_precision": 0.0651, "eval_rouge2_recall": 0.2234, "eval_runtime": 1306.3575, "eval_samples_per_second": 0.383, "eval_steps_per_second": 0.038, "step": 70 }, { "epoch": 0.48, "learning_rate": 3.7903225806451614e-05, "loss": 1.3188, "step": 75 }, { "epoch": 0.51, "learning_rate": 3.7096774193548386e-05, "loss": 1.2977, "step": 80 }, { "epoch": 0.51, "eval_loss": 1.4521564245224, "eval_rouge2_fmeasure": 0.1013, "eval_rouge2_precision": 0.0677, "eval_rouge2_recall": 0.2272, "eval_runtime": 1315.4665, "eval_samples_per_second": 0.38, "eval_steps_per_second": 0.038, "step": 80 }, { "epoch": 0.55, "learning_rate": 3.6290322580645165e-05, "loss": 1.3004, "step": 85 }, { "epoch": 0.58, "learning_rate": 3.548387096774194e-05, "loss": 1.2403, "step": 90 }, { "epoch": 0.58, "eval_loss": 1.4190800189971924, "eval_rouge2_fmeasure": 0.1044, "eval_rouge2_precision": 0.0695, "eval_rouge2_recall": 0.2376, "eval_runtime": 1335.9699, "eval_samples_per_second": 0.374, "eval_steps_per_second": 0.037, "step": 90 }, { "epoch": 0.61, "learning_rate": 3.467741935483872e-05, "loss": 1.2567, "step": 95 }, { "epoch": 0.64, "learning_rate": 3.387096774193548e-05, "loss": 1.2235, "step": 100 }, { "epoch": 0.64, "eval_loss": 1.4258784055709839, "eval_rouge2_fmeasure": 0.0988, "eval_rouge2_precision": 0.0658, "eval_rouge2_recall": 0.2201, "eval_runtime": 1320.053, "eval_samples_per_second": 0.379, "eval_steps_per_second": 0.038, "step": 100 }, { "epoch": 0.67, "learning_rate": 3.306451612903226e-05, "loss": 1.1957, "step": 105 }, { "epoch": 0.71, "learning_rate": 3.2258064516129034e-05, "loss": 1.2944, "step": 110 }, { "epoch": 0.71, "eval_loss": 1.3971573114395142, "eval_rouge2_fmeasure": 0.1008, "eval_rouge2_precision": 0.0668, "eval_rouge2_recall": 0.2303, "eval_runtime": 1291.1344, "eval_samples_per_second": 0.387, "eval_steps_per_second": 0.039, "step": 110 }, { "epoch": 0.74, "learning_rate": 3.1451612903225806e-05, "loss": 1.236, "step": 115 }, { "epoch": 0.77, "learning_rate": 3.0645161290322585e-05, "loss": 1.2184, "step": 120 }, { "epoch": 0.77, "eval_loss": 1.4044127464294434, "eval_rouge2_fmeasure": 0.1045, "eval_rouge2_precision": 0.0688, "eval_rouge2_recall": 0.2434, "eval_runtime": 1297.8926, "eval_samples_per_second": 0.385, "eval_steps_per_second": 0.039, "step": 120 }, { "epoch": 0.8, "learning_rate": 2.9838709677419357e-05, "loss": 1.1964, "step": 125 }, { "epoch": 0.83, "learning_rate": 2.9032258064516133e-05, "loss": 1.2579, "step": 130 }, { "epoch": 0.83, "eval_loss": 1.3979231119155884, "eval_rouge2_fmeasure": 0.102, "eval_rouge2_precision": 0.0678, "eval_rouge2_recall": 0.2317, "eval_runtime": 1320.6473, "eval_samples_per_second": 0.379, "eval_steps_per_second": 0.038, "step": 130 }, { "epoch": 0.87, "learning_rate": 2.822580645161291e-05, "loss": 1.2123, "step": 135 }, { "epoch": 0.9, "learning_rate": 2.7419354838709678e-05, "loss": 1.1875, "step": 140 }, { "epoch": 0.9, "eval_loss": 1.431972861289978, "eval_rouge2_fmeasure": 0.104, "eval_rouge2_precision": 0.0694, "eval_rouge2_recall": 0.2333, "eval_runtime": 1301.699, "eval_samples_per_second": 0.384, "eval_steps_per_second": 0.038, "step": 140 }, { "epoch": 0.93, "learning_rate": 2.661290322580645e-05, "loss": 1.2311, "step": 145 }, { "epoch": 0.96, "learning_rate": 2.5806451612903226e-05, "loss": 1.2116, "step": 150 }, { "epoch": 0.96, "eval_loss": 1.417688012123108, "eval_rouge2_fmeasure": 0.1034, "eval_rouge2_precision": 0.069, "eval_rouge2_recall": 0.2334, "eval_runtime": 1305.2333, "eval_samples_per_second": 0.383, "eval_steps_per_second": 0.038, "step": 150 }, { "epoch": 1.0, "learning_rate": 2.5e-05, "loss": 1.2515, "step": 155 }, { "epoch": 1.03, "learning_rate": 2.4193548387096777e-05, "loss": 1.3298, "step": 160 }, { "epoch": 1.03, "eval_loss": 1.4143891334533691, "eval_rouge2_fmeasure": 0.105, "eval_rouge2_precision": 0.0705, "eval_rouge2_recall": 0.2332, "eval_runtime": 1300.2169, "eval_samples_per_second": 0.385, "eval_steps_per_second": 0.038, "step": 160 } ], "max_steps": 310, "num_train_epochs": 2, "total_flos": 6.944926835146752e+16, "trial_name": null, "trial_params": null }