{ "best_metric": 0.5942, "best_model_checkpoint": "save_model/flan-t5-small-amazon-reviews-multi-zero-shot/checkpoint-31250", "epoch": 5.0, "global_step": 31250, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.08, "learning_rate": 4.92e-05, "loss": 0.7233, "step": 500 }, { "epoch": 0.16, "learning_rate": 4.8400000000000004e-05, "loss": 0.5502, "step": 1000 }, { "epoch": 0.24, "learning_rate": 4.76e-05, "loss": 0.53, "step": 1500 }, { "epoch": 0.32, "learning_rate": 4.6800000000000006e-05, "loss": 0.5205, "step": 2000 }, { "epoch": 0.4, "learning_rate": 4.600000000000001e-05, "loss": 0.5167, "step": 2500 }, { "epoch": 0.48, "learning_rate": 4.52e-05, "loss": 0.5169, "step": 3000 }, { "epoch": 0.56, "learning_rate": 4.44e-05, "loss": 0.5075, "step": 3500 }, { "epoch": 0.64, "learning_rate": 4.36e-05, "loss": 0.5072, "step": 4000 }, { "epoch": 0.72, "learning_rate": 4.2800000000000004e-05, "loss": 0.5038, "step": 4500 }, { "epoch": 0.8, "learning_rate": 4.2e-05, "loss": 0.4952, "step": 5000 }, { "epoch": 0.88, "learning_rate": 4.12e-05, "loss": 0.5005, "step": 5500 }, { "epoch": 0.96, "learning_rate": 4.0400000000000006e-05, "loss": 0.4927, "step": 6000 }, { "epoch": 1.0, "eval_accuracy": 0.5824, "eval_loss": 0.4850093126296997, "eval_runtime": 9.725, "eval_samples_per_second": 514.137, "eval_steps_per_second": 16.144, "step": 6250 }, { "epoch": 1.04, "learning_rate": 3.960000000000001e-05, "loss": 0.4896, "step": 6500 }, { "epoch": 1.12, "learning_rate": 3.88e-05, "loss": 0.4845, "step": 7000 }, { "epoch": 1.2, "learning_rate": 3.8e-05, "loss": 0.4827, "step": 7500 }, { "epoch": 1.28, "learning_rate": 3.72e-05, "loss": 0.4795, "step": 8000 }, { "epoch": 1.36, "learning_rate": 3.6400000000000004e-05, "loss": 0.4804, "step": 8500 }, { "epoch": 1.44, "learning_rate": 3.56e-05, "loss": 0.4822, "step": 9000 }, { "epoch": 1.52, "learning_rate": 3.48e-05, "loss": 0.4804, "step": 9500 }, { "epoch": 1.6, "learning_rate": 3.4000000000000007e-05, "loss": 0.4803, "step": 10000 }, { "epoch": 1.68, "learning_rate": 3.32e-05, "loss": 0.482, "step": 10500 }, { "epoch": 1.76, "learning_rate": 3.24e-05, "loss": 0.4791, "step": 11000 }, { "epoch": 1.84, "learning_rate": 3.16e-05, "loss": 0.4803, "step": 11500 }, { "epoch": 1.92, "learning_rate": 3.08e-05, "loss": 0.476, "step": 12000 }, { "epoch": 2.0, "learning_rate": 3e-05, "loss": 0.4756, "step": 12500 }, { "epoch": 2.0, "eval_accuracy": 0.5892, "eval_loss": 0.4798971712589264, "eval_runtime": 9.5745, "eval_samples_per_second": 522.222, "eval_steps_per_second": 16.398, "step": 12500 }, { "epoch": 2.08, "learning_rate": 2.9199999999999998e-05, "loss": 0.4657, "step": 13000 }, { "epoch": 2.16, "learning_rate": 2.84e-05, "loss": 0.469, "step": 13500 }, { "epoch": 2.24, "learning_rate": 2.7600000000000003e-05, "loss": 0.4719, "step": 14000 }, { "epoch": 2.32, "learning_rate": 2.6800000000000004e-05, "loss": 0.4698, "step": 14500 }, { "epoch": 2.4, "learning_rate": 2.6000000000000002e-05, "loss": 0.4718, "step": 15000 }, { "epoch": 2.48, "learning_rate": 2.5200000000000003e-05, "loss": 0.4693, "step": 15500 }, { "epoch": 2.56, "learning_rate": 2.44e-05, "loss": 0.4659, "step": 16000 }, { "epoch": 2.64, "learning_rate": 2.36e-05, "loss": 0.4665, "step": 16500 }, { "epoch": 2.72, "learning_rate": 2.2800000000000002e-05, "loss": 0.4654, "step": 17000 }, { "epoch": 2.8, "learning_rate": 2.2000000000000003e-05, "loss": 0.4634, "step": 17500 }, { "epoch": 2.88, "learning_rate": 2.12e-05, "loss": 0.466, "step": 18000 }, { "epoch": 2.96, "learning_rate": 2.04e-05, "loss": 0.4679, "step": 18500 }, { "epoch": 3.0, "eval_accuracy": 0.591, "eval_loss": 0.475551575422287, "eval_runtime": 9.5718, "eval_samples_per_second": 522.366, "eval_steps_per_second": 16.402, "step": 18750 }, { "epoch": 3.04, "learning_rate": 1.9600000000000002e-05, "loss": 0.4569, "step": 19000 }, { "epoch": 3.12, "learning_rate": 1.88e-05, "loss": 0.4539, "step": 19500 }, { "epoch": 3.2, "learning_rate": 1.8e-05, "loss": 0.4567, "step": 20000 }, { "epoch": 3.28, "learning_rate": 1.7199999999999998e-05, "loss": 0.4573, "step": 20500 }, { "epoch": 3.36, "learning_rate": 1.6400000000000002e-05, "loss": 0.4575, "step": 21000 }, { "epoch": 3.44, "learning_rate": 1.56e-05, "loss": 0.4651, "step": 21500 }, { "epoch": 3.52, "learning_rate": 1.48e-05, "loss": 0.4602, "step": 22000 }, { "epoch": 3.6, "learning_rate": 1.4000000000000001e-05, "loss": 0.4562, "step": 22500 }, { "epoch": 3.68, "learning_rate": 1.32e-05, "loss": 0.4605, "step": 23000 }, { "epoch": 3.76, "learning_rate": 1.24e-05, "loss": 0.4621, "step": 23500 }, { "epoch": 3.84, "learning_rate": 1.16e-05, "loss": 0.4614, "step": 24000 }, { "epoch": 3.92, "learning_rate": 1.08e-05, "loss": 0.4589, "step": 24500 }, { "epoch": 4.0, "learning_rate": 1e-05, "loss": 0.4568, "step": 25000 }, { "epoch": 4.0, "eval_accuracy": 0.594, "eval_loss": 0.4779850244522095, "eval_runtime": 9.6408, "eval_samples_per_second": 518.631, "eval_steps_per_second": 16.285, "step": 25000 }, { "epoch": 4.08, "learning_rate": 9.2e-06, "loss": 0.4567, "step": 25500 }, { "epoch": 4.16, "learning_rate": 8.400000000000001e-06, "loss": 0.4577, "step": 26000 }, { "epoch": 4.24, "learning_rate": 7.6e-06, "loss": 0.4592, "step": 26500 }, { "epoch": 4.32, "learning_rate": 6.800000000000001e-06, "loss": 0.4595, "step": 27000 }, { "epoch": 4.4, "learning_rate": 6e-06, "loss": 0.4534, "step": 27500 }, { "epoch": 4.48, "learning_rate": 5.2e-06, "loss": 0.4535, "step": 28000 }, { "epoch": 4.56, "learning_rate": 4.4e-06, "loss": 0.4534, "step": 28500 }, { "epoch": 4.64, "learning_rate": 3.6e-06, "loss": 0.4559, "step": 29000 }, { "epoch": 4.72, "learning_rate": 2.8000000000000003e-06, "loss": 0.4527, "step": 29500 }, { "epoch": 4.8, "learning_rate": 2.0000000000000003e-06, "loss": 0.4491, "step": 30000 }, { "epoch": 4.88, "learning_rate": 1.2000000000000002e-06, "loss": 0.4515, "step": 30500 }, { "epoch": 4.96, "learning_rate": 4.0000000000000003e-07, "loss": 0.4586, "step": 31000 }, { "epoch": 5.0, "eval_accuracy": 0.5942, "eval_loss": 0.4768580198287964, "eval_runtime": 9.7269, "eval_samples_per_second": 514.04, "eval_steps_per_second": 16.141, "step": 31250 }, { "epoch": 5.0, "step": 31250, "total_flos": 4.6472626176e+16, "train_loss": 0.4780897451171875, "train_runtime": 2648.3755, "train_samples_per_second": 377.59, "train_steps_per_second": 11.8 } ], "max_steps": 31250, "num_train_epochs": 5, "total_flos": 4.6472626176e+16, "trial_name": null, "trial_params": null }