{ "best_metric": 0.948394495412844, "best_model_checkpoint": "output/fine_tuned/t5-base/SST2/checkpoint-8420", "epoch": 10.0, "eval_steps": 500, "global_step": 21050, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.2375296912114014, "grad_norm": 3.5314207077026367, "learning_rate": 4.8812351543942996e-05, "loss": 0.2839, "step": 500 }, { "epoch": 0.4750593824228028, "grad_norm": 3.3338215351104736, "learning_rate": 4.7624703087885984e-05, "loss": 0.1706, "step": 1000 }, { "epoch": 0.7125890736342043, "grad_norm": 2.111057996749878, "learning_rate": 4.643705463182898e-05, "loss": 0.1579, "step": 1500 }, { "epoch": 0.9501187648456056, "grad_norm": 5.4748430252075195, "learning_rate": 4.524940617577197e-05, "loss": 0.1443, "step": 2000 }, { "epoch": 1.0, "eval_accuracy": 0.9323394495412844, "eval_loss": 0.20724497735500336, "eval_runtime": 2.2627, "eval_samples_per_second": 385.376, "eval_steps_per_second": 48.172, "step": 2105 }, { "epoch": 1.187648456057007, "grad_norm": 4.547887325286865, "learning_rate": 4.406175771971497e-05, "loss": 0.1207, "step": 2500 }, { "epoch": 1.4251781472684084, "grad_norm": 3.1552112102508545, "learning_rate": 4.287410926365796e-05, "loss": 0.113, "step": 3000 }, { "epoch": 1.66270783847981, "grad_norm": 0.2997496724128723, "learning_rate": 4.168646080760095e-05, "loss": 0.1112, "step": 3500 }, { "epoch": 1.9002375296912115, "grad_norm": 1.9386916160583496, "learning_rate": 4.049881235154395e-05, "loss": 0.1152, "step": 4000 }, { "epoch": 2.0, "eval_accuracy": 0.9403669724770642, "eval_loss": 0.21269434690475464, "eval_runtime": 1.9647, "eval_samples_per_second": 443.834, "eval_steps_per_second": 55.479, "step": 4210 }, { "epoch": 2.137767220902613, "grad_norm": 0.23684050142765045, "learning_rate": 3.9311163895486934e-05, "loss": 0.0952, "step": 4500 }, { "epoch": 2.375296912114014, "grad_norm": 1.943021297454834, "learning_rate": 3.812351543942993e-05, "loss": 0.0881, "step": 5000 }, { "epoch": 2.6128266033254155, "grad_norm": 4.209224700927734, "learning_rate": 3.693586698337292e-05, "loss": 0.0913, "step": 5500 }, { "epoch": 2.850356294536817, "grad_norm": 4.389578819274902, "learning_rate": 3.5748218527315916e-05, "loss": 0.0849, "step": 6000 }, { "epoch": 3.0, "eval_accuracy": 0.9438073394495413, "eval_loss": 0.2156314253807068, "eval_runtime": 1.9686, "eval_samples_per_second": 442.964, "eval_steps_per_second": 55.371, "step": 6315 }, { "epoch": 3.0878859857482186, "grad_norm": 0.4832597076892853, "learning_rate": 3.456057007125891e-05, "loss": 0.075, "step": 6500 }, { "epoch": 3.32541567695962, "grad_norm": 6.154821395874023, "learning_rate": 3.3372921615201904e-05, "loss": 0.0719, "step": 7000 }, { "epoch": 3.5629453681710213, "grad_norm": 2.8581926822662354, "learning_rate": 3.21852731591449e-05, "loss": 0.0727, "step": 7500 }, { "epoch": 3.800475059382423, "grad_norm": 4.29818868637085, "learning_rate": 3.0997624703087885e-05, "loss": 0.0709, "step": 8000 }, { "epoch": 4.0, "eval_accuracy": 0.948394495412844, "eval_loss": 0.222471222281456, "eval_runtime": 1.9642, "eval_samples_per_second": 443.937, "eval_steps_per_second": 55.492, "step": 8420 }, { "epoch": 4.038004750593824, "grad_norm": 1.9306527376174927, "learning_rate": 2.980997624703088e-05, "loss": 0.0724, "step": 8500 }, { "epoch": 4.275534441805226, "grad_norm": 2.4774186611175537, "learning_rate": 2.8622327790973873e-05, "loss": 0.0541, "step": 9000 }, { "epoch": 4.513064133016627, "grad_norm": 2.690717935562134, "learning_rate": 2.7434679334916867e-05, "loss": 0.0568, "step": 9500 }, { "epoch": 4.750593824228028, "grad_norm": 1.1091784238815308, "learning_rate": 2.6247030878859858e-05, "loss": 0.0602, "step": 10000 }, { "epoch": 4.98812351543943, "grad_norm": 1.5014886856079102, "learning_rate": 2.5059382422802852e-05, "loss": 0.06, "step": 10500 }, { "epoch": 5.0, "eval_accuracy": 0.9403669724770642, "eval_loss": 0.2718953788280487, "eval_runtime": 1.9734, "eval_samples_per_second": 441.874, "eval_steps_per_second": 55.234, "step": 10525 }, { "epoch": 5.225653206650831, "grad_norm": 1.5026357173919678, "learning_rate": 2.3871733966745842e-05, "loss": 0.0428, "step": 11000 }, { "epoch": 5.463182897862232, "grad_norm": 6.937454700469971, "learning_rate": 2.2684085510688836e-05, "loss": 0.0428, "step": 11500 }, { "epoch": 5.7007125890736345, "grad_norm": 2.493163824081421, "learning_rate": 2.149643705463183e-05, "loss": 0.0502, "step": 12000 }, { "epoch": 5.938242280285036, "grad_norm": 0.8779876828193665, "learning_rate": 2.0308788598574824e-05, "loss": 0.0507, "step": 12500 }, { "epoch": 6.0, "eval_accuracy": 0.9403669724770642, "eval_loss": 0.29110807180404663, "eval_runtime": 1.965, "eval_samples_per_second": 443.769, "eval_steps_per_second": 55.471, "step": 12630 }, { "epoch": 6.175771971496437, "grad_norm": 1.818724513053894, "learning_rate": 1.9121140142517815e-05, "loss": 0.0409, "step": 13000 }, { "epoch": 6.4133016627078385, "grad_norm": 0.3184266984462738, "learning_rate": 1.793349168646081e-05, "loss": 0.0378, "step": 13500 }, { "epoch": 6.65083135391924, "grad_norm": 0.03643421456217766, "learning_rate": 1.6745843230403803e-05, "loss": 0.0397, "step": 14000 }, { "epoch": 6.888361045130641, "grad_norm": 0.04083951190114021, "learning_rate": 1.5558194774346793e-05, "loss": 0.0435, "step": 14500 }, { "epoch": 7.0, "eval_accuracy": 0.9334862385321101, "eval_loss": 0.3279486894607544, "eval_runtime": 1.9653, "eval_samples_per_second": 443.688, "eval_steps_per_second": 55.461, "step": 14735 }, { "epoch": 7.1258907363420425, "grad_norm": 0.06795825809240341, "learning_rate": 1.4370546318289787e-05, "loss": 0.0371, "step": 15000 }, { "epoch": 7.363420427553444, "grad_norm": 3.7526097297668457, "learning_rate": 1.318289786223278e-05, "loss": 0.0299, "step": 15500 }, { "epoch": 7.600950118764846, "grad_norm": 2.537559747695923, "learning_rate": 1.1995249406175772e-05, "loss": 0.0302, "step": 16000 }, { "epoch": 7.838479809976247, "grad_norm": 3.0933053493499756, "learning_rate": 1.0807600950118766e-05, "loss": 0.0357, "step": 16500 }, { "epoch": 8.0, "eval_accuracy": 0.9311926605504587, "eval_loss": 0.3565831780433655, "eval_runtime": 1.9696, "eval_samples_per_second": 442.724, "eval_steps_per_second": 55.341, "step": 16840 }, { "epoch": 8.076009501187649, "grad_norm": 2.52713680267334, "learning_rate": 9.619952494061758e-06, "loss": 0.037, "step": 17000 }, { "epoch": 8.31353919239905, "grad_norm": 0.04675888270139694, "learning_rate": 8.432304038004752e-06, "loss": 0.0276, "step": 17500 }, { "epoch": 8.551068883610451, "grad_norm": 5.1887593269348145, "learning_rate": 7.244655581947744e-06, "loss": 0.0294, "step": 18000 }, { "epoch": 8.788598574821853, "grad_norm": 0.14877742528915405, "learning_rate": 6.0570071258907366e-06, "loss": 0.0274, "step": 18500 }, { "epoch": 9.0, "eval_accuracy": 0.9357798165137615, "eval_loss": 0.38757723569869995, "eval_runtime": 1.9636, "eval_samples_per_second": 444.076, "eval_steps_per_second": 55.509, "step": 18945 }, { "epoch": 9.026128266033254, "grad_norm": 0.024900976568460464, "learning_rate": 4.86935866983373e-06, "loss": 0.0282, "step": 19000 }, { "epoch": 9.263657957244655, "grad_norm": 0.1904192417860031, "learning_rate": 3.681710213776722e-06, "loss": 0.0213, "step": 19500 }, { "epoch": 9.501187648456057, "grad_norm": 5.56980562210083, "learning_rate": 2.494061757719715e-06, "loss": 0.0218, "step": 20000 }, { "epoch": 9.738717339667458, "grad_norm": 3.3612709045410156, "learning_rate": 1.306413301662708e-06, "loss": 0.0272, "step": 20500 }, { "epoch": 9.97624703087886, "grad_norm": 4.481935024261475, "learning_rate": 1.1876484560570073e-07, "loss": 0.0253, "step": 21000 }, { "epoch": 10.0, "eval_accuracy": 0.9380733944954128, "eval_loss": 0.4033842086791992, "eval_runtime": 1.9746, "eval_samples_per_second": 441.601, "eval_steps_per_second": 55.2, "step": 21050 }, { "epoch": 10.0, "step": 21050, "total_flos": 1.028380359389952e+17, "train_loss": 0.06886951911477748, "train_runtime": 3499.6769, "train_samples_per_second": 192.443, "train_steps_per_second": 6.015 } ], "logging_steps": 500, "max_steps": 21050, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.028380359389952e+17, "train_batch_size": 32, "trial_name": null, "trial_params": null }