{ "best_metric": 0.3048795163631439, "best_model_checkpoint": "./models/bert-italian-xxl-cased-ItaCoLA/checkpoint-700", "epoch": 10.0, "global_step": 2440, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.41, "learning_rate": 1e-05, "loss": 0.4243, "step": 100 }, { "epoch": 0.41, "eval_accuracy": 0.854122621564482, "eval_loss": 0.38053709268569946, "eval_runtime": 3.6758, "eval_samples_per_second": 257.362, "eval_steps_per_second": 16.323, "step": 100 }, { "epoch": 0.82, "learning_rate": 1e-05, "loss": 0.3806, "step": 200 }, { "epoch": 0.82, "eval_accuracy": 0.857293868921776, "eval_loss": 0.3861564099788666, "eval_runtime": 3.5095, "eval_samples_per_second": 269.553, "eval_steps_per_second": 17.096, "step": 200 }, { "epoch": 1.23, "learning_rate": 1e-05, "loss": 0.3289, "step": 300 }, { "epoch": 1.23, "eval_accuracy": 0.8678646934460887, "eval_loss": 0.35367050766944885, "eval_runtime": 3.7169, "eval_samples_per_second": 254.517, "eval_steps_per_second": 16.143, "step": 300 }, { "epoch": 1.64, "learning_rate": 1e-05, "loss": 0.2967, "step": 400 }, { "epoch": 1.64, "eval_accuracy": 0.8826638477801269, "eval_loss": 0.3165006935596466, "eval_runtime": 3.5485, "eval_samples_per_second": 266.59, "eval_steps_per_second": 16.908, "step": 400 }, { "epoch": 2.05, "learning_rate": 1e-05, "loss": 0.2992, "step": 500 }, { "epoch": 2.05, "eval_accuracy": 0.8784355179704016, "eval_loss": 0.33816611766815186, "eval_runtime": 3.7064, "eval_samples_per_second": 255.231, "eval_steps_per_second": 16.188, "step": 500 }, { "epoch": 2.46, "learning_rate": 1e-05, "loss": 0.2273, "step": 600 }, { "epoch": 2.46, "eval_accuracy": 0.8816067653276956, "eval_loss": 0.329441100358963, "eval_runtime": 3.7539, "eval_samples_per_second": 252.003, "eval_steps_per_second": 15.983, "step": 600 }, { "epoch": 2.87, "learning_rate": 1e-05, "loss": 0.2215, "step": 700 }, { "epoch": 2.87, "eval_accuracy": 0.8911205073995772, "eval_loss": 0.3048795163631439, "eval_runtime": 3.5485, "eval_samples_per_second": 266.591, "eval_steps_per_second": 16.909, "step": 700 }, { "epoch": 3.28, "learning_rate": 1e-05, "loss": 0.1718, "step": 800 }, { "epoch": 3.28, "eval_accuracy": 0.8911205073995772, "eval_loss": 0.35308167338371277, "eval_runtime": 3.56, "eval_samples_per_second": 265.727, "eval_steps_per_second": 16.854, "step": 800 }, { "epoch": 3.69, "learning_rate": 1e-05, "loss": 0.1757, "step": 900 }, { "epoch": 3.69, "eval_accuracy": 0.8921775898520085, "eval_loss": 0.39034464955329895, "eval_runtime": 3.7329, "eval_samples_per_second": 253.42, "eval_steps_per_second": 16.073, "step": 900 }, { "epoch": 4.1, "learning_rate": 1e-05, "loss": 0.1698, "step": 1000 }, { "epoch": 4.1, "eval_accuracy": 0.8953488372093024, "eval_loss": 0.3871164321899414, "eval_runtime": 3.5471, "eval_samples_per_second": 266.695, "eval_steps_per_second": 16.915, "step": 1000 }, { "epoch": 4.51, "learning_rate": 1e-05, "loss": 0.1307, "step": 1100 }, { "epoch": 4.51, "eval_accuracy": 0.8953488372093024, "eval_loss": 0.42552220821380615, "eval_runtime": 3.5867, "eval_samples_per_second": 263.752, "eval_steps_per_second": 16.728, "step": 1100 }, { "epoch": 4.92, "learning_rate": 1e-05, "loss": 0.1426, "step": 1200 }, { "epoch": 4.92, "eval_accuracy": 0.8985200845665962, "eval_loss": 0.37290310859680176, "eval_runtime": 3.5578, "eval_samples_per_second": 265.898, "eval_steps_per_second": 16.865, "step": 1200 }, { "epoch": 5.33, "learning_rate": 1e-05, "loss": 0.1136, "step": 1300 }, { "epoch": 5.33, "eval_accuracy": 0.8964059196617337, "eval_loss": 0.49388667941093445, "eval_runtime": 3.5671, "eval_samples_per_second": 265.203, "eval_steps_per_second": 16.82, "step": 1300 }, { "epoch": 5.74, "learning_rate": 1e-05, "loss": 0.1163, "step": 1400 }, { "epoch": 5.74, "eval_accuracy": 0.8964059196617337, "eval_loss": 0.4004368782043457, "eval_runtime": 3.7365, "eval_samples_per_second": 253.18, "eval_steps_per_second": 16.058, "step": 1400 }, { "epoch": 6.15, "learning_rate": 1e-05, "loss": 0.0936, "step": 1500 }, { "epoch": 6.15, "eval_accuracy": 0.8964059196617337, "eval_loss": 0.5116058588027954, "eval_runtime": 3.5814, "eval_samples_per_second": 264.146, "eval_steps_per_second": 16.753, "step": 1500 }, { "epoch": 6.56, "learning_rate": 1e-05, "loss": 0.0973, "step": 1600 }, { "epoch": 6.56, "eval_accuracy": 0.8921775898520085, "eval_loss": 0.4807981848716736, "eval_runtime": 3.7354, "eval_samples_per_second": 253.253, "eval_steps_per_second": 16.063, "step": 1600 }, { "epoch": 6.97, "learning_rate": 1e-05, "loss": 0.0899, "step": 1700 }, { "epoch": 6.97, "eval_accuracy": 0.8868921775898521, "eval_loss": 0.48126643896102905, "eval_runtime": 3.5551, "eval_samples_per_second": 266.094, "eval_steps_per_second": 16.877, "step": 1700 }, { "epoch": 7.38, "learning_rate": 1e-05, "loss": 0.0687, "step": 1800 }, { "epoch": 7.38, "eval_accuracy": 0.8847780126849895, "eval_loss": 0.6045676469802856, "eval_runtime": 3.7224, "eval_samples_per_second": 254.138, "eval_steps_per_second": 16.119, "step": 1800 }, { "epoch": 7.79, "learning_rate": 1e-05, "loss": 0.0709, "step": 1900 }, { "epoch": 7.79, "eval_accuracy": 0.8964059196617337, "eval_loss": 0.5939581394195557, "eval_runtime": 3.7591, "eval_samples_per_second": 251.655, "eval_steps_per_second": 15.961, "step": 1900 }, { "epoch": 8.2, "learning_rate": 1e-05, "loss": 0.0694, "step": 2000 }, { "epoch": 8.2, "eval_accuracy": 0.8911205073995772, "eval_loss": 0.5791226029396057, "eval_runtime": 3.5526, "eval_samples_per_second": 266.284, "eval_steps_per_second": 16.889, "step": 2000 }, { "epoch": 8.61, "learning_rate": 1e-05, "loss": 0.0732, "step": 2100 }, { "epoch": 8.61, "eval_accuracy": 0.8921775898520085, "eval_loss": 0.5576857924461365, "eval_runtime": 3.5693, "eval_samples_per_second": 265.038, "eval_steps_per_second": 16.81, "step": 2100 }, { "epoch": 9.02, "learning_rate": 1e-05, "loss": 0.0714, "step": 2200 }, { "epoch": 9.02, "eval_accuracy": 0.8995771670190275, "eval_loss": 0.5248907804489136, "eval_runtime": 3.7426, "eval_samples_per_second": 252.768, "eval_steps_per_second": 16.032, "step": 2200 }, { "epoch": 9.43, "learning_rate": 1e-05, "loss": 0.0531, "step": 2300 }, { "epoch": 9.43, "eval_accuracy": 0.8932346723044398, "eval_loss": 0.6098359227180481, "eval_runtime": 3.5458, "eval_samples_per_second": 266.794, "eval_steps_per_second": 16.921, "step": 2300 }, { "epoch": 9.84, "learning_rate": 1e-05, "loss": 0.0713, "step": 2400 }, { "epoch": 9.84, "eval_accuracy": 0.8942917547568711, "eval_loss": 0.5609742403030396, "eval_runtime": 3.5561, "eval_samples_per_second": 266.022, "eval_steps_per_second": 16.872, "step": 2400 }, { "epoch": 10.0, "step": 2440, "total_flos": 5131323357158400.0, "train_loss": 0.16311937355604328, "train_runtime": 870.9833, "train_samples_per_second": 89.565, "train_steps_per_second": 2.801 } ], "max_steps": 2440, "num_train_epochs": 10, "total_flos": 5131323357158400.0, "trial_name": null, "trial_params": null }