{ "best_metric": 0.7999999999999999, "best_model_checkpoint": "/home/bel3/content/model_folder//finetune/sst2/checkpoint-2000", "epoch": 8.10126582278481, "global_step": 6400, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.25, "eval_accuracy": 0.7637795209884644, "eval_f1": 0.7894736842105263, "eval_loss": 0.5017877817153931, "eval_runtime": 0.8927, "eval_samples_per_second": 569.063, "eval_steps_per_second": 71.693, "step": 200 }, { "epoch": 0.51, "eval_accuracy": 0.8070865869522095, "eval_f1": 0.8178438661710037, "eval_loss": 0.4368048310279846, "eval_runtime": 0.896, "eval_samples_per_second": 566.985, "eval_steps_per_second": 71.431, "step": 400 }, { "epoch": 0.63, "learning_rate": 4.683544303797468e-05, "loss": 0.4948, "step": 500 }, { "epoch": 0.76, "eval_accuracy": 0.789370059967041, "eval_f1": 0.7906066536203523, "eval_loss": 0.45398545265197754, "eval_runtime": 0.895, "eval_samples_per_second": 567.578, "eval_steps_per_second": 71.506, "step": 600 }, { "epoch": 1.01, "eval_accuracy": 0.8011810779571533, "eval_f1": 0.7908902691511387, "eval_loss": 0.44997021555900574, "eval_runtime": 0.9042, "eval_samples_per_second": 561.806, "eval_steps_per_second": 70.779, "step": 800 }, { "epoch": 1.27, "learning_rate": 4.367088607594937e-05, "loss": 0.3027, "step": 1000 }, { "epoch": 1.27, "eval_accuracy": 0.8129921555519104, "eval_f1": 0.8155339805825244, "eval_loss": 0.4525600075721741, "eval_runtime": 0.833, "eval_samples_per_second": 609.864, "eval_steps_per_second": 76.833, "step": 1000 }, { "epoch": 1.52, "eval_accuracy": 0.8208661675453186, "eval_f1": 0.8253358925143954, "eval_loss": 0.4560699462890625, "eval_runtime": 0.8459, "eval_samples_per_second": 600.52, "eval_steps_per_second": 75.656, "step": 1200 }, { "epoch": 1.77, "eval_accuracy": 0.8149606585502625, "eval_f1": 0.812, "eval_loss": 0.4809434413909912, "eval_runtime": 0.8476, "eval_samples_per_second": 599.306, "eval_steps_per_second": 75.503, "step": 1400 }, { "epoch": 1.9, "learning_rate": 4.050632911392405e-05, "loss": 0.236, "step": 1500 }, { "epoch": 2.03, "eval_accuracy": 0.8169291615486145, "eval_f1": 0.8228571428571428, "eval_loss": 0.6252030730247498, "eval_runtime": 0.9277, "eval_samples_per_second": 547.599, "eval_steps_per_second": 68.989, "step": 1600 }, { "epoch": 2.28, "eval_accuracy": 0.8149606585502625, "eval_f1": 0.8142292490118577, "eval_loss": 0.6088564395904541, "eval_runtime": 0.8559, "eval_samples_per_second": 593.542, "eval_steps_per_second": 74.777, "step": 1800 }, { "epoch": 2.53, "learning_rate": 3.7341772151898736e-05, "loss": 0.1743, "step": 2000 }, { "epoch": 2.53, "eval_accuracy": 0.8070865869522095, "eval_f1": 0.7999999999999999, "eval_loss": 0.593163251876831, "eval_runtime": 0.8297, "eval_samples_per_second": 612.294, "eval_steps_per_second": 77.139, "step": 2000 }, { "epoch": 2.78, "eval_accuracy": 0.8090550899505615, "eval_f1": 0.7991718426501037, "eval_loss": 0.5563398599624634, "eval_runtime": 0.8844, "eval_samples_per_second": 574.387, "eval_steps_per_second": 72.364, "step": 2200 }, { "epoch": 3.04, "eval_accuracy": 0.7972440719604492, "eval_f1": 0.8052930056710774, "eval_loss": 0.5898112058639526, "eval_runtime": 0.9275, "eval_samples_per_second": 547.695, "eval_steps_per_second": 69.001, "step": 2400 }, { "epoch": 3.16, "learning_rate": 3.4177215189873416e-05, "loss": 0.1599, "step": 2500 }, { "epoch": 3.29, "eval_accuracy": 0.8228346705436707, "eval_f1": 0.8228346456692914, "eval_loss": 0.538675844669342, "eval_runtime": 0.9213, "eval_samples_per_second": 551.421, "eval_steps_per_second": 69.47, "step": 2600 }, { "epoch": 3.54, "eval_accuracy": 0.8110235929489136, "eval_f1": 0.8087649402390438, "eval_loss": 0.5058029890060425, "eval_runtime": 0.9251, "eval_samples_per_second": 549.151, "eval_steps_per_second": 69.184, "step": 2800 }, { "epoch": 3.8, "learning_rate": 3.10126582278481e-05, "loss": 0.1218, "step": 3000 }, { "epoch": 3.8, "eval_accuracy": 0.8248031735420227, "eval_f1": 0.8216432865731464, "eval_loss": 0.6525737047195435, "eval_runtime": 0.8811, "eval_samples_per_second": 576.522, "eval_steps_per_second": 72.633, "step": 3000 }, { "epoch": 4.05, "eval_accuracy": 0.8110235929489136, "eval_f1": 0.8102766798418972, "eval_loss": 0.8057999610900879, "eval_runtime": 0.8465, "eval_samples_per_second": 600.11, "eval_steps_per_second": 75.604, "step": 3200 }, { "epoch": 4.3, "eval_accuracy": 0.8110235929489136, "eval_f1": 0.8032786885245902, "eval_loss": 0.7197728753089905, "eval_runtime": 0.8474, "eval_samples_per_second": 599.485, "eval_steps_per_second": 75.526, "step": 3400 }, { "epoch": 4.43, "learning_rate": 2.7848101265822786e-05, "loss": 0.0955, "step": 3500 }, { "epoch": 4.56, "eval_accuracy": 0.8031495809555054, "eval_f1": 0.7967479674796747, "eval_loss": 0.7408320903778076, "eval_runtime": 0.8491, "eval_samples_per_second": 598.276, "eval_steps_per_second": 75.373, "step": 3600 }, { "epoch": 4.81, "eval_accuracy": 0.8011810779571533, "eval_f1": 0.8112149532710279, "eval_loss": 0.7727562785148621, "eval_runtime": 0.8979, "eval_samples_per_second": 565.791, "eval_steps_per_second": 71.281, "step": 3800 }, { "epoch": 5.06, "learning_rate": 2.468354430379747e-05, "loss": 0.0858, "step": 4000 }, { "epoch": 5.06, "eval_accuracy": 0.8051180839538574, "eval_f1": 0.800804828973843, "eval_loss": 0.8653830885887146, "eval_runtime": 0.8947, "eval_samples_per_second": 567.8, "eval_steps_per_second": 71.534, "step": 4000 }, { "epoch": 5.32, "eval_accuracy": 0.789370059967041, "eval_f1": 0.8065099457504521, "eval_loss": 0.9238587021827698, "eval_runtime": 0.8912, "eval_samples_per_second": 570.005, "eval_steps_per_second": 71.812, "step": 4200 }, { "epoch": 5.57, "eval_accuracy": 0.7814960479736328, "eval_f1": 0.7940630797773653, "eval_loss": 0.8553095459938049, "eval_runtime": 0.8957, "eval_samples_per_second": 567.124, "eval_steps_per_second": 71.449, "step": 4400 }, { "epoch": 5.7, "learning_rate": 2.1518987341772153e-05, "loss": 0.0634, "step": 4500 }, { "epoch": 5.82, "eval_accuracy": 0.8110235929489136, "eval_f1": 0.817490494296578, "eval_loss": 0.6774270534515381, "eval_runtime": 0.8879, "eval_samples_per_second": 572.147, "eval_steps_per_second": 72.081, "step": 4600 }, { "epoch": 6.08, "eval_accuracy": 0.7992125749588013, "eval_f1": 0.8038461538461538, "eval_loss": 0.9549906849861145, "eval_runtime": 0.9051, "eval_samples_per_second": 561.292, "eval_steps_per_second": 70.714, "step": 4800 }, { "epoch": 6.33, "learning_rate": 1.8354430379746836e-05, "loss": 0.0548, "step": 5000 }, { "epoch": 6.33, "eval_accuracy": 0.8031495809555054, "eval_f1": 0.8091603053435114, "eval_loss": 1.1089578866958618, "eval_runtime": 0.8777, "eval_samples_per_second": 578.768, "eval_steps_per_second": 72.916, "step": 5000 }, { "epoch": 6.58, "eval_accuracy": 0.8051180839538574, "eval_f1": 0.809248554913295, "eval_loss": 1.028864860534668, "eval_runtime": 0.8922, "eval_samples_per_second": 569.357, "eval_steps_per_second": 71.73, "step": 5200 }, { "epoch": 6.84, "eval_accuracy": 0.7952755689620972, "eval_f1": 0.8066914498141264, "eval_loss": 0.9950660467147827, "eval_runtime": 0.894, "eval_samples_per_second": 568.236, "eval_steps_per_second": 71.589, "step": 5400 }, { "epoch": 6.96, "learning_rate": 1.5189873417721521e-05, "loss": 0.0473, "step": 5500 }, { "epoch": 7.09, "eval_accuracy": 0.8110235929489136, "eval_f1": 0.8117647058823529, "eval_loss": 1.1159313917160034, "eval_runtime": 0.8468, "eval_samples_per_second": 599.932, "eval_steps_per_second": 75.582, "step": 5600 }, { "epoch": 7.34, "eval_accuracy": 0.8090550899505615, "eval_f1": 0.8131021194605009, "eval_loss": 1.0203063488006592, "eval_runtime": 0.9245, "eval_samples_per_second": 549.472, "eval_steps_per_second": 69.225, "step": 5800 }, { "epoch": 7.59, "learning_rate": 1.2025316455696203e-05, "loss": 0.036, "step": 6000 }, { "epoch": 7.59, "eval_accuracy": 0.8149606585502625, "eval_f1": 0.815686274509804, "eval_loss": 1.0656845569610596, "eval_runtime": 0.9107, "eval_samples_per_second": 557.814, "eval_steps_per_second": 70.276, "step": 6000 }, { "epoch": 7.85, "eval_accuracy": 0.8129921555519104, "eval_f1": 0.8140900195694716, "eval_loss": 1.0237640142440796, "eval_runtime": 0.85, "eval_samples_per_second": 597.63, "eval_steps_per_second": 75.292, "step": 6200 }, { "epoch": 8.1, "eval_accuracy": 0.8208661675453186, "eval_f1": 0.8205128205128205, "eval_loss": 1.1597448587417603, "eval_runtime": 0.8983, "eval_samples_per_second": 565.501, "eval_steps_per_second": 71.244, "step": 6400 }, { "epoch": 8.1, "step": 6400, "total_flos": 1.3556183708860416e+16, "train_loss": 0.14836265951395033, "train_runtime": 2029.2795, "train_samples_per_second": 248.995, "train_steps_per_second": 3.893 } ], "max_steps": 7900, "num_train_epochs": 10, "total_flos": 1.3556183708860416e+16, "trial_name": null, "trial_params": null }