{ "best_metric": 0.7655586004257202, "best_model_checkpoint": "/content/drive/MyDrive/NLP/HW_2/base_aug/checkpoint-1000", "epoch": 1.370567072126092, "eval_steps": 500, "global_step": 8000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.08566044200788075, "grad_norm": 8.965557098388672, "learning_rate": 4.914339557992119e-05, "loss": 0.7647, "step": 500 }, { "epoch": 0.08566044200788075, "eval_accuracy": 0.6901223776223776, "eval_f1": 0.6778724247853133, "eval_loss": 0.839220404624939, "eval_precision": 0.6876367115034039, "eval_recall": 0.6901223776223776, "eval_runtime": 41.9252, "eval_samples_per_second": 54.573, "eval_steps_per_second": 3.411, "step": 500 }, { "epoch": 0.1713208840157615, "grad_norm": 10.940939903259277, "learning_rate": 4.828679115984239e-05, "loss": 0.5727, "step": 1000 }, { "epoch": 0.1713208840157615, "eval_accuracy": 0.6966783216783217, "eval_f1": 0.6882537930744014, "eval_loss": 0.7655586004257202, "eval_precision": 0.689349557327687, "eval_recall": 0.6966783216783217, "eval_runtime": 41.8626, "eval_samples_per_second": 54.655, "eval_steps_per_second": 3.416, "step": 1000 }, { "epoch": 0.25698132602364226, "grad_norm": 5.878599643707275, "learning_rate": 4.743018673976358e-05, "loss": 0.533, "step": 1500 }, { "epoch": 0.25698132602364226, "eval_accuracy": 0.7298951048951049, "eval_f1": 0.7266834530856427, "eval_loss": 0.8506777286529541, "eval_precision": 0.737280751130183, "eval_recall": 0.7298951048951049, "eval_runtime": 41.9336, "eval_samples_per_second": 54.562, "eval_steps_per_second": 3.41, "step": 1500 }, { "epoch": 0.342641768031523, "grad_norm": 10.529399871826172, "learning_rate": 4.657358231968477e-05, "loss": 0.5004, "step": 2000 }, { "epoch": 0.342641768031523, "eval_accuracy": 0.7041083916083916, "eval_f1": 0.6939494318603149, "eval_loss": 0.8914118409156799, "eval_precision": 0.7081400747225877, "eval_recall": 0.7041083916083916, "eval_runtime": 41.9558, "eval_samples_per_second": 54.534, "eval_steps_per_second": 3.408, "step": 2000 }, { "epoch": 0.42830221003940383, "grad_norm": 10.260519027709961, "learning_rate": 4.5716977899605965e-05, "loss": 0.5018, "step": 2500 }, { "epoch": 0.42830221003940383, "eval_accuracy": 0.701048951048951, "eval_f1": 0.6927157276690228, "eval_loss": 0.8679888844490051, "eval_precision": 0.713404161778303, "eval_recall": 0.701048951048951, "eval_runtime": 41.8274, "eval_samples_per_second": 54.701, "eval_steps_per_second": 3.419, "step": 2500 }, { "epoch": 0.5139626520472845, "grad_norm": 17.33283042907715, "learning_rate": 4.486037347952716e-05, "loss": 0.4746, "step": 3000 }, { "epoch": 0.5139626520472845, "eval_accuracy": 0.6993006993006993, "eval_f1": 0.6865826342008821, "eval_loss": 0.8556540012359619, "eval_precision": 0.7053330885786464, "eval_recall": 0.6993006993006993, "eval_runtime": 41.8079, "eval_samples_per_second": 54.727, "eval_steps_per_second": 3.42, "step": 3000 }, { "epoch": 0.5996230940551653, "grad_norm": 8.706604957580566, "learning_rate": 4.4003769059448346e-05, "loss": 0.4636, "step": 3500 }, { "epoch": 0.5996230940551653, "eval_accuracy": 0.6971153846153846, "eval_f1": 0.6901648669957038, "eval_loss": 0.87416011095047, "eval_precision": 0.7135793303951533, "eval_recall": 0.6971153846153846, "eval_runtime": 41.8748, "eval_samples_per_second": 54.639, "eval_steps_per_second": 3.415, "step": 3500 }, { "epoch": 0.685283536063046, "grad_norm": 4.292712211608887, "learning_rate": 4.314716463936954e-05, "loss": 0.4564, "step": 4000 }, { "epoch": 0.685283536063046, "eval_accuracy": 0.6809440559440559, "eval_f1": 0.6718834983032392, "eval_loss": 1.0782394409179688, "eval_precision": 0.7048329402402282, "eval_recall": 0.6809440559440559, "eval_runtime": 41.8805, "eval_samples_per_second": 54.632, "eval_steps_per_second": 3.414, "step": 4000 }, { "epoch": 0.7709439780709268, "grad_norm": 13.842761993408203, "learning_rate": 4.229056021929073e-05, "loss": 0.4353, "step": 4500 }, { "epoch": 0.7709439780709268, "eval_accuracy": 0.7246503496503497, "eval_f1": 0.7183705973439115, "eval_loss": 0.8436803221702576, "eval_precision": 0.7235752003596029, "eval_recall": 0.7246503496503497, "eval_runtime": 41.9805, "eval_samples_per_second": 54.501, "eval_steps_per_second": 3.406, "step": 4500 }, { "epoch": 0.8566044200788077, "grad_norm": 42.55852508544922, "learning_rate": 4.143395579921193e-05, "loss": 0.43, "step": 5000 }, { "epoch": 0.8566044200788077, "eval_accuracy": 0.7229020979020979, "eval_f1": 0.7169791417746533, "eval_loss": 0.8196969032287598, "eval_precision": 0.7324462130659242, "eval_recall": 0.7229020979020979, "eval_runtime": 45.2477, "eval_samples_per_second": 50.566, "eval_steps_per_second": 3.16, "step": 5000 }, { "epoch": 0.9422648620866884, "grad_norm": 12.881892204284668, "learning_rate": 4.0577351379133114e-05, "loss": 0.4247, "step": 5500 }, { "epoch": 0.9422648620866884, "eval_accuracy": 0.7259615384615384, "eval_f1": 0.7242298712808082, "eval_loss": 0.8767142295837402, "eval_precision": 0.7354716427849548, "eval_recall": 0.7259615384615384, "eval_runtime": 45.4377, "eval_samples_per_second": 50.355, "eval_steps_per_second": 3.147, "step": 5500 }, { "epoch": 1.027925304094569, "grad_norm": 27.236953735351562, "learning_rate": 3.972074695905431e-05, "loss": 0.4195, "step": 6000 }, { "epoch": 1.027925304094569, "eval_accuracy": 0.7281468531468531, "eval_f1": 0.7253106272750476, "eval_loss": 0.9493051767349243, "eval_precision": 0.7276164450384541, "eval_recall": 0.7281468531468531, "eval_runtime": 45.351, "eval_samples_per_second": 50.451, "eval_steps_per_second": 3.153, "step": 6000 }, { "epoch": 1.1135857461024499, "grad_norm": 2.864295721054077, "learning_rate": 3.88641425389755e-05, "loss": 0.37, "step": 6500 }, { "epoch": 1.1135857461024499, "eval_accuracy": 0.736451048951049, "eval_f1": 0.7316629886143114, "eval_loss": 0.8716211318969727, "eval_precision": 0.7387658931954473, "eval_recall": 0.736451048951049, "eval_runtime": 45.3614, "eval_samples_per_second": 50.439, "eval_steps_per_second": 3.152, "step": 6500 }, { "epoch": 1.1992461881103307, "grad_norm": 8.993486404418945, "learning_rate": 3.8007538118896696e-05, "loss": 0.3578, "step": 7000 }, { "epoch": 1.1992461881103307, "eval_accuracy": 0.7316433566433567, "eval_f1": 0.7276472686005753, "eval_loss": 0.7958017587661743, "eval_precision": 0.7286870389527419, "eval_recall": 0.7316433566433567, "eval_runtime": 45.312, "eval_samples_per_second": 50.494, "eval_steps_per_second": 3.156, "step": 7000 }, { "epoch": 1.2849066301182115, "grad_norm": 4.636927127838135, "learning_rate": 3.715093369881788e-05, "loss": 0.3509, "step": 7500 }, { "epoch": 1.2849066301182115, "eval_accuracy": 0.7237762237762237, "eval_f1": 0.7167197423657734, "eval_loss": 0.9068776369094849, "eval_precision": 0.7290363119263972, "eval_recall": 0.7237762237762237, "eval_runtime": 45.2476, "eval_samples_per_second": 50.566, "eval_steps_per_second": 3.16, "step": 7500 }, { "epoch": 1.370567072126092, "grad_norm": 21.381996154785156, "learning_rate": 3.629432927873908e-05, "loss": 0.3344, "step": 8000 }, { "epoch": 1.370567072126092, "eval_accuracy": 0.7290209790209791, "eval_f1": 0.7221122020450655, "eval_loss": 0.8759622573852539, "eval_precision": 0.7303211013193632, "eval_recall": 0.7290209790209791, "eval_runtime": 45.3868, "eval_samples_per_second": 50.411, "eval_steps_per_second": 3.151, "step": 8000 } ], "logging_steps": 500, "max_steps": 29185, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 20, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 3.336535494602755e+16, "train_batch_size": 16, "trial_name": null, "trial_params": null }