{ "best_metric": 53.324968632371395, "best_model_checkpoint": "/scratch/camembertv2/runs/results/fquad/camembertv2-base-bf16-p2-17000/max_seq_length-896-doc_stride-128-max_answer_length-30-gradient_accumulation_steps-4-precision-fp32-learning_rate-5e-06-epochs-6-lr_scheduler-cosine-warmup_steps-0/SEED-25/checkpoint-3888", "epoch": 6.0, "eval_steps": 500, "global_step": 3888, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.15432098765432098, "grad_norm": 4.547507286071777, "learning_rate": 4.99184317884152e-06, "loss": 5.1604, "step": 100 }, { "epoch": 0.30864197530864196, "grad_norm": 12.684767723083496, "learning_rate": 4.967425942351207e-06, "loss": 4.0839, "step": 200 }, { "epoch": 0.46296296296296297, "grad_norm": 14.742673873901367, "learning_rate": 4.926907624154051e-06, "loss": 3.3159, "step": 300 }, { "epoch": 0.6172839506172839, "grad_norm": 12.705907821655273, "learning_rate": 4.870552624790192e-06, "loss": 2.9494, "step": 400 }, { "epoch": 0.7716049382716049, "grad_norm": 14.801329612731934, "learning_rate": 4.798728686380588e-06, "loss": 2.7635, "step": 500 }, { "epoch": 0.9259259259259259, "grad_norm": 17.233285903930664, "learning_rate": 4.711904492941644e-06, "loss": 2.6393, "step": 600 }, { "epoch": 1.0, "eval_exact_match": 38.86449184441656, "eval_f1": 60.0036086905889, "eval_runtime": 6.9307, "eval_samples_per_second": 459.985, "eval_steps_per_second": 7.214, "step": 648 }, { "epoch": 1.0802469135802468, "grad_norm": 14.701664924621582, "learning_rate": 4.610646612007849e-06, "loss": 2.4089, "step": 700 }, { "epoch": 1.2345679012345678, "grad_norm": 17.278104782104492, "learning_rate": 4.495615797519732e-06, "loss": 2.3405, "step": 800 }, { "epoch": 1.3888888888888888, "grad_norm": 11.50146770477295, "learning_rate": 4.367562678102491e-06, "loss": 2.2084, "step": 900 }, { "epoch": 1.5432098765432098, "grad_norm": 13.203764915466309, "learning_rate": 4.22732285887122e-06, "loss": 2.1694, "step": 1000 }, { "epoch": 1.6975308641975309, "grad_norm": 21.71219825744629, "learning_rate": 4.075811468725734e-06, "loss": 2.0862, "step": 1100 }, { "epoch": 1.8518518518518519, "grad_norm": 12.909610748291016, "learning_rate": 3.914017188716347e-06, "loss": 2.0016, "step": 1200 }, { "epoch": 2.0, "eval_exact_match": 48.745294855708906, "eval_f1": 70.05708349304844, "eval_runtime": 6.5382, "eval_samples_per_second": 487.593, "eval_steps_per_second": 7.647, "step": 1296 }, { "epoch": 2.006172839506173, "grad_norm": 16.666929244995117, "learning_rate": 3.7429958004482575e-06, "loss": 1.9412, "step": 1300 }, { "epoch": 2.1604938271604937, "grad_norm": 10.462796211242676, "learning_rate": 3.5638632966241686e-06, "loss": 1.8009, "step": 1400 }, { "epoch": 2.314814814814815, "grad_norm": 13.769060134887695, "learning_rate": 3.3777885986819725e-06, "loss": 1.7928, "step": 1500 }, { "epoch": 2.4691358024691357, "grad_norm": 15.287083625793457, "learning_rate": 3.1859859290482544e-06, "loss": 1.7865, "step": 1600 }, { "epoch": 2.623456790123457, "grad_norm": 11.451101303100586, "learning_rate": 2.989706887782151e-06, "loss": 1.7489, "step": 1700 }, { "epoch": 2.7777777777777777, "grad_norm": 17.512975692749023, "learning_rate": 2.7902322853130758e-06, "loss": 1.6978, "step": 1800 }, { "epoch": 2.932098765432099, "grad_norm": 17.15248680114746, "learning_rate": 2.5888637845674276e-06, "loss": 1.6566, "step": 1900 }, { "epoch": 3.0, "eval_exact_match": 50.47051442910916, "eval_f1": 72.25048266378954, "eval_runtime": 6.5447, "eval_samples_per_second": 487.112, "eval_steps_per_second": 7.64, "step": 1944 }, { "epoch": 3.0864197530864197, "grad_norm": 11.384383201599121, "learning_rate": 2.3869154070232346e-06, "loss": 1.6309, "step": 2000 }, { "epoch": 3.240740740740741, "grad_norm": 14.492201805114746, "learning_rate": 2.185704958119594e-06, "loss": 1.5353, "step": 2100 }, { "epoch": 3.3950617283950617, "grad_norm": 15.613585472106934, "learning_rate": 1.9865454279740452e-06, "loss": 1.5249, "step": 2200 }, { "epoch": 3.549382716049383, "grad_norm": 12.233988761901855, "learning_rate": 1.7907364235221128e-06, "loss": 1.5499, "step": 2300 }, { "epoch": 3.7037037037037037, "grad_norm": 11.811338424682617, "learning_rate": 1.5995556879882246e-06, "loss": 1.5159, "step": 2400 }, { "epoch": 3.8580246913580245, "grad_norm": 18.379695892333984, "learning_rate": 1.414250763027336e-06, "loss": 1.5072, "step": 2500 }, { "epoch": 4.0, "eval_exact_match": 53.01129234629862, "eval_f1": 74.3205610049545, "eval_runtime": 6.5679, "eval_samples_per_second": 485.39, "eval_steps_per_second": 7.613, "step": 2592 }, { "epoch": 4.012345679012346, "grad_norm": 12.669611930847168, "learning_rate": 1.2360308479456027e-06, "loss": 1.5257, "step": 2600 }, { "epoch": 4.166666666666667, "grad_norm": 13.753548622131348, "learning_rate": 1.0660589091223854e-06, "loss": 1.4296, "step": 2700 }, { "epoch": 4.320987654320987, "grad_norm": 10.774425506591797, "learning_rate": 9.054440911232348e-07, "loss": 1.4796, "step": 2800 }, { "epoch": 4.4753086419753085, "grad_norm": 16.21649742126465, "learning_rate": 7.552344790248104e-07, "loss": 1.426, "step": 2900 }, { "epoch": 4.62962962962963, "grad_norm": 11.01417064666748, "learning_rate": 6.164102591808482e-07, "loss": 1.4245, "step": 3000 }, { "epoch": 4.783950617283951, "grad_norm": 10.40230941772461, "learning_rate": 4.898773230583353e-07, "loss": 1.4493, "step": 3100 }, { "epoch": 4.938271604938271, "grad_norm": 10.953381538391113, "learning_rate": 3.7646135588175676e-07, "loss": 1.404, "step": 3200 }, { "epoch": 5.0, "eval_exact_match": 53.168130489335006, "eval_f1": 74.39491719320372, "eval_runtime": 6.6406, "eval_samples_per_second": 480.08, "eval_steps_per_second": 7.529, "step": 3240 }, { "epoch": 5.092592592592593, "grad_norm": 13.173111915588379, "learning_rate": 2.7690244865973494e-07, "loss": 1.43, "step": 3300 }, { "epoch": 5.246913580246914, "grad_norm": 13.998867988586426, "learning_rate": 1.918502687530241e-07, "loss": 1.3968, "step": 3400 }, { "epoch": 5.401234567901234, "grad_norm": 12.186470985412598, "learning_rate": 1.2185982049813472e-07, "loss": 1.378, "step": 3500 }, { "epoch": 5.555555555555555, "grad_norm": 16.22747039794922, "learning_rate": 6.738782355044048e-08, "loss": 1.4347, "step": 3600 }, { "epoch": 5.709876543209877, "grad_norm": 17.75710105895996, "learning_rate": 2.878973257973955e-08, "loss": 1.422, "step": 3700 }, { "epoch": 5.864197530864198, "grad_norm": 17.476356506347656, "learning_rate": 6.317417766116829e-09, "loss": 1.3868, "step": 3800 }, { "epoch": 6.0, "eval_exact_match": 53.324968632371395, "eval_f1": 74.54839090269344, "eval_runtime": 6.5292, "eval_samples_per_second": 488.268, "eval_steps_per_second": 7.658, "step": 3888 }, { "epoch": 6.0, "step": 3888, "total_flos": 2.0387348740618656e+16, "train_loss": 1.9457146935011624, "train_runtime": 824.1497, "train_samples_per_second": 150.926, "train_steps_per_second": 4.718 } ], "logging_steps": 100, "max_steps": 3888, "num_input_tokens_seen": 0, "num_train_epochs": 6, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.0387348740618656e+16, "train_batch_size": 8, "trial_name": null, "trial_params": null }