|
{ |
|
"best_metric": 53.324968632371395, |
|
"best_model_checkpoint": "/scratch/camembertv2/runs/results/fquad/camembertv2-base-bf16-p2-17000/max_seq_length-896-doc_stride-128-max_answer_length-30-gradient_accumulation_steps-4-precision-fp32-learning_rate-5e-06-epochs-6-lr_scheduler-cosine-warmup_steps-0/SEED-25/checkpoint-3888", |
|
"epoch": 6.0, |
|
"eval_steps": 500, |
|
"global_step": 3888, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.15432098765432098, |
|
"grad_norm": 4.547507286071777, |
|
"learning_rate": 4.99184317884152e-06, |
|
"loss": 5.1604, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.30864197530864196, |
|
"grad_norm": 12.684767723083496, |
|
"learning_rate": 4.967425942351207e-06, |
|
"loss": 4.0839, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.46296296296296297, |
|
"grad_norm": 14.742673873901367, |
|
"learning_rate": 4.926907624154051e-06, |
|
"loss": 3.3159, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.6172839506172839, |
|
"grad_norm": 12.705907821655273, |
|
"learning_rate": 4.870552624790192e-06, |
|
"loss": 2.9494, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.7716049382716049, |
|
"grad_norm": 14.801329612731934, |
|
"learning_rate": 4.798728686380588e-06, |
|
"loss": 2.7635, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.9259259259259259, |
|
"grad_norm": 17.233285903930664, |
|
"learning_rate": 4.711904492941644e-06, |
|
"loss": 2.6393, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_exact_match": 38.86449184441656, |
|
"eval_f1": 60.0036086905889, |
|
"eval_runtime": 6.9307, |
|
"eval_samples_per_second": 459.985, |
|
"eval_steps_per_second": 7.214, |
|
"step": 648 |
|
}, |
|
{ |
|
"epoch": 1.0802469135802468, |
|
"grad_norm": 14.701664924621582, |
|
"learning_rate": 4.610646612007849e-06, |
|
"loss": 2.4089, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.2345679012345678, |
|
"grad_norm": 17.278104782104492, |
|
"learning_rate": 4.495615797519732e-06, |
|
"loss": 2.3405, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.3888888888888888, |
|
"grad_norm": 11.50146770477295, |
|
"learning_rate": 4.367562678102491e-06, |
|
"loss": 2.2084, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.5432098765432098, |
|
"grad_norm": 13.203764915466309, |
|
"learning_rate": 4.22732285887122e-06, |
|
"loss": 2.1694, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.6975308641975309, |
|
"grad_norm": 21.71219825744629, |
|
"learning_rate": 4.075811468725734e-06, |
|
"loss": 2.0862, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.8518518518518519, |
|
"grad_norm": 12.909610748291016, |
|
"learning_rate": 3.914017188716347e-06, |
|
"loss": 2.0016, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_exact_match": 48.745294855708906, |
|
"eval_f1": 70.05708349304844, |
|
"eval_runtime": 6.5382, |
|
"eval_samples_per_second": 487.593, |
|
"eval_steps_per_second": 7.647, |
|
"step": 1296 |
|
}, |
|
{ |
|
"epoch": 2.006172839506173, |
|
"grad_norm": 16.666929244995117, |
|
"learning_rate": 3.7429958004482575e-06, |
|
"loss": 1.9412, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 2.1604938271604937, |
|
"grad_norm": 10.462796211242676, |
|
"learning_rate": 3.5638632966241686e-06, |
|
"loss": 1.8009, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 2.314814814814815, |
|
"grad_norm": 13.769060134887695, |
|
"learning_rate": 3.3777885986819725e-06, |
|
"loss": 1.7928, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.4691358024691357, |
|
"grad_norm": 15.287083625793457, |
|
"learning_rate": 3.1859859290482544e-06, |
|
"loss": 1.7865, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 2.623456790123457, |
|
"grad_norm": 11.451101303100586, |
|
"learning_rate": 2.989706887782151e-06, |
|
"loss": 1.7489, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 2.7777777777777777, |
|
"grad_norm": 17.512975692749023, |
|
"learning_rate": 2.7902322853130758e-06, |
|
"loss": 1.6978, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 2.932098765432099, |
|
"grad_norm": 17.15248680114746, |
|
"learning_rate": 2.5888637845674276e-06, |
|
"loss": 1.6566, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_exact_match": 50.47051442910916, |
|
"eval_f1": 72.25048266378954, |
|
"eval_runtime": 6.5447, |
|
"eval_samples_per_second": 487.112, |
|
"eval_steps_per_second": 7.64, |
|
"step": 1944 |
|
}, |
|
{ |
|
"epoch": 3.0864197530864197, |
|
"grad_norm": 11.384383201599121, |
|
"learning_rate": 2.3869154070232346e-06, |
|
"loss": 1.6309, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 3.240740740740741, |
|
"grad_norm": 14.492201805114746, |
|
"learning_rate": 2.185704958119594e-06, |
|
"loss": 1.5353, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 3.3950617283950617, |
|
"grad_norm": 15.613585472106934, |
|
"learning_rate": 1.9865454279740452e-06, |
|
"loss": 1.5249, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 3.549382716049383, |
|
"grad_norm": 12.233988761901855, |
|
"learning_rate": 1.7907364235221128e-06, |
|
"loss": 1.5499, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 3.7037037037037037, |
|
"grad_norm": 11.811338424682617, |
|
"learning_rate": 1.5995556879882246e-06, |
|
"loss": 1.5159, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 3.8580246913580245, |
|
"grad_norm": 18.379695892333984, |
|
"learning_rate": 1.414250763027336e-06, |
|
"loss": 1.5072, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_exact_match": 53.01129234629862, |
|
"eval_f1": 74.3205610049545, |
|
"eval_runtime": 6.5679, |
|
"eval_samples_per_second": 485.39, |
|
"eval_steps_per_second": 7.613, |
|
"step": 2592 |
|
}, |
|
{ |
|
"epoch": 4.012345679012346, |
|
"grad_norm": 12.669611930847168, |
|
"learning_rate": 1.2360308479456027e-06, |
|
"loss": 1.5257, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 4.166666666666667, |
|
"grad_norm": 13.753548622131348, |
|
"learning_rate": 1.0660589091223854e-06, |
|
"loss": 1.4296, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 4.320987654320987, |
|
"grad_norm": 10.774425506591797, |
|
"learning_rate": 9.054440911232348e-07, |
|
"loss": 1.4796, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 4.4753086419753085, |
|
"grad_norm": 16.21649742126465, |
|
"learning_rate": 7.552344790248104e-07, |
|
"loss": 1.426, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 4.62962962962963, |
|
"grad_norm": 11.01417064666748, |
|
"learning_rate": 6.164102591808482e-07, |
|
"loss": 1.4245, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 4.783950617283951, |
|
"grad_norm": 10.40230941772461, |
|
"learning_rate": 4.898773230583353e-07, |
|
"loss": 1.4493, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 4.938271604938271, |
|
"grad_norm": 10.953381538391113, |
|
"learning_rate": 3.7646135588175676e-07, |
|
"loss": 1.404, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_exact_match": 53.168130489335006, |
|
"eval_f1": 74.39491719320372, |
|
"eval_runtime": 6.6406, |
|
"eval_samples_per_second": 480.08, |
|
"eval_steps_per_second": 7.529, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 5.092592592592593, |
|
"grad_norm": 13.173111915588379, |
|
"learning_rate": 2.7690244865973494e-07, |
|
"loss": 1.43, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 5.246913580246914, |
|
"grad_norm": 13.998867988586426, |
|
"learning_rate": 1.918502687530241e-07, |
|
"loss": 1.3968, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 5.401234567901234, |
|
"grad_norm": 12.186470985412598, |
|
"learning_rate": 1.2185982049813472e-07, |
|
"loss": 1.378, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 5.555555555555555, |
|
"grad_norm": 16.22747039794922, |
|
"learning_rate": 6.738782355044048e-08, |
|
"loss": 1.4347, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 5.709876543209877, |
|
"grad_norm": 17.75710105895996, |
|
"learning_rate": 2.878973257973955e-08, |
|
"loss": 1.422, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 5.864197530864198, |
|
"grad_norm": 17.476356506347656, |
|
"learning_rate": 6.317417766116829e-09, |
|
"loss": 1.3868, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_exact_match": 53.324968632371395, |
|
"eval_f1": 74.54839090269344, |
|
"eval_runtime": 6.5292, |
|
"eval_samples_per_second": 488.268, |
|
"eval_steps_per_second": 7.658, |
|
"step": 3888 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"step": 3888, |
|
"total_flos": 2.0387348740618656e+16, |
|
"train_loss": 1.9457146935011624, |
|
"train_runtime": 824.1497, |
|
"train_samples_per_second": 150.926, |
|
"train_steps_per_second": 4.718 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 3888, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 6, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2.0387348740618656e+16, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|