|
{ |
|
"best_metric": 0.8729121278140886, |
|
"best_model_checkpoint": "/scratch/mrahma45/pixel/finetuned_models/mbert/mbert-base-finetuned-masakhaner-swa/checkpoint-4000", |
|
"epoch": 81.81818181818181, |
|
"global_step": 5400, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 3.03, |
|
"eval_accuracy_score": 0.969571802285557, |
|
"eval_f1": 0.8581610833927299, |
|
"eval_loss": 0.11472320556640625, |
|
"eval_precision": 0.8257887517146777, |
|
"eval_recall": 0.8931750741839762, |
|
"eval_runtime": 4.1914, |
|
"eval_samples_per_second": 71.574, |
|
"eval_steps_per_second": 9.066, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 6.06, |
|
"eval_accuracy_score": 0.9607600165220983, |
|
"eval_f1": 0.8493342676944637, |
|
"eval_loss": 0.14969909191131592, |
|
"eval_precision": 0.8047808764940239, |
|
"eval_recall": 0.8991097922848664, |
|
"eval_runtime": 4.1964, |
|
"eval_samples_per_second": 71.491, |
|
"eval_steps_per_second": 9.055, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 7.58, |
|
"learning_rate": 4.865771812080537e-05, |
|
"loss": 0.1339, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 9.09, |
|
"eval_accuracy_score": 0.9611730689797604, |
|
"eval_f1": 0.8616039744499645, |
|
"eval_loss": 0.16909180581569672, |
|
"eval_precision": 0.8258503401360544, |
|
"eval_recall": 0.900593471810089, |
|
"eval_runtime": 4.1879, |
|
"eval_samples_per_second": 71.635, |
|
"eval_steps_per_second": 9.074, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 12.12, |
|
"eval_accuracy_score": 0.9651659094038276, |
|
"eval_f1": 0.8659058487874465, |
|
"eval_loss": 0.16794395446777344, |
|
"eval_precision": 0.8337912087912088, |
|
"eval_recall": 0.900593471810089, |
|
"eval_runtime": 4.1904, |
|
"eval_samples_per_second": 71.593, |
|
"eval_steps_per_second": 9.068, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 15.15, |
|
"learning_rate": 4.697986577181208e-05, |
|
"loss": 0.0084, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 15.15, |
|
"eval_accuracy_score": 0.9653035935563816, |
|
"eval_f1": 0.8672817601135557, |
|
"eval_loss": 0.1805761158466339, |
|
"eval_precision": 0.8312925170068027, |
|
"eval_recall": 0.9065281899109793, |
|
"eval_runtime": 4.1859, |
|
"eval_samples_per_second": 71.669, |
|
"eval_steps_per_second": 9.078, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 18.18, |
|
"eval_accuracy_score": 0.9566294919454771, |
|
"eval_f1": 0.8536931818181818, |
|
"eval_loss": 0.2617969214916229, |
|
"eval_precision": 0.8188010899182562, |
|
"eval_recall": 0.8916913946587537, |
|
"eval_runtime": 4.1909, |
|
"eval_samples_per_second": 71.583, |
|
"eval_steps_per_second": 9.067, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 21.21, |
|
"eval_accuracy_score": 0.9686080132176786, |
|
"eval_f1": 0.873661670235546, |
|
"eval_loss": 0.17235662043094635, |
|
"eval_precision": 0.8418156808803301, |
|
"eval_recall": 0.9080118694362018, |
|
"eval_runtime": 4.1851, |
|
"eval_samples_per_second": 71.683, |
|
"eval_steps_per_second": 9.08, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 22.73, |
|
"learning_rate": 4.530201342281879e-05, |
|
"loss": 0.0044, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 24.24, |
|
"eval_accuracy_score": 0.9637890678782872, |
|
"eval_f1": 0.875629043853343, |
|
"eval_loss": 0.21507766842842102, |
|
"eval_precision": 0.8493723849372385, |
|
"eval_recall": 0.9035608308605341, |
|
"eval_runtime": 4.1889, |
|
"eval_samples_per_second": 71.619, |
|
"eval_steps_per_second": 9.072, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 27.27, |
|
"eval_accuracy_score": 0.9574555968608013, |
|
"eval_f1": 0.8459900638750887, |
|
"eval_loss": 0.24175740778446198, |
|
"eval_precision": 0.8108843537414966, |
|
"eval_recall": 0.884272997032641, |
|
"eval_runtime": 4.1899, |
|
"eval_samples_per_second": 71.601, |
|
"eval_steps_per_second": 9.07, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 30.3, |
|
"learning_rate": 4.36241610738255e-05, |
|
"loss": 0.003, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 30.3, |
|
"eval_accuracy_score": 0.9582817017761256, |
|
"eval_f1": 0.8468085106382979, |
|
"eval_loss": 0.25091126561164856, |
|
"eval_precision": 0.811141304347826, |
|
"eval_recall": 0.8857566765578635, |
|
"eval_runtime": 4.1982, |
|
"eval_samples_per_second": 71.459, |
|
"eval_steps_per_second": 9.051, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 33.33, |
|
"eval_accuracy_score": 0.9658543301665978, |
|
"eval_f1": 0.8738284066330209, |
|
"eval_loss": 0.225422665476799, |
|
"eval_precision": 0.8499298737727911, |
|
"eval_recall": 0.8991097922848664, |
|
"eval_runtime": 4.2016, |
|
"eval_samples_per_second": 71.402, |
|
"eval_steps_per_second": 9.044, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 36.36, |
|
"eval_accuracy_score": 0.9669558033870301, |
|
"eval_f1": 0.8775216138328531, |
|
"eval_loss": 0.22372400760650635, |
|
"eval_precision": 0.8529411764705882, |
|
"eval_recall": 0.9035608308605341, |
|
"eval_runtime": 4.1974, |
|
"eval_samples_per_second": 71.472, |
|
"eval_steps_per_second": 9.053, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 37.88, |
|
"learning_rate": 4.194630872483222e-05, |
|
"loss": 0.0024, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 39.39, |
|
"eval_accuracy_score": 0.9611730689797604, |
|
"eval_f1": 0.8640915593705293, |
|
"eval_loss": 0.23863321542739868, |
|
"eval_precision": 0.8342541436464088, |
|
"eval_recall": 0.8961424332344213, |
|
"eval_runtime": 4.1973, |
|
"eval_samples_per_second": 71.474, |
|
"eval_steps_per_second": 9.053, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 42.42, |
|
"eval_accuracy_score": 0.9684703290651246, |
|
"eval_f1": 0.8781542898341744, |
|
"eval_loss": 0.1946461945772171, |
|
"eval_precision": 0.8541374474053296, |
|
"eval_recall": 0.9035608308605341, |
|
"eval_runtime": 4.1975, |
|
"eval_samples_per_second": 71.471, |
|
"eval_steps_per_second": 9.053, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 45.45, |
|
"learning_rate": 4.026845637583892e-05, |
|
"loss": 0.003, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 45.45, |
|
"eval_accuracy_score": 0.956354123640369, |
|
"eval_f1": 0.845993031358885, |
|
"eval_loss": 0.2664543688297272, |
|
"eval_precision": 0.797634691195795, |
|
"eval_recall": 0.900593471810089, |
|
"eval_runtime": 4.1956, |
|
"eval_samples_per_second": 71.503, |
|
"eval_steps_per_second": 9.057, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 48.48, |
|
"eval_accuracy_score": 0.9624122263527468, |
|
"eval_f1": 0.8711303095752341, |
|
"eval_loss": 0.2241496741771698, |
|
"eval_precision": 0.8461538461538461, |
|
"eval_recall": 0.8976261127596439, |
|
"eval_runtime": 4.2045, |
|
"eval_samples_per_second": 71.353, |
|
"eval_steps_per_second": 9.038, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 51.52, |
|
"eval_accuracy_score": 0.9617238055899766, |
|
"eval_f1": 0.8640569395017793, |
|
"eval_loss": 0.26432353258132935, |
|
"eval_precision": 0.8303693570451436, |
|
"eval_recall": 0.900593471810089, |
|
"eval_runtime": 4.2012, |
|
"eval_samples_per_second": 71.409, |
|
"eval_steps_per_second": 9.045, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 53.03, |
|
"learning_rate": 3.859060402684564e-05, |
|
"loss": 0.0017, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 54.55, |
|
"eval_accuracy_score": 0.9681949607600165, |
|
"eval_f1": 0.8776249094858798, |
|
"eval_loss": 0.1974426507949829, |
|
"eval_precision": 0.8571428571428571, |
|
"eval_recall": 0.8991097922848664, |
|
"eval_runtime": 4.1963, |
|
"eval_samples_per_second": 71.491, |
|
"eval_steps_per_second": 9.056, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 57.58, |
|
"eval_accuracy_score": 0.9624122263527468, |
|
"eval_f1": 0.8690563277249451, |
|
"eval_loss": 0.25220534205436707, |
|
"eval_precision": 0.8571428571428571, |
|
"eval_recall": 0.8813056379821959, |
|
"eval_runtime": 4.205, |
|
"eval_samples_per_second": 71.343, |
|
"eval_steps_per_second": 9.037, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 60.61, |
|
"learning_rate": 3.6912751677852356e-05, |
|
"loss": 0.001, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 60.61, |
|
"eval_accuracy_score": 0.9655789618614897, |
|
"eval_f1": 0.8729121278140886, |
|
"eval_loss": 0.2113661915063858, |
|
"eval_precision": 0.8549075391180654, |
|
"eval_recall": 0.8916913946587537, |
|
"eval_runtime": 4.1875, |
|
"eval_samples_per_second": 71.642, |
|
"eval_steps_per_second": 9.075, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 63.64, |
|
"eval_accuracy_score": 0.9625499105053008, |
|
"eval_f1": 0.8552821997105645, |
|
"eval_loss": 0.2266554832458496, |
|
"eval_precision": 0.8347457627118644, |
|
"eval_recall": 0.8768545994065282, |
|
"eval_runtime": 4.1917, |
|
"eval_samples_per_second": 71.57, |
|
"eval_steps_per_second": 9.066, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 66.67, |
|
"eval_accuracy_score": 0.969984854743219, |
|
"eval_f1": 0.8743718592964824, |
|
"eval_loss": 0.18208536505699158, |
|
"eval_precision": 0.847009735744089, |
|
"eval_recall": 0.9035608308605341, |
|
"eval_runtime": 4.1915, |
|
"eval_samples_per_second": 71.573, |
|
"eval_steps_per_second": 9.066, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 68.18, |
|
"learning_rate": 3.523489932885906e-05, |
|
"loss": 0.0021, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 69.7, |
|
"eval_accuracy_score": 0.9551149662673826, |
|
"eval_f1": 0.8406633020908435, |
|
"eval_loss": 0.2883528769016266, |
|
"eval_precision": 0.8176718092566619, |
|
"eval_recall": 0.8649851632047477, |
|
"eval_runtime": 4.1939, |
|
"eval_samples_per_second": 71.533, |
|
"eval_steps_per_second": 9.061, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 72.73, |
|
"eval_accuracy_score": 0.9581440176235715, |
|
"eval_f1": 0.8530050687907313, |
|
"eval_loss": 0.2784726023674011, |
|
"eval_precision": 0.8330975954738331, |
|
"eval_recall": 0.8738872403560831, |
|
"eval_runtime": 4.1999, |
|
"eval_samples_per_second": 71.43, |
|
"eval_steps_per_second": 9.048, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 75.76, |
|
"learning_rate": 3.3557046979865775e-05, |
|
"loss": 0.0017, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 75.76, |
|
"eval_accuracy_score": 0.9633760154206251, |
|
"eval_f1": 0.8614501076812634, |
|
"eval_loss": 0.22903957962989807, |
|
"eval_precision": 0.8344923504867872, |
|
"eval_recall": 0.8902077151335311, |
|
"eval_runtime": 59.3501, |
|
"eval_samples_per_second": 5.055, |
|
"eval_steps_per_second": 0.64, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 78.79, |
|
"eval_accuracy_score": 0.9639267520308412, |
|
"eval_f1": 0.8656499636891795, |
|
"eval_loss": 0.24257344007492065, |
|
"eval_precision": 0.8477951635846372, |
|
"eval_recall": 0.884272997032641, |
|
"eval_runtime": 4.1775, |
|
"eval_samples_per_second": 71.814, |
|
"eval_steps_per_second": 9.096, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 81.82, |
|
"eval_accuracy_score": 0.9650282252512736, |
|
"eval_f1": 0.868804664723032, |
|
"eval_loss": 0.23794998228549957, |
|
"eval_precision": 0.8538681948424068, |
|
"eval_recall": 0.884272997032641, |
|
"eval_runtime": 4.1803, |
|
"eval_samples_per_second": 71.765, |
|
"eval_steps_per_second": 9.09, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 81.82, |
|
"step": 5400, |
|
"total_flos": 2.254571897701939e+16, |
|
"train_loss": 0.015092053589997469, |
|
"train_runtime": 8361.8493, |
|
"train_samples_per_second": 57.404, |
|
"train_steps_per_second": 1.794 |
|
} |
|
], |
|
"max_steps": 15000, |
|
"num_train_epochs": 228, |
|
"total_flos": 2.254571897701939e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|