|
{ |
|
"best_metric": 0.3843843843843844, |
|
"best_model_checkpoint": "/scratch/mrahma45/pixel/finetuned_models/canine/canine-base-finetuned-masakhaner-amh/checkpoint-7000", |
|
"epoch": 152.72727272727272, |
|
"global_step": 8400, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 3.64, |
|
"eval_accuracy_score": 0.8550492938982147, |
|
"eval_f1": 0.18181818181818182, |
|
"eval_loss": 0.5423177480697632, |
|
"eval_precision": 0.25842696629213485, |
|
"eval_recall": 0.1402439024390244, |
|
"eval_runtime": 1.939, |
|
"eval_samples_per_second": 128.935, |
|
"eval_steps_per_second": 16.504, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 7.27, |
|
"eval_accuracy_score": 0.8651745270450306, |
|
"eval_f1": 0.2825484764542936, |
|
"eval_loss": 0.47274425625801086, |
|
"eval_precision": 0.25888324873096447, |
|
"eval_recall": 0.31097560975609756, |
|
"eval_runtime": 1.9444, |
|
"eval_samples_per_second": 128.574, |
|
"eval_steps_per_second": 16.457, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 9.09, |
|
"learning_rate": 4.865771812080537e-05, |
|
"loss": 0.5536, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 10.91, |
|
"eval_accuracy_score": 0.8779642952304823, |
|
"eval_f1": 0.31172413793103443, |
|
"eval_loss": 0.4644899368286133, |
|
"eval_precision": 0.28463476070528965, |
|
"eval_recall": 0.3445121951219512, |
|
"eval_runtime": 1.955, |
|
"eval_samples_per_second": 127.88, |
|
"eval_steps_per_second": 16.369, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 14.55, |
|
"eval_accuracy_score": 0.8763655742073009, |
|
"eval_f1": 0.3186813186813187, |
|
"eval_loss": 0.4930596649646759, |
|
"eval_precision": 0.29, |
|
"eval_recall": 0.35365853658536583, |
|
"eval_runtime": 1.9394, |
|
"eval_samples_per_second": 128.906, |
|
"eval_steps_per_second": 16.5, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 18.18, |
|
"learning_rate": 4.697986577181208e-05, |
|
"loss": 0.1937, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 18.18, |
|
"eval_accuracy_score": 0.8840927258193445, |
|
"eval_f1": 0.33570412517780934, |
|
"eval_loss": 0.5170220732688904, |
|
"eval_precision": 0.31466666666666665, |
|
"eval_recall": 0.3597560975609756, |
|
"eval_runtime": 1.9489, |
|
"eval_samples_per_second": 128.275, |
|
"eval_steps_per_second": 16.419, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 21.82, |
|
"eval_accuracy_score": 0.8878230748734346, |
|
"eval_f1": 0.33956834532374097, |
|
"eval_loss": 0.5543549656867981, |
|
"eval_precision": 0.3215258855585831, |
|
"eval_recall": 0.3597560975609756, |
|
"eval_runtime": 1.948, |
|
"eval_samples_per_second": 128.334, |
|
"eval_steps_per_second": 16.427, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 25.45, |
|
"eval_accuracy_score": 0.8904876099120703, |
|
"eval_f1": 0.36811594202898545, |
|
"eval_loss": 0.5699805617332458, |
|
"eval_precision": 0.35082872928176795, |
|
"eval_recall": 0.3871951219512195, |
|
"eval_runtime": 1.9374, |
|
"eval_samples_per_second": 129.042, |
|
"eval_steps_per_second": 16.517, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 27.27, |
|
"learning_rate": 4.530201342281879e-05, |
|
"loss": 0.0666, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 29.09, |
|
"eval_accuracy_score": 0.8878230748734346, |
|
"eval_f1": 0.358974358974359, |
|
"eval_loss": 0.6034800410270691, |
|
"eval_precision": 0.33689839572192515, |
|
"eval_recall": 0.38414634146341464, |
|
"eval_runtime": 1.9432, |
|
"eval_samples_per_second": 128.651, |
|
"eval_steps_per_second": 16.467, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 32.73, |
|
"eval_accuracy_score": 0.8883559818811617, |
|
"eval_f1": 0.3724137931034482, |
|
"eval_loss": 0.6207764744758606, |
|
"eval_precision": 0.34005037783375314, |
|
"eval_recall": 0.4115853658536585, |
|
"eval_runtime": 1.9449, |
|
"eval_samples_per_second": 128.538, |
|
"eval_steps_per_second": 16.453, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 36.36, |
|
"learning_rate": 4.36241610738255e-05, |
|
"loss": 0.0254, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 36.36, |
|
"eval_accuracy_score": 0.8907540634159339, |
|
"eval_f1": 0.35460992907801414, |
|
"eval_loss": 0.6343292593955994, |
|
"eval_precision": 0.33156498673740054, |
|
"eval_recall": 0.38109756097560976, |
|
"eval_runtime": 7.9566, |
|
"eval_samples_per_second": 31.42, |
|
"eval_steps_per_second": 4.022, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_accuracy_score": 0.8928856914468425, |
|
"eval_f1": 0.3611111111111111, |
|
"eval_loss": 0.6792951822280884, |
|
"eval_precision": 0.365625, |
|
"eval_recall": 0.3567073170731707, |
|
"eval_runtime": 1.9365, |
|
"eval_samples_per_second": 129.101, |
|
"eval_steps_per_second": 16.525, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 43.64, |
|
"eval_accuracy_score": 0.8920863309352518, |
|
"eval_f1": 0.36465638148667606, |
|
"eval_loss": 0.6779835224151611, |
|
"eval_precision": 0.33766233766233766, |
|
"eval_recall": 0.39634146341463417, |
|
"eval_runtime": 1.9311, |
|
"eval_samples_per_second": 129.461, |
|
"eval_steps_per_second": 16.571, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 45.45, |
|
"learning_rate": 4.194630872483222e-05, |
|
"loss": 0.0133, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 47.27, |
|
"eval_accuracy_score": 0.8920863309352518, |
|
"eval_f1": 0.36775106082036774, |
|
"eval_loss": 0.6805456876754761, |
|
"eval_precision": 0.34300791556728233, |
|
"eval_recall": 0.39634146341463417, |
|
"eval_runtime": 1.9389, |
|
"eval_samples_per_second": 128.941, |
|
"eval_steps_per_second": 16.504, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 50.91, |
|
"eval_accuracy_score": 0.8920863309352518, |
|
"eval_f1": 0.34452554744525543, |
|
"eval_loss": 0.7049936056137085, |
|
"eval_precision": 0.33053221288515405, |
|
"eval_recall": 0.3597560975609756, |
|
"eval_runtime": 1.9347, |
|
"eval_samples_per_second": 129.22, |
|
"eval_steps_per_second": 16.54, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 54.55, |
|
"learning_rate": 4.026845637583892e-05, |
|
"loss": 0.0086, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 54.55, |
|
"eval_accuracy_score": 0.8947508659738875, |
|
"eval_f1": 0.362555720653789, |
|
"eval_loss": 0.7268127799034119, |
|
"eval_precision": 0.3536231884057971, |
|
"eval_recall": 0.3719512195121951, |
|
"eval_runtime": 1.9322, |
|
"eval_samples_per_second": 129.384, |
|
"eval_steps_per_second": 16.561, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 58.18, |
|
"eval_accuracy_score": 0.8958166799893419, |
|
"eval_f1": 0.3654970760233918, |
|
"eval_loss": 0.7148891687393188, |
|
"eval_precision": 0.351123595505618, |
|
"eval_recall": 0.38109756097560976, |
|
"eval_runtime": 1.9378, |
|
"eval_samples_per_second": 129.014, |
|
"eval_steps_per_second": 16.514, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 61.82, |
|
"eval_accuracy_score": 0.8926192379429789, |
|
"eval_f1": 0.3541364296081277, |
|
"eval_loss": 0.7266244888305664, |
|
"eval_precision": 0.3379501385041551, |
|
"eval_recall": 0.3719512195121951, |
|
"eval_runtime": 1.9376, |
|
"eval_samples_per_second": 129.025, |
|
"eval_steps_per_second": 16.515, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 63.64, |
|
"learning_rate": 3.859060402684564e-05, |
|
"loss": 0.0064, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 65.45, |
|
"eval_accuracy_score": 0.8902211564082068, |
|
"eval_f1": 0.37822349570200575, |
|
"eval_loss": 0.746965229511261, |
|
"eval_precision": 0.3567567567567568, |
|
"eval_recall": 0.4024390243902439, |
|
"eval_runtime": 1.934, |
|
"eval_samples_per_second": 129.263, |
|
"eval_steps_per_second": 16.546, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 69.09, |
|
"eval_accuracy_score": 0.8992805755395683, |
|
"eval_f1": 0.38730158730158737, |
|
"eval_loss": 0.768635630607605, |
|
"eval_precision": 0.40397350993377484, |
|
"eval_recall": 0.3719512195121951, |
|
"eval_runtime": 1.936, |
|
"eval_samples_per_second": 129.13, |
|
"eval_steps_per_second": 16.529, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 72.73, |
|
"learning_rate": 3.6912751677852356e-05, |
|
"loss": 0.0049, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 72.73, |
|
"eval_accuracy_score": 0.8886224353850253, |
|
"eval_f1": 0.3631284916201117, |
|
"eval_loss": 0.7521090507507324, |
|
"eval_precision": 0.33505154639175255, |
|
"eval_recall": 0.39634146341463417, |
|
"eval_runtime": 1.9329, |
|
"eval_samples_per_second": 129.341, |
|
"eval_steps_per_second": 16.556, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 76.36, |
|
"eval_accuracy_score": 0.8966160405009326, |
|
"eval_f1": 0.3831640058055153, |
|
"eval_loss": 0.7643349170684814, |
|
"eval_precision": 0.3656509695290859, |
|
"eval_recall": 0.4024390243902439, |
|
"eval_runtime": 1.9372, |
|
"eval_samples_per_second": 129.05, |
|
"eval_steps_per_second": 16.518, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"eval_accuracy_score": 0.8904876099120703, |
|
"eval_f1": 0.35942028985507246, |
|
"eval_loss": 0.7822825908660889, |
|
"eval_precision": 0.3425414364640884, |
|
"eval_recall": 0.3780487804878049, |
|
"eval_runtime": 1.9392, |
|
"eval_samples_per_second": 128.916, |
|
"eval_steps_per_second": 16.501, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 81.82, |
|
"learning_rate": 3.523489932885906e-05, |
|
"loss": 0.0042, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 83.64, |
|
"eval_accuracy_score": 0.8910205169197974, |
|
"eval_f1": 0.36775106082036774, |
|
"eval_loss": 0.7950363755226135, |
|
"eval_precision": 0.34300791556728233, |
|
"eval_recall": 0.39634146341463417, |
|
"eval_runtime": 1.9356, |
|
"eval_samples_per_second": 129.16, |
|
"eval_steps_per_second": 16.533, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 87.27, |
|
"eval_accuracy_score": 0.8958166799893419, |
|
"eval_f1": 0.3740573152337858, |
|
"eval_loss": 0.7768180966377258, |
|
"eval_precision": 0.3701492537313433, |
|
"eval_recall": 0.3780487804878049, |
|
"eval_runtime": 1.9278, |
|
"eval_samples_per_second": 129.685, |
|
"eval_steps_per_second": 16.6, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 90.91, |
|
"learning_rate": 3.3557046979865775e-05, |
|
"loss": 0.0034, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 90.91, |
|
"eval_accuracy_score": 0.8854249933386624, |
|
"eval_f1": 0.3408450704225352, |
|
"eval_loss": 0.8097852468490601, |
|
"eval_precision": 0.31675392670157065, |
|
"eval_recall": 0.36890243902439024, |
|
"eval_runtime": 1.9518, |
|
"eval_samples_per_second": 128.087, |
|
"eval_steps_per_second": 16.395, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 94.55, |
|
"eval_accuracy_score": 0.8928856914468425, |
|
"eval_f1": 0.3826086956521739, |
|
"eval_loss": 0.8147957921028137, |
|
"eval_precision": 0.36464088397790057, |
|
"eval_recall": 0.4024390243902439, |
|
"eval_runtime": 1.9329, |
|
"eval_samples_per_second": 129.34, |
|
"eval_steps_per_second": 16.555, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 98.18, |
|
"eval_accuracy_score": 0.8939515054622968, |
|
"eval_f1": 0.37125748502994016, |
|
"eval_loss": 0.826421856880188, |
|
"eval_precision": 0.36470588235294116, |
|
"eval_recall": 0.3780487804878049, |
|
"eval_runtime": 1.9341, |
|
"eval_samples_per_second": 129.257, |
|
"eval_steps_per_second": 16.545, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"learning_rate": 3.1879194630872485e-05, |
|
"loss": 0.0025, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 101.82, |
|
"eval_accuracy_score": 0.8936850519584333, |
|
"eval_f1": 0.37181409295352325, |
|
"eval_loss": 0.8357872366905212, |
|
"eval_precision": 0.36578171091445427, |
|
"eval_recall": 0.3780487804878049, |
|
"eval_runtime": 1.9277, |
|
"eval_samples_per_second": 129.688, |
|
"eval_steps_per_second": 16.6, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 105.45, |
|
"eval_accuracy_score": 0.8907540634159339, |
|
"eval_f1": 0.36075036075036077, |
|
"eval_loss": 0.8359671235084534, |
|
"eval_precision": 0.3424657534246575, |
|
"eval_recall": 0.38109756097560976, |
|
"eval_runtime": 1.9329, |
|
"eval_samples_per_second": 129.342, |
|
"eval_steps_per_second": 16.556, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 109.09, |
|
"learning_rate": 3.02013422818792e-05, |
|
"loss": 0.0026, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 109.09, |
|
"eval_accuracy_score": 0.894484412470024, |
|
"eval_f1": 0.37037037037037046, |
|
"eval_loss": 0.8180095553398132, |
|
"eval_precision": 0.34759358288770054, |
|
"eval_recall": 0.39634146341463417, |
|
"eval_runtime": 1.9323, |
|
"eval_samples_per_second": 129.378, |
|
"eval_steps_per_second": 16.56, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 112.73, |
|
"eval_accuracy_score": 0.8926192379429789, |
|
"eval_f1": 0.3592814371257485, |
|
"eval_loss": 0.8526071906089783, |
|
"eval_precision": 0.35294117647058826, |
|
"eval_recall": 0.36585365853658536, |
|
"eval_runtime": 1.9299, |
|
"eval_samples_per_second": 129.542, |
|
"eval_steps_per_second": 16.581, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 116.36, |
|
"eval_accuracy_score": 0.8950173194777511, |
|
"eval_f1": 0.38135593220338987, |
|
"eval_loss": 0.8284777998924255, |
|
"eval_precision": 0.35526315789473684, |
|
"eval_recall": 0.4115853658536585, |
|
"eval_runtime": 1.9337, |
|
"eval_samples_per_second": 129.288, |
|
"eval_steps_per_second": 16.549, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 118.18, |
|
"learning_rate": 2.8523489932885905e-05, |
|
"loss": 0.0023, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 120.0, |
|
"eval_accuracy_score": 0.8979483080202505, |
|
"eval_f1": 0.3795620437956205, |
|
"eval_loss": 0.8373560905456543, |
|
"eval_precision": 0.3641456582633053, |
|
"eval_recall": 0.39634146341463417, |
|
"eval_runtime": 1.931, |
|
"eval_samples_per_second": 129.468, |
|
"eval_steps_per_second": 16.572, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 123.64, |
|
"eval_accuracy_score": 0.896349586997069, |
|
"eval_f1": 0.3813682678311499, |
|
"eval_loss": 0.8431832194328308, |
|
"eval_precision": 0.3649025069637883, |
|
"eval_recall": 0.39939024390243905, |
|
"eval_runtime": 1.9307, |
|
"eval_samples_per_second": 129.485, |
|
"eval_steps_per_second": 16.574, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 127.27, |
|
"learning_rate": 2.6845637583892618e-05, |
|
"loss": 0.0017, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 127.27, |
|
"eval_accuracy_score": 0.8968824940047961, |
|
"eval_f1": 0.3843843843843844, |
|
"eval_loss": 0.8679910898208618, |
|
"eval_precision": 0.378698224852071, |
|
"eval_recall": 0.3902439024390244, |
|
"eval_runtime": 1.9274, |
|
"eval_samples_per_second": 129.711, |
|
"eval_steps_per_second": 16.603, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 130.91, |
|
"eval_accuracy_score": 0.8958166799893419, |
|
"eval_f1": 0.38383838383838387, |
|
"eval_loss": 0.8538209199905396, |
|
"eval_precision": 0.3643835616438356, |
|
"eval_recall": 0.4054878048780488, |
|
"eval_runtime": 1.9333, |
|
"eval_samples_per_second": 129.316, |
|
"eval_steps_per_second": 16.552, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 134.55, |
|
"eval_accuracy_score": 0.8995470290434319, |
|
"eval_f1": 0.3894582723279649, |
|
"eval_loss": 0.8490965366363525, |
|
"eval_precision": 0.37464788732394366, |
|
"eval_recall": 0.4054878048780488, |
|
"eval_runtime": 1.9341, |
|
"eval_samples_per_second": 129.26, |
|
"eval_steps_per_second": 16.545, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 136.36, |
|
"learning_rate": 2.516778523489933e-05, |
|
"loss": 0.0016, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 138.18, |
|
"eval_accuracy_score": 0.8966160405009326, |
|
"eval_f1": 0.37223042836041353, |
|
"eval_loss": 0.8647195100784302, |
|
"eval_precision": 0.36103151862464183, |
|
"eval_recall": 0.38414634146341464, |
|
"eval_runtime": 1.9327, |
|
"eval_samples_per_second": 129.354, |
|
"eval_steps_per_second": 16.557, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 141.82, |
|
"eval_accuracy_score": 0.8966160405009326, |
|
"eval_f1": 0.37788018433179726, |
|
"eval_loss": 0.8978907465934753, |
|
"eval_precision": 0.38080495356037153, |
|
"eval_recall": 0.375, |
|
"eval_runtime": 1.9332, |
|
"eval_samples_per_second": 129.318, |
|
"eval_steps_per_second": 16.553, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 145.45, |
|
"learning_rate": 2.348993288590604e-05, |
|
"loss": 0.0017, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 145.45, |
|
"eval_accuracy_score": 0.894484412470024, |
|
"eval_f1": 0.3607038123167156, |
|
"eval_loss": 0.8674683570861816, |
|
"eval_precision": 0.3474576271186441, |
|
"eval_recall": 0.375, |
|
"eval_runtime": 1.9329, |
|
"eval_samples_per_second": 129.337, |
|
"eval_steps_per_second": 16.555, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 149.09, |
|
"eval_accuracy_score": 0.8976818545163869, |
|
"eval_f1": 0.37518037518037517, |
|
"eval_loss": 0.8685262203216553, |
|
"eval_precision": 0.3561643835616438, |
|
"eval_recall": 0.39634146341463417, |
|
"eval_runtime": 1.9309, |
|
"eval_samples_per_second": 129.47, |
|
"eval_steps_per_second": 16.572, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 152.73, |
|
"eval_accuracy_score": 0.896349586997069, |
|
"eval_f1": 0.38028169014084506, |
|
"eval_loss": 0.816967785358429, |
|
"eval_precision": 0.35340314136125656, |
|
"eval_recall": 0.4115853658536585, |
|
"eval_runtime": 1.9274, |
|
"eval_samples_per_second": 129.712, |
|
"eval_steps_per_second": 16.603, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 152.73, |
|
"step": 8400, |
|
"total_flos": 4.389177345306624e+16, |
|
"train_loss": 0.05317856186912173, |
|
"train_runtime": 6259.9994, |
|
"train_samples_per_second": 76.677, |
|
"train_steps_per_second": 2.396 |
|
} |
|
], |
|
"max_steps": 15000, |
|
"num_train_epochs": 273, |
|
"total_flos": 4.389177345306624e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|