|
{ |
|
"best_metric": 0.33520336605890605, |
|
"best_model_checkpoint": "videomae-base-finetuned-crema-d8-finetuned-elder-creama-d-pretuned/checkpoint-145", |
|
"epoch": 9.09375, |
|
"eval_steps": 500, |
|
"global_step": 1440, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 35.67122268676758, |
|
"learning_rate": 3.4722222222222224e-06, |
|
"loss": 4.1618, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 33.357269287109375, |
|
"learning_rate": 6.944444444444445e-06, |
|
"loss": 3.4709, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 25.177465438842773, |
|
"learning_rate": 1.0416666666666668e-05, |
|
"loss": 2.5591, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 16.02618408203125, |
|
"learning_rate": 1.388888888888889e-05, |
|
"loss": 2.3848, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 15.27375602722168, |
|
"learning_rate": 1.736111111111111e-05, |
|
"loss": 2.1806, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 11.085648536682129, |
|
"learning_rate": 2.0833333333333336e-05, |
|
"loss": 1.983, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 8.703251838684082, |
|
"learning_rate": 2.4305555555555558e-05, |
|
"loss": 1.8467, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 8.859769821166992, |
|
"learning_rate": 2.777777777777778e-05, |
|
"loss": 1.7654, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 8.25590705871582, |
|
"learning_rate": 3.125e-05, |
|
"loss": 1.6852, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 7.006203651428223, |
|
"learning_rate": 3.472222222222222e-05, |
|
"loss": 1.7227, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 6.808337688446045, |
|
"learning_rate": 3.8194444444444444e-05, |
|
"loss": 1.8047, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 7.12739372253418, |
|
"learning_rate": 4.166666666666667e-05, |
|
"loss": 1.711, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 6.718637466430664, |
|
"learning_rate": 4.5138888888888894e-05, |
|
"loss": 1.7888, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 5.254705905914307, |
|
"learning_rate": 4.8611111111111115e-05, |
|
"loss": 1.6909, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_accuracy": 0.33520336605890605, |
|
"eval_loss": 1.6483688354492188, |
|
"eval_runtime": 1067.0455, |
|
"eval_samples_per_second": 1.336, |
|
"eval_steps_per_second": 0.168, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 6.445611476898193, |
|
"learning_rate": 4.976851851851852e-05, |
|
"loss": 1.5898, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"grad_norm": 7.012285232543945, |
|
"learning_rate": 4.938271604938271e-05, |
|
"loss": 1.7571, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"grad_norm": 6.888933181762695, |
|
"learning_rate": 4.899691358024692e-05, |
|
"loss": 1.7097, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"grad_norm": 5.492002964019775, |
|
"learning_rate": 4.8611111111111115e-05, |
|
"loss": 1.8516, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"grad_norm": 10.119989395141602, |
|
"learning_rate": 4.8225308641975306e-05, |
|
"loss": 1.7601, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"grad_norm": 6.299036026000977, |
|
"learning_rate": 4.783950617283951e-05, |
|
"loss": 1.6907, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"grad_norm": 5.24801778793335, |
|
"learning_rate": 4.745370370370371e-05, |
|
"loss": 1.6796, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"grad_norm": 4.786376476287842, |
|
"learning_rate": 4.70679012345679e-05, |
|
"loss": 1.6915, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"grad_norm": 6.203883647918701, |
|
"learning_rate": 4.66820987654321e-05, |
|
"loss": 1.6441, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"grad_norm": 5.037710189819336, |
|
"learning_rate": 4.62962962962963e-05, |
|
"loss": 1.6633, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"grad_norm": 5.640565872192383, |
|
"learning_rate": 4.591049382716049e-05, |
|
"loss": 1.5735, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"grad_norm": 4.959427833557129, |
|
"learning_rate": 4.5524691358024696e-05, |
|
"loss": 1.5952, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"grad_norm": 4.178658485412598, |
|
"learning_rate": 4.5138888888888894e-05, |
|
"loss": 1.6431, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"grad_norm": 6.911637306213379, |
|
"learning_rate": 4.4753086419753084e-05, |
|
"loss": 1.6024, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"grad_norm": 6.483658790588379, |
|
"learning_rate": 4.436728395061729e-05, |
|
"loss": 1.5944, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"eval_accuracy": 0.27980364656381485, |
|
"eval_loss": 1.6735395193099976, |
|
"eval_runtime": 1075.3628, |
|
"eval_samples_per_second": 1.326, |
|
"eval_steps_per_second": 0.166, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"grad_norm": 6.275816917419434, |
|
"learning_rate": 4.3981481481481486e-05, |
|
"loss": 1.5577, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"grad_norm": 3.8816051483154297, |
|
"learning_rate": 4.359567901234568e-05, |
|
"loss": 1.8176, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"grad_norm": 5.7805376052856445, |
|
"learning_rate": 4.3209876543209875e-05, |
|
"loss": 1.5956, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"grad_norm": 7.336784839630127, |
|
"learning_rate": 4.282407407407408e-05, |
|
"loss": 1.5103, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"grad_norm": 7.558741569519043, |
|
"learning_rate": 4.243827160493827e-05, |
|
"loss": 1.6603, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"grad_norm": 7.094559192657471, |
|
"learning_rate": 4.205246913580247e-05, |
|
"loss": 1.6139, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"grad_norm": 5.095736503601074, |
|
"learning_rate": 4.166666666666667e-05, |
|
"loss": 1.6272, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"grad_norm": 6.24078893661499, |
|
"learning_rate": 4.128086419753087e-05, |
|
"loss": 1.7125, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"grad_norm": 6.2533392906188965, |
|
"learning_rate": 4.089506172839506e-05, |
|
"loss": 1.4988, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"grad_norm": 4.310216426849365, |
|
"learning_rate": 4.0509259259259265e-05, |
|
"loss": 1.5615, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"grad_norm": 5.804831504821777, |
|
"learning_rate": 4.012345679012346e-05, |
|
"loss": 1.5706, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"grad_norm": 4.093357563018799, |
|
"learning_rate": 3.973765432098765e-05, |
|
"loss": 1.5732, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"grad_norm": 9.556779861450195, |
|
"learning_rate": 3.935185185185186e-05, |
|
"loss": 1.8022, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"grad_norm": 4.3243279457092285, |
|
"learning_rate": 3.8966049382716055e-05, |
|
"loss": 1.5776, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"eval_accuracy": 0.3211781206171108, |
|
"eval_loss": 1.6654304265975952, |
|
"eval_runtime": 1069.7784, |
|
"eval_samples_per_second": 1.333, |
|
"eval_steps_per_second": 0.167, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 10.815625190734863, |
|
"learning_rate": 3.8580246913580246e-05, |
|
"loss": 1.6887, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"grad_norm": 7.0304670333862305, |
|
"learning_rate": 3.8194444444444444e-05, |
|
"loss": 1.5767, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"grad_norm": 13.4127836227417, |
|
"learning_rate": 3.780864197530865e-05, |
|
"loss": 1.6149, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"grad_norm": 5.7433881759643555, |
|
"learning_rate": 3.742283950617284e-05, |
|
"loss": 1.5196, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"grad_norm": 3.832094430923462, |
|
"learning_rate": 3.7037037037037037e-05, |
|
"loss": 1.626, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"grad_norm": 4.1515793800354, |
|
"learning_rate": 3.665123456790124e-05, |
|
"loss": 1.4536, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 3.05, |
|
"grad_norm": 9.092682838439941, |
|
"learning_rate": 3.626543209876543e-05, |
|
"loss": 1.4983, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 3.05, |
|
"grad_norm": 10.133805274963379, |
|
"learning_rate": 3.587962962962963e-05, |
|
"loss": 1.6459, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 3.06, |
|
"grad_norm": 5.816834926605225, |
|
"learning_rate": 3.5493827160493834e-05, |
|
"loss": 1.4871, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 3.07, |
|
"grad_norm": 5.276829242706299, |
|
"learning_rate": 3.5108024691358025e-05, |
|
"loss": 1.696, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 3.07, |
|
"grad_norm": 5.181794166564941, |
|
"learning_rate": 3.472222222222222e-05, |
|
"loss": 1.5963, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 3.08, |
|
"grad_norm": 5.877143859863281, |
|
"learning_rate": 3.4336419753086427e-05, |
|
"loss": 1.6116, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 3.09, |
|
"grad_norm": 3.9388859272003174, |
|
"learning_rate": 3.395061728395062e-05, |
|
"loss": 1.5434, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 3.09, |
|
"grad_norm": 8.216493606567383, |
|
"learning_rate": 3.3564814814814815e-05, |
|
"loss": 1.7139, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"grad_norm": 5.945636749267578, |
|
"learning_rate": 3.317901234567901e-05, |
|
"loss": 1.6768, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"eval_accuracy": 0.18583450210378682, |
|
"eval_loss": 1.7330094575881958, |
|
"eval_runtime": 1081.1675, |
|
"eval_samples_per_second": 1.319, |
|
"eval_steps_per_second": 0.166, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 4.01, |
|
"grad_norm": 5.201643943786621, |
|
"learning_rate": 3.279320987654321e-05, |
|
"loss": 1.6145, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 4.01, |
|
"grad_norm": 5.568819522857666, |
|
"learning_rate": 3.240740740740741e-05, |
|
"loss": 1.5311, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 4.02, |
|
"grad_norm": 4.623745441436768, |
|
"learning_rate": 3.2021604938271605e-05, |
|
"loss": 1.5345, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 4.03, |
|
"grad_norm": 7.134936809539795, |
|
"learning_rate": 3.16358024691358e-05, |
|
"loss": 1.6203, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 4.03, |
|
"grad_norm": 7.0437469482421875, |
|
"learning_rate": 3.125e-05, |
|
"loss": 1.4597, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 4.04, |
|
"grad_norm": 6.131565570831299, |
|
"learning_rate": 3.08641975308642e-05, |
|
"loss": 1.5898, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 4.05, |
|
"grad_norm": 6.676046371459961, |
|
"learning_rate": 3.04783950617284e-05, |
|
"loss": 1.5425, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 4.06, |
|
"grad_norm": 5.616217136383057, |
|
"learning_rate": 3.0092592592592593e-05, |
|
"loss": 1.4983, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 4.06, |
|
"grad_norm": 8.613158226013184, |
|
"learning_rate": 2.970679012345679e-05, |
|
"loss": 1.6394, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 4.07, |
|
"grad_norm": 7.01395320892334, |
|
"learning_rate": 2.9320987654320992e-05, |
|
"loss": 1.6682, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 4.08, |
|
"grad_norm": 6.950262546539307, |
|
"learning_rate": 2.8935185185185186e-05, |
|
"loss": 1.5914, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 4.08, |
|
"grad_norm": 7.496145248413086, |
|
"learning_rate": 2.8549382716049384e-05, |
|
"loss": 1.5062, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 4.09, |
|
"grad_norm": 7.298316478729248, |
|
"learning_rate": 2.8163580246913578e-05, |
|
"loss": 1.5272, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 4.1, |
|
"grad_norm": 8.239325523376465, |
|
"learning_rate": 2.777777777777778e-05, |
|
"loss": 1.6108, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 4.1, |
|
"eval_accuracy": 0.2503506311360449, |
|
"eval_loss": 1.691924810409546, |
|
"eval_runtime": 1114.3234, |
|
"eval_samples_per_second": 1.28, |
|
"eval_steps_per_second": 0.161, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 5.825406551361084, |
|
"learning_rate": 2.7391975308641977e-05, |
|
"loss": 1.5194, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 5.01, |
|
"grad_norm": 5.623870849609375, |
|
"learning_rate": 2.700617283950617e-05, |
|
"loss": 1.5299, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 5.02, |
|
"grad_norm": 4.288653373718262, |
|
"learning_rate": 2.6620370370370372e-05, |
|
"loss": 1.6937, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 5.02, |
|
"grad_norm": 7.575453758239746, |
|
"learning_rate": 2.623456790123457e-05, |
|
"loss": 1.5297, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 5.03, |
|
"grad_norm": 5.92163610458374, |
|
"learning_rate": 2.5848765432098764e-05, |
|
"loss": 1.5652, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 5.04, |
|
"grad_norm": 7.6441426277160645, |
|
"learning_rate": 2.5462962962962965e-05, |
|
"loss": 1.5775, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 5.05, |
|
"grad_norm": 4.205317974090576, |
|
"learning_rate": 2.5077160493827162e-05, |
|
"loss": 1.6554, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 5.05, |
|
"grad_norm": 7.7730512619018555, |
|
"learning_rate": 2.4691358024691357e-05, |
|
"loss": 1.5925, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 5.06, |
|
"grad_norm": 5.722338676452637, |
|
"learning_rate": 2.4305555555555558e-05, |
|
"loss": 1.5774, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 5.07, |
|
"grad_norm": 8.397584915161133, |
|
"learning_rate": 2.3919753086419755e-05, |
|
"loss": 1.6021, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 5.07, |
|
"grad_norm": 8.244030952453613, |
|
"learning_rate": 2.353395061728395e-05, |
|
"loss": 1.4784, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 5.08, |
|
"grad_norm": 5.344796180725098, |
|
"learning_rate": 2.314814814814815e-05, |
|
"loss": 1.4185, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 5.09, |
|
"grad_norm": 6.568389892578125, |
|
"learning_rate": 2.2762345679012348e-05, |
|
"loss": 1.5829, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 5.09, |
|
"grad_norm": 4.663444995880127, |
|
"learning_rate": 2.2376543209876542e-05, |
|
"loss": 1.529, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 5.1, |
|
"grad_norm": 4.336330890655518, |
|
"learning_rate": 2.1990740740740743e-05, |
|
"loss": 1.5103, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 5.1, |
|
"eval_accuracy": 0.2805049088359046, |
|
"eval_loss": 1.6524486541748047, |
|
"eval_runtime": 1069.6905, |
|
"eval_samples_per_second": 1.333, |
|
"eval_steps_per_second": 0.167, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 6.01, |
|
"grad_norm": 4.447099685668945, |
|
"learning_rate": 2.1604938271604937e-05, |
|
"loss": 1.4805, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 6.01, |
|
"grad_norm": 4.537060260772705, |
|
"learning_rate": 2.1219135802469135e-05, |
|
"loss": 1.5277, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 6.02, |
|
"grad_norm": 6.641722202301025, |
|
"learning_rate": 2.0833333333333336e-05, |
|
"loss": 1.3914, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 6.03, |
|
"grad_norm": 11.075356483459473, |
|
"learning_rate": 2.044753086419753e-05, |
|
"loss": 1.5228, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 6.03, |
|
"grad_norm": 7.371598720550537, |
|
"learning_rate": 2.006172839506173e-05, |
|
"loss": 1.6399, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 6.04, |
|
"grad_norm": 7.182305812835693, |
|
"learning_rate": 1.967592592592593e-05, |
|
"loss": 1.5727, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 6.05, |
|
"grad_norm": 6.9517598152160645, |
|
"learning_rate": 1.9290123456790123e-05, |
|
"loss": 1.5919, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 6.06, |
|
"grad_norm": 5.393679618835449, |
|
"learning_rate": 1.8904320987654324e-05, |
|
"loss": 1.5038, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 6.06, |
|
"grad_norm": 6.600991249084473, |
|
"learning_rate": 1.8518518518518518e-05, |
|
"loss": 1.5993, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 6.07, |
|
"grad_norm": 5.317991733551025, |
|
"learning_rate": 1.8132716049382716e-05, |
|
"loss": 1.5051, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 6.08, |
|
"grad_norm": 6.535513401031494, |
|
"learning_rate": 1.7746913580246917e-05, |
|
"loss": 1.5216, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 6.08, |
|
"grad_norm": 7.035238265991211, |
|
"learning_rate": 1.736111111111111e-05, |
|
"loss": 1.508, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 6.09, |
|
"grad_norm": 5.729631423950195, |
|
"learning_rate": 1.697530864197531e-05, |
|
"loss": 1.6134, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 6.1, |
|
"grad_norm": 5.01363468170166, |
|
"learning_rate": 1.6589506172839506e-05, |
|
"loss": 1.5447, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 6.1, |
|
"eval_accuracy": 0.3085553997194951, |
|
"eval_loss": 1.6767175197601318, |
|
"eval_runtime": 993.337, |
|
"eval_samples_per_second": 1.436, |
|
"eval_steps_per_second": 0.18, |
|
"step": 1015 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 5.760792255401611, |
|
"learning_rate": 1.6203703703703704e-05, |
|
"loss": 1.5299, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 7.01, |
|
"grad_norm": 3.973865032196045, |
|
"learning_rate": 1.58179012345679e-05, |
|
"loss": 1.3628, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 7.02, |
|
"grad_norm": 4.775136947631836, |
|
"learning_rate": 1.54320987654321e-05, |
|
"loss": 1.5003, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 7.02, |
|
"grad_norm": 5.997446060180664, |
|
"learning_rate": 1.5046296296296297e-05, |
|
"loss": 1.4961, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 7.03, |
|
"grad_norm": 6.616335391998291, |
|
"learning_rate": 1.4660493827160496e-05, |
|
"loss": 1.4256, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 7.04, |
|
"grad_norm": 5.351243495941162, |
|
"learning_rate": 1.4274691358024692e-05, |
|
"loss": 1.4124, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 7.05, |
|
"grad_norm": 7.641751289367676, |
|
"learning_rate": 1.388888888888889e-05, |
|
"loss": 1.4978, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 7.05, |
|
"grad_norm": 8.547698020935059, |
|
"learning_rate": 1.3503086419753085e-05, |
|
"loss": 1.4808, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 7.06, |
|
"grad_norm": 7.036251068115234, |
|
"learning_rate": 1.3117283950617285e-05, |
|
"loss": 1.4367, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 7.07, |
|
"grad_norm": 5.7725982666015625, |
|
"learning_rate": 1.2731481481481482e-05, |
|
"loss": 1.4533, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 7.07, |
|
"grad_norm": 6.249902725219727, |
|
"learning_rate": 1.2345679012345678e-05, |
|
"loss": 1.5726, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 7.08, |
|
"grad_norm": 9.042238235473633, |
|
"learning_rate": 1.1959876543209878e-05, |
|
"loss": 1.47, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 7.09, |
|
"grad_norm": 8.399900436401367, |
|
"learning_rate": 1.1574074074074075e-05, |
|
"loss": 1.5411, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 7.09, |
|
"grad_norm": 6.547450065612793, |
|
"learning_rate": 1.1188271604938271e-05, |
|
"loss": 1.5962, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 7.1, |
|
"grad_norm": 10.586703300476074, |
|
"learning_rate": 1.0802469135802469e-05, |
|
"loss": 1.5237, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 7.1, |
|
"eval_accuracy": 0.2552594670406732, |
|
"eval_loss": 1.7328603267669678, |
|
"eval_runtime": 1099.3691, |
|
"eval_samples_per_second": 1.297, |
|
"eval_steps_per_second": 0.163, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 8.01, |
|
"grad_norm": 6.408758163452148, |
|
"learning_rate": 1.0416666666666668e-05, |
|
"loss": 1.2435, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 8.01, |
|
"grad_norm": 6.371536731719971, |
|
"learning_rate": 1.0030864197530866e-05, |
|
"loss": 1.3234, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 8.02, |
|
"grad_norm": 5.1275410652160645, |
|
"learning_rate": 9.645061728395062e-06, |
|
"loss": 1.4336, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 8.03, |
|
"grad_norm": 6.502357006072998, |
|
"learning_rate": 9.259259259259259e-06, |
|
"loss": 1.425, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 8.03, |
|
"grad_norm": 6.920971870422363, |
|
"learning_rate": 8.873456790123458e-06, |
|
"loss": 1.462, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 8.04, |
|
"grad_norm": 6.478498935699463, |
|
"learning_rate": 8.487654320987654e-06, |
|
"loss": 1.4694, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 8.05, |
|
"grad_norm": 8.00242805480957, |
|
"learning_rate": 8.101851851851852e-06, |
|
"loss": 1.5305, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 8.06, |
|
"grad_norm": 5.735220909118652, |
|
"learning_rate": 7.71604938271605e-06, |
|
"loss": 1.5784, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 8.06, |
|
"grad_norm": 7.882507801055908, |
|
"learning_rate": 7.330246913580248e-06, |
|
"loss": 1.6219, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 8.07, |
|
"grad_norm": 3.4598042964935303, |
|
"learning_rate": 6.944444444444445e-06, |
|
"loss": 1.4463, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 8.08, |
|
"grad_norm": 6.024099349975586, |
|
"learning_rate": 6.558641975308642e-06, |
|
"loss": 1.3849, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 8.08, |
|
"grad_norm": 5.08905029296875, |
|
"learning_rate": 6.172839506172839e-06, |
|
"loss": 1.507, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 8.09, |
|
"grad_norm": 5.94057559967041, |
|
"learning_rate": 5.787037037037038e-06, |
|
"loss": 1.5478, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 8.1, |
|
"grad_norm": 7.8740458488464355, |
|
"learning_rate": 5.401234567901234e-06, |
|
"loss": 1.4397, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 8.1, |
|
"eval_accuracy": 0.24754558204768584, |
|
"eval_loss": 1.7292535305023193, |
|
"eval_runtime": 1078.3495, |
|
"eval_samples_per_second": 1.322, |
|
"eval_steps_per_second": 0.166, |
|
"step": 1305 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"grad_norm": 6.3548455238342285, |
|
"learning_rate": 5.015432098765433e-06, |
|
"loss": 1.3801, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 9.01, |
|
"grad_norm": 6.385293960571289, |
|
"learning_rate": 4.6296296296296296e-06, |
|
"loss": 1.57, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 9.02, |
|
"grad_norm": 5.050124168395996, |
|
"learning_rate": 4.243827160493827e-06, |
|
"loss": 1.3412, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 9.02, |
|
"grad_norm": 8.06653118133545, |
|
"learning_rate": 3.858024691358025e-06, |
|
"loss": 1.3923, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 9.03, |
|
"grad_norm": 6.628742694854736, |
|
"learning_rate": 3.4722222222222224e-06, |
|
"loss": 1.4682, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 9.04, |
|
"grad_norm": 5.726457118988037, |
|
"learning_rate": 3.0864197530864196e-06, |
|
"loss": 1.4295, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 9.05, |
|
"grad_norm": 6.105814456939697, |
|
"learning_rate": 2.700617283950617e-06, |
|
"loss": 1.3765, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 9.05, |
|
"grad_norm": 8.145650863647461, |
|
"learning_rate": 2.3148148148148148e-06, |
|
"loss": 1.3884, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 9.06, |
|
"grad_norm": 5.698288440704346, |
|
"learning_rate": 1.9290123456790124e-06, |
|
"loss": 1.501, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 9.07, |
|
"grad_norm": 7.972877025604248, |
|
"learning_rate": 1.5432098765432098e-06, |
|
"loss": 1.4569, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 9.07, |
|
"grad_norm": 5.555938243865967, |
|
"learning_rate": 1.1574074074074074e-06, |
|
"loss": 1.4661, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 9.08, |
|
"grad_norm": 6.897174835205078, |
|
"learning_rate": 7.716049382716049e-07, |
|
"loss": 1.3853, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 9.09, |
|
"grad_norm": 6.399707794189453, |
|
"learning_rate": 3.8580246913580245e-07, |
|
"loss": 1.4631, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 9.09, |
|
"grad_norm": 4.795849800109863, |
|
"learning_rate": 0.0, |
|
"loss": 1.4544, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 9.09, |
|
"eval_accuracy": 0.2517531556802244, |
|
"eval_loss": 1.7367732524871826, |
|
"eval_runtime": 1094.539, |
|
"eval_samples_per_second": 1.303, |
|
"eval_steps_per_second": 0.164, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 9.09, |
|
"step": 1440, |
|
"total_flos": 1.4321531194283852e+19, |
|
"train_loss": 1.6113481746779548, |
|
"train_runtime": 27059.6186, |
|
"train_samples_per_second": 0.426, |
|
"train_steps_per_second": 0.053 |
|
}, |
|
{ |
|
"epoch": 9.09, |
|
"eval_accuracy": 0.3389121338912134, |
|
"eval_loss": 1.6702075004577637, |
|
"eval_runtime": 905.0071, |
|
"eval_samples_per_second": 1.32, |
|
"eval_steps_per_second": 0.166, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 9.09, |
|
"eval_accuracy": 0.3389121338912134, |
|
"eval_loss": 1.6702075004577637, |
|
"eval_runtime": 908.3161, |
|
"eval_samples_per_second": 1.316, |
|
"eval_steps_per_second": 0.165, |
|
"step": 1440 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 1440, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 9223372036854775807, |
|
"save_steps": 500, |
|
"total_flos": 1.4321531194283852e+19, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|