cconvnext-tiny-15ep-1e-4 / trainer_state.json
vuongnhathien's picture
End of training
2e43239 verified
raw
history blame contribute delete
No virus
17.5 kB
{
"best_metric": 0.2901732623577118,
"best_model_checkpoint": "./cconvnext-tiny-15ep-1e-4/checkpoint-8250",
"epoch": 15.0,
"eval_steps": 500,
"global_step": 8250,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.18,
"grad_norm": 17.5977840423584,
"learning_rate": 9.996375239002369e-05,
"loss": 1.9015,
"step": 100
},
{
"epoch": 0.36,
"grad_norm": 19.579683303833008,
"learning_rate": 9.985506211566388e-05,
"loss": 0.9353,
"step": 200
},
{
"epoch": 0.55,
"grad_norm": 14.599044799804688,
"learning_rate": 9.967408676742751e-05,
"loss": 0.7327,
"step": 300
},
{
"epoch": 0.73,
"grad_norm": 13.404082298278809,
"learning_rate": 9.942108874226811e-05,
"loss": 0.6524,
"step": 400
},
{
"epoch": 0.91,
"grad_norm": 15.84705924987793,
"learning_rate": 9.909643486313533e-05,
"loss": 0.5838,
"step": 500
},
{
"epoch": 1.0,
"eval_accuracy": 0.8811133200795228,
"eval_loss": 0.40972331166267395,
"eval_runtime": 70.3463,
"eval_samples_per_second": 35.752,
"eval_steps_per_second": 1.123,
"step": 550
},
{
"epoch": 1.09,
"grad_norm": 23.304401397705078,
"learning_rate": 9.870059584711668e-05,
"loss": 0.5633,
"step": 600
},
{
"epoch": 1.27,
"grad_norm": 8.285646438598633,
"learning_rate": 9.82341456229428e-05,
"loss": 0.4562,
"step": 700
},
{
"epoch": 1.45,
"grad_norm": 17.91335678100586,
"learning_rate": 9.769776049884563e-05,
"loss": 0.444,
"step": 800
},
{
"epoch": 1.64,
"grad_norm": 8.382722854614258,
"learning_rate": 9.709221818197624e-05,
"loss": 0.4898,
"step": 900
},
{
"epoch": 1.82,
"grad_norm": 10.226556777954102,
"learning_rate": 9.641839665080363e-05,
"loss": 0.4433,
"step": 1000
},
{
"epoch": 2.0,
"grad_norm": 19.516849517822266,
"learning_rate": 9.567727288213005e-05,
"loss": 0.4565,
"step": 1100
},
{
"epoch": 2.0,
"eval_accuracy": 0.8763419483101391,
"eval_loss": 0.42688119411468506,
"eval_runtime": 71.6777,
"eval_samples_per_second": 35.088,
"eval_steps_per_second": 1.102,
"step": 1100
},
{
"epoch": 2.18,
"grad_norm": 13.549880027770996,
"learning_rate": 9.486992143456792e-05,
"loss": 0.3639,
"step": 1200
},
{
"epoch": 2.36,
"grad_norm": 8.744523048400879,
"learning_rate": 9.399751289053267e-05,
"loss": 0.3795,
"step": 1300
},
{
"epoch": 2.55,
"grad_norm": 5.8690056800842285,
"learning_rate": 9.306131215901003e-05,
"loss": 0.3665,
"step": 1400
},
{
"epoch": 2.73,
"grad_norm": 8.52180290222168,
"learning_rate": 9.206267664155907e-05,
"loss": 0.3741,
"step": 1500
},
{
"epoch": 2.91,
"grad_norm": 3.9724719524383545,
"learning_rate": 9.100305426420956e-05,
"loss": 0.3628,
"step": 1600
},
{
"epoch": 3.0,
"eval_accuracy": 0.9001988071570577,
"eval_loss": 0.3464488685131073,
"eval_runtime": 72.3767,
"eval_samples_per_second": 34.749,
"eval_steps_per_second": 1.092,
"step": 1650
},
{
"epoch": 3.09,
"grad_norm": 10.699442863464355,
"learning_rate": 8.988398137810777e-05,
"loss": 0.3206,
"step": 1700
},
{
"epoch": 3.27,
"grad_norm": 7.718528747558594,
"learning_rate": 8.870708053195413e-05,
"loss": 0.3157,
"step": 1800
},
{
"epoch": 3.45,
"grad_norm": 12.929231643676758,
"learning_rate": 8.74740581194627e-05,
"loss": 0.2883,
"step": 1900
},
{
"epoch": 3.64,
"grad_norm": 9.246111869812012,
"learning_rate": 8.618670190525352e-05,
"loss": 0.3361,
"step": 2000
},
{
"epoch": 3.82,
"grad_norm": 8.94174861907959,
"learning_rate": 8.484687843276469e-05,
"loss": 0.3051,
"step": 2100
},
{
"epoch": 4.0,
"grad_norm": 13.777535438537598,
"learning_rate": 8.345653031794292e-05,
"loss": 0.2915,
"step": 2200
},
{
"epoch": 4.0,
"eval_accuracy": 0.9065606361829026,
"eval_loss": 0.33660224080085754,
"eval_runtime": 72.2343,
"eval_samples_per_second": 34.817,
"eval_steps_per_second": 1.094,
"step": 2200
},
{
"epoch": 4.18,
"grad_norm": 10.229923248291016,
"learning_rate": 8.201767343263612e-05,
"loss": 0.2636,
"step": 2300
},
{
"epoch": 4.36,
"grad_norm": 6.8911027908325195,
"learning_rate": 8.053239398177191e-05,
"loss": 0.2446,
"step": 2400
},
{
"epoch": 4.55,
"grad_norm": 14.081520080566406,
"learning_rate": 7.900284547855991e-05,
"loss": 0.2407,
"step": 2500
},
{
"epoch": 4.73,
"grad_norm": 15.959187507629395,
"learning_rate": 7.74312456221035e-05,
"loss": 0.2645,
"step": 2600
},
{
"epoch": 4.91,
"grad_norm": 8.710769653320312,
"learning_rate": 7.58198730819481e-05,
"loss": 0.2655,
"step": 2700
},
{
"epoch": 5.0,
"eval_accuracy": 0.9053677932405566,
"eval_loss": 0.3387199640274048,
"eval_runtime": 71.8817,
"eval_samples_per_second": 34.988,
"eval_steps_per_second": 1.099,
"step": 2750
},
{
"epoch": 5.09,
"grad_norm": 8.720322608947754,
"learning_rate": 7.417106419422819e-05,
"loss": 0.2436,
"step": 2800
},
{
"epoch": 5.27,
"grad_norm": 14.12549114227295,
"learning_rate": 7.24872095742033e-05,
"loss": 0.2238,
"step": 2900
},
{
"epoch": 5.45,
"grad_norm": 8.974920272827148,
"learning_rate": 7.077075065009433e-05,
"loss": 0.2007,
"step": 3000
},
{
"epoch": 5.64,
"grad_norm": 3.3389041423797607,
"learning_rate": 6.902417612324615e-05,
"loss": 0.2043,
"step": 3100
},
{
"epoch": 5.82,
"grad_norm": 5.468075275421143,
"learning_rate": 6.725001835974853e-05,
"loss": 0.2399,
"step": 3200
},
{
"epoch": 6.0,
"grad_norm": 19.46337890625,
"learning_rate": 6.545084971874738e-05,
"loss": 0.2395,
"step": 3300
},
{
"epoch": 6.0,
"eval_accuracy": 0.9125248508946322,
"eval_loss": 0.33130431175231934,
"eval_runtime": 70.0346,
"eval_samples_per_second": 35.911,
"eval_steps_per_second": 1.128,
"step": 3300
},
{
"epoch": 6.18,
"grad_norm": 5.854804039001465,
"learning_rate": 6.36292788227699e-05,
"loss": 0.1906,
"step": 3400
},
{
"epoch": 6.36,
"grad_norm": 11.46949577331543,
"learning_rate": 6.178794677547137e-05,
"loss": 0.1857,
"step": 3500
},
{
"epoch": 6.55,
"grad_norm": 7.80468225479126,
"learning_rate": 5.992952333228728e-05,
"loss": 0.1863,
"step": 3600
},
{
"epoch": 6.73,
"grad_norm": 6.652174472808838,
"learning_rate": 5.805670302954321e-05,
"loss": 0.1743,
"step": 3700
},
{
"epoch": 6.91,
"grad_norm": 9.390819549560547,
"learning_rate": 5.617220127763474e-05,
"loss": 0.2065,
"step": 3800
},
{
"epoch": 7.0,
"eval_accuracy": 0.9180914512922466,
"eval_loss": 0.3119599223136902,
"eval_runtime": 74.2948,
"eval_samples_per_second": 33.852,
"eval_steps_per_second": 1.063,
"step": 3850
},
{
"epoch": 7.09,
"grad_norm": 7.0801544189453125,
"learning_rate": 5.427875042394199e-05,
"loss": 0.1487,
"step": 3900
},
{
"epoch": 7.27,
"grad_norm": 6.336465358734131,
"learning_rate": 5.2379095791187124e-05,
"loss": 0.1712,
"step": 4000
},
{
"epoch": 7.45,
"grad_norm": 6.033371448516846,
"learning_rate": 5.047599169697884e-05,
"loss": 0.1643,
"step": 4100
},
{
"epoch": 7.64,
"grad_norm": 13.526623725891113,
"learning_rate": 4.85721974603152e-05,
"loss": 0.1716,
"step": 4200
},
{
"epoch": 7.82,
"grad_norm": 13.018455505371094,
"learning_rate": 4.667047340083481e-05,
"loss": 0.159,
"step": 4300
},
{
"epoch": 8.0,
"grad_norm": 19.489803314208984,
"learning_rate": 4.477357683661734e-05,
"loss": 0.1503,
"step": 4400
},
{
"epoch": 8.0,
"eval_accuracy": 0.9220675944333996,
"eval_loss": 0.30650895833969116,
"eval_runtime": 70.0541,
"eval_samples_per_second": 35.901,
"eval_steps_per_second": 1.128,
"step": 4400
},
{
"epoch": 8.18,
"grad_norm": 12.573568344116211,
"learning_rate": 4.288425808633575e-05,
"loss": 0.1518,
"step": 4500
},
{
"epoch": 8.36,
"grad_norm": 13.930220603942871,
"learning_rate": 4.100525648155731e-05,
"loss": 0.1289,
"step": 4600
},
{
"epoch": 8.55,
"grad_norm": 5.025359153747559,
"learning_rate": 3.913929639497462e-05,
"loss": 0.1243,
"step": 4700
},
{
"epoch": 8.73,
"grad_norm": 10.068098068237305,
"learning_rate": 3.728908329032567e-05,
"loss": 0.1484,
"step": 4800
},
{
"epoch": 8.91,
"grad_norm": 7.712088584899902,
"learning_rate": 3.545729979973005e-05,
"loss": 0.1503,
"step": 4900
},
{
"epoch": 9.0,
"eval_accuracy": 0.927634194831014,
"eval_loss": 0.29477769136428833,
"eval_runtime": 72.6614,
"eval_samples_per_second": 34.613,
"eval_steps_per_second": 1.087,
"step": 4950
},
{
"epoch": 9.09,
"grad_norm": 9.499425888061523,
"learning_rate": 3.364660183412892e-05,
"loss": 0.117,
"step": 5000
},
{
"epoch": 9.27,
"grad_norm": 3.1358752250671387,
"learning_rate": 3.1859614732467954e-05,
"loss": 0.118,
"step": 5100
},
{
"epoch": 9.45,
"grad_norm": 8.783591270446777,
"learning_rate": 3.0098929455206904e-05,
"loss": 0.1064,
"step": 5200
},
{
"epoch": 9.64,
"grad_norm": 4.272777557373047,
"learning_rate": 2.8367098827674578e-05,
"loss": 0.1294,
"step": 5300
},
{
"epoch": 9.82,
"grad_norm": 5.206827163696289,
"learning_rate": 2.6666633838716314e-05,
"loss": 0.1452,
"step": 5400
},
{
"epoch": 10.0,
"grad_norm": 27.840124130249023,
"learning_rate": 2.500000000000001e-05,
"loss": 0.1125,
"step": 5500
},
{
"epoch": 10.0,
"eval_accuracy": 0.9304174950298211,
"eval_loss": 0.2917528450489044,
"eval_runtime": 71.1465,
"eval_samples_per_second": 35.35,
"eval_steps_per_second": 1.11,
"step": 5500
},
{
"epoch": 10.18,
"grad_norm": 10.059708595275879,
"learning_rate": 2.336961377126001e-05,
"loss": 0.1115,
"step": 5600
},
{
"epoch": 10.36,
"grad_norm": 3.956040143966675,
"learning_rate": 2.1777839056661554e-05,
"loss": 0.099,
"step": 5700
},
{
"epoch": 10.55,
"grad_norm": 4.310999870300293,
"learning_rate": 2.0226983777365604e-05,
"loss": 0.1056,
"step": 5800
},
{
"epoch": 10.73,
"grad_norm": 10.636781692504883,
"learning_rate": 1.8719296525263922e-05,
"loss": 0.0998,
"step": 5900
},
{
"epoch": 10.91,
"grad_norm": 8.48064136505127,
"learning_rate": 1.725696330273575e-05,
"loss": 0.1057,
"step": 6000
},
{
"epoch": 11.0,
"eval_accuracy": 0.932803180914513,
"eval_loss": 0.2953931987285614,
"eval_runtime": 71.8417,
"eval_samples_per_second": 35.008,
"eval_steps_per_second": 1.1,
"step": 6050
},
{
"epoch": 11.09,
"grad_norm": 1.377661943435669,
"learning_rate": 1.5842104353153287e-05,
"loss": 0.0873,
"step": 6100
},
{
"epoch": 11.27,
"grad_norm": 2.6207616329193115,
"learning_rate": 1.4476771086731567e-05,
"loss": 0.0913,
"step": 6200
},
{
"epoch": 11.45,
"grad_norm": 8.88305950164795,
"learning_rate": 1.3162943106179749e-05,
"loss": 0.0874,
"step": 6300
},
{
"epoch": 11.64,
"grad_norm": 7.829620361328125,
"learning_rate": 1.1902525336466464e-05,
"loss": 0.0901,
"step": 6400
},
{
"epoch": 11.82,
"grad_norm": 1.778537392616272,
"learning_rate": 1.0697345262860636e-05,
"loss": 0.0922,
"step": 6500
},
{
"epoch": 12.0,
"grad_norm": 3.688204526901245,
"learning_rate": 9.549150281252633e-06,
"loss": 0.0937,
"step": 6600
},
{
"epoch": 12.0,
"eval_accuracy": 0.9335984095427435,
"eval_loss": 0.2958705723285675,
"eval_runtime": 71.9587,
"eval_samples_per_second": 34.951,
"eval_steps_per_second": 1.098,
"step": 6600
},
{
"epoch": 12.18,
"grad_norm": 5.87827730178833,
"learning_rate": 8.459605164597267e-06,
"loss": 0.0973,
"step": 6700
},
{
"epoch": 12.36,
"grad_norm": 2.3431482315063477,
"learning_rate": 7.430289649152156e-06,
"loss": 0.0857,
"step": 6800
},
{
"epoch": 12.55,
"grad_norm": 1.375058650970459,
"learning_rate": 6.462696144011149e-06,
"loss": 0.0912,
"step": 6900
},
{
"epoch": 12.73,
"grad_norm": 5.9749979972839355,
"learning_rate": 5.558227567253832e-06,
"loss": 0.073,
"step": 7000
},
{
"epoch": 12.91,
"grad_norm": 8.656426429748535,
"learning_rate": 4.7181953118484556e-06,
"loss": 0.0966,
"step": 7100
},
{
"epoch": 13.0,
"eval_accuracy": 0.9351888667992048,
"eval_loss": 0.29395705461502075,
"eval_runtime": 71.5553,
"eval_samples_per_second": 35.148,
"eval_steps_per_second": 1.104,
"step": 7150
},
{
"epoch": 13.09,
"grad_norm": 6.880538463592529,
"learning_rate": 3.9438173442575e-06,
"loss": 0.0836,
"step": 7200
},
{
"epoch": 13.27,
"grad_norm": 3.5548784732818604,
"learning_rate": 3.2362164385026706e-06,
"loss": 0.1008,
"step": 7300
},
{
"epoch": 13.45,
"grad_norm": 6.498635292053223,
"learning_rate": 2.596418548250029e-06,
"loss": 0.0736,
"step": 7400
},
{
"epoch": 13.64,
"grad_norm": 2.343573808670044,
"learning_rate": 2.0253513192751373e-06,
"loss": 0.0719,
"step": 7500
},
{
"epoch": 13.82,
"grad_norm": 8.872209548950195,
"learning_rate": 1.523842744465437e-06,
"loss": 0.0879,
"step": 7600
},
{
"epoch": 14.0,
"grad_norm": 3.882363796234131,
"learning_rate": 1.0926199633097157e-06,
"loss": 0.0735,
"step": 7700
},
{
"epoch": 14.0,
"eval_accuracy": 0.9339960238568589,
"eval_loss": 0.2915794849395752,
"eval_runtime": 69.9779,
"eval_samples_per_second": 35.94,
"eval_steps_per_second": 1.129,
"step": 7700
},
{
"epoch": 14.18,
"grad_norm": 1.7305363416671753,
"learning_rate": 7.323082076153509e-07,
"loss": 0.0824,
"step": 7800
},
{
"epoch": 14.36,
"grad_norm": 10.12863826751709,
"learning_rate": 4.434298949819449e-07,
"loss": 0.0825,
"step": 7900
},
{
"epoch": 14.55,
"grad_norm": 0.9927307367324829,
"learning_rate": 2.2640387134577058e-07,
"loss": 0.0861,
"step": 8000
},
{
"epoch": 14.73,
"grad_norm": 7.997195720672607,
"learning_rate": 8.15448036932176e-08,
"loss": 0.0824,
"step": 8100
},
{
"epoch": 14.91,
"grad_norm": 7.195863246917725,
"learning_rate": 9.06272382371065e-09,
"loss": 0.0881,
"step": 8200
},
{
"epoch": 15.0,
"eval_accuracy": 0.9355864811133201,
"eval_loss": 0.2901732623577118,
"eval_runtime": 71.1315,
"eval_samples_per_second": 35.357,
"eval_steps_per_second": 1.111,
"step": 8250
},
{
"epoch": 15.0,
"step": 8250,
"total_flos": 1.952142618502398e+19,
"train_loss": 0.23986065309697932,
"train_runtime": 12740.4964,
"train_samples_per_second": 20.699,
"train_steps_per_second": 0.648
}
],
"logging_steps": 100,
"max_steps": 8250,
"num_input_tokens_seen": 0,
"num_train_epochs": 15,
"save_steps": 500,
"total_flos": 1.952142618502398e+19,
"train_batch_size": 32,
"trial_name": null,
"trial_params": null
}