{ "best_metric": 0.33891761302948, "best_model_checkpoint": "finetuned-electrical-images/checkpoint-600", "epoch": 4.0, "eval_steps": 100, "global_step": 860, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.046511627906976744, "grad_norm": 2.072422742843628, "learning_rate": 0.00019767441860465116, "loss": 1.5929, "step": 10 }, { "epoch": 0.09302325581395349, "grad_norm": 2.2486143112182617, "learning_rate": 0.00019534883720930232, "loss": 1.2056, "step": 20 }, { "epoch": 0.13953488372093023, "grad_norm": 2.1311681270599365, "learning_rate": 0.0001930232558139535, "loss": 1.0947, "step": 30 }, { "epoch": 0.18604651162790697, "grad_norm": 2.3108925819396973, "learning_rate": 0.00019069767441860466, "loss": 0.965, "step": 40 }, { "epoch": 0.23255813953488372, "grad_norm": 2.6261141300201416, "learning_rate": 0.00018837209302325584, "loss": 0.7767, "step": 50 }, { "epoch": 0.27906976744186046, "grad_norm": 4.883328914642334, "learning_rate": 0.000186046511627907, "loss": 0.8164, "step": 60 }, { "epoch": 0.32558139534883723, "grad_norm": 2.2796030044555664, "learning_rate": 0.00018372093023255815, "loss": 0.8293, "step": 70 }, { "epoch": 0.37209302325581395, "grad_norm": 2.7582902908325195, "learning_rate": 0.0001813953488372093, "loss": 0.7748, "step": 80 }, { "epoch": 0.4186046511627907, "grad_norm": 3.0205562114715576, "learning_rate": 0.00017906976744186048, "loss": 0.8204, "step": 90 }, { "epoch": 0.46511627906976744, "grad_norm": 1.5362632274627686, "learning_rate": 0.00017674418604651164, "loss": 0.7151, "step": 100 }, { "epoch": 0.46511627906976744, "eval_accuracy": 0.8201320132013201, "eval_loss": 0.5808877348899841, "eval_runtime": 7.6253, "eval_samples_per_second": 79.472, "eval_steps_per_second": 9.967, "step": 100 }, { "epoch": 0.5116279069767442, "grad_norm": 3.7735097408294678, "learning_rate": 0.0001744186046511628, "loss": 0.523, "step": 110 }, { "epoch": 0.5581395348837209, "grad_norm": 5.240425109863281, "learning_rate": 0.00017209302325581395, "loss": 0.5818, "step": 120 }, { "epoch": 0.6046511627906976, "grad_norm": 3.7434163093566895, "learning_rate": 0.0001697674418604651, "loss": 0.7063, "step": 130 }, { "epoch": 0.6511627906976745, "grad_norm": 1.7690590620040894, "learning_rate": 0.00016744186046511629, "loss": 0.7594, "step": 140 }, { "epoch": 0.6976744186046512, "grad_norm": 3.7053258419036865, "learning_rate": 0.00016511627906976747, "loss": 0.5997, "step": 150 }, { "epoch": 0.7441860465116279, "grad_norm": 4.870126247406006, "learning_rate": 0.00016279069767441862, "loss": 0.5614, "step": 160 }, { "epoch": 0.7906976744186046, "grad_norm": 2.533661365509033, "learning_rate": 0.00016046511627906978, "loss": 0.4965, "step": 170 }, { "epoch": 0.8372093023255814, "grad_norm": 3.910142660140991, "learning_rate": 0.00015813953488372093, "loss": 0.5525, "step": 180 }, { "epoch": 0.8837209302325582, "grad_norm": 3.5800535678863525, "learning_rate": 0.0001558139534883721, "loss": 0.5458, "step": 190 }, { "epoch": 0.9302325581395349, "grad_norm": 1.315317153930664, "learning_rate": 0.00015348837209302327, "loss": 0.6882, "step": 200 }, { "epoch": 0.9302325581395349, "eval_accuracy": 0.8498349834983498, "eval_loss": 0.4638592302799225, "eval_runtime": 7.9986, "eval_samples_per_second": 75.764, "eval_steps_per_second": 9.502, "step": 200 }, { "epoch": 0.9767441860465116, "grad_norm": 3.7220730781555176, "learning_rate": 0.00015116279069767442, "loss": 0.5207, "step": 210 }, { "epoch": 1.0232558139534884, "grad_norm": 4.103209018707275, "learning_rate": 0.00014883720930232558, "loss": 0.5282, "step": 220 }, { "epoch": 1.069767441860465, "grad_norm": 2.3725953102111816, "learning_rate": 0.00014651162790697673, "loss": 0.4849, "step": 230 }, { "epoch": 1.1162790697674418, "grad_norm": 2.339578151702881, "learning_rate": 0.00014418604651162791, "loss": 0.3707, "step": 240 }, { "epoch": 1.1627906976744187, "grad_norm": 2.8100476264953613, "learning_rate": 0.0001418604651162791, "loss": 0.3821, "step": 250 }, { "epoch": 1.2093023255813953, "grad_norm": 2.1530966758728027, "learning_rate": 0.00013953488372093025, "loss": 0.4797, "step": 260 }, { "epoch": 1.255813953488372, "grad_norm": 1.164758324623108, "learning_rate": 0.0001372093023255814, "loss": 0.4341, "step": 270 }, { "epoch": 1.302325581395349, "grad_norm": 1.5009866952896118, "learning_rate": 0.00013488372093023256, "loss": 0.4527, "step": 280 }, { "epoch": 1.3488372093023255, "grad_norm": 2.4176268577575684, "learning_rate": 0.00013255813953488372, "loss": 0.3878, "step": 290 }, { "epoch": 1.3953488372093024, "grad_norm": 4.717296123504639, "learning_rate": 0.0001302325581395349, "loss": 0.3897, "step": 300 }, { "epoch": 1.3953488372093024, "eval_accuracy": 0.8465346534653465, "eval_loss": 0.47040635347366333, "eval_runtime": 8.212, "eval_samples_per_second": 73.794, "eval_steps_per_second": 9.255, "step": 300 }, { "epoch": 1.441860465116279, "grad_norm": 1.026237964630127, "learning_rate": 0.00012790697674418605, "loss": 0.3683, "step": 310 }, { "epoch": 1.4883720930232558, "grad_norm": 2.894584894180298, "learning_rate": 0.0001255813953488372, "loss": 0.4968, "step": 320 }, { "epoch": 1.5348837209302326, "grad_norm": 1.6250619888305664, "learning_rate": 0.00012325581395348836, "loss": 0.4784, "step": 330 }, { "epoch": 1.5813953488372094, "grad_norm": 2.221461296081543, "learning_rate": 0.00012093023255813953, "loss": 0.5513, "step": 340 }, { "epoch": 1.627906976744186, "grad_norm": 6.982600688934326, "learning_rate": 0.00011860465116279071, "loss": 0.5509, "step": 350 }, { "epoch": 1.6744186046511627, "grad_norm": 2.3711423873901367, "learning_rate": 0.00011627906976744187, "loss": 0.4542, "step": 360 }, { "epoch": 1.7209302325581395, "grad_norm": 2.340607166290283, "learning_rate": 0.00011395348837209304, "loss": 0.3822, "step": 370 }, { "epoch": 1.7674418604651163, "grad_norm": 3.709766387939453, "learning_rate": 0.00011162790697674419, "loss": 0.4252, "step": 380 }, { "epoch": 1.8139534883720931, "grad_norm": 3.5805418491363525, "learning_rate": 0.00010930232558139534, "loss": 0.6467, "step": 390 }, { "epoch": 1.8604651162790697, "grad_norm": 2.1463587284088135, "learning_rate": 0.00010697674418604651, "loss": 0.4909, "step": 400 }, { "epoch": 1.8604651162790697, "eval_accuracy": 0.8448844884488449, "eval_loss": 0.5023446083068848, "eval_runtime": 8.1791, "eval_samples_per_second": 74.091, "eval_steps_per_second": 9.292, "step": 400 }, { "epoch": 1.9069767441860463, "grad_norm": 3.8787500858306885, "learning_rate": 0.00010465116279069768, "loss": 0.485, "step": 410 }, { "epoch": 1.9534883720930232, "grad_norm": 3.9055089950561523, "learning_rate": 0.00010232558139534885, "loss": 0.4737, "step": 420 }, { "epoch": 2.0, "grad_norm": 1.440263032913208, "learning_rate": 0.0001, "loss": 0.35, "step": 430 }, { "epoch": 2.046511627906977, "grad_norm": 1.907047986984253, "learning_rate": 9.767441860465116e-05, "loss": 0.3646, "step": 440 }, { "epoch": 2.0930232558139537, "grad_norm": 2.902924060821533, "learning_rate": 9.534883720930233e-05, "loss": 0.3787, "step": 450 }, { "epoch": 2.13953488372093, "grad_norm": 2.807384729385376, "learning_rate": 9.30232558139535e-05, "loss": 0.3122, "step": 460 }, { "epoch": 2.186046511627907, "grad_norm": 1.7866243124008179, "learning_rate": 9.069767441860465e-05, "loss": 0.3379, "step": 470 }, { "epoch": 2.2325581395348837, "grad_norm": 6.933365821838379, "learning_rate": 8.837209302325582e-05, "loss": 0.3066, "step": 480 }, { "epoch": 2.2790697674418605, "grad_norm": 5.515610694885254, "learning_rate": 8.604651162790697e-05, "loss": 0.2632, "step": 490 }, { "epoch": 2.3255813953488373, "grad_norm": 4.792200088500977, "learning_rate": 8.372093023255814e-05, "loss": 0.2836, "step": 500 }, { "epoch": 2.3255813953488373, "eval_accuracy": 0.8745874587458746, "eval_loss": 0.41001710295677185, "eval_runtime": 8.2547, "eval_samples_per_second": 73.413, "eval_steps_per_second": 9.207, "step": 500 }, { "epoch": 2.3720930232558137, "grad_norm": 4.973999977111816, "learning_rate": 8.139534883720931e-05, "loss": 0.3589, "step": 510 }, { "epoch": 2.4186046511627906, "grad_norm": 2.9822804927825928, "learning_rate": 7.906976744186047e-05, "loss": 0.2937, "step": 520 }, { "epoch": 2.4651162790697674, "grad_norm": 3.735166549682617, "learning_rate": 7.674418604651163e-05, "loss": 0.3345, "step": 530 }, { "epoch": 2.511627906976744, "grad_norm": 3.042361259460449, "learning_rate": 7.441860465116279e-05, "loss": 0.3717, "step": 540 }, { "epoch": 2.558139534883721, "grad_norm": 2.4927892684936523, "learning_rate": 7.209302325581396e-05, "loss": 0.249, "step": 550 }, { "epoch": 2.604651162790698, "grad_norm": 1.5524264574050903, "learning_rate": 6.976744186046513e-05, "loss": 0.3304, "step": 560 }, { "epoch": 2.6511627906976747, "grad_norm": 0.39165279269218445, "learning_rate": 6.744186046511628e-05, "loss": 0.285, "step": 570 }, { "epoch": 2.697674418604651, "grad_norm": 1.6114171743392944, "learning_rate": 6.511627906976745e-05, "loss": 0.3908, "step": 580 }, { "epoch": 2.744186046511628, "grad_norm": 2.375959634780884, "learning_rate": 6.27906976744186e-05, "loss": 0.2845, "step": 590 }, { "epoch": 2.7906976744186047, "grad_norm": 3.077956199645996, "learning_rate": 6.0465116279069765e-05, "loss": 0.2669, "step": 600 }, { "epoch": 2.7906976744186047, "eval_accuracy": 0.8993399339933993, "eval_loss": 0.33891761302948, "eval_runtime": 8.0736, "eval_samples_per_second": 75.059, "eval_steps_per_second": 9.413, "step": 600 }, { "epoch": 2.8372093023255816, "grad_norm": 1.2785547971725464, "learning_rate": 5.8139534883720933e-05, "loss": 0.2499, "step": 610 }, { "epoch": 2.883720930232558, "grad_norm": 2.2260217666625977, "learning_rate": 5.5813953488372095e-05, "loss": 0.2065, "step": 620 }, { "epoch": 2.9302325581395348, "grad_norm": 2.7635715007781982, "learning_rate": 5.348837209302326e-05, "loss": 0.3334, "step": 630 }, { "epoch": 2.9767441860465116, "grad_norm": 3.221409797668457, "learning_rate": 5.1162790697674425e-05, "loss": 0.2453, "step": 640 }, { "epoch": 3.0232558139534884, "grad_norm": 0.77796870470047, "learning_rate": 4.883720930232558e-05, "loss": 0.2191, "step": 650 }, { "epoch": 3.0697674418604652, "grad_norm": 1.0451290607452393, "learning_rate": 4.651162790697675e-05, "loss": 0.2745, "step": 660 }, { "epoch": 3.116279069767442, "grad_norm": 4.356563091278076, "learning_rate": 4.418604651162791e-05, "loss": 0.2399, "step": 670 }, { "epoch": 3.1627906976744184, "grad_norm": 2.47353458404541, "learning_rate": 4.186046511627907e-05, "loss": 0.3016, "step": 680 }, { "epoch": 3.2093023255813953, "grad_norm": 1.1897259950637817, "learning_rate": 3.953488372093023e-05, "loss": 0.1716, "step": 690 }, { "epoch": 3.255813953488372, "grad_norm": 2.9624576568603516, "learning_rate": 3.7209302325581394e-05, "loss": 0.2304, "step": 700 }, { "epoch": 3.255813953488372, "eval_accuracy": 0.8927392739273927, "eval_loss": 0.36686915159225464, "eval_runtime": 8.0401, "eval_samples_per_second": 75.372, "eval_steps_per_second": 9.453, "step": 700 }, { "epoch": 3.302325581395349, "grad_norm": 0.4115903675556183, "learning_rate": 3.488372093023256e-05, "loss": 0.2835, "step": 710 }, { "epoch": 3.3488372093023258, "grad_norm": 3.0008704662323, "learning_rate": 3.2558139534883724e-05, "loss": 0.1414, "step": 720 }, { "epoch": 3.395348837209302, "grad_norm": 3.6043615341186523, "learning_rate": 3.0232558139534883e-05, "loss": 0.2309, "step": 730 }, { "epoch": 3.441860465116279, "grad_norm": 1.3581503629684448, "learning_rate": 2.7906976744186048e-05, "loss": 0.3342, "step": 740 }, { "epoch": 3.488372093023256, "grad_norm": 1.7710747718811035, "learning_rate": 2.5581395348837212e-05, "loss": 0.2326, "step": 750 }, { "epoch": 3.5348837209302326, "grad_norm": 3.192469835281372, "learning_rate": 2.3255813953488374e-05, "loss": 0.1234, "step": 760 }, { "epoch": 3.5813953488372094, "grad_norm": 0.3328302800655365, "learning_rate": 2.0930232558139536e-05, "loss": 0.228, "step": 770 }, { "epoch": 3.6279069767441863, "grad_norm": 2.4526288509368896, "learning_rate": 1.8604651162790697e-05, "loss": 0.1601, "step": 780 }, { "epoch": 3.6744186046511627, "grad_norm": 1.8664888143539429, "learning_rate": 1.6279069767441862e-05, "loss": 0.2098, "step": 790 }, { "epoch": 3.7209302325581395, "grad_norm": 5.262502193450928, "learning_rate": 1.3953488372093024e-05, "loss": 0.1523, "step": 800 }, { "epoch": 3.7209302325581395, "eval_accuracy": 0.8960396039603961, "eval_loss": 0.36768776178359985, "eval_runtime": 8.208, "eval_samples_per_second": 73.83, "eval_steps_per_second": 9.259, "step": 800 }, { "epoch": 3.7674418604651163, "grad_norm": 6.241558074951172, "learning_rate": 1.1627906976744187e-05, "loss": 0.1313, "step": 810 }, { "epoch": 3.813953488372093, "grad_norm": 2.1938974857330322, "learning_rate": 9.302325581395349e-06, "loss": 0.1505, "step": 820 }, { "epoch": 3.8604651162790695, "grad_norm": 2.1302292346954346, "learning_rate": 6.976744186046512e-06, "loss": 0.2306, "step": 830 }, { "epoch": 3.9069767441860463, "grad_norm": 10.263894081115723, "learning_rate": 4.651162790697674e-06, "loss": 0.2138, "step": 840 }, { "epoch": 3.953488372093023, "grad_norm": 6.005746364593506, "learning_rate": 2.325581395348837e-06, "loss": 0.1595, "step": 850 }, { "epoch": 4.0, "grad_norm": 3.945340156555176, "learning_rate": 0.0, "loss": 0.2154, "step": 860 }, { "epoch": 4.0, "step": 860, "total_flos": 1.0638481718004941e+18, "train_loss": 0.4333847167880036, "train_runtime": 591.3634, "train_samples_per_second": 23.214, "train_steps_per_second": 1.454 } ], "logging_steps": 10, "max_steps": 860, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 100, "total_flos": 1.0638481718004941e+18, "train_batch_size": 16, "trial_name": null, "trial_params": null }