{ "best_metric": 0.8272958397865295, "best_model_checkpoint": "./vit-base-renovation2/checkpoint-250", "epoch": 4.0, "eval_steps": 25, "global_step": 496, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.08, "grad_norm": 1.815894365310669, "learning_rate": 0.00019596774193548388, "loss": 1.5846, "step": 10 }, { "epoch": 0.16, "grad_norm": 2.0016415119171143, "learning_rate": 0.00019193548387096775, "loss": 1.359, "step": 20 }, { "epoch": 0.2, "eval_accuracy": 0.4657534246575342, "eval_loss": 1.2074170112609863, "eval_runtime": 7.6692, "eval_samples_per_second": 28.556, "eval_steps_per_second": 3.651, "step": 25 }, { "epoch": 0.24, "grad_norm": 1.6700384616851807, "learning_rate": 0.00018790322580645164, "loss": 1.182, "step": 30 }, { "epoch": 0.32, "grad_norm": 2.4473767280578613, "learning_rate": 0.00018387096774193548, "loss": 1.1559, "step": 40 }, { "epoch": 0.4, "grad_norm": 2.757627010345459, "learning_rate": 0.00017983870967741935, "loss": 1.1384, "step": 50 }, { "epoch": 0.4, "eval_accuracy": 0.5205479452054794, "eval_loss": 1.1213266849517822, "eval_runtime": 8.2359, "eval_samples_per_second": 26.591, "eval_steps_per_second": 3.4, "step": 50 }, { "epoch": 0.48, "grad_norm": 1.8923784494400024, "learning_rate": 0.00017580645161290325, "loss": 1.147, "step": 60 }, { "epoch": 0.56, "grad_norm": 2.0101828575134277, "learning_rate": 0.00017177419354838711, "loss": 1.0866, "step": 70 }, { "epoch": 0.6, "eval_accuracy": 0.6301369863013698, "eval_loss": 0.9746416807174683, "eval_runtime": 8.0785, "eval_samples_per_second": 27.109, "eval_steps_per_second": 3.466, "step": 75 }, { "epoch": 0.65, "grad_norm": 3.479001522064209, "learning_rate": 0.00016774193548387098, "loss": 0.875, "step": 80 }, { "epoch": 0.73, "grad_norm": 3.4011082649230957, "learning_rate": 0.00016370967741935485, "loss": 1.1928, "step": 90 }, { "epoch": 0.81, "grad_norm": 2.3756794929504395, "learning_rate": 0.00015967741935483872, "loss": 1.1787, "step": 100 }, { "epoch": 0.81, "eval_accuracy": 0.5662100456621004, "eval_loss": 1.0523391962051392, "eval_runtime": 9.1559, "eval_samples_per_second": 23.919, "eval_steps_per_second": 3.058, "step": 100 }, { "epoch": 0.89, "grad_norm": 1.5179095268249512, "learning_rate": 0.0001556451612903226, "loss": 0.9827, "step": 110 }, { "epoch": 0.97, "grad_norm": 2.3550424575805664, "learning_rate": 0.00015161290322580646, "loss": 0.9242, "step": 120 }, { "epoch": 1.01, "eval_accuracy": 0.6255707762557078, "eval_loss": 0.9543189406394958, "eval_runtime": 7.3462, "eval_samples_per_second": 29.812, "eval_steps_per_second": 3.812, "step": 125 }, { "epoch": 1.05, "grad_norm": 1.8633378744125366, "learning_rate": 0.00014758064516129032, "loss": 0.8227, "step": 130 }, { "epoch": 1.13, "grad_norm": 2.0424883365631104, "learning_rate": 0.00014354838709677422, "loss": 0.747, "step": 140 }, { "epoch": 1.21, "grad_norm": 2.2865993976593018, "learning_rate": 0.0001395161290322581, "loss": 0.7945, "step": 150 }, { "epoch": 1.21, "eval_accuracy": 0.6118721461187214, "eval_loss": 0.9199973940849304, "eval_runtime": 7.7884, "eval_samples_per_second": 28.119, "eval_steps_per_second": 3.595, "step": 150 }, { "epoch": 1.29, "grad_norm": 1.884401798248291, "learning_rate": 0.00013548387096774193, "loss": 0.9408, "step": 160 }, { "epoch": 1.37, "grad_norm": 2.085380792617798, "learning_rate": 0.0001314516129032258, "loss": 0.8379, "step": 170 }, { "epoch": 1.41, "eval_accuracy": 0.6712328767123288, "eval_loss": 0.8447285294532776, "eval_runtime": 7.8794, "eval_samples_per_second": 27.794, "eval_steps_per_second": 3.554, "step": 175 }, { "epoch": 1.45, "grad_norm": 2.1437325477600098, "learning_rate": 0.0001274193548387097, "loss": 0.8348, "step": 180 }, { "epoch": 1.53, "grad_norm": 1.6616462469100952, "learning_rate": 0.00012338709677419356, "loss": 0.6521, "step": 190 }, { "epoch": 1.61, "grad_norm": 2.3329973220825195, "learning_rate": 0.00011935483870967743, "loss": 0.7253, "step": 200 }, { "epoch": 1.61, "eval_accuracy": 0.6575342465753424, "eval_loss": 0.8642416596412659, "eval_runtime": 7.2392, "eval_samples_per_second": 30.252, "eval_steps_per_second": 3.868, "step": 200 }, { "epoch": 1.69, "grad_norm": 3.4008474349975586, "learning_rate": 0.00011532258064516131, "loss": 0.6683, "step": 210 }, { "epoch": 1.77, "grad_norm": 2.7154157161712646, "learning_rate": 0.00011129032258064515, "loss": 0.6344, "step": 220 }, { "epoch": 1.81, "eval_accuracy": 0.6438356164383562, "eval_loss": 0.8443467020988464, "eval_runtime": 7.7267, "eval_samples_per_second": 28.343, "eval_steps_per_second": 3.624, "step": 225 }, { "epoch": 1.85, "grad_norm": 2.2653236389160156, "learning_rate": 0.00010725806451612903, "loss": 0.768, "step": 230 }, { "epoch": 1.94, "grad_norm": 2.3153140544891357, "learning_rate": 0.0001032258064516129, "loss": 0.8194, "step": 240 }, { "epoch": 2.02, "grad_norm": 1.3728018999099731, "learning_rate": 9.919354838709678e-05, "loss": 0.6521, "step": 250 }, { "epoch": 2.02, "eval_accuracy": 0.6666666666666666, "eval_loss": 0.8272958397865295, "eval_runtime": 7.9935, "eval_samples_per_second": 27.397, "eval_steps_per_second": 3.503, "step": 250 }, { "epoch": 2.1, "grad_norm": 1.0636628866195679, "learning_rate": 9.516129032258065e-05, "loss": 0.3979, "step": 260 }, { "epoch": 2.18, "grad_norm": 1.3106963634490967, "learning_rate": 9.112903225806452e-05, "loss": 0.3627, "step": 270 }, { "epoch": 2.22, "eval_accuracy": 0.6712328767123288, "eval_loss": 0.865295946598053, "eval_runtime": 7.9627, "eval_samples_per_second": 27.503, "eval_steps_per_second": 3.516, "step": 275 }, { "epoch": 2.26, "grad_norm": 1.7618376016616821, "learning_rate": 8.709677419354839e-05, "loss": 0.3624, "step": 280 }, { "epoch": 2.34, "grad_norm": 1.3475382328033447, "learning_rate": 8.306451612903227e-05, "loss": 0.4034, "step": 290 }, { "epoch": 2.42, "grad_norm": 3.0443389415740967, "learning_rate": 7.903225806451613e-05, "loss": 0.2523, "step": 300 }, { "epoch": 2.42, "eval_accuracy": 0.6894977168949772, "eval_loss": 0.874794065952301, "eval_runtime": 7.2976, "eval_samples_per_second": 30.01, "eval_steps_per_second": 3.837, "step": 300 }, { "epoch": 2.5, "grad_norm": 2.961167812347412, "learning_rate": 7.500000000000001e-05, "loss": 0.3129, "step": 310 }, { "epoch": 2.58, "grad_norm": 1.5659066438674927, "learning_rate": 7.096774193548388e-05, "loss": 0.363, "step": 320 }, { "epoch": 2.62, "eval_accuracy": 0.684931506849315, "eval_loss": 0.8407436609268188, "eval_runtime": 7.6113, "eval_samples_per_second": 28.773, "eval_steps_per_second": 3.679, "step": 325 }, { "epoch": 2.66, "grad_norm": 1.1653140783309937, "learning_rate": 6.733870967741935e-05, "loss": 0.3924, "step": 330 }, { "epoch": 2.74, "grad_norm": 3.533656358718872, "learning_rate": 6.330645161290322e-05, "loss": 0.3786, "step": 340 }, { "epoch": 2.82, "grad_norm": 3.4866058826446533, "learning_rate": 5.9274193548387104e-05, "loss": 0.3433, "step": 350 }, { "epoch": 2.82, "eval_accuracy": 0.6484018264840182, "eval_loss": 0.969610333442688, "eval_runtime": 8.4636, "eval_samples_per_second": 25.875, "eval_steps_per_second": 3.308, "step": 350 }, { "epoch": 2.9, "grad_norm": 0.8572694659233093, "learning_rate": 5.5241935483870966e-05, "loss": 0.3991, "step": 360 }, { "epoch": 2.98, "grad_norm": 2.2343902587890625, "learning_rate": 5.120967741935484e-05, "loss": 0.2874, "step": 370 }, { "epoch": 3.02, "eval_accuracy": 0.680365296803653, "eval_loss": 0.9290387034416199, "eval_runtime": 7.9323, "eval_samples_per_second": 27.608, "eval_steps_per_second": 3.53, "step": 375 }, { "epoch": 3.06, "grad_norm": 0.40827465057373047, "learning_rate": 4.7177419354838716e-05, "loss": 0.1757, "step": 380 }, { "epoch": 3.15, "grad_norm": 0.28673678636550903, "learning_rate": 4.3145161290322584e-05, "loss": 0.122, "step": 390 }, { "epoch": 3.23, "grad_norm": 1.3551135063171387, "learning_rate": 3.911290322580645e-05, "loss": 0.1682, "step": 400 }, { "epoch": 3.23, "eval_accuracy": 0.6575342465753424, "eval_loss": 0.9713302254676819, "eval_runtime": 7.4128, "eval_samples_per_second": 29.543, "eval_steps_per_second": 3.777, "step": 400 }, { "epoch": 3.31, "grad_norm": 0.3541167676448822, "learning_rate": 3.508064516129033e-05, "loss": 0.1371, "step": 410 }, { "epoch": 3.39, "grad_norm": 1.2407793998718262, "learning_rate": 3.1048387096774195e-05, "loss": 0.1575, "step": 420 }, { "epoch": 3.43, "eval_accuracy": 0.680365296803653, "eval_loss": 0.9963440299034119, "eval_runtime": 7.3009, "eval_samples_per_second": 29.996, "eval_steps_per_second": 3.835, "step": 425 }, { "epoch": 3.47, "grad_norm": 2.0023341178894043, "learning_rate": 2.7016129032258064e-05, "loss": 0.1432, "step": 430 }, { "epoch": 3.55, "grad_norm": 0.3904053568840027, "learning_rate": 2.2983870967741935e-05, "loss": 0.1827, "step": 440 }, { "epoch": 3.63, "grad_norm": 0.42743560671806335, "learning_rate": 1.8951612903225807e-05, "loss": 0.0822, "step": 450 }, { "epoch": 3.63, "eval_accuracy": 0.7123287671232876, "eval_loss": 0.9472664594650269, "eval_runtime": 7.4585, "eval_samples_per_second": 29.362, "eval_steps_per_second": 3.754, "step": 450 }, { "epoch": 3.71, "grad_norm": 0.7994292974472046, "learning_rate": 1.4919354838709679e-05, "loss": 0.133, "step": 460 }, { "epoch": 3.79, "grad_norm": 0.4317198097705841, "learning_rate": 1.0887096774193549e-05, "loss": 0.1678, "step": 470 }, { "epoch": 3.83, "eval_accuracy": 0.7031963470319634, "eval_loss": 0.9787965416908264, "eval_runtime": 7.434, "eval_samples_per_second": 29.459, "eval_steps_per_second": 3.766, "step": 475 }, { "epoch": 3.87, "grad_norm": 0.2247464805841446, "learning_rate": 6.854838709677419e-06, "loss": 0.148, "step": 480 }, { "epoch": 3.95, "grad_norm": 0.28201037645339966, "learning_rate": 2.82258064516129e-06, "loss": 0.1406, "step": 490 }, { "epoch": 4.0, "step": 496, "total_flos": 6.10974224738132e+17, "train_loss": 0.601069396421794, "train_runtime": 693.9158, "train_samples_per_second": 11.362, "train_steps_per_second": 0.715 } ], "logging_steps": 10, "max_steps": 496, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 25, "total_flos": 6.10974224738132e+17, "train_batch_size": 16, "trial_name": null, "trial_params": null }