{ "best_metric": 0.9918838103374626, "best_model_checkpoint": "resnet-50-finetuned-dog-vs-cat/checkpoint-329", "epoch": 2.986342943854325, "eval_steps": 500, "global_step": 492, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.06, "grad_norm": 0.5951987504959106, "learning_rate": 1e-05, "loss": 0.693, "step": 10 }, { "epoch": 0.12, "grad_norm": 0.743243932723999, "learning_rate": 2e-05, "loss": 0.6912, "step": 20 }, { "epoch": 0.18, "grad_norm": 0.6900031566619873, "learning_rate": 3e-05, "loss": 0.6871, "step": 30 }, { "epoch": 0.24, "grad_norm": 0.7302324175834656, "learning_rate": 4e-05, "loss": 0.68, "step": 40 }, { "epoch": 0.3, "grad_norm": 0.9748005270957947, "learning_rate": 5e-05, "loss": 0.6664, "step": 50 }, { "epoch": 0.36, "grad_norm": 1.131984829902649, "learning_rate": 4.8868778280542986e-05, "loss": 0.6408, "step": 60 }, { "epoch": 0.42, "grad_norm": 1.0235670804977417, "learning_rate": 4.7737556561085976e-05, "loss": 0.617, "step": 70 }, { "epoch": 0.49, "grad_norm": 1.1163190603256226, "learning_rate": 4.660633484162896e-05, "loss": 0.5807, "step": 80 }, { "epoch": 0.55, "grad_norm": 1.282690405845642, "learning_rate": 4.547511312217195e-05, "loss": 0.5438, "step": 90 }, { "epoch": 0.61, "grad_norm": 1.068588137626648, "learning_rate": 4.434389140271493e-05, "loss": 0.502, "step": 100 }, { "epoch": 0.67, "grad_norm": 0.9256152510643005, "learning_rate": 4.321266968325792e-05, "loss": 0.4701, "step": 110 }, { "epoch": 0.73, "grad_norm": 0.9594171643257141, "learning_rate": 4.2081447963800907e-05, "loss": 0.447, "step": 120 }, { "epoch": 0.79, "grad_norm": 1.0906646251678467, "learning_rate": 4.095022624434389e-05, "loss": 0.4238, "step": 130 }, { "epoch": 0.85, "grad_norm": 0.9434269070625305, "learning_rate": 3.981900452488688e-05, "loss": 0.4026, "step": 140 }, { "epoch": 0.91, "grad_norm": 1.15977144241333, "learning_rate": 3.868778280542987e-05, "loss": 0.3807, "step": 150 }, { "epoch": 0.97, "grad_norm": 0.9563286304473877, "learning_rate": 3.7556561085972854e-05, "loss": 0.3357, "step": 160 }, { "epoch": 1.0, "eval_accuracy": 0.9867577958137548, "eval_loss": 0.22545380890369415, "eval_runtime": 27.2631, "eval_samples_per_second": 85.867, "eval_steps_per_second": 2.714, "step": 164 }, { "epoch": 1.03, "grad_norm": 1.0790256261825562, "learning_rate": 3.642533936651584e-05, "loss": 0.3166, "step": 170 }, { "epoch": 1.09, "grad_norm": 0.9720374345779419, "learning_rate": 3.529411764705883e-05, "loss": 0.2961, "step": 180 }, { "epoch": 1.15, "grad_norm": 1.2068029642105103, "learning_rate": 3.416289592760181e-05, "loss": 0.2753, "step": 190 }, { "epoch": 1.21, "grad_norm": 1.0407794713974, "learning_rate": 3.3031674208144794e-05, "loss": 0.2631, "step": 200 }, { "epoch": 1.27, "grad_norm": 0.9938662052154541, "learning_rate": 3.1900452488687784e-05, "loss": 0.2427, "step": 210 }, { "epoch": 1.34, "grad_norm": 0.8150984048843384, "learning_rate": 3.0769230769230774e-05, "loss": 0.2163, "step": 220 }, { "epoch": 1.4, "grad_norm": 0.7988576889038086, "learning_rate": 2.9638009049773758e-05, "loss": 0.216, "step": 230 }, { "epoch": 1.46, "grad_norm": 1.1607186794281006, "learning_rate": 2.850678733031674e-05, "loss": 0.2086, "step": 240 }, { "epoch": 1.52, "grad_norm": 1.0582228899002075, "learning_rate": 2.737556561085973e-05, "loss": 0.1857, "step": 250 }, { "epoch": 1.58, "grad_norm": 0.8427866697311401, "learning_rate": 2.6244343891402718e-05, "loss": 0.1808, "step": 260 }, { "epoch": 1.64, "grad_norm": 0.8713784217834473, "learning_rate": 2.51131221719457e-05, "loss": 0.1689, "step": 270 }, { "epoch": 1.7, "grad_norm": 0.8434356451034546, "learning_rate": 2.3981900452488688e-05, "loss": 0.1732, "step": 280 }, { "epoch": 1.76, "grad_norm": 0.7676334977149963, "learning_rate": 2.2850678733031675e-05, "loss": 0.1678, "step": 290 }, { "epoch": 1.82, "grad_norm": 0.6907545328140259, "learning_rate": 2.1719457013574662e-05, "loss": 0.1624, "step": 300 }, { "epoch": 1.88, "grad_norm": 0.7248879075050354, "learning_rate": 2.058823529411765e-05, "loss": 0.1613, "step": 310 }, { "epoch": 1.94, "grad_norm": 1.4968321323394775, "learning_rate": 1.9457013574660635e-05, "loss": 0.1683, "step": 320 }, { "epoch": 2.0, "eval_accuracy": 0.9918838103374626, "eval_loss": 0.05769222229719162, "eval_runtime": 27.9863, "eval_samples_per_second": 83.648, "eval_steps_per_second": 2.644, "step": 329 }, { "epoch": 2.0, "grad_norm": 1.3721855878829956, "learning_rate": 1.832579185520362e-05, "loss": 0.1635, "step": 330 }, { "epoch": 2.06, "grad_norm": 1.0472960472106934, "learning_rate": 1.7194570135746606e-05, "loss": 0.1576, "step": 340 }, { "epoch": 2.12, "grad_norm": 0.9706544280052185, "learning_rate": 1.6063348416289596e-05, "loss": 0.157, "step": 350 }, { "epoch": 2.19, "grad_norm": 0.6168745756149292, "learning_rate": 1.493212669683258e-05, "loss": 0.1535, "step": 360 }, { "epoch": 2.25, "grad_norm": 2.201444149017334, "learning_rate": 1.3800904977375568e-05, "loss": 0.1494, "step": 370 }, { "epoch": 2.31, "grad_norm": 0.6486875414848328, "learning_rate": 1.2669683257918553e-05, "loss": 0.1502, "step": 380 }, { "epoch": 2.37, "grad_norm": 0.7056523561477661, "learning_rate": 1.153846153846154e-05, "loss": 0.1446, "step": 390 }, { "epoch": 2.43, "grad_norm": 1.3912076950073242, "learning_rate": 1.0407239819004526e-05, "loss": 0.1428, "step": 400 }, { "epoch": 2.49, "grad_norm": 1.0273276567459106, "learning_rate": 9.276018099547511e-06, "loss": 0.144, "step": 410 }, { "epoch": 2.55, "grad_norm": 1.1831059455871582, "learning_rate": 8.144796380090498e-06, "loss": 0.1572, "step": 420 }, { "epoch": 2.61, "grad_norm": 1.5577871799468994, "learning_rate": 7.013574660633485e-06, "loss": 0.1419, "step": 430 }, { "epoch": 2.67, "grad_norm": 1.2978945970535278, "learning_rate": 5.882352941176471e-06, "loss": 0.1562, "step": 440 }, { "epoch": 2.73, "grad_norm": 0.5621709823608398, "learning_rate": 4.751131221719457e-06, "loss": 0.1352, "step": 450 }, { "epoch": 2.79, "grad_norm": 1.0484684705734253, "learning_rate": 3.619909502262444e-06, "loss": 0.143, "step": 460 }, { "epoch": 2.85, "grad_norm": 0.9431272149085999, "learning_rate": 2.48868778280543e-06, "loss": 0.1517, "step": 470 }, { "epoch": 2.91, "grad_norm": 0.8055468201637268, "learning_rate": 1.3574660633484164e-06, "loss": 0.1455, "step": 480 }, { "epoch": 2.97, "grad_norm": 1.1355047225952148, "learning_rate": 2.2624434389140275e-07, "loss": 0.1448, "step": 490 }, { "epoch": 2.99, "eval_accuracy": 0.9918838103374626, "eval_loss": 0.04604041948914528, "eval_runtime": 27.4499, "eval_samples_per_second": 85.283, "eval_steps_per_second": 2.696, "step": 492 }, { "epoch": 2.99, "step": 492, "total_flos": 1.336513820941394e+18, "train_loss": 0.2994473668617931, "train_runtime": 953.1608, "train_samples_per_second": 66.313, "train_steps_per_second": 0.516 } ], "logging_steps": 10, "max_steps": 492, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "total_flos": 1.336513820941394e+18, "train_batch_size": 32, "trial_name": null, "trial_params": null }