{ "best_metric": null, "best_model_checkpoint": null, "epoch": 15.835051546391753, "eval_steps": 30, "global_step": 96, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.16494845360824742, "grad_norm": 1.603933334350586, "learning_rate": 2.9687500000000003e-05, "loss": 10.1011, "step": 1 }, { "epoch": 0.32989690721649484, "grad_norm": 1.7179864645004272, "learning_rate": 2.9375e-05, "loss": 10.2338, "step": 2 }, { "epoch": 0.4948453608247423, "grad_norm": 1.748449444770813, "learning_rate": 2.90625e-05, "loss": 10.0554, "step": 3 }, { "epoch": 0.6597938144329897, "grad_norm": 2.172945976257324, "learning_rate": 2.875e-05, "loss": 10.1999, "step": 4 }, { "epoch": 0.8247422680412371, "grad_norm": 2.121206045150757, "learning_rate": 2.84375e-05, "loss": 10.0634, "step": 5 }, { "epoch": 0.9896907216494846, "grad_norm": 2.179741144180298, "learning_rate": 2.8125e-05, "loss": 9.8454, "step": 6 }, { "epoch": 1.1546391752577319, "grad_norm": 2.3345258235931396, "learning_rate": 2.7812500000000002e-05, "loss": 9.9095, "step": 7 }, { "epoch": 1.3195876288659794, "grad_norm": 2.664315938949585, "learning_rate": 2.75e-05, "loss": 9.8296, "step": 8 }, { "epoch": 1.4845360824742269, "grad_norm": 3.1426305770874023, "learning_rate": 2.71875e-05, "loss": 9.8446, "step": 9 }, { "epoch": 1.6494845360824741, "grad_norm": 2.7230398654937744, "learning_rate": 2.6875000000000003e-05, "loss": 9.5769, "step": 10 }, { "epoch": 1.8144329896907216, "grad_norm": 2.9412689208984375, "learning_rate": 2.65625e-05, "loss": 9.5563, "step": 11 }, { "epoch": 1.9793814432989691, "grad_norm": 3.2456820011138916, "learning_rate": 2.625e-05, "loss": 9.4496, "step": 12 }, { "epoch": 2.1443298969072164, "grad_norm": 3.3948400020599365, "learning_rate": 2.59375e-05, "loss": 9.3822, "step": 13 }, { "epoch": 2.3092783505154637, "grad_norm": 3.4787559509277344, "learning_rate": 2.5625e-05, "loss": 9.2314, "step": 14 }, { "epoch": 2.4742268041237114, "grad_norm": 3.8148350715637207, "learning_rate": 2.5312500000000002e-05, "loss": 9.1825, "step": 15 }, { "epoch": 2.6391752577319587, "grad_norm": 3.7718141078948975, "learning_rate": 2.5e-05, "loss": 9.0502, "step": 16 }, { "epoch": 2.804123711340206, "grad_norm": 3.9114766120910645, "learning_rate": 2.46875e-05, "loss": 8.9091, "step": 17 }, { "epoch": 2.9690721649484537, "grad_norm": 3.791461706161499, "learning_rate": 2.4375e-05, "loss": 8.825, "step": 18 }, { "epoch": 3.134020618556701, "grad_norm": 3.532548666000366, "learning_rate": 2.4062500000000002e-05, "loss": 8.6953, "step": 19 }, { "epoch": 3.2989690721649483, "grad_norm": 3.9233362674713135, "learning_rate": 2.3749999999999998e-05, "loss": 8.5862, "step": 20 }, { "epoch": 3.463917525773196, "grad_norm": 3.315626621246338, "learning_rate": 2.34375e-05, "loss": 8.5552, "step": 21 }, { "epoch": 3.6288659793814433, "grad_norm": 3.417571783065796, "learning_rate": 2.3125000000000003e-05, "loss": 8.358, "step": 22 }, { "epoch": 3.7938144329896906, "grad_norm": 3.810925006866455, "learning_rate": 2.28125e-05, "loss": 8.2089, "step": 23 }, { "epoch": 3.9587628865979383, "grad_norm": 3.319178819656372, "learning_rate": 2.25e-05, "loss": 8.1787, "step": 24 }, { "epoch": 4.123711340206185, "grad_norm": 3.318840742111206, "learning_rate": 2.21875e-05, "loss": 8.1272, "step": 25 }, { "epoch": 4.288659793814433, "grad_norm": 3.684760332107544, "learning_rate": 2.1875e-05, "loss": 8.1127, "step": 26 }, { "epoch": 4.453608247422681, "grad_norm": 3.7684097290039062, "learning_rate": 2.15625e-05, "loss": 7.9922, "step": 27 }, { "epoch": 4.618556701030927, "grad_norm": 4.493322849273682, "learning_rate": 2.125e-05, "loss": 7.8609, "step": 28 }, { "epoch": 4.783505154639175, "grad_norm": 4.545900821685791, "learning_rate": 2.09375e-05, "loss": 7.9192, "step": 29 }, { "epoch": 4.948453608247423, "grad_norm": 3.999005079269409, "learning_rate": 2.0625e-05, "loss": 7.7976, "step": 30 }, { "epoch": 4.948453608247423, "eval_audio_cosine_sim": 0.5581808090209961, "eval_loss": 3.100252151489258, "eval_runtime": 2102.121, "eval_samples_per_second": 0.006, "eval_steps_per_second": 0.006, "eval_text_cosine_sim": 0.3910459578037262, "step": 30 }, { "epoch": 5.11340206185567, "grad_norm": 4.612445831298828, "learning_rate": 2.0312500000000002e-05, "loss": 7.7548, "step": 31 }, { "epoch": 5.278350515463917, "grad_norm": 4.341012001037598, "learning_rate": 1.9999999999999998e-05, "loss": 7.5716, "step": 32 }, { "epoch": 5.443298969072165, "grad_norm": 4.00157356262207, "learning_rate": 1.96875e-05, "loss": 7.8051, "step": 33 }, { "epoch": 5.608247422680412, "grad_norm": 3.777099847793579, "learning_rate": 1.9375e-05, "loss": 7.5696, "step": 34 }, { "epoch": 5.77319587628866, "grad_norm": 3.256072998046875, "learning_rate": 1.90625e-05, "loss": 7.5285, "step": 35 }, { "epoch": 5.938144329896907, "grad_norm": 3.026923179626465, "learning_rate": 1.8750000000000002e-05, "loss": 7.3408, "step": 36 }, { "epoch": 6.103092783505154, "grad_norm": 2.9930453300476074, "learning_rate": 1.84375e-05, "loss": 7.2398, "step": 37 }, { "epoch": 6.268041237113402, "grad_norm": 2.6270177364349365, "learning_rate": 1.8125e-05, "loss": 7.2282, "step": 38 }, { "epoch": 6.43298969072165, "grad_norm": 2.583390474319458, "learning_rate": 1.78125e-05, "loss": 7.2528, "step": 39 }, { "epoch": 6.597938144329897, "grad_norm": 2.5908498764038086, "learning_rate": 1.7500000000000002e-05, "loss": 7.2968, "step": 40 }, { "epoch": 6.762886597938144, "grad_norm": 2.725625991821289, "learning_rate": 1.7187499999999998e-05, "loss": 7.4103, "step": 41 }, { "epoch": 6.927835051546392, "grad_norm": 2.520019769668579, "learning_rate": 1.6875e-05, "loss": 7.2359, "step": 42 }, { "epoch": 7.092783505154639, "grad_norm": 2.3079137802124023, "learning_rate": 1.6562500000000003e-05, "loss": 7.0846, "step": 43 }, { "epoch": 7.257731958762887, "grad_norm": 2.5834431648254395, "learning_rate": 1.625e-05, "loss": 6.9889, "step": 44 }, { "epoch": 7.422680412371134, "grad_norm": 2.618986129760742, "learning_rate": 1.59375e-05, "loss": 7.2963, "step": 45 }, { "epoch": 7.587628865979381, "grad_norm": 2.395085573196411, "learning_rate": 1.5625e-05, "loss": 7.1191, "step": 46 }, { "epoch": 7.752577319587629, "grad_norm": 2.3549487590789795, "learning_rate": 1.53125e-05, "loss": 6.7482, "step": 47 }, { "epoch": 7.917525773195877, "grad_norm": 2.349393606185913, "learning_rate": 1.5e-05, "loss": 7.0581, "step": 48 }, { "epoch": 8.082474226804123, "grad_norm": 2.2347733974456787, "learning_rate": 1.46875e-05, "loss": 6.8346, "step": 49 }, { "epoch": 8.24742268041237, "grad_norm": 2.182095527648926, "learning_rate": 1.4375e-05, "loss": 7.0005, "step": 50 }, { "epoch": 8.412371134020619, "grad_norm": 2.0510122776031494, "learning_rate": 1.40625e-05, "loss": 6.8881, "step": 51 }, { "epoch": 8.577319587628866, "grad_norm": 1.9954293966293335, "learning_rate": 1.375e-05, "loss": 6.7836, "step": 52 }, { "epoch": 8.742268041237114, "grad_norm": 1.9961941242218018, "learning_rate": 1.3437500000000001e-05, "loss": 6.6969, "step": 53 }, { "epoch": 8.907216494845361, "grad_norm": 2.0308048725128174, "learning_rate": 1.3125e-05, "loss": 6.9004, "step": 54 }, { "epoch": 9.072164948453608, "grad_norm": 1.9732003211975098, "learning_rate": 1.28125e-05, "loss": 6.7652, "step": 55 }, { "epoch": 9.237113402061855, "grad_norm": 1.6928937435150146, "learning_rate": 1.25e-05, "loss": 6.6789, "step": 56 }, { "epoch": 9.402061855670103, "grad_norm": 1.9639475345611572, "learning_rate": 1.21875e-05, "loss": 6.6207, "step": 57 }, { "epoch": 9.56701030927835, "grad_norm": 1.6721774339675903, "learning_rate": 1.1874999999999999e-05, "loss": 6.7687, "step": 58 }, { "epoch": 9.731958762886597, "grad_norm": 1.8212580680847168, "learning_rate": 1.1562500000000002e-05, "loss": 6.5534, "step": 59 }, { "epoch": 9.896907216494846, "grad_norm": 1.6735063791275024, "learning_rate": 1.125e-05, "loss": 6.6807, "step": 60 }, { "epoch": 9.896907216494846, "eval_audio_cosine_sim": 0.6627817749977112, "eval_loss": 3.2311527729034424, "eval_runtime": 2160.5029, "eval_samples_per_second": 0.006, "eval_steps_per_second": 0.006, "eval_text_cosine_sim": 0.35343077778816223, "step": 60 }, { "epoch": 10.061855670103093, "grad_norm": 1.6333427429199219, "learning_rate": 1.09375e-05, "loss": 6.6808, "step": 61 }, { "epoch": 10.22680412371134, "grad_norm": 1.5876134634017944, "learning_rate": 1.0625e-05, "loss": 6.6629, "step": 62 }, { "epoch": 10.391752577319588, "grad_norm": 1.4894704818725586, "learning_rate": 1.03125e-05, "loss": 6.6252, "step": 63 }, { "epoch": 10.556701030927835, "grad_norm": 1.5303210020065308, "learning_rate": 9.999999999999999e-06, "loss": 6.4798, "step": 64 }, { "epoch": 10.721649484536082, "grad_norm": 1.3529256582260132, "learning_rate": 9.6875e-06, "loss": 6.5828, "step": 65 }, { "epoch": 10.88659793814433, "grad_norm": 1.4958022832870483, "learning_rate": 9.375000000000001e-06, "loss": 6.5528, "step": 66 }, { "epoch": 11.051546391752577, "grad_norm": 1.6871830224990845, "learning_rate": 9.0625e-06, "loss": 6.3689, "step": 67 }, { "epoch": 11.216494845360824, "grad_norm": 1.5430212020874023, "learning_rate": 8.750000000000001e-06, "loss": 6.3102, "step": 68 }, { "epoch": 11.381443298969073, "grad_norm": 1.4388532638549805, "learning_rate": 8.4375e-06, "loss": 6.4015, "step": 69 }, { "epoch": 11.54639175257732, "grad_norm": 1.7323001623153687, "learning_rate": 8.125e-06, "loss": 6.7011, "step": 70 }, { "epoch": 11.711340206185566, "grad_norm": 1.4382604360580444, "learning_rate": 7.8125e-06, "loss": 6.3432, "step": 71 }, { "epoch": 11.876288659793815, "grad_norm": 1.323933720588684, "learning_rate": 7.5e-06, "loss": 6.5288, "step": 72 }, { "epoch": 12.041237113402062, "grad_norm": 1.3559449911117554, "learning_rate": 7.1875e-06, "loss": 6.557, "step": 73 }, { "epoch": 12.206185567010309, "grad_norm": 1.1633719205856323, "learning_rate": 6.875e-06, "loss": 6.386, "step": 74 }, { "epoch": 12.371134020618557, "grad_norm": 1.5071109533309937, "learning_rate": 6.5625e-06, "loss": 6.5438, "step": 75 }, { "epoch": 12.536082474226804, "grad_norm": 1.1435602903366089, "learning_rate": 6.25e-06, "loss": 6.4093, "step": 76 }, { "epoch": 12.70103092783505, "grad_norm": 1.4174741506576538, "learning_rate": 5.9374999999999995e-06, "loss": 6.3887, "step": 77 }, { "epoch": 12.8659793814433, "grad_norm": 1.423195481300354, "learning_rate": 5.625e-06, "loss": 6.5358, "step": 78 }, { "epoch": 13.030927835051546, "grad_norm": 1.2214951515197754, "learning_rate": 5.3125e-06, "loss": 6.4307, "step": 79 }, { "epoch": 13.195876288659793, "grad_norm": 1.1089880466461182, "learning_rate": 4.9999999999999996e-06, "loss": 6.4179, "step": 80 }, { "epoch": 13.360824742268042, "grad_norm": 1.0945030450820923, "learning_rate": 4.6875000000000004e-06, "loss": 6.3149, "step": 81 }, { "epoch": 13.525773195876289, "grad_norm": 1.4375858306884766, "learning_rate": 4.3750000000000005e-06, "loss": 6.6049, "step": 82 }, { "epoch": 13.690721649484535, "grad_norm": 1.2144274711608887, "learning_rate": 4.0625e-06, "loss": 6.3501, "step": 83 }, { "epoch": 13.855670103092784, "grad_norm": 1.091561198234558, "learning_rate": 3.75e-06, "loss": 6.4059, "step": 84 }, { "epoch": 14.02061855670103, "grad_norm": 1.4113916158676147, "learning_rate": 3.4375e-06, "loss": 6.1656, "step": 85 }, { "epoch": 14.185567010309278, "grad_norm": 1.275417685508728, "learning_rate": 3.125e-06, "loss": 6.2095, "step": 86 }, { "epoch": 14.350515463917526, "grad_norm": 1.1587275266647339, "learning_rate": 2.8125e-06, "loss": 6.4885, "step": 87 }, { "epoch": 14.515463917525773, "grad_norm": 1.0685796737670898, "learning_rate": 2.4999999999999998e-06, "loss": 6.3991, "step": 88 }, { "epoch": 14.68041237113402, "grad_norm": 1.102617859840393, "learning_rate": 2.1875000000000002e-06, "loss": 6.4392, "step": 89 }, { "epoch": 14.845360824742269, "grad_norm": 1.0771472454071045, "learning_rate": 1.875e-06, "loss": 6.4444, "step": 90 }, { "epoch": 14.845360824742269, "eval_audio_cosine_sim": 0.5883382558822632, "eval_loss": 3.6276748180389404, "eval_runtime": 1072.6246, "eval_samples_per_second": 0.011, "eval_steps_per_second": 0.011, "eval_text_cosine_sim": 0.24592441320419312, "step": 90 }, { "epoch": 15.010309278350515, "grad_norm": 1.2423425912857056, "learning_rate": 1.5625e-06, "loss": 6.2888, "step": 91 }, { "epoch": 15.175257731958762, "grad_norm": 1.1462149620056152, "learning_rate": 1.2499999999999999e-06, "loss": 6.4798, "step": 92 }, { "epoch": 15.34020618556701, "grad_norm": 1.1183279752731323, "learning_rate": 9.375e-07, "loss": 6.254, "step": 93 }, { "epoch": 15.505154639175258, "grad_norm": 1.1249028444290161, "learning_rate": 6.249999999999999e-07, "loss": 6.3222, "step": 94 }, { "epoch": 15.670103092783505, "grad_norm": 1.2026952505111694, "learning_rate": 3.1249999999999997e-07, "loss": 6.2651, "step": 95 }, { "epoch": 15.835051546391753, "grad_norm": 1.283206820487976, "learning_rate": 0.0, "loss": 6.553, "step": 96 }, { "epoch": 15.835051546391753, "step": 96, "total_flos": 1925884422732936.0, "train_loss": 7.460742597778638, "train_runtime": 13364.4334, "train_samples_per_second": 0.116, "train_steps_per_second": 0.007 } ], "logging_steps": 1.0, "max_steps": 96, "num_input_tokens_seen": 0, "num_train_epochs": 16, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": false, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1925884422732936.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }