{ "epoch": 0.896, "eval_audio_cosine_sim": 0.593883752822876, "eval_loss": 3.099468231201172, "eval_runtime": 2650.2034, "eval_samples": 12, "eval_samples_per_second": 0.005, "eval_steps_per_second": 0.005, "eval_text_cosine_sim": 0.41744545102119446, "total_flos": 94839606729324.0, "train_loss": 10.104869842529297, "train_runtime": 2106.6171, "train_samples": 125, "train_samples_per_second": 0.059, "train_steps_per_second": 0.003 }