{ "epoch": 0.896, "eval_audio_cosine_sim": 0.593883752822876, "eval_loss": 3.099468231201172, "eval_runtime": 2650.2034, "eval_samples": 12, "eval_samples_per_second": 0.005, "eval_steps_per_second": 0.005, "eval_text_cosine_sim": 0.41744545102119446 }