{ "epoch": 1.0, "eval_audio_cosine_sim": 0.4679793119430542, "eval_loss": 3.913311243057251, "eval_runtime": 237.6952, "eval_samples": 9, "eval_samples_per_second": 0.038, "eval_steps_per_second": 0.013, "eval_text_cosine_sim": 0.23309490084648132, "total_flos": 2077734266890272.0, "train_loss": 7.893082406785753, "train_runtime": 1993.2394, "train_samples": 1580, "train_samples_per_second": 0.793, "train_steps_per_second": 0.05 }