{ "epoch": 20.0, "eval_loss": 1.7797787189483643, "eval_mem_cpu_alloc_delta": 3391488, "eval_mem_cpu_peaked_delta": 0, "eval_mem_gpu_alloc_delta": 0, "eval_mem_gpu_peaked_delta": 7386987520, "eval_runtime": 183.3718, "eval_samples": 527, "eval_samples_per_second": 2.874, "init_mem_cpu_alloc_delta": 413691904, "init_mem_cpu_peaked_delta": 0, "init_mem_gpu_alloc_delta": 1444470784, "init_mem_gpu_peaked_delta": 0, "perplexity": 5.928544398894694, "train_mem_cpu_alloc_delta": 39612416, "train_mem_cpu_peaked_delta": 0, "train_mem_gpu_alloc_delta": 4257904128, "train_mem_gpu_peaked_delta": 7950325248, "train_runtime": 29384.1685, "train_samples": 2094, "train_samples_per_second": 1.425 }