{ "epoch": 1500.0, "eval_loss": 0.029992273077368736, "eval_mem_cpu_alloc_delta": 72385, "eval_mem_cpu_peaked_delta": 26695, "eval_mem_gpu_alloc_delta": 0, "eval_mem_gpu_peaked_delta": 379483648, "eval_runtime": 0.6282, "eval_samples": 18, "eval_samples_per_second": 28.653, "init_mem_cpu_alloc_delta": 66338, "init_mem_cpu_peaked_delta": 19115, "init_mem_gpu_alloc_delta": 454524928, "init_mem_gpu_peaked_delta": 0, "perplexity": 1.0304465717418196, "train_mem_cpu_alloc_delta": 465658, "train_mem_cpu_peaked_delta": 103326056, "train_mem_gpu_alloc_delta": 1347066368, "train_mem_gpu_peaked_delta": 3584124928, "train_runtime": 3225.4397, "train_samples": 18, "train_samples_per_second": 8.371 }