distilgpt2-HC3 / all_results.json
pszemraj's picture
End of training
5f8f910
{
"before_init_mem_cpu": 699817984,
"before_init_mem_gpu": 0,
"epoch": 5.98,
"eval_accuracy": 0.544073054441718,
"eval_loss": 1.9982993602752686,
"eval_mem_cpu_alloc_delta": 9977856,
"eval_mem_cpu_peaked_delta": 60305408,
"eval_mem_gpu_alloc_delta": 0,
"eval_mem_gpu_peaked_delta": 2357757952,
"eval_runtime": 481.5484,
"eval_samples": 588,
"eval_samples_per_second": 1.221,
"eval_steps_per_second": 0.305,
"init_mem_cpu_alloc_delta": -283938816,
"init_mem_cpu_peaked_delta": 332918784,
"init_mem_gpu_alloc_delta": 334744576,
"init_mem_gpu_peaked_delta": 0,
"perplexity": 7.376500655769286,
"train_loss": 2.0183091745143984,
"train_mem_cpu_alloc_delta": 1155149824,
"train_mem_cpu_peaked_delta": 308719616,
"train_mem_gpu_alloc_delta": 988995584,
"train_mem_gpu_peaked_delta": 4500811776,
"train_runtime": 35067.5673,
"train_samples": 5325,
"train_samples_per_second": 0.911,
"train_steps_per_second": 0.007
}