distilgpt2-HC3 / train_results.json
pszemraj's picture
End of training
5f8f910
{
"before_init_mem_cpu": 699817984,
"before_init_mem_gpu": 0,
"epoch": 5.98,
"init_mem_cpu_alloc_delta": -283938816,
"init_mem_cpu_peaked_delta": 332918784,
"init_mem_gpu_alloc_delta": 334744576,
"init_mem_gpu_peaked_delta": 0,
"train_loss": 2.0183091745143984,
"train_mem_cpu_alloc_delta": 1155149824,
"train_mem_cpu_peaked_delta": 308719616,
"train_mem_gpu_alloc_delta": 988995584,
"train_mem_gpu_peaked_delta": 4500811776,
"train_runtime": 35067.5673,
"train_samples": 5325,
"train_samples_per_second": 0.911,
"train_steps_per_second": 0.007
}