import json

# JSON file to summarize. Its top-level object maps an entry key to a dict
# holding one value for each name in METRIC_KEYS.
RESULTS_PATH = "/mnt/data/xiuying/Code/local_deploy/outputs/mini/mini_60s.json"

# Measurements that are collected and averaged, in the order they are printed.
METRIC_KEYS = (
    "tokens_per_second",
    "peak_gpu_memory_mb",
    "num_generated_tokens",
    "inference_time",
    "cpu_usage",
)

# Use a context manager so the file handle is closed as soon as parsing is
# done, and pin the encoding so decoding does not depend on the locale.
with open(RESULTS_PATH, encoding="utf-8") as results_file:
    runs = json.load(results_file)

# metric maps each measurement name to the list of values seen across entries.
metric = {name: [] for name in METRIC_KEYS}
for run in runs.values():
    for name in METRIC_KEYS:
        # A missing measurement raises KeyError, same as a direct lookup would.
        metric[name].append(run[name])

# With no entries every list is empty and the mean below would divide by
# zero; stop with an explicit message instead of a bare ZeroDivisionError.
if not runs:
    raise SystemExit(f"no entries found in {RESULTS_PATH}; nothing to average")

# Print the arithmetic mean of every measurement as "<name> <mean>".
for name, samples in metric.items():
    print(name, sum(samples) / len(samples))