test / benchmarks /llm_gpu_benchmarks.json
iblfe's picture
Upload folder using huggingface_hub
b585c7f verified
[
{
"backend": "transformers",
"base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
"task": "summary_and_generate",
"bits": 16,
"ngpus": 1,
"reps": 3,
"date": "08/18/2023 10:46:19",
"git_sha": "55d3b55b",
"n_gpus": 1,
"transformers": "4.30.2",
"bitsandbytes": "0.41.1",
"cuda": "11.7",
"hostname": "rippa",
"gpus": "1 x NVIDIA GeForce RTX 4090 (24564 MiB)",
"summarize_input_len_bytes": 857252,
"summarize_output_len_bytes": 1417,
"summarize_time": 32.29472152392069,
"generate_output_len_bytes": 2384,
"generate_time": 14.563165505727133
},
{
"backend": "transformers",
"base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
"task": "summary_and_generate",
"bits": 16,
"ngpus": 1,
"reps": 3,
"date": "08/18/2023 10:48:55",
"git_sha": "55d3b55b",
"n_gpus": 1,
"transformers": "4.30.2",
"bitsandbytes": "0.41.1",
"cuda": "11.7",
"hostname": "timemachine",
"gpus": "1 x NVIDIA GeForce RTX 3090 (24576 MiB)",
"summarize_input_len_bytes": 857252,
"summarize_output_len_bytes": 1417,
"summarize_time": 67.97515447934468,
"generate_output_len_bytes": 2384,
"generate_time": 33.00641902287801
},
{
"backend": "transformers",
"base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
"task": "summary_and_generate",
"bits": 8,
"ngpus": 1,
"reps": 3,
"date": "08/18/2023 10:48:58",
"git_sha": "55d3b55b",
"n_gpus": 1,
"transformers": "4.30.2",
"bitsandbytes": "0.41.1",
"cuda": "11.7",
"hostname": "rippa",
"gpus": "1 x NVIDIA GeForce RTX 4090 (24564 MiB)",
"summarize_input_len_bytes": 857252,
"summarize_output_len_bytes": 1440,
"summarize_time": 114.62220064798991,
"generate_output_len_bytes": 2619,
"generate_time": 71.0722058614095
},
{
"backend": "transformers",
"base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
"task": "summary_and_generate",
"bits": 4,
"ngpus": 1,
"reps": 3,
"date": "08/18/2023 10:58:34",
"git_sha": "55d3b55b",
"n_gpus": 1,
"transformers": "4.30.2",
"bitsandbytes": "0.41.1",
"cuda": "11.7",
"hostname": "rippa",
"gpus": "1 x NVIDIA GeForce RTX 4090 (24564 MiB)",
"summarize_input_len_bytes": 857252,
"summarize_output_len_bytes": 866,
"summarize_time": 39.54404203097025,
"generate_output_len_bytes": 2927,
"generate_time": 22.466302394866943
},
{
"backend": "transformers",
"base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
"task": "summary_and_generate",
"bits": 16,
"ngpus": 2,
"reps": 3,
"date": "08/18/2023 11:01:59",
"git_sha": "55d3b55b",
"n_gpus": 2,
"transformers": "4.30.2",
"bitsandbytes": "0.41.1",
"cuda": "11.7",
"hostname": "rippa",
"gpus": "2 x NVIDIA RTX 6000 Ada Generation (49140 MiB)",
"summarize_input_len_bytes": 857252,
"summarize_output_len_bytes": 1417,
"summarize_time": 32.1394579410553,
"generate_output_len_bytes": 2384,
"generate_time": 14.757195552190145
},
{
"backend": "transformers",
"base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
"task": "summary_and_generate",
"bits": 8,
"ngpus": 1,
"reps": 3,
"date": "08/18/2023 10:54:29",
"git_sha": "55d3b55b",
"n_gpus": 1,
"transformers": "4.30.2",
"bitsandbytes": "0.41.1",
"cuda": "11.7",
"hostname": "timemachine",
"gpus": "1 x NVIDIA GeForce RTX 3090 (24576 MiB)",
"summarize_input_len_bytes": 857252,
"summarize_output_len_bytes": 910,
"summarize_time": 185.14580019315085,
"generate_output_len_bytes": 2042,
"generate_time": 117.13909141222636
},
{
"backend": "transformers",
"base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
"task": "summary_and_generate",
"bits": 8,
"ngpus": 2,
"reps": 3,
"date": "08/18/2023 11:04:37",
"git_sha": "55d3b55b",
"n_gpus": 2,
"transformers": "4.30.2",
"bitsandbytes": "0.41.1",
"cuda": "11.7",
"hostname": "rippa",
"gpus": "2 x NVIDIA RTX 6000 Ada Generation (49140 MiB)",
"summarize_input_len_bytes": 857252,
"summarize_output_len_bytes": 1002,
"summarize_time": 94.98129558563232,
"generate_output_len_bytes": 2512,
"generate_time": 69.4871145884196
},
{
"backend": "transformers",
"base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
"task": "summary_and_generate",
"bits": 4,
"ngpus": 2,
"reps": 3,
"date": "08/18/2023 11:13:08",
"git_sha": "55d3b55b",
"n_gpus": 2,
"transformers": "4.30.2",
"bitsandbytes": "0.41.1",
"cuda": "11.7",
"hostname": "rippa",
"gpus": "2 x NVIDIA RTX 6000 Ada Generation (49140 MiB)",
"summarize_input_len_bytes": 857252,
"summarize_output_len_bytes": 1276,
"summarize_time": 43.23498781522115,
"generate_output_len_bytes": 2927,
"generate_time": 22.826789538065594
},
{
"backend": "transformers",
"base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
"task": "summary_and_generate",
"bits": 4,
"ngpus": 1,
"reps": 3,
"date": "08/18/2023 11:10:08",
"git_sha": "55d3b55b",
"n_gpus": 1,
"transformers": "4.30.2",
"bitsandbytes": "0.41.1",
"cuda": "11.7",
"hostname": "timemachine",
"gpus": "1 x NVIDIA GeForce RTX 3090 (24576 MiB)",
"summarize_input_len_bytes": 857252,
"summarize_output_len_bytes": 991,
"summarize_time": 90.51939169565837,
"generate_output_len_bytes": 2927,
"generate_time": 48.96095744768778
},
{
"backend": "transformers",
"base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
"task": "summary_and_generate",
"bits": 16,
"ngpus": 1,
"reps": 3,
"date": "08/18/2023 11:16:48",
"git_sha": "55d3b55b",
"n_gpus": 1,
"transformers": "4.30.2",
"bitsandbytes": "0.41.1",
"cuda": "11.7",
"hostname": "rippa",
"gpus": "1 x NVIDIA RTX 6000 Ada Generation (49140 MiB)",
"summarize_input_len_bytes": 857252,
"summarize_output_len_bytes": 1417,
"summarize_time": 31.86189842224121,
"generate_output_len_bytes": 2384,
"generate_time": 14.209659894307455
},
{
"backend": "transformers",
"base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
"task": "summary_and_generate",
"bits": 16,
"ngpus": 2,
"reps": 3,
"date": "08/18/2023 11:17:39",
"git_sha": "55d3b55b",
"n_gpus": 2,
"transformers": "4.30.2",
"bitsandbytes": "0.41.1",
"cuda": "11.7",
"hostname": "timemachine",
"gpus": "2 x NVIDIA GeForce RTX 3090 (24576 MiB)",
"summarize_input_len_bytes": 857252,
"summarize_output_len_bytes": 1417,
"summarize_time": 71.48081835110982,
"generate_output_len_bytes": 2384,
"generate_time": 33.5740262667338
},
{
"backend": "transformers",
"base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
"task": "summary_and_generate",
"bits": 8,
"ngpus": 1,
"reps": 3,
"date": "08/18/2023 11:19:24",
"git_sha": "55d3b55b",
"n_gpus": 1,
"transformers": "4.30.2",
"bitsandbytes": "0.41.1",
"cuda": "11.7",
"hostname": "rippa",
"gpus": "1 x NVIDIA RTX 6000 Ada Generation (49140 MiB)",
"summarize_input_len_bytes": 857252,
"summarize_output_len_bytes": 1002,
"summarize_time": 94.17744310696919,
"generate_output_len_bytes": 2512,
"generate_time": 70.12592967351277
},
{
"backend": "transformers",
"base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
"task": "summary_and_generate",
"bits": 4,
"ngpus": 1,
"reps": 3,
"date": "08/18/2023 11:27:57",
"git_sha": "55d3b55b",
"n_gpus": 1,
"transformers": "4.30.2",
"bitsandbytes": "0.41.1",
"cuda": "11.7",
"hostname": "rippa",
"gpus": "1 x NVIDIA RTX 6000 Ada Generation (49140 MiB)",
"summarize_input_len_bytes": 857252,
"summarize_output_len_bytes": 1276,
"summarize_time": 42.8066500822703,
"generate_output_len_bytes": 2927,
"generate_time": 22.626200040181477
},
{
"backend": "transformers",
"base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
"task": "summary_and_generate",
"bits": 8,
"ngpus": 2,
"reps": 3,
"date": "08/18/2023 11:23:22",
"git_sha": "55d3b55b",
"n_gpus": 2,
"transformers": "4.30.2",
"bitsandbytes": "0.41.1",
"cuda": "11.7",
"hostname": "timemachine",
"gpus": "2 x NVIDIA GeForce RTX 3090 (24576 MiB)",
"summarize_input_len_bytes": 857252,
"summarize_output_len_bytes": 910,
"summarize_time": 186.88371555010477,
"generate_output_len_bytes": 2042,
"generate_time": 117.3530724843343
},
{
"backend": "transformers",
"base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
"task": "summary_and_generate",
"bits": 4,
"ngpus": 2,
"reps": 3,
"date": "08/18/2023 11:39:03",
"git_sha": "55d3b55b",
"n_gpus": 2,
"transformers": "4.30.2",
"bitsandbytes": "0.41.1",
"cuda": "11.7",
"hostname": "timemachine",
"gpus": "2 x NVIDIA GeForce RTX 3090 (24576 MiB)",
"summarize_input_len_bytes": 857252,
"summarize_output_len_bytes": 991,
"summarize_time": 94.50985678037007,
"generate_output_len_bytes": 2927,
"generate_time": 50.06416177749634
},
{
"backend": "transformers",
"base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
"task": "summary_and_generate",
"bits": 16,
"ngpus": 1,
"reps": 3,
"date": "08/18/2023 21:08:31",
"git_sha": "fc4826f2",
"n_gpus": 1,
"transformers": "4.30.2",
"bitsandbytes": "0.41.1",
"cuda": "11.8",
"hostname": "cloudvm",
"gpus": "1 x NVIDIA A100-SXM4-80GB (81920 MiB)",
"summarize_input_len_bytes": 857252,
"summarize_output_len_bytes": 1267,
"summarize_time": 38.80374129613241,
"generate_output_len_bytes": 2384,
"generate_time": 19.23690136273702
},
{
"backend": "transformers",
"base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
"task": "summary_and_generate",
"bits": 8,
"ngpus": 1,
"reps": 3,
"date": "08/18/2023 21:11:49",
"git_sha": "fc4826f2",
"n_gpus": 1,
"transformers": "4.30.2",
"bitsandbytes": "0.41.1",
"cuda": "11.8",
"hostname": "cloudvm",
"gpus": "1 x NVIDIA A100-SXM4-80GB (81920 MiB)",
"summarize_input_len_bytes": 857252,
"summarize_output_len_bytes": 1179,
"summarize_time": 178.79640992482504,
"generate_output_len_bytes": 2772,
"generate_time": 93.99476226170857
},
{
"backend": "transformers",
"base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
"task": "summary_and_generate",
"bits": 4,
"ngpus": 1,
"reps": 3,
"date": "08/18/2023 21:25:53",
"git_sha": "fc4826f2",
"n_gpus": 1,
"transformers": "4.30.2",
"bitsandbytes": "0.41.1",
"cuda": "11.8",
"hostname": "cloudvm",
"gpus": "1 x NVIDIA A100-SXM4-80GB (81920 MiB)",
"summarize_input_len_bytes": 857252,
"summarize_output_len_bytes": 1002,
"summarize_time": 53.44271365801493,
"generate_output_len_bytes": 2927,
"generate_time": 30.641155401865642
},
{
"backend": "transformers",
"base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
"task": "summary_and_generate",
"bits": 16,
"ngpus": 2,
"reps": 3,
"date": "08/18/2023 21:30:30",
"git_sha": "fc4826f2",
"n_gpus": 2,
"transformers": "4.30.2",
"bitsandbytes": "0.41.1",
"cuda": "11.8",
"hostname": "cloudvm",
"gpus": "2 x NVIDIA A100-SXM4-80GB (81920 MiB)",
"summarize_input_len_bytes": 857252,
"summarize_output_len_bytes": 1267,
"summarize_time": 40.80062770843506,
"generate_output_len_bytes": 2384,
"generate_time": 19.825008392333984
},
{
"backend": "transformers",
"base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
"task": "summary_and_generate",
"bits": 8,
"ngpus": 2,
"reps": 3,
"date": "08/18/2023 21:35:29",
"git_sha": "fc4826f2",
"n_gpus": 2,
"transformers": "4.30.2",
"bitsandbytes": "0.41.1",
"cuda": "11.8",
"hostname": "cloudvm",
"gpus": "2 x NVIDIA A100-SXM4-80GB (81920 MiB)",
"summarize_input_len_bytes": 857252,
"summarize_output_len_bytes": 1179,
"summarize_time": 177.35046529769897,
"generate_output_len_bytes": 2772,
"generate_time": 91.73111907641093
},
{
"backend": "transformers",
"base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
"task": "summary_and_generate",
"bits": 4,
"ngpus": 2,
"reps": 3,
"date": "08/18/2023 21:49:20",
"git_sha": "fc4826f2",
"n_gpus": 2,
"transformers": "4.30.2",
"bitsandbytes": "0.41.1",
"cuda": "11.8",
"hostname": "cloudvm",
"gpus": "2 x NVIDIA A100-SXM4-80GB (81920 MiB)",
"summarize_input_len_bytes": 857252,
"summarize_output_len_bytes": 1002,
"summarize_time": 56.894784371058144,
"generate_output_len_bytes": 2927,
"generate_time": 32.15500020980835
},
{
"backend": "transformers",
"base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
"task": "summary_and_generate",
"bits": 16,
"ngpus": 4,
"reps": 3,
"date": "08/18/2023 21:54:11",
"git_sha": "fc4826f2",
"n_gpus": 4,
"transformers": "4.30.2",
"bitsandbytes": "0.41.1",
"cuda": "11.8",
"hostname": "cloudvm",
"gpus": "4 x NVIDIA A100-SXM4-80GB (81920 MiB)",
"summarize_input_len_bytes": 857252,
"summarize_output_len_bytes": 1267,
"summarize_time": 41.46419604619344,
"generate_output_len_bytes": 2384,
"generate_time": 20.049855709075928
},
{
"backend": "transformers",
"base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
"task": "summary_and_generate",
"bits": 8,
"ngpus": 4,
"reps": 3,
"date": "08/18/2023 21:57:39",
"git_sha": "fc4826f2",
"n_gpus": 4,
"transformers": "4.30.2",
"bitsandbytes": "0.41.1",
"cuda": "11.8",
"hostname": "cloudvm",
"gpus": "4 x NVIDIA A100-SXM4-80GB (81920 MiB)",
"summarize_input_len_bytes": 857252,
"summarize_output_len_bytes": 1179,
"summarize_time": 183.73364853858948,
"generate_output_len_bytes": 2772,
"generate_time": 94.9052836894989
},
{
"backend": "transformers",
"base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
"task": "summary_and_generate",
"bits": 4,
"ngpus": 4,
"reps": 3,
"date": "08/18/2023 22:11:59",
"git_sha": "fc4826f2",
"n_gpus": 4,
"transformers": "4.30.2",
"bitsandbytes": "0.41.1",
"cuda": "11.8",
"hostname": "cloudvm",
"gpus": "4 x NVIDIA A100-SXM4-80GB (81920 MiB)",
"summarize_input_len_bytes": 857252,
"summarize_output_len_bytes": 1002,
"summarize_time": 59.204413731892906,
"generate_output_len_bytes": 2927,
"generate_time": 33.25332593917847
},
{
"backend": "transformers",
"base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
"task": "summary_and_generate",
"bits": 16,
"ngpus": 8,
"reps": 3,
"date": "08/18/2023 22:17:00",
"git_sha": "fc4826f2",
"n_gpus": 8,
"transformers": "4.30.2",
"bitsandbytes": "0.41.1",
"cuda": "11.8",
"hostname": "cloudvm",
"gpus": "8 x NVIDIA A100-SXM4-80GB (81920 MiB)",
"summarize_input_len_bytes": 857252,
"summarize_output_len_bytes": 1267,
"summarize_time": 42.09002653757731,
"generate_output_len_bytes": 2384,
"generate_time": 20.106103817621868
},
{
"backend": "transformers",
"base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
"task": "summary_and_generate",
"bits": 8,
"ngpus": 8,
"reps": 3,
"date": "08/18/2023 22:20:31",
"git_sha": "fc4826f2",
"n_gpus": 8,
"transformers": "4.30.2",
"bitsandbytes": "0.41.1",
"cuda": "11.8",
"hostname": "cloudvm",
"gpus": "8 x NVIDIA A100-SXM4-80GB (81920 MiB)",
"summarize_input_len_bytes": 857252,
"summarize_output_len_bytes": 1179,
"summarize_time": 185.28164370854697,
"generate_output_len_bytes": 2772,
"generate_time": 95.13023789723714
},
{
"backend": "transformers",
"base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
"task": "summary_and_generate",
"bits": 4,
"ngpus": 8,
"reps": 3,
"date": "08/18/2023 22:34:58",
"git_sha": "fc4826f2",
"n_gpus": 8,
"transformers": "4.30.2",
"bitsandbytes": "0.41.1",
"cuda": "11.8",
"hostname": "cloudvm",
"gpus": "8 x NVIDIA A100-SXM4-80GB (81920 MiB)",
"summarize_input_len_bytes": 857252,
"summarize_output_len_bytes": 1002,
"summarize_time": 60.9919019540151,
"generate_output_len_bytes": 2927,
"generate_time": 34.328625202178955
},
{
"backend": "transformers",
"base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
"task": "summary_and_generate",
"bits": 16,
"ngpus": 1,
"reps": 3,
"date": "08/18/2023 13:31:34",
"git_sha": "fc4826f2",
"n_gpus": 1,
"transformers": "4.30.2",
"bitsandbytes": "0.41.1",
"cuda": "11.7",
"hostname": "rippa",
"gpus": "1 x NVIDIA RTX 6000 Ada Generation (49140 MiB)",
"summarize_input_len_bytes": 857252,
"summarize_output_len_bytes": 1046,
"summarize_time": 52.49842747052511,
"generate_output_len_bytes": 2172,
"generate_time": 20.686774571736652
},
{
"backend": "transformers",
"base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
"task": "summary_and_generate",
"bits": 16,
"ngpus": 1,
"reps": 3,
"date": "08/18/2023 13:31:55",
"git_sha": "fc4826f2",
"n_gpus": 1,
"transformers": "4.30.2",
"bitsandbytes": "0.41.1",
"cuda": "11.7",
"hostname": "timemachine",
"gpus": "1 x NVIDIA GeForce RTX 3090 (24576 MiB)",
"exception": "OOM"
},
{
"backend": "transformers",
"base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
"task": "summary_and_generate",
"bits": 8,
"ngpus": 1,
"reps": 3,
"date": "08/18/2023 13:35:38",
"git_sha": "fc4826f2",
"n_gpus": 1,
"transformers": "4.30.2",
"bitsandbytes": "0.41.1",
"cuda": "11.7",
"hostname": "rippa",
"gpus": "1 x NVIDIA RTX 6000 Ada Generation (49140 MiB)",
"summarize_input_len_bytes": 857252,
"summarize_output_len_bytes": 1007,
"summarize_time": 168.9666860898336,
"generate_output_len_bytes": 2249,
"generate_time": 73.25518870353699
},
{
"backend": "transformers",
"base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
"task": "summary_and_generate",
"bits": 4,
"ngpus": 1,
"reps": 3,
"date": "08/18/2023 13:48:09",
"git_sha": "fc4826f2",
"n_gpus": 1,
"transformers": "4.30.2",
"bitsandbytes": "0.41.1",
"cuda": "11.7",
"hostname": "rippa",
"gpus": "1 x NVIDIA RTX 6000 Ada Generation (49140 MiB)",
"summarize_input_len_bytes": 857252,
"summarize_output_len_bytes": 856,
"summarize_time": 45.30513469378153,
"generate_output_len_bytes": 1802,
"generate_time": 22.000216643015545
},
{
"backend": "transformers",
"base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
"task": "summary_and_generate",
"bits": 16,
"ngpus": 2,
"reps": 3,
"date": "08/18/2023 13:51:56",
"git_sha": "fc4826f2",
"n_gpus": 2,
"transformers": "4.30.2",
"bitsandbytes": "0.41.1",
"cuda": "11.7",
"hostname": "rippa",
"gpus": "2 x NVIDIA RTX 6000 Ada Generation (49140 MiB)",
"summarize_input_len_bytes": 857252,
"summarize_output_len_bytes": 1046,
"summarize_time": 51.64275654157003,
"generate_output_len_bytes": 2172,
"generate_time": 20.737667481104534
},
{
"backend": "transformers",
"base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
"task": "summary_and_generate",
"bits": 8,
"ngpus": 1,
"reps": 3,
"date": "08/18/2023 13:35:47",
"git_sha": "fc4826f2",
"n_gpus": 1,
"transformers": "4.30.2",
"bitsandbytes": "0.41.1",
"cuda": "11.7",
"hostname": "timemachine",
"gpus": "1 x NVIDIA GeForce RTX 3090 (24576 MiB)",
"summarize_input_len_bytes": 857252,
"summarize_output_len_bytes": 980,
"summarize_time": 280.4669913450877,
"generate_output_len_bytes": 2132,
"generate_time": 141.7793349424998
},
{
"backend": "transformers",
"base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
"task": "summary_and_generate",
"bits": 4,
"ngpus": 1,
"reps": 3,
"date": "08/18/2023 13:57:35",
"git_sha": "fc4826f2",
"n_gpus": 1,
"transformers": "4.30.2",
"bitsandbytes": "0.41.1",
"cuda": "11.7",
"hostname": "timemachine",
"gpus": "1 x NVIDIA GeForce RTX 3090 (24576 MiB)",
"summarize_input_len_bytes": 857252,
"summarize_output_len_bytes": 869,
"summarize_time": 96.61887431144714,
"generate_output_len_bytes": 3244,
"generate_time": 82.98751719792683
},
{
"backend": "transformers",
"base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
"task": "summary_and_generate",
"bits": 8,
"ngpus": 2,
"reps": 3,
"date": "08/18/2023 13:55:51",
"git_sha": "fc4826f2",
"n_gpus": 2,
"transformers": "4.30.2",
"bitsandbytes": "0.41.1",
"cuda": "11.7",
"hostname": "rippa",
"gpus": "2 x NVIDIA RTX 6000 Ada Generation (49140 MiB)",
"summarize_input_len_bytes": 857252,
"summarize_output_len_bytes": 1007,
"summarize_time": 167.52292919158936,
"generate_output_len_bytes": 2249,
"generate_time": 71.82611886660258
},
{
"backend": "transformers",
"base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
"task": "summary_and_generate",
"bits": 4,
"ngpus": 2,
"reps": 3,
"date": "08/18/2023 14:08:08",
"git_sha": "fc4826f2",
"n_gpus": 2,
"transformers": "4.30.2",
"bitsandbytes": "0.41.1",
"cuda": "11.7",
"hostname": "rippa",
"gpus": "2 x NVIDIA RTX 6000 Ada Generation (49140 MiB)",
"summarize_input_len_bytes": 857252,
"summarize_output_len_bytes": 856,
"summarize_time": 47.14254776636759,
"generate_output_len_bytes": 1802,
"generate_time": 22.54850967725118
},
{
"backend": "transformers",
"base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
"task": "summary_and_generate",
"bits": 16,
"ngpus": 1,
"reps": 3,
"date": "08/18/2023 14:15:15",
"git_sha": "d13230ee",
"n_gpus": 1,
"transformers": "4.30.2",
"bitsandbytes": "0.41.1",
"cuda": "11.7",
"hostname": "rippa",
"gpus": "1 x NVIDIA GeForce RTX 4090 (24564 MiB)",
"exception": "OOM"
},
{
"backend": "transformers",
"base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
"task": "summary_and_generate",
"bits": 16,
"ngpus": 2,
"reps": 3,
"date": "08/18/2023 14:07:15",
"git_sha": "fc4826f2",
"n_gpus": 2,
"transformers": "4.30.2",
"bitsandbytes": "0.41.1",
"cuda": "11.7",
"hostname": "timemachine",
"gpus": "2 x NVIDIA GeForce RTX 3090 (24576 MiB)",
"summarize_input_len_bytes": 857252,
"summarize_output_len_bytes": 915,
"summarize_time": 89.59958203633626,
"generate_output_len_bytes": 2172,
"generate_time": 42.32424934705099
},
{
"backend": "transformers",
"base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
"task": "summary_and_generate",
"bits": 8,
"ngpus": 1,
"reps": 3,
"date": "08/18/2023 14:15:30",
"git_sha": "d13230ee",
"n_gpus": 1,
"transformers": "4.30.2",
"bitsandbytes": "0.41.1",
"cuda": "11.7",
"hostname": "rippa",
"gpus": "1 x NVIDIA GeForce RTX 4090 (24564 MiB)",
"summarize_input_len_bytes": 857252,
"summarize_output_len_bytes": 1024,
"summarize_time": 185.44230167071024,
"generate_output_len_bytes": 2122,
"generate_time": 88.11553311347961
},
{
"backend": "transformers",
"base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
"task": "summary_and_generate",
"bits": 4,
"ngpus": 1,
"reps": 3,
"date": "08/18/2023 14:29:36",
"git_sha": "d13230ee",
"n_gpus": 1,
"transformers": "4.30.2",
"bitsandbytes": "0.41.1",
"cuda": "11.7",
"hostname": "rippa",
"gpus": "1 x NVIDIA GeForce RTX 4090 (24564 MiB)",
"summarize_input_len_bytes": 857252,
"summarize_output_len_bytes": 922,
"summarize_time": 68.06459252039592,
"generate_output_len_bytes": 1802,
"generate_time": 27.939613421758015
},
{
"backend": "transformers",
"base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
"task": "summary_and_generate",
"bits": 8,
"ngpus": 2,
"reps": 3,
"date": "08/18/2023 14:26:29",
"git_sha": "d13230ee",
"n_gpus": 2,
"transformers": "4.30.2",
"bitsandbytes": "0.41.1",
"cuda": "11.7",
"hostname": "timemachine",
"gpus": "2 x NVIDIA GeForce RTX 3090 (24576 MiB)",
"summarize_input_len_bytes": 857252,
"summarize_output_len_bytes": 980,
"summarize_time": 280.8310640652974,
"generate_output_len_bytes": 2132,
"generate_time": 143.21916349728903
},
{
"backend": "transformers",
"base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
"task": "summary_and_generate",
"bits": 4,
"ngpus": 2,
"reps": 3,
"date": "08/18/2023 14:48:17",
"git_sha": "d13230ee",
"n_gpus": 2,
"transformers": "4.30.2",
"bitsandbytes": "0.41.1",
"cuda": "11.7",
"hostname": "timemachine",
"gpus": "2 x NVIDIA GeForce RTX 3090 (24576 MiB)",
"summarize_input_len_bytes": 857252,
"summarize_output_len_bytes": 869,
"summarize_time": 98.47045453389485,
"generate_output_len_bytes": 3244,
"generate_time": 83.71360301971436
},
{
"backend": "transformers",
"base_model": "h2oai/h2ogpt-4096-llama2-70b-chat",
"task": "summary_and_generate",
"bits": 16,
"ngpus": 1,
"reps": 3,
"date": "08/18/2023 15:35:13",
"git_sha": "0dec0f52",
"n_gpus": 1,
"transformers": "4.30.2",
"bitsandbytes": "0.41.1",
"cuda": "11.7",
"hostname": "rippa",
"gpus": "1 x NVIDIA RTX 6000 Ada Generation (49140 MiB)",
"exception": "OOM"
},
{
"backend": "transformers",
"base_model": "h2oai/h2ogpt-4096-llama2-70b-chat",
"task": "summary_and_generate",
"bits": 16,
"ngpus": 1,
"reps": 3,
"date": "08/18/2023 15:49:33",
"git_sha": "0cdb75ef",
"n_gpus": 1,
"transformers": "4.30.2",
"bitsandbytes": "0.41.1",
"cuda": "11.7",
"hostname": "timemachine",
"gpus": "1 x NVIDIA GeForce RTX 3090 (24576 MiB)",
"exception": "OOM"
},
{
"backend": "transformers",
"base_model": "h2oai/h2ogpt-4096-llama2-70b-chat",
"task": "summary_and_generate",
"bits": 8,
"ngpus": 1,
"reps": 3,
"date": "08/18/2023 16:26:53",
"git_sha": "0cdb75ef",
"n_gpus": 1,
"transformers": "4.30.2",
"bitsandbytes": "0.41.1",
"cuda": "11.7",
"hostname": "rippa",
"gpus": "1 x NVIDIA RTX 6000 Ada Generation (49140 MiB)",
"exception": "OOM"
},
{
"backend": "transformers",
"base_model": "h2oai/h2ogpt-4096-llama2-70b-chat",
"task": "summary_and_generate",
"bits": 8,
"ngpus": 1,
"reps": 3,
"date": "08/18/2023 16:27:32",
"git_sha": "0cdb75ef",
"n_gpus": 1,
"transformers": "4.30.2",
"bitsandbytes": "0.41.1",
"cuda": "11.7",
"hostname": "timemachine",
"gpus": "1 x NVIDIA GeForce RTX 3090 (24576 MiB)",
"exception": "OOM"
},
{
"backend": "transformers",
"base_model": "h2oai/h2ogpt-4096-llama2-70b-chat",
"task": "summary_and_generate",
"bits": 4,
"ngpus": 1,
"reps": 3,
"date": "08/18/2023 16:29:03",
"git_sha": "0cdb75ef",
"n_gpus": 1,
"transformers": "4.30.2",
"bitsandbytes": "0.41.1",
"cuda": "11.7",
"hostname": "timemachine",
"gpus": "1 x NVIDIA GeForce RTX 3090 (24576 MiB)",
"exception": "OOM"
},
{
"backend": "transformers",
"base_model": "h2oai/h2ogpt-4096-llama2-70b-chat",
"task": "summary_and_generate",
"bits": 4,
"ngpus": 2,
"reps": 3,
"date": "08/18/2023 17:26:02",
"git_sha": "0cdb75ef",
"n_gpus": 2,
"transformers": "4.30.2",
"bitsandbytes": "0.41.1",
"cuda": "11.7",
"hostname": "timemachine",
"gpus": "2 x NVIDIA GeForce RTX 3090 (24576 MiB)",
"exception": "OOM"
},
{
"backend": "text-generation-inference",
"base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
"task": "summary_and_generate",
"bits": 16,
"ngpus": 1,
"reps": 3,
"date": "08/18/2023 18:59:16",
"git_sha": "5691db4a",
"n_gpus": 1,
"transformers": "4.30.2",
"bitsandbytes": "0.41.1",
"cuda": "11.7",
"hostname": "rippa",
"gpus": "1 x NVIDIA RTX 6000 Ada Generation (49140 MiB)",
"summarize_input_len_bytes": 857252,
"summarize_output_len_bytes": 1075,
"summarize_time": 39.01545596122742,
"generate_output_len_bytes": 2242,
"generate_time": 10.151424566904703
},
{
"backend": "text-generation-inference",
"base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
"task": "summary_and_generate",
"bits": 16,
"ngpus": 1,
"reps": 3,
"date": "08/18/2023 19:03:13",
"git_sha": "5691db4a",
"n_gpus": 1,
"transformers": "4.30.2",
"bitsandbytes": "0.41.1",
"cuda": "11.7",
"hostname": "rippa",
"gpus": "1 x NVIDIA RTX 6000 Ada Generation (49140 MiB)",
"summarize_input_len_bytes": 857252,
"summarize_output_len_bytes": 940,
"summarize_time": 21.78233750661214,
"generate_output_len_bytes": 2130,
"generate_time": 15.794983307520548
},
{
"backend": "text-generation-inference",
"base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
"task": "summary_and_generate",
"bits": 16,
"ngpus": 2,
"reps": 3,
"date": "08/18/2023 19:38:40",
"git_sha": "6f05e8f1",
"n_gpus": 2,
"transformers": "4.30.2",
"bitsandbytes": "0.41.1",
"cuda": "11.7",
"hostname": "rippa",
"gpus": "2 x NVIDIA RTX 6000 Ada Generation (49140 MiB)",
"summarize_input_len_bytes": 857252,
"summarize_output_len_bytes": 1114,
"summarize_time": 7.636120955149333,
"generate_output_len_bytes": 2275,
"generate_time": 7.922623078028361
},
{
"backend": "text-generation-inference",
"base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
"task": "summary_and_generate",
"bits": 16,
"ngpus": 2,
"reps": 3,
"date": "08/18/2023 19:41:02",
"git_sha": "6f05e8f1",
"n_gpus": 2,
"transformers": "4.30.2",
"bitsandbytes": "0.41.1",
"cuda": "11.7",
"hostname": "rippa",
"gpus": "2 x NVIDIA RTX 6000 Ada Generation (49140 MiB)",
"summarize_input_len_bytes": 857252,
"summarize_output_len_bytes": 1024,
"summarize_time": 10.824170271555582,
"generate_output_len_bytes": 2130,
"generate_time": 9.209020694096884
},
{
"backend": "text-generation-inference",
"base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
"task": "summary_and_generate",
"bits": 16,
"ngpus": 1,
"reps": 3,
"date": "08/18/2023 19:55:17",
"git_sha": "2c548f21",
"n_gpus": 1,
"transformers": "4.30.2",
"bitsandbytes": "0.41.1",
"cuda": "11.7",
"hostname": "rippa",
"gpus": "1 x NVIDIA GeForce RTX 4090 (24564 MiB)",
"summarize_input_len_bytes": 857252,
"summarize_output_len_bytes": 1088,
"summarize_time": 24.39883820215861,
"generate_output_len_bytes": 2275,
"generate_time": 12.755743900934855
},
{
"backend": "transformers",
"base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
"task": "summary_and_generate",
"bits": 16,
"ngpus": 1,
"reps": 3,
"date": "08/19/2023 00:57:21",
"git_sha": "a227be4f",
"n_gpus": 1,
"transformers": "4.30.2",
"bitsandbytes": "0.41.1",
"cuda": "11.8",
"hostname": "cloudvm",
"gpus": "1 x NVIDIA A100-SXM4-80GB (81920 MiB)",
"summarize_input_len_bytes": 857252,
"summarize_output_len_bytes": 1267,
"summarize_time": 37.113919814427696,
"generate_output_len_bytes": 2384,
"generate_time": 18.36507821083069
},
{
"backend": "transformers",
"base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
"task": "summary_and_generate",
"bits": 16,
"ngpus": 1,
"reps": 3,
"date": "08/19/2023 01:00:31",
"git_sha": "a227be4f",
"n_gpus": 1,
"transformers": "4.30.2",
"bitsandbytes": "0.41.1",
"cuda": "11.8",
"hostname": "cloudvm",
"gpus": "1 x NVIDIA A100-SXM4-80GB (81920 MiB)",
"summarize_input_len_bytes": 857252,
"summarize_output_len_bytes": 1046,
"summarize_time": 49.79721482594808,
"generate_output_len_bytes": 2172,
"generate_time": 21.780913591384888
},
{
"backend": "transformers",
"base_model": "h2oai/h2ogpt-4096-llama2-70b-chat",
"task": "summary_and_generate",
"bits": 16,
"ngpus": 1,
"reps": 3,
"date": "08/19/2023 01:04:36",
"git_sha": "a227be4f",
"n_gpus": 1,
"transformers": "4.30.2",
"bitsandbytes": "0.41.1",
"cuda": "11.8",
"hostname": "cloudvm",
"gpus": "1 x NVIDIA A100-SXM4-80GB (81920 MiB)",
"exception": "OOM"
},
{
"backend": "transformers",
"base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
"task": "summary_and_generate",
"bits": 8,
"ngpus": 1,
"reps": 3,
"date": "08/19/2023 01:05:26",
"git_sha": "a227be4f",
"n_gpus": 1,
"transformers": "4.30.2",
"bitsandbytes": "0.41.1",
"cuda": "11.8",
"hostname": "cloudvm",
"gpus": "1 x NVIDIA A100-SXM4-80GB (81920 MiB)",
"summarize_input_len_bytes": 857252,
"summarize_output_len_bytes": 1179,
"summarize_time": 181.2461258570353,
"generate_output_len_bytes": 2772,
"generate_time": 92.64811905225118
},
{
"backend": "transformers",
"base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
"task": "summary_and_generate",
"bits": 8,
"ngpus": 1,
"reps": 3,
"date": "08/19/2023 01:19:33",
"git_sha": "a227be4f",
"n_gpus": 1,
"transformers": "4.30.2",
"bitsandbytes": "0.41.1",
"cuda": "11.8",
"hostname": "cloudvm",
"gpus": "1 x NVIDIA A100-SXM4-80GB (81920 MiB)",
"summarize_input_len_bytes": 857252,
"summarize_output_len_bytes": 800,
"summarize_time": 174.4576851526896,
"generate_output_len_bytes": 2713,
"generate_time": 119.14412077267964
},
{
"backend": "transformers",
"base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
"task": "summary_and_generate",
"bits": 4,
"ngpus": 1,
"reps": 3,
"date": "08/19/2023 01:36:14",
"git_sha": "a227be4f",
"n_gpus": 1,
"transformers": "4.30.2",
"bitsandbytes": "0.41.1",
"cuda": "11.8",
"hostname": "cloudvm",
"gpus": "1 x NVIDIA A100-SXM4-80GB (81920 MiB)",
"summarize_input_len_bytes": 857252,
"summarize_output_len_bytes": 1002,
"summarize_time": 53.39731526374817,
"generate_output_len_bytes": 2927,
"generate_time": 31.369641542434692
},
{
"backend": "transformers",
"base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
"task": "summary_and_generate",
"bits": 4,
"ngpus": 1,
"reps": 3,
"date": "08/19/2023 01:40:53",
"git_sha": "a227be4f",
"n_gpus": 1,
"transformers": "4.30.2",
"bitsandbytes": "0.41.1",
"cuda": "11.8",
"hostname": "cloudvm",
"gpus": "1 x NVIDIA A100-SXM4-80GB (81920 MiB)",
"summarize_input_len_bytes": 857252,
"summarize_output_len_bytes": 1000,
"summarize_time": 74.27096923192342,
"generate_output_len_bytes": 1802,
"generate_time": 29.860486666361492
},
{
"backend": "transformers",
"base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
"task": "summary_and_generate",
"bits": 16,
"ngpus": 2,
"reps": 3,
"date": "08/19/2023 01:48:09",
"git_sha": "a227be4f",
"n_gpus": 2,
"transformers": "4.30.2",
"bitsandbytes": "0.41.1",
"cuda": "11.8",
"hostname": "cloudvm",
"gpus": "2 x NVIDIA A100-SXM4-80GB (81920 MiB)",
"summarize_input_len_bytes": 857252,
"summarize_output_len_bytes": 1267,
"summarize_time": 39.926851193110146,
"generate_output_len_bytes": 2384,
"generate_time": 18.481745958328247
},
{
"backend": "transformers",
"base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
"task": "summary_and_generate",
"bits": 16,
"ngpus": 2,
"reps": 3,
"date": "08/19/2023 01:51:27",
"git_sha": "a227be4f",
"n_gpus": 2,
"transformers": "4.30.2",
"bitsandbytes": "0.41.1",
"cuda": "11.8",
"hostname": "cloudvm",
"gpus": "2 x NVIDIA A100-SXM4-80GB (81920 MiB)",
"summarize_input_len_bytes": 857252,
"summarize_output_len_bytes": 1046,
"summarize_time": 51.299002488454185,
"generate_output_len_bytes": 2172,
"generate_time": 21.828503131866455
},
{
"backend": "transformers",
"base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
"task": "summary_and_generate",
"bits": 8,
"ngpus": 2,
"reps": 3,
"date": "08/19/2023 01:56:20",
"git_sha": "a227be4f",
"n_gpus": 2,
"transformers": "4.30.2",
"bitsandbytes": "0.41.1",
"cuda": "11.8",
"hostname": "cloudvm",
"gpus": "2 x NVIDIA A100-SXM4-80GB (81920 MiB)",
"summarize_input_len_bytes": 857252,
"summarize_output_len_bytes": 1179,
"summarize_time": 178.19972308476767,
"generate_output_len_bytes": 2772,
"generate_time": 91.73426882425944
},
{
"backend": "transformers",
"base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
"task": "summary_and_generate",
"bits": 8,
"ngpus": 2,
"reps": 3,
"date": "08/19/2023 02:10:13",
"git_sha": "a227be4f",
"n_gpus": 2,
"transformers": "4.30.2",
"bitsandbytes": "0.41.1",
"cuda": "11.8",
"hostname": "cloudvm",
"gpus": "2 x NVIDIA A100-SXM4-80GB (81920 MiB)",
"summarize_input_len_bytes": 857252,
"summarize_output_len_bytes": 800,
"summarize_time": 180.7814578215281,
"generate_output_len_bytes": 2713,
"generate_time": 124.72717420260112
},
{
"backend": "transformers",
"base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
"task": "summary_and_generate",
"bits": 4,
"ngpus": 2,
"reps": 3,
"date": "08/19/2023 02:26:43",
"git_sha": "a227be4f",
"n_gpus": 2,
"transformers": "4.30.2",
"bitsandbytes": "0.41.1",
"cuda": "11.8",
"hostname": "cloudvm",
"gpus": "2 x NVIDIA A100-SXM4-80GB (81920 MiB)",
"summarize_input_len_bytes": 857252,
"summarize_output_len_bytes": 1002,
"summarize_time": 57.08081785837809,
"generate_output_len_bytes": 2927,
"generate_time": 32.26534946759542
},
{
"backend": "transformers",
"base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
"task": "summary_and_generate",
"bits": 4,
"ngpus": 2,
"reps": 3,
"date": "08/19/2023 02:31:36",
"git_sha": "a227be4f",
"n_gpus": 2,
"transformers": "4.30.2",
"bitsandbytes": "0.41.1",
"cuda": "11.8",
"hostname": "cloudvm",
"gpus": "2 x NVIDIA A100-SXM4-80GB (81920 MiB)",
"summarize_input_len_bytes": 857252,
"summarize_output_len_bytes": 1000,
"summarize_time": 79.9461121559143,
"generate_output_len_bytes": 1802,
"generate_time": 31.403561115264893
},
{
"backend": "transformers",
"base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
"task": "summary_and_generate",
"bits": 16,
"ngpus": 4,
"reps": 3,
"date": "08/19/2023 02:38:23",
"git_sha": "a227be4f",
"n_gpus": 4,
"transformers": "4.30.2",
"bitsandbytes": "0.41.1",
"cuda": "11.8",
"hostname": "cloudvm",
"gpus": "4 x NVIDIA A100-SXM4-80GB (81920 MiB)",
"summarize_input_len_bytes": 857252,
"summarize_output_len_bytes": 1267,
"summarize_time": 42.33977222442627,
"generate_output_len_bytes": 2384,
"generate_time": 19.723278522491455
},
{
"backend": "transformers",
"base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
"task": "summary_and_generate",
"bits": 16,
"ngpus": 4,
"reps": 3,
"date": "08/19/2023 02:41:52",
"git_sha": "a227be4f",
"n_gpus": 4,
"transformers": "4.30.2",
"bitsandbytes": "0.41.1",
"cuda": "11.8",
"hostname": "cloudvm",
"gpus": "4 x NVIDIA A100-SXM4-80GB (81920 MiB)",
"summarize_input_len_bytes": 857252,
"summarize_output_len_bytes": 1046,
"summarize_time": 55.377869288126625,
"generate_output_len_bytes": 2172,
"generate_time": 25.01458676656087
},
{
"backend": "transformers",
"base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
"task": "summary_and_generate",
"bits": 8,
"ngpus": 4,
"reps": 3,
"date": "08/19/2023 02:47:05",
"git_sha": "a227be4f",
"n_gpus": 4,
"transformers": "4.30.2",
"bitsandbytes": "0.41.1",
"cuda": "11.8",
"hostname": "cloudvm",
"gpus": "4 x NVIDIA A100-SXM4-80GB (81920 MiB)",
"summarize_input_len_bytes": 857252,
"summarize_output_len_bytes": 1179,
"summarize_time": 180.53432401021323,
"generate_output_len_bytes": 2772,
"generate_time": 91.93375285466512
},
{
"backend": "transformers",
"base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
"task": "summary_and_generate",
"bits": 8,
"ngpus": 4,
"reps": 3,
"date": "08/19/2023 03:01:07",
"git_sha": "a227be4f",
"n_gpus": 4,
"transformers": "4.30.2",
"bitsandbytes": "0.41.1",
"cuda": "11.8",
"hostname": "cloudvm",
"gpus": "4 x NVIDIA A100-SXM4-80GB (81920 MiB)",
"summarize_input_len_bytes": 857252,
"summarize_output_len_bytes": 800,
"summarize_time": 179.50477250417075,
"generate_output_len_bytes": 2713,
"generate_time": 124.40728378295898
},
{
"backend": "transformers",
"base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
"task": "summary_and_generate",
"bits": 4,
"ngpus": 4,
"reps": 3,
"date": "08/19/2023 03:17:36",
"git_sha": "a227be4f",
"n_gpus": 4,
"transformers": "4.30.2",
"bitsandbytes": "0.41.1",
"cuda": "11.8",
"hostname": "cloudvm",
"gpus": "4 x NVIDIA A100-SXM4-80GB (81920 MiB)",
"summarize_input_len_bytes": 857252,
"summarize_output_len_bytes": 1002,
"summarize_time": 58.62867816289266,
"generate_output_len_bytes": 2927,
"generate_time": 33.394495725631714
},
{
"backend": "transformers",
"base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
"task": "summary_and_generate",
"bits": 4,
"ngpus": 4,
"reps": 3,
"date": "08/19/2023 03:22:37",
"git_sha": "a227be4f",
"n_gpus": 4,
"transformers": "4.30.2",
"bitsandbytes": "0.41.1",
"cuda": "11.8",
"hostname": "cloudvm",
"gpus": "4 x NVIDIA A100-SXM4-80GB (81920 MiB)",
"summarize_input_len_bytes": 857252,
"summarize_output_len_bytes": 1000,
"summarize_time": 78.90612125396729,
"generate_output_len_bytes": 1802,
"generate_time": 30.697617371877033
},
{
"backend": "transformers",
"base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
"task": "summary_and_generate",
"bits": 16,
"ngpus": 8,
"reps": 3,
"date": "08/19/2023 03:29:20",
"git_sha": "a227be4f",
"n_gpus": 8,
"transformers": "4.30.2",
"bitsandbytes": "0.41.1",
"cuda": "11.8",
"hostname": "cloudvm",
"gpus": "8 x NVIDIA A100-SXM4-80GB (81920 MiB)",
"summarize_input_len_bytes": 857252,
"summarize_output_len_bytes": 1267,
"summarize_time": 40.498607873916626,
"generate_output_len_bytes": 2384,
"generate_time": 19.509677171707153
},
{
"backend": "transformers",
"base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
"task": "summary_and_generate",
"bits": 16,
"ngpus": 8,
"reps": 3,
"date": "08/19/2023 03:32:44",
"git_sha": "a227be4f",
"n_gpus": 8,
"transformers": "4.30.2",
"bitsandbytes": "0.41.1",
"cuda": "11.8",
"hostname": "cloudvm",
"gpus": "8 x NVIDIA A100-SXM4-80GB (81920 MiB)",
"summarize_input_len_bytes": 857252,
"summarize_output_len_bytes": 1046,
"summarize_time": 55.3964786529541,
"generate_output_len_bytes": 2172,
"generate_time": 24.347585439682007
},
{
"backend": "transformers",
"base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
"task": "summary_and_generate",
"bits": 8,
"ngpus": 8,
"reps": 3,
"date": "08/19/2023 03:37:55",
"git_sha": "a227be4f",
"n_gpus": 8,
"transformers": "4.30.2",
"bitsandbytes": "0.41.1",
"cuda": "11.8",
"hostname": "cloudvm",
"gpus": "8 x NVIDIA A100-SXM4-80GB (81920 MiB)",
"summarize_input_len_bytes": 857252,
"summarize_output_len_bytes": 1179,
"summarize_time": 186.71331850687662,
"generate_output_len_bytes": 2772,
"generate_time": 95.784650405248
},
{
"backend": "transformers",
"base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
"task": "summary_and_generate",
"bits": 8,
"ngpus": 8,
"reps": 3,
"date": "08/19/2023 03:52:28",
"git_sha": "a227be4f",
"n_gpus": 8,
"transformers": "4.30.2",
"bitsandbytes": "0.41.1",
"cuda": "11.8",
"hostname": "cloudvm",
"gpus": "8 x NVIDIA A100-SXM4-80GB (81920 MiB)",
"summarize_input_len_bytes": 857252,
"summarize_output_len_bytes": 800,
"summarize_time": 185.3280005455017,
"generate_output_len_bytes": 2713,
"generate_time": 125.91738017400105
},
{
"backend": "transformers",
"base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
"task": "summary_and_generate",
"bits": 4,
"ngpus": 8,
"reps": 3,
"date": "08/19/2023 04:09:18",
"git_sha": "a227be4f",
"n_gpus": 8,
"transformers": "4.30.2",
"bitsandbytes": "0.41.1",
"cuda": "11.8",
"hostname": "cloudvm",
"gpus": "8 x NVIDIA A100-SXM4-80GB (81920 MiB)",
"summarize_input_len_bytes": 857252,
"summarize_output_len_bytes": 1002,
"summarize_time": 60.18280680974325,
"generate_output_len_bytes": 2927,
"generate_time": 33.386961142222084
},
{
"backend": "transformers",
"base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
"task": "summary_and_generate",
"bits": 4,
"ngpus": 8,
"reps": 3,
"date": "08/19/2023 04:14:25",
"git_sha": "a227be4f",
"n_gpus": 8,
"transformers": "4.30.2",
"bitsandbytes": "0.41.1",
"cuda": "11.8",
"hostname": "cloudvm",
"gpus": "8 x NVIDIA A100-SXM4-80GB (81920 MiB)",
"summarize_input_len_bytes": 857252,
"summarize_output_len_bytes": 1000,
"summarize_time": 83.04790727297465,
"generate_output_len_bytes": 1802,
"generate_time": 32.24992283185323
},
{
"backend": "transformers",
"base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
"task": "summary_and_generate",
"bits": 16,
"ngpus": 1,
"reps": 3,
"date": "08/18/2023 23:26:19",
"git_sha": "0cdb75ef",
"n_gpus": 1,
"transformers": "4.30.2",
"bitsandbytes": "0.41.1",
"cuda": "11.7",
"hostname": "recypabaszmhhmuae",
"gpus": "1 x NVIDIA RTX A6000 (46068 MiB)",
"summarize_input_len_bytes": 857252,
"summarize_output_len_bytes": 1417,
"summarize_time": 47.03754989306132,
"generate_output_len_bytes": 2384,
"generate_time": 19.964784463246662
},
{
"backend": "transformers",
"base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
"task": "summary_and_generate",
"bits": 16,
"ngpus": 1,
"reps": 3,
"date": "08/18/2023 23:33:09",
"git_sha": "0cdb75ef",
"n_gpus": 1,
"transformers": "4.30.2",
"bitsandbytes": "0.41.1",
"cuda": "11.7",
"hostname": "recypabaszmhhmuae",
"gpus": "1 x NVIDIA RTX A6000 (46068 MiB)",
"summarize_input_len_bytes": 857252,
"summarize_output_len_bytes": 915,
"summarize_time": 71.91136892636617,
"generate_output_len_bytes": 2480,
"generate_time": 33.6295014222463
},
{
"backend": "transformers",
"base_model": "h2oai/h2ogpt-4096-llama2-70b-chat",
"task": "summary_and_generate",
"bits": 16,
"ngpus": 1,
"reps": 3,
"date": "08/18/2023 23:44:08",
"git_sha": "0cdb75ef",
"n_gpus": 1,
"transformers": "4.30.2",
"bitsandbytes": "0.41.1",
"cuda": "11.7",
"hostname": "recypabaszmhhmuae",
"gpus": "1 x NVIDIA RTX A6000 (46068 MiB)",
"exception": "OOM"
},
{
"backend": "transformers",
"base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
"task": "summary_and_generate",
"bits": 8,
"ngpus": 1,
"reps": 3,
"date": "08/19/2023 00:45:42",
"git_sha": "0cdb75ef",
"n_gpus": 1,
"transformers": "4.30.2",
"bitsandbytes": "0.41.1",
"cuda": "11.7",
"hostname": "recypabaszmhhmuae",
"gpus": "1 x NVIDIA RTX A6000 (46068 MiB)",
"summarize_input_len_bytes": 857252,
"summarize_output_len_bytes": 1007,
"summarize_time": 148.61560583114624,
"generate_output_len_bytes": 2357,
"generate_time": 89.01266026496887
},
{
"backend": "transformers",
"base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
"task": "summary_and_generate",
"bits": 8,
"ngpus": 1,
"reps": 3,
"date": "08/19/2023 00:58:00",
"git_sha": "0cdb75ef",
"n_gpus": 1,
"transformers": "4.30.2",
"bitsandbytes": "0.41.1",
"cuda": "11.7",
"hostname": "recypabaszmhhmuae",
"gpus": "1 x NVIDIA RTX A6000 (46068 MiB)",
"summarize_input_len_bytes": 857252,
"summarize_output_len_bytes": 763,
"summarize_time": 193.99270629882812,
"generate_output_len_bytes": 2129,
"generate_time": 95.66660761833191
},
{
"backend": "transformers",
"base_model": "h2oai/h2ogpt-4096-llama2-70b-chat",
"task": "summary_and_generate",
"bits": 8,
"ngpus": 1,
"reps": 3,
"date": "08/19/2023 01:13:01",
"git_sha": "0cdb75ef",
"n_gpus": 1,
"transformers": "4.30.2",
"bitsandbytes": "0.41.1",
"cuda": "11.7",
"hostname": "recypabaszmhhmuae",
"gpus": "1 x NVIDIA RTX A6000 (46068 MiB)",
"exception": "OOM"
},
{
"backend": "transformers",
"base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
"task": "summary_and_generate",
"bits": 4,
"ngpus": 1,
"reps": 3,
"date": "08/19/2023 01:13:55",
"git_sha": "0cdb75ef",
"n_gpus": 1,
"transformers": "4.30.2",
"bitsandbytes": "0.41.1",
"cuda": "11.7",
"hostname": "recypabaszmhhmuae",
"gpus": "1 x NVIDIA RTX A6000 (46068 MiB)",
"summarize_input_len_bytes": 857252,
"summarize_output_len_bytes": 991,
"summarize_time": 61.52411222457886,
"generate_output_len_bytes": 2927,
"generate_time": 32.030215660730995
},
{
"backend": "transformers",
"base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
"task": "summary_and_generate",
"bits": 4,
"ngpus": 1,
"reps": 3,
"date": "08/19/2023 01:19:00",
"git_sha": "0cdb75ef",
"n_gpus": 1,
"transformers": "4.30.2",
"bitsandbytes": "0.41.1",
"cuda": "11.7",
"hostname": "recypabaszmhhmuae",
"gpus": "1 x NVIDIA RTX A6000 (46068 MiB)",
"summarize_input_len_bytes": 857252,
"summarize_output_len_bytes": 1000,
"summarize_time": 81.13888708750407,
"generate_output_len_bytes": 3486,
"generate_time": 55.5331826210022
},
{
"backend": "transformers",
"base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
"task": "summary_and_generate",
"bits": 16,
"ngpus": 2,
"reps": 3,
"date": "08/19/2023 01:27:49",
"git_sha": "0cdb75ef",
"n_gpus": 2,
"transformers": "4.30.2",
"bitsandbytes": "0.41.1",
"cuda": "11.7",
"hostname": "recypabaszmhhmuae",
"gpus": "2 x NVIDIA RTX A6000 (46068 MiB)",
"summarize_input_len_bytes": 857252,
"summarize_output_len_bytes": 1417,
"summarize_time": 47.41046245892843,
"generate_output_len_bytes": 2384,
"generate_time": 20.660600344340008
},
{
"backend": "transformers",
"base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
"task": "summary_and_generate",
"bits": 16,
"ngpus": 2,
"reps": 3,
"date": "08/19/2023 01:34:28",
"git_sha": "0cdb75ef",
"n_gpus": 2,
"transformers": "4.30.2",
"bitsandbytes": "0.41.1",
"cuda": "11.7",
"hostname": "recypabaszmhhmuae",
"gpus": "2 x NVIDIA RTX A6000 (46068 MiB)",
"summarize_input_len_bytes": 857252,
"summarize_output_len_bytes": 915,
"summarize_time": 72.85646979014079,
"generate_output_len_bytes": 2480,
"generate_time": 34.05861854553223
},
{
"backend": "transformers",
"base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
"task": "summary_and_generate",
"bits": 8,
"ngpus": 2,
"reps": 3,
"date": "08/19/2023 02:39:22",
"git_sha": "0cdb75ef",
"n_gpus": 2,
"transformers": "4.30.2",
"bitsandbytes": "0.41.1",
"cuda": "11.7",
"hostname": "recypabaszmhhmuae",
"gpus": "2 x NVIDIA RTX A6000 (46068 MiB)",
"summarize_input_len_bytes": 857252,
"summarize_output_len_bytes": 1007,
"summarize_time": 152.54357608159384,
"generate_output_len_bytes": 2357,
"generate_time": 91.51808977127075
},
{
"backend": "transformers",
"base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
"task": "summary_and_generate",
"bits": 8,
"ngpus": 2,
"reps": 3,
"date": "08/19/2023 02:52:58",
"git_sha": "0cdb75ef",
"n_gpus": 2,
"transformers": "4.30.2",
"bitsandbytes": "0.41.1",
"cuda": "11.7",
"hostname": "recypabaszmhhmuae",
"gpus": "2 x NVIDIA RTX A6000 (46068 MiB)",
"summarize_input_len_bytes": 857252,
"summarize_output_len_bytes": 763,
"summarize_time": 195.92926557858786,
"generate_output_len_bytes": 2129,
"generate_time": 96.55542047818501
},
{
"backend": "transformers",
"base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
"task": "summary_and_generate",
"bits": 4,
"ngpus": 2,
"reps": 3,
"date": "08/19/2023 03:15:01",
"git_sha": "0cdb75ef",
"n_gpus": 2,
"transformers": "4.30.2",
"bitsandbytes": "0.41.1",
"cuda": "11.7",
"hostname": "recypabaszmhhmuae",
"gpus": "2 x NVIDIA RTX A6000 (46068 MiB)",
"summarize_input_len_bytes": 857252,
"summarize_output_len_bytes": 991,
"summarize_time": 64.64422671000163,
"generate_output_len_bytes": 2927,
"generate_time": 33.30378039677938
},
{
"backend": "transformers",
"base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
"task": "summary_and_generate",
"bits": 4,
"ngpus": 2,
"reps": 3,
"date": "08/19/2023 03:20:19",
"git_sha": "0cdb75ef",
"n_gpus": 2,
"transformers": "4.30.2",
"bitsandbytes": "0.41.1",
"cuda": "11.7",
"hostname": "recypabaszmhhmuae",
"gpus": "2 x NVIDIA RTX A6000 (46068 MiB)",
"summarize_input_len_bytes": 857252,
"summarize_output_len_bytes": 1000,
"summarize_time": 84.57761120796204,
"generate_output_len_bytes": 3486,
"generate_time": 57.59072462717692
},
{
"backend": "transformers",
"base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
"task": "summary_and_generate",
"bits": 16,
"ngpus": 4,
"reps": 3,
"date": "08/19/2023 03:28:44",
"git_sha": "0cdb75ef",
"n_gpus": 4,
"transformers": "4.30.2",
"bitsandbytes": "0.41.1",
"cuda": "11.7",
"hostname": "recypabaszmhhmuae",
"gpus": "4 x NVIDIA RTX A6000 (46068 MiB)",
"summarize_input_len_bytes": 857252,
"summarize_output_len_bytes": 1417,
"summarize_time": 49.08898218472799,
"generate_output_len_bytes": 2384,
"generate_time": 21.489527861277264
},
{
"backend": "transformers",
"base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
"task": "summary_and_generate",
"bits": 16,
"ngpus": 4,
"reps": 3,
"date": "08/19/2023 03:32:39",
"git_sha": "0cdb75ef",
"n_gpus": 4,
"transformers": "4.30.2",
"bitsandbytes": "0.41.1",
"cuda": "11.7",
"hostname": "recypabaszmhhmuae",
"gpus": "4 x NVIDIA RTX A6000 (46068 MiB)",
"summarize_input_len_bytes": 857252,
"summarize_output_len_bytes": 915,
"summarize_time": 74.43774898846944,
"generate_output_len_bytes": 2480,
"generate_time": 34.72673638661703
},
{
"backend": "transformers",
"base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
"task": "summary_and_generate",
"bits": 8,
"ngpus": 4,
"reps": 3,
"date": "08/19/2023 03:39:21",
"git_sha": "0cdb75ef",
"n_gpus": 4,
"transformers": "4.30.2",
"bitsandbytes": "0.41.1",
"cuda": "11.7",
"hostname": "recypabaszmhhmuae",
"gpus": "4 x NVIDIA RTX A6000 (46068 MiB)",
"summarize_input_len_bytes": 857252,
"summarize_output_len_bytes": 1007,
"summarize_time": 153.41076453526816,
"generate_output_len_bytes": 2357,
"generate_time": 91.14894040425618
},
{
"backend": "transformers",
"base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
"task": "summary_and_generate",
"bits": 8,
"ngpus": 4,
"reps": 3,
"date": "08/19/2023 03:52:00",
"git_sha": "0cdb75ef",
"n_gpus": 4,
"transformers": "4.30.2",
"bitsandbytes": "0.41.1",
"cuda": "11.7",
"hostname": "recypabaszmhhmuae",
"gpus": "4 x NVIDIA RTX A6000 (46068 MiB)",
"summarize_input_len_bytes": 857252,
"summarize_output_len_bytes": 763,
"summarize_time": 199.79869039853415,
"generate_output_len_bytes": 2129,
"generate_time": 98.61504419644673
},
{
"backend": "transformers",
"base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
"task": "summary_and_generate",
"bits": 4,
"ngpus": 4,
"reps": 3,
"date": "08/19/2023 04:08:12",
"git_sha": "0cdb75ef",
"n_gpus": 4,
"transformers": "4.30.2",
"bitsandbytes": "0.41.1",
"cuda": "11.7",
"hostname": "recypabaszmhhmuae",
"gpus": "4 x NVIDIA RTX A6000 (46068 MiB)",
"summarize_input_len_bytes": 857252,
"summarize_output_len_bytes": 991,
"summarize_time": 66.49260465304057,
"generate_output_len_bytes": 2927,
"generate_time": 34.17951035499573
},
{
"backend": "transformers",
"base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
"task": "summary_and_generate",
"bits": 4,
"ngpus": 4,
"reps": 3,
"date": "08/19/2023 04:13:39",
"git_sha": "0cdb75ef",
"n_gpus": 4,
"transformers": "4.30.2",
"bitsandbytes": "0.41.1",
"cuda": "11.7",
"hostname": "recypabaszmhhmuae",
"gpus": "4 x NVIDIA RTX A6000 (46068 MiB)",
"summarize_input_len_bytes": 857252,
"summarize_output_len_bytes": 1000,
"summarize_time": 87.65787092844646,
"generate_output_len_bytes": 3486,
"generate_time": 59.3750696182251
},
{
"backend": "transformers",
"base_model": "h2oai/h2ogpt-4096-llama2-70b-chat",
"task": "summary_and_generate",
"bits": 4,
"ngpus": 1,
"reps": 3,
"date": "08/18/2023 22:22:24",
"git_sha": "b63768c6",
"n_gpus": 1,
"transformers": "4.31.0",
"bitsandbytes": "0.41.1",
"cuda": "11.7",
"hostname": "rippa",
"gpus": "1 x NVIDIA RTX 6000 Ada Generation (49140 MiB)",
"summarize_input_len_bytes": 857252,
"summarize_output_len_bytes": 948,
"summarize_time": 122.13213857014973,
"generate_output_len_bytes": 2826,
"generate_time": 66.34098903338115
},
{
"backend": "transformers",
"base_model": "h2oai/h2ogpt-4096-llama2-70b-chat",
"task": "summary_and_generate",
"bits": 4,
"ngpus": 2,
"reps": 3,
"date": "08/18/2023 22:33:33",
"git_sha": "c1348fb3",
"n_gpus": 2,
"transformers": "4.31.0",
"bitsandbytes": "0.41.1",
"cuda": "11.7",
"hostname": "rippa",
"gpus": "2 x NVIDIA RTX 6000 Ada Generation (49140 MiB)",
"summarize_input_len_bytes": 857252,
"summarize_output_len_bytes": 948,
"summarize_time": 120.53812781969707,
"generate_output_len_bytes": 2826,
"generate_time": 67.28052496910095
},
{
"backend": "text-generation-inference",
"base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
"task": "summary_and_generate",
"bits": 16,
"ngpus": 1,
"reps": 3,
"date": "08/18/2023 22:56:52",
"git_sha": "fb84de76",
"n_gpus": 1,
"transformers": "4.31.0",
"bitsandbytes": "0.41.1",
"cuda": "11.7",
"hostname": "timemachine",
"gpus": "1 x NVIDIA GeForce RTX 3090 (24576 MiB)",
"summarize_input_len_bytes": 857252,
"summarize_output_len_bytes": 1036,
"summarize_time": 29.128981749216717,
"generate_output_len_bytes": 2242,
"generate_time": 12.197122732798258
},
{
"backend": "text-generation-inference",
"base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
"task": "summary_and_generate",
"bits": 16,
"ngpus": 1,
"reps": 3,
"date": "08/18/2023 23:00:33",
"git_sha": "fb84de76",
"n_gpus": 1,
"transformers": "4.31.0",
"bitsandbytes": "0.41.1",
"cuda": "11.7",
"hostname": "timemachine",
"gpus": "1 x NVIDIA GeForce RTX 3090 (24576 MiB)",
"exception": "OOM"
},
{
"backend": "transformers",
"base_model": "h2oai/h2ogpt-4096-llama2-70b-chat",
"task": "summary_and_generate",
"bits": 16,
"ngpus": 1,
"reps": 3,
"date": "08/19/2023 05:47:43",
"git_sha": "22352acd",
"n_gpus": 1,
"transformers": "4.31.0",
"bitsandbytes": "0.41.1",
"cuda": "11.7",
"hostname": "recypabaszmhhmuae",
"gpus": "1 x NVIDIA RTX A6000 (46068 MiB)",
"exception": "OOM"
},
{
"backend": "transformers",
"base_model": "h2oai/h2ogpt-4096-llama2-70b-chat",
"task": "summary_and_generate",
"bits": 8,
"ngpus": 1,
"reps": 3,
"date": "08/19/2023 05:48:58",
"git_sha": "22352acd",
"n_gpus": 1,
"transformers": "4.31.0",
"bitsandbytes": "0.41.1",
"cuda": "11.7",
"hostname": "recypabaszmhhmuae",
"gpus": "1 x NVIDIA RTX A6000 (46068 MiB)",
"exception": "OOM"
},
{
"backend": "transformers",
"base_model": "h2oai/h2ogpt-4096-llama2-70b-chat",
"task": "summary_and_generate",
"bits": 4,
"ngpus": 1,
"reps": 3,
"date": "08/19/2023 05:50:40",
"git_sha": "22352acd",
"n_gpus": 1,
"transformers": "4.31.0",
"bitsandbytes": "0.41.1",
"cuda": "11.7",
"hostname": "recypabaszmhhmuae",
"gpus": "1 x NVIDIA RTX A6000 (46068 MiB)",
"summarize_input_len_bytes": 857252,
"summarize_output_len_bytes": 948,
"summarize_time": 165.05752809842429,
"generate_output_len_bytes": 2605,
"generate_time": 93.80659619967143
},
{
"backend": "transformers",
"base_model": "h2oai/h2ogpt-4096-llama2-70b-chat",
"task": "summary_and_generate",
"bits": 16,
"ngpus": 2,
"reps": 3,
"date": "08/19/2023 06:05:51",
"git_sha": "22352acd",
"n_gpus": 2,
"transformers": "4.31.0",
"bitsandbytes": "0.41.1",
"cuda": "11.7",
"hostname": "recypabaszmhhmuae",
"gpus": "2 x NVIDIA RTX A6000 (46068 MiB)",
"exception": "OOM"
},
{
"backend": "transformers",
"base_model": "h2oai/h2ogpt-4096-llama2-70b-chat",
"task": "summary_and_generate",
"bits": 8,
"ngpus": 2,
"reps": 3,
"date": "08/19/2023 06:10:05",
"git_sha": "22352acd",
"n_gpus": 2,
"transformers": "4.31.0",
"bitsandbytes": "0.41.1",
"cuda": "11.7",
"hostname": "recypabaszmhhmuae",
"gpus": "2 x NVIDIA RTX A6000 (46068 MiB)",
"summarize_input_len_bytes": 857252,
"summarize_output_len_bytes": 906,
"summarize_time": 410.0691332022349,
"generate_output_len_bytes": 521,
"generate_time": 57.71272214253744
},
{
"backend": "transformers",
"base_model": "h2oai/h2ogpt-4096-llama2-70b-chat",
"task": "summary_and_generate",
"bits": 4,
"ngpus": 2,
"reps": 3,
"date": "08/19/2023 06:36:58",
"git_sha": "22352acd",
"n_gpus": 2,
"transformers": "4.31.0",
"bitsandbytes": "0.41.1",
"cuda": "11.7",
"hostname": "recypabaszmhhmuae",
"gpus": "2 x NVIDIA RTX A6000 (46068 MiB)",
"summarize_input_len_bytes": 857252,
"summarize_output_len_bytes": 948,
"summarize_time": 171.74388321240744,
"generate_output_len_bytes": 2605,
"generate_time": 97.00725762049358
},
{
"backend": "transformers",
"base_model": "h2oai/h2ogpt-4096-llama2-70b-chat",
"task": "summary_and_generate",
"bits": 16,
"ngpus": 4,
"reps": 3,
"date": "08/19/2023 06:51:13",
"git_sha": "22352acd",
"n_gpus": 4,
"transformers": "4.31.0",
"bitsandbytes": "0.41.1",
"cuda": "11.7",
"hostname": "recypabaszmhhmuae",
"gpus": "4 x NVIDIA RTX A6000 (46068 MiB)",
"summarize_input_len_bytes": 857252,
"summarize_output_len_bytes": 792,
"summarize_time": 267.0555826822917,
"generate_output_len_bytes": 2783,
"generate_time": 163.99818523724875
},
{
"backend": "transformers",
"base_model": "h2oai/h2ogpt-4096-llama2-70b-chat",
"task": "summary_and_generate",
"bits": 8,
"ngpus": 4,
"reps": 3,
"date": "08/19/2023 07:13:35",
"git_sha": "22352acd",
"n_gpus": 4,
"transformers": "4.31.0",
"bitsandbytes": "0.41.1",
"cuda": "11.7",
"hostname": "recypabaszmhhmuae",
"gpus": "4 x NVIDIA RTX A6000 (46068 MiB)",
"summarize_input_len_bytes": 857252,
"summarize_output_len_bytes": 906,
"summarize_time": 413.9569679101308,
"generate_output_len_bytes": 521,
"generate_time": 58.52583885192871
},
{
"backend": "transformers",
"base_model": "h2oai/h2ogpt-4096-llama2-70b-chat",
"task": "summary_and_generate",
"bits": 4,
"ngpus": 4,
"reps": 3,
"date": "08/19/2023 07:38:02",
"git_sha": "22352acd",
"n_gpus": 4,
"transformers": "4.31.0",
"bitsandbytes": "0.41.1",
"cuda": "11.7",
"hostname": "recypabaszmhhmuae",
"gpus": "4 x NVIDIA RTX A6000 (46068 MiB)",
"summarize_input_len_bytes": 857252,
"summarize_output_len_bytes": 948,
"summarize_time": 175.4907926718394,
"generate_output_len_bytes": 2605,
"generate_time": 98.97720170021057
},
{
"backend": "text-generation-inference",
"base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
"task": "summary_and_generate",
"bits": 16,
"ngpus": 2,
"reps": 3,
"date": "08/19/2023 12:35:08",
"git_sha": "29a002e5",
"n_gpus": 2,
"transformers": "4.31.0",
"bitsandbytes": "0.41.1",
"cuda": "11.7",
"hostname": "timemachine",
"gpus": "2 x NVIDIA GeForce RTX 3090 (24576 MiB)",
"summarize_input_len_bytes": 857252,
"summarize_output_len_bytes": 983,
"summarize_time": 42.21107586224874,
"generate_output_len_bytes": 2130,
"generate_time": 16.94527777036031
},
{
"backend": "text-generation-inference",
"base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
"task": "summary_and_generate",
"bits": 16,
"ngpus": 2,
"reps": 3,
"date": "08/21/2023 20:03:36",
"git_sha": "51318f44",
"n_gpus": 2,
"transformers": "4.31.0",
"bitsandbytes": "0.41.1",
"cuda": "11.7",
"hostname": "recypabaszmhhmuae",
"gpus": "2 x NVIDIA RTX A6000 (46068 MiB)",
"summarize_input_len_bytes": 857252,
"summarize_output_len_bytes": 1267,
"summarize_time": 41.0461368560791,
"generate_output_len_bytes": 2383,
"generate_time": 19.614749511082966
},
{
"backend": "text-generation-inference",
"base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
"task": "summary_and_generate",
"bits": 16,
"ngpus": 4,
"reps": 3,
"date": "08/21/2023 20:07:35",
"git_sha": "51318f44",
"n_gpus": 4,
"transformers": "4.31.0",
"bitsandbytes": "0.41.1",
"cuda": "11.7",
"hostname": "recypabaszmhhmuae",
"gpus": "4 x NVIDIA RTX A6000 (46068 MiB)",
"summarize_input_len_bytes": 857252,
"summarize_output_len_bytes": 1267,
"summarize_time": 42.8376894791921,
"generate_output_len_bytes": 2383,
"generate_time": 20.2719091574351
},
{
"backend": "text-generation-inference",
"base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
"task": "summary_and_generate",
"bits": 16,
"ngpus": 1,
"reps": 3,
"date": "08/21/2023 20:42:46",
"git_sha": "2f4bb620",
"n_gpus": 1,
"transformers": "4.31.0",
"bitsandbytes": "0.41.1",
"cuda": "11.7",
"hostname": "recypabaszmhhmuae",
"gpus": "1 x NVIDIA RTX A6000 (46068 MiB)",
"exception": "OOM"
},
{
"backend": "text-generation-inference",
"base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
"task": "summary_and_generate",
"bits": 16,
"ngpus": 4,
"reps": 3,
"date": "08/21/2023 20:50:19",
"git_sha": "2f4bb620",
"n_gpus": 4,
"transformers": "4.31.0",
"bitsandbytes": "0.41.1",
"cuda": "11.7",
"hostname": "recypabaszmhhmuae",
"gpus": "4 x NVIDIA RTX A6000 (46068 MiB)",
"summarize_input_len_bytes": 857252,
"summarize_output_len_bytes": 915,
"summarize_time": 66.52468911806743,
"generate_output_len_bytes": 2479,
"generate_time": 29.828714847564697
},
{
"backend": "text-generation-inference",
"base_model": "h2oai/h2ogpt-4096-llama2-70b-chat",
"task": "summary_and_generate",
"bits": 16,
"ngpus": 4,
"reps": 3,
"date": "08/21/2023 20:56:04",
"git_sha": "2f4bb620",
"n_gpus": 4,
"transformers": "4.31.0",
"bitsandbytes": "0.41.1",
"cuda": "11.7",
"hostname": "recypabaszmhhmuae",
"gpus": "4 x NVIDIA RTX A6000 (46068 MiB)",
"exception": "OOM"
},
{
"backend": "text-generation-inference",
"base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
"task": "summary_and_generate",
"bits": 16,
"ngpus": 1,
"reps": 3,
"date": "08/21/2023 19:55:35",
"git_sha": "51318f44",
"n_gpus": 1,
"transformers": "4.31.0",
"bitsandbytes": "0.41.1",
"cuda": "11.8",
"hostname": "cloudvm",
"gpus": "1 x NVIDIA A100-SXM4-80GB (81920 MiB)",
"summarize_input_len_bytes": 857252,
"summarize_output_len_bytes": 1267,
"summarize_time": 38.753786404927574,
"generate_output_len_bytes": 2383,
"generate_time": 19.529522736867268
},
{
"backend": "text-generation-inference",
"base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
"task": "summary_and_generate",
"bits": 16,
"ngpus": 2,
"reps": 3,
"date": "08/21/2023 20:36:13",
"git_sha": "51318f44",
"n_gpus": 2,
"transformers": "4.31.0",
"bitsandbytes": "0.41.1",
"cuda": "11.8",
"hostname": "cloudvm",
"gpus": "2 x NVIDIA A100-SXM4-80GB (81920 MiB)",
"summarize_input_len_bytes": 857252,
"summarize_output_len_bytes": 1267,
"summarize_time": 41.024452924728394,
"generate_output_len_bytes": 2383,
"generate_time": 20.29120985666911
},
{
"backend": "text-generation-inference",
"base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
"task": "summary_and_generate",
"bits": 16,
"ngpus": 2,
"reps": 3,
"date": "08/21/2023 20:40:08",
"git_sha": "51318f44",
"n_gpus": 2,
"transformers": "4.31.0",
"bitsandbytes": "0.41.1",
"cuda": "11.8",
"hostname": "cloudvm",
"gpus": "2 x NVIDIA A100-SXM4-80GB (81920 MiB)",
"summarize_input_len_bytes": 857252,
"summarize_output_len_bytes": 1046,
"summarize_time": 54.554532527923584,
"generate_output_len_bytes": 2171,
"generate_time": 24.604793945948284
},
{
"backend": "text-generation-inference",
"base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
"task": "summary_and_generate",
"bits": 16,
"ngpus": 4,
"reps": 3,
"date": "08/21/2023 20:50:05",
"git_sha": "51318f44",
"n_gpus": 4,
"transformers": "4.31.0",
"bitsandbytes": "0.41.1",
"cuda": "11.8",
"hostname": "cloudvm",
"gpus": "4 x NVIDIA A100-SXM4-80GB (81920 MiB)",
"summarize_input_len_bytes": 857252,
"summarize_output_len_bytes": 1267,
"summarize_time": 41.09950613975525,
"generate_output_len_bytes": 2383,
"generate_time": 20.947362899780273
},
{
"backend": "text-generation-inference",
"base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
"task": "summary_and_generate",
"bits": 16,
"ngpus": 4,
"reps": 3,
"date": "08/21/2023 20:54:08",
"git_sha": "51318f44",
"n_gpus": 4,
"transformers": "4.31.0",
"bitsandbytes": "0.41.1",
"cuda": "11.8",
"hostname": "cloudvm",
"gpus": "4 x NVIDIA A100-SXM4-80GB (81920 MiB)",
"summarize_input_len_bytes": 857252,
"summarize_output_len_bytes": 1046,
"summarize_time": 58.3172922929128,
"generate_output_len_bytes": 2171,
"generate_time": 25.735217014948528
},
{
"backend": "text-generation-inference",
"base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
"task": "summary_and_generate",
"bits": 16,
"ngpus": 8,
"reps": 3,
"date": "08/21/2023 21:01:04",
"git_sha": "51318f44",
"n_gpus": 8,
"transformers": "4.31.0",
"bitsandbytes": "0.41.1",
"cuda": "11.8",
"hostname": "cloudvm",
"gpus": "8 x NVIDIA A100-SXM4-80GB (81920 MiB)",
"summarize_input_len_bytes": 857252,
"summarize_output_len_bytes": 1267,
"summarize_time": 42.85940829912821,
"generate_output_len_bytes": 2383,
"generate_time": 21.380353291829426
},
{
"backend": "text-generation-inference",
"base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
"task": "summary_and_generate",
"bits": 16,
"ngpus": 8,
"reps": 3,
"date": "08/21/2023 21:05:24",
"git_sha": "51318f44",
"n_gpus": 8,
"transformers": "4.31.0",
"bitsandbytes": "0.41.1",
"cuda": "11.8",
"hostname": "cloudvm",
"gpus": "8 x NVIDIA A100-SXM4-80GB (81920 MiB)",
"summarize_input_len_bytes": 857252,
"summarize_output_len_bytes": 1046,
"summarize_time": 54.235164642333984,
"generate_output_len_bytes": 2171,
"generate_time": 25.70338026682536
},
{
"backend": "text-generation-inference",
"base_model": "h2oai/h2ogpt-4096-llama2-70b-chat",
"task": "summary_and_generate",
"bits": 16,
"ngpus": 8,
"reps": 3,
"date": "08/21/2023 21:10:37",
"git_sha": "51318f44",
"n_gpus": 8,
"transformers": "4.31.0",
"bitsandbytes": "0.41.1",
"cuda": "11.8",
"hostname": "cloudvm",
"gpus": "8 x NVIDIA A100-SXM4-80GB (81920 MiB)",
"summarize_input_len_bytes": 857252,
"summarize_output_len_bytes": 927,
"summarize_time": 133.53030570348105,
"generate_output_len_bytes": 2782,
"generate_time": 72.97924383481343
},
{
"backend": "text-generation-inference",
"base_model": "h2oai/h2ogpt-4096-llama2-70b-chat",
"task": "summary_and_generate",
"bits": 16,
"ngpus": 4,
"reps": 3,
"date": "08/21/2023 22:18:17",
"git_sha": "51318f44",
"n_gpus": 4,
"transformers": "4.31.0",
"bitsandbytes": "0.41.1",
"cuda": "11.8",
"hostname": "cloudvm",
"gpus": "4 x NVIDIA A100-SXM4-80GB (81920 MiB)",
"summarize_input_len_bytes": 857252,
"summarize_output_len_bytes": 927,
"summarize_time": 131.45291074117026,
"generate_output_len_bytes": 2782,
"generate_time": 72.30849742889404
},
{
"backend": "text-generation-inference",
"base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
"task": "summary_and_generate",
"bits": 16,
"ngpus": 1,
"reps": 3,
"date": "08/21/2023 22:51:09",
"git_sha": "383b6bbc",
"n_gpus": 1,
"transformers": "4.31.0",
"bitsandbytes": "0.41.1",
"cuda": "11.8",
"hostname": "cloudvm",
"gpus": "1 x NVIDIA A100-SXM4-80GB (81920 MiB)",
"summarize_input_len_bytes": 857252,
"summarize_output_len_bytes": 1267,
"summarize_time": 39.269713958104454,
"generate_output_len_bytes": 2383,
"generate_time": 19.65731406211853
},
{
"backend": "text-generation-inference",
"base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
"task": "summary_and_generate",
"bits": 16,
"ngpus": 1,
"reps": 3,
"date": "08/21/2023 22:54:54",
"git_sha": "383b6bbc",
"n_gpus": 1,
"transformers": "4.31.0",
"bitsandbytes": "0.41.1",
"cuda": "11.8",
"hostname": "cloudvm",
"gpus": "1 x NVIDIA A100-SXM4-80GB (81920 MiB)",
"summarize_input_len_bytes": 857252,
"summarize_output_len_bytes": 1046,
"summarize_time": 51.84283971786499,
"generate_output_len_bytes": 2171,
"generate_time": 28.441521485646565
},
{
"backend": "text-generation-inference",
"base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
"task": "summary_and_generate",
"bits": 16,
"ngpus": 2,
"reps": 3,
"date": "08/21/2023 23:13:10",
"git_sha": "383b6bbc",
"n_gpus": 2,
"transformers": "4.31.0",
"bitsandbytes": "0.41.1",
"cuda": "11.8",
"hostname": "cloudvm",
"gpus": "2 x NVIDIA A100-SXM4-80GB (81920 MiB)",
"summarize_input_len_bytes": 857252,
"summarize_output_len_bytes": 1046,
"summarize_time": 53.383726040522255,
"generate_output_len_bytes": 2171,
"generate_time": 24.422890504201252
},
{
"backend": "text-generation-inference",
"base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
"task": "summary_and_generate",
"bits": 16,
"ngpus": 4,
"reps": 3,
"date": "08/21/2023 23:18:04",
"git_sha": "383b6bbc",
"n_gpus": 4,
"transformers": "4.31.0",
"bitsandbytes": "0.41.1",
"cuda": "11.8",
"hostname": "cloudvm",
"gpus": "4 x NVIDIA A100-SXM4-80GB (81920 MiB)",
"summarize_input_len_bytes": 857252,
"summarize_output_len_bytes": 1046,
"summarize_time": 52.791220347086586,
"generate_output_len_bytes": 2171,
"generate_time": 25.378511508305866
},
{
"backend": "text-generation-inference",
"base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
"task": "summary_and_generate",
"bits": 16,
"ngpus": 8,
"reps": 3,
"date": "08/21/2023 23:23:11",
"git_sha": "383b6bbc",
"n_gpus": 8,
"transformers": "4.31.0",
"bitsandbytes": "0.41.1",
"cuda": "11.8",
"hostname": "cloudvm",
"gpus": "8 x NVIDIA A100-SXM4-80GB (81920 MiB)",
"summarize_input_len_bytes": 857252,
"summarize_output_len_bytes": 1046,
"summarize_time": 56.3846542040507,
"generate_output_len_bytes": 2171,
"generate_time": 26.636192480723064
},
{
"backend": "text-generation-inference",
"base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
"task": "summary_and_generate",
"bits": 16,
"ngpus": 1,
"reps": 3,
"date": "08/21/2023 23:52:44",
"git_sha": "da69b822",
"n_gpus": 1,
"transformers": "4.31.0",
"bitsandbytes": "0.41.1",
"cuda": "11.7",
"hostname": "recypabaszmhhmuae",
"gpus": "1 x NVIDIA RTX A6000 (46068 MiB)",
"summarize_input_len_bytes": 857252,
"summarize_output_len_bytes": 1267,
"summarize_time": 40.36223220825195,
"generate_output_len_bytes": 2383,
"generate_time": 19.87660264968872
},
{
"backend": "text-generation-inference",
"base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
"task": "summary_and_generate",
"bits": 16,
"ngpus": 2,
"reps": 3,
"date": "08/22/2023 00:15:05",
"git_sha": "e843e8c3",
"n_gpus": 2,
"transformers": "4.31.0",
"bitsandbytes": "0.41.1",
"cuda": "11.7",
"hostname": "recypabaszmhhmuae",
"gpus": "2 x NVIDIA RTX A6000 (46068 MiB)",
"summarize_input_len_bytes": 857252,
"summarize_output_len_bytes": 915,
"summarize_time": 64.78201874097188,
"generate_output_len_bytes": 2479,
"generate_time": 29.02147897084554
},
{
"backend": "transformers",
"base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
"task": "summary_and_generate",
"bits": 16,
"ngpus": 0,
"reps": 3,
"date": "08/22/2023 19:01:15",
"git_sha": "855b7d15",
"n_gpus": 0,
"transformers": "4.31.0",
"bitsandbytes": "0.41.1",
"cuda": "11.7",
"hostname": "rippa",
"gpus": "CPU",
"summarize_input_len_bytes": 857252,
"summarize_output_len_bytes": 1351,
"summarize_time": 1215.5185990333557,
"generate_output_len_bytes": 849,
"generate_time": 180.56836318969727
},
{
"backend": "transformers",
"base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
"task": "summary_and_generate",
"bits": 8,
"ngpus": 0,
"reps": 3,
"date": "08/22/2023 20:11:16",
"git_sha": "855b7d15",
"n_gpus": 0,
"transformers": "4.31.0",
"bitsandbytes": "0.41.1",
"cuda": "11.7",
"hostname": "rippa",
"gpus": "CPU",
"summarize_input_len_bytes": 857252,
"summarize_output_len_bytes": 1353,
"summarize_time": 1216.9783231417339,
"generate_output_len_bytes": 849,
"generate_time": 180.42225472132364
},
{
"backend": "transformers",
"base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
"task": "summary_and_generate",
"bits": 4,
"ngpus": 0,
"reps": 3,
"date": "08/22/2023 21:21:20",
"git_sha": "855b7d15",
"n_gpus": 0,
"transformers": "4.31.0",
"bitsandbytes": "0.41.1",
"cuda": "11.7",
"hostname": "rippa",
"gpus": "CPU",
"summarize_input_len_bytes": 857252,
"summarize_output_len_bytes": 1354,
"summarize_time": 1217.1687794526417,
"generate_output_len_bytes": 843,
"generate_time": 180.78463260332742
}
]