fxmarty's picture
fxmarty HF staff
add experience
7d060af
[
{
"model_name_or_path": "nateraw/vit-base-beans",
"task": "image-classification",
"task_args": null,
"dataset": {
"path": "beans",
"eval_split": "validation",
"data_keys": {
"primary": "image",
"secondary": null
},
"ref_keys": [
"labels"
],
"name": null,
"calibration_split": "train"
},
"quantization_approach": "dynamic",
"operators_to_quantize": [
"Add"
],
"node_exclusion": [],
"aware_training": false,
"per_channel": false,
"calibration": {
"method": "minmax",
"num_calibration_samples": 100,
"calibration_histogram_percentile": null,
"calibration_moving_average": null,
"calibration_moving_average_constant": null
},
"framework": "onnxruntime",
"framework_args": {
"opset": 11,
"optimization_level": 1
},
"hardware": "Architecture: x86_64\nCPU op-mode(s): 32-bit, 64-bit\nByte Order: Little Endian\nAddress sizes: 46 bits physical, 48 bits virtual\nCPU(s): 8\nOn-line CPU(s) list: 0-7\nThread(s) per core: 2\nCore(s) per socket: 4\nSocket(s): 1\nNUMA node(s): 1\nVendor ID: GenuineIntel\nCPU family: 6\nModel: 85\nModel name: Intel(R) Xeon(R) Platinum 8259CL CPU @ 2.50GHz\nStepping: 7\nCPU MHz: 3104.203\nBogoMIPS: 4999.99\nHypervisor vendor: KVM\nVirtualization type: full\nL1d cache: 128 KiB\nL1i cache: 128 KiB\nL2 cache: 4 MiB\nL3 cache: 35.8 MiB\nNUMA node0 CPU(s): 0-7\nVulnerability Itlb multihit: KVM: Vulnerable\nVulnerability L1tf: Mitigation; PTE Inversion\nVulnerability Mds: Vulnerable: Clear CPU buffers attempted, no microcode; SMT Host state unknown\nVulnerability Meltdown: Mitigation; PTI\nVulnerability Spec store bypass: Vulnerable\nVulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization\nVulnerability Spectre v2: Mitigation; Retpolines, STIBP disabled, RSB filling\nVulnerability Srbds: Not affected\nVulnerability Tsx async abort: Not affected\nFlags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc rep_good nopl xtopology nonstop_tsc cpuid aperfmperf tsc_known_freq pni pclmulqdq ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand hypervisor lahf_lm abm 3dnowprefetch invpcid_single pti fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid mpx avx512f avx512dq rdseed adx smap clflushopt clwb avx512cd avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves ida arat pku ospke avx512_vnni\n",
"versions": {
"transformers": "4.20.1",
"optimum": "1.2.3.dev0",
"optimum_hash": "5c9af4e5f93c7e9bd523563230732b49603dc4d7"
},
"evaluation": {
"time": [
{
"batch_size": 8,
"input_length": 64,
"baseline": {
"nb_forwards": 16,
"throughput": 1.07,
"latency_mean": 1007.1949454375,
"latency_std": 78.84058358158838,
"latency_50": 985.576602,
"latency_90": 1059.7841785,
"latency_95": 1145.035921,
"latency_99": 1250.375245,
"latency_999": 1274.0765929
},
"optimized": {
"nb_forwards": 12,
"throughput": 0.8,
"latency_mean": 1331.11930825,
"latency_std": 2.6333668250894813,
"latency_50": 1331.341538,
"latency_90": 1333.9482434000001,
"latency_95": 1334.2215575,
"latency_99": 1334.4844530999999,
"latency_999": 1334.5436046099999
}
},
{
"batch_size": 1,
"input_length": 64,
"baseline": {
"nb_forwards": 81,
"throughput": 5.4,
"latency_mean": 186.5835592716049,
"latency_std": 20.00760723775789,
"latency_50": 198.81694,
"latency_90": 201.024797,
"latency_95": 201.779275,
"latency_99": 202.413402,
"latency_999": 202.5442512
},
"optimized": {
"nb_forwards": 92,
"throughput": 6.13,
"latency_mean": 163.38389757608698,
"latency_std": 0.3275123768495738,
"latency_50": 163.3941135,
"latency_90": 163.7525755,
"latency_95": 163.926186,
"latency_99": 164.34097907,
"latency_999": 164.45778730700002
}
},
{
"batch_size": 1,
"input_length": 32,
"baseline": {
"nb_forwards": 76,
"throughput": 5.07,
"latency_mean": 198.9630147368421,
"latency_std": 1.8034042650036124,
"latency_50": 198.5655705,
"latency_90": 200.2815155,
"latency_95": 203.1906375,
"latency_99": 206.4380845,
"latency_999": 206.66971884999998
},
"optimized": {
"nb_forwards": 117,
"throughput": 7.8,
"latency_mean": 128.82420041880343,
"latency_std": 1.701156916298848,
"latency_50": 128.623607,
"latency_90": 131.323657,
"latency_95": 131.90711579999999,
"latency_99": 132.77440764,
"latency_999": 133.447005692
}
},
{
"batch_size": 4,
"input_length": 64,
"baseline": {
"nb_forwards": 23,
"throughput": 1.53,
"latency_mean": 655.365458,
"latency_std": 0.6236008690322891,
"latency_50": 655.34975,
"latency_90": 656.1519954,
"latency_95": 656.2574049,
"latency_99": 656.3535554199999,
"latency_999": 656.375521342
},
"optimized": {
"nb_forwards": 32,
"throughput": 2.13,
"latency_mean": 473.76923496875,
"latency_std": 7.2747277092183165,
"latency_50": 472.62492,
"latency_90": 483.41045330000003,
"latency_95": 484.61633445,
"latency_99": 489.37835364,
"latency_999": 490.82021336400004
}
},
{
"batch_size": 1,
"input_length": 128,
"baseline": {
"nb_forwards": 96,
"throughput": 6.4,
"latency_mean": 157.24992970833335,
"latency_std": 4.732553962424211,
"latency_50": 155.406953,
"latency_90": 164.713482,
"latency_95": 167.8590275,
"latency_99": 170.8462496,
"latency_999": 171.27275096
},
"optimized": {
"nb_forwards": 117,
"throughput": 7.8,
"latency_mean": 129.13340215384616,
"latency_std": 2.1864167617663997,
"latency_50": 128.928664,
"latency_90": 131.8907006,
"latency_95": 132.5507548,
"latency_99": 135.60957616,
"latency_999": 136.973290652
}
},
{
"batch_size": 8,
"input_length": 32,
"baseline": {
"nb_forwards": 12,
"throughput": 0.8,
"latency_mean": 1280.0728285,
"latency_std": 15.395205966597677,
"latency_50": 1270.8922015,
"latency_90": 1298.3091468,
"latency_95": 1300.7668938,
"latency_99": 1303.06202356,
"latency_999": 1303.578427756
},
"optimized": {
"nb_forwards": 17,
"throughput": 1.13,
"latency_mean": 926.9665002352941,
"latency_std": 9.31210282206815,
"latency_50": 930.379688,
"latency_90": 937.0084018,
"latency_95": 939.6581278,
"latency_99": 941.59419436,
"latency_999": 942.029809336
}
},
{
"batch_size": 4,
"input_length": 128,
"baseline": {
"nb_forwards": 23,
"throughput": 1.53,
"latency_mean": 656.3433247826088,
"latency_std": 0.8294053936962497,
"latency_50": 656.248342,
"latency_90": 657.3568842000001,
"latency_95": 657.4566738999999,
"latency_99": 658.36745674,
"latency_999": 658.596371074
},
"optimized": {
"nb_forwards": 23,
"throughput": 1.53,
"latency_mean": 661.2441610869565,
"latency_std": 1.5993679984326394,
"latency_50": 661.382511,
"latency_90": 662.625988,
"latency_95": 663.5314744,
"latency_99": 665.2728648,
"latency_999": 665.69171598
}
},
{
"batch_size": 8,
"input_length": 128,
"baseline": {
"nb_forwards": 12,
"throughput": 0.8,
"latency_mean": 1276.3800261666668,
"latency_std": 14.72110021637953,
"latency_50": 1268.308021,
"latency_90": 1294.9160152,
"latency_95": 1300.88363675,
"latency_99": 1306.34986895,
"latency_999": 1307.579771195
},
"optimized": {
"nb_forwards": 12,
"throughput": 0.8,
"latency_mean": 1319.8366895833333,
"latency_std": 2.9678765372708384,
"latency_50": 1319.0768965,
"latency_90": 1323.875593,
"latency_95": 1325.10346415,
"latency_99": 1326.0648056300001,
"latency_999": 1326.2811074630001
}
},
{
"batch_size": 4,
"input_length": 32,
"baseline": {
"nb_forwards": 26,
"throughput": 1.73,
"latency_mean": 588.8595787307693,
"latency_std": 75.12307910293818,
"latency_50": 656.586274,
"latency_90": 659.335121,
"latency_95": 659.60222425,
"latency_99": 661.19556825,
"latency_999": 661.6635563250001
},
"optimized": {
"nb_forwards": 25,
"throughput": 1.67,
"latency_mean": 602.9062813200001,
"latency_std": 87.18802711972026,
"latency_50": 660.573071,
"latency_90": 662.2857194,
"latency_95": 662.9589754,
"latency_99": 664.0132692799999,
"latency_999": 664.269413528
}
}
],
"others": {
"baseline": {
"accuracy": 0.98
},
"optimized": {
"accuracy": 0.98
}
}
},
"max_eval_samples": null,
"time_benchmark_args": {
"duration": 15,
"warmup_runs": 5
},
"model_type": "vit"
},
{
"model_name_or_path": "nateraw/vit-base-beans",
"task": "image-classification",
"task_args": null,
"dataset": {
"path": "beans",
"eval_split": "validation",
"data_keys": {
"primary": "image",
"secondary": null
},
"ref_keys": [
"labels"
],
"name": null,
"calibration_split": "train"
},
"quantization_approach": "dynamic",
"operators_to_quantize": [
"Add"
],
"node_exclusion": [
"layernorm",
"gelu",
"residual",
"gather",
"softmax"
],
"aware_training": false,
"per_channel": true,
"calibration": {
"method": "minmax",
"num_calibration_samples": 100,
"calibration_histogram_percentile": null,
"calibration_moving_average": null,
"calibration_moving_average_constant": null
},
"framework": "onnxruntime",
"framework_args": {
"opset": 11,
"optimization_level": 1
},
"hardware": "Architecture: x86_64\nCPU op-mode(s): 32-bit, 64-bit\nByte Order: Little Endian\nAddress sizes: 46 bits physical, 48 bits virtual\nCPU(s): 8\nOn-line CPU(s) list: 0-7\nThread(s) per core: 2\nCore(s) per socket: 4\nSocket(s): 1\nNUMA node(s): 1\nVendor ID: GenuineIntel\nCPU family: 6\nModel: 85\nModel name: Intel(R) Xeon(R) Platinum 8259CL CPU @ 2.50GHz\nStepping: 7\nCPU MHz: 3098.882\nBogoMIPS: 4999.99\nHypervisor vendor: KVM\nVirtualization type: full\nL1d cache: 128 KiB\nL1i cache: 128 KiB\nL2 cache: 4 MiB\nL3 cache: 35.8 MiB\nNUMA node0 CPU(s): 0-7\nVulnerability Itlb multihit: KVM: Vulnerable\nVulnerability L1tf: Mitigation; PTE Inversion\nVulnerability Mds: Vulnerable: Clear CPU buffers attempted, no microcode; SMT Host state unknown\nVulnerability Meltdown: Mitigation; PTI\nVulnerability Spec store bypass: Vulnerable\nVulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization\nVulnerability Spectre v2: Mitigation; Retpolines, STIBP disabled, RSB filling\nVulnerability Srbds: Not affected\nVulnerability Tsx async abort: Not affected\nFlags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc rep_good nopl xtopology nonstop_tsc cpuid aperfmperf tsc_known_freq pni pclmulqdq ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand hypervisor lahf_lm abm 3dnowprefetch invpcid_single pti fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid mpx avx512f avx512dq rdseed adx smap clflushopt clwb avx512cd avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves ida arat pku ospke avx512_vnni\n",
"versions": {
"transformers": "4.20.1",
"optimum": "1.2.3.dev0",
"optimum_hash": "5c9af4e5f93c7e9bd523563230732b49603dc4d7"
},
"evaluation": {
"time": [
{
"batch_size": 4,
"input_length": 32,
"baseline": {
"nb_forwards": 25,
"throughput": 1.67,
"latency_mean": 609.81228388,
"latency_std": 73.78848735281055,
"latency_50": 654.990393,
"latency_90": 690.0503104,
"latency_95": 692.5913532000001,
"latency_99": 692.88928036,
"latency_999": 692.967085936
},
"optimized": {
"nb_forwards": 24,
"throughput": 1.6,
"latency_mean": 632.7304657083333,
"latency_std": 67.87527940398857,
"latency_50": 665.6419855,
"latency_90": 668.6654992,
"latency_95": 668.7597744,
"latency_99": 669.6960696,
"latency_999": 669.94376166
}
},
{
"batch_size": 8,
"input_length": 128,
"baseline": {
"nb_forwards": 13,
"throughput": 0.87,
"latency_mean": 1186.5381284615385,
"latency_std": 161.4237069238961,
"latency_50": 1269.670847,
"latency_90": 1355.9492604000002,
"latency_95": 1390.1450375999998,
"latency_99": 1411.6607203199999,
"latency_999": 1416.5017489319998
},
"optimized": {
"nb_forwards": 17,
"throughput": 1.13,
"latency_mean": 934.0936628235294,
"latency_std": 14.582536342073604,
"latency_50": 935.050755,
"latency_90": 952.4674898,
"latency_95": 957.0302138,
"latency_99": 962.9052563600001,
"latency_999": 964.227140936
}
},
{
"batch_size": 1,
"input_length": 64,
"baseline": {
"nb_forwards": 76,
"throughput": 5.07,
"latency_mean": 198.5647816973684,
"latency_std": 3.4715584501636134,
"latency_50": 198.506581,
"latency_90": 200.2144805,
"latency_95": 200.55136775,
"latency_99": 207.540032,
"latency_999": 223.25337049999993
},
"optimized": {
"nb_forwards": 93,
"throughput": 6.2,
"latency_mean": 162.5132122580645,
"latency_std": 0.513116818309582,
"latency_50": 162.371852,
"latency_90": 163.2859852,
"latency_95": 163.7528158,
"latency_99": 163.85883275999998,
"latency_999": 163.915920876
}
},
{
"batch_size": 8,
"input_length": 32,
"baseline": {
"nb_forwards": 13,
"throughput": 0.87,
"latency_mean": 1173.0143999230768,
"latency_std": 130.54371262227698,
"latency_50": 1260.605467,
"latency_90": 1296.8186462,
"latency_95": 1308.2374858,
"latency_99": 1319.8486531600001,
"latency_999": 1322.461165816
},
"optimized": {
"nb_forwards": 16,
"throughput": 1.07,
"latency_mean": 995.22072925,
"latency_std": 129.45556415437287,
"latency_50": 946.254189,
"latency_90": 1213.3328715,
"latency_95": 1315.33706625,
"latency_99": 1318.61972205,
"latency_999": 1319.358319605
}
},
{
"batch_size": 4,
"input_length": 64,
"baseline": {
"nb_forwards": 29,
"throughput": 1.93,
"latency_mean": 521.0909943793104,
"latency_std": 46.475380124669094,
"latency_50": 506.281592,
"latency_90": 544.5816703999999,
"latency_95": 656.4042,
"latency_99": 658.84548128,
"latency_999": 659.635381328
},
"optimized": {
"nb_forwards": 32,
"throughput": 2.13,
"latency_mean": 472.8995814375,
"latency_std": 7.852107004932393,
"latency_50": 471.6044875,
"latency_90": 483.1297946,
"latency_95": 485.7553554,
"latency_99": 490.66026108,
"latency_999": 492.539614008
}
},
{
"batch_size": 1,
"input_length": 128,
"baseline": {
"nb_forwards": 97,
"throughput": 6.47,
"latency_mean": 156.2440793814433,
"latency_std": 1.9840595219565056,
"latency_50": 156.225903,
"latency_90": 158.593671,
"latency_95": 159.64843179999997,
"latency_99": 161.01634776,
"latency_999": 161.629544976
},
"optimized": {
"nb_forwards": 93,
"throughput": 6.2,
"latency_mean": 162.63202710752688,
"latency_std": 0.7398820002769471,
"latency_50": 162.382878,
"latency_90": 163.62236180000002,
"latency_95": 163.8380992,
"latency_99": 164.88131604,
"latency_999": 165.77600150400002
}
},
{
"batch_size": 4,
"input_length": 128,
"baseline": {
"nb_forwards": 24,
"throughput": 1.6,
"latency_mean": 642.4793479166666,
"latency_std": 41.85394569042994,
"latency_50": 654.998135,
"latency_90": 656.7839087000001,
"latency_95": 657.02353375,
"latency_99": 657.8482725700001,
"latency_999": 658.060870057
},
"optimized": {
"nb_forwards": 23,
"throughput": 1.53,
"latency_mean": 666.2755957826088,
"latency_std": 1.7410908635173166,
"latency_50": 666.717442,
"latency_90": 668.0602384,
"latency_95": 668.910585,
"latency_99": 669.7671928,
"latency_999": 669.9619772799999
}
},
{
"batch_size": 1,
"input_length": 32,
"baseline": {
"nb_forwards": 75,
"throughput": 5.0,
"latency_mean": 201.75692086666666,
"latency_std": 1.8320009608554637,
"latency_50": 201.71631,
"latency_90": 204.2971264,
"latency_95": 204.8295851,
"latency_99": 206.53276574,
"latency_999": 206.55384397400002
},
"optimized": {
"nb_forwards": 115,
"throughput": 7.67,
"latency_mean": 131.24782122608696,
"latency_std": 4.242274517523887,
"latency_50": 129.902701,
"latency_90": 138.24299720000002,
"latency_95": 141.03294480000002,
"latency_99": 142.079643,
"latency_999": 142.57829744400001
}
},
{
"batch_size": 8,
"input_length": 64,
"baseline": {
"nb_forwards": 12,
"throughput": 0.8,
"latency_mean": 1263.2922429166667,
"latency_std": 77.36389645216727,
"latency_50": 1278.0368575,
"latency_90": 1328.6916195,
"latency_95": 1342.47310885,
"latency_99": 1353.1162041700002,
"latency_999": 1355.510900617
},
"optimized": {
"nb_forwards": 15,
"throughput": 1.0,
"latency_mean": 1012.6554306,
"latency_std": 156.31890427073202,
"latency_50": 934.872073,
"latency_90": 1322.7367448,
"latency_95": 1323.9433467000001,
"latency_99": 1325.30759014,
"latency_999": 1325.6145449140001
}
}
],
"others": {
"baseline": {
"accuracy": 0.98
},
"optimized": {
"accuracy": 0.98
}
}
},
"max_eval_samples": null,
"time_benchmark_args": {
"duration": 15,
"warmup_runs": 5
},
"model_type": "vit"
},
{
"model_name_or_path": "nateraw/vit-base-beans",
"task": "image-classification",
"task_args": null,
"dataset": {
"path": "beans",
"eval_split": "validation",
"data_keys": {
"primary": "image",
"secondary": null
},
"ref_keys": [
"labels"
],
"name": null,
"calibration_split": "train"
},
"quantization_approach": "dynamic",
"operators_to_quantize": [
"Add",
"MatMul"
],
"node_exclusion": [],
"aware_training": false,
"per_channel": false,
"calibration": {
"method": "minmax",
"num_calibration_samples": 100,
"calibration_histogram_percentile": null,
"calibration_moving_average": null,
"calibration_moving_average_constant": null
},
"framework": "onnxruntime",
"framework_args": {
"opset": 11,
"optimization_level": 1
},
"hardware": "Architecture: x86_64\nCPU op-mode(s): 32-bit, 64-bit\nByte Order: Little Endian\nAddress sizes: 46 bits physical, 48 bits virtual\nCPU(s): 8\nOn-line CPU(s) list: 0-7\nThread(s) per core: 2\nCore(s) per socket: 4\nSocket(s): 1\nNUMA node(s): 1\nVendor ID: GenuineIntel\nCPU family: 6\nModel: 85\nModel name: Intel(R) Xeon(R) Platinum 8259CL CPU @ 2.50GHz\nStepping: 7\nCPU MHz: 3099.508\nBogoMIPS: 4999.99\nHypervisor vendor: KVM\nVirtualization type: full\nL1d cache: 128 KiB\nL1i cache: 128 KiB\nL2 cache: 4 MiB\nL3 cache: 35.8 MiB\nNUMA node0 CPU(s): 0-7\nVulnerability Itlb multihit: KVM: Vulnerable\nVulnerability L1tf: Mitigation; PTE Inversion\nVulnerability Mds: Vulnerable: Clear CPU buffers attempted, no microcode; SMT Host state unknown\nVulnerability Meltdown: Mitigation; PTI\nVulnerability Spec store bypass: Vulnerable\nVulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization\nVulnerability Spectre v2: Mitigation; Retpolines, STIBP disabled, RSB filling\nVulnerability Srbds: Not affected\nVulnerability Tsx async abort: Not affected\nFlags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc rep_good nopl xtopology nonstop_tsc cpuid aperfmperf tsc_known_freq pni pclmulqdq ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand hypervisor lahf_lm abm 3dnowprefetch invpcid_single pti fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid mpx avx512f avx512dq rdseed adx smap clflushopt clwb avx512cd avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves ida arat pku ospke avx512_vnni\n",
"versions": {
"transformers": "4.20.1",
"optimum": "1.2.3.dev0",
"optimum_hash": "5c9af4e5f93c7e9bd523563230732b49603dc4d7"
},
"evaluation": {
"time": [
{
"batch_size": 4,
"input_length": 128,
"baseline": {
"nb_forwards": 30,
"throughput": 2.0,
"latency_mean": 504.13991656666667,
"latency_std": 4.513179886057779,
"latency_50": 503.19925,
"latency_90": 511.38798610000003,
"latency_95": 512.42949615,
"latency_99": 513.05507925,
"latency_999": 513.2059829250001
},
"optimized": {
"nb_forwards": 69,
"throughput": 4.6,
"latency_mean": 217.46600939130434,
"latency_std": 2.972318261728456,
"latency_50": 217.184814,
"latency_90": 220.705846,
"latency_95": 222.9682218,
"latency_99": 224.70305839999997,
"latency_999": 226.06056314000003
}
},
{
"batch_size": 1,
"input_length": 32,
"baseline": {
"nb_forwards": 79,
"throughput": 5.27,
"latency_mean": 191.43747164556962,
"latency_std": 15.655307063217192,
"latency_50": 198.595354,
"latency_90": 200.57276240000002,
"latency_95": 200.87837530000002,
"latency_99": 201.16657547999998,
"latency_999": 201.356981748
},
"optimized": {
"nb_forwards": 238,
"throughput": 15.87,
"latency_mean": 63.268969394957985,
"latency_std": 0.9427228875615695,
"latency_50": 63.1408055,
"latency_90": 64.2273272,
"latency_95": 65.1072349,
"latency_99": 66.96859157,
"latency_999": 67.69891359200001
}
},
{
"batch_size": 8,
"input_length": 64,
"baseline": {
"nb_forwards": 16,
"throughput": 1.07,
"latency_mean": 968.852768375,
"latency_std": 38.04343260757334,
"latency_50": 952.2546675,
"latency_90": 1038.7362795,
"latency_95": 1054.559503,
"latency_99": 1057.6485166,
"latency_999": 1058.34354466
},
"optimized": {
"nb_forwards": 28,
"throughput": 1.87,
"latency_mean": 537.6509522857143,
"latency_std": 3.8349742843783643,
"latency_50": 536.4832135,
"latency_90": 541.9351384,
"latency_95": 546.1624122000001,
"latency_99": 550.43430736,
"latency_999": 551.363741536
}
},
{
"batch_size": 8,
"input_length": 128,
"baseline": {
"nb_forwards": 12,
"throughput": 0.8,
"latency_mean": 1278.7261375,
"latency_std": 13.900041175798188,
"latency_50": 1275.258701,
"latency_90": 1298.9398739,
"latency_95": 1304.62491295,
"latency_99": 1308.62324179,
"latency_999": 1309.522865779
},
"optimized": {
"nb_forwards": 28,
"throughput": 1.87,
"latency_mean": 537.52356,
"latency_std": 3.4322833452073542,
"latency_50": 536.6087305,
"latency_90": 542.7677001000001,
"latency_95": 545.60897155,
"latency_99": 546.32498686,
"latency_999": 546.4325099859999
}
},
{
"batch_size": 4,
"input_length": 64,
"baseline": {
"nb_forwards": 23,
"throughput": 1.53,
"latency_mean": 655.3645512173913,
"latency_std": 1.9257989113526919,
"latency_50": 654.646442,
"latency_90": 657.282423,
"latency_95": 659.662086,
"latency_99": 660.8313084800001,
"latency_999": 661.061189648
},
"optimized": {
"nb_forwards": 69,
"throughput": 4.6,
"latency_mean": 219.61360882608696,
"latency_std": 10.272944961242715,
"latency_50": 217.050361,
"latency_90": 221.35356240000002,
"latency_95": 244.56632359999978,
"latency_99": 260.41507764,
"latency_999": 261.058168464
}
},
{
"batch_size": 4,
"input_length": 32,
"baseline": {
"nb_forwards": 23,
"throughput": 1.53,
"latency_mean": 654.7864544347826,
"latency_std": 1.3045747652970006,
"latency_50": 654.519817,
"latency_90": 655.7390292,
"latency_95": 656.9679398,
"latency_99": 659.0271437,
"latency_999": 659.51908757
},
"optimized": {
"nb_forwards": 69,
"throughput": 4.6,
"latency_mean": 217.45058371014494,
"latency_std": 2.719898318573885,
"latency_50": 217.033952,
"latency_90": 221.1297136,
"latency_95": 222.2854166,
"latency_99": 223.81499584,
"latency_999": 224.26408878400002
}
},
{
"batch_size": 8,
"input_length": 32,
"baseline": {
"nb_forwards": 15,
"throughput": 1.0,
"latency_mean": 1033.3692992666668,
"latency_std": 121.20856568749772,
"latency_50": 963.132488,
"latency_90": 1262.2027165999998,
"latency_95": 1265.428772,
"latency_99": 1270.8415024,
"latency_999": 1272.05936674
},
"optimized": {
"nb_forwards": 35,
"throughput": 2.33,
"latency_mean": 433.3171128,
"latency_std": 5.620352976190466,
"latency_50": 432.092966,
"latency_90": 439.51550460000004,
"latency_95": 442.0759091,
"latency_99": 449.04011729999996,
"latency_999": 451.02373323
}
},
{
"batch_size": 1,
"input_length": 128,
"baseline": {
"nb_forwards": 75,
"throughput": 5.0,
"latency_mean": 200.45710213333334,
"latency_std": 0.912546986602039,
"latency_50": 200.443548,
"latency_90": 201.5305846,
"latency_95": 202.211401,
"latency_99": 202.77304016,
"latency_999": 202.841815316
},
"optimized": {
"nb_forwards": 239,
"throughput": 15.93,
"latency_mean": 62.95156658158996,
"latency_std": 0.6494732254002147,
"latency_50": 62.920241,
"latency_90": 63.867974,
"latency_95": 64.1582552,
"latency_99": 64.69974346000001,
"latency_999": 64.96208175
}
},
{
"batch_size": 1,
"input_length": 64,
"baseline": {
"nb_forwards": 82,
"throughput": 5.47,
"latency_mean": 183.10327674390246,
"latency_std": 21.77607771500049,
"latency_50": 197.6686845,
"latency_90": 202.83920880000002,
"latency_95": 203.04726945,
"latency_99": 204.23605759999998,
"latency_999": 204.50013056
},
"optimized": {
"nb_forwards": 235,
"throughput": 15.67,
"latency_mean": 64.02330576170213,
"latency_std": 1.559037537525157,
"latency_50": 63.56593,
"latency_90": 66.4842204,
"latency_95": 67.527317,
"latency_99": 68.7520603,
"latency_999": 70.43527871799999
}
}
],
"others": {
"baseline": {
"accuracy": 0.98
},
"optimized": {
"accuracy": 0.98
}
}
},
"max_eval_samples": null,
"time_benchmark_args": {
"duration": 15,
"warmup_runs": 5
},
"model_type": "vit"
},
{
"model_name_or_path": "nateraw/vit-base-beans",
"task": "image-classification",
"task_args": null,
"dataset": {
"path": "beans",
"eval_split": "validation",
"data_keys": {
"primary": "image",
"secondary": null
},
"ref_keys": [
"labels"
],
"name": null,
"calibration_split": "train"
},
"quantization_approach": "dynamic",
"operators_to_quantize": [
"Add",
"MatMul"
],
"node_exclusion": [
"layernorm",
"gelu",
"residual",
"gather",
"softmax"
],
"aware_training": false,
"per_channel": false,
"calibration": {
"method": "minmax",
"num_calibration_samples": 100,
"calibration_histogram_percentile": null,
"calibration_moving_average": null,
"calibration_moving_average_constant": null
},
"framework": "onnxruntime",
"framework_args": {
"opset": 11,
"optimization_level": 1
},
"hardware": "Architecture: x86_64\nCPU op-mode(s): 32-bit, 64-bit\nByte Order: Little Endian\nAddress sizes: 46 bits physical, 48 bits virtual\nCPU(s): 8\nOn-line CPU(s) list: 0-7\nThread(s) per core: 2\nCore(s) per socket: 4\nSocket(s): 1\nNUMA node(s): 1\nVendor ID: GenuineIntel\nCPU family: 6\nModel: 85\nModel name: Intel(R) Xeon(R) Platinum 8259CL CPU @ 2.50GHz\nStepping: 7\nCPU MHz: 3098.359\nBogoMIPS: 4999.99\nHypervisor vendor: KVM\nVirtualization type: full\nL1d cache: 128 KiB\nL1i cache: 128 KiB\nL2 cache: 4 MiB\nL3 cache: 35.8 MiB\nNUMA node0 CPU(s): 0-7\nVulnerability Itlb multihit: KVM: Vulnerable\nVulnerability L1tf: Mitigation; PTE Inversion\nVulnerability Mds: Vulnerable: Clear CPU buffers attempted, no microcode; SMT Host state unknown\nVulnerability Meltdown: Mitigation; PTI\nVulnerability Spec store bypass: Vulnerable\nVulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization\nVulnerability Spectre v2: Mitigation; Retpolines, STIBP disabled, RSB filling\nVulnerability Srbds: Not affected\nVulnerability Tsx async abort: Not affected\nFlags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc rep_good nopl xtopology nonstop_tsc cpuid aperfmperf tsc_known_freq pni pclmulqdq ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand hypervisor lahf_lm abm 3dnowprefetch invpcid_single pti fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid mpx avx512f avx512dq rdseed adx smap clflushopt clwb avx512cd avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves ida arat pku ospke avx512_vnni\n",
"versions": {
"transformers": "4.20.1",
"optimum": "1.2.3.dev0",
"optimum_hash": "5c9af4e5f93c7e9bd523563230732b49603dc4d7"
},
"evaluation": {
"time": [
{
"batch_size": 8,
"input_length": 128,
"baseline": {
"nb_forwards": 13,
"throughput": 0.87,
"latency_mean": 1188.9833955384615,
"latency_std": 156.3766424391915,
"latency_50": 1271.045981,
"latency_90": 1320.508096,
"latency_95": 1351.0301161999998,
"latency_99": 1381.7705920399999,
"latency_999": 1388.6871991039998
},
"optimized": {
"nb_forwards": 28,
"throughput": 1.87,
"latency_mean": 537.5845056428572,
"latency_std": 24.479382275992702,
"latency_50": 544.233773,
"latency_90": 549.9048538999999,
"latency_95": 550.3655834,
"latency_99": 551.47189197,
"latency_999": 551.817216597
}
},
{
"batch_size": 8,
"input_length": 32,
"baseline": {
"nb_forwards": 12,
"throughput": 0.8,
"latency_mean": 1294.067229,
"latency_std": 42.49668221169809,
"latency_50": 1276.250713,
"latency_90": 1376.546508,
"latency_95": 1386.4398829000002,
"latency_99": 1387.82451098,
"latency_999": 1388.136052298
},
"optimized": {
"nb_forwards": 32,
"throughput": 2.13,
"latency_mean": 472.54393821875,
"latency_std": 44.30277356434995,
"latency_50": 448.9115105,
"latency_90": 538.9957706,
"latency_95": 539.2858448,
"latency_99": 541.8835004299999,
"latency_999": 542.8368844429999
}
},
{
"batch_size": 1,
"input_length": 32,
"baseline": {
"nb_forwards": 75,
"throughput": 5.0,
"latency_mean": 200.50050148,
"latency_std": 0.8285022604698893,
"latency_50": 200.26392,
"latency_90": 201.65092280000002,
"latency_95": 202.4235563,
"latency_99": 202.9026252,
"latency_999": 203.01259512000001
},
"optimized": {
"nb_forwards": 239,
"throughput": 15.93,
"latency_mean": 62.99670253974895,
"latency_std": 0.6391249990225535,
"latency_50": 62.961666,
"latency_90": 63.847294399999996,
"latency_95": 64.0924426,
"latency_99": 64.62881584,
"latency_999": 64.996266822
}
},
{
"batch_size": 8,
"input_length": 64,
"baseline": {
"nb_forwards": 12,
"throughput": 0.8,
"latency_mean": 1273.6051258333332,
"latency_std": 12.303101937934832,
"latency_50": 1270.9759965,
"latency_90": 1294.2611932,
"latency_95": 1297.39174085,
"latency_99": 1299.14183777,
"latency_999": 1299.5356095769998
},
"optimized": {
"nb_forwards": 35,
"throughput": 2.33,
"latency_mean": 435.2721461714286,
"latency_std": 5.5886169074680705,
"latency_50": 434.611038,
"latency_90": 441.9488788,
"latency_95": 444.0377945,
"latency_99": 447.65647636,
"latency_999": 448.09250983600003
}
},
{
"batch_size": 4,
"input_length": 64,
"baseline": {
"nb_forwards": 23,
"throughput": 1.53,
"latency_mean": 656.8662442608696,
"latency_std": 1.963675172824631,
"latency_50": 656.523145,
"latency_90": 658.1741306,
"latency_95": 658.2483522,
"latency_99": 663.4018644600001,
"latency_999": 664.7094380460001
},
"optimized": {
"nb_forwards": 70,
"throughput": 4.67,
"latency_mean": 216.32413269999998,
"latency_std": 2.573320955654004,
"latency_50": 216.1438355,
"latency_90": 219.18598509999998,
"latency_95": 220.69771815000001,
"latency_99": 223.89661489000002,
"latency_999": 226.28716618899998
}
},
{
"batch_size": 4,
"input_length": 128,
"baseline": {
"nb_forwards": 23,
"throughput": 1.53,
"latency_mean": 654.2367223043478,
"latency_std": 1.3812737872427574,
"latency_50": 653.93458,
"latency_90": 655.7302913999999,
"latency_95": 656.3990489,
"latency_99": 658.70954416,
"latency_999": 659.2843920160001
},
"optimized": {
"nb_forwards": 70,
"throughput": 4.67,
"latency_mean": 216.82448075714288,
"latency_std": 2.4549019943848234,
"latency_50": 216.682762,
"latency_90": 219.8229577,
"latency_95": 221.14088784999998,
"latency_99": 222.15547067000003,
"latency_999": 223.170220067
}
},
{
"batch_size": 1,
"input_length": 64,
"baseline": {
"nb_forwards": 93,
"throughput": 6.2,
"latency_mean": 162.75067422580645,
"latency_std": 16.670443274832795,
"latency_50": 155.50524,
"latency_90": 197.1873276,
"latency_95": 198.3428246,
"latency_99": 199.21549572,
"latency_999": 199.303705872
},
"optimized": {
"nb_forwards": 224,
"throughput": 14.93,
"latency_mean": 67.17887016071428,
"latency_std": 3.767553337517106,
"latency_50": 69.853314,
"latency_90": 70.8704738,
"latency_95": 71.48267955,
"latency_99": 72.17330481,
"latency_999": 73.872151108
}
},
{
"batch_size": 1,
"input_length": 128,
"baseline": {
"nb_forwards": 94,
"throughput": 6.27,
"latency_mean": 160.5772765319149,
"latency_std": 13.789324382626834,
"latency_50": 155.949789,
"latency_90": 192.97832880000007,
"latency_95": 199.62176935,
"latency_99": 200.06097094,
"latency_999": 200.418572494
},
"optimized": {
"nb_forwards": 222,
"throughput": 14.8,
"latency_mean": 67.64596318918919,
"latency_std": 3.8012002735511974,
"latency_50": 70.4326435,
"latency_90": 71.0351658,
"latency_95": 71.17170809999999,
"latency_99": 71.37054176000001,
"latency_999": 71.686580853
}
},
{
"batch_size": 4,
"input_length": 32,
"baseline": {
"nb_forwards": 30,
"throughput": 2.0,
"latency_mean": 503.8281891666667,
"latency_std": 3.79639368455217,
"latency_50": 503.337252,
"latency_90": 509.18891210000004,
"latency_95": 509.43762219999996,
"latency_99": 511.16002555,
"latency_999": 511.72013285500003
},
"optimized": {
"nb_forwards": 69,
"throughput": 4.6,
"latency_mean": 219.61836788405796,
"latency_std": 6.812193992706365,
"latency_50": 216.499815,
"latency_90": 231.0559726,
"latency_95": 233.1648294,
"latency_99": 239.13513347999998,
"latency_999": 240.964685448
}
}
],
"others": {
"baseline": {
"accuracy": 0.98
},
"optimized": {
"accuracy": 0.98
}
}
},
"max_eval_samples": null,
"time_benchmark_args": {
"duration": 15,
"warmup_runs": 5
},
"model_type": "vit"
},
{
"model_name_or_path": "nateraw/vit-base-beans",
"task": "image-classification",
"task_args": null,
"dataset": {
"path": "beans",
"eval_split": "validation",
"data_keys": {
"primary": "image",
"secondary": null
},
"ref_keys": [
"labels"
],
"name": null,
"calibration_split": "train"
},
"quantization_approach": "dynamic",
"operators_to_quantize": [
"Add"
],
"node_exclusion": [
"layernorm",
"gelu",
"residual",
"gather",
"softmax"
],
"aware_training": false,
"per_channel": false,
"calibration": {
"method": "minmax",
"num_calibration_samples": 100,
"calibration_histogram_percentile": null,
"calibration_moving_average": null,
"calibration_moving_average_constant": null
},
"framework": "onnxruntime",
"framework_args": {
"opset": 11,
"optimization_level": 1
},
"hardware": "Architecture: x86_64\nCPU op-mode(s): 32-bit, 64-bit\nByte Order: Little Endian\nAddress sizes: 46 bits physical, 48 bits virtual\nCPU(s): 8\nOn-line CPU(s) list: 0-7\nThread(s) per core: 2\nCore(s) per socket: 4\nSocket(s): 1\nNUMA node(s): 1\nVendor ID: GenuineIntel\nCPU family: 6\nModel: 85\nModel name: Intel(R) Xeon(R) Platinum 8259CL CPU @ 2.50GHz\nStepping: 7\nCPU MHz: 3100.080\nBogoMIPS: 4999.99\nHypervisor vendor: KVM\nVirtualization type: full\nL1d cache: 128 KiB\nL1i cache: 128 KiB\nL2 cache: 4 MiB\nL3 cache: 35.8 MiB\nNUMA node0 CPU(s): 0-7\nVulnerability Itlb multihit: KVM: Vulnerable\nVulnerability L1tf: Mitigation; PTE Inversion\nVulnerability Mds: Vulnerable: Clear CPU buffers attempted, no microcode; SMT Host state unknown\nVulnerability Meltdown: Mitigation; PTI\nVulnerability Spec store bypass: Vulnerable\nVulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization\nVulnerability Spectre v2: Mitigation; Retpolines, STIBP disabled, RSB filling\nVulnerability Srbds: Not affected\nVulnerability Tsx async abort: Not affected\nFlags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc rep_good nopl xtopology nonstop_tsc cpuid aperfmperf tsc_known_freq pni pclmulqdq ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand hypervisor lahf_lm abm 3dnowprefetch invpcid_single pti fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid mpx avx512f avx512dq rdseed adx smap clflushopt clwb avx512cd avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves ida arat pku ospke avx512_vnni\n",
"versions": {
"transformers": "4.20.1",
"optimum": "1.2.3.dev0",
"optimum_hash": "5c9af4e5f93c7e9bd523563230732b49603dc4d7"
},
"evaluation": {
"time": [
{
"batch_size": 8,
"input_length": 64,
"baseline": {
"nb_forwards": 12,
"throughput": 0.8,
"latency_mean": 1270.30411025,
"latency_std": 8.657521693333212,
"latency_50": 1269.1937825,
"latency_90": 1281.7940697000001,
"latency_95": 1283.1798158,
"latency_99": 1284.16803116,
"latency_999": 1284.390379616
},
"optimized": {
"nb_forwards": 14,
"throughput": 0.93,
"latency_mean": 1092.104026,
"latency_std": 192.38390460063763,
"latency_50": 942.50711,
"latency_90": 1317.2986979000002,
"latency_95": 1317.7165482999999,
"latency_99": 1317.88203206,
"latency_999": 1317.919265906
}
},
{
"batch_size": 1,
"input_length": 64,
"baseline": {
"nb_forwards": 97,
"throughput": 6.47,
"latency_mean": 155.31766736082474,
"latency_std": 2.58376281346955,
"latency_50": 154.80961,
"latency_90": 158.62567140000002,
"latency_95": 159.8607244,
"latency_99": 162.57999983999997,
"latency_999": 166.451882784
},
"optimized": {
"nb_forwards": 115,
"throughput": 7.67,
"latency_mean": 130.7428943826087,
"latency_std": 4.439134079400091,
"latency_50": 129.426836,
"latency_90": 139.1114094,
"latency_95": 140.0790016,
"latency_99": 141.99444142,
"latency_999": 143.111380738
}
},
{
"batch_size": 8,
"input_length": 32,
"baseline": {
"nb_forwards": 16,
"throughput": 1.07,
"latency_mean": 953.2654264375,
"latency_std": 52.43131265130536,
"latency_50": 939.297859,
"latency_90": 953.7382385,
"latency_95": 1007.0155795,
"latency_99": 1124.8867758999997,
"latency_999": 1151.4077950899998
},
"optimized": {
"nb_forwards": 17,
"throughput": 1.13,
"latency_mean": 926.1379787647058,
"latency_std": 10.221038220575734,
"latency_50": 925.067649,
"latency_90": 942.8075994,
"latency_95": 943.8492997999999,
"latency_99": 944.12823036,
"latency_999": 944.1909897359999
}
},
{
"batch_size": 4,
"input_length": 64,
"baseline": {
"nb_forwards": 23,
"throughput": 1.53,
"latency_mean": 656.297728173913,
"latency_std": 0.9507206548680598,
"latency_50": 656.379836,
"latency_90": 657.2358052000001,
"latency_95": 657.2469835,
"latency_99": 657.8591597000001,
"latency_999": 658.01437487
},
"optimized": {
"nb_forwards": 23,
"throughput": 1.53,
"latency_mean": 652.7235733913044,
"latency_std": 1.4541608709475486,
"latency_50": 652.974982,
"latency_90": 654.6616273999999,
"latency_95": 654.7420641,
"latency_99": 655.24253678,
"latency_999": 655.3693460779999
}
},
{
"batch_size": 8,
"input_length": 128,
"baseline": {
"nb_forwards": 12,
"throughput": 0.8,
"latency_mean": 1265.5476989166668,
"latency_std": 3.289433212667684,
"latency_50": 1263.5434205,
"latency_90": 1269.744916,
"latency_95": 1270.16758755,
"latency_99": 1270.5304551099998,
"latency_999": 1270.6121003110002
},
"optimized": {
"nb_forwards": 12,
"throughput": 0.8,
"latency_mean": 1304.50654875,
"latency_std": 2.6884396280606753,
"latency_50": 1303.907755,
"latency_90": 1307.1289129000002,
"latency_95": 1309.2907671500002,
"latency_99": 1311.21018223,
"latency_999": 1311.642050623
}
},
{
"batch_size": 1,
"input_length": 32,
"baseline": {
"nb_forwards": 97,
"throughput": 6.47,
"latency_mean": 155.8433487113402,
"latency_std": 2.5029692630646236,
"latency_50": 155.989606,
"latency_90": 158.707679,
"latency_95": 159.601676,
"latency_99": 161.73731819999998,
"latency_999": 162.09523452000002
},
"optimized": {
"nb_forwards": 115,
"throughput": 7.67,
"latency_mean": 130.9514579652174,
"latency_std": 8.81473257206767,
"latency_50": 128.898162,
"latency_90": 131.6082024,
"latency_95": 163.16474219999998,
"latency_99": 163.3782847,
"latency_999": 163.563403254
}
},
{
"batch_size": 4,
"input_length": 128,
"baseline": {
"nb_forwards": 23,
"throughput": 1.53,
"latency_mean": 653.9863154782608,
"latency_std": 0.7289228494064774,
"latency_50": 653.794938,
"latency_90": 654.6420688,
"latency_95": 655.7360347,
"latency_99": 656.03099652,
"latency_999": 656.0760086519999
},
"optimized": {
"nb_forwards": 32,
"throughput": 2.13,
"latency_mean": 479.06163646875,
"latency_std": 19.59131442173658,
"latency_50": 474.2416765,
"latency_90": 511.8005115,
"latency_95": 519.3751149,
"latency_99": 526.80620621,
"latency_999": 527.546256221
}
},
{
"batch_size": 1,
"input_length": 128,
"baseline": {
"nb_forwards": 76,
"throughput": 5.07,
"latency_mean": 197.58542217105264,
"latency_std": 0.9013285926255195,
"latency_50": 197.396874,
"latency_90": 198.8574485,
"latency_95": 199.188459,
"latency_99": 200.012024,
"latency_999": 201.7015409
},
"optimized": {
"nb_forwards": 117,
"throughput": 7.8,
"latency_mean": 128.7953315897436,
"latency_std": 1.7882824907070387,
"latency_50": 128.806478,
"latency_90": 131.2709188,
"latency_95": 131.67982,
"latency_99": 132.40365796,
"latency_999": 132.676468532
}
},
{
"batch_size": 4,
"input_length": 32,
"baseline": {
"nb_forwards": 23,
"throughput": 1.53,
"latency_mean": 654.1969950434783,
"latency_std": 1.1220804198391827,
"latency_50": 654.001645,
"latency_90": 655.7840166,
"latency_95": 656.016458,
"latency_99": 656.36563072,
"latency_999": 656.447835172
},
"optimized": {
"nb_forwards": 32,
"throughput": 2.13,
"latency_mean": 481.61485321875,
"latency_std": 33.724130139913434,
"latency_50": 473.2563825,
"latency_90": 504.5333757,
"latency_95": 520.7592738999999,
"latency_99": 612.8124424300001,
"latency_999": 644.3983803430002
}
}
],
"others": {
"baseline": {
"accuracy": 0.98
},
"optimized": {
"accuracy": 0.98
}
}
},
"max_eval_samples": null,
"time_benchmark_args": {
"duration": 15,
"warmup_runs": 5
},
"model_type": "vit"
},
{
"model_name_or_path": "nateraw/vit-base-beans",
"task": "image-classification",
"task_args": null,
"dataset": {
"path": "beans",
"eval_split": "validation",
"data_keys": {
"primary": "image",
"secondary": null
},
"ref_keys": [
"labels"
],
"name": null,
"calibration_split": "train"
},
"quantization_approach": "dynamic",
"operators_to_quantize": [
"Add",
"MatMul"
],
"node_exclusion": [],
"aware_training": false,
"per_channel": true,
"calibration": {
"method": "minmax",
"num_calibration_samples": 100,
"calibration_histogram_percentile": null,
"calibration_moving_average": null,
"calibration_moving_average_constant": null
},
"framework": "onnxruntime",
"framework_args": {
"opset": 11,
"optimization_level": 1
},
"hardware": "Architecture: x86_64\nCPU op-mode(s): 32-bit, 64-bit\nByte Order: Little Endian\nAddress sizes: 46 bits physical, 48 bits virtual\nCPU(s): 8\nOn-line CPU(s) list: 0-7\nThread(s) per core: 2\nCore(s) per socket: 4\nSocket(s): 1\nNUMA node(s): 1\nVendor ID: GenuineIntel\nCPU family: 6\nModel: 85\nModel name: Intel(R) Xeon(R) Platinum 8259CL CPU @ 2.50GHz\nStepping: 7\nCPU MHz: 2594.420\nBogoMIPS: 4999.99\nHypervisor vendor: KVM\nVirtualization type: full\nL1d cache: 128 KiB\nL1i cache: 128 KiB\nL2 cache: 4 MiB\nL3 cache: 35.8 MiB\nNUMA node0 CPU(s): 0-7\nVulnerability Itlb multihit: KVM: Vulnerable\nVulnerability L1tf: Mitigation; PTE Inversion\nVulnerability Mds: Vulnerable: Clear CPU buffers attempted, no microcode; SMT Host state unknown\nVulnerability Meltdown: Mitigation; PTI\nVulnerability Spec store bypass: Vulnerable\nVulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization\nVulnerability Spectre v2: Mitigation; Retpolines, STIBP disabled, RSB filling\nVulnerability Srbds: Not affected\nVulnerability Tsx async abort: Not affected\nFlags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc rep_good nopl xtopology nonstop_tsc cpuid aperfmperf tsc_known_freq pni pclmulqdq ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand hypervisor lahf_lm abm 3dnowprefetch invpcid_single pti fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid mpx avx512f avx512dq rdseed adx smap clflushopt clwb avx512cd avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves ida arat pku ospke avx512_vnni\n",
"versions": {
"transformers": "4.20.1",
"optimum": "1.2.3.dev0",
"optimum_hash": "5c9af4e5f93c7e9bd523563230732b49603dc4d7"
},
"evaluation": {
"time": [
{
"batch_size": 1,
"input_length": 64,
"baseline": {
"nb_forwards": 96,
"throughput": 6.4,
"latency_mean": 157.21188266666667,
"latency_std": 4.2296673831318135,
"latency_50": 156.0635645,
"latency_90": 164.39148,
"latency_95": 168.00414375,
"latency_99": 169.24721239999997,
"latency_999": 172.15546754
},
"optimized": {
"nb_forwards": 234,
"throughput": 15.6,
"latency_mean": 64.15645528205128,
"latency_std": 0.8760033202077914,
"latency_50": 64.078188,
"latency_90": 65.0844361,
"latency_95": 65.42577375,
"latency_99": 66.03388837,
"latency_999": 70.50518821699998
}
},
{
"batch_size": 4,
"input_length": 128,
"baseline": {
"nb_forwards": 23,
"throughput": 1.53,
"latency_mean": 655.935887826087,
"latency_std": 1.5184807551034185,
"latency_50": 656.150496,
"latency_90": 657.7225057999999,
"latency_95": 658.3034359,
"latency_99": 658.49345218,
"latency_999": 658.5271777180001
},
"optimized": {
"nb_forwards": 69,
"throughput": 4.6,
"latency_mean": 220.12218289855073,
"latency_std": 2.8124226950487565,
"latency_50": 220.269108,
"latency_90": 223.81522719999998,
"latency_95": 224.73072179999997,
"latency_99": 226.68743828,
"latency_999": 227.86304682800002
}
},
{
"batch_size": 8,
"input_length": 64,
"baseline": {
"nb_forwards": 14,
"throughput": 0.93,
"latency_mean": 1107.6612250714286,
"latency_std": 153.3041188785755,
"latency_50": 1068.835057,
"latency_90": 1280.4905781,
"latency_95": 1289.3787047,
"latency_99": 1300.66569054,
"latency_999": 1303.205262354
},
"optimized": {
"nb_forwards": 32,
"throughput": 2.13,
"latency_mean": 472.52847371875,
"latency_std": 41.051488900475434,
"latency_50": 449.4546335,
"latency_90": 548.263878,
"latency_95": 549.79502655,
"latency_99": 552.90739222,
"latency_999": 553.856337622
}
},
{
"batch_size": 4,
"input_length": 64,
"baseline": {
"nb_forwards": 25,
"throughput": 1.67,
"latency_mean": 613.27841144,
"latency_std": 65.27331298428419,
"latency_50": 656.540737,
"latency_90": 661.4057177999999,
"latency_95": 661.429921,
"latency_99": 661.6534559600001,
"latency_999": 661.716672896
},
"optimized": {
"nb_forwards": 68,
"throughput": 4.53,
"latency_mean": 220.9577393235294,
"latency_std": 2.202118334734023,
"latency_50": 220.84368,
"latency_90": 223.2997182,
"latency_95": 224.4097824,
"latency_99": 226.46243421,
"latency_999": 227.056110021
}
},
{
"batch_size": 8,
"input_length": 128,
"baseline": {
"nb_forwards": 16,
"throughput": 1.07,
"latency_mean": 1005.3757105,
"latency_std": 110.08740222252929,
"latency_50": 951.764254,
"latency_90": 1173.9785045,
"latency_95": 1275.012984,
"latency_99": 1277.4162648,
"latency_999": 1277.95700298
},
"optimized": {
"nb_forwards": 35,
"throughput": 2.33,
"latency_mean": 440.0128511142857,
"latency_std": 6.045956719281095,
"latency_50": 440.399668,
"latency_90": 446.0546886,
"latency_95": 449.1048619,
"latency_99": 454.09199782,
"latency_999": 455.953342882
}
},
{
"batch_size": 1,
"input_length": 128,
"baseline": {
"nb_forwards": 77,
"throughput": 5.13,
"latency_mean": 195.38825248051947,
"latency_std": 15.566776674930802,
"latency_50": 201.26664,
"latency_90": 203.9999776,
"latency_95": 204.9478114,
"latency_99": 208.10657124,
"latency_999": 211.47291212400003
},
"optimized": {
"nb_forwards": 208,
"throughput": 13.87,
"latency_mean": 72.28192877884617,
"latency_std": 0.30617482097915,
"latency_50": 72.2594515,
"latency_90": 72.687902,
"latency_95": 72.8431132,
"latency_99": 73.03046864,
"latency_999": 73.322488771
}
},
{
"batch_size": 1,
"input_length": 32,
"baseline": {
"nb_forwards": 97,
"throughput": 6.47,
"latency_mean": 154.83997250515463,
"latency_std": 1.6989375176627797,
"latency_50": 154.770658,
"latency_90": 156.874254,
"latency_95": 157.6805368,
"latency_99": 158.86760128,
"latency_999": 159.947736928
},
"optimized": {
"nb_forwards": 207,
"throughput": 13.8,
"latency_mean": 72.51127445410629,
"latency_std": 0.27142401425407037,
"latency_50": 72.511206,
"latency_90": 72.86571620000001,
"latency_95": 72.9502418,
"latency_99": 73.14818106,
"latency_999": 73.181334838
}
},
{
"batch_size": 8,
"input_length": 32,
"baseline": {
"nb_forwards": 15,
"throughput": 1.0,
"latency_mean": 1030.1399602666665,
"latency_std": 124.65913381994983,
"latency_50": 955.359583,
"latency_90": 1262.7360772,
"latency_95": 1268.8635989000002,
"latency_99": 1279.42059738,
"latency_999": 1281.795922038
},
"optimized": {
"nb_forwards": 28,
"throughput": 1.87,
"latency_mean": 542.358436,
"latency_std": 5.562261406290527,
"latency_50": 540.1506655,
"latency_90": 552.4821344,
"latency_95": 552.7215868999999,
"latency_99": 558.17499099,
"latency_999": 559.9655274989999
}
},
{
"batch_size": 4,
"input_length": 32,
"baseline": {
"nb_forwards": 23,
"throughput": 1.53,
"latency_mean": 654.3281370434783,
"latency_std": 1.8605606203681557,
"latency_50": 653.634216,
"latency_90": 656.6254864,
"latency_95": 658.6546205,
"latency_99": 660.00098088,
"latency_999": 660.294841788
},
"optimized": {
"nb_forwards": 69,
"throughput": 4.6,
"latency_mean": 219.54447333333334,
"latency_std": 2.7890297777760664,
"latency_50": 219.354462,
"latency_90": 223.2168158,
"latency_95": 224.12707319999998,
"latency_99": 225.20001263999998,
"latency_999": 225.522583764
}
}
],
"others": {
"baseline": {
"accuracy": 0.98
},
"optimized": {
"accuracy": 0.98
}
}
},
"max_eval_samples": null,
"time_benchmark_args": {
"duration": 15,
"warmup_runs": 5
},
"model_type": "vit"
},
{
"model_name_or_path": "nateraw/vit-base-beans",
"task": "image-classification",
"task_args": null,
"dataset": {
"path": "beans",
"eval_split": "validation",
"data_keys": {
"primary": "image",
"secondary": null
},
"ref_keys": [
"labels"
],
"name": null,
"calibration_split": "train"
},
"quantization_approach": "dynamic",
"operators_to_quantize": [
"Add"
],
"node_exclusion": [],
"aware_training": false,
"per_channel": true,
"calibration": {
"method": "minmax",
"num_calibration_samples": 100,
"calibration_histogram_percentile": null,
"calibration_moving_average": null,
"calibration_moving_average_constant": null
},
"framework": "onnxruntime",
"framework_args": {
"opset": 11,
"optimization_level": 1
},
"hardware": "Architecture: x86_64\nCPU op-mode(s): 32-bit, 64-bit\nByte Order: Little Endian\nAddress sizes: 46 bits physical, 48 bits virtual\nCPU(s): 8\nOn-line CPU(s) list: 0-7\nThread(s) per core: 2\nCore(s) per socket: 4\nSocket(s): 1\nNUMA node(s): 1\nVendor ID: GenuineIntel\nCPU family: 6\nModel: 85\nModel name: Intel(R) Xeon(R) Platinum 8259CL CPU @ 2.50GHz\nStepping: 7\nCPU MHz: 3100.006\nBogoMIPS: 4999.99\nHypervisor vendor: KVM\nVirtualization type: full\nL1d cache: 128 KiB\nL1i cache: 128 KiB\nL2 cache: 4 MiB\nL3 cache: 35.8 MiB\nNUMA node0 CPU(s): 0-7\nVulnerability Itlb multihit: KVM: Vulnerable\nVulnerability L1tf: Mitigation; PTE Inversion\nVulnerability Mds: Vulnerable: Clear CPU buffers attempted, no microcode; SMT Host state unknown\nVulnerability Meltdown: Mitigation; PTI\nVulnerability Spec store bypass: Vulnerable\nVulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization\nVulnerability Spectre v2: Mitigation; Retpolines, STIBP disabled, RSB filling\nVulnerability Srbds: Not affected\nVulnerability Tsx async abort: Not affected\nFlags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc rep_good nopl xtopology nonstop_tsc cpuid aperfmperf tsc_known_freq pni pclmulqdq ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand hypervisor lahf_lm abm 3dnowprefetch invpcid_single pti fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid mpx avx512f avx512dq rdseed adx smap clflushopt clwb avx512cd avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves ida arat pku ospke avx512_vnni\n",
"versions": {
"transformers": "4.20.1",
"optimum": "1.2.3.dev0",
"optimum_hash": "5c9af4e5f93c7e9bd523563230732b49603dc4d7"
},
"evaluation": {
"time": [
{
"batch_size": 8,
"input_length": 128,
"baseline": {
"nb_forwards": 16,
"throughput": 1.07,
"latency_mean": 981.8066059375,
"latency_std": 44.423175785791656,
"latency_50": 968.222589,
"latency_90": 1030.6252395,
"latency_95": 1051.9451035,
"latency_99": 1099.3337503,
"latency_999": 1109.99619583
},
"optimized": {
"nb_forwards": 16,
"throughput": 1.07,
"latency_mean": 940.686023875,
"latency_std": 34.50079570717892,
"latency_50": 932.533706,
"latency_90": 954.537913,
"latency_95": 982.67146125,
"latency_99": 1049.86251225,
"latency_999": 1064.980498725
}
},
{
"batch_size": 4,
"input_length": 64,
"baseline": {
"nb_forwards": 23,
"throughput": 1.53,
"latency_mean": 653.6214047826087,
"latency_std": 0.554367349056088,
"latency_50": 653.56593,
"latency_90": 654.3410592,
"latency_95": 654.4254941,
"latency_99": 654.86828346,
"latency_999": 654.978897546
},
"optimized": {
"nb_forwards": 32,
"throughput": 2.13,
"latency_mean": 468.82132275,
"latency_std": 6.479021216722283,
"latency_50": 468.046428,
"latency_90": 477.1373799,
"latency_95": 478.81192385,
"latency_99": 481.5715929,
"latency_999": 482.31672099
}
},
{
"batch_size": 1,
"input_length": 64,
"baseline": {
"nb_forwards": 76,
"throughput": 5.07,
"latency_mean": 199.75279011842105,
"latency_std": 1.1029265450656718,
"latency_50": 199.5758745,
"latency_90": 200.400115,
"latency_95": 200.559785,
"latency_99": 202.84980075,
"latency_999": 207.81083137499996
},
"optimized": {
"nb_forwards": 115,
"throughput": 7.67,
"latency_mean": 131.4627194347826,
"latency_std": 4.6871974959568545,
"latency_50": 130.19088,
"latency_90": 139.359352,
"latency_95": 141.0757014,
"latency_99": 143.8943466,
"latency_999": 150.29109535799998
}
},
{
"batch_size": 4,
"input_length": 128,
"baseline": {
"nb_forwards": 23,
"throughput": 1.53,
"latency_mean": 661.8593041304348,
"latency_std": 11.283099142573517,
"latency_50": 655.43737,
"latency_90": 681.5352313999999,
"latency_95": 682.9110206,
"latency_99": 683.9518636399999,
"latency_999": 684.184243964
},
"optimized": {
"nb_forwards": 32,
"throughput": 2.13,
"latency_mean": 472.49058509375,
"latency_std": 8.963136445787091,
"latency_50": 474.708593,
"latency_90": 483.1333381,
"latency_95": 485.4448544,
"latency_99": 488.61856141000004,
"latency_999": 489.732493741
}
},
{
"batch_size": 8,
"input_length": 32,
"baseline": {
"nb_forwards": 12,
"throughput": 0.8,
"latency_mean": 1283.6983385,
"latency_std": 22.13170499191711,
"latency_50": 1275.341306,
"latency_90": 1321.6280871,
"latency_95": 1324.7206230499999,
"latency_99": 1326.57760301,
"latency_999": 1326.995423501
},
"optimized": {
"nb_forwards": 17,
"throughput": 1.13,
"latency_mean": 927.8742942352941,
"latency_std": 13.192113526204107,
"latency_50": 928.112074,
"latency_90": 943.312943,
"latency_95": 947.7967182000001,
"latency_99": 950.06144204,
"latency_999": 950.571004904
}
},
{
"batch_size": 1,
"input_length": 32,
"baseline": {
"nb_forwards": 92,
"throughput": 6.13,
"latency_mean": 163.7631747173913,
"latency_std": 16.656417748995572,
"latency_50": 155.936432,
"latency_90": 197.9161219,
"latency_95": 199.74363870000002,
"latency_99": 201.92121115999998,
"latency_999": 203.62556981600002
},
"optimized": {
"nb_forwards": 116,
"throughput": 7.73,
"latency_mean": 129.6231810086207,
"latency_std": 1.9548923667657356,
"latency_50": 129.4212125,
"latency_90": 132.2460935,
"latency_95": 132.81225825,
"latency_99": 133.86391385,
"latency_999": 138.651827795
}
},
{
"batch_size": 8,
"input_length": 64,
"baseline": {
"nb_forwards": 12,
"throughput": 0.8,
"latency_mean": 1286.5124550833332,
"latency_std": 23.2357907207992,
"latency_50": 1276.5111835,
"latency_90": 1323.3058475999999,
"latency_95": 1327.5837783,
"latency_99": 1330.1992940599998,
"latency_999": 1330.787785106
},
"optimized": {
"nb_forwards": 12,
"throughput": 0.8,
"latency_mean": 1317.95578525,
"latency_std": 1.490128618702992,
"latency_50": 1317.6128805,
"latency_90": 1320.2002315,
"latency_95": 1320.6199098,
"latency_99": 1320.96551396,
"latency_999": 1321.043274896
}
},
{
"batch_size": 4,
"input_length": 32,
"baseline": {
"nb_forwards": 23,
"throughput": 1.53,
"latency_mean": 666.9763079565217,
"latency_std": 11.149385297770825,
"latency_50": 661.47222,
"latency_90": 684.7818294,
"latency_95": 685.6954787000001,
"latency_99": 686.6990396799999,
"latency_999": 686.939640568
},
"optimized": {
"nb_forwards": 23,
"throughput": 1.53,
"latency_mean": 655.3178510869565,
"latency_std": 2.77492679226272,
"latency_50": 655.00068,
"latency_90": 659.4658294,
"latency_95": 660.3613685,
"latency_99": 660.48331564,
"latency_999": 660.4916490640001
}
},
{
"batch_size": 1,
"input_length": 128,
"baseline": {
"nb_forwards": 86,
"throughput": 5.73,
"latency_mean": 176.08157447674418,
"latency_std": 20.32932776355446,
"latency_50": 167.453177,
"latency_90": 197.6717735,
"latency_95": 198.75702125,
"latency_99": 199.23389685,
"latency_999": 199.447866585
},
"optimized": {
"nb_forwards": 93,
"throughput": 6.2,
"latency_mean": 161.78841179569892,
"latency_std": 0.2689887068096146,
"latency_50": 161.820936,
"latency_90": 162.0967684,
"latency_95": 162.21570319999998,
"latency_99": 162.39115031999998,
"latency_999": 162.649013532
}
}
],
"others": {
"baseline": {
"accuracy": 0.98
},
"optimized": {
"accuracy": 0.98
}
}
},
"max_eval_samples": null,
"time_benchmark_args": {
"duration": 15,
"warmup_runs": 5
},
"model_type": "vit"
},
{
"model_name_or_path": "nateraw/vit-base-beans",
"task": "image-classification",
"task_args": null,
"dataset": {
"path": "beans",
"eval_split": "validation",
"data_keys": {
"primary": "image",
"secondary": null
},
"ref_keys": [
"labels"
],
"name": null,
"calibration_split": "train"
},
"quantization_approach": "dynamic",
"operators_to_quantize": [
"Add",
"MatMul"
],
"node_exclusion": [
"layernorm",
"gelu",
"residual",
"gather",
"softmax"
],
"aware_training": false,
"per_channel": true,
"calibration": {
"method": "minmax",
"num_calibration_samples": 100,
"calibration_histogram_percentile": null,
"calibration_moving_average": null,
"calibration_moving_average_constant": null
},
"framework": "onnxruntime",
"framework_args": {
"opset": 11,
"optimization_level": 1
},
"hardware": "Architecture: x86_64\nCPU op-mode(s): 32-bit, 64-bit\nByte Order: Little Endian\nAddress sizes: 46 bits physical, 48 bits virtual\nCPU(s): 8\nOn-line CPU(s) list: 0-7\nThread(s) per core: 2\nCore(s) per socket: 4\nSocket(s): 1\nNUMA node(s): 1\nVendor ID: GenuineIntel\nCPU family: 6\nModel: 85\nModel name: Intel(R) Xeon(R) Platinum 8259CL CPU @ 2.50GHz\nStepping: 7\nCPU MHz: 3105.350\nBogoMIPS: 4999.99\nHypervisor vendor: KVM\nVirtualization type: full\nL1d cache: 128 KiB\nL1i cache: 128 KiB\nL2 cache: 4 MiB\nL3 cache: 35.8 MiB\nNUMA node0 CPU(s): 0-7\nVulnerability Itlb multihit: KVM: Vulnerable\nVulnerability L1tf: Mitigation; PTE Inversion\nVulnerability Mds: Vulnerable: Clear CPU buffers attempted, no microcode; SMT Host state unknown\nVulnerability Meltdown: Mitigation; PTI\nVulnerability Spec store bypass: Vulnerable\nVulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization\nVulnerability Spectre v2: Mitigation; Retpolines, STIBP disabled, RSB filling\nVulnerability Srbds: Not affected\nVulnerability Tsx async abort: Not affected\nFlags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc rep_good nopl xtopology nonstop_tsc cpuid aperfmperf tsc_known_freq pni pclmulqdq ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand hypervisor lahf_lm abm 3dnowprefetch invpcid_single pti fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid mpx avx512f avx512dq rdseed adx smap clflushopt clwb avx512cd avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves ida arat pku ospke avx512_vnni\n",
"versions": {
"transformers": "4.20.1",
"optimum": "1.2.3.dev0",
"optimum_hash": "5c9af4e5f93c7e9bd523563230732b49603dc4d7"
},
"evaluation": {
"time": [
{
"batch_size": 8,
"input_length": 64,
"baseline": {
"nb_forwards": 13,
"throughput": 0.87,
"latency_mean": 1157.0002205384615,
"latency_std": 173.96892806199435,
"latency_50": 1268.399731,
"latency_90": 1373.240541,
"latency_95": 1394.8925212000001,
"latency_99": 1395.4346634400001,
"latency_999": 1395.556645444
},
"optimized": {
"nb_forwards": 28,
"throughput": 1.87,
"latency_mean": 542.74753675,
"latency_std": 1.5526347456374558,
"latency_50": 542.544525,
"latency_90": 544.3944952,
"latency_95": 545.1945222999999,
"latency_99": 547.32045758,
"latency_999": 547.9295482580001
}
},
{
"batch_size": 1,
"input_length": 32,
"baseline": {
"nb_forwards": 76,
"throughput": 5.07,
"latency_mean": 198.1925660394737,
"latency_std": 1.5139423529661107,
"latency_50": 198.3774705,
"latency_90": 199.9859375,
"latency_95": 200.47868375,
"latency_99": 200.94235025,
"latency_999": 201.309621125
},
"optimized": {
"nb_forwards": 207,
"throughput": 13.8,
"latency_mean": 72.64656536231884,
"latency_std": 0.5352263228340901,
"latency_50": 72.569523,
"latency_90": 73.2376486,
"latency_95": 73.53506229999999,
"latency_99": 74.89236506,
"latency_999": 76.09928199800001
}
},
{
"batch_size": 8,
"input_length": 32,
"baseline": {
"nb_forwards": 12,
"throughput": 0.8,
"latency_mean": 1287.5803281666667,
"latency_std": 36.91340143279409,
"latency_50": 1269.6482595,
"latency_90": 1303.1859430999998,
"latency_95": 1346.71807165,
"latency_99": 1388.7522119300002,
"latency_999": 1398.209893493
},
"optimized": {
"nb_forwards": 28,
"throughput": 1.87,
"latency_mean": 542.7170135714285,
"latency_std": 1.3567039497139863,
"latency_50": 542.287221,
"latency_90": 544.4592137999999,
"latency_95": 545.4364649,
"latency_99": 546.21864882,
"latency_999": 546.315030882
}
},
{
"batch_size": 4,
"input_length": 64,
"baseline": {
"nb_forwards": 30,
"throughput": 2.0,
"latency_mean": 507.2400474666666,
"latency_std": 4.290148550841821,
"latency_50": 507.659231,
"latency_90": 511.6910329,
"latency_95": 514.4643897,
"latency_99": 516.23375722,
"latency_999": 516.269666122
},
"optimized": {
"nb_forwards": 57,
"throughput": 3.8,
"latency_mean": 265.6226849122807,
"latency_std": 2.3036455255332684,
"latency_50": 265.179855,
"latency_90": 269.8071572,
"latency_95": 271.4136062,
"latency_99": 271.69196852,
"latency_999": 271.82992995200004
}
},
{
"batch_size": 4,
"input_length": 32,
"baseline": {
"nb_forwards": 25,
"throughput": 1.67,
"latency_mean": 603.25719464,
"latency_std": 66.25874091678149,
"latency_50": 654.828359,
"latency_90": 660.8697482,
"latency_95": 661.393019,
"latency_99": 661.8344047999999,
"latency_999": 661.93484588
},
"optimized": {
"nb_forwards": 57,
"throughput": 3.8,
"latency_mean": 266.1465179649123,
"latency_std": 2.133106034837133,
"latency_50": 265.532055,
"latency_90": 269.2449408,
"latency_95": 270.3573558,
"latency_99": 271.02784404000005,
"latency_999": 271.716555504
}
},
{
"batch_size": 1,
"input_length": 64,
"baseline": {
"nb_forwards": 95,
"throughput": 6.33,
"latency_mean": 159.68801769473683,
"latency_std": 12.773363897780701,
"latency_50": 155.413717,
"latency_90": 176.90005560000023,
"latency_95": 197.0968159,
"latency_99": 198.07615253999998,
"latency_999": 198.10961945399998
},
"optimized": {
"nb_forwards": 207,
"throughput": 13.8,
"latency_mean": 72.7664098888889,
"latency_std": 0.38816228058385593,
"latency_50": 72.718874,
"latency_90": 73.24516159999999,
"latency_95": 73.43794679999999,
"latency_99": 73.8501665,
"latency_999": 73.996995134
}
},
{
"batch_size": 4,
"input_length": 128,
"baseline": {
"nb_forwards": 23,
"throughput": 1.53,
"latency_mean": 657.163290173913,
"latency_std": 1.3013676808424233,
"latency_50": 657.058665,
"latency_90": 657.6071907999999,
"latency_95": 659.0426878,
"latency_99": 661.57735552,
"latency_999": 662.180470252
},
"optimized": {
"nb_forwards": 63,
"throughput": 4.2,
"latency_mean": 240.11091615873016,
"latency_std": 21.14828674924693,
"latency_50": 226.793935,
"latency_90": 269.1414722,
"latency_95": 270.4462369,
"latency_99": 279.1629919800001,
"latency_999": 290.00248849800005
}
},
{
"batch_size": 1,
"input_length": 128,
"baseline": {
"nb_forwards": 95,
"throughput": 6.33,
"latency_mean": 158.60142149473685,
"latency_std": 11.827907365032553,
"latency_50": 155.594958,
"latency_90": 159.5463688,
"latency_95": 198.4467831,
"latency_99": 201.84369322,
"latency_999": 202.49142212200002
},
"optimized": {
"nb_forwards": 207,
"throughput": 13.8,
"latency_mean": 72.53135863285024,
"latency_std": 0.4182249154784389,
"latency_50": 72.447941,
"latency_90": 73.07772,
"latency_95": 73.2243443,
"latency_99": 73.8286485,
"latency_999": 73.97194594400001
}
},
{
"batch_size": 8,
"input_length": 128,
"baseline": {
"nb_forwards": 16,
"throughput": 1.07,
"latency_mean": 951.313504375,
"latency_std": 6.690910380125991,
"latency_50": 951.8069655,
"latency_90": 958.854554,
"latency_95": 960.087836,
"latency_99": 962.6794064,
"latency_999": 963.26250974
},
"optimized": {
"nb_forwards": 31,
"throughput": 2.07,
"latency_mean": 489.4035592258065,
"latency_std": 45.5667283182196,
"latency_50": 469.093915,
"latency_90": 553.663101,
"latency_95": 555.243522,
"latency_99": 556.1204486,
"latency_999": 556.42928486
}
}
],
"others": {
"baseline": {
"accuracy": 0.98
},
"optimized": {
"accuracy": 0.98
}
}
},
"max_eval_samples": null,
"time_benchmark_args": {
"duration": 15,
"warmup_runs": 5
},
"model_type": "vit"
}
]