diff --git "a/runs.json" "b/runs.json" new file mode 100644--- /dev/null +++ "b/runs.json" @@ -0,0 +1,2390 @@ +[ + { + "model_name_or_path": "nateraw/vit-base-beans", + "task": "image-classification", + "task_args": null, + "dataset": { + "path": "beans", + "eval_split": "validation", + "data_keys": { + "primary": "image", + "secondary": null + }, + "ref_keys": [ + "labels" + ], + "name": null, + "calibration_split": "train" + }, + "quantization_approach": "dynamic", + "operators_to_quantize": [ + "Add" + ], + "node_exclusion": [], + "aware_training": false, + "per_channel": false, + "calibration": { + "method": "minmax", + "num_calibration_samples": 100, + "calibration_histogram_percentile": null, + "calibration_moving_average": null, + "calibration_moving_average_constant": null + }, + "framework": "onnxruntime", + "framework_args": { + "opset": 11, + "optimization_level": 1 + }, + "hardware": "Architecture: x86_64\nCPU op-mode(s): 32-bit, 64-bit\nByte Order: Little Endian\nAddress sizes: 46 bits physical, 48 bits virtual\nCPU(s): 8\nOn-line CPU(s) list: 0-7\nThread(s) per core: 2\nCore(s) per socket: 4\nSocket(s): 1\nNUMA node(s): 1\nVendor ID: GenuineIntel\nCPU family: 6\nModel: 85\nModel name: Intel(R) Xeon(R) Platinum 8259CL CPU @ 2.50GHz\nStepping: 7\nCPU MHz: 3104.203\nBogoMIPS: 4999.99\nHypervisor vendor: KVM\nVirtualization type: full\nL1d cache: 128 KiB\nL1i cache: 128 KiB\nL2 cache: 4 MiB\nL3 cache: 35.8 MiB\nNUMA node0 CPU(s): 0-7\nVulnerability Itlb multihit: KVM: Vulnerable\nVulnerability L1tf: Mitigation; PTE Inversion\nVulnerability Mds: Vulnerable: Clear CPU buffers attempted, no microcode; SMT Host state unknown\nVulnerability Meltdown: Mitigation; PTI\nVulnerability Spec store bypass: Vulnerable\nVulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization\nVulnerability Spectre v2: Mitigation; Retpolines, STIBP disabled, RSB filling\nVulnerability Srbds: Not affected\nVulnerability Tsx async abort: Not affected\nFlags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc rep_good nopl xtopology nonstop_tsc cpuid aperfmperf tsc_known_freq pni pclmulqdq ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand hypervisor lahf_lm abm 3dnowprefetch invpcid_single pti fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid mpx avx512f avx512dq rdseed adx smap clflushopt clwb avx512cd avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves ida arat pku ospke avx512_vnni\n", + "versions": { + "transformers": "4.20.1", + "optimum": "1.2.3.dev0", + "optimum_hash": "5c9af4e5f93c7e9bd523563230732b49603dc4d7" + }, + "evaluation": { + "time": [ + { + "batch_size": 8, + "input_length": 64, + "baseline": { + "nb_forwards": 16, + "throughput": 1.07, + "latency_mean": 1007.1949454375, + "latency_std": 78.84058358158838, + "latency_50": 985.576602, + "latency_90": 1059.7841785, + "latency_95": 1145.035921, + "latency_99": 1250.375245, + "latency_999": 1274.0765929 + }, + "optimized": { + "nb_forwards": 12, + "throughput": 0.8, + "latency_mean": 1331.11930825, + "latency_std": 2.6333668250894813, + "latency_50": 1331.341538, + "latency_90": 1333.9482434000001, + "latency_95": 1334.2215575, + "latency_99": 1334.4844530999999, + "latency_999": 1334.5436046099999 + } + }, + { + "batch_size": 1, + "input_length": 64, + "baseline": { + "nb_forwards": 81, + "throughput": 5.4, + "latency_mean": 186.5835592716049, + "latency_std": 20.00760723775789, + "latency_50": 198.81694, + "latency_90": 201.024797, + "latency_95": 201.779275, + "latency_99": 202.413402, + "latency_999": 202.5442512 + }, + "optimized": { + "nb_forwards": 92, + "throughput": 6.13, + "latency_mean": 163.38389757608698, + "latency_std": 0.3275123768495738, + "latency_50": 163.3941135, + "latency_90": 163.7525755, + "latency_95": 163.926186, + "latency_99": 164.34097907, + "latency_999": 164.45778730700002 + } + }, + { + "batch_size": 1, + "input_length": 32, + "baseline": { + "nb_forwards": 76, + "throughput": 5.07, + "latency_mean": 198.9630147368421, + "latency_std": 1.8034042650036124, + "latency_50": 198.5655705, + "latency_90": 200.2815155, + "latency_95": 203.1906375, + "latency_99": 206.4380845, + "latency_999": 206.66971884999998 + }, + "optimized": { + "nb_forwards": 117, + "throughput": 7.8, + "latency_mean": 128.82420041880343, + "latency_std": 1.701156916298848, + "latency_50": 128.623607, + "latency_90": 131.323657, + "latency_95": 131.90711579999999, + "latency_99": 132.77440764, + "latency_999": 133.447005692 + } + }, + { + "batch_size": 4, + "input_length": 64, + "baseline": { + "nb_forwards": 23, + "throughput": 1.53, + "latency_mean": 655.365458, + "latency_std": 0.6236008690322891, + "latency_50": 655.34975, + "latency_90": 656.1519954, + "latency_95": 656.2574049, + "latency_99": 656.3535554199999, + "latency_999": 656.375521342 + }, + "optimized": { + "nb_forwards": 32, + "throughput": 2.13, + "latency_mean": 473.76923496875, + "latency_std": 7.2747277092183165, + "latency_50": 472.62492, + "latency_90": 483.41045330000003, + "latency_95": 484.61633445, + "latency_99": 489.37835364, + "latency_999": 490.82021336400004 + } + }, + { + "batch_size": 1, + "input_length": 128, + "baseline": { + "nb_forwards": 96, + "throughput": 6.4, + "latency_mean": 157.24992970833335, + "latency_std": 4.732553962424211, + "latency_50": 155.406953, + "latency_90": 164.713482, + "latency_95": 167.8590275, + "latency_99": 170.8462496, + "latency_999": 171.27275096 + }, + "optimized": { + "nb_forwards": 117, + "throughput": 7.8, + "latency_mean": 129.13340215384616, + "latency_std": 2.1864167617663997, + "latency_50": 128.928664, + "latency_90": 131.8907006, + "latency_95": 132.5507548, + "latency_99": 135.60957616, + "latency_999": 136.973290652 + } + }, + { + "batch_size": 8, + "input_length": 32, + "baseline": { + "nb_forwards": 12, + "throughput": 0.8, + "latency_mean": 1280.0728285, + "latency_std": 15.395205966597677, + "latency_50": 1270.8922015, + "latency_90": 1298.3091468, + "latency_95": 1300.7668938, + "latency_99": 1303.06202356, + "latency_999": 1303.578427756 + }, + "optimized": { + "nb_forwards": 17, + "throughput": 1.13, + "latency_mean": 926.9665002352941, + "latency_std": 9.31210282206815, + "latency_50": 930.379688, + "latency_90": 937.0084018, + "latency_95": 939.6581278, + "latency_99": 941.59419436, + "latency_999": 942.029809336 + } + }, + { + "batch_size": 4, + "input_length": 128, + "baseline": { + "nb_forwards": 23, + "throughput": 1.53, + "latency_mean": 656.3433247826088, + "latency_std": 0.8294053936962497, + "latency_50": 656.248342, + "latency_90": 657.3568842000001, + "latency_95": 657.4566738999999, + "latency_99": 658.36745674, + "latency_999": 658.596371074 + }, + "optimized": { + "nb_forwards": 23, + "throughput": 1.53, + "latency_mean": 661.2441610869565, + "latency_std": 1.5993679984326394, + "latency_50": 661.382511, + "latency_90": 662.625988, + "latency_95": 663.5314744, + "latency_99": 665.2728648, + "latency_999": 665.69171598 + } + }, + { + "batch_size": 8, + "input_length": 128, + "baseline": { + "nb_forwards": 12, + "throughput": 0.8, + "latency_mean": 1276.3800261666668, + "latency_std": 14.72110021637953, + "latency_50": 1268.308021, + "latency_90": 1294.9160152, + "latency_95": 1300.88363675, + "latency_99": 1306.34986895, + "latency_999": 1307.579771195 + }, + "optimized": { + "nb_forwards": 12, + "throughput": 0.8, + "latency_mean": 1319.8366895833333, + "latency_std": 2.9678765372708384, + "latency_50": 1319.0768965, + "latency_90": 1323.875593, + "latency_95": 1325.10346415, + "latency_99": 1326.0648056300001, + "latency_999": 1326.2811074630001 + } + }, + { + "batch_size": 4, + "input_length": 32, + "baseline": { + "nb_forwards": 26, + "throughput": 1.73, + "latency_mean": 588.8595787307693, + "latency_std": 75.12307910293818, + "latency_50": 656.586274, + "latency_90": 659.335121, + "latency_95": 659.60222425, + "latency_99": 661.19556825, + "latency_999": 661.6635563250001 + }, + "optimized": { + "nb_forwards": 25, + "throughput": 1.67, + "latency_mean": 602.9062813200001, + "latency_std": 87.18802711972026, + "latency_50": 660.573071, + "latency_90": 662.2857194, + "latency_95": 662.9589754, + "latency_99": 664.0132692799999, + "latency_999": 664.269413528 + } + } + ], + "others": { + "baseline": { + "accuracy": 0.98 + }, + "optimized": { + "accuracy": 0.98 + } + } + }, + "max_eval_samples": null, + "time_benchmark_args": { + "duration": 15, + "warmup_runs": 5 + }, + "model_type": "vit" + }, + { + "model_name_or_path": "nateraw/vit-base-beans", + "task": "image-classification", + "task_args": null, + "dataset": { + "path": "beans", + "eval_split": "validation", + "data_keys": { + "primary": "image", + "secondary": null + }, + "ref_keys": [ + "labels" + ], + "name": null, + "calibration_split": "train" + }, + "quantization_approach": "dynamic", + "operators_to_quantize": [ + "Add" + ], + "node_exclusion": [ + "layernorm", + "gelu", + "residual", + "gather", + "softmax" + ], + "aware_training": false, + "per_channel": true, + "calibration": { + "method": "minmax", + "num_calibration_samples": 100, + "calibration_histogram_percentile": null, + "calibration_moving_average": null, + "calibration_moving_average_constant": null + }, + "framework": "onnxruntime", + "framework_args": { + "opset": 11, + "optimization_level": 1 + }, + "hardware": "Architecture: x86_64\nCPU op-mode(s): 32-bit, 64-bit\nByte Order: Little Endian\nAddress sizes: 46 bits physical, 48 bits virtual\nCPU(s): 8\nOn-line CPU(s) list: 0-7\nThread(s) per core: 2\nCore(s) per socket: 4\nSocket(s): 1\nNUMA node(s): 1\nVendor ID: GenuineIntel\nCPU family: 6\nModel: 85\nModel name: Intel(R) Xeon(R) Platinum 8259CL CPU @ 2.50GHz\nStepping: 7\nCPU MHz: 3098.882\nBogoMIPS: 4999.99\nHypervisor vendor: KVM\nVirtualization type: full\nL1d cache: 128 KiB\nL1i cache: 128 KiB\nL2 cache: 4 MiB\nL3 cache: 35.8 MiB\nNUMA node0 CPU(s): 0-7\nVulnerability Itlb multihit: KVM: Vulnerable\nVulnerability L1tf: Mitigation; PTE Inversion\nVulnerability Mds: Vulnerable: Clear CPU buffers attempted, no microcode; SMT Host state unknown\nVulnerability Meltdown: Mitigation; PTI\nVulnerability Spec store bypass: Vulnerable\nVulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization\nVulnerability Spectre v2: Mitigation; Retpolines, STIBP disabled, RSB filling\nVulnerability Srbds: Not affected\nVulnerability Tsx async abort: Not affected\nFlags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc rep_good nopl xtopology nonstop_tsc cpuid aperfmperf tsc_known_freq pni pclmulqdq ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand hypervisor lahf_lm abm 3dnowprefetch invpcid_single pti fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid mpx avx512f avx512dq rdseed adx smap clflushopt clwb avx512cd avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves ida arat pku ospke avx512_vnni\n", + "versions": { + "transformers": "4.20.1", + "optimum": "1.2.3.dev0", + "optimum_hash": "5c9af4e5f93c7e9bd523563230732b49603dc4d7" + }, + "evaluation": { + "time": [ + { + "batch_size": 4, + "input_length": 32, + "baseline": { + "nb_forwards": 25, + "throughput": 1.67, + "latency_mean": 609.81228388, + "latency_std": 73.78848735281055, + "latency_50": 654.990393, + "latency_90": 690.0503104, + "latency_95": 692.5913532000001, + "latency_99": 692.88928036, + "latency_999": 692.967085936 + }, + "optimized": { + "nb_forwards": 24, + "throughput": 1.6, + "latency_mean": 632.7304657083333, + "latency_std": 67.87527940398857, + "latency_50": 665.6419855, + "latency_90": 668.6654992, + "latency_95": 668.7597744, + "latency_99": 669.6960696, + "latency_999": 669.94376166 + } + }, + { + "batch_size": 8, + "input_length": 128, + "baseline": { + "nb_forwards": 13, + "throughput": 0.87, + "latency_mean": 1186.5381284615385, + "latency_std": 161.4237069238961, + "latency_50": 1269.670847, + "latency_90": 1355.9492604000002, + "latency_95": 1390.1450375999998, + "latency_99": 1411.6607203199999, + "latency_999": 1416.5017489319998 + }, + "optimized": { + "nb_forwards": 17, + "throughput": 1.13, + "latency_mean": 934.0936628235294, + "latency_std": 14.582536342073604, + "latency_50": 935.050755, + "latency_90": 952.4674898, + "latency_95": 957.0302138, + "latency_99": 962.9052563600001, + "latency_999": 964.227140936 + } + }, + { + "batch_size": 1, + "input_length": 64, + "baseline": { + "nb_forwards": 76, + "throughput": 5.07, + "latency_mean": 198.5647816973684, + "latency_std": 3.4715584501636134, + "latency_50": 198.506581, + "latency_90": 200.2144805, + "latency_95": 200.55136775, + "latency_99": 207.540032, + "latency_999": 223.25337049999993 + }, + "optimized": { + "nb_forwards": 93, + "throughput": 6.2, + "latency_mean": 162.5132122580645, + "latency_std": 0.513116818309582, + "latency_50": 162.371852, + "latency_90": 163.2859852, + "latency_95": 163.7528158, + "latency_99": 163.85883275999998, + "latency_999": 163.915920876 + } + }, + { + "batch_size": 8, + "input_length": 32, + "baseline": { + "nb_forwards": 13, + "throughput": 0.87, + "latency_mean": 1173.0143999230768, + "latency_std": 130.54371262227698, + "latency_50": 1260.605467, + "latency_90": 1296.8186462, + "latency_95": 1308.2374858, + "latency_99": 1319.8486531600001, + "latency_999": 1322.461165816 + }, + "optimized": { + "nb_forwards": 16, + "throughput": 1.07, + "latency_mean": 995.22072925, + "latency_std": 129.45556415437287, + "latency_50": 946.254189, + "latency_90": 1213.3328715, + "latency_95": 1315.33706625, + "latency_99": 1318.61972205, + "latency_999": 1319.358319605 + } + }, + { + "batch_size": 4, + "input_length": 64, + "baseline": { + "nb_forwards": 29, + "throughput": 1.93, + "latency_mean": 521.0909943793104, + "latency_std": 46.475380124669094, + "latency_50": 506.281592, + "latency_90": 544.5816703999999, + "latency_95": 656.4042, + "latency_99": 658.84548128, + "latency_999": 659.635381328 + }, + "optimized": { + "nb_forwards": 32, + "throughput": 2.13, + "latency_mean": 472.8995814375, + "latency_std": 7.852107004932393, + "latency_50": 471.6044875, + "latency_90": 483.1297946, + "latency_95": 485.7553554, + "latency_99": 490.66026108, + "latency_999": 492.539614008 + } + }, + { + "batch_size": 1, + "input_length": 128, + "baseline": { + "nb_forwards": 97, + "throughput": 6.47, + "latency_mean": 156.2440793814433, + "latency_std": 1.9840595219565056, + "latency_50": 156.225903, + "latency_90": 158.593671, + "latency_95": 159.64843179999997, + "latency_99": 161.01634776, + "latency_999": 161.629544976 + }, + "optimized": { + "nb_forwards": 93, + "throughput": 6.2, + "latency_mean": 162.63202710752688, + "latency_std": 0.7398820002769471, + "latency_50": 162.382878, + "latency_90": 163.62236180000002, + "latency_95": 163.8380992, + "latency_99": 164.88131604, + "latency_999": 165.77600150400002 + } + }, + { + "batch_size": 4, + "input_length": 128, + "baseline": { + "nb_forwards": 24, + "throughput": 1.6, + "latency_mean": 642.4793479166666, + "latency_std": 41.85394569042994, + "latency_50": 654.998135, + "latency_90": 656.7839087000001, + "latency_95": 657.02353375, + "latency_99": 657.8482725700001, + "latency_999": 658.060870057 + }, + "optimized": { + "nb_forwards": 23, + "throughput": 1.53, + "latency_mean": 666.2755957826088, + "latency_std": 1.7410908635173166, + "latency_50": 666.717442, + "latency_90": 668.0602384, + "latency_95": 668.910585, + "latency_99": 669.7671928, + "latency_999": 669.9619772799999 + } + }, + { + "batch_size": 1, + "input_length": 32, + "baseline": { + "nb_forwards": 75, + "throughput": 5.0, + "latency_mean": 201.75692086666666, + "latency_std": 1.8320009608554637, + "latency_50": 201.71631, + "latency_90": 204.2971264, + "latency_95": 204.8295851, + "latency_99": 206.53276574, + "latency_999": 206.55384397400002 + }, + "optimized": { + "nb_forwards": 115, + "throughput": 7.67, + "latency_mean": 131.24782122608696, + "latency_std": 4.242274517523887, + "latency_50": 129.902701, + "latency_90": 138.24299720000002, + "latency_95": 141.03294480000002, + "latency_99": 142.079643, + "latency_999": 142.57829744400001 + } + }, + { + "batch_size": 8, + "input_length": 64, + "baseline": { + "nb_forwards": 12, + "throughput": 0.8, + "latency_mean": 1263.2922429166667, + "latency_std": 77.36389645216727, + "latency_50": 1278.0368575, + "latency_90": 1328.6916195, + "latency_95": 1342.47310885, + "latency_99": 1353.1162041700002, + "latency_999": 1355.510900617 + }, + "optimized": { + "nb_forwards": 15, + "throughput": 1.0, + "latency_mean": 1012.6554306, + "latency_std": 156.31890427073202, + "latency_50": 934.872073, + "latency_90": 1322.7367448, + "latency_95": 1323.9433467000001, + "latency_99": 1325.30759014, + "latency_999": 1325.6145449140001 + } + } + ], + "others": { + "baseline": { + "accuracy": 0.98 + }, + "optimized": { + "accuracy": 0.98 + } + } + }, + "max_eval_samples": null, + "time_benchmark_args": { + "duration": 15, + "warmup_runs": 5 + }, + "model_type": "vit" + }, + { + "model_name_or_path": "nateraw/vit-base-beans", + "task": "image-classification", + "task_args": null, + "dataset": { + "path": "beans", + "eval_split": "validation", + "data_keys": { + "primary": "image", + "secondary": null + }, + "ref_keys": [ + "labels" + ], + "name": null, + "calibration_split": "train" + }, + "quantization_approach": "dynamic", + "operators_to_quantize": [ + "Add", + "MatMul" + ], + "node_exclusion": [], + "aware_training": false, + "per_channel": false, + "calibration": { + "method": "minmax", + "num_calibration_samples": 100, + "calibration_histogram_percentile": null, + "calibration_moving_average": null, + "calibration_moving_average_constant": null + }, + "framework": "onnxruntime", + "framework_args": { + "opset": 11, + "optimization_level": 1 + }, + "hardware": "Architecture: x86_64\nCPU op-mode(s): 32-bit, 64-bit\nByte Order: Little Endian\nAddress sizes: 46 bits physical, 48 bits virtual\nCPU(s): 8\nOn-line CPU(s) list: 0-7\nThread(s) per core: 2\nCore(s) per socket: 4\nSocket(s): 1\nNUMA node(s): 1\nVendor ID: GenuineIntel\nCPU family: 6\nModel: 85\nModel name: Intel(R) Xeon(R) Platinum 8259CL CPU @ 2.50GHz\nStepping: 7\nCPU MHz: 3099.508\nBogoMIPS: 4999.99\nHypervisor vendor: KVM\nVirtualization type: full\nL1d cache: 128 KiB\nL1i cache: 128 KiB\nL2 cache: 4 MiB\nL3 cache: 35.8 MiB\nNUMA node0 CPU(s): 0-7\nVulnerability Itlb multihit: KVM: Vulnerable\nVulnerability L1tf: Mitigation; PTE Inversion\nVulnerability Mds: Vulnerable: Clear CPU buffers attempted, no microcode; SMT Host state unknown\nVulnerability Meltdown: Mitigation; PTI\nVulnerability Spec store bypass: Vulnerable\nVulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization\nVulnerability Spectre v2: Mitigation; Retpolines, STIBP disabled, RSB filling\nVulnerability Srbds: Not affected\nVulnerability Tsx async abort: Not affected\nFlags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc rep_good nopl xtopology nonstop_tsc cpuid aperfmperf tsc_known_freq pni pclmulqdq ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand hypervisor lahf_lm abm 3dnowprefetch invpcid_single pti fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid mpx avx512f avx512dq rdseed adx smap clflushopt clwb avx512cd avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves ida arat pku ospke avx512_vnni\n", + "versions": { + "transformers": "4.20.1", + "optimum": "1.2.3.dev0", + "optimum_hash": "5c9af4e5f93c7e9bd523563230732b49603dc4d7" + }, + "evaluation": { + "time": [ + { + "batch_size": 4, + "input_length": 128, + "baseline": { + "nb_forwards": 30, + "throughput": 2.0, + "latency_mean": 504.13991656666667, + "latency_std": 4.513179886057779, + "latency_50": 503.19925, + "latency_90": 511.38798610000003, + "latency_95": 512.42949615, + "latency_99": 513.05507925, + "latency_999": 513.2059829250001 + }, + "optimized": { + "nb_forwards": 69, + "throughput": 4.6, + "latency_mean": 217.46600939130434, + "latency_std": 2.972318261728456, + "latency_50": 217.184814, + "latency_90": 220.705846, + "latency_95": 222.9682218, + "latency_99": 224.70305839999997, + "latency_999": 226.06056314000003 + } + }, + { + "batch_size": 1, + "input_length": 32, + "baseline": { + "nb_forwards": 79, + "throughput": 5.27, + "latency_mean": 191.43747164556962, + "latency_std": 15.655307063217192, + "latency_50": 198.595354, + "latency_90": 200.57276240000002, + "latency_95": 200.87837530000002, + "latency_99": 201.16657547999998, + "latency_999": 201.356981748 + }, + "optimized": { + "nb_forwards": 238, + "throughput": 15.87, + "latency_mean": 63.268969394957985, + "latency_std": 0.9427228875615695, + "latency_50": 63.1408055, + "latency_90": 64.2273272, + "latency_95": 65.1072349, + "latency_99": 66.96859157, + "latency_999": 67.69891359200001 + } + }, + { + "batch_size": 8, + "input_length": 64, + "baseline": { + "nb_forwards": 16, + "throughput": 1.07, + "latency_mean": 968.852768375, + "latency_std": 38.04343260757334, + "latency_50": 952.2546675, + "latency_90": 1038.7362795, + "latency_95": 1054.559503, + "latency_99": 1057.6485166, + "latency_999": 1058.34354466 + }, + "optimized": { + "nb_forwards": 28, + "throughput": 1.87, + "latency_mean": 537.6509522857143, + "latency_std": 3.8349742843783643, + "latency_50": 536.4832135, + "latency_90": 541.9351384, + "latency_95": 546.1624122000001, + "latency_99": 550.43430736, + "latency_999": 551.363741536 + } + }, + { + "batch_size": 8, + "input_length": 128, + "baseline": { + "nb_forwards": 12, + "throughput": 0.8, + "latency_mean": 1278.7261375, + "latency_std": 13.900041175798188, + "latency_50": 1275.258701, + "latency_90": 1298.9398739, + "latency_95": 1304.62491295, + "latency_99": 1308.62324179, + "latency_999": 1309.522865779 + }, + "optimized": { + "nb_forwards": 28, + "throughput": 1.87, + "latency_mean": 537.52356, + "latency_std": 3.4322833452073542, + "latency_50": 536.6087305, + "latency_90": 542.7677001000001, + "latency_95": 545.60897155, + "latency_99": 546.32498686, + "latency_999": 546.4325099859999 + } + }, + { + "batch_size": 4, + "input_length": 64, + "baseline": { + "nb_forwards": 23, + "throughput": 1.53, + "latency_mean": 655.3645512173913, + "latency_std": 1.9257989113526919, + "latency_50": 654.646442, + "latency_90": 657.282423, + "latency_95": 659.662086, + "latency_99": 660.8313084800001, + "latency_999": 661.061189648 + }, + "optimized": { + "nb_forwards": 69, + "throughput": 4.6, + "latency_mean": 219.61360882608696, + "latency_std": 10.272944961242715, + "latency_50": 217.050361, + "latency_90": 221.35356240000002, + "latency_95": 244.56632359999978, + "latency_99": 260.41507764, + "latency_999": 261.058168464 + } + }, + { + "batch_size": 4, + "input_length": 32, + "baseline": { + "nb_forwards": 23, + "throughput": 1.53, + "latency_mean": 654.7864544347826, + "latency_std": 1.3045747652970006, + "latency_50": 654.519817, + "latency_90": 655.7390292, + "latency_95": 656.9679398, + "latency_99": 659.0271437, + "latency_999": 659.51908757 + }, + "optimized": { + "nb_forwards": 69, + "throughput": 4.6, + "latency_mean": 217.45058371014494, + "latency_std": 2.719898318573885, + "latency_50": 217.033952, + "latency_90": 221.1297136, + "latency_95": 222.2854166, + "latency_99": 223.81499584, + "latency_999": 224.26408878400002 + } + }, + { + "batch_size": 8, + "input_length": 32, + "baseline": { + "nb_forwards": 15, + "throughput": 1.0, + "latency_mean": 1033.3692992666668, + "latency_std": 121.20856568749772, + "latency_50": 963.132488, + "latency_90": 1262.2027165999998, + "latency_95": 1265.428772, + "latency_99": 1270.8415024, + "latency_999": 1272.05936674 + }, + "optimized": { + "nb_forwards": 35, + "throughput": 2.33, + "latency_mean": 433.3171128, + "latency_std": 5.620352976190466, + "latency_50": 432.092966, + "latency_90": 439.51550460000004, + "latency_95": 442.0759091, + "latency_99": 449.04011729999996, + "latency_999": 451.02373323 + } + }, + { + "batch_size": 1, + "input_length": 128, + "baseline": { + "nb_forwards": 75, + "throughput": 5.0, + "latency_mean": 200.45710213333334, + "latency_std": 0.912546986602039, + "latency_50": 200.443548, + "latency_90": 201.5305846, + "latency_95": 202.211401, + "latency_99": 202.77304016, + "latency_999": 202.841815316 + }, + "optimized": { + "nb_forwards": 239, + "throughput": 15.93, + "latency_mean": 62.95156658158996, + "latency_std": 0.6494732254002147, + "latency_50": 62.920241, + "latency_90": 63.867974, + "latency_95": 64.1582552, + "latency_99": 64.69974346000001, + "latency_999": 64.96208175 + } + }, + { + "batch_size": 1, + "input_length": 64, + "baseline": { + "nb_forwards": 82, + "throughput": 5.47, + "latency_mean": 183.10327674390246, + "latency_std": 21.77607771500049, + "latency_50": 197.6686845, + "latency_90": 202.83920880000002, + "latency_95": 203.04726945, + "latency_99": 204.23605759999998, + "latency_999": 204.50013056 + }, + "optimized": { + "nb_forwards": 235, + "throughput": 15.67, + "latency_mean": 64.02330576170213, + "latency_std": 1.559037537525157, + "latency_50": 63.56593, + "latency_90": 66.4842204, + "latency_95": 67.527317, + "latency_99": 68.7520603, + "latency_999": 70.43527871799999 + } + } + ], + "others": { + "baseline": { + "accuracy": 0.98 + }, + "optimized": { + "accuracy": 0.98 + } + } + }, + "max_eval_samples": null, + "time_benchmark_args": { + "duration": 15, + "warmup_runs": 5 + }, + "model_type": "vit" + }, + { + "model_name_or_path": "nateraw/vit-base-beans", + "task": "image-classification", + "task_args": null, + "dataset": { + "path": "beans", + "eval_split": "validation", + "data_keys": { + "primary": "image", + "secondary": null + }, + "ref_keys": [ + "labels" + ], + "name": null, + "calibration_split": "train" + }, + "quantization_approach": "dynamic", + "operators_to_quantize": [ + "Add", + "MatMul" + ], + "node_exclusion": [ + "layernorm", + "gelu", + "residual", + "gather", + "softmax" + ], + "aware_training": false, + "per_channel": false, + "calibration": { + "method": "minmax", + "num_calibration_samples": 100, + "calibration_histogram_percentile": null, + "calibration_moving_average": null, + "calibration_moving_average_constant": null + }, + "framework": "onnxruntime", + "framework_args": { + "opset": 11, + "optimization_level": 1 + }, + "hardware": "Architecture: x86_64\nCPU op-mode(s): 32-bit, 64-bit\nByte Order: Little Endian\nAddress sizes: 46 bits physical, 48 bits virtual\nCPU(s): 8\nOn-line CPU(s) list: 0-7\nThread(s) per core: 2\nCore(s) per socket: 4\nSocket(s): 1\nNUMA node(s): 1\nVendor ID: GenuineIntel\nCPU family: 6\nModel: 85\nModel name: Intel(R) Xeon(R) Platinum 8259CL CPU @ 2.50GHz\nStepping: 7\nCPU MHz: 3098.359\nBogoMIPS: 4999.99\nHypervisor vendor: KVM\nVirtualization type: full\nL1d cache: 128 KiB\nL1i cache: 128 KiB\nL2 cache: 4 MiB\nL3 cache: 35.8 MiB\nNUMA node0 CPU(s): 0-7\nVulnerability Itlb multihit: KVM: Vulnerable\nVulnerability L1tf: Mitigation; PTE Inversion\nVulnerability Mds: Vulnerable: Clear CPU buffers attempted, no microcode; SMT Host state unknown\nVulnerability Meltdown: Mitigation; PTI\nVulnerability Spec store bypass: Vulnerable\nVulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization\nVulnerability Spectre v2: Mitigation; Retpolines, STIBP disabled, RSB filling\nVulnerability Srbds: Not affected\nVulnerability Tsx async abort: Not affected\nFlags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc rep_good nopl xtopology nonstop_tsc cpuid aperfmperf tsc_known_freq pni pclmulqdq ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand hypervisor lahf_lm abm 3dnowprefetch invpcid_single pti fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid mpx avx512f avx512dq rdseed adx smap clflushopt clwb avx512cd avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves ida arat pku ospke avx512_vnni\n", + "versions": { + "transformers": "4.20.1", + "optimum": "1.2.3.dev0", + "optimum_hash": "5c9af4e5f93c7e9bd523563230732b49603dc4d7" + }, + "evaluation": { + "time": [ + { + "batch_size": 8, + "input_length": 128, + "baseline": { + "nb_forwards": 13, + "throughput": 0.87, + "latency_mean": 1188.9833955384615, + "latency_std": 156.3766424391915, + "latency_50": 1271.045981, + "latency_90": 1320.508096, + "latency_95": 1351.0301161999998, + "latency_99": 1381.7705920399999, + "latency_999": 1388.6871991039998 + }, + "optimized": { + "nb_forwards": 28, + "throughput": 1.87, + "latency_mean": 537.5845056428572, + "latency_std": 24.479382275992702, + "latency_50": 544.233773, + "latency_90": 549.9048538999999, + "latency_95": 550.3655834, + "latency_99": 551.47189197, + "latency_999": 551.817216597 + } + }, + { + "batch_size": 8, + "input_length": 32, + "baseline": { + "nb_forwards": 12, + "throughput": 0.8, + "latency_mean": 1294.067229, + "latency_std": 42.49668221169809, + "latency_50": 1276.250713, + "latency_90": 1376.546508, + "latency_95": 1386.4398829000002, + "latency_99": 1387.82451098, + "latency_999": 1388.136052298 + }, + "optimized": { + "nb_forwards": 32, + "throughput": 2.13, + "latency_mean": 472.54393821875, + "latency_std": 44.30277356434995, + "latency_50": 448.9115105, + "latency_90": 538.9957706, + "latency_95": 539.2858448, + "latency_99": 541.8835004299999, + "latency_999": 542.8368844429999 + } + }, + { + "batch_size": 1, + "input_length": 32, + "baseline": { + "nb_forwards": 75, + "throughput": 5.0, + "latency_mean": 200.50050148, + "latency_std": 0.8285022604698893, + "latency_50": 200.26392, + "latency_90": 201.65092280000002, + "latency_95": 202.4235563, + "latency_99": 202.9026252, + "latency_999": 203.01259512000001 + }, + "optimized": { + "nb_forwards": 239, + "throughput": 15.93, + "latency_mean": 62.99670253974895, + "latency_std": 0.6391249990225535, + "latency_50": 62.961666, + "latency_90": 63.847294399999996, + "latency_95": 64.0924426, + "latency_99": 64.62881584, + "latency_999": 64.996266822 + } + }, + { + "batch_size": 8, + "input_length": 64, + "baseline": { + "nb_forwards": 12, + "throughput": 0.8, + "latency_mean": 1273.6051258333332, + "latency_std": 12.303101937934832, + "latency_50": 1270.9759965, + "latency_90": 1294.2611932, + "latency_95": 1297.39174085, + "latency_99": 1299.14183777, + "latency_999": 1299.5356095769998 + }, + "optimized": { + "nb_forwards": 35, + "throughput": 2.33, + "latency_mean": 435.2721461714286, + "latency_std": 5.5886169074680705, + "latency_50": 434.611038, + "latency_90": 441.9488788, + "latency_95": 444.0377945, + "latency_99": 447.65647636, + "latency_999": 448.09250983600003 + } + }, + { + "batch_size": 4, + "input_length": 64, + "baseline": { + "nb_forwards": 23, + "throughput": 1.53, + "latency_mean": 656.8662442608696, + "latency_std": 1.963675172824631, + "latency_50": 656.523145, + "latency_90": 658.1741306, + "latency_95": 658.2483522, + "latency_99": 663.4018644600001, + "latency_999": 664.7094380460001 + }, + "optimized": { + "nb_forwards": 70, + "throughput": 4.67, + "latency_mean": 216.32413269999998, + "latency_std": 2.573320955654004, + "latency_50": 216.1438355, + "latency_90": 219.18598509999998, + "latency_95": 220.69771815000001, + "latency_99": 223.89661489000002, + "latency_999": 226.28716618899998 + } + }, + { + "batch_size": 4, + "input_length": 128, + "baseline": { + "nb_forwards": 23, + "throughput": 1.53, + "latency_mean": 654.2367223043478, + "latency_std": 1.3812737872427574, + "latency_50": 653.93458, + "latency_90": 655.7302913999999, + "latency_95": 656.3990489, + "latency_99": 658.70954416, + "latency_999": 659.2843920160001 + }, + "optimized": { + "nb_forwards": 70, + "throughput": 4.67, + "latency_mean": 216.82448075714288, + "latency_std": 2.4549019943848234, + "latency_50": 216.682762, + "latency_90": 219.8229577, + "latency_95": 221.14088784999998, + "latency_99": 222.15547067000003, + "latency_999": 223.170220067 + } + }, + { + "batch_size": 1, + "input_length": 64, + "baseline": { + "nb_forwards": 93, + "throughput": 6.2, + "latency_mean": 162.75067422580645, + "latency_std": 16.670443274832795, + "latency_50": 155.50524, + "latency_90": 197.1873276, + "latency_95": 198.3428246, + "latency_99": 199.21549572, + "latency_999": 199.303705872 + }, + "optimized": { + "nb_forwards": 224, + "throughput": 14.93, + "latency_mean": 67.17887016071428, + "latency_std": 3.767553337517106, + "latency_50": 69.853314, + "latency_90": 70.8704738, + "latency_95": 71.48267955, + "latency_99": 72.17330481, + "latency_999": 73.872151108 + } + }, + { + "batch_size": 1, + "input_length": 128, + "baseline": { + "nb_forwards": 94, + "throughput": 6.27, + "latency_mean": 160.5772765319149, + "latency_std": 13.789324382626834, + "latency_50": 155.949789, + "latency_90": 192.97832880000007, + "latency_95": 199.62176935, + "latency_99": 200.06097094, + "latency_999": 200.418572494 + }, + "optimized": { + "nb_forwards": 222, + "throughput": 14.8, + "latency_mean": 67.64596318918919, + "latency_std": 3.8012002735511974, + "latency_50": 70.4326435, + "latency_90": 71.0351658, + "latency_95": 71.17170809999999, + "latency_99": 71.37054176000001, + "latency_999": 71.686580853 + } + }, + { + "batch_size": 4, + "input_length": 32, + "baseline": { + "nb_forwards": 30, + "throughput": 2.0, + "latency_mean": 503.8281891666667, + "latency_std": 3.79639368455217, + "latency_50": 503.337252, + "latency_90": 509.18891210000004, + "latency_95": 509.43762219999996, + "latency_99": 511.16002555, + "latency_999": 511.72013285500003 + }, + "optimized": { + "nb_forwards": 69, + "throughput": 4.6, + "latency_mean": 219.61836788405796, + "latency_std": 6.812193992706365, + "latency_50": 216.499815, + "latency_90": 231.0559726, + "latency_95": 233.1648294, + "latency_99": 239.13513347999998, + "latency_999": 240.964685448 + } + } + ], + "others": { + "baseline": { + "accuracy": 0.98 + }, + "optimized": { + "accuracy": 0.98 + } + } + }, + "max_eval_samples": null, + "time_benchmark_args": { + "duration": 15, + "warmup_runs": 5 + }, + "model_type": "vit" + }, + { + "model_name_or_path": "nateraw/vit-base-beans", + "task": "image-classification", + "task_args": null, + "dataset": { + "path": "beans", + "eval_split": "validation", + "data_keys": { + "primary": "image", + "secondary": null + }, + "ref_keys": [ + "labels" + ], + "name": null, + "calibration_split": "train" + }, + "quantization_approach": "dynamic", + "operators_to_quantize": [ + "Add" + ], + "node_exclusion": [ + "layernorm", + "gelu", + "residual", + "gather", + "softmax" + ], + "aware_training": false, + "per_channel": false, + "calibration": { + "method": "minmax", + "num_calibration_samples": 100, + "calibration_histogram_percentile": null, + "calibration_moving_average": null, + "calibration_moving_average_constant": null + }, + "framework": "onnxruntime", + "framework_args": { + "opset": 11, + "optimization_level": 1 + }, + "hardware": "Architecture: x86_64\nCPU op-mode(s): 32-bit, 64-bit\nByte Order: Little Endian\nAddress sizes: 46 bits physical, 48 bits virtual\nCPU(s): 8\nOn-line CPU(s) list: 0-7\nThread(s) per core: 2\nCore(s) per socket: 4\nSocket(s): 1\nNUMA node(s): 1\nVendor ID: GenuineIntel\nCPU family: 6\nModel: 85\nModel name: Intel(R) Xeon(R) Platinum 8259CL CPU @ 2.50GHz\nStepping: 7\nCPU MHz: 3100.080\nBogoMIPS: 4999.99\nHypervisor vendor: KVM\nVirtualization type: full\nL1d cache: 128 KiB\nL1i cache: 128 KiB\nL2 cache: 4 MiB\nL3 cache: 35.8 MiB\nNUMA node0 CPU(s): 0-7\nVulnerability Itlb multihit: KVM: Vulnerable\nVulnerability L1tf: Mitigation; PTE Inversion\nVulnerability Mds: Vulnerable: Clear CPU buffers attempted, no microcode; SMT Host state unknown\nVulnerability Meltdown: Mitigation; PTI\nVulnerability Spec store bypass: Vulnerable\nVulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization\nVulnerability Spectre v2: Mitigation; Retpolines, STIBP disabled, RSB filling\nVulnerability Srbds: Not affected\nVulnerability Tsx async abort: Not affected\nFlags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc rep_good nopl xtopology nonstop_tsc cpuid aperfmperf tsc_known_freq pni pclmulqdq ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand hypervisor lahf_lm abm 3dnowprefetch invpcid_single pti fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid mpx avx512f avx512dq rdseed adx smap clflushopt clwb avx512cd avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves ida arat pku ospke avx512_vnni\n", + "versions": { + "transformers": "4.20.1", + "optimum": "1.2.3.dev0", + "optimum_hash": "5c9af4e5f93c7e9bd523563230732b49603dc4d7" + }, + "evaluation": { + "time": [ + { + "batch_size": 8, + "input_length": 64, + "baseline": { + "nb_forwards": 12, + "throughput": 0.8, + "latency_mean": 1270.30411025, + "latency_std": 8.657521693333212, + "latency_50": 1269.1937825, + "latency_90": 1281.7940697000001, + "latency_95": 1283.1798158, + "latency_99": 1284.16803116, + "latency_999": 1284.390379616 + }, + "optimized": { + "nb_forwards": 14, + "throughput": 0.93, + "latency_mean": 1092.104026, + "latency_std": 192.38390460063763, + "latency_50": 942.50711, + "latency_90": 1317.2986979000002, + "latency_95": 1317.7165482999999, + "latency_99": 1317.88203206, + "latency_999": 1317.919265906 + } + }, + { + "batch_size": 1, + "input_length": 64, + "baseline": { + "nb_forwards": 97, + "throughput": 6.47, + "latency_mean": 155.31766736082474, + "latency_std": 2.58376281346955, + "latency_50": 154.80961, + "latency_90": 158.62567140000002, + "latency_95": 159.8607244, + "latency_99": 162.57999983999997, + "latency_999": 166.451882784 + }, + "optimized": { + "nb_forwards": 115, + "throughput": 7.67, + "latency_mean": 130.7428943826087, + "latency_std": 4.439134079400091, + "latency_50": 129.426836, + "latency_90": 139.1114094, + "latency_95": 140.0790016, + "latency_99": 141.99444142, + "latency_999": 143.111380738 + } + }, + { + "batch_size": 8, + "input_length": 32, + "baseline": { + "nb_forwards": 16, + "throughput": 1.07, + "latency_mean": 953.2654264375, + "latency_std": 52.43131265130536, + "latency_50": 939.297859, + "latency_90": 953.7382385, + "latency_95": 1007.0155795, + "latency_99": 1124.8867758999997, + "latency_999": 1151.4077950899998 + }, + "optimized": { + "nb_forwards": 17, + "throughput": 1.13, + "latency_mean": 926.1379787647058, + "latency_std": 10.221038220575734, + "latency_50": 925.067649, + "latency_90": 942.8075994, + "latency_95": 943.8492997999999, + "latency_99": 944.12823036, + "latency_999": 944.1909897359999 + } + }, + { + "batch_size": 4, + "input_length": 64, + "baseline": { + "nb_forwards": 23, + "throughput": 1.53, + "latency_mean": 656.297728173913, + "latency_std": 0.9507206548680598, + "latency_50": 656.379836, + "latency_90": 657.2358052000001, + "latency_95": 657.2469835, + "latency_99": 657.8591597000001, + "latency_999": 658.01437487 + }, + "optimized": { + "nb_forwards": 23, + "throughput": 1.53, + "latency_mean": 652.7235733913044, + "latency_std": 1.4541608709475486, + "latency_50": 652.974982, + "latency_90": 654.6616273999999, + "latency_95": 654.7420641, + "latency_99": 655.24253678, + "latency_999": 655.3693460779999 + } + }, + { + "batch_size": 8, + "input_length": 128, + "baseline": { + "nb_forwards": 12, + "throughput": 0.8, + "latency_mean": 1265.5476989166668, + "latency_std": 3.289433212667684, + "latency_50": 1263.5434205, + "latency_90": 1269.744916, + "latency_95": 1270.16758755, + "latency_99": 1270.5304551099998, + "latency_999": 1270.6121003110002 + }, + "optimized": { + "nb_forwards": 12, + "throughput": 0.8, + "latency_mean": 1304.50654875, + "latency_std": 2.6884396280606753, + "latency_50": 1303.907755, + "latency_90": 1307.1289129000002, + "latency_95": 1309.2907671500002, + "latency_99": 1311.21018223, + "latency_999": 1311.642050623 + } + }, + { + "batch_size": 1, + "input_length": 32, + "baseline": { + "nb_forwards": 97, + "throughput": 6.47, + "latency_mean": 155.8433487113402, + "latency_std": 2.5029692630646236, + "latency_50": 155.989606, + "latency_90": 158.707679, + "latency_95": 159.601676, + "latency_99": 161.73731819999998, + "latency_999": 162.09523452000002 + }, + "optimized": { + "nb_forwards": 115, + "throughput": 7.67, + "latency_mean": 130.9514579652174, + "latency_std": 8.81473257206767, + "latency_50": 128.898162, + "latency_90": 131.6082024, + "latency_95": 163.16474219999998, + "latency_99": 163.3782847, + "latency_999": 163.563403254 + } + }, + { + "batch_size": 4, + "input_length": 128, + "baseline": { + "nb_forwards": 23, + "throughput": 1.53, + "latency_mean": 653.9863154782608, + "latency_std": 0.7289228494064774, + "latency_50": 653.794938, + "latency_90": 654.6420688, + "latency_95": 655.7360347, + "latency_99": 656.03099652, + "latency_999": 656.0760086519999 + }, + "optimized": { + "nb_forwards": 32, + "throughput": 2.13, + "latency_mean": 479.06163646875, + "latency_std": 19.59131442173658, + "latency_50": 474.2416765, + "latency_90": 511.8005115, + "latency_95": 519.3751149, + "latency_99": 526.80620621, + "latency_999": 527.546256221 + } + }, + { + "batch_size": 1, + "input_length": 128, + "baseline": { + "nb_forwards": 76, + "throughput": 5.07, + "latency_mean": 197.58542217105264, + "latency_std": 0.9013285926255195, + "latency_50": 197.396874, + "latency_90": 198.8574485, + "latency_95": 199.188459, + "latency_99": 200.012024, + "latency_999": 201.7015409 + }, + "optimized": { + "nb_forwards": 117, + "throughput": 7.8, + "latency_mean": 128.7953315897436, + "latency_std": 1.7882824907070387, + "latency_50": 128.806478, + "latency_90": 131.2709188, + "latency_95": 131.67982, + "latency_99": 132.40365796, + "latency_999": 132.676468532 + } + }, + { + "batch_size": 4, + "input_length": 32, + "baseline": { + "nb_forwards": 23, + "throughput": 1.53, + "latency_mean": 654.1969950434783, + "latency_std": 1.1220804198391827, + "latency_50": 654.001645, + "latency_90": 655.7840166, + "latency_95": 656.016458, + "latency_99": 656.36563072, + "latency_999": 656.447835172 + }, + "optimized": { + "nb_forwards": 32, + "throughput": 2.13, + "latency_mean": 481.61485321875, + "latency_std": 33.724130139913434, + "latency_50": 473.2563825, + "latency_90": 504.5333757, + "latency_95": 520.7592738999999, + "latency_99": 612.8124424300001, + "latency_999": 644.3983803430002 + } + } + ], + "others": { + "baseline": { + "accuracy": 0.98 + }, + "optimized": { + "accuracy": 0.98 + } + } + }, + "max_eval_samples": null, + "time_benchmark_args": { + "duration": 15, + "warmup_runs": 5 + }, + "model_type": "vit" + }, + { + "model_name_or_path": "nateraw/vit-base-beans", + "task": "image-classification", + "task_args": null, + "dataset": { + "path": "beans", + "eval_split": "validation", + "data_keys": { + "primary": "image", + "secondary": null + }, + "ref_keys": [ + "labels" + ], + "name": null, + "calibration_split": "train" + }, + "quantization_approach": "dynamic", + "operators_to_quantize": [ + "Add", + "MatMul" + ], + "node_exclusion": [], + "aware_training": false, + "per_channel": true, + "calibration": { + "method": "minmax", + "num_calibration_samples": 100, + "calibration_histogram_percentile": null, + "calibration_moving_average": null, + "calibration_moving_average_constant": null + }, + "framework": "onnxruntime", + "framework_args": { + "opset": 11, + "optimization_level": 1 + }, + "hardware": "Architecture: x86_64\nCPU op-mode(s): 32-bit, 64-bit\nByte Order: Little Endian\nAddress sizes: 46 bits physical, 48 bits virtual\nCPU(s): 8\nOn-line CPU(s) list: 0-7\nThread(s) per core: 2\nCore(s) per socket: 4\nSocket(s): 1\nNUMA node(s): 1\nVendor ID: GenuineIntel\nCPU family: 6\nModel: 85\nModel name: Intel(R) Xeon(R) Platinum 8259CL CPU @ 2.50GHz\nStepping: 7\nCPU MHz: 2594.420\nBogoMIPS: 4999.99\nHypervisor vendor: KVM\nVirtualization type: full\nL1d cache: 128 KiB\nL1i cache: 128 KiB\nL2 cache: 4 MiB\nL3 cache: 35.8 MiB\nNUMA node0 CPU(s): 0-7\nVulnerability Itlb multihit: KVM: Vulnerable\nVulnerability L1tf: Mitigation; PTE Inversion\nVulnerability Mds: Vulnerable: Clear CPU buffers attempted, no microcode; SMT Host state unknown\nVulnerability Meltdown: Mitigation; PTI\nVulnerability Spec store bypass: Vulnerable\nVulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization\nVulnerability Spectre v2: Mitigation; Retpolines, STIBP disabled, RSB filling\nVulnerability Srbds: Not affected\nVulnerability Tsx async abort: Not affected\nFlags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc rep_good nopl xtopology nonstop_tsc cpuid aperfmperf tsc_known_freq pni pclmulqdq ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand hypervisor lahf_lm abm 3dnowprefetch invpcid_single pti fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid mpx avx512f avx512dq rdseed adx smap clflushopt clwb avx512cd avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves ida arat pku ospke avx512_vnni\n", + "versions": { + "transformers": "4.20.1", + "optimum": "1.2.3.dev0", + "optimum_hash": "5c9af4e5f93c7e9bd523563230732b49603dc4d7" + }, + "evaluation": { + "time": [ + { + "batch_size": 1, + "input_length": 64, + "baseline": { + "nb_forwards": 96, + "throughput": 6.4, + "latency_mean": 157.21188266666667, + "latency_std": 4.2296673831318135, + "latency_50": 156.0635645, + "latency_90": 164.39148, + "latency_95": 168.00414375, + "latency_99": 169.24721239999997, + "latency_999": 172.15546754 + }, + "optimized": { + "nb_forwards": 234, + "throughput": 15.6, + "latency_mean": 64.15645528205128, + "latency_std": 0.8760033202077914, + "latency_50": 64.078188, + "latency_90": 65.0844361, + "latency_95": 65.42577375, + "latency_99": 66.03388837, + "latency_999": 70.50518821699998 + } + }, + { + "batch_size": 4, + "input_length": 128, + "baseline": { + "nb_forwards": 23, + "throughput": 1.53, + "latency_mean": 655.935887826087, + "latency_std": 1.5184807551034185, + "latency_50": 656.150496, + "latency_90": 657.7225057999999, + "latency_95": 658.3034359, + "latency_99": 658.49345218, + "latency_999": 658.5271777180001 + }, + "optimized": { + "nb_forwards": 69, + "throughput": 4.6, + "latency_mean": 220.12218289855073, + "latency_std": 2.8124226950487565, + "latency_50": 220.269108, + "latency_90": 223.81522719999998, + "latency_95": 224.73072179999997, + "latency_99": 226.68743828, + "latency_999": 227.86304682800002 + } + }, + { + "batch_size": 8, + "input_length": 64, + "baseline": { + "nb_forwards": 14, + "throughput": 0.93, + "latency_mean": 1107.6612250714286, + "latency_std": 153.3041188785755, + "latency_50": 1068.835057, + "latency_90": 1280.4905781, + "latency_95": 1289.3787047, + "latency_99": 1300.66569054, + "latency_999": 1303.205262354 + }, + "optimized": { + "nb_forwards": 32, + "throughput": 2.13, + "latency_mean": 472.52847371875, + "latency_std": 41.051488900475434, + "latency_50": 449.4546335, + "latency_90": 548.263878, + "latency_95": 549.79502655, + "latency_99": 552.90739222, + "latency_999": 553.856337622 + } + }, + { + "batch_size": 4, + "input_length": 64, + "baseline": { + "nb_forwards": 25, + "throughput": 1.67, + "latency_mean": 613.27841144, + "latency_std": 65.27331298428419, + "latency_50": 656.540737, + "latency_90": 661.4057177999999, + "latency_95": 661.429921, + "latency_99": 661.6534559600001, + "latency_999": 661.716672896 + }, + "optimized": { + "nb_forwards": 68, + "throughput": 4.53, + "latency_mean": 220.9577393235294, + "latency_std": 2.202118334734023, + "latency_50": 220.84368, + "latency_90": 223.2997182, + "latency_95": 224.4097824, + "latency_99": 226.46243421, + "latency_999": 227.056110021 + } + }, + { + "batch_size": 8, + "input_length": 128, + "baseline": { + "nb_forwards": 16, + "throughput": 1.07, + "latency_mean": 1005.3757105, + "latency_std": 110.08740222252929, + "latency_50": 951.764254, + "latency_90": 1173.9785045, + "latency_95": 1275.012984, + "latency_99": 1277.4162648, + "latency_999": 1277.95700298 + }, + "optimized": { + "nb_forwards": 35, + "throughput": 2.33, + "latency_mean": 440.0128511142857, + "latency_std": 6.045956719281095, + "latency_50": 440.399668, + "latency_90": 446.0546886, + "latency_95": 449.1048619, + "latency_99": 454.09199782, + "latency_999": 455.953342882 + } + }, + { + "batch_size": 1, + "input_length": 128, + "baseline": { + "nb_forwards": 77, + "throughput": 5.13, + "latency_mean": 195.38825248051947, + "latency_std": 15.566776674930802, + "latency_50": 201.26664, + "latency_90": 203.9999776, + "latency_95": 204.9478114, + "latency_99": 208.10657124, + "latency_999": 211.47291212400003 + }, + "optimized": { + "nb_forwards": 208, + "throughput": 13.87, + "latency_mean": 72.28192877884617, + "latency_std": 0.30617482097915, + "latency_50": 72.2594515, + "latency_90": 72.687902, + "latency_95": 72.8431132, + "latency_99": 73.03046864, + "latency_999": 73.322488771 + } + }, + { + "batch_size": 1, + "input_length": 32, + "baseline": { + "nb_forwards": 97, + "throughput": 6.47, + "latency_mean": 154.83997250515463, + "latency_std": 1.6989375176627797, + "latency_50": 154.770658, + "latency_90": 156.874254, + "latency_95": 157.6805368, + "latency_99": 158.86760128, + "latency_999": 159.947736928 + }, + "optimized": { + "nb_forwards": 207, + "throughput": 13.8, + "latency_mean": 72.51127445410629, + "latency_std": 0.27142401425407037, + "latency_50": 72.511206, + "latency_90": 72.86571620000001, + "latency_95": 72.9502418, + "latency_99": 73.14818106, + "latency_999": 73.181334838 + } + }, + { + "batch_size": 8, + "input_length": 32, + "baseline": { + "nb_forwards": 15, + "throughput": 1.0, + "latency_mean": 1030.1399602666665, + "latency_std": 124.65913381994983, + "latency_50": 955.359583, + "latency_90": 1262.7360772, + "latency_95": 1268.8635989000002, + "latency_99": 1279.42059738, + "latency_999": 1281.795922038 + }, + "optimized": { + "nb_forwards": 28, + "throughput": 1.87, + "latency_mean": 542.358436, + "latency_std": 5.562261406290527, + "latency_50": 540.1506655, + "latency_90": 552.4821344, + "latency_95": 552.7215868999999, + "latency_99": 558.17499099, + "latency_999": 559.9655274989999 + } + }, + { + "batch_size": 4, + "input_length": 32, + "baseline": { + "nb_forwards": 23, + "throughput": 1.53, + "latency_mean": 654.3281370434783, + "latency_std": 1.8605606203681557, + "latency_50": 653.634216, + "latency_90": 656.6254864, + "latency_95": 658.6546205, + "latency_99": 660.00098088, + "latency_999": 660.294841788 + }, + "optimized": { + "nb_forwards": 69, + "throughput": 4.6, + "latency_mean": 219.54447333333334, + "latency_std": 2.7890297777760664, + "latency_50": 219.354462, + "latency_90": 223.2168158, + "latency_95": 224.12707319999998, + "latency_99": 225.20001263999998, + "latency_999": 225.522583764 + } + } + ], + "others": { + "baseline": { + "accuracy": 0.98 + }, + "optimized": { + "accuracy": 0.98 + } + } + }, + "max_eval_samples": null, + "time_benchmark_args": { + "duration": 15, + "warmup_runs": 5 + }, + "model_type": "vit" + }, + { + "model_name_or_path": "nateraw/vit-base-beans", + "task": "image-classification", + "task_args": null, + "dataset": { + "path": "beans", + "eval_split": "validation", + "data_keys": { + "primary": "image", + "secondary": null + }, + "ref_keys": [ + "labels" + ], + "name": null, + "calibration_split": "train" + }, + "quantization_approach": "dynamic", + "operators_to_quantize": [ + "Add" + ], + "node_exclusion": [], + "aware_training": false, + "per_channel": true, + "calibration": { + "method": "minmax", + "num_calibration_samples": 100, + "calibration_histogram_percentile": null, + "calibration_moving_average": null, + "calibration_moving_average_constant": null + }, + "framework": "onnxruntime", + "framework_args": { + "opset": 11, + "optimization_level": 1 + }, + "hardware": "Architecture: x86_64\nCPU op-mode(s): 32-bit, 64-bit\nByte Order: Little Endian\nAddress sizes: 46 bits physical, 48 bits virtual\nCPU(s): 8\nOn-line CPU(s) list: 0-7\nThread(s) per core: 2\nCore(s) per socket: 4\nSocket(s): 1\nNUMA node(s): 1\nVendor ID: GenuineIntel\nCPU family: 6\nModel: 85\nModel name: Intel(R) Xeon(R) Platinum 8259CL CPU @ 2.50GHz\nStepping: 7\nCPU MHz: 3100.006\nBogoMIPS: 4999.99\nHypervisor vendor: KVM\nVirtualization type: full\nL1d cache: 128 KiB\nL1i cache: 128 KiB\nL2 cache: 4 MiB\nL3 cache: 35.8 MiB\nNUMA node0 CPU(s): 0-7\nVulnerability Itlb multihit: KVM: Vulnerable\nVulnerability L1tf: Mitigation; PTE Inversion\nVulnerability Mds: Vulnerable: Clear CPU buffers attempted, no microcode; SMT Host state unknown\nVulnerability Meltdown: Mitigation; PTI\nVulnerability Spec store bypass: Vulnerable\nVulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization\nVulnerability Spectre v2: Mitigation; Retpolines, STIBP disabled, RSB filling\nVulnerability Srbds: Not affected\nVulnerability Tsx async abort: Not affected\nFlags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc rep_good nopl xtopology nonstop_tsc cpuid aperfmperf tsc_known_freq pni pclmulqdq ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand hypervisor lahf_lm abm 3dnowprefetch invpcid_single pti fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid mpx avx512f avx512dq rdseed adx smap clflushopt clwb avx512cd avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves ida arat pku ospke avx512_vnni\n", + "versions": { + "transformers": "4.20.1", + "optimum": "1.2.3.dev0", + "optimum_hash": "5c9af4e5f93c7e9bd523563230732b49603dc4d7" + }, + "evaluation": { + "time": [ + { + "batch_size": 8, + "input_length": 128, + "baseline": { + "nb_forwards": 16, + "throughput": 1.07, + "latency_mean": 981.8066059375, + "latency_std": 44.423175785791656, + "latency_50": 968.222589, + "latency_90": 1030.6252395, + "latency_95": 1051.9451035, + "latency_99": 1099.3337503, + "latency_999": 1109.99619583 + }, + "optimized": { + "nb_forwards": 16, + "throughput": 1.07, + "latency_mean": 940.686023875, + "latency_std": 34.50079570717892, + "latency_50": 932.533706, + "latency_90": 954.537913, + "latency_95": 982.67146125, + "latency_99": 1049.86251225, + "latency_999": 1064.980498725 + } + }, + { + "batch_size": 4, + "input_length": 64, + "baseline": { + "nb_forwards": 23, + "throughput": 1.53, + "latency_mean": 653.6214047826087, + "latency_std": 0.554367349056088, + "latency_50": 653.56593, + "latency_90": 654.3410592, + "latency_95": 654.4254941, + "latency_99": 654.86828346, + "latency_999": 654.978897546 + }, + "optimized": { + "nb_forwards": 32, + "throughput": 2.13, + "latency_mean": 468.82132275, + "latency_std": 6.479021216722283, + "latency_50": 468.046428, + "latency_90": 477.1373799, + "latency_95": 478.81192385, + "latency_99": 481.5715929, + "latency_999": 482.31672099 + } + }, + { + "batch_size": 1, + "input_length": 64, + "baseline": { + "nb_forwards": 76, + "throughput": 5.07, + "latency_mean": 199.75279011842105, + "latency_std": 1.1029265450656718, + "latency_50": 199.5758745, + "latency_90": 200.400115, + "latency_95": 200.559785, + "latency_99": 202.84980075, + "latency_999": 207.81083137499996 + }, + "optimized": { + "nb_forwards": 115, + "throughput": 7.67, + "latency_mean": 131.4627194347826, + "latency_std": 4.6871974959568545, + "latency_50": 130.19088, + "latency_90": 139.359352, + "latency_95": 141.0757014, + "latency_99": 143.8943466, + "latency_999": 150.29109535799998 + } + }, + { + "batch_size": 4, + "input_length": 128, + "baseline": { + "nb_forwards": 23, + "throughput": 1.53, + "latency_mean": 661.8593041304348, + "latency_std": 11.283099142573517, + "latency_50": 655.43737, + "latency_90": 681.5352313999999, + "latency_95": 682.9110206, + "latency_99": 683.9518636399999, + "latency_999": 684.184243964 + }, + "optimized": { + "nb_forwards": 32, + "throughput": 2.13, + "latency_mean": 472.49058509375, + "latency_std": 8.963136445787091, + "latency_50": 474.708593, + "latency_90": 483.1333381, + "latency_95": 485.4448544, + "latency_99": 488.61856141000004, + "latency_999": 489.732493741 + } + }, + { + "batch_size": 8, + "input_length": 32, + "baseline": { + "nb_forwards": 12, + "throughput": 0.8, + "latency_mean": 1283.6983385, + "latency_std": 22.13170499191711, + "latency_50": 1275.341306, + "latency_90": 1321.6280871, + "latency_95": 1324.7206230499999, + "latency_99": 1326.57760301, + "latency_999": 1326.995423501 + }, + "optimized": { + "nb_forwards": 17, + "throughput": 1.13, + "latency_mean": 927.8742942352941, + "latency_std": 13.192113526204107, + "latency_50": 928.112074, + "latency_90": 943.312943, + "latency_95": 947.7967182000001, + "latency_99": 950.06144204, + "latency_999": 950.571004904 + } + }, + { + "batch_size": 1, + "input_length": 32, + "baseline": { + "nb_forwards": 92, + "throughput": 6.13, + "latency_mean": 163.7631747173913, + "latency_std": 16.656417748995572, + "latency_50": 155.936432, + "latency_90": 197.9161219, + "latency_95": 199.74363870000002, + "latency_99": 201.92121115999998, + "latency_999": 203.62556981600002 + }, + "optimized": { + "nb_forwards": 116, + "throughput": 7.73, + "latency_mean": 129.6231810086207, + "latency_std": 1.9548923667657356, + "latency_50": 129.4212125, + "latency_90": 132.2460935, + "latency_95": 132.81225825, + "latency_99": 133.86391385, + "latency_999": 138.651827795 + } + }, + { + "batch_size": 8, + "input_length": 64, + "baseline": { + "nb_forwards": 12, + "throughput": 0.8, + "latency_mean": 1286.5124550833332, + "latency_std": 23.2357907207992, + "latency_50": 1276.5111835, + "latency_90": 1323.3058475999999, + "latency_95": 1327.5837783, + "latency_99": 1330.1992940599998, + "latency_999": 1330.787785106 + }, + "optimized": { + "nb_forwards": 12, + "throughput": 0.8, + "latency_mean": 1317.95578525, + "latency_std": 1.490128618702992, + "latency_50": 1317.6128805, + "latency_90": 1320.2002315, + "latency_95": 1320.6199098, + "latency_99": 1320.96551396, + "latency_999": 1321.043274896 + } + }, + { + "batch_size": 4, + "input_length": 32, + "baseline": { + "nb_forwards": 23, + "throughput": 1.53, + "latency_mean": 666.9763079565217, + "latency_std": 11.149385297770825, + "latency_50": 661.47222, + "latency_90": 684.7818294, + "latency_95": 685.6954787000001, + "latency_99": 686.6990396799999, + "latency_999": 686.939640568 + }, + "optimized": { + "nb_forwards": 23, + "throughput": 1.53, + "latency_mean": 655.3178510869565, + "latency_std": 2.77492679226272, + "latency_50": 655.00068, + "latency_90": 659.4658294, + "latency_95": 660.3613685, + "latency_99": 660.48331564, + "latency_999": 660.4916490640001 + } + }, + { + "batch_size": 1, + "input_length": 128, + "baseline": { + "nb_forwards": 86, + "throughput": 5.73, + "latency_mean": 176.08157447674418, + "latency_std": 20.32932776355446, + "latency_50": 167.453177, + "latency_90": 197.6717735, + "latency_95": 198.75702125, + "latency_99": 199.23389685, + "latency_999": 199.447866585 + }, + "optimized": { + "nb_forwards": 93, + "throughput": 6.2, + "latency_mean": 161.78841179569892, + "latency_std": 0.2689887068096146, + "latency_50": 161.820936, + "latency_90": 162.0967684, + "latency_95": 162.21570319999998, + "latency_99": 162.39115031999998, + "latency_999": 162.649013532 + } + } + ], + "others": { + "baseline": { + "accuracy": 0.98 + }, + "optimized": { + "accuracy": 0.98 + } + } + }, + "max_eval_samples": null, + "time_benchmark_args": { + "duration": 15, + "warmup_runs": 5 + }, + "model_type": "vit" + }, + { + "model_name_or_path": "nateraw/vit-base-beans", + "task": "image-classification", + "task_args": null, + "dataset": { + "path": "beans", + "eval_split": "validation", + "data_keys": { + "primary": "image", + "secondary": null + }, + "ref_keys": [ + "labels" + ], + "name": null, + "calibration_split": "train" + }, + "quantization_approach": "dynamic", + "operators_to_quantize": [ + "Add", + "MatMul" + ], + "node_exclusion": [ + "layernorm", + "gelu", + "residual", + "gather", + "softmax" + ], + "aware_training": false, + "per_channel": true, + "calibration": { + "method": "minmax", + "num_calibration_samples": 100, + "calibration_histogram_percentile": null, + "calibration_moving_average": null, + "calibration_moving_average_constant": null + }, + "framework": "onnxruntime", + "framework_args": { + "opset": 11, + "optimization_level": 1 + }, + "hardware": "Architecture: x86_64\nCPU op-mode(s): 32-bit, 64-bit\nByte Order: Little Endian\nAddress sizes: 46 bits physical, 48 bits virtual\nCPU(s): 8\nOn-line CPU(s) list: 0-7\nThread(s) per core: 2\nCore(s) per socket: 4\nSocket(s): 1\nNUMA node(s): 1\nVendor ID: GenuineIntel\nCPU family: 6\nModel: 85\nModel name: Intel(R) Xeon(R) Platinum 8259CL CPU @ 2.50GHz\nStepping: 7\nCPU MHz: 3105.350\nBogoMIPS: 4999.99\nHypervisor vendor: KVM\nVirtualization type: full\nL1d cache: 128 KiB\nL1i cache: 128 KiB\nL2 cache: 4 MiB\nL3 cache: 35.8 MiB\nNUMA node0 CPU(s): 0-7\nVulnerability Itlb multihit: KVM: Vulnerable\nVulnerability L1tf: Mitigation; PTE Inversion\nVulnerability Mds: Vulnerable: Clear CPU buffers attempted, no microcode; SMT Host state unknown\nVulnerability Meltdown: Mitigation; PTI\nVulnerability Spec store bypass: Vulnerable\nVulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization\nVulnerability Spectre v2: Mitigation; Retpolines, STIBP disabled, RSB filling\nVulnerability Srbds: Not affected\nVulnerability Tsx async abort: Not affected\nFlags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc rep_good nopl xtopology nonstop_tsc cpuid aperfmperf tsc_known_freq pni pclmulqdq ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand hypervisor lahf_lm abm 3dnowprefetch invpcid_single pti fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid mpx avx512f avx512dq rdseed adx smap clflushopt clwb avx512cd avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves ida arat pku ospke avx512_vnni\n", + "versions": { + "transformers": "4.20.1", + "optimum": "1.2.3.dev0", + "optimum_hash": "5c9af4e5f93c7e9bd523563230732b49603dc4d7" + }, + "evaluation": { + "time": [ + { + "batch_size": 8, + "input_length": 64, + "baseline": { + "nb_forwards": 13, + "throughput": 0.87, + "latency_mean": 1157.0002205384615, + "latency_std": 173.96892806199435, + "latency_50": 1268.399731, + "latency_90": 1373.240541, + "latency_95": 1394.8925212000001, + "latency_99": 1395.4346634400001, + "latency_999": 1395.556645444 + }, + "optimized": { + "nb_forwards": 28, + "throughput": 1.87, + "latency_mean": 542.74753675, + "latency_std": 1.5526347456374558, + "latency_50": 542.544525, + "latency_90": 544.3944952, + "latency_95": 545.1945222999999, + "latency_99": 547.32045758, + "latency_999": 547.9295482580001 + } + }, + { + "batch_size": 1, + "input_length": 32, + "baseline": { + "nb_forwards": 76, + "throughput": 5.07, + "latency_mean": 198.1925660394737, + "latency_std": 1.5139423529661107, + "latency_50": 198.3774705, + "latency_90": 199.9859375, + "latency_95": 200.47868375, + "latency_99": 200.94235025, + "latency_999": 201.309621125 + }, + "optimized": { + "nb_forwards": 207, + "throughput": 13.8, + "latency_mean": 72.64656536231884, + "latency_std": 0.5352263228340901, + "latency_50": 72.569523, + "latency_90": 73.2376486, + "latency_95": 73.53506229999999, + "latency_99": 74.89236506, + "latency_999": 76.09928199800001 + } + }, + { + "batch_size": 8, + "input_length": 32, + "baseline": { + "nb_forwards": 12, + "throughput": 0.8, + "latency_mean": 1287.5803281666667, + "latency_std": 36.91340143279409, + "latency_50": 1269.6482595, + "latency_90": 1303.1859430999998, + "latency_95": 1346.71807165, + "latency_99": 1388.7522119300002, + "latency_999": 1398.209893493 + }, + "optimized": { + "nb_forwards": 28, + "throughput": 1.87, + "latency_mean": 542.7170135714285, + "latency_std": 1.3567039497139863, + "latency_50": 542.287221, + "latency_90": 544.4592137999999, + "latency_95": 545.4364649, + "latency_99": 546.21864882, + "latency_999": 546.315030882 + } + }, + { + "batch_size": 4, + "input_length": 64, + "baseline": { + "nb_forwards": 30, + "throughput": 2.0, + "latency_mean": 507.2400474666666, + "latency_std": 4.290148550841821, + "latency_50": 507.659231, + "latency_90": 511.6910329, + "latency_95": 514.4643897, + "latency_99": 516.23375722, + "latency_999": 516.269666122 + }, + "optimized": { + "nb_forwards": 57, + "throughput": 3.8, + "latency_mean": 265.6226849122807, + "latency_std": 2.3036455255332684, + "latency_50": 265.179855, + "latency_90": 269.8071572, + "latency_95": 271.4136062, + "latency_99": 271.69196852, + "latency_999": 271.82992995200004 + } + }, + { + "batch_size": 4, + "input_length": 32, + "baseline": { + "nb_forwards": 25, + "throughput": 1.67, + "latency_mean": 603.25719464, + "latency_std": 66.25874091678149, + "latency_50": 654.828359, + "latency_90": 660.8697482, + "latency_95": 661.393019, + "latency_99": 661.8344047999999, + "latency_999": 661.93484588 + }, + "optimized": { + "nb_forwards": 57, + "throughput": 3.8, + "latency_mean": 266.1465179649123, + "latency_std": 2.133106034837133, + "latency_50": 265.532055, + "latency_90": 269.2449408, + "latency_95": 270.3573558, + "latency_99": 271.02784404000005, + "latency_999": 271.716555504 + } + }, + { + "batch_size": 1, + "input_length": 64, + "baseline": { + "nb_forwards": 95, + "throughput": 6.33, + "latency_mean": 159.68801769473683, + "latency_std": 12.773363897780701, + "latency_50": 155.413717, + "latency_90": 176.90005560000023, + "latency_95": 197.0968159, + "latency_99": 198.07615253999998, + "latency_999": 198.10961945399998 + }, + "optimized": { + "nb_forwards": 207, + "throughput": 13.8, + "latency_mean": 72.7664098888889, + "latency_std": 0.38816228058385593, + "latency_50": 72.718874, + "latency_90": 73.24516159999999, + "latency_95": 73.43794679999999, + "latency_99": 73.8501665, + "latency_999": 73.996995134 + } + }, + { + "batch_size": 4, + "input_length": 128, + "baseline": { + "nb_forwards": 23, + "throughput": 1.53, + "latency_mean": 657.163290173913, + "latency_std": 1.3013676808424233, + "latency_50": 657.058665, + "latency_90": 657.6071907999999, + "latency_95": 659.0426878, + "latency_99": 661.57735552, + "latency_999": 662.180470252 + }, + "optimized": { + "nb_forwards": 63, + "throughput": 4.2, + "latency_mean": 240.11091615873016, + "latency_std": 21.14828674924693, + "latency_50": 226.793935, + "latency_90": 269.1414722, + "latency_95": 270.4462369, + "latency_99": 279.1629919800001, + "latency_999": 290.00248849800005 + } + }, + { + "batch_size": 1, + "input_length": 128, + "baseline": { + "nb_forwards": 95, + "throughput": 6.33, + "latency_mean": 158.60142149473685, + "latency_std": 11.827907365032553, + "latency_50": 155.594958, + "latency_90": 159.5463688, + "latency_95": 198.4467831, + "latency_99": 201.84369322, + "latency_999": 202.49142212200002 + }, + "optimized": { + "nb_forwards": 207, + "throughput": 13.8, + "latency_mean": 72.53135863285024, + "latency_std": 0.4182249154784389, + "latency_50": 72.447941, + "latency_90": 73.07772, + "latency_95": 73.2243443, + "latency_99": 73.8286485, + "latency_999": 73.97194594400001 + } + }, + { + "batch_size": 8, + "input_length": 128, + "baseline": { + "nb_forwards": 16, + "throughput": 1.07, + "latency_mean": 951.313504375, + "latency_std": 6.690910380125991, + "latency_50": 951.8069655, + "latency_90": 958.854554, + "latency_95": 960.087836, + "latency_99": 962.6794064, + "latency_999": 963.26250974 + }, + "optimized": { + "nb_forwards": 31, + "throughput": 2.07, + "latency_mean": 489.4035592258065, + "latency_std": 45.5667283182196, + "latency_50": 469.093915, + "latency_90": 553.663101, + "latency_95": 555.243522, + "latency_99": 556.1204486, + "latency_999": 556.42928486 + } + } + ], + "others": { + "baseline": { + "accuracy": 0.98 + }, + "optimized": { + "accuracy": 0.98 + } + } + }, + "max_eval_samples": null, + "time_benchmark_args": { + "duration": 15, + "warmup_runs": 5 + }, + "model_type": "vit" + } +] \ No newline at end of file