[ { "model_name_or_path": "nateraw/vit-base-beans", "task": "image-classification", "task_args": null, "dataset": { "path": "beans", "eval_split": "validation", "data_keys": { "primary": "image", "secondary": null }, "ref_keys": [ "labels" ], "name": null, "calibration_split": "train" }, "quantization_approach": "dynamic", "operators_to_quantize": [ "Add" ], "node_exclusion": [], "aware_training": false, "per_channel": false, "calibration": { "method": "minmax", "num_calibration_samples": 100, "calibration_histogram_percentile": null, "calibration_moving_average": null, "calibration_moving_average_constant": null }, "framework": "onnxruntime", "framework_args": { "opset": 11, "optimization_level": 1 }, "hardware": "Architecture: x86_64\nCPU op-mode(s): 32-bit, 64-bit\nByte Order: Little Endian\nAddress sizes: 46 bits physical, 48 bits virtual\nCPU(s): 8\nOn-line CPU(s) list: 0-7\nThread(s) per core: 2\nCore(s) per socket: 4\nSocket(s): 1\nNUMA node(s): 1\nVendor ID: GenuineIntel\nCPU family: 6\nModel: 85\nModel name: Intel(R) Xeon(R) Platinum 8259CL CPU @ 2.50GHz\nStepping: 7\nCPU MHz: 3104.203\nBogoMIPS: 4999.99\nHypervisor vendor: KVM\nVirtualization type: full\nL1d cache: 128 KiB\nL1i cache: 128 KiB\nL2 cache: 4 MiB\nL3 cache: 35.8 MiB\nNUMA node0 CPU(s): 0-7\nVulnerability Itlb multihit: KVM: Vulnerable\nVulnerability L1tf: Mitigation; PTE Inversion\nVulnerability Mds: Vulnerable: Clear CPU buffers attempted, no microcode; SMT Host state unknown\nVulnerability Meltdown: Mitigation; PTI\nVulnerability Spec store bypass: Vulnerable\nVulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization\nVulnerability Spectre v2: Mitigation; Retpolines, STIBP disabled, RSB filling\nVulnerability Srbds: Not affected\nVulnerability Tsx async abort: Not affected\nFlags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc rep_good nopl xtopology nonstop_tsc cpuid aperfmperf tsc_known_freq pni pclmulqdq ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand hypervisor lahf_lm abm 3dnowprefetch invpcid_single pti fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid mpx avx512f avx512dq rdseed adx smap clflushopt clwb avx512cd avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves ida arat pku ospke avx512_vnni\n", "versions": { "transformers": "4.20.1", "optimum": "1.2.3.dev0", "optimum_hash": "5c9af4e5f93c7e9bd523563230732b49603dc4d7" }, "evaluation": { "time": [ { "batch_size": 8, "input_length": 64, "baseline": { "nb_forwards": 16, "throughput": 1.07, "latency_mean": 1007.1949454375, "latency_std": 78.84058358158838, "latency_50": 985.576602, "latency_90": 1059.7841785, "latency_95": 1145.035921, "latency_99": 1250.375245, "latency_999": 1274.0765929 }, "optimized": { "nb_forwards": 12, "throughput": 0.8, "latency_mean": 1331.11930825, "latency_std": 2.6333668250894813, "latency_50": 1331.341538, "latency_90": 1333.9482434000001, "latency_95": 1334.2215575, "latency_99": 1334.4844530999999, "latency_999": 1334.5436046099999 } }, { "batch_size": 1, "input_length": 64, "baseline": { "nb_forwards": 81, "throughput": 5.4, "latency_mean": 186.5835592716049, "latency_std": 20.00760723775789, "latency_50": 198.81694, "latency_90": 201.024797, "latency_95": 201.779275, "latency_99": 202.413402, "latency_999": 202.5442512 }, "optimized": { "nb_forwards": 92, "throughput": 6.13, "latency_mean": 163.38389757608698, "latency_std": 0.3275123768495738, "latency_50": 163.3941135, "latency_90": 163.7525755, "latency_95": 163.926186, "latency_99": 164.34097907, "latency_999": 164.45778730700002 } }, { "batch_size": 1, "input_length": 32, "baseline": { "nb_forwards": 76, "throughput": 5.07, "latency_mean": 198.9630147368421, "latency_std": 1.8034042650036124, "latency_50": 198.5655705, "latency_90": 200.2815155, "latency_95": 203.1906375, "latency_99": 206.4380845, "latency_999": 206.66971884999998 }, "optimized": { "nb_forwards": 117, "throughput": 7.8, "latency_mean": 128.82420041880343, "latency_std": 1.701156916298848, "latency_50": 128.623607, "latency_90": 131.323657, "latency_95": 131.90711579999999, "latency_99": 132.77440764, "latency_999": 133.447005692 } }, { "batch_size": 4, "input_length": 64, "baseline": { "nb_forwards": 23, "throughput": 1.53, "latency_mean": 655.365458, "latency_std": 0.6236008690322891, "latency_50": 655.34975, "latency_90": 656.1519954, "latency_95": 656.2574049, "latency_99": 656.3535554199999, "latency_999": 656.375521342 }, "optimized": { "nb_forwards": 32, "throughput": 2.13, "latency_mean": 473.76923496875, "latency_std": 7.2747277092183165, "latency_50": 472.62492, "latency_90": 483.41045330000003, "latency_95": 484.61633445, "latency_99": 489.37835364, "latency_999": 490.82021336400004 } }, { "batch_size": 1, "input_length": 128, "baseline": { "nb_forwards": 96, "throughput": 6.4, "latency_mean": 157.24992970833335, "latency_std": 4.732553962424211, "latency_50": 155.406953, "latency_90": 164.713482, "latency_95": 167.8590275, "latency_99": 170.8462496, "latency_999": 171.27275096 }, "optimized": { "nb_forwards": 117, "throughput": 7.8, "latency_mean": 129.13340215384616, "latency_std": 2.1864167617663997, "latency_50": 128.928664, "latency_90": 131.8907006, "latency_95": 132.5507548, "latency_99": 135.60957616, "latency_999": 136.973290652 } }, { "batch_size": 8, "input_length": 32, "baseline": { "nb_forwards": 12, "throughput": 0.8, "latency_mean": 1280.0728285, "latency_std": 15.395205966597677, "latency_50": 1270.8922015, "latency_90": 1298.3091468, "latency_95": 1300.7668938, "latency_99": 1303.06202356, "latency_999": 1303.578427756 }, "optimized": { "nb_forwards": 17, "throughput": 1.13, "latency_mean": 926.9665002352941, "latency_std": 9.31210282206815, "latency_50": 930.379688, "latency_90": 937.0084018, "latency_95": 939.6581278, "latency_99": 941.59419436, "latency_999": 942.029809336 } }, { "batch_size": 4, "input_length": 128, "baseline": { "nb_forwards": 23, "throughput": 1.53, "latency_mean": 656.3433247826088, "latency_std": 0.8294053936962497, "latency_50": 656.248342, "latency_90": 657.3568842000001, "latency_95": 657.4566738999999, "latency_99": 658.36745674, "latency_999": 658.596371074 }, "optimized": { "nb_forwards": 23, "throughput": 1.53, "latency_mean": 661.2441610869565, "latency_std": 1.5993679984326394, "latency_50": 661.382511, "latency_90": 662.625988, "latency_95": 663.5314744, "latency_99": 665.2728648, "latency_999": 665.69171598 } }, { "batch_size": 8, "input_length": 128, "baseline": { "nb_forwards": 12, "throughput": 0.8, "latency_mean": 1276.3800261666668, "latency_std": 14.72110021637953, "latency_50": 1268.308021, "latency_90": 1294.9160152, "latency_95": 1300.88363675, "latency_99": 1306.34986895, "latency_999": 1307.579771195 }, "optimized": { "nb_forwards": 12, "throughput": 0.8, "latency_mean": 1319.8366895833333, "latency_std": 2.9678765372708384, "latency_50": 1319.0768965, "latency_90": 1323.875593, "latency_95": 1325.10346415, "latency_99": 1326.0648056300001, "latency_999": 1326.2811074630001 } }, { "batch_size": 4, "input_length": 32, "baseline": { "nb_forwards": 26, "throughput": 1.73, "latency_mean": 588.8595787307693, "latency_std": 75.12307910293818, "latency_50": 656.586274, "latency_90": 659.335121, "latency_95": 659.60222425, "latency_99": 661.19556825, "latency_999": 661.6635563250001 }, "optimized": { "nb_forwards": 25, "throughput": 1.67, "latency_mean": 602.9062813200001, "latency_std": 87.18802711972026, "latency_50": 660.573071, "latency_90": 662.2857194, "latency_95": 662.9589754, "latency_99": 664.0132692799999, "latency_999": 664.269413528 } } ], "others": { "baseline": { "accuracy": 0.98 }, "optimized": { "accuracy": 0.98 } } }, "max_eval_samples": null, "time_benchmark_args": { "duration": 15, "warmup_runs": 5 }, "model_type": "vit" }, { "model_name_or_path": "nateraw/vit-base-beans", "task": "image-classification", "task_args": null, "dataset": { "path": "beans", "eval_split": "validation", "data_keys": { "primary": "image", "secondary": null }, "ref_keys": [ "labels" ], "name": null, "calibration_split": "train" }, "quantization_approach": "dynamic", "operators_to_quantize": [ "Add" ], "node_exclusion": [ "layernorm", "gelu", "residual", "gather", "softmax" ], "aware_training": false, "per_channel": true, "calibration": { "method": "minmax", "num_calibration_samples": 100, "calibration_histogram_percentile": null, "calibration_moving_average": null, "calibration_moving_average_constant": null }, "framework": "onnxruntime", "framework_args": { "opset": 11, "optimization_level": 1 }, "hardware": "Architecture: x86_64\nCPU op-mode(s): 32-bit, 64-bit\nByte Order: Little Endian\nAddress sizes: 46 bits physical, 48 bits virtual\nCPU(s): 8\nOn-line CPU(s) list: 0-7\nThread(s) per core: 2\nCore(s) per socket: 4\nSocket(s): 1\nNUMA node(s): 1\nVendor ID: GenuineIntel\nCPU family: 6\nModel: 85\nModel name: Intel(R) Xeon(R) Platinum 8259CL CPU @ 2.50GHz\nStepping: 7\nCPU MHz: 3098.882\nBogoMIPS: 4999.99\nHypervisor vendor: KVM\nVirtualization type: full\nL1d cache: 128 KiB\nL1i cache: 128 KiB\nL2 cache: 4 MiB\nL3 cache: 35.8 MiB\nNUMA node0 CPU(s): 0-7\nVulnerability Itlb multihit: KVM: Vulnerable\nVulnerability L1tf: Mitigation; PTE Inversion\nVulnerability Mds: Vulnerable: Clear CPU buffers attempted, no microcode; SMT Host state unknown\nVulnerability Meltdown: Mitigation; PTI\nVulnerability Spec store bypass: Vulnerable\nVulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization\nVulnerability Spectre v2: Mitigation; Retpolines, STIBP disabled, RSB filling\nVulnerability Srbds: Not affected\nVulnerability Tsx async abort: Not affected\nFlags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc rep_good nopl xtopology nonstop_tsc cpuid aperfmperf tsc_known_freq pni pclmulqdq ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand hypervisor lahf_lm abm 3dnowprefetch invpcid_single pti fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid mpx avx512f avx512dq rdseed adx smap clflushopt clwb avx512cd avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves ida arat pku ospke avx512_vnni\n", "versions": { "transformers": "4.20.1", "optimum": "1.2.3.dev0", "optimum_hash": "5c9af4e5f93c7e9bd523563230732b49603dc4d7" }, "evaluation": { "time": [ { "batch_size": 4, "input_length": 32, "baseline": { "nb_forwards": 25, "throughput": 1.67, "latency_mean": 609.81228388, "latency_std": 73.78848735281055, "latency_50": 654.990393, "latency_90": 690.0503104, "latency_95": 692.5913532000001, "latency_99": 692.88928036, "latency_999": 692.967085936 }, "optimized": { "nb_forwards": 24, "throughput": 1.6, "latency_mean": 632.7304657083333, "latency_std": 67.87527940398857, "latency_50": 665.6419855, "latency_90": 668.6654992, "latency_95": 668.7597744, "latency_99": 669.6960696, "latency_999": 669.94376166 } }, { "batch_size": 8, "input_length": 128, "baseline": { "nb_forwards": 13, "throughput": 0.87, "latency_mean": 1186.5381284615385, "latency_std": 161.4237069238961, "latency_50": 1269.670847, "latency_90": 1355.9492604000002, "latency_95": 1390.1450375999998, "latency_99": 1411.6607203199999, "latency_999": 1416.5017489319998 }, "optimized": { "nb_forwards": 17, "throughput": 1.13, "latency_mean": 934.0936628235294, "latency_std": 14.582536342073604, "latency_50": 935.050755, "latency_90": 952.4674898, "latency_95": 957.0302138, "latency_99": 962.9052563600001, "latency_999": 964.227140936 } }, { "batch_size": 1, "input_length": 64, "baseline": { "nb_forwards": 76, "throughput": 5.07, "latency_mean": 198.5647816973684, "latency_std": 3.4715584501636134, "latency_50": 198.506581, "latency_90": 200.2144805, "latency_95": 200.55136775, "latency_99": 207.540032, "latency_999": 223.25337049999993 }, "optimized": { "nb_forwards": 93, "throughput": 6.2, "latency_mean": 162.5132122580645, "latency_std": 0.513116818309582, "latency_50": 162.371852, "latency_90": 163.2859852, "latency_95": 163.7528158, "latency_99": 163.85883275999998, "latency_999": 163.915920876 } }, { "batch_size": 8, "input_length": 32, "baseline": { "nb_forwards": 13, "throughput": 0.87, "latency_mean": 1173.0143999230768, "latency_std": 130.54371262227698, "latency_50": 1260.605467, "latency_90": 1296.8186462, "latency_95": 1308.2374858, "latency_99": 1319.8486531600001, "latency_999": 1322.461165816 }, "optimized": { "nb_forwards": 16, "throughput": 1.07, "latency_mean": 995.22072925, "latency_std": 129.45556415437287, "latency_50": 946.254189, "latency_90": 1213.3328715, "latency_95": 1315.33706625, "latency_99": 1318.61972205, "latency_999": 1319.358319605 } }, { "batch_size": 4, "input_length": 64, "baseline": { "nb_forwards": 29, "throughput": 1.93, "latency_mean": 521.0909943793104, "latency_std": 46.475380124669094, "latency_50": 506.281592, "latency_90": 544.5816703999999, "latency_95": 656.4042, "latency_99": 658.84548128, "latency_999": 659.635381328 }, "optimized": { "nb_forwards": 32, "throughput": 2.13, "latency_mean": 472.8995814375, "latency_std": 7.852107004932393, "latency_50": 471.6044875, "latency_90": 483.1297946, "latency_95": 485.7553554, "latency_99": 490.66026108, "latency_999": 492.539614008 } }, { "batch_size": 1, "input_length": 128, "baseline": { "nb_forwards": 97, "throughput": 6.47, "latency_mean": 156.2440793814433, "latency_std": 1.9840595219565056, "latency_50": 156.225903, "latency_90": 158.593671, "latency_95": 159.64843179999997, "latency_99": 161.01634776, "latency_999": 161.629544976 }, "optimized": { "nb_forwards": 93, "throughput": 6.2, "latency_mean": 162.63202710752688, "latency_std": 0.7398820002769471, "latency_50": 162.382878, "latency_90": 163.62236180000002, "latency_95": 163.8380992, "latency_99": 164.88131604, "latency_999": 165.77600150400002 } }, { "batch_size": 4, "input_length": 128, "baseline": { "nb_forwards": 24, "throughput": 1.6, "latency_mean": 642.4793479166666, "latency_std": 41.85394569042994, "latency_50": 654.998135, "latency_90": 656.7839087000001, "latency_95": 657.02353375, "latency_99": 657.8482725700001, "latency_999": 658.060870057 }, "optimized": { "nb_forwards": 23, "throughput": 1.53, "latency_mean": 666.2755957826088, "latency_std": 1.7410908635173166, "latency_50": 666.717442, "latency_90": 668.0602384, "latency_95": 668.910585, "latency_99": 669.7671928, "latency_999": 669.9619772799999 } }, { "batch_size": 1, "input_length": 32, "baseline": { "nb_forwards": 75, "throughput": 5.0, "latency_mean": 201.75692086666666, "latency_std": 1.8320009608554637, "latency_50": 201.71631, "latency_90": 204.2971264, "latency_95": 204.8295851, "latency_99": 206.53276574, "latency_999": 206.55384397400002 }, "optimized": { "nb_forwards": 115, "throughput": 7.67, "latency_mean": 131.24782122608696, "latency_std": 4.242274517523887, "latency_50": 129.902701, "latency_90": 138.24299720000002, "latency_95": 141.03294480000002, "latency_99": 142.079643, "latency_999": 142.57829744400001 } }, { "batch_size": 8, "input_length": 64, "baseline": { "nb_forwards": 12, "throughput": 0.8, "latency_mean": 1263.2922429166667, "latency_std": 77.36389645216727, "latency_50": 1278.0368575, "latency_90": 1328.6916195, "latency_95": 1342.47310885, "latency_99": 1353.1162041700002, "latency_999": 1355.510900617 }, "optimized": { "nb_forwards": 15, "throughput": 1.0, "latency_mean": 1012.6554306, "latency_std": 156.31890427073202, "latency_50": 934.872073, "latency_90": 1322.7367448, "latency_95": 1323.9433467000001, "latency_99": 1325.30759014, "latency_999": 1325.6145449140001 } } ], "others": { "baseline": { "accuracy": 0.98 }, "optimized": { "accuracy": 0.98 } } }, "max_eval_samples": null, "time_benchmark_args": { "duration": 15, "warmup_runs": 5 }, "model_type": "vit" }, { "model_name_or_path": "nateraw/vit-base-beans", "task": "image-classification", "task_args": null, "dataset": { "path": "beans", "eval_split": "validation", "data_keys": { "primary": "image", "secondary": null }, "ref_keys": [ "labels" ], "name": null, "calibration_split": "train" }, "quantization_approach": "dynamic", "operators_to_quantize": [ "Add", "MatMul" ], "node_exclusion": [], "aware_training": false, "per_channel": false, "calibration": { "method": "minmax", "num_calibration_samples": 100, "calibration_histogram_percentile": null, "calibration_moving_average": null, "calibration_moving_average_constant": null }, "framework": "onnxruntime", "framework_args": { "opset": 11, "optimization_level": 1 }, "hardware": "Architecture: x86_64\nCPU op-mode(s): 32-bit, 64-bit\nByte Order: Little Endian\nAddress sizes: 46 bits physical, 48 bits virtual\nCPU(s): 8\nOn-line CPU(s) list: 0-7\nThread(s) per core: 2\nCore(s) per socket: 4\nSocket(s): 1\nNUMA node(s): 1\nVendor ID: GenuineIntel\nCPU family: 6\nModel: 85\nModel name: Intel(R) Xeon(R) Platinum 8259CL CPU @ 2.50GHz\nStepping: 7\nCPU MHz: 3099.508\nBogoMIPS: 4999.99\nHypervisor vendor: KVM\nVirtualization type: full\nL1d cache: 128 KiB\nL1i cache: 128 KiB\nL2 cache: 4 MiB\nL3 cache: 35.8 MiB\nNUMA node0 CPU(s): 0-7\nVulnerability Itlb multihit: KVM: Vulnerable\nVulnerability L1tf: Mitigation; PTE Inversion\nVulnerability Mds: Vulnerable: Clear CPU buffers attempted, no microcode; SMT Host state unknown\nVulnerability Meltdown: Mitigation; PTI\nVulnerability Spec store bypass: Vulnerable\nVulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization\nVulnerability Spectre v2: Mitigation; Retpolines, STIBP disabled, RSB filling\nVulnerability Srbds: Not affected\nVulnerability Tsx async abort: Not affected\nFlags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc rep_good nopl xtopology nonstop_tsc cpuid aperfmperf tsc_known_freq pni pclmulqdq ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand hypervisor lahf_lm abm 3dnowprefetch invpcid_single pti fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid mpx avx512f avx512dq rdseed adx smap clflushopt clwb avx512cd avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves ida arat pku ospke avx512_vnni\n", "versions": { "transformers": "4.20.1", "optimum": "1.2.3.dev0", "optimum_hash": "5c9af4e5f93c7e9bd523563230732b49603dc4d7" }, "evaluation": { "time": [ { "batch_size": 4, "input_length": 128, "baseline": { "nb_forwards": 30, "throughput": 2.0, "latency_mean": 504.13991656666667, "latency_std": 4.513179886057779, "latency_50": 503.19925, "latency_90": 511.38798610000003, "latency_95": 512.42949615, "latency_99": 513.05507925, "latency_999": 513.2059829250001 }, "optimized": { "nb_forwards": 69, "throughput": 4.6, "latency_mean": 217.46600939130434, "latency_std": 2.972318261728456, "latency_50": 217.184814, "latency_90": 220.705846, "latency_95": 222.9682218, "latency_99": 224.70305839999997, "latency_999": 226.06056314000003 } }, { "batch_size": 1, "input_length": 32, "baseline": { "nb_forwards": 79, "throughput": 5.27, "latency_mean": 191.43747164556962, "latency_std": 15.655307063217192, "latency_50": 198.595354, "latency_90": 200.57276240000002, "latency_95": 200.87837530000002, "latency_99": 201.16657547999998, "latency_999": 201.356981748 }, "optimized": { "nb_forwards": 238, "throughput": 15.87, "latency_mean": 63.268969394957985, "latency_std": 0.9427228875615695, "latency_50": 63.1408055, "latency_90": 64.2273272, "latency_95": 65.1072349, "latency_99": 66.96859157, "latency_999": 67.69891359200001 } }, { "batch_size": 8, "input_length": 64, "baseline": { "nb_forwards": 16, "throughput": 1.07, "latency_mean": 968.852768375, "latency_std": 38.04343260757334, "latency_50": 952.2546675, "latency_90": 1038.7362795, "latency_95": 1054.559503, "latency_99": 1057.6485166, "latency_999": 1058.34354466 }, "optimized": { "nb_forwards": 28, "throughput": 1.87, "latency_mean": 537.6509522857143, "latency_std": 3.8349742843783643, "latency_50": 536.4832135, "latency_90": 541.9351384, "latency_95": 546.1624122000001, "latency_99": 550.43430736, "latency_999": 551.363741536 } }, { "batch_size": 8, "input_length": 128, "baseline": { "nb_forwards": 12, "throughput": 0.8, "latency_mean": 1278.7261375, "latency_std": 13.900041175798188, "latency_50": 1275.258701, "latency_90": 1298.9398739, "latency_95": 1304.62491295, "latency_99": 1308.62324179, "latency_999": 1309.522865779 }, "optimized": { "nb_forwards": 28, "throughput": 1.87, "latency_mean": 537.52356, "latency_std": 3.4322833452073542, "latency_50": 536.6087305, "latency_90": 542.7677001000001, "latency_95": 545.60897155, "latency_99": 546.32498686, "latency_999": 546.4325099859999 } }, { "batch_size": 4, "input_length": 64, "baseline": { "nb_forwards": 23, "throughput": 1.53, "latency_mean": 655.3645512173913, "latency_std": 1.9257989113526919, "latency_50": 654.646442, "latency_90": 657.282423, "latency_95": 659.662086, "latency_99": 660.8313084800001, "latency_999": 661.061189648 }, "optimized": { "nb_forwards": 69, "throughput": 4.6, "latency_mean": 219.61360882608696, "latency_std": 10.272944961242715, "latency_50": 217.050361, "latency_90": 221.35356240000002, "latency_95": 244.56632359999978, "latency_99": 260.41507764, "latency_999": 261.058168464 } }, { "batch_size": 4, "input_length": 32, "baseline": { "nb_forwards": 23, "throughput": 1.53, "latency_mean": 654.7864544347826, "latency_std": 1.3045747652970006, "latency_50": 654.519817, "latency_90": 655.7390292, "latency_95": 656.9679398, "latency_99": 659.0271437, "latency_999": 659.51908757 }, "optimized": { "nb_forwards": 69, "throughput": 4.6, "latency_mean": 217.45058371014494, "latency_std": 2.719898318573885, "latency_50": 217.033952, "latency_90": 221.1297136, "latency_95": 222.2854166, "latency_99": 223.81499584, "latency_999": 224.26408878400002 } }, { "batch_size": 8, "input_length": 32, "baseline": { "nb_forwards": 15, "throughput": 1.0, "latency_mean": 1033.3692992666668, "latency_std": 121.20856568749772, "latency_50": 963.132488, "latency_90": 1262.2027165999998, "latency_95": 1265.428772, "latency_99": 1270.8415024, "latency_999": 1272.05936674 }, "optimized": { "nb_forwards": 35, "throughput": 2.33, "latency_mean": 433.3171128, "latency_std": 5.620352976190466, "latency_50": 432.092966, "latency_90": 439.51550460000004, "latency_95": 442.0759091, "latency_99": 449.04011729999996, "latency_999": 451.02373323 } }, { "batch_size": 1, "input_length": 128, "baseline": { "nb_forwards": 75, "throughput": 5.0, "latency_mean": 200.45710213333334, "latency_std": 0.912546986602039, "latency_50": 200.443548, "latency_90": 201.5305846, "latency_95": 202.211401, "latency_99": 202.77304016, "latency_999": 202.841815316 }, "optimized": { "nb_forwards": 239, "throughput": 15.93, "latency_mean": 62.95156658158996, "latency_std": 0.6494732254002147, "latency_50": 62.920241, "latency_90": 63.867974, "latency_95": 64.1582552, "latency_99": 64.69974346000001, "latency_999": 64.96208175 } }, { "batch_size": 1, "input_length": 64, "baseline": { "nb_forwards": 82, "throughput": 5.47, "latency_mean": 183.10327674390246, "latency_std": 21.77607771500049, "latency_50": 197.6686845, "latency_90": 202.83920880000002, "latency_95": 203.04726945, "latency_99": 204.23605759999998, "latency_999": 204.50013056 }, "optimized": { "nb_forwards": 235, "throughput": 15.67, "latency_mean": 64.02330576170213, "latency_std": 1.559037537525157, "latency_50": 63.56593, "latency_90": 66.4842204, "latency_95": 67.527317, "latency_99": 68.7520603, "latency_999": 70.43527871799999 } } ], "others": { "baseline": { "accuracy": 0.98 }, "optimized": { "accuracy": 0.98 } } }, "max_eval_samples": null, "time_benchmark_args": { "duration": 15, "warmup_runs": 5 }, "model_type": "vit" }, { "model_name_or_path": "nateraw/vit-base-beans", "task": "image-classification", "task_args": null, "dataset": { "path": "beans", "eval_split": "validation", "data_keys": { "primary": "image", "secondary": null }, "ref_keys": [ "labels" ], "name": null, "calibration_split": "train" }, "quantization_approach": "dynamic", "operators_to_quantize": [ "Add", "MatMul" ], "node_exclusion": [ "layernorm", "gelu", "residual", "gather", "softmax" ], "aware_training": false, "per_channel": false, "calibration": { "method": "minmax", "num_calibration_samples": 100, "calibration_histogram_percentile": null, "calibration_moving_average": null, "calibration_moving_average_constant": null }, "framework": "onnxruntime", "framework_args": { "opset": 11, "optimization_level": 1 }, "hardware": "Architecture: x86_64\nCPU op-mode(s): 32-bit, 64-bit\nByte Order: Little Endian\nAddress sizes: 46 bits physical, 48 bits virtual\nCPU(s): 8\nOn-line CPU(s) list: 0-7\nThread(s) per core: 2\nCore(s) per socket: 4\nSocket(s): 1\nNUMA node(s): 1\nVendor ID: GenuineIntel\nCPU family: 6\nModel: 85\nModel name: Intel(R) Xeon(R) Platinum 8259CL CPU @ 2.50GHz\nStepping: 7\nCPU MHz: 3098.359\nBogoMIPS: 4999.99\nHypervisor vendor: KVM\nVirtualization type: full\nL1d cache: 128 KiB\nL1i cache: 128 KiB\nL2 cache: 4 MiB\nL3 cache: 35.8 MiB\nNUMA node0 CPU(s): 0-7\nVulnerability Itlb multihit: KVM: Vulnerable\nVulnerability L1tf: Mitigation; PTE Inversion\nVulnerability Mds: Vulnerable: Clear CPU buffers attempted, no microcode; SMT Host state unknown\nVulnerability Meltdown: Mitigation; PTI\nVulnerability Spec store bypass: Vulnerable\nVulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization\nVulnerability Spectre v2: Mitigation; Retpolines, STIBP disabled, RSB filling\nVulnerability Srbds: Not affected\nVulnerability Tsx async abort: Not affected\nFlags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc rep_good nopl xtopology nonstop_tsc cpuid aperfmperf tsc_known_freq pni pclmulqdq ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand hypervisor lahf_lm abm 3dnowprefetch invpcid_single pti fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid mpx avx512f avx512dq rdseed adx smap clflushopt clwb avx512cd avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves ida arat pku ospke avx512_vnni\n", "versions": { "transformers": "4.20.1", "optimum": "1.2.3.dev0", "optimum_hash": "5c9af4e5f93c7e9bd523563230732b49603dc4d7" }, "evaluation": { "time": [ { "batch_size": 8, "input_length": 128, "baseline": { "nb_forwards": 13, "throughput": 0.87, "latency_mean": 1188.9833955384615, "latency_std": 156.3766424391915, "latency_50": 1271.045981, "latency_90": 1320.508096, "latency_95": 1351.0301161999998, "latency_99": 1381.7705920399999, "latency_999": 1388.6871991039998 }, "optimized": { "nb_forwards": 28, "throughput": 1.87, "latency_mean": 537.5845056428572, "latency_std": 24.479382275992702, "latency_50": 544.233773, "latency_90": 549.9048538999999, "latency_95": 550.3655834, "latency_99": 551.47189197, "latency_999": 551.817216597 } }, { "batch_size": 8, "input_length": 32, "baseline": { "nb_forwards": 12, "throughput": 0.8, "latency_mean": 1294.067229, "latency_std": 42.49668221169809, "latency_50": 1276.250713, "latency_90": 1376.546508, "latency_95": 1386.4398829000002, "latency_99": 1387.82451098, "latency_999": 1388.136052298 }, "optimized": { "nb_forwards": 32, "throughput": 2.13, "latency_mean": 472.54393821875, "latency_std": 44.30277356434995, "latency_50": 448.9115105, "latency_90": 538.9957706, "latency_95": 539.2858448, "latency_99": 541.8835004299999, "latency_999": 542.8368844429999 } }, { "batch_size": 1, "input_length": 32, "baseline": { "nb_forwards": 75, "throughput": 5.0, "latency_mean": 200.50050148, "latency_std": 0.8285022604698893, "latency_50": 200.26392, "latency_90": 201.65092280000002, "latency_95": 202.4235563, "latency_99": 202.9026252, "latency_999": 203.01259512000001 }, "optimized": { "nb_forwards": 239, "throughput": 15.93, "latency_mean": 62.99670253974895, "latency_std": 0.6391249990225535, "latency_50": 62.961666, "latency_90": 63.847294399999996, "latency_95": 64.0924426, "latency_99": 64.62881584, "latency_999": 64.996266822 } }, { "batch_size": 8, "input_length": 64, "baseline": { "nb_forwards": 12, "throughput": 0.8, "latency_mean": 1273.6051258333332, "latency_std": 12.303101937934832, "latency_50": 1270.9759965, "latency_90": 1294.2611932, "latency_95": 1297.39174085, "latency_99": 1299.14183777, "latency_999": 1299.5356095769998 }, "optimized": { "nb_forwards": 35, "throughput": 2.33, "latency_mean": 435.2721461714286, "latency_std": 5.5886169074680705, "latency_50": 434.611038, "latency_90": 441.9488788, "latency_95": 444.0377945, "latency_99": 447.65647636, "latency_999": 448.09250983600003 } }, { "batch_size": 4, "input_length": 64, "baseline": { "nb_forwards": 23, "throughput": 1.53, "latency_mean": 656.8662442608696, "latency_std": 1.963675172824631, "latency_50": 656.523145, "latency_90": 658.1741306, "latency_95": 658.2483522, "latency_99": 663.4018644600001, "latency_999": 664.7094380460001 }, "optimized": { "nb_forwards": 70, "throughput": 4.67, "latency_mean": 216.32413269999998, "latency_std": 2.573320955654004, "latency_50": 216.1438355, "latency_90": 219.18598509999998, "latency_95": 220.69771815000001, "latency_99": 223.89661489000002, "latency_999": 226.28716618899998 } }, { "batch_size": 4, "input_length": 128, "baseline": { "nb_forwards": 23, "throughput": 1.53, "latency_mean": 654.2367223043478, "latency_std": 1.3812737872427574, "latency_50": 653.93458, "latency_90": 655.7302913999999, "latency_95": 656.3990489, "latency_99": 658.70954416, "latency_999": 659.2843920160001 }, "optimized": { "nb_forwards": 70, "throughput": 4.67, "latency_mean": 216.82448075714288, "latency_std": 2.4549019943848234, "latency_50": 216.682762, "latency_90": 219.8229577, "latency_95": 221.14088784999998, "latency_99": 222.15547067000003, "latency_999": 223.170220067 } }, { "batch_size": 1, "input_length": 64, "baseline": { "nb_forwards": 93, "throughput": 6.2, "latency_mean": 162.75067422580645, "latency_std": 16.670443274832795, "latency_50": 155.50524, "latency_90": 197.1873276, "latency_95": 198.3428246, "latency_99": 199.21549572, "latency_999": 199.303705872 }, "optimized": { "nb_forwards": 224, "throughput": 14.93, "latency_mean": 67.17887016071428, "latency_std": 3.767553337517106, "latency_50": 69.853314, "latency_90": 70.8704738, "latency_95": 71.48267955, "latency_99": 72.17330481, "latency_999": 73.872151108 } }, { "batch_size": 1, "input_length": 128, "baseline": { "nb_forwards": 94, "throughput": 6.27, "latency_mean": 160.5772765319149, "latency_std": 13.789324382626834, "latency_50": 155.949789, "latency_90": 192.97832880000007, "latency_95": 199.62176935, "latency_99": 200.06097094, "latency_999": 200.418572494 }, "optimized": { "nb_forwards": 222, "throughput": 14.8, "latency_mean": 67.64596318918919, "latency_std": 3.8012002735511974, "latency_50": 70.4326435, "latency_90": 71.0351658, "latency_95": 71.17170809999999, "latency_99": 71.37054176000001, "latency_999": 71.686580853 } }, { "batch_size": 4, "input_length": 32, "baseline": { "nb_forwards": 30, "throughput": 2.0, "latency_mean": 503.8281891666667, "latency_std": 3.79639368455217, "latency_50": 503.337252, "latency_90": 509.18891210000004, "latency_95": 509.43762219999996, "latency_99": 511.16002555, "latency_999": 511.72013285500003 }, "optimized": { "nb_forwards": 69, "throughput": 4.6, "latency_mean": 219.61836788405796, "latency_std": 6.812193992706365, "latency_50": 216.499815, "latency_90": 231.0559726, "latency_95": 233.1648294, "latency_99": 239.13513347999998, "latency_999": 240.964685448 } } ], "others": { "baseline": { "accuracy": 0.98 }, "optimized": { "accuracy": 0.98 } } }, "max_eval_samples": null, "time_benchmark_args": { "duration": 15, "warmup_runs": 5 }, "model_type": "vit" }, { "model_name_or_path": "nateraw/vit-base-beans", "task": "image-classification", "task_args": null, "dataset": { "path": "beans", "eval_split": "validation", "data_keys": { "primary": "image", "secondary": null }, "ref_keys": [ "labels" ], "name": null, "calibration_split": "train" }, "quantization_approach": "dynamic", "operators_to_quantize": [ "Add" ], "node_exclusion": [ "layernorm", "gelu", "residual", "gather", "softmax" ], "aware_training": false, "per_channel": false, "calibration": { "method": "minmax", "num_calibration_samples": 100, "calibration_histogram_percentile": null, "calibration_moving_average": null, "calibration_moving_average_constant": null }, "framework": "onnxruntime", "framework_args": { "opset": 11, "optimization_level": 1 }, "hardware": "Architecture: x86_64\nCPU op-mode(s): 32-bit, 64-bit\nByte Order: Little Endian\nAddress sizes: 46 bits physical, 48 bits virtual\nCPU(s): 8\nOn-line CPU(s) list: 0-7\nThread(s) per core: 2\nCore(s) per socket: 4\nSocket(s): 1\nNUMA node(s): 1\nVendor ID: GenuineIntel\nCPU family: 6\nModel: 85\nModel name: Intel(R) Xeon(R) Platinum 8259CL CPU @ 2.50GHz\nStepping: 7\nCPU MHz: 3100.080\nBogoMIPS: 4999.99\nHypervisor vendor: KVM\nVirtualization type: full\nL1d cache: 128 KiB\nL1i cache: 128 KiB\nL2 cache: 4 MiB\nL3 cache: 35.8 MiB\nNUMA node0 CPU(s): 0-7\nVulnerability Itlb multihit: KVM: Vulnerable\nVulnerability L1tf: Mitigation; PTE Inversion\nVulnerability Mds: Vulnerable: Clear CPU buffers attempted, no microcode; SMT Host state unknown\nVulnerability Meltdown: Mitigation; PTI\nVulnerability Spec store bypass: Vulnerable\nVulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization\nVulnerability Spectre v2: Mitigation; Retpolines, STIBP disabled, RSB filling\nVulnerability Srbds: Not affected\nVulnerability Tsx async abort: Not affected\nFlags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc rep_good nopl xtopology nonstop_tsc cpuid aperfmperf tsc_known_freq pni pclmulqdq ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand hypervisor lahf_lm abm 3dnowprefetch invpcid_single pti fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid mpx avx512f avx512dq rdseed adx smap clflushopt clwb avx512cd avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves ida arat pku ospke avx512_vnni\n", "versions": { "transformers": "4.20.1", "optimum": "1.2.3.dev0", "optimum_hash": "5c9af4e5f93c7e9bd523563230732b49603dc4d7" }, "evaluation": { "time": [ { "batch_size": 8, "input_length": 64, "baseline": { "nb_forwards": 12, "throughput": 0.8, "latency_mean": 1270.30411025, "latency_std": 8.657521693333212, "latency_50": 1269.1937825, "latency_90": 1281.7940697000001, "latency_95": 1283.1798158, "latency_99": 1284.16803116, "latency_999": 1284.390379616 }, "optimized": { "nb_forwards": 14, "throughput": 0.93, "latency_mean": 1092.104026, "latency_std": 192.38390460063763, "latency_50": 942.50711, "latency_90": 1317.2986979000002, "latency_95": 1317.7165482999999, "latency_99": 1317.88203206, "latency_999": 1317.919265906 } }, { "batch_size": 1, "input_length": 64, "baseline": { "nb_forwards": 97, "throughput": 6.47, "latency_mean": 155.31766736082474, "latency_std": 2.58376281346955, "latency_50": 154.80961, "latency_90": 158.62567140000002, "latency_95": 159.8607244, "latency_99": 162.57999983999997, "latency_999": 166.451882784 }, "optimized": { "nb_forwards": 115, "throughput": 7.67, "latency_mean": 130.7428943826087, "latency_std": 4.439134079400091, "latency_50": 129.426836, "latency_90": 139.1114094, "latency_95": 140.0790016, "latency_99": 141.99444142, "latency_999": 143.111380738 } }, { "batch_size": 8, "input_length": 32, "baseline": { "nb_forwards": 16, "throughput": 1.07, "latency_mean": 953.2654264375, "latency_std": 52.43131265130536, "latency_50": 939.297859, "latency_90": 953.7382385, "latency_95": 1007.0155795, "latency_99": 1124.8867758999997, "latency_999": 1151.4077950899998 }, "optimized": { "nb_forwards": 17, "throughput": 1.13, "latency_mean": 926.1379787647058, "latency_std": 10.221038220575734, "latency_50": 925.067649, "latency_90": 942.8075994, "latency_95": 943.8492997999999, "latency_99": 944.12823036, "latency_999": 944.1909897359999 } }, { "batch_size": 4, "input_length": 64, "baseline": { "nb_forwards": 23, "throughput": 1.53, "latency_mean": 656.297728173913, "latency_std": 0.9507206548680598, "latency_50": 656.379836, "latency_90": 657.2358052000001, "latency_95": 657.2469835, "latency_99": 657.8591597000001, "latency_999": 658.01437487 }, "optimized": { "nb_forwards": 23, "throughput": 1.53, "latency_mean": 652.7235733913044, "latency_std": 1.4541608709475486, "latency_50": 652.974982, "latency_90": 654.6616273999999, "latency_95": 654.7420641, "latency_99": 655.24253678, "latency_999": 655.3693460779999 } }, { "batch_size": 8, "input_length": 128, "baseline": { "nb_forwards": 12, "throughput": 0.8, "latency_mean": 1265.5476989166668, "latency_std": 3.289433212667684, "latency_50": 1263.5434205, "latency_90": 1269.744916, "latency_95": 1270.16758755, "latency_99": 1270.5304551099998, "latency_999": 1270.6121003110002 }, "optimized": { "nb_forwards": 12, "throughput": 0.8, "latency_mean": 1304.50654875, "latency_std": 2.6884396280606753, "latency_50": 1303.907755, "latency_90": 1307.1289129000002, "latency_95": 1309.2907671500002, "latency_99": 1311.21018223, "latency_999": 1311.642050623 } }, { "batch_size": 1, "input_length": 32, "baseline": { "nb_forwards": 97, "throughput": 6.47, "latency_mean": 155.8433487113402, "latency_std": 2.5029692630646236, "latency_50": 155.989606, "latency_90": 158.707679, "latency_95": 159.601676, "latency_99": 161.73731819999998, "latency_999": 162.09523452000002 }, "optimized": { "nb_forwards": 115, "throughput": 7.67, "latency_mean": 130.9514579652174, "latency_std": 8.81473257206767, "latency_50": 128.898162, "latency_90": 131.6082024, "latency_95": 163.16474219999998, "latency_99": 163.3782847, "latency_999": 163.563403254 } }, { "batch_size": 4, "input_length": 128, "baseline": { "nb_forwards": 23, "throughput": 1.53, "latency_mean": 653.9863154782608, "latency_std": 0.7289228494064774, "latency_50": 653.794938, "latency_90": 654.6420688, "latency_95": 655.7360347, "latency_99": 656.03099652, "latency_999": 656.0760086519999 }, "optimized": { "nb_forwards": 32, "throughput": 2.13, "latency_mean": 479.06163646875, "latency_std": 19.59131442173658, "latency_50": 474.2416765, "latency_90": 511.8005115, "latency_95": 519.3751149, "latency_99": 526.80620621, "latency_999": 527.546256221 } }, { "batch_size": 1, "input_length": 128, "baseline": { "nb_forwards": 76, "throughput": 5.07, "latency_mean": 197.58542217105264, "latency_std": 0.9013285926255195, "latency_50": 197.396874, "latency_90": 198.8574485, "latency_95": 199.188459, "latency_99": 200.012024, "latency_999": 201.7015409 }, "optimized": { "nb_forwards": 117, "throughput": 7.8, "latency_mean": 128.7953315897436, "latency_std": 1.7882824907070387, "latency_50": 128.806478, "latency_90": 131.2709188, "latency_95": 131.67982, "latency_99": 132.40365796, "latency_999": 132.676468532 } }, { "batch_size": 4, "input_length": 32, "baseline": { "nb_forwards": 23, "throughput": 1.53, "latency_mean": 654.1969950434783, "latency_std": 1.1220804198391827, "latency_50": 654.001645, "latency_90": 655.7840166, "latency_95": 656.016458, "latency_99": 656.36563072, "latency_999": 656.447835172 }, "optimized": { "nb_forwards": 32, "throughput": 2.13, "latency_mean": 481.61485321875, "latency_std": 33.724130139913434, "latency_50": 473.2563825, "latency_90": 504.5333757, "latency_95": 520.7592738999999, "latency_99": 612.8124424300001, "latency_999": 644.3983803430002 } } ], "others": { "baseline": { "accuracy": 0.98 }, "optimized": { "accuracy": 0.98 } } }, "max_eval_samples": null, "time_benchmark_args": { "duration": 15, "warmup_runs": 5 }, "model_type": "vit" }, { "model_name_or_path": "nateraw/vit-base-beans", "task": "image-classification", "task_args": null, "dataset": { "path": "beans", "eval_split": "validation", "data_keys": { "primary": "image", "secondary": null }, "ref_keys": [ "labels" ], "name": null, "calibration_split": "train" }, "quantization_approach": "dynamic", "operators_to_quantize": [ "Add", "MatMul" ], "node_exclusion": [], "aware_training": false, "per_channel": true, "calibration": { "method": "minmax", "num_calibration_samples": 100, "calibration_histogram_percentile": null, "calibration_moving_average": null, "calibration_moving_average_constant": null }, "framework": "onnxruntime", "framework_args": { "opset": 11, "optimization_level": 1 }, "hardware": "Architecture: x86_64\nCPU op-mode(s): 32-bit, 64-bit\nByte Order: Little Endian\nAddress sizes: 46 bits physical, 48 bits virtual\nCPU(s): 8\nOn-line CPU(s) list: 0-7\nThread(s) per core: 2\nCore(s) per socket: 4\nSocket(s): 1\nNUMA node(s): 1\nVendor ID: GenuineIntel\nCPU family: 6\nModel: 85\nModel name: Intel(R) Xeon(R) Platinum 8259CL CPU @ 2.50GHz\nStepping: 7\nCPU MHz: 2594.420\nBogoMIPS: 4999.99\nHypervisor vendor: KVM\nVirtualization type: full\nL1d cache: 128 KiB\nL1i cache: 128 KiB\nL2 cache: 4 MiB\nL3 cache: 35.8 MiB\nNUMA node0 CPU(s): 0-7\nVulnerability Itlb multihit: KVM: Vulnerable\nVulnerability L1tf: Mitigation; PTE Inversion\nVulnerability Mds: Vulnerable: Clear CPU buffers attempted, no microcode; SMT Host state unknown\nVulnerability Meltdown: Mitigation; PTI\nVulnerability Spec store bypass: Vulnerable\nVulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization\nVulnerability Spectre v2: Mitigation; Retpolines, STIBP disabled, RSB filling\nVulnerability Srbds: Not affected\nVulnerability Tsx async abort: Not affected\nFlags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc rep_good nopl xtopology nonstop_tsc cpuid aperfmperf tsc_known_freq pni pclmulqdq ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand hypervisor lahf_lm abm 3dnowprefetch invpcid_single pti fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid mpx avx512f avx512dq rdseed adx smap clflushopt clwb avx512cd avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves ida arat pku ospke avx512_vnni\n", "versions": { "transformers": "4.20.1", "optimum": "1.2.3.dev0", "optimum_hash": "5c9af4e5f93c7e9bd523563230732b49603dc4d7" }, "evaluation": { "time": [ { "batch_size": 1, "input_length": 64, "baseline": { "nb_forwards": 96, "throughput": 6.4, "latency_mean": 157.21188266666667, "latency_std": 4.2296673831318135, "latency_50": 156.0635645, "latency_90": 164.39148, "latency_95": 168.00414375, "latency_99": 169.24721239999997, "latency_999": 172.15546754 }, "optimized": { "nb_forwards": 234, "throughput": 15.6, "latency_mean": 64.15645528205128, "latency_std": 0.8760033202077914, "latency_50": 64.078188, "latency_90": 65.0844361, "latency_95": 65.42577375, "latency_99": 66.03388837, "latency_999": 70.50518821699998 } }, { "batch_size": 4, "input_length": 128, "baseline": { "nb_forwards": 23, "throughput": 1.53, "latency_mean": 655.935887826087, "latency_std": 1.5184807551034185, "latency_50": 656.150496, "latency_90": 657.7225057999999, "latency_95": 658.3034359, "latency_99": 658.49345218, "latency_999": 658.5271777180001 }, "optimized": { "nb_forwards": 69, "throughput": 4.6, "latency_mean": 220.12218289855073, "latency_std": 2.8124226950487565, "latency_50": 220.269108, "latency_90": 223.81522719999998, "latency_95": 224.73072179999997, "latency_99": 226.68743828, "latency_999": 227.86304682800002 } }, { "batch_size": 8, "input_length": 64, "baseline": { "nb_forwards": 14, "throughput": 0.93, "latency_mean": 1107.6612250714286, "latency_std": 153.3041188785755, "latency_50": 1068.835057, "latency_90": 1280.4905781, "latency_95": 1289.3787047, "latency_99": 1300.66569054, "latency_999": 1303.205262354 }, "optimized": { "nb_forwards": 32, "throughput": 2.13, "latency_mean": 472.52847371875, "latency_std": 41.051488900475434, "latency_50": 449.4546335, "latency_90": 548.263878, "latency_95": 549.79502655, "latency_99": 552.90739222, "latency_999": 553.856337622 } }, { "batch_size": 4, "input_length": 64, "baseline": { "nb_forwards": 25, "throughput": 1.67, "latency_mean": 613.27841144, "latency_std": 65.27331298428419, "latency_50": 656.540737, "latency_90": 661.4057177999999, "latency_95": 661.429921, "latency_99": 661.6534559600001, "latency_999": 661.716672896 }, "optimized": { "nb_forwards": 68, "throughput": 4.53, "latency_mean": 220.9577393235294, "latency_std": 2.202118334734023, "latency_50": 220.84368, "latency_90": 223.2997182, "latency_95": 224.4097824, "latency_99": 226.46243421, "latency_999": 227.056110021 } }, { "batch_size": 8, "input_length": 128, "baseline": { "nb_forwards": 16, "throughput": 1.07, "latency_mean": 1005.3757105, "latency_std": 110.08740222252929, "latency_50": 951.764254, "latency_90": 1173.9785045, "latency_95": 1275.012984, "latency_99": 1277.4162648, "latency_999": 1277.95700298 }, "optimized": { "nb_forwards": 35, "throughput": 2.33, "latency_mean": 440.0128511142857, "latency_std": 6.045956719281095, "latency_50": 440.399668, "latency_90": 446.0546886, "latency_95": 449.1048619, "latency_99": 454.09199782, "latency_999": 455.953342882 } }, { "batch_size": 1, "input_length": 128, "baseline": { "nb_forwards": 77, "throughput": 5.13, "latency_mean": 195.38825248051947, "latency_std": 15.566776674930802, "latency_50": 201.26664, "latency_90": 203.9999776, "latency_95": 204.9478114, "latency_99": 208.10657124, "latency_999": 211.47291212400003 }, "optimized": { "nb_forwards": 208, "throughput": 13.87, "latency_mean": 72.28192877884617, "latency_std": 0.30617482097915, "latency_50": 72.2594515, "latency_90": 72.687902, "latency_95": 72.8431132, "latency_99": 73.03046864, "latency_999": 73.322488771 } }, { "batch_size": 1, "input_length": 32, "baseline": { "nb_forwards": 97, "throughput": 6.47, "latency_mean": 154.83997250515463, "latency_std": 1.6989375176627797, "latency_50": 154.770658, "latency_90": 156.874254, "latency_95": 157.6805368, "latency_99": 158.86760128, "latency_999": 159.947736928 }, "optimized": { "nb_forwards": 207, "throughput": 13.8, "latency_mean": 72.51127445410629, "latency_std": 0.27142401425407037, "latency_50": 72.511206, "latency_90": 72.86571620000001, "latency_95": 72.9502418, "latency_99": 73.14818106, "latency_999": 73.181334838 } }, { "batch_size": 8, "input_length": 32, "baseline": { "nb_forwards": 15, "throughput": 1.0, "latency_mean": 1030.1399602666665, "latency_std": 124.65913381994983, "latency_50": 955.359583, "latency_90": 1262.7360772, "latency_95": 1268.8635989000002, "latency_99": 1279.42059738, "latency_999": 1281.795922038 }, "optimized": { "nb_forwards": 28, "throughput": 1.87, "latency_mean": 542.358436, "latency_std": 5.562261406290527, "latency_50": 540.1506655, "latency_90": 552.4821344, "latency_95": 552.7215868999999, "latency_99": 558.17499099, "latency_999": 559.9655274989999 } }, { "batch_size": 4, "input_length": 32, "baseline": { "nb_forwards": 23, "throughput": 1.53, "latency_mean": 654.3281370434783, "latency_std": 1.8605606203681557, "latency_50": 653.634216, "latency_90": 656.6254864, "latency_95": 658.6546205, "latency_99": 660.00098088, "latency_999": 660.294841788 }, "optimized": { "nb_forwards": 69, "throughput": 4.6, "latency_mean": 219.54447333333334, "latency_std": 2.7890297777760664, "latency_50": 219.354462, "latency_90": 223.2168158, "latency_95": 224.12707319999998, "latency_99": 225.20001263999998, "latency_999": 225.522583764 } } ], "others": { "baseline": { "accuracy": 0.98 }, "optimized": { "accuracy": 0.98 } } }, "max_eval_samples": null, "time_benchmark_args": { "duration": 15, "warmup_runs": 5 }, "model_type": "vit" }, { "model_name_or_path": "nateraw/vit-base-beans", "task": "image-classification", "task_args": null, "dataset": { "path": "beans", "eval_split": "validation", "data_keys": { "primary": "image", "secondary": null }, "ref_keys": [ "labels" ], "name": null, "calibration_split": "train" }, "quantization_approach": "dynamic", "operators_to_quantize": [ "Add" ], "node_exclusion": [], "aware_training": false, "per_channel": true, "calibration": { "method": "minmax", "num_calibration_samples": 100, "calibration_histogram_percentile": null, "calibration_moving_average": null, "calibration_moving_average_constant": null }, "framework": "onnxruntime", "framework_args": { "opset": 11, "optimization_level": 1 }, "hardware": "Architecture: x86_64\nCPU op-mode(s): 32-bit, 64-bit\nByte Order: Little Endian\nAddress sizes: 46 bits physical, 48 bits virtual\nCPU(s): 8\nOn-line CPU(s) list: 0-7\nThread(s) per core: 2\nCore(s) per socket: 4\nSocket(s): 1\nNUMA node(s): 1\nVendor ID: GenuineIntel\nCPU family: 6\nModel: 85\nModel name: Intel(R) Xeon(R) Platinum 8259CL CPU @ 2.50GHz\nStepping: 7\nCPU MHz: 3100.006\nBogoMIPS: 4999.99\nHypervisor vendor: KVM\nVirtualization type: full\nL1d cache: 128 KiB\nL1i cache: 128 KiB\nL2 cache: 4 MiB\nL3 cache: 35.8 MiB\nNUMA node0 CPU(s): 0-7\nVulnerability Itlb multihit: KVM: Vulnerable\nVulnerability L1tf: Mitigation; PTE Inversion\nVulnerability Mds: Vulnerable: Clear CPU buffers attempted, no microcode; SMT Host state unknown\nVulnerability Meltdown: Mitigation; PTI\nVulnerability Spec store bypass: Vulnerable\nVulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization\nVulnerability Spectre v2: Mitigation; Retpolines, STIBP disabled, RSB filling\nVulnerability Srbds: Not affected\nVulnerability Tsx async abort: Not affected\nFlags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc rep_good nopl xtopology nonstop_tsc cpuid aperfmperf tsc_known_freq pni pclmulqdq ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand hypervisor lahf_lm abm 3dnowprefetch invpcid_single pti fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid mpx avx512f avx512dq rdseed adx smap clflushopt clwb avx512cd avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves ida arat pku ospke avx512_vnni\n", "versions": { "transformers": "4.20.1", "optimum": "1.2.3.dev0", "optimum_hash": "5c9af4e5f93c7e9bd523563230732b49603dc4d7" }, "evaluation": { "time": [ { "batch_size": 8, "input_length": 128, "baseline": { "nb_forwards": 16, "throughput": 1.07, "latency_mean": 981.8066059375, "latency_std": 44.423175785791656, "latency_50": 968.222589, "latency_90": 1030.6252395, "latency_95": 1051.9451035, "latency_99": 1099.3337503, "latency_999": 1109.99619583 }, "optimized": { "nb_forwards": 16, "throughput": 1.07, "latency_mean": 940.686023875, "latency_std": 34.50079570717892, "latency_50": 932.533706, "latency_90": 954.537913, "latency_95": 982.67146125, "latency_99": 1049.86251225, "latency_999": 1064.980498725 } }, { "batch_size": 4, "input_length": 64, "baseline": { "nb_forwards": 23, "throughput": 1.53, "latency_mean": 653.6214047826087, "latency_std": 0.554367349056088, "latency_50": 653.56593, "latency_90": 654.3410592, "latency_95": 654.4254941, "latency_99": 654.86828346, "latency_999": 654.978897546 }, "optimized": { "nb_forwards": 32, "throughput": 2.13, "latency_mean": 468.82132275, "latency_std": 6.479021216722283, "latency_50": 468.046428, "latency_90": 477.1373799, "latency_95": 478.81192385, "latency_99": 481.5715929, "latency_999": 482.31672099 } }, { "batch_size": 1, "input_length": 64, "baseline": { "nb_forwards": 76, "throughput": 5.07, "latency_mean": 199.75279011842105, "latency_std": 1.1029265450656718, "latency_50": 199.5758745, "latency_90": 200.400115, "latency_95": 200.559785, "latency_99": 202.84980075, "latency_999": 207.81083137499996 }, "optimized": { "nb_forwards": 115, "throughput": 7.67, "latency_mean": 131.4627194347826, "latency_std": 4.6871974959568545, "latency_50": 130.19088, "latency_90": 139.359352, "latency_95": 141.0757014, "latency_99": 143.8943466, "latency_999": 150.29109535799998 } }, { "batch_size": 4, "input_length": 128, "baseline": { "nb_forwards": 23, "throughput": 1.53, "latency_mean": 661.8593041304348, "latency_std": 11.283099142573517, "latency_50": 655.43737, "latency_90": 681.5352313999999, "latency_95": 682.9110206, "latency_99": 683.9518636399999, "latency_999": 684.184243964 }, "optimized": { "nb_forwards": 32, "throughput": 2.13, "latency_mean": 472.49058509375, "latency_std": 8.963136445787091, "latency_50": 474.708593, "latency_90": 483.1333381, "latency_95": 485.4448544, "latency_99": 488.61856141000004, "latency_999": 489.732493741 } }, { "batch_size": 8, "input_length": 32, "baseline": { "nb_forwards": 12, "throughput": 0.8, "latency_mean": 1283.6983385, "latency_std": 22.13170499191711, "latency_50": 1275.341306, "latency_90": 1321.6280871, "latency_95": 1324.7206230499999, "latency_99": 1326.57760301, "latency_999": 1326.995423501 }, "optimized": { "nb_forwards": 17, "throughput": 1.13, "latency_mean": 927.8742942352941, "latency_std": 13.192113526204107, "latency_50": 928.112074, "latency_90": 943.312943, "latency_95": 947.7967182000001, "latency_99": 950.06144204, "latency_999": 950.571004904 } }, { "batch_size": 1, "input_length": 32, "baseline": { "nb_forwards": 92, "throughput": 6.13, "latency_mean": 163.7631747173913, "latency_std": 16.656417748995572, "latency_50": 155.936432, "latency_90": 197.9161219, "latency_95": 199.74363870000002, "latency_99": 201.92121115999998, "latency_999": 203.62556981600002 }, "optimized": { "nb_forwards": 116, "throughput": 7.73, "latency_mean": 129.6231810086207, "latency_std": 1.9548923667657356, "latency_50": 129.4212125, "latency_90": 132.2460935, "latency_95": 132.81225825, "latency_99": 133.86391385, "latency_999": 138.651827795 } }, { "batch_size": 8, "input_length": 64, "baseline": { "nb_forwards": 12, "throughput": 0.8, "latency_mean": 1286.5124550833332, "latency_std": 23.2357907207992, "latency_50": 1276.5111835, "latency_90": 1323.3058475999999, "latency_95": 1327.5837783, "latency_99": 1330.1992940599998, "latency_999": 1330.787785106 }, "optimized": { "nb_forwards": 12, "throughput": 0.8, "latency_mean": 1317.95578525, "latency_std": 1.490128618702992, "latency_50": 1317.6128805, "latency_90": 1320.2002315, "latency_95": 1320.6199098, "latency_99": 1320.96551396, "latency_999": 1321.043274896 } }, { "batch_size": 4, "input_length": 32, "baseline": { "nb_forwards": 23, "throughput": 1.53, "latency_mean": 666.9763079565217, "latency_std": 11.149385297770825, "latency_50": 661.47222, "latency_90": 684.7818294, "latency_95": 685.6954787000001, "latency_99": 686.6990396799999, "latency_999": 686.939640568 }, "optimized": { "nb_forwards": 23, "throughput": 1.53, "latency_mean": 655.3178510869565, "latency_std": 2.77492679226272, "latency_50": 655.00068, "latency_90": 659.4658294, "latency_95": 660.3613685, "latency_99": 660.48331564, "latency_999": 660.4916490640001 } }, { "batch_size": 1, "input_length": 128, "baseline": { "nb_forwards": 86, "throughput": 5.73, "latency_mean": 176.08157447674418, "latency_std": 20.32932776355446, "latency_50": 167.453177, "latency_90": 197.6717735, "latency_95": 198.75702125, "latency_99": 199.23389685, "latency_999": 199.447866585 }, "optimized": { "nb_forwards": 93, "throughput": 6.2, "latency_mean": 161.78841179569892, "latency_std": 0.2689887068096146, "latency_50": 161.820936, "latency_90": 162.0967684, "latency_95": 162.21570319999998, "latency_99": 162.39115031999998, "latency_999": 162.649013532 } } ], "others": { "baseline": { "accuracy": 0.98 }, "optimized": { "accuracy": 0.98 } } }, "max_eval_samples": null, "time_benchmark_args": { "duration": 15, "warmup_runs": 5 }, "model_type": "vit" }, { "model_name_or_path": "nateraw/vit-base-beans", "task": "image-classification", "task_args": null, "dataset": { "path": "beans", "eval_split": "validation", "data_keys": { "primary": "image", "secondary": null }, "ref_keys": [ "labels" ], "name": null, "calibration_split": "train" }, "quantization_approach": "dynamic", "operators_to_quantize": [ "Add", "MatMul" ], "node_exclusion": [ "layernorm", "gelu", "residual", "gather", "softmax" ], "aware_training": false, "per_channel": true, "calibration": { "method": "minmax", "num_calibration_samples": 100, "calibration_histogram_percentile": null, "calibration_moving_average": null, "calibration_moving_average_constant": null }, "framework": "onnxruntime", "framework_args": { "opset": 11, "optimization_level": 1 }, "hardware": "Architecture: x86_64\nCPU op-mode(s): 32-bit, 64-bit\nByte Order: Little Endian\nAddress sizes: 46 bits physical, 48 bits virtual\nCPU(s): 8\nOn-line CPU(s) list: 0-7\nThread(s) per core: 2\nCore(s) per socket: 4\nSocket(s): 1\nNUMA node(s): 1\nVendor ID: GenuineIntel\nCPU family: 6\nModel: 85\nModel name: Intel(R) Xeon(R) Platinum 8259CL CPU @ 2.50GHz\nStepping: 7\nCPU MHz: 3105.350\nBogoMIPS: 4999.99\nHypervisor vendor: KVM\nVirtualization type: full\nL1d cache: 128 KiB\nL1i cache: 128 KiB\nL2 cache: 4 MiB\nL3 cache: 35.8 MiB\nNUMA node0 CPU(s): 0-7\nVulnerability Itlb multihit: KVM: Vulnerable\nVulnerability L1tf: Mitigation; PTE Inversion\nVulnerability Mds: Vulnerable: Clear CPU buffers attempted, no microcode; SMT Host state unknown\nVulnerability Meltdown: Mitigation; PTI\nVulnerability Spec store bypass: Vulnerable\nVulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization\nVulnerability Spectre v2: Mitigation; Retpolines, STIBP disabled, RSB filling\nVulnerability Srbds: Not affected\nVulnerability Tsx async abort: Not affected\nFlags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc rep_good nopl xtopology nonstop_tsc cpuid aperfmperf tsc_known_freq pni pclmulqdq ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand hypervisor lahf_lm abm 3dnowprefetch invpcid_single pti fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid mpx avx512f avx512dq rdseed adx smap clflushopt clwb avx512cd avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves ida arat pku ospke avx512_vnni\n", "versions": { "transformers": "4.20.1", "optimum": "1.2.3.dev0", "optimum_hash": "5c9af4e5f93c7e9bd523563230732b49603dc4d7" }, "evaluation": { "time": [ { "batch_size": 8, "input_length": 64, "baseline": { "nb_forwards": 13, "throughput": 0.87, "latency_mean": 1157.0002205384615, "latency_std": 173.96892806199435, "latency_50": 1268.399731, "latency_90": 1373.240541, "latency_95": 1394.8925212000001, "latency_99": 1395.4346634400001, "latency_999": 1395.556645444 }, "optimized": { "nb_forwards": 28, "throughput": 1.87, "latency_mean": 542.74753675, "latency_std": 1.5526347456374558, "latency_50": 542.544525, "latency_90": 544.3944952, "latency_95": 545.1945222999999, "latency_99": 547.32045758, "latency_999": 547.9295482580001 } }, { "batch_size": 1, "input_length": 32, "baseline": { "nb_forwards": 76, "throughput": 5.07, "latency_mean": 198.1925660394737, "latency_std": 1.5139423529661107, "latency_50": 198.3774705, "latency_90": 199.9859375, "latency_95": 200.47868375, "latency_99": 200.94235025, "latency_999": 201.309621125 }, "optimized": { "nb_forwards": 207, "throughput": 13.8, "latency_mean": 72.64656536231884, "latency_std": 0.5352263228340901, "latency_50": 72.569523, "latency_90": 73.2376486, "latency_95": 73.53506229999999, "latency_99": 74.89236506, "latency_999": 76.09928199800001 } }, { "batch_size": 8, "input_length": 32, "baseline": { "nb_forwards": 12, "throughput": 0.8, "latency_mean": 1287.5803281666667, "latency_std": 36.91340143279409, "latency_50": 1269.6482595, "latency_90": 1303.1859430999998, "latency_95": 1346.71807165, "latency_99": 1388.7522119300002, "latency_999": 1398.209893493 }, "optimized": { "nb_forwards": 28, "throughput": 1.87, "latency_mean": 542.7170135714285, "latency_std": 1.3567039497139863, "latency_50": 542.287221, "latency_90": 544.4592137999999, "latency_95": 545.4364649, "latency_99": 546.21864882, "latency_999": 546.315030882 } }, { "batch_size": 4, "input_length": 64, "baseline": { "nb_forwards": 30, "throughput": 2.0, "latency_mean": 507.2400474666666, "latency_std": 4.290148550841821, "latency_50": 507.659231, "latency_90": 511.6910329, "latency_95": 514.4643897, "latency_99": 516.23375722, "latency_999": 516.269666122 }, "optimized": { "nb_forwards": 57, "throughput": 3.8, "latency_mean": 265.6226849122807, "latency_std": 2.3036455255332684, "latency_50": 265.179855, "latency_90": 269.8071572, "latency_95": 271.4136062, "latency_99": 271.69196852, "latency_999": 271.82992995200004 } }, { "batch_size": 4, "input_length": 32, "baseline": { "nb_forwards": 25, "throughput": 1.67, "latency_mean": 603.25719464, "latency_std": 66.25874091678149, "latency_50": 654.828359, "latency_90": 660.8697482, "latency_95": 661.393019, "latency_99": 661.8344047999999, "latency_999": 661.93484588 }, "optimized": { "nb_forwards": 57, "throughput": 3.8, "latency_mean": 266.1465179649123, "latency_std": 2.133106034837133, "latency_50": 265.532055, "latency_90": 269.2449408, "latency_95": 270.3573558, "latency_99": 271.02784404000005, "latency_999": 271.716555504 } }, { "batch_size": 1, "input_length": 64, "baseline": { "nb_forwards": 95, "throughput": 6.33, "latency_mean": 159.68801769473683, "latency_std": 12.773363897780701, "latency_50": 155.413717, "latency_90": 176.90005560000023, "latency_95": 197.0968159, "latency_99": 198.07615253999998, "latency_999": 198.10961945399998 }, "optimized": { "nb_forwards": 207, "throughput": 13.8, "latency_mean": 72.7664098888889, "latency_std": 0.38816228058385593, "latency_50": 72.718874, "latency_90": 73.24516159999999, "latency_95": 73.43794679999999, "latency_99": 73.8501665, "latency_999": 73.996995134 } }, { "batch_size": 4, "input_length": 128, "baseline": { "nb_forwards": 23, "throughput": 1.53, "latency_mean": 657.163290173913, "latency_std": 1.3013676808424233, "latency_50": 657.058665, "latency_90": 657.6071907999999, "latency_95": 659.0426878, "latency_99": 661.57735552, "latency_999": 662.180470252 }, "optimized": { "nb_forwards": 63, "throughput": 4.2, "latency_mean": 240.11091615873016, "latency_std": 21.14828674924693, "latency_50": 226.793935, "latency_90": 269.1414722, "latency_95": 270.4462369, "latency_99": 279.1629919800001, "latency_999": 290.00248849800005 } }, { "batch_size": 1, "input_length": 128, "baseline": { "nb_forwards": 95, "throughput": 6.33, "latency_mean": 158.60142149473685, "latency_std": 11.827907365032553, "latency_50": 155.594958, "latency_90": 159.5463688, "latency_95": 198.4467831, "latency_99": 201.84369322, "latency_999": 202.49142212200002 }, "optimized": { "nb_forwards": 207, "throughput": 13.8, "latency_mean": 72.53135863285024, "latency_std": 0.4182249154784389, "latency_50": 72.447941, "latency_90": 73.07772, "latency_95": 73.2243443, "latency_99": 73.8286485, "latency_999": 73.97194594400001 } }, { "batch_size": 8, "input_length": 128, "baseline": { "nb_forwards": 16, "throughput": 1.07, "latency_mean": 951.313504375, "latency_std": 6.690910380125991, "latency_50": 951.8069655, "latency_90": 958.854554, "latency_95": 960.087836, "latency_99": 962.6794064, "latency_999": 963.26250974 }, "optimized": { "nb_forwards": 31, "throughput": 2.07, "latency_mean": 489.4035592258065, "latency_std": 45.5667283182196, "latency_50": 469.093915, "latency_90": 553.663101, "latency_95": 555.243522, "latency_99": 556.1204486, "latency_999": 556.42928486 } } ], "others": { "baseline": { "accuracy": 0.98 }, "optimized": { "accuracy": 0.98 } } }, "max_eval_samples": null, "time_benchmark_args": { "duration": 15, "warmup_runs": 5 }, "model_type": "vit" } ]