|
[ |
|
{ |
|
"model_name_or_path": "nateraw/vit-base-beans", |
|
"task": "image-classification", |
|
"task_args": null, |
|
"dataset": { |
|
"path": "beans", |
|
"eval_split": "validation", |
|
"data_keys": { |
|
"primary": "image", |
|
"secondary": null |
|
}, |
|
"ref_keys": [ |
|
"labels" |
|
], |
|
"name": null, |
|
"calibration_split": "train" |
|
}, |
|
"quantization_approach": "dynamic", |
|
"operators_to_quantize": [ |
|
"Add" |
|
], |
|
"node_exclusion": [], |
|
"aware_training": false, |
|
"per_channel": false, |
|
"calibration": { |
|
"method": "minmax", |
|
"num_calibration_samples": 100, |
|
"calibration_histogram_percentile": null, |
|
"calibration_moving_average": null, |
|
"calibration_moving_average_constant": null |
|
}, |
|
"framework": "onnxruntime", |
|
"framework_args": { |
|
"opset": 11, |
|
"optimization_level": 1 |
|
}, |
|
"hardware": "Architecture: x86_64\nCPU op-mode(s): 32-bit, 64-bit\nByte Order: Little Endian\nAddress sizes: 46 bits physical, 48 bits virtual\nCPU(s): 8\nOn-line CPU(s) list: 0-7\nThread(s) per core: 2\nCore(s) per socket: 4\nSocket(s): 1\nNUMA node(s): 1\nVendor ID: GenuineIntel\nCPU family: 6\nModel: 85\nModel name: Intel(R) Xeon(R) Platinum 8259CL CPU @ 2.50GHz\nStepping: 7\nCPU MHz: 3104.203\nBogoMIPS: 4999.99\nHypervisor vendor: KVM\nVirtualization type: full\nL1d cache: 128 KiB\nL1i cache: 128 KiB\nL2 cache: 4 MiB\nL3 cache: 35.8 MiB\nNUMA node0 CPU(s): 0-7\nVulnerability Itlb multihit: KVM: Vulnerable\nVulnerability L1tf: Mitigation; PTE Inversion\nVulnerability Mds: Vulnerable: Clear CPU buffers attempted, no microcode; SMT Host state unknown\nVulnerability Meltdown: Mitigation; PTI\nVulnerability Spec store bypass: Vulnerable\nVulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization\nVulnerability Spectre v2: Mitigation; Retpolines, STIBP disabled, RSB filling\nVulnerability Srbds: Not affected\nVulnerability Tsx async abort: Not affected\nFlags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc rep_good nopl xtopology nonstop_tsc cpuid aperfmperf tsc_known_freq pni pclmulqdq ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand hypervisor lahf_lm abm 3dnowprefetch invpcid_single pti fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid mpx avx512f avx512dq rdseed adx smap clflushopt clwb avx512cd avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves ida arat pku ospke avx512_vnni\n", |
|
"versions": { |
|
"transformers": "4.20.1", |
|
"optimum": "1.2.3.dev0", |
|
"optimum_hash": "5c9af4e5f93c7e9bd523563230732b49603dc4d7" |
|
}, |
|
"evaluation": { |
|
"time": [ |
|
{ |
|
"batch_size": 8, |
|
"input_length": 64, |
|
"baseline": { |
|
"nb_forwards": 16, |
|
"throughput": 1.07, |
|
"latency_mean": 1007.1949454375, |
|
"latency_std": 78.84058358158838, |
|
"latency_50": 985.576602, |
|
"latency_90": 1059.7841785, |
|
"latency_95": 1145.035921, |
|
"latency_99": 1250.375245, |
|
"latency_999": 1274.0765929 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 12, |
|
"throughput": 0.8, |
|
"latency_mean": 1331.11930825, |
|
"latency_std": 2.6333668250894813, |
|
"latency_50": 1331.341538, |
|
"latency_90": 1333.9482434000001, |
|
"latency_95": 1334.2215575, |
|
"latency_99": 1334.4844530999999, |
|
"latency_999": 1334.5436046099999 |
|
} |
|
}, |
|
{ |
|
"batch_size": 1, |
|
"input_length": 64, |
|
"baseline": { |
|
"nb_forwards": 81, |
|
"throughput": 5.4, |
|
"latency_mean": 186.5835592716049, |
|
"latency_std": 20.00760723775789, |
|
"latency_50": 198.81694, |
|
"latency_90": 201.024797, |
|
"latency_95": 201.779275, |
|
"latency_99": 202.413402, |
|
"latency_999": 202.5442512 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 92, |
|
"throughput": 6.13, |
|
"latency_mean": 163.38389757608698, |
|
"latency_std": 0.3275123768495738, |
|
"latency_50": 163.3941135, |
|
"latency_90": 163.7525755, |
|
"latency_95": 163.926186, |
|
"latency_99": 164.34097907, |
|
"latency_999": 164.45778730700002 |
|
} |
|
}, |
|
{ |
|
"batch_size": 1, |
|
"input_length": 32, |
|
"baseline": { |
|
"nb_forwards": 76, |
|
"throughput": 5.07, |
|
"latency_mean": 198.9630147368421, |
|
"latency_std": 1.8034042650036124, |
|
"latency_50": 198.5655705, |
|
"latency_90": 200.2815155, |
|
"latency_95": 203.1906375, |
|
"latency_99": 206.4380845, |
|
"latency_999": 206.66971884999998 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 117, |
|
"throughput": 7.8, |
|
"latency_mean": 128.82420041880343, |
|
"latency_std": 1.701156916298848, |
|
"latency_50": 128.623607, |
|
"latency_90": 131.323657, |
|
"latency_95": 131.90711579999999, |
|
"latency_99": 132.77440764, |
|
"latency_999": 133.447005692 |
|
} |
|
}, |
|
{ |
|
"batch_size": 4, |
|
"input_length": 64, |
|
"baseline": { |
|
"nb_forwards": 23, |
|
"throughput": 1.53, |
|
"latency_mean": 655.365458, |
|
"latency_std": 0.6236008690322891, |
|
"latency_50": 655.34975, |
|
"latency_90": 656.1519954, |
|
"latency_95": 656.2574049, |
|
"latency_99": 656.3535554199999, |
|
"latency_999": 656.375521342 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 32, |
|
"throughput": 2.13, |
|
"latency_mean": 473.76923496875, |
|
"latency_std": 7.2747277092183165, |
|
"latency_50": 472.62492, |
|
"latency_90": 483.41045330000003, |
|
"latency_95": 484.61633445, |
|
"latency_99": 489.37835364, |
|
"latency_999": 490.82021336400004 |
|
} |
|
}, |
|
{ |
|
"batch_size": 1, |
|
"input_length": 128, |
|
"baseline": { |
|
"nb_forwards": 96, |
|
"throughput": 6.4, |
|
"latency_mean": 157.24992970833335, |
|
"latency_std": 4.732553962424211, |
|
"latency_50": 155.406953, |
|
"latency_90": 164.713482, |
|
"latency_95": 167.8590275, |
|
"latency_99": 170.8462496, |
|
"latency_999": 171.27275096 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 117, |
|
"throughput": 7.8, |
|
"latency_mean": 129.13340215384616, |
|
"latency_std": 2.1864167617663997, |
|
"latency_50": 128.928664, |
|
"latency_90": 131.8907006, |
|
"latency_95": 132.5507548, |
|
"latency_99": 135.60957616, |
|
"latency_999": 136.973290652 |
|
} |
|
}, |
|
{ |
|
"batch_size": 8, |
|
"input_length": 32, |
|
"baseline": { |
|
"nb_forwards": 12, |
|
"throughput": 0.8, |
|
"latency_mean": 1280.0728285, |
|
"latency_std": 15.395205966597677, |
|
"latency_50": 1270.8922015, |
|
"latency_90": 1298.3091468, |
|
"latency_95": 1300.7668938, |
|
"latency_99": 1303.06202356, |
|
"latency_999": 1303.578427756 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 17, |
|
"throughput": 1.13, |
|
"latency_mean": 926.9665002352941, |
|
"latency_std": 9.31210282206815, |
|
"latency_50": 930.379688, |
|
"latency_90": 937.0084018, |
|
"latency_95": 939.6581278, |
|
"latency_99": 941.59419436, |
|
"latency_999": 942.029809336 |
|
} |
|
}, |
|
{ |
|
"batch_size": 4, |
|
"input_length": 128, |
|
"baseline": { |
|
"nb_forwards": 23, |
|
"throughput": 1.53, |
|
"latency_mean": 656.3433247826088, |
|
"latency_std": 0.8294053936962497, |
|
"latency_50": 656.248342, |
|
"latency_90": 657.3568842000001, |
|
"latency_95": 657.4566738999999, |
|
"latency_99": 658.36745674, |
|
"latency_999": 658.596371074 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 23, |
|
"throughput": 1.53, |
|
"latency_mean": 661.2441610869565, |
|
"latency_std": 1.5993679984326394, |
|
"latency_50": 661.382511, |
|
"latency_90": 662.625988, |
|
"latency_95": 663.5314744, |
|
"latency_99": 665.2728648, |
|
"latency_999": 665.69171598 |
|
} |
|
}, |
|
{ |
|
"batch_size": 8, |
|
"input_length": 128, |
|
"baseline": { |
|
"nb_forwards": 12, |
|
"throughput": 0.8, |
|
"latency_mean": 1276.3800261666668, |
|
"latency_std": 14.72110021637953, |
|
"latency_50": 1268.308021, |
|
"latency_90": 1294.9160152, |
|
"latency_95": 1300.88363675, |
|
"latency_99": 1306.34986895, |
|
"latency_999": 1307.579771195 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 12, |
|
"throughput": 0.8, |
|
"latency_mean": 1319.8366895833333, |
|
"latency_std": 2.9678765372708384, |
|
"latency_50": 1319.0768965, |
|
"latency_90": 1323.875593, |
|
"latency_95": 1325.10346415, |
|
"latency_99": 1326.0648056300001, |
|
"latency_999": 1326.2811074630001 |
|
} |
|
}, |
|
{ |
|
"batch_size": 4, |
|
"input_length": 32, |
|
"baseline": { |
|
"nb_forwards": 26, |
|
"throughput": 1.73, |
|
"latency_mean": 588.8595787307693, |
|
"latency_std": 75.12307910293818, |
|
"latency_50": 656.586274, |
|
"latency_90": 659.335121, |
|
"latency_95": 659.60222425, |
|
"latency_99": 661.19556825, |
|
"latency_999": 661.6635563250001 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 25, |
|
"throughput": 1.67, |
|
"latency_mean": 602.9062813200001, |
|
"latency_std": 87.18802711972026, |
|
"latency_50": 660.573071, |
|
"latency_90": 662.2857194, |
|
"latency_95": 662.9589754, |
|
"latency_99": 664.0132692799999, |
|
"latency_999": 664.269413528 |
|
} |
|
} |
|
], |
|
"others": { |
|
"baseline": { |
|
"accuracy": 0.98 |
|
}, |
|
"optimized": { |
|
"accuracy": 0.98 |
|
} |
|
} |
|
}, |
|
"max_eval_samples": null, |
|
"time_benchmark_args": { |
|
"duration": 15, |
|
"warmup_runs": 5 |
|
}, |
|
"model_type": "vit" |
|
}, |
|
{ |
|
"model_name_or_path": "nateraw/vit-base-beans", |
|
"task": "image-classification", |
|
"task_args": null, |
|
"dataset": { |
|
"path": "beans", |
|
"eval_split": "validation", |
|
"data_keys": { |
|
"primary": "image", |
|
"secondary": null |
|
}, |
|
"ref_keys": [ |
|
"labels" |
|
], |
|
"name": null, |
|
"calibration_split": "train" |
|
}, |
|
"quantization_approach": "dynamic", |
|
"operators_to_quantize": [ |
|
"Add" |
|
], |
|
"node_exclusion": [ |
|
"layernorm", |
|
"gelu", |
|
"residual", |
|
"gather", |
|
"softmax" |
|
], |
|
"aware_training": false, |
|
"per_channel": true, |
|
"calibration": { |
|
"method": "minmax", |
|
"num_calibration_samples": 100, |
|
"calibration_histogram_percentile": null, |
|
"calibration_moving_average": null, |
|
"calibration_moving_average_constant": null |
|
}, |
|
"framework": "onnxruntime", |
|
"framework_args": { |
|
"opset": 11, |
|
"optimization_level": 1 |
|
}, |
|
"hardware": "Architecture: x86_64\nCPU op-mode(s): 32-bit, 64-bit\nByte Order: Little Endian\nAddress sizes: 46 bits physical, 48 bits virtual\nCPU(s): 8\nOn-line CPU(s) list: 0-7\nThread(s) per core: 2\nCore(s) per socket: 4\nSocket(s): 1\nNUMA node(s): 1\nVendor ID: GenuineIntel\nCPU family: 6\nModel: 85\nModel name: Intel(R) Xeon(R) Platinum 8259CL CPU @ 2.50GHz\nStepping: 7\nCPU MHz: 3098.882\nBogoMIPS: 4999.99\nHypervisor vendor: KVM\nVirtualization type: full\nL1d cache: 128 KiB\nL1i cache: 128 KiB\nL2 cache: 4 MiB\nL3 cache: 35.8 MiB\nNUMA node0 CPU(s): 0-7\nVulnerability Itlb multihit: KVM: Vulnerable\nVulnerability L1tf: Mitigation; PTE Inversion\nVulnerability Mds: Vulnerable: Clear CPU buffers attempted, no microcode; SMT Host state unknown\nVulnerability Meltdown: Mitigation; PTI\nVulnerability Spec store bypass: Vulnerable\nVulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization\nVulnerability Spectre v2: Mitigation; Retpolines, STIBP disabled, RSB filling\nVulnerability Srbds: Not affected\nVulnerability Tsx async abort: Not affected\nFlags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc rep_good nopl xtopology nonstop_tsc cpuid aperfmperf tsc_known_freq pni pclmulqdq ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand hypervisor lahf_lm abm 3dnowprefetch invpcid_single pti fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid mpx avx512f avx512dq rdseed adx smap clflushopt clwb avx512cd avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves ida arat pku ospke avx512_vnni\n", |
|
"versions": { |
|
"transformers": "4.20.1", |
|
"optimum": "1.2.3.dev0", |
|
"optimum_hash": "5c9af4e5f93c7e9bd523563230732b49603dc4d7" |
|
}, |
|
"evaluation": { |
|
"time": [ |
|
{ |
|
"batch_size": 4, |
|
"input_length": 32, |
|
"baseline": { |
|
"nb_forwards": 25, |
|
"throughput": 1.67, |
|
"latency_mean": 609.81228388, |
|
"latency_std": 73.78848735281055, |
|
"latency_50": 654.990393, |
|
"latency_90": 690.0503104, |
|
"latency_95": 692.5913532000001, |
|
"latency_99": 692.88928036, |
|
"latency_999": 692.967085936 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 24, |
|
"throughput": 1.6, |
|
"latency_mean": 632.7304657083333, |
|
"latency_std": 67.87527940398857, |
|
"latency_50": 665.6419855, |
|
"latency_90": 668.6654992, |
|
"latency_95": 668.7597744, |
|
"latency_99": 669.6960696, |
|
"latency_999": 669.94376166 |
|
} |
|
}, |
|
{ |
|
"batch_size": 8, |
|
"input_length": 128, |
|
"baseline": { |
|
"nb_forwards": 13, |
|
"throughput": 0.87, |
|
"latency_mean": 1186.5381284615385, |
|
"latency_std": 161.4237069238961, |
|
"latency_50": 1269.670847, |
|
"latency_90": 1355.9492604000002, |
|
"latency_95": 1390.1450375999998, |
|
"latency_99": 1411.6607203199999, |
|
"latency_999": 1416.5017489319998 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 17, |
|
"throughput": 1.13, |
|
"latency_mean": 934.0936628235294, |
|
"latency_std": 14.582536342073604, |
|
"latency_50": 935.050755, |
|
"latency_90": 952.4674898, |
|
"latency_95": 957.0302138, |
|
"latency_99": 962.9052563600001, |
|
"latency_999": 964.227140936 |
|
} |
|
}, |
|
{ |
|
"batch_size": 1, |
|
"input_length": 64, |
|
"baseline": { |
|
"nb_forwards": 76, |
|
"throughput": 5.07, |
|
"latency_mean": 198.5647816973684, |
|
"latency_std": 3.4715584501636134, |
|
"latency_50": 198.506581, |
|
"latency_90": 200.2144805, |
|
"latency_95": 200.55136775, |
|
"latency_99": 207.540032, |
|
"latency_999": 223.25337049999993 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 93, |
|
"throughput": 6.2, |
|
"latency_mean": 162.5132122580645, |
|
"latency_std": 0.513116818309582, |
|
"latency_50": 162.371852, |
|
"latency_90": 163.2859852, |
|
"latency_95": 163.7528158, |
|
"latency_99": 163.85883275999998, |
|
"latency_999": 163.915920876 |
|
} |
|
}, |
|
{ |
|
"batch_size": 8, |
|
"input_length": 32, |
|
"baseline": { |
|
"nb_forwards": 13, |
|
"throughput": 0.87, |
|
"latency_mean": 1173.0143999230768, |
|
"latency_std": 130.54371262227698, |
|
"latency_50": 1260.605467, |
|
"latency_90": 1296.8186462, |
|
"latency_95": 1308.2374858, |
|
"latency_99": 1319.8486531600001, |
|
"latency_999": 1322.461165816 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 16, |
|
"throughput": 1.07, |
|
"latency_mean": 995.22072925, |
|
"latency_std": 129.45556415437287, |
|
"latency_50": 946.254189, |
|
"latency_90": 1213.3328715, |
|
"latency_95": 1315.33706625, |
|
"latency_99": 1318.61972205, |
|
"latency_999": 1319.358319605 |
|
} |
|
}, |
|
{ |
|
"batch_size": 4, |
|
"input_length": 64, |
|
"baseline": { |
|
"nb_forwards": 29, |
|
"throughput": 1.93, |
|
"latency_mean": 521.0909943793104, |
|
"latency_std": 46.475380124669094, |
|
"latency_50": 506.281592, |
|
"latency_90": 544.5816703999999, |
|
"latency_95": 656.4042, |
|
"latency_99": 658.84548128, |
|
"latency_999": 659.635381328 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 32, |
|
"throughput": 2.13, |
|
"latency_mean": 472.8995814375, |
|
"latency_std": 7.852107004932393, |
|
"latency_50": 471.6044875, |
|
"latency_90": 483.1297946, |
|
"latency_95": 485.7553554, |
|
"latency_99": 490.66026108, |
|
"latency_999": 492.539614008 |
|
} |
|
}, |
|
{ |
|
"batch_size": 1, |
|
"input_length": 128, |
|
"baseline": { |
|
"nb_forwards": 97, |
|
"throughput": 6.47, |
|
"latency_mean": 156.2440793814433, |
|
"latency_std": 1.9840595219565056, |
|
"latency_50": 156.225903, |
|
"latency_90": 158.593671, |
|
"latency_95": 159.64843179999997, |
|
"latency_99": 161.01634776, |
|
"latency_999": 161.629544976 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 93, |
|
"throughput": 6.2, |
|
"latency_mean": 162.63202710752688, |
|
"latency_std": 0.7398820002769471, |
|
"latency_50": 162.382878, |
|
"latency_90": 163.62236180000002, |
|
"latency_95": 163.8380992, |
|
"latency_99": 164.88131604, |
|
"latency_999": 165.77600150400002 |
|
} |
|
}, |
|
{ |
|
"batch_size": 4, |
|
"input_length": 128, |
|
"baseline": { |
|
"nb_forwards": 24, |
|
"throughput": 1.6, |
|
"latency_mean": 642.4793479166666, |
|
"latency_std": 41.85394569042994, |
|
"latency_50": 654.998135, |
|
"latency_90": 656.7839087000001, |
|
"latency_95": 657.02353375, |
|
"latency_99": 657.8482725700001, |
|
"latency_999": 658.060870057 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 23, |
|
"throughput": 1.53, |
|
"latency_mean": 666.2755957826088, |
|
"latency_std": 1.7410908635173166, |
|
"latency_50": 666.717442, |
|
"latency_90": 668.0602384, |
|
"latency_95": 668.910585, |
|
"latency_99": 669.7671928, |
|
"latency_999": 669.9619772799999 |
|
} |
|
}, |
|
{ |
|
"batch_size": 1, |
|
"input_length": 32, |
|
"baseline": { |
|
"nb_forwards": 75, |
|
"throughput": 5.0, |
|
"latency_mean": 201.75692086666666, |
|
"latency_std": 1.8320009608554637, |
|
"latency_50": 201.71631, |
|
"latency_90": 204.2971264, |
|
"latency_95": 204.8295851, |
|
"latency_99": 206.53276574, |
|
"latency_999": 206.55384397400002 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 115, |
|
"throughput": 7.67, |
|
"latency_mean": 131.24782122608696, |
|
"latency_std": 4.242274517523887, |
|
"latency_50": 129.902701, |
|
"latency_90": 138.24299720000002, |
|
"latency_95": 141.03294480000002, |
|
"latency_99": 142.079643, |
|
"latency_999": 142.57829744400001 |
|
} |
|
}, |
|
{ |
|
"batch_size": 8, |
|
"input_length": 64, |
|
"baseline": { |
|
"nb_forwards": 12, |
|
"throughput": 0.8, |
|
"latency_mean": 1263.2922429166667, |
|
"latency_std": 77.36389645216727, |
|
"latency_50": 1278.0368575, |
|
"latency_90": 1328.6916195, |
|
"latency_95": 1342.47310885, |
|
"latency_99": 1353.1162041700002, |
|
"latency_999": 1355.510900617 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 15, |
|
"throughput": 1.0, |
|
"latency_mean": 1012.6554306, |
|
"latency_std": 156.31890427073202, |
|
"latency_50": 934.872073, |
|
"latency_90": 1322.7367448, |
|
"latency_95": 1323.9433467000001, |
|
"latency_99": 1325.30759014, |
|
"latency_999": 1325.6145449140001 |
|
} |
|
} |
|
], |
|
"others": { |
|
"baseline": { |
|
"accuracy": 0.98 |
|
}, |
|
"optimized": { |
|
"accuracy": 0.98 |
|
} |
|
} |
|
}, |
|
"max_eval_samples": null, |
|
"time_benchmark_args": { |
|
"duration": 15, |
|
"warmup_runs": 5 |
|
}, |
|
"model_type": "vit" |
|
}, |
|
{ |
|
"model_name_or_path": "nateraw/vit-base-beans", |
|
"task": "image-classification", |
|
"task_args": null, |
|
"dataset": { |
|
"path": "beans", |
|
"eval_split": "validation", |
|
"data_keys": { |
|
"primary": "image", |
|
"secondary": null |
|
}, |
|
"ref_keys": [ |
|
"labels" |
|
], |
|
"name": null, |
|
"calibration_split": "train" |
|
}, |
|
"quantization_approach": "dynamic", |
|
"operators_to_quantize": [ |
|
"Add", |
|
"MatMul" |
|
], |
|
"node_exclusion": [], |
|
"aware_training": false, |
|
"per_channel": false, |
|
"calibration": { |
|
"method": "minmax", |
|
"num_calibration_samples": 100, |
|
"calibration_histogram_percentile": null, |
|
"calibration_moving_average": null, |
|
"calibration_moving_average_constant": null |
|
}, |
|
"framework": "onnxruntime", |
|
"framework_args": { |
|
"opset": 11, |
|
"optimization_level": 1 |
|
}, |
|
"hardware": "Architecture: x86_64\nCPU op-mode(s): 32-bit, 64-bit\nByte Order: Little Endian\nAddress sizes: 46 bits physical, 48 bits virtual\nCPU(s): 8\nOn-line CPU(s) list: 0-7\nThread(s) per core: 2\nCore(s) per socket: 4\nSocket(s): 1\nNUMA node(s): 1\nVendor ID: GenuineIntel\nCPU family: 6\nModel: 85\nModel name: Intel(R) Xeon(R) Platinum 8259CL CPU @ 2.50GHz\nStepping: 7\nCPU MHz: 3099.508\nBogoMIPS: 4999.99\nHypervisor vendor: KVM\nVirtualization type: full\nL1d cache: 128 KiB\nL1i cache: 128 KiB\nL2 cache: 4 MiB\nL3 cache: 35.8 MiB\nNUMA node0 CPU(s): 0-7\nVulnerability Itlb multihit: KVM: Vulnerable\nVulnerability L1tf: Mitigation; PTE Inversion\nVulnerability Mds: Vulnerable: Clear CPU buffers attempted, no microcode; SMT Host state unknown\nVulnerability Meltdown: Mitigation; PTI\nVulnerability Spec store bypass: Vulnerable\nVulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization\nVulnerability Spectre v2: Mitigation; Retpolines, STIBP disabled, RSB filling\nVulnerability Srbds: Not affected\nVulnerability Tsx async abort: Not affected\nFlags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc rep_good nopl xtopology nonstop_tsc cpuid aperfmperf tsc_known_freq pni pclmulqdq ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand hypervisor lahf_lm abm 3dnowprefetch invpcid_single pti fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid mpx avx512f avx512dq rdseed adx smap clflushopt clwb avx512cd avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves ida arat pku ospke avx512_vnni\n", |
|
"versions": { |
|
"transformers": "4.20.1", |
|
"optimum": "1.2.3.dev0", |
|
"optimum_hash": "5c9af4e5f93c7e9bd523563230732b49603dc4d7" |
|
}, |
|
"evaluation": { |
|
"time": [ |
|
{ |
|
"batch_size": 4, |
|
"input_length": 128, |
|
"baseline": { |
|
"nb_forwards": 30, |
|
"throughput": 2.0, |
|
"latency_mean": 504.13991656666667, |
|
"latency_std": 4.513179886057779, |
|
"latency_50": 503.19925, |
|
"latency_90": 511.38798610000003, |
|
"latency_95": 512.42949615, |
|
"latency_99": 513.05507925, |
|
"latency_999": 513.2059829250001 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 69, |
|
"throughput": 4.6, |
|
"latency_mean": 217.46600939130434, |
|
"latency_std": 2.972318261728456, |
|
"latency_50": 217.184814, |
|
"latency_90": 220.705846, |
|
"latency_95": 222.9682218, |
|
"latency_99": 224.70305839999997, |
|
"latency_999": 226.06056314000003 |
|
} |
|
}, |
|
{ |
|
"batch_size": 1, |
|
"input_length": 32, |
|
"baseline": { |
|
"nb_forwards": 79, |
|
"throughput": 5.27, |
|
"latency_mean": 191.43747164556962, |
|
"latency_std": 15.655307063217192, |
|
"latency_50": 198.595354, |
|
"latency_90": 200.57276240000002, |
|
"latency_95": 200.87837530000002, |
|
"latency_99": 201.16657547999998, |
|
"latency_999": 201.356981748 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 238, |
|
"throughput": 15.87, |
|
"latency_mean": 63.268969394957985, |
|
"latency_std": 0.9427228875615695, |
|
"latency_50": 63.1408055, |
|
"latency_90": 64.2273272, |
|
"latency_95": 65.1072349, |
|
"latency_99": 66.96859157, |
|
"latency_999": 67.69891359200001 |
|
} |
|
}, |
|
{ |
|
"batch_size": 8, |
|
"input_length": 64, |
|
"baseline": { |
|
"nb_forwards": 16, |
|
"throughput": 1.07, |
|
"latency_mean": 968.852768375, |
|
"latency_std": 38.04343260757334, |
|
"latency_50": 952.2546675, |
|
"latency_90": 1038.7362795, |
|
"latency_95": 1054.559503, |
|
"latency_99": 1057.6485166, |
|
"latency_999": 1058.34354466 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 28, |
|
"throughput": 1.87, |
|
"latency_mean": 537.6509522857143, |
|
"latency_std": 3.8349742843783643, |
|
"latency_50": 536.4832135, |
|
"latency_90": 541.9351384, |
|
"latency_95": 546.1624122000001, |
|
"latency_99": 550.43430736, |
|
"latency_999": 551.363741536 |
|
} |
|
}, |
|
{ |
|
"batch_size": 8, |
|
"input_length": 128, |
|
"baseline": { |
|
"nb_forwards": 12, |
|
"throughput": 0.8, |
|
"latency_mean": 1278.7261375, |
|
"latency_std": 13.900041175798188, |
|
"latency_50": 1275.258701, |
|
"latency_90": 1298.9398739, |
|
"latency_95": 1304.62491295, |
|
"latency_99": 1308.62324179, |
|
"latency_999": 1309.522865779 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 28, |
|
"throughput": 1.87, |
|
"latency_mean": 537.52356, |
|
"latency_std": 3.4322833452073542, |
|
"latency_50": 536.6087305, |
|
"latency_90": 542.7677001000001, |
|
"latency_95": 545.60897155, |
|
"latency_99": 546.32498686, |
|
"latency_999": 546.4325099859999 |
|
} |
|
}, |
|
{ |
|
"batch_size": 4, |
|
"input_length": 64, |
|
"baseline": { |
|
"nb_forwards": 23, |
|
"throughput": 1.53, |
|
"latency_mean": 655.3645512173913, |
|
"latency_std": 1.9257989113526919, |
|
"latency_50": 654.646442, |
|
"latency_90": 657.282423, |
|
"latency_95": 659.662086, |
|
"latency_99": 660.8313084800001, |
|
"latency_999": 661.061189648 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 69, |
|
"throughput": 4.6, |
|
"latency_mean": 219.61360882608696, |
|
"latency_std": 10.272944961242715, |
|
"latency_50": 217.050361, |
|
"latency_90": 221.35356240000002, |
|
"latency_95": 244.56632359999978, |
|
"latency_99": 260.41507764, |
|
"latency_999": 261.058168464 |
|
} |
|
}, |
|
{ |
|
"batch_size": 4, |
|
"input_length": 32, |
|
"baseline": { |
|
"nb_forwards": 23, |
|
"throughput": 1.53, |
|
"latency_mean": 654.7864544347826, |
|
"latency_std": 1.3045747652970006, |
|
"latency_50": 654.519817, |
|
"latency_90": 655.7390292, |
|
"latency_95": 656.9679398, |
|
"latency_99": 659.0271437, |
|
"latency_999": 659.51908757 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 69, |
|
"throughput": 4.6, |
|
"latency_mean": 217.45058371014494, |
|
"latency_std": 2.719898318573885, |
|
"latency_50": 217.033952, |
|
"latency_90": 221.1297136, |
|
"latency_95": 222.2854166, |
|
"latency_99": 223.81499584, |
|
"latency_999": 224.26408878400002 |
|
} |
|
}, |
|
{ |
|
"batch_size": 8, |
|
"input_length": 32, |
|
"baseline": { |
|
"nb_forwards": 15, |
|
"throughput": 1.0, |
|
"latency_mean": 1033.3692992666668, |
|
"latency_std": 121.20856568749772, |
|
"latency_50": 963.132488, |
|
"latency_90": 1262.2027165999998, |
|
"latency_95": 1265.428772, |
|
"latency_99": 1270.8415024, |
|
"latency_999": 1272.05936674 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 35, |
|
"throughput": 2.33, |
|
"latency_mean": 433.3171128, |
|
"latency_std": 5.620352976190466, |
|
"latency_50": 432.092966, |
|
"latency_90": 439.51550460000004, |
|
"latency_95": 442.0759091, |
|
"latency_99": 449.04011729999996, |
|
"latency_999": 451.02373323 |
|
} |
|
}, |
|
{ |
|
"batch_size": 1, |
|
"input_length": 128, |
|
"baseline": { |
|
"nb_forwards": 75, |
|
"throughput": 5.0, |
|
"latency_mean": 200.45710213333334, |
|
"latency_std": 0.912546986602039, |
|
"latency_50": 200.443548, |
|
"latency_90": 201.5305846, |
|
"latency_95": 202.211401, |
|
"latency_99": 202.77304016, |
|
"latency_999": 202.841815316 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 239, |
|
"throughput": 15.93, |
|
"latency_mean": 62.95156658158996, |
|
"latency_std": 0.6494732254002147, |
|
"latency_50": 62.920241, |
|
"latency_90": 63.867974, |
|
"latency_95": 64.1582552, |
|
"latency_99": 64.69974346000001, |
|
"latency_999": 64.96208175 |
|
} |
|
}, |
|
{ |
|
"batch_size": 1, |
|
"input_length": 64, |
|
"baseline": { |
|
"nb_forwards": 82, |
|
"throughput": 5.47, |
|
"latency_mean": 183.10327674390246, |
|
"latency_std": 21.77607771500049, |
|
"latency_50": 197.6686845, |
|
"latency_90": 202.83920880000002, |
|
"latency_95": 203.04726945, |
|
"latency_99": 204.23605759999998, |
|
"latency_999": 204.50013056 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 235, |
|
"throughput": 15.67, |
|
"latency_mean": 64.02330576170213, |
|
"latency_std": 1.559037537525157, |
|
"latency_50": 63.56593, |
|
"latency_90": 66.4842204, |
|
"latency_95": 67.527317, |
|
"latency_99": 68.7520603, |
|
"latency_999": 70.43527871799999 |
|
} |
|
} |
|
], |
|
"others": { |
|
"baseline": { |
|
"accuracy": 0.98 |
|
}, |
|
"optimized": { |
|
"accuracy": 0.98 |
|
} |
|
} |
|
}, |
|
"max_eval_samples": null, |
|
"time_benchmark_args": { |
|
"duration": 15, |
|
"warmup_runs": 5 |
|
}, |
|
"model_type": "vit" |
|
}, |
|
{ |
|
"model_name_or_path": "nateraw/vit-base-beans", |
|
"task": "image-classification", |
|
"task_args": null, |
|
"dataset": { |
|
"path": "beans", |
|
"eval_split": "validation", |
|
"data_keys": { |
|
"primary": "image", |
|
"secondary": null |
|
}, |
|
"ref_keys": [ |
|
"labels" |
|
], |
|
"name": null, |
|
"calibration_split": "train" |
|
}, |
|
"quantization_approach": "dynamic", |
|
"operators_to_quantize": [ |
|
"Add", |
|
"MatMul" |
|
], |
|
"node_exclusion": [ |
|
"layernorm", |
|
"gelu", |
|
"residual", |
|
"gather", |
|
"softmax" |
|
], |
|
"aware_training": false, |
|
"per_channel": false, |
|
"calibration": { |
|
"method": "minmax", |
|
"num_calibration_samples": 100, |
|
"calibration_histogram_percentile": null, |
|
"calibration_moving_average": null, |
|
"calibration_moving_average_constant": null |
|
}, |
|
"framework": "onnxruntime", |
|
"framework_args": { |
|
"opset": 11, |
|
"optimization_level": 1 |
|
}, |
|
"hardware": "Architecture: x86_64\nCPU op-mode(s): 32-bit, 64-bit\nByte Order: Little Endian\nAddress sizes: 46 bits physical, 48 bits virtual\nCPU(s): 8\nOn-line CPU(s) list: 0-7\nThread(s) per core: 2\nCore(s) per socket: 4\nSocket(s): 1\nNUMA node(s): 1\nVendor ID: GenuineIntel\nCPU family: 6\nModel: 85\nModel name: Intel(R) Xeon(R) Platinum 8259CL CPU @ 2.50GHz\nStepping: 7\nCPU MHz: 3098.359\nBogoMIPS: 4999.99\nHypervisor vendor: KVM\nVirtualization type: full\nL1d cache: 128 KiB\nL1i cache: 128 KiB\nL2 cache: 4 MiB\nL3 cache: 35.8 MiB\nNUMA node0 CPU(s): 0-7\nVulnerability Itlb multihit: KVM: Vulnerable\nVulnerability L1tf: Mitigation; PTE Inversion\nVulnerability Mds: Vulnerable: Clear CPU buffers attempted, no microcode; SMT Host state unknown\nVulnerability Meltdown: Mitigation; PTI\nVulnerability Spec store bypass: Vulnerable\nVulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization\nVulnerability Spectre v2: Mitigation; Retpolines, STIBP disabled, RSB filling\nVulnerability Srbds: Not affected\nVulnerability Tsx async abort: Not affected\nFlags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc rep_good nopl xtopology nonstop_tsc cpuid aperfmperf tsc_known_freq pni pclmulqdq ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand hypervisor lahf_lm abm 3dnowprefetch invpcid_single pti fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid mpx avx512f avx512dq rdseed adx smap clflushopt clwb avx512cd avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves ida arat pku ospke avx512_vnni\n", |
|
"versions": { |
|
"transformers": "4.20.1", |
|
"optimum": "1.2.3.dev0", |
|
"optimum_hash": "5c9af4e5f93c7e9bd523563230732b49603dc4d7" |
|
}, |
|
"evaluation": { |
|
"time": [ |
|
{ |
|
"batch_size": 8, |
|
"input_length": 128, |
|
"baseline": { |
|
"nb_forwards": 13, |
|
"throughput": 0.87, |
|
"latency_mean": 1188.9833955384615, |
|
"latency_std": 156.3766424391915, |
|
"latency_50": 1271.045981, |
|
"latency_90": 1320.508096, |
|
"latency_95": 1351.0301161999998, |
|
"latency_99": 1381.7705920399999, |
|
"latency_999": 1388.6871991039998 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 28, |
|
"throughput": 1.87, |
|
"latency_mean": 537.5845056428572, |
|
"latency_std": 24.479382275992702, |
|
"latency_50": 544.233773, |
|
"latency_90": 549.9048538999999, |
|
"latency_95": 550.3655834, |
|
"latency_99": 551.47189197, |
|
"latency_999": 551.817216597 |
|
} |
|
}, |
|
{ |
|
"batch_size": 8, |
|
"input_length": 32, |
|
"baseline": { |
|
"nb_forwards": 12, |
|
"throughput": 0.8, |
|
"latency_mean": 1294.067229, |
|
"latency_std": 42.49668221169809, |
|
"latency_50": 1276.250713, |
|
"latency_90": 1376.546508, |
|
"latency_95": 1386.4398829000002, |
|
"latency_99": 1387.82451098, |
|
"latency_999": 1388.136052298 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 32, |
|
"throughput": 2.13, |
|
"latency_mean": 472.54393821875, |
|
"latency_std": 44.30277356434995, |
|
"latency_50": 448.9115105, |
|
"latency_90": 538.9957706, |
|
"latency_95": 539.2858448, |
|
"latency_99": 541.8835004299999, |
|
"latency_999": 542.8368844429999 |
|
} |
|
}, |
|
{ |
|
"batch_size": 1, |
|
"input_length": 32, |
|
"baseline": { |
|
"nb_forwards": 75, |
|
"throughput": 5.0, |
|
"latency_mean": 200.50050148, |
|
"latency_std": 0.8285022604698893, |
|
"latency_50": 200.26392, |
|
"latency_90": 201.65092280000002, |
|
"latency_95": 202.4235563, |
|
"latency_99": 202.9026252, |
|
"latency_999": 203.01259512000001 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 239, |
|
"throughput": 15.93, |
|
"latency_mean": 62.99670253974895, |
|
"latency_std": 0.6391249990225535, |
|
"latency_50": 62.961666, |
|
"latency_90": 63.847294399999996, |
|
"latency_95": 64.0924426, |
|
"latency_99": 64.62881584, |
|
"latency_999": 64.996266822 |
|
} |
|
}, |
|
{ |
|
"batch_size": 8, |
|
"input_length": 64, |
|
"baseline": { |
|
"nb_forwards": 12, |
|
"throughput": 0.8, |
|
"latency_mean": 1273.6051258333332, |
|
"latency_std": 12.303101937934832, |
|
"latency_50": 1270.9759965, |
|
"latency_90": 1294.2611932, |
|
"latency_95": 1297.39174085, |
|
"latency_99": 1299.14183777, |
|
"latency_999": 1299.5356095769998 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 35, |
|
"throughput": 2.33, |
|
"latency_mean": 435.2721461714286, |
|
"latency_std": 5.5886169074680705, |
|
"latency_50": 434.611038, |
|
"latency_90": 441.9488788, |
|
"latency_95": 444.0377945, |
|
"latency_99": 447.65647636, |
|
"latency_999": 448.09250983600003 |
|
} |
|
}, |
|
{ |
|
"batch_size": 4, |
|
"input_length": 64, |
|
"baseline": { |
|
"nb_forwards": 23, |
|
"throughput": 1.53, |
|
"latency_mean": 656.8662442608696, |
|
"latency_std": 1.963675172824631, |
|
"latency_50": 656.523145, |
|
"latency_90": 658.1741306, |
|
"latency_95": 658.2483522, |
|
"latency_99": 663.4018644600001, |
|
"latency_999": 664.7094380460001 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 70, |
|
"throughput": 4.67, |
|
"latency_mean": 216.32413269999998, |
|
"latency_std": 2.573320955654004, |
|
"latency_50": 216.1438355, |
|
"latency_90": 219.18598509999998, |
|
"latency_95": 220.69771815000001, |
|
"latency_99": 223.89661489000002, |
|
"latency_999": 226.28716618899998 |
|
} |
|
}, |
|
{ |
|
"batch_size": 4, |
|
"input_length": 128, |
|
"baseline": { |
|
"nb_forwards": 23, |
|
"throughput": 1.53, |
|
"latency_mean": 654.2367223043478, |
|
"latency_std": 1.3812737872427574, |
|
"latency_50": 653.93458, |
|
"latency_90": 655.7302913999999, |
|
"latency_95": 656.3990489, |
|
"latency_99": 658.70954416, |
|
"latency_999": 659.2843920160001 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 70, |
|
"throughput": 4.67, |
|
"latency_mean": 216.82448075714288, |
|
"latency_std": 2.4549019943848234, |
|
"latency_50": 216.682762, |
|
"latency_90": 219.8229577, |
|
"latency_95": 221.14088784999998, |
|
"latency_99": 222.15547067000003, |
|
"latency_999": 223.170220067 |
|
} |
|
}, |
|
{ |
|
"batch_size": 1, |
|
"input_length": 64, |
|
"baseline": { |
|
"nb_forwards": 93, |
|
"throughput": 6.2, |
|
"latency_mean": 162.75067422580645, |
|
"latency_std": 16.670443274832795, |
|
"latency_50": 155.50524, |
|
"latency_90": 197.1873276, |
|
"latency_95": 198.3428246, |
|
"latency_99": 199.21549572, |
|
"latency_999": 199.303705872 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 224, |
|
"throughput": 14.93, |
|
"latency_mean": 67.17887016071428, |
|
"latency_std": 3.767553337517106, |
|
"latency_50": 69.853314, |
|
"latency_90": 70.8704738, |
|
"latency_95": 71.48267955, |
|
"latency_99": 72.17330481, |
|
"latency_999": 73.872151108 |
|
} |
|
}, |
|
{ |
|
"batch_size": 1, |
|
"input_length": 128, |
|
"baseline": { |
|
"nb_forwards": 94, |
|
"throughput": 6.27, |
|
"latency_mean": 160.5772765319149, |
|
"latency_std": 13.789324382626834, |
|
"latency_50": 155.949789, |
|
"latency_90": 192.97832880000007, |
|
"latency_95": 199.62176935, |
|
"latency_99": 200.06097094, |
|
"latency_999": 200.418572494 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 222, |
|
"throughput": 14.8, |
|
"latency_mean": 67.64596318918919, |
|
"latency_std": 3.8012002735511974, |
|
"latency_50": 70.4326435, |
|
"latency_90": 71.0351658, |
|
"latency_95": 71.17170809999999, |
|
"latency_99": 71.37054176000001, |
|
"latency_999": 71.686580853 |
|
} |
|
}, |
|
{ |
|
"batch_size": 4, |
|
"input_length": 32, |
|
"baseline": { |
|
"nb_forwards": 30, |
|
"throughput": 2.0, |
|
"latency_mean": 503.8281891666667, |
|
"latency_std": 3.79639368455217, |
|
"latency_50": 503.337252, |
|
"latency_90": 509.18891210000004, |
|
"latency_95": 509.43762219999996, |
|
"latency_99": 511.16002555, |
|
"latency_999": 511.72013285500003 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 69, |
|
"throughput": 4.6, |
|
"latency_mean": 219.61836788405796, |
|
"latency_std": 6.812193992706365, |
|
"latency_50": 216.499815, |
|
"latency_90": 231.0559726, |
|
"latency_95": 233.1648294, |
|
"latency_99": 239.13513347999998, |
|
"latency_999": 240.964685448 |
|
} |
|
} |
|
], |
|
"others": { |
|
"baseline": { |
|
"accuracy": 0.98 |
|
}, |
|
"optimized": { |
|
"accuracy": 0.98 |
|
} |
|
} |
|
}, |
|
"max_eval_samples": null, |
|
"time_benchmark_args": { |
|
"duration": 15, |
|
"warmup_runs": 5 |
|
}, |
|
"model_type": "vit" |
|
}, |
|
{ |
|
"model_name_or_path": "nateraw/vit-base-beans", |
|
"task": "image-classification", |
|
"task_args": null, |
|
"dataset": { |
|
"path": "beans", |
|
"eval_split": "validation", |
|
"data_keys": { |
|
"primary": "image", |
|
"secondary": null |
|
}, |
|
"ref_keys": [ |
|
"labels" |
|
], |
|
"name": null, |
|
"calibration_split": "train" |
|
}, |
|
"quantization_approach": "dynamic", |
|
"operators_to_quantize": [ |
|
"Add" |
|
], |
|
"node_exclusion": [ |
|
"layernorm", |
|
"gelu", |
|
"residual", |
|
"gather", |
|
"softmax" |
|
], |
|
"aware_training": false, |
|
"per_channel": false, |
|
"calibration": { |
|
"method": "minmax", |
|
"num_calibration_samples": 100, |
|
"calibration_histogram_percentile": null, |
|
"calibration_moving_average": null, |
|
"calibration_moving_average_constant": null |
|
}, |
|
"framework": "onnxruntime", |
|
"framework_args": { |
|
"opset": 11, |
|
"optimization_level": 1 |
|
}, |
|
"hardware": "Architecture: x86_64\nCPU op-mode(s): 32-bit, 64-bit\nByte Order: Little Endian\nAddress sizes: 46 bits physical, 48 bits virtual\nCPU(s): 8\nOn-line CPU(s) list: 0-7\nThread(s) per core: 2\nCore(s) per socket: 4\nSocket(s): 1\nNUMA node(s): 1\nVendor ID: GenuineIntel\nCPU family: 6\nModel: 85\nModel name: Intel(R) Xeon(R) Platinum 8259CL CPU @ 2.50GHz\nStepping: 7\nCPU MHz: 3100.080\nBogoMIPS: 4999.99\nHypervisor vendor: KVM\nVirtualization type: full\nL1d cache: 128 KiB\nL1i cache: 128 KiB\nL2 cache: 4 MiB\nL3 cache: 35.8 MiB\nNUMA node0 CPU(s): 0-7\nVulnerability Itlb multihit: KVM: Vulnerable\nVulnerability L1tf: Mitigation; PTE Inversion\nVulnerability Mds: Vulnerable: Clear CPU buffers attempted, no microcode; SMT Host state unknown\nVulnerability Meltdown: Mitigation; PTI\nVulnerability Spec store bypass: Vulnerable\nVulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization\nVulnerability Spectre v2: Mitigation; Retpolines, STIBP disabled, RSB filling\nVulnerability Srbds: Not affected\nVulnerability Tsx async abort: Not affected\nFlags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc rep_good nopl xtopology nonstop_tsc cpuid aperfmperf tsc_known_freq pni pclmulqdq ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand hypervisor lahf_lm abm 3dnowprefetch invpcid_single pti fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid mpx avx512f avx512dq rdseed adx smap clflushopt clwb avx512cd avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves ida arat pku ospke avx512_vnni\n", |
|
"versions": { |
|
"transformers": "4.20.1", |
|
"optimum": "1.2.3.dev0", |
|
"optimum_hash": "5c9af4e5f93c7e9bd523563230732b49603dc4d7" |
|
}, |
|
"evaluation": { |
|
"time": [ |
|
{ |
|
"batch_size": 8, |
|
"input_length": 64, |
|
"baseline": { |
|
"nb_forwards": 12, |
|
"throughput": 0.8, |
|
"latency_mean": 1270.30411025, |
|
"latency_std": 8.657521693333212, |
|
"latency_50": 1269.1937825, |
|
"latency_90": 1281.7940697000001, |
|
"latency_95": 1283.1798158, |
|
"latency_99": 1284.16803116, |
|
"latency_999": 1284.390379616 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 14, |
|
"throughput": 0.93, |
|
"latency_mean": 1092.104026, |
|
"latency_std": 192.38390460063763, |
|
"latency_50": 942.50711, |
|
"latency_90": 1317.2986979000002, |
|
"latency_95": 1317.7165482999999, |
|
"latency_99": 1317.88203206, |
|
"latency_999": 1317.919265906 |
|
} |
|
}, |
|
{ |
|
"batch_size": 1, |
|
"input_length": 64, |
|
"baseline": { |
|
"nb_forwards": 97, |
|
"throughput": 6.47, |
|
"latency_mean": 155.31766736082474, |
|
"latency_std": 2.58376281346955, |
|
"latency_50": 154.80961, |
|
"latency_90": 158.62567140000002, |
|
"latency_95": 159.8607244, |
|
"latency_99": 162.57999983999997, |
|
"latency_999": 166.451882784 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 115, |
|
"throughput": 7.67, |
|
"latency_mean": 130.7428943826087, |
|
"latency_std": 4.439134079400091, |
|
"latency_50": 129.426836, |
|
"latency_90": 139.1114094, |
|
"latency_95": 140.0790016, |
|
"latency_99": 141.99444142, |
|
"latency_999": 143.111380738 |
|
} |
|
}, |
|
{ |
|
"batch_size": 8, |
|
"input_length": 32, |
|
"baseline": { |
|
"nb_forwards": 16, |
|
"throughput": 1.07, |
|
"latency_mean": 953.2654264375, |
|
"latency_std": 52.43131265130536, |
|
"latency_50": 939.297859, |
|
"latency_90": 953.7382385, |
|
"latency_95": 1007.0155795, |
|
"latency_99": 1124.8867758999997, |
|
"latency_999": 1151.4077950899998 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 17, |
|
"throughput": 1.13, |
|
"latency_mean": 926.1379787647058, |
|
"latency_std": 10.221038220575734, |
|
"latency_50": 925.067649, |
|
"latency_90": 942.8075994, |
|
"latency_95": 943.8492997999999, |
|
"latency_99": 944.12823036, |
|
"latency_999": 944.1909897359999 |
|
} |
|
}, |
|
{ |
|
"batch_size": 4, |
|
"input_length": 64, |
|
"baseline": { |
|
"nb_forwards": 23, |
|
"throughput": 1.53, |
|
"latency_mean": 656.297728173913, |
|
"latency_std": 0.9507206548680598, |
|
"latency_50": 656.379836, |
|
"latency_90": 657.2358052000001, |
|
"latency_95": 657.2469835, |
|
"latency_99": 657.8591597000001, |
|
"latency_999": 658.01437487 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 23, |
|
"throughput": 1.53, |
|
"latency_mean": 652.7235733913044, |
|
"latency_std": 1.4541608709475486, |
|
"latency_50": 652.974982, |
|
"latency_90": 654.6616273999999, |
|
"latency_95": 654.7420641, |
|
"latency_99": 655.24253678, |
|
"latency_999": 655.3693460779999 |
|
} |
|
}, |
|
{ |
|
"batch_size": 8, |
|
"input_length": 128, |
|
"baseline": { |
|
"nb_forwards": 12, |
|
"throughput": 0.8, |
|
"latency_mean": 1265.5476989166668, |
|
"latency_std": 3.289433212667684, |
|
"latency_50": 1263.5434205, |
|
"latency_90": 1269.744916, |
|
"latency_95": 1270.16758755, |
|
"latency_99": 1270.5304551099998, |
|
"latency_999": 1270.6121003110002 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 12, |
|
"throughput": 0.8, |
|
"latency_mean": 1304.50654875, |
|
"latency_std": 2.6884396280606753, |
|
"latency_50": 1303.907755, |
|
"latency_90": 1307.1289129000002, |
|
"latency_95": 1309.2907671500002, |
|
"latency_99": 1311.21018223, |
|
"latency_999": 1311.642050623 |
|
} |
|
}, |
|
{ |
|
"batch_size": 1, |
|
"input_length": 32, |
|
"baseline": { |
|
"nb_forwards": 97, |
|
"throughput": 6.47, |
|
"latency_mean": 155.8433487113402, |
|
"latency_std": 2.5029692630646236, |
|
"latency_50": 155.989606, |
|
"latency_90": 158.707679, |
|
"latency_95": 159.601676, |
|
"latency_99": 161.73731819999998, |
|
"latency_999": 162.09523452000002 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 115, |
|
"throughput": 7.67, |
|
"latency_mean": 130.9514579652174, |
|
"latency_std": 8.81473257206767, |
|
"latency_50": 128.898162, |
|
"latency_90": 131.6082024, |
|
"latency_95": 163.16474219999998, |
|
"latency_99": 163.3782847, |
|
"latency_999": 163.563403254 |
|
} |
|
}, |
|
{ |
|
"batch_size": 4, |
|
"input_length": 128, |
|
"baseline": { |
|
"nb_forwards": 23, |
|
"throughput": 1.53, |
|
"latency_mean": 653.9863154782608, |
|
"latency_std": 0.7289228494064774, |
|
"latency_50": 653.794938, |
|
"latency_90": 654.6420688, |
|
"latency_95": 655.7360347, |
|
"latency_99": 656.03099652, |
|
"latency_999": 656.0760086519999 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 32, |
|
"throughput": 2.13, |
|
"latency_mean": 479.06163646875, |
|
"latency_std": 19.59131442173658, |
|
"latency_50": 474.2416765, |
|
"latency_90": 511.8005115, |
|
"latency_95": 519.3751149, |
|
"latency_99": 526.80620621, |
|
"latency_999": 527.546256221 |
|
} |
|
}, |
|
{ |
|
"batch_size": 1, |
|
"input_length": 128, |
|
"baseline": { |
|
"nb_forwards": 76, |
|
"throughput": 5.07, |
|
"latency_mean": 197.58542217105264, |
|
"latency_std": 0.9013285926255195, |
|
"latency_50": 197.396874, |
|
"latency_90": 198.8574485, |
|
"latency_95": 199.188459, |
|
"latency_99": 200.012024, |
|
"latency_999": 201.7015409 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 117, |
|
"throughput": 7.8, |
|
"latency_mean": 128.7953315897436, |
|
"latency_std": 1.7882824907070387, |
|
"latency_50": 128.806478, |
|
"latency_90": 131.2709188, |
|
"latency_95": 131.67982, |
|
"latency_99": 132.40365796, |
|
"latency_999": 132.676468532 |
|
} |
|
}, |
|
{ |
|
"batch_size": 4, |
|
"input_length": 32, |
|
"baseline": { |
|
"nb_forwards": 23, |
|
"throughput": 1.53, |
|
"latency_mean": 654.1969950434783, |
|
"latency_std": 1.1220804198391827, |
|
"latency_50": 654.001645, |
|
"latency_90": 655.7840166, |
|
"latency_95": 656.016458, |
|
"latency_99": 656.36563072, |
|
"latency_999": 656.447835172 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 32, |
|
"throughput": 2.13, |
|
"latency_mean": 481.61485321875, |
|
"latency_std": 33.724130139913434, |
|
"latency_50": 473.2563825, |
|
"latency_90": 504.5333757, |
|
"latency_95": 520.7592738999999, |
|
"latency_99": 612.8124424300001, |
|
"latency_999": 644.3983803430002 |
|
} |
|
} |
|
], |
|
"others": { |
|
"baseline": { |
|
"accuracy": 0.98 |
|
}, |
|
"optimized": { |
|
"accuracy": 0.98 |
|
} |
|
} |
|
}, |
|
"max_eval_samples": null, |
|
"time_benchmark_args": { |
|
"duration": 15, |
|
"warmup_runs": 5 |
|
}, |
|
"model_type": "vit" |
|
}, |
|
{ |
|
"model_name_or_path": "nateraw/vit-base-beans", |
|
"task": "image-classification", |
|
"task_args": null, |
|
"dataset": { |
|
"path": "beans", |
|
"eval_split": "validation", |
|
"data_keys": { |
|
"primary": "image", |
|
"secondary": null |
|
}, |
|
"ref_keys": [ |
|
"labels" |
|
], |
|
"name": null, |
|
"calibration_split": "train" |
|
}, |
|
"quantization_approach": "dynamic", |
|
"operators_to_quantize": [ |
|
"Add", |
|
"MatMul" |
|
], |
|
"node_exclusion": [], |
|
"aware_training": false, |
|
"per_channel": true, |
|
"calibration": { |
|
"method": "minmax", |
|
"num_calibration_samples": 100, |
|
"calibration_histogram_percentile": null, |
|
"calibration_moving_average": null, |
|
"calibration_moving_average_constant": null |
|
}, |
|
"framework": "onnxruntime", |
|
"framework_args": { |
|
"opset": 11, |
|
"optimization_level": 1 |
|
}, |
|
"hardware": "Architecture: x86_64\nCPU op-mode(s): 32-bit, 64-bit\nByte Order: Little Endian\nAddress sizes: 46 bits physical, 48 bits virtual\nCPU(s): 8\nOn-line CPU(s) list: 0-7\nThread(s) per core: 2\nCore(s) per socket: 4\nSocket(s): 1\nNUMA node(s): 1\nVendor ID: GenuineIntel\nCPU family: 6\nModel: 85\nModel name: Intel(R) Xeon(R) Platinum 8259CL CPU @ 2.50GHz\nStepping: 7\nCPU MHz: 2594.420\nBogoMIPS: 4999.99\nHypervisor vendor: KVM\nVirtualization type: full\nL1d cache: 128 KiB\nL1i cache: 128 KiB\nL2 cache: 4 MiB\nL3 cache: 35.8 MiB\nNUMA node0 CPU(s): 0-7\nVulnerability Itlb multihit: KVM: Vulnerable\nVulnerability L1tf: Mitigation; PTE Inversion\nVulnerability Mds: Vulnerable: Clear CPU buffers attempted, no microcode; SMT Host state unknown\nVulnerability Meltdown: Mitigation; PTI\nVulnerability Spec store bypass: Vulnerable\nVulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization\nVulnerability Spectre v2: Mitigation; Retpolines, STIBP disabled, RSB filling\nVulnerability Srbds: Not affected\nVulnerability Tsx async abort: Not affected\nFlags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc rep_good nopl xtopology nonstop_tsc cpuid aperfmperf tsc_known_freq pni pclmulqdq ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand hypervisor lahf_lm abm 3dnowprefetch invpcid_single pti fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid mpx avx512f avx512dq rdseed adx smap clflushopt clwb avx512cd avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves ida arat pku ospke avx512_vnni\n", |
|
"versions": { |
|
"transformers": "4.20.1", |
|
"optimum": "1.2.3.dev0", |
|
"optimum_hash": "5c9af4e5f93c7e9bd523563230732b49603dc4d7" |
|
}, |
|
"evaluation": { |
|
"time": [ |
|
{ |
|
"batch_size": 1, |
|
"input_length": 64, |
|
"baseline": { |
|
"nb_forwards": 96, |
|
"throughput": 6.4, |
|
"latency_mean": 157.21188266666667, |
|
"latency_std": 4.2296673831318135, |
|
"latency_50": 156.0635645, |
|
"latency_90": 164.39148, |
|
"latency_95": 168.00414375, |
|
"latency_99": 169.24721239999997, |
|
"latency_999": 172.15546754 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 234, |
|
"throughput": 15.6, |
|
"latency_mean": 64.15645528205128, |
|
"latency_std": 0.8760033202077914, |
|
"latency_50": 64.078188, |
|
"latency_90": 65.0844361, |
|
"latency_95": 65.42577375, |
|
"latency_99": 66.03388837, |
|
"latency_999": 70.50518821699998 |
|
} |
|
}, |
|
{ |
|
"batch_size": 4, |
|
"input_length": 128, |
|
"baseline": { |
|
"nb_forwards": 23, |
|
"throughput": 1.53, |
|
"latency_mean": 655.935887826087, |
|
"latency_std": 1.5184807551034185, |
|
"latency_50": 656.150496, |
|
"latency_90": 657.7225057999999, |
|
"latency_95": 658.3034359, |
|
"latency_99": 658.49345218, |
|
"latency_999": 658.5271777180001 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 69, |
|
"throughput": 4.6, |
|
"latency_mean": 220.12218289855073, |
|
"latency_std": 2.8124226950487565, |
|
"latency_50": 220.269108, |
|
"latency_90": 223.81522719999998, |
|
"latency_95": 224.73072179999997, |
|
"latency_99": 226.68743828, |
|
"latency_999": 227.86304682800002 |
|
} |
|
}, |
|
{ |
|
"batch_size": 8, |
|
"input_length": 64, |
|
"baseline": { |
|
"nb_forwards": 14, |
|
"throughput": 0.93, |
|
"latency_mean": 1107.6612250714286, |
|
"latency_std": 153.3041188785755, |
|
"latency_50": 1068.835057, |
|
"latency_90": 1280.4905781, |
|
"latency_95": 1289.3787047, |
|
"latency_99": 1300.66569054, |
|
"latency_999": 1303.205262354 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 32, |
|
"throughput": 2.13, |
|
"latency_mean": 472.52847371875, |
|
"latency_std": 41.051488900475434, |
|
"latency_50": 449.4546335, |
|
"latency_90": 548.263878, |
|
"latency_95": 549.79502655, |
|
"latency_99": 552.90739222, |
|
"latency_999": 553.856337622 |
|
} |
|
}, |
|
{ |
|
"batch_size": 4, |
|
"input_length": 64, |
|
"baseline": { |
|
"nb_forwards": 25, |
|
"throughput": 1.67, |
|
"latency_mean": 613.27841144, |
|
"latency_std": 65.27331298428419, |
|
"latency_50": 656.540737, |
|
"latency_90": 661.4057177999999, |
|
"latency_95": 661.429921, |
|
"latency_99": 661.6534559600001, |
|
"latency_999": 661.716672896 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 68, |
|
"throughput": 4.53, |
|
"latency_mean": 220.9577393235294, |
|
"latency_std": 2.202118334734023, |
|
"latency_50": 220.84368, |
|
"latency_90": 223.2997182, |
|
"latency_95": 224.4097824, |
|
"latency_99": 226.46243421, |
|
"latency_999": 227.056110021 |
|
} |
|
}, |
|
{ |
|
"batch_size": 8, |
|
"input_length": 128, |
|
"baseline": { |
|
"nb_forwards": 16, |
|
"throughput": 1.07, |
|
"latency_mean": 1005.3757105, |
|
"latency_std": 110.08740222252929, |
|
"latency_50": 951.764254, |
|
"latency_90": 1173.9785045, |
|
"latency_95": 1275.012984, |
|
"latency_99": 1277.4162648, |
|
"latency_999": 1277.95700298 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 35, |
|
"throughput": 2.33, |
|
"latency_mean": 440.0128511142857, |
|
"latency_std": 6.045956719281095, |
|
"latency_50": 440.399668, |
|
"latency_90": 446.0546886, |
|
"latency_95": 449.1048619, |
|
"latency_99": 454.09199782, |
|
"latency_999": 455.953342882 |
|
} |
|
}, |
|
{ |
|
"batch_size": 1, |
|
"input_length": 128, |
|
"baseline": { |
|
"nb_forwards": 77, |
|
"throughput": 5.13, |
|
"latency_mean": 195.38825248051947, |
|
"latency_std": 15.566776674930802, |
|
"latency_50": 201.26664, |
|
"latency_90": 203.9999776, |
|
"latency_95": 204.9478114, |
|
"latency_99": 208.10657124, |
|
"latency_999": 211.47291212400003 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 208, |
|
"throughput": 13.87, |
|
"latency_mean": 72.28192877884617, |
|
"latency_std": 0.30617482097915, |
|
"latency_50": 72.2594515, |
|
"latency_90": 72.687902, |
|
"latency_95": 72.8431132, |
|
"latency_99": 73.03046864, |
|
"latency_999": 73.322488771 |
|
} |
|
}, |
|
{ |
|
"batch_size": 1, |
|
"input_length": 32, |
|
"baseline": { |
|
"nb_forwards": 97, |
|
"throughput": 6.47, |
|
"latency_mean": 154.83997250515463, |
|
"latency_std": 1.6989375176627797, |
|
"latency_50": 154.770658, |
|
"latency_90": 156.874254, |
|
"latency_95": 157.6805368, |
|
"latency_99": 158.86760128, |
|
"latency_999": 159.947736928 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 207, |
|
"throughput": 13.8, |
|
"latency_mean": 72.51127445410629, |
|
"latency_std": 0.27142401425407037, |
|
"latency_50": 72.511206, |
|
"latency_90": 72.86571620000001, |
|
"latency_95": 72.9502418, |
|
"latency_99": 73.14818106, |
|
"latency_999": 73.181334838 |
|
} |
|
}, |
|
{ |
|
"batch_size": 8, |
|
"input_length": 32, |
|
"baseline": { |
|
"nb_forwards": 15, |
|
"throughput": 1.0, |
|
"latency_mean": 1030.1399602666665, |
|
"latency_std": 124.65913381994983, |
|
"latency_50": 955.359583, |
|
"latency_90": 1262.7360772, |
|
"latency_95": 1268.8635989000002, |
|
"latency_99": 1279.42059738, |
|
"latency_999": 1281.795922038 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 28, |
|
"throughput": 1.87, |
|
"latency_mean": 542.358436, |
|
"latency_std": 5.562261406290527, |
|
"latency_50": 540.1506655, |
|
"latency_90": 552.4821344, |
|
"latency_95": 552.7215868999999, |
|
"latency_99": 558.17499099, |
|
"latency_999": 559.9655274989999 |
|
} |
|
}, |
|
{ |
|
"batch_size": 4, |
|
"input_length": 32, |
|
"baseline": { |
|
"nb_forwards": 23, |
|
"throughput": 1.53, |
|
"latency_mean": 654.3281370434783, |
|
"latency_std": 1.8605606203681557, |
|
"latency_50": 653.634216, |
|
"latency_90": 656.6254864, |
|
"latency_95": 658.6546205, |
|
"latency_99": 660.00098088, |
|
"latency_999": 660.294841788 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 69, |
|
"throughput": 4.6, |
|
"latency_mean": 219.54447333333334, |
|
"latency_std": 2.7890297777760664, |
|
"latency_50": 219.354462, |
|
"latency_90": 223.2168158, |
|
"latency_95": 224.12707319999998, |
|
"latency_99": 225.20001263999998, |
|
"latency_999": 225.522583764 |
|
} |
|
} |
|
], |
|
"others": { |
|
"baseline": { |
|
"accuracy": 0.98 |
|
}, |
|
"optimized": { |
|
"accuracy": 0.98 |
|
} |
|
} |
|
}, |
|
"max_eval_samples": null, |
|
"time_benchmark_args": { |
|
"duration": 15, |
|
"warmup_runs": 5 |
|
}, |
|
"model_type": "vit" |
|
}, |
|
{ |
|
"model_name_or_path": "nateraw/vit-base-beans", |
|
"task": "image-classification", |
|
"task_args": null, |
|
"dataset": { |
|
"path": "beans", |
|
"eval_split": "validation", |
|
"data_keys": { |
|
"primary": "image", |
|
"secondary": null |
|
}, |
|
"ref_keys": [ |
|
"labels" |
|
], |
|
"name": null, |
|
"calibration_split": "train" |
|
}, |
|
"quantization_approach": "dynamic", |
|
"operators_to_quantize": [ |
|
"Add" |
|
], |
|
"node_exclusion": [], |
|
"aware_training": false, |
|
"per_channel": true, |
|
"calibration": { |
|
"method": "minmax", |
|
"num_calibration_samples": 100, |
|
"calibration_histogram_percentile": null, |
|
"calibration_moving_average": null, |
|
"calibration_moving_average_constant": null |
|
}, |
|
"framework": "onnxruntime", |
|
"framework_args": { |
|
"opset": 11, |
|
"optimization_level": 1 |
|
}, |
|
"hardware": "Architecture: x86_64\nCPU op-mode(s): 32-bit, 64-bit\nByte Order: Little Endian\nAddress sizes: 46 bits physical, 48 bits virtual\nCPU(s): 8\nOn-line CPU(s) list: 0-7\nThread(s) per core: 2\nCore(s) per socket: 4\nSocket(s): 1\nNUMA node(s): 1\nVendor ID: GenuineIntel\nCPU family: 6\nModel: 85\nModel name: Intel(R) Xeon(R) Platinum 8259CL CPU @ 2.50GHz\nStepping: 7\nCPU MHz: 3100.006\nBogoMIPS: 4999.99\nHypervisor vendor: KVM\nVirtualization type: full\nL1d cache: 128 KiB\nL1i cache: 128 KiB\nL2 cache: 4 MiB\nL3 cache: 35.8 MiB\nNUMA node0 CPU(s): 0-7\nVulnerability Itlb multihit: KVM: Vulnerable\nVulnerability L1tf: Mitigation; PTE Inversion\nVulnerability Mds: Vulnerable: Clear CPU buffers attempted, no microcode; SMT Host state unknown\nVulnerability Meltdown: Mitigation; PTI\nVulnerability Spec store bypass: Vulnerable\nVulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization\nVulnerability Spectre v2: Mitigation; Retpolines, STIBP disabled, RSB filling\nVulnerability Srbds: Not affected\nVulnerability Tsx async abort: Not affected\nFlags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc rep_good nopl xtopology nonstop_tsc cpuid aperfmperf tsc_known_freq pni pclmulqdq ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand hypervisor lahf_lm abm 3dnowprefetch invpcid_single pti fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid mpx avx512f avx512dq rdseed adx smap clflushopt clwb avx512cd avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves ida arat pku ospke avx512_vnni\n", |
|
"versions": { |
|
"transformers": "4.20.1", |
|
"optimum": "1.2.3.dev0", |
|
"optimum_hash": "5c9af4e5f93c7e9bd523563230732b49603dc4d7" |
|
}, |
|
"evaluation": { |
|
"time": [ |
|
{ |
|
"batch_size": 8, |
|
"input_length": 128, |
|
"baseline": { |
|
"nb_forwards": 16, |
|
"throughput": 1.07, |
|
"latency_mean": 981.8066059375, |
|
"latency_std": 44.423175785791656, |
|
"latency_50": 968.222589, |
|
"latency_90": 1030.6252395, |
|
"latency_95": 1051.9451035, |
|
"latency_99": 1099.3337503, |
|
"latency_999": 1109.99619583 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 16, |
|
"throughput": 1.07, |
|
"latency_mean": 940.686023875, |
|
"latency_std": 34.50079570717892, |
|
"latency_50": 932.533706, |
|
"latency_90": 954.537913, |
|
"latency_95": 982.67146125, |
|
"latency_99": 1049.86251225, |
|
"latency_999": 1064.980498725 |
|
} |
|
}, |
|
{ |
|
"batch_size": 4, |
|
"input_length": 64, |
|
"baseline": { |
|
"nb_forwards": 23, |
|
"throughput": 1.53, |
|
"latency_mean": 653.6214047826087, |
|
"latency_std": 0.554367349056088, |
|
"latency_50": 653.56593, |
|
"latency_90": 654.3410592, |
|
"latency_95": 654.4254941, |
|
"latency_99": 654.86828346, |
|
"latency_999": 654.978897546 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 32, |
|
"throughput": 2.13, |
|
"latency_mean": 468.82132275, |
|
"latency_std": 6.479021216722283, |
|
"latency_50": 468.046428, |
|
"latency_90": 477.1373799, |
|
"latency_95": 478.81192385, |
|
"latency_99": 481.5715929, |
|
"latency_999": 482.31672099 |
|
} |
|
}, |
|
{ |
|
"batch_size": 1, |
|
"input_length": 64, |
|
"baseline": { |
|
"nb_forwards": 76, |
|
"throughput": 5.07, |
|
"latency_mean": 199.75279011842105, |
|
"latency_std": 1.1029265450656718, |
|
"latency_50": 199.5758745, |
|
"latency_90": 200.400115, |
|
"latency_95": 200.559785, |
|
"latency_99": 202.84980075, |
|
"latency_999": 207.81083137499996 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 115, |
|
"throughput": 7.67, |
|
"latency_mean": 131.4627194347826, |
|
"latency_std": 4.6871974959568545, |
|
"latency_50": 130.19088, |
|
"latency_90": 139.359352, |
|
"latency_95": 141.0757014, |
|
"latency_99": 143.8943466, |
|
"latency_999": 150.29109535799998 |
|
} |
|
}, |
|
{ |
|
"batch_size": 4, |
|
"input_length": 128, |
|
"baseline": { |
|
"nb_forwards": 23, |
|
"throughput": 1.53, |
|
"latency_mean": 661.8593041304348, |
|
"latency_std": 11.283099142573517, |
|
"latency_50": 655.43737, |
|
"latency_90": 681.5352313999999, |
|
"latency_95": 682.9110206, |
|
"latency_99": 683.9518636399999, |
|
"latency_999": 684.184243964 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 32, |
|
"throughput": 2.13, |
|
"latency_mean": 472.49058509375, |
|
"latency_std": 8.963136445787091, |
|
"latency_50": 474.708593, |
|
"latency_90": 483.1333381, |
|
"latency_95": 485.4448544, |
|
"latency_99": 488.61856141000004, |
|
"latency_999": 489.732493741 |
|
} |
|
}, |
|
{ |
|
"batch_size": 8, |
|
"input_length": 32, |
|
"baseline": { |
|
"nb_forwards": 12, |
|
"throughput": 0.8, |
|
"latency_mean": 1283.6983385, |
|
"latency_std": 22.13170499191711, |
|
"latency_50": 1275.341306, |
|
"latency_90": 1321.6280871, |
|
"latency_95": 1324.7206230499999, |
|
"latency_99": 1326.57760301, |
|
"latency_999": 1326.995423501 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 17, |
|
"throughput": 1.13, |
|
"latency_mean": 927.8742942352941, |
|
"latency_std": 13.192113526204107, |
|
"latency_50": 928.112074, |
|
"latency_90": 943.312943, |
|
"latency_95": 947.7967182000001, |
|
"latency_99": 950.06144204, |
|
"latency_999": 950.571004904 |
|
} |
|
}, |
|
{ |
|
"batch_size": 1, |
|
"input_length": 32, |
|
"baseline": { |
|
"nb_forwards": 92, |
|
"throughput": 6.13, |
|
"latency_mean": 163.7631747173913, |
|
"latency_std": 16.656417748995572, |
|
"latency_50": 155.936432, |
|
"latency_90": 197.9161219, |
|
"latency_95": 199.74363870000002, |
|
"latency_99": 201.92121115999998, |
|
"latency_999": 203.62556981600002 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 116, |
|
"throughput": 7.73, |
|
"latency_mean": 129.6231810086207, |
|
"latency_std": 1.9548923667657356, |
|
"latency_50": 129.4212125, |
|
"latency_90": 132.2460935, |
|
"latency_95": 132.81225825, |
|
"latency_99": 133.86391385, |
|
"latency_999": 138.651827795 |
|
} |
|
}, |
|
{ |
|
"batch_size": 8, |
|
"input_length": 64, |
|
"baseline": { |
|
"nb_forwards": 12, |
|
"throughput": 0.8, |
|
"latency_mean": 1286.5124550833332, |
|
"latency_std": 23.2357907207992, |
|
"latency_50": 1276.5111835, |
|
"latency_90": 1323.3058475999999, |
|
"latency_95": 1327.5837783, |
|
"latency_99": 1330.1992940599998, |
|
"latency_999": 1330.787785106 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 12, |
|
"throughput": 0.8, |
|
"latency_mean": 1317.95578525, |
|
"latency_std": 1.490128618702992, |
|
"latency_50": 1317.6128805, |
|
"latency_90": 1320.2002315, |
|
"latency_95": 1320.6199098, |
|
"latency_99": 1320.96551396, |
|
"latency_999": 1321.043274896 |
|
} |
|
}, |
|
{ |
|
"batch_size": 4, |
|
"input_length": 32, |
|
"baseline": { |
|
"nb_forwards": 23, |
|
"throughput": 1.53, |
|
"latency_mean": 666.9763079565217, |
|
"latency_std": 11.149385297770825, |
|
"latency_50": 661.47222, |
|
"latency_90": 684.7818294, |
|
"latency_95": 685.6954787000001, |
|
"latency_99": 686.6990396799999, |
|
"latency_999": 686.939640568 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 23, |
|
"throughput": 1.53, |
|
"latency_mean": 655.3178510869565, |
|
"latency_std": 2.77492679226272, |
|
"latency_50": 655.00068, |
|
"latency_90": 659.4658294, |
|
"latency_95": 660.3613685, |
|
"latency_99": 660.48331564, |
|
"latency_999": 660.4916490640001 |
|
} |
|
}, |
|
{ |
|
"batch_size": 1, |
|
"input_length": 128, |
|
"baseline": { |
|
"nb_forwards": 86, |
|
"throughput": 5.73, |
|
"latency_mean": 176.08157447674418, |
|
"latency_std": 20.32932776355446, |
|
"latency_50": 167.453177, |
|
"latency_90": 197.6717735, |
|
"latency_95": 198.75702125, |
|
"latency_99": 199.23389685, |
|
"latency_999": 199.447866585 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 93, |
|
"throughput": 6.2, |
|
"latency_mean": 161.78841179569892, |
|
"latency_std": 0.2689887068096146, |
|
"latency_50": 161.820936, |
|
"latency_90": 162.0967684, |
|
"latency_95": 162.21570319999998, |
|
"latency_99": 162.39115031999998, |
|
"latency_999": 162.649013532 |
|
} |
|
} |
|
], |
|
"others": { |
|
"baseline": { |
|
"accuracy": 0.98 |
|
}, |
|
"optimized": { |
|
"accuracy": 0.98 |
|
} |
|
} |
|
}, |
|
"max_eval_samples": null, |
|
"time_benchmark_args": { |
|
"duration": 15, |
|
"warmup_runs": 5 |
|
}, |
|
"model_type": "vit" |
|
}, |
|
{ |
|
"model_name_or_path": "nateraw/vit-base-beans", |
|
"task": "image-classification", |
|
"task_args": null, |
|
"dataset": { |
|
"path": "beans", |
|
"eval_split": "validation", |
|
"data_keys": { |
|
"primary": "image", |
|
"secondary": null |
|
}, |
|
"ref_keys": [ |
|
"labels" |
|
], |
|
"name": null, |
|
"calibration_split": "train" |
|
}, |
|
"quantization_approach": "dynamic", |
|
"operators_to_quantize": [ |
|
"Add", |
|
"MatMul" |
|
], |
|
"node_exclusion": [ |
|
"layernorm", |
|
"gelu", |
|
"residual", |
|
"gather", |
|
"softmax" |
|
], |
|
"aware_training": false, |
|
"per_channel": true, |
|
"calibration": { |
|
"method": "minmax", |
|
"num_calibration_samples": 100, |
|
"calibration_histogram_percentile": null, |
|
"calibration_moving_average": null, |
|
"calibration_moving_average_constant": null |
|
}, |
|
"framework": "onnxruntime", |
|
"framework_args": { |
|
"opset": 11, |
|
"optimization_level": 1 |
|
}, |
|
"hardware": "Architecture: x86_64\nCPU op-mode(s): 32-bit, 64-bit\nByte Order: Little Endian\nAddress sizes: 46 bits physical, 48 bits virtual\nCPU(s): 8\nOn-line CPU(s) list: 0-7\nThread(s) per core: 2\nCore(s) per socket: 4\nSocket(s): 1\nNUMA node(s): 1\nVendor ID: GenuineIntel\nCPU family: 6\nModel: 85\nModel name: Intel(R) Xeon(R) Platinum 8259CL CPU @ 2.50GHz\nStepping: 7\nCPU MHz: 3105.350\nBogoMIPS: 4999.99\nHypervisor vendor: KVM\nVirtualization type: full\nL1d cache: 128 KiB\nL1i cache: 128 KiB\nL2 cache: 4 MiB\nL3 cache: 35.8 MiB\nNUMA node0 CPU(s): 0-7\nVulnerability Itlb multihit: KVM: Vulnerable\nVulnerability L1tf: Mitigation; PTE Inversion\nVulnerability Mds: Vulnerable: Clear CPU buffers attempted, no microcode; SMT Host state unknown\nVulnerability Meltdown: Mitigation; PTI\nVulnerability Spec store bypass: Vulnerable\nVulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization\nVulnerability Spectre v2: Mitigation; Retpolines, STIBP disabled, RSB filling\nVulnerability Srbds: Not affected\nVulnerability Tsx async abort: Not affected\nFlags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc rep_good nopl xtopology nonstop_tsc cpuid aperfmperf tsc_known_freq pni pclmulqdq ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand hypervisor lahf_lm abm 3dnowprefetch invpcid_single pti fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid mpx avx512f avx512dq rdseed adx smap clflushopt clwb avx512cd avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves ida arat pku ospke avx512_vnni\n", |
|
"versions": { |
|
"transformers": "4.20.1", |
|
"optimum": "1.2.3.dev0", |
|
"optimum_hash": "5c9af4e5f93c7e9bd523563230732b49603dc4d7" |
|
}, |
|
"evaluation": { |
|
"time": [ |
|
{ |
|
"batch_size": 8, |
|
"input_length": 64, |
|
"baseline": { |
|
"nb_forwards": 13, |
|
"throughput": 0.87, |
|
"latency_mean": 1157.0002205384615, |
|
"latency_std": 173.96892806199435, |
|
"latency_50": 1268.399731, |
|
"latency_90": 1373.240541, |
|
"latency_95": 1394.8925212000001, |
|
"latency_99": 1395.4346634400001, |
|
"latency_999": 1395.556645444 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 28, |
|
"throughput": 1.87, |
|
"latency_mean": 542.74753675, |
|
"latency_std": 1.5526347456374558, |
|
"latency_50": 542.544525, |
|
"latency_90": 544.3944952, |
|
"latency_95": 545.1945222999999, |
|
"latency_99": 547.32045758, |
|
"latency_999": 547.9295482580001 |
|
} |
|
}, |
|
{ |
|
"batch_size": 1, |
|
"input_length": 32, |
|
"baseline": { |
|
"nb_forwards": 76, |
|
"throughput": 5.07, |
|
"latency_mean": 198.1925660394737, |
|
"latency_std": 1.5139423529661107, |
|
"latency_50": 198.3774705, |
|
"latency_90": 199.9859375, |
|
"latency_95": 200.47868375, |
|
"latency_99": 200.94235025, |
|
"latency_999": 201.309621125 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 207, |
|
"throughput": 13.8, |
|
"latency_mean": 72.64656536231884, |
|
"latency_std": 0.5352263228340901, |
|
"latency_50": 72.569523, |
|
"latency_90": 73.2376486, |
|
"latency_95": 73.53506229999999, |
|
"latency_99": 74.89236506, |
|
"latency_999": 76.09928199800001 |
|
} |
|
}, |
|
{ |
|
"batch_size": 8, |
|
"input_length": 32, |
|
"baseline": { |
|
"nb_forwards": 12, |
|
"throughput": 0.8, |
|
"latency_mean": 1287.5803281666667, |
|
"latency_std": 36.91340143279409, |
|
"latency_50": 1269.6482595, |
|
"latency_90": 1303.1859430999998, |
|
"latency_95": 1346.71807165, |
|
"latency_99": 1388.7522119300002, |
|
"latency_999": 1398.209893493 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 28, |
|
"throughput": 1.87, |
|
"latency_mean": 542.7170135714285, |
|
"latency_std": 1.3567039497139863, |
|
"latency_50": 542.287221, |
|
"latency_90": 544.4592137999999, |
|
"latency_95": 545.4364649, |
|
"latency_99": 546.21864882, |
|
"latency_999": 546.315030882 |
|
} |
|
}, |
|
{ |
|
"batch_size": 4, |
|
"input_length": 64, |
|
"baseline": { |
|
"nb_forwards": 30, |
|
"throughput": 2.0, |
|
"latency_mean": 507.2400474666666, |
|
"latency_std": 4.290148550841821, |
|
"latency_50": 507.659231, |
|
"latency_90": 511.6910329, |
|
"latency_95": 514.4643897, |
|
"latency_99": 516.23375722, |
|
"latency_999": 516.269666122 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 57, |
|
"throughput": 3.8, |
|
"latency_mean": 265.6226849122807, |
|
"latency_std": 2.3036455255332684, |
|
"latency_50": 265.179855, |
|
"latency_90": 269.8071572, |
|
"latency_95": 271.4136062, |
|
"latency_99": 271.69196852, |
|
"latency_999": 271.82992995200004 |
|
} |
|
}, |
|
{ |
|
"batch_size": 4, |
|
"input_length": 32, |
|
"baseline": { |
|
"nb_forwards": 25, |
|
"throughput": 1.67, |
|
"latency_mean": 603.25719464, |
|
"latency_std": 66.25874091678149, |
|
"latency_50": 654.828359, |
|
"latency_90": 660.8697482, |
|
"latency_95": 661.393019, |
|
"latency_99": 661.8344047999999, |
|
"latency_999": 661.93484588 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 57, |
|
"throughput": 3.8, |
|
"latency_mean": 266.1465179649123, |
|
"latency_std": 2.133106034837133, |
|
"latency_50": 265.532055, |
|
"latency_90": 269.2449408, |
|
"latency_95": 270.3573558, |
|
"latency_99": 271.02784404000005, |
|
"latency_999": 271.716555504 |
|
} |
|
}, |
|
{ |
|
"batch_size": 1, |
|
"input_length": 64, |
|
"baseline": { |
|
"nb_forwards": 95, |
|
"throughput": 6.33, |
|
"latency_mean": 159.68801769473683, |
|
"latency_std": 12.773363897780701, |
|
"latency_50": 155.413717, |
|
"latency_90": 176.90005560000023, |
|
"latency_95": 197.0968159, |
|
"latency_99": 198.07615253999998, |
|
"latency_999": 198.10961945399998 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 207, |
|
"throughput": 13.8, |
|
"latency_mean": 72.7664098888889, |
|
"latency_std": 0.38816228058385593, |
|
"latency_50": 72.718874, |
|
"latency_90": 73.24516159999999, |
|
"latency_95": 73.43794679999999, |
|
"latency_99": 73.8501665, |
|
"latency_999": 73.996995134 |
|
} |
|
}, |
|
{ |
|
"batch_size": 4, |
|
"input_length": 128, |
|
"baseline": { |
|
"nb_forwards": 23, |
|
"throughput": 1.53, |
|
"latency_mean": 657.163290173913, |
|
"latency_std": 1.3013676808424233, |
|
"latency_50": 657.058665, |
|
"latency_90": 657.6071907999999, |
|
"latency_95": 659.0426878, |
|
"latency_99": 661.57735552, |
|
"latency_999": 662.180470252 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 63, |
|
"throughput": 4.2, |
|
"latency_mean": 240.11091615873016, |
|
"latency_std": 21.14828674924693, |
|
"latency_50": 226.793935, |
|
"latency_90": 269.1414722, |
|
"latency_95": 270.4462369, |
|
"latency_99": 279.1629919800001, |
|
"latency_999": 290.00248849800005 |
|
} |
|
}, |
|
{ |
|
"batch_size": 1, |
|
"input_length": 128, |
|
"baseline": { |
|
"nb_forwards": 95, |
|
"throughput": 6.33, |
|
"latency_mean": 158.60142149473685, |
|
"latency_std": 11.827907365032553, |
|
"latency_50": 155.594958, |
|
"latency_90": 159.5463688, |
|
"latency_95": 198.4467831, |
|
"latency_99": 201.84369322, |
|
"latency_999": 202.49142212200002 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 207, |
|
"throughput": 13.8, |
|
"latency_mean": 72.53135863285024, |
|
"latency_std": 0.4182249154784389, |
|
"latency_50": 72.447941, |
|
"latency_90": 73.07772, |
|
"latency_95": 73.2243443, |
|
"latency_99": 73.8286485, |
|
"latency_999": 73.97194594400001 |
|
} |
|
}, |
|
{ |
|
"batch_size": 8, |
|
"input_length": 128, |
|
"baseline": { |
|
"nb_forwards": 16, |
|
"throughput": 1.07, |
|
"latency_mean": 951.313504375, |
|
"latency_std": 6.690910380125991, |
|
"latency_50": 951.8069655, |
|
"latency_90": 958.854554, |
|
"latency_95": 960.087836, |
|
"latency_99": 962.6794064, |
|
"latency_999": 963.26250974 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 31, |
|
"throughput": 2.07, |
|
"latency_mean": 489.4035592258065, |
|
"latency_std": 45.5667283182196, |
|
"latency_50": 469.093915, |
|
"latency_90": 553.663101, |
|
"latency_95": 555.243522, |
|
"latency_99": 556.1204486, |
|
"latency_999": 556.42928486 |
|
} |
|
} |
|
], |
|
"others": { |
|
"baseline": { |
|
"accuracy": 0.98 |
|
}, |
|
"optimized": { |
|
"accuracy": 0.98 |
|
} |
|
} |
|
}, |
|
"max_eval_samples": null, |
|
"time_benchmark_args": { |
|
"duration": 15, |
|
"warmup_runs": 5 |
|
}, |
|
"model_type": "vit" |
|
} |
|
] |