# Build a TensorRT FP16 engine from ./encoder.onnx with Polygraphy and compare
# its outputs against ONNX Runtime.
#
# - Comparison tolerances: absolute 1e-3, relative 1e-3.
# - "mark all" is passed as two words to both --onnx-outputs and
#   --trt-outputs; do not quote it into a single argument.
# - The built engine is saved to ./encoder1_fp16.plan.
# - The TensorRT profile lets the leading dimension of every input range
#   over 1 (min) / 16 (opt) / 32 (max); the inference inputs use 16
#   (presumably the batch size -- confirm against the model).
#
# Shape specs are single-quoted because an unquoted [...] is a shell glob
# pattern and could be expanded against files in the current directory
# (or abort the command under failglob / zsh).
polygraphy run \
  ./encoder.onnx \
  --fp16 \
  --onnxrt \
  --trt \
  --atol 1e-3 --rtol 1e-3 \
  --pool-limit workspace:1000000000 \
  --save-engine=./encoder1_fp16.plan \
  --verbose \
  --onnx-outputs mark all \
  --trt-outputs mark all \
  --trt-min-shapes \
    'chunk_xs:[1,67,80]' 'chunk_lens:[1]' \
    'offset:[1,1]' 'att_cache:[1,12,4,80,128]' \
    'cnn_cache:[1,12,256,7]' 'cache_mask:[1,1,80]' \
  --trt-opt-shapes \
    'chunk_xs:[16,67,80]' 'chunk_lens:[16]' \
    'offset:[16,1]' 'att_cache:[16,12,4,80,128]' \
    'cnn_cache:[16,12,256,7]' 'cache_mask:[16,1,80]' \
  --trt-max-shapes \
    'chunk_xs:[32,67,80]' 'chunk_lens:[32]' \
    'offset:[32,1]' 'att_cache:[32,12,4,80,128]' \
    'cnn_cache:[32,12,256,7]' 'cache_mask:[32,1,80]' \
  --input-shapes \
    'chunk_xs:[16,67,80]' 'chunk_lens:[16]' \
    'offset:[16,1]' 'att_cache:[16,12,4,80,128]' \
    'cnn_cache:[16,12,256,7]' 'cache_mask:[16,1,80]' \
  --validate