fxmarty
/

trt-llm-test-whisper

Model card Files and versions Community

fxmarty committed on Feb 16, 2024

Commit

56121b9

verified ·

1 Parent(s): 510e35f

Upload folder using huggingface_hub

Browse files

Files changed (9) hide show

.gitattributes +2 -0
decoder/build.json +195 -0
decoder/config.json +1 -0
decoder/timings.cache +0 -0
decoder/whisper_float16_tp1_rank0.engine +3 -0
encoder/build.json +189 -0
encoder/config.json +1 -0
encoder/timings.cache +0 -0
encoder/whisper_float16_tp1_rank0.engine +3 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+decoder/whisper_float16_tp1_rank0.engine filter=lfs diff=lfs merge=lfs -text
+encoder/whisper_float16_tp1_rank0.engine filter=lfs diff=lfs merge=lfs -text

decoder/build.json ADDED Viewed

	@@ -0,0 +1,195 @@

+{
+  "builder_config": {
+    "apply_query_key_layer_scaling": false,
+    "cross_attention": true,
+    "fp8": false,
+    "has_position_embedding": true,
+    "has_token_type_embedding": false,
+    "hidden_act": "gelu",
+    "hidden_size": 768,
+    "huggingface": {
+      "_name_or_path": "openai/whisper-small",
+      "activation_dropout": 0.0,
+      "activation_function": "gelu",
+      "architectures": [
+        "WhisperForConditionalGeneration"
+      ],
+      "attention_dropout": 0.0,
+      "begin_suppress_tokens": [
+        220,
+        50257
+      ],
+      "bos_token_id": 50257,
+      "d_model": 768,
+      "decoder_attention_heads": 12,
+      "decoder_ffn_dim": 3072,
+      "decoder_layerdrop": 0.0,
+      "decoder_layers": 12,
+      "decoder_start_token_id": 50258,
+      "dropout": 0.0,
+      "encoder_attention_heads": 12,
+      "encoder_ffn_dim": 3072,
+      "encoder_layerdrop": 0.0,
+      "encoder_layers": 12,
+      "eos_token_id": 50257,
+      "forced_decoder_ids": [
+        [
+          1,
+          50259
+        ],
+        [
+          2,
+          50359
+        ],
+        [
+          3,
+          50363
+        ]
+      ],
+      "hidden_size": 768,
+      "init_std": 0.02,
+      "is_encoder_decoder": true,
+      "max_length": 448,
+      "max_sequence_length": 448,
+      "max_source_positions": 1500,
+      "max_target_positions": 448,
+      "model_type": "whisper",
+      "num_hidden_layers": 12,
+      "num_layers": 12,
+      "num_mel_bins": 80,
+      "pad_token_id": 50257,
+      "scale_embedding": false,
+      "suppress_tokens": [
+        1,
+        2,
+        7,
+        8,
+        9,
+        10,
+        14,
+        25,
+        26,
+        27,
+        28,
+        29,
+        31,
+        58,
+        59,
+        60,
+        61,
+        62,
+        63,
+        90,
+        91,
+        92,
+        93,
+        359,
+        503,
+        522,
+        542,
+        873,
+        893,
+        902,
+        918,
+        922,
+        931,
+        1350,
+        1853,
+        1982,
+        2460,
+        2627,
+        3246,
+        3253,
+        3268,
+        3536,
+        3846,
+        3961,
+        4183,
+        4667,
+        6585,
+        6647,
+        7273,
+        9061,
+        9383,
+        10428,
+        10929,
+        11938,
+        12033,
+        12331,
+        12562,
+        13793,
+        14157,
+        14635,
+        15265,
+        15618,
+        16553,
+        16604,
+        18362,
+        18956,
+        20075,
+        21675,
+        22520,
+        26130,
+        26161,
+        26435,
+        28279,
+        29464,
+        31650,
+        32302,
+        32470,
+        36865,
+        42863,
+        47425,
+        49870,
+        50254,
+        50258,
+        50360,
+        50361,
+        50362
+      ],
+      "torch_dtype": "float32",
+      "transformers_version": "4.27.0.dev0",
+      "use_cache": true,
+      "vocab_size": 51865
+    },
+    "int8": false,
+    "max_batch_size": 1,
+    "max_input_len": 1,
+    "max_output_len": 448,
+    "max_position_embeddings": 448,
+    "name": "whisper",
+    "num_heads": 12,
+    "num_layers": 12,
+    "precision": "float16",
+    "quant_mode": 0,
+    "tensor_parallel": 1,
+    "tensorrt": "9.2.0.post12.dev5",
+    "use_refit": false
+  },
+  "plugin_config": {
+    "attention_qk_half_accumulation": false,
+    "bert_attention_plugin": false,
+    "context_fmha_type": 1,
+    "gemm_plugin": "float16",
+    "gpt_attention_plugin": "float16",
+    "identity_plugin": false,
+    "layernorm_plugin": false,
+    "layernorm_quantization_plugin": false,
+    "lookup_plugin": false,
+    "lora_plugin": false,
+    "multi_block_mode": false,
+    "nccl_plugin": false,
+    "paged_kv_cache": false,
+    "quantize_per_token_plugin": false,
+    "quantize_tensor_plugin": false,
+    "remove_input_padding": true,
+    "rmsnorm_plugin": false,
+    "rmsnorm_quantization_plugin": false,
+    "smooth_quant_gemm_plugin": false,
+    "tokens_per_block": 0,
+    "use_custom_all_reduce": false,
+    "use_paged_context_fmha": false,
+    "weight_only_groupwise_quant_matmul_plugin": false,
+    "weight_only_quant_matmul_plugin": false
+  }
+}

decoder/config.json ADDED Viewed

	@@ -0,0 +1 @@

+ {"_name_or_path": "openai/whisper-small", "activation_dropout": 0.0, "activation_function": "gelu", "architectures": ["WhisperForConditionalGeneration"], "attention_dropout": 0.0, "begin_suppress_tokens": [220, 50257], "bos_token_id": 50257, "d_model": 768, "decoder_attention_heads": 12, "decoder_ffn_dim": 3072, "decoder_layerdrop": 0.0, "decoder_layers": 12, "decoder_start_token_id": 50258, "dropout": 0.0, "encoder_attention_heads": 12, "encoder_ffn_dim": 3072, "encoder_layerdrop": 0.0, "encoder_layers": 12, "eos_token_id": 50257, "forced_decoder_ids": [[1, 50259], [2, 50359], [3, 50363]], "init_std": 0.02, "is_encoder_decoder": true, "max_length": 448, "max_source_positions": 1500, "max_target_positions": 448, "model_type": "whisper", "num_hidden_layers": 12, "num_mel_bins": 80, "pad_token_id": 50257, "scale_embedding": false, "suppress_tokens": [1, 2, 7, 8, 9, 10, 14, 25, 26, 27, 28, 29, 31, 58, 59, 60, 61, 62, 63, 90, 91, 92, 93, 359, 503, 522, 542, 873, 893, 902, 918, 922, 931, 1350, 1853, 1982, 2460, 2627, 3246, 3253, 3268, 3536, 3846, 3961, 4183, 4667, 6585, 6647, 7273, 9061, 9383, 10428, 10929, 11938, 12033, 12331, 12562, 13793, 14157, 14635, 15265, 15618, 16553, 16604, 18362, 18956, 20075, 21675, 22520, 26130, 26161, 26435, 28279, 29464, 31650, 32302, 32470, 36865, 42863, 47425, 49870, 50254, 50258, 50360, 50361, 50362], "torch_dtype": "float32", "transformers_version": "4.27.0.dev0", "use_cache": true, "vocab_size": 51865, "num_layers": 12, "max_sequence_length": 448, "hidden_size": 768}

decoder/timings.cache ADDED Viewed

Binary file (102 kB). View file

decoder/whisper_float16_tp1_rank0.engine ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:932161932241b2e40711ecc96c46350a1d818ce2aac7572581d2eef407494c99
+size 388396564

encoder/build.json ADDED Viewed

	@@ -0,0 +1,189 @@

+{
+  "builder_config": {
+    "fp8": false,
+    "hidden_size": 768,
+    "huggingface": {
+      "_name_or_path": "openai/whisper-small",
+      "activation_dropout": 0.0,
+      "activation_function": "gelu",
+      "architectures": [
+        "WhisperForConditionalGeneration"
+      ],
+      "attention_dropout": 0.0,
+      "begin_suppress_tokens": [
+        220,
+        50257
+      ],
+      "bos_token_id": 50257,
+      "d_model": 768,
+      "decoder_attention_heads": 12,
+      "decoder_ffn_dim": 3072,
+      "decoder_layerdrop": 0.0,
+      "decoder_layers": 12,
+      "decoder_start_token_id": 50258,
+      "dropout": 0.0,
+      "encoder_attention_heads": 12,
+      "encoder_ffn_dim": 3072,
+      "encoder_layerdrop": 0.0,
+      "encoder_layers": 12,
+      "eos_token_id": 50257,
+      "forced_decoder_ids": [
+        [
+          1,
+          50259
+        ],
+        [
+          2,
+          50359
+        ],
+        [
+          3,
+          50363
+        ]
+      ],
+      "hidden_size": 768,
+      "init_std": 0.02,
+      "is_encoder_decoder": true,
+      "max_length": 448,
+      "max_sequence_length": 448,
+      "max_source_positions": 1500,
+      "max_target_positions": 448,
+      "model_type": "whisper",
+      "num_hidden_layers": 12,
+      "num_layers": 12,
+      "num_mel_bins": 80,
+      "pad_token_id": 50257,
+      "scale_embedding": false,
+      "suppress_tokens": [
+        1,
+        2,
+        7,
+        8,
+        9,
+        10,
+        14,
+        25,
+        26,
+        27,
+        28,
+        29,
+        31,
+        58,
+        59,
+        60,
+        61,
+        62,
+        63,
+        90,
+        91,
+        92,
+        93,
+        359,
+        503,
+        522,
+        542,
+        873,
+        893,
+        902,
+        918,
+        922,
+        931,
+        1350,
+        1853,
+        1982,
+        2460,
+        2627,
+        3246,
+        3253,
+        3268,
+        3536,
+        3846,
+        3961,
+        4183,
+        4667,
+        6585,
+        6647,
+        7273,
+        9061,
+        9383,
+        10428,
+        10929,
+        11938,
+        12033,
+        12331,
+        12562,
+        13793,
+        14157,
+        14635,
+        15265,
+        15618,
+        16553,
+        16604,
+        18362,
+        18956,
+        20075,
+        21675,
+        22520,
+        26130,
+        26161,
+        26435,
+        28279,
+        29464,
+        31650,
+        32302,
+        32470,
+        36865,
+        42863,
+        47425,
+        49870,
+        50254,
+        50258,
+        50360,
+        50361,
+        50362
+      ],
+      "torch_dtype": "float32",
+      "transformers_version": "4.27.0.dev0",
+      "use_cache": true,
+      "vocab_size": 51865
+    },
+    "int8": false,
+    "max_batch_size": 1,
+    "n_mels": 80,
+    "name": "whisper",
+    "num_heads": 12,
+    "num_languages": 99,
+    "num_layers": 12,
+    "precision": "float16",
+    "quant_mode": 0,
+    "tensor_parallel": 1,
+    "tensorrt": "9.2.0.post12.dev5",
+    "use_refit": false
+  },
+  "plugin_config": {
+    "attention_qk_half_accumulation": false,
+    "bert_attention_plugin": false,
+    "context_fmha_type": 1,
+    "gemm_plugin": "float16",
+    "gpt_attention_plugin": "float16",
+    "identity_plugin": false,
+    "layernorm_plugin": false,
+    "layernorm_quantization_plugin": false,
+    "lookup_plugin": false,
+    "lora_plugin": false,
+    "multi_block_mode": false,
+    "nccl_plugin": false,
+    "paged_kv_cache": false,
+    "quantize_per_token_plugin": false,
+    "quantize_tensor_plugin": false,
+    "remove_input_padding": true,
+    "rmsnorm_plugin": false,
+    "rmsnorm_quantization_plugin": false,
+    "smooth_quant_gemm_plugin": false,
+    "tokens_per_block": 0,
+    "use_custom_all_reduce": false,
+    "use_paged_context_fmha": false,
+    "weight_only_groupwise_quant_matmul_plugin": false,
+    "weight_only_quant_matmul_plugin": false
+  }
+}

encoder/config.json ADDED Viewed

	@@ -0,0 +1 @@

encoder/timings.cache ADDED Viewed

Binary file (217 kB). View file

encoder/whisper_float16_tp1_rank0.engine ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:cec1582bb8aacee7c9005e0a3eb791de8c427825d64d927f4c0461aad071190a
+size 183202884