Devops-hestabit committed
Commit cb98100
1 Parent(s): 501c1f2

Upload folder using huggingface_hub

config.json CHANGED
@@ -24,7 +24,7 @@
   "plugin_config": {
     "attention_qk_half_accumulation": false,
     "bert_attention_plugin": false,
-    "context_fmha_type": 1,
+    "context_fmha_type": 0,
     "gemm_plugin": "float16",
     "gpt_attention_plugin": "float16",
     "identity_plugin": false,
@@ -32,14 +32,14 @@
     "layernorm_quantization_plugin": false,
     "lookup_plugin": false,
     "nccl_plugin": "float16",
-    "paged_kv_cache": false,
+    "paged_kv_cache": true,
     "quantize_per_token_plugin": false,
     "quantize_tensor_plugin": false,
     "remove_input_padding": true,
     "rmsnorm_plugin": false,
     "rmsnorm_quantization_plugin": false,
     "smooth_quant_gemm_plugin": false,
-    "tokens_per_block": 0,
+    "tokens_per_block": 64,
     "use_custom_all_reduce": false,
     "weight_only_groupwise_quant_matmul_plugin": "float16",
     "weight_only_quant_matmul_plugin": false
llama_float16_tp2_rank0.engine CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:75488f564582b9856b4c31ece6ab4e24df662ec0961f46dba1fd70d41f244108
-size 18261991260
+oid sha256:2ed39223086dd488a82e72c7fe581965b10db03f196771b3037112f67af58583
+size 18261922724
llama_float16_tp2_rank1.engine CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:59d366ece059bb046c18644b72adc88bde251e4d32bff944d8738fc0d0ffc5e3
-size 18261991260
+oid sha256:45bae29bdff0191252a6942f858d707233b59f1c9ce726e3de06e8dabdbf553b
+size 18261922724
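Both engine files live in Git LFS, so the diffs above only touch the pointer files; the rebuilt blobs can be checked after download by recomputing each file's SHA-256 and comparing it to the "oid sha256:" line. A small sketch follows, assuming the engines sit in the current directory under the file names from this commit; the sha256_of helper is hypothetical, not part of huggingface_hub or git-lfs.

# Verify downloaded engines against the updated LFS pointers above.
import hashlib

EXPECTED = {
    "llama_float16_tp2_rank0.engine": "2ed39223086dd488a82e72c7fe581965b10db03f196771b3037112f67af58583",
    "llama_float16_tp2_rank1.engine": "45bae29bdff0191252a6942f858d707233b59f1c9ce726e3de06e8dabdbf553b",
}

def sha256_of(path: str, chunk_size: int = 1 << 20) -> str:
    """Hash in 1 MiB chunks so the ~18 GB engines are never loaded whole."""
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            h.update(chunk)
    return h.hexdigest()

for name, oid in EXPECTED.items():
    assert sha256_of(name) == oid, f"checksum mismatch for {name}"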
model.cache CHANGED
Binary files a/model.cache and b/model.cache differ