pankajroark committed on
Commit
bd472e5
1 Parent(s): a3806a1

update no-quant engine

Browse files
7b-no-quant-tp1/config.json CHANGED
@@ -32,14 +32,14 @@
32
  "layernorm_quantization_plugin": false,
33
  "lookup_plugin": false,
34
  "nccl_plugin": false,
35
- "paged_kv_cache": false,
36
  "quantize_per_token_plugin": false,
37
  "quantize_tensor_plugin": false,
38
  "remove_input_padding": true,
39
  "rmsnorm_plugin": false,
40
  "rmsnorm_quantization_plugin": false,
41
  "smooth_quant_gemm_plugin": false,
42
- "tokens_per_block": 0,
43
  "use_custom_all_reduce": false,
44
  "weight_only_groupwise_quant_matmul_plugin": false,
45
  "weight_only_quant_matmul_plugin": false
 
32
  "layernorm_quantization_plugin": false,
33
  "lookup_plugin": false,
34
  "nccl_plugin": false,
35
+ "paged_kv_cache": true,
36
  "quantize_per_token_plugin": false,
37
  "quantize_tensor_plugin": false,
38
  "remove_input_padding": true,
39
  "rmsnorm_plugin": false,
40
  "rmsnorm_quantization_plugin": false,
41
  "smooth_quant_gemm_plugin": false,
42
+ "tokens_per_block": 64,
43
  "use_custom_all_reduce": false,
44
  "weight_only_groupwise_quant_matmul_plugin": false,
45
  "weight_only_quant_matmul_plugin": false
7b-no-quant-tp1/llama_float16_tp1_rank0.engine CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9a7558d8628cbe8ce2e09377743fe47d33235d33b201fb23ebffd1b6d6ca1905
3
- size 13480868308
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:44082abbf34483808729d46983500858093fddc8e4eb31c282471f93dba3fd33
3
+ size 13480840252
7b-no-quant-tp1/model.cache CHANGED
Binary files a/7b-no-quant-tp1/model.cache and b/7b-no-quant-tp1/model.cache differ