Devops-hestabit committed on
Commit a4bce1c
1 parent: 9060678

Upload folder using huggingface_hub

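The commit message matches the default message huggingface_hub emits for folder uploads, so this change was presumably pushed with HfApi.upload_folder rather than a manual git push. A hedged sketch of that call; the repo id is a placeholder, since the target repository is not named in this view:

from huggingface_hub import HfApi

api = HfApi()
api.upload_folder(
    folder_path="yi-34b-tp2-awq",    # local folder with config.json, engine shards, model.cache
    path_in_repo="yi-34b-tp2-awq",   # keep the same layout seen in this commit
    repo_id="your-org/your-repo",    # placeholder -- the actual repo id is not shown here
    repo_type="model",
)

upload_folder handles the multi-gigabyte .engine files through the Hub's large-file storage, which is why the tracked files in the diff below are LFS pointers rather than the binaries themselves.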
yi-34b-tp2-awq/config.json CHANGED
@@ -1,14 +1,17 @@
 {
   "builder_config": {
-    "fp8": false,
+    "gather_all_token_logits": false,
     "hidden_act": "silu",
     "hidden_size": 7168,
-    "int8": false,
+    "int8": true,
+    "lora_target_modules": [],
     "max_batch_size": 24,
+    "max_beam_width": 1,
     "max_input_len": 3500,
     "max_num_tokens": null,
     "max_output_len": 1000,
     "max_position_embeddings": 4096,
+    "max_prompt_embedding_table_size": 0,
     "name": "llama",
     "num_heads": 56,
     "num_kv_heads": 8,
@@ -24,13 +27,15 @@
   "plugin_config": {
     "attention_qk_half_accumulation": false,
     "bert_attention_plugin": false,
-    "context_fmha_type": 1,
+    "context_fmha_type": 0,
     "gemm_plugin": "float16",
     "gpt_attention_plugin": "float16",
     "identity_plugin": false,
     "layernorm_plugin": false,
     "layernorm_quantization_plugin": false,
     "lookup_plugin": false,
+    "lora_plugin": false,
+    "multi_block_mode": false,
     "nccl_plugin": "float16",
     "paged_kv_cache": true,
     "quantize_per_token_plugin": false,
@@ -39,8 +44,10 @@
     "rmsnorm_plugin": false,
     "rmsnorm_quantization_plugin": false,
     "smooth_quant_gemm_plugin": false,
-    "tokens_per_block": 64,
+    "tokens_per_block": 128,
+    "use_context_fmha_for_generation": false,
     "use_custom_all_reduce": false,
+    "use_paged_context_fmha": false,
     "weight_only_groupwise_quant_matmul_plugin": "float16",
     "weight_only_quant_matmul_plugin": false
   }
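The updated builder_config / plugin_config above pins the serving limits for this engine: group-wise weight-only (AWQ) matmuls in float16, a paged KV cache, max_batch_size 24, max_input_len 3500 and max_output_len 1000. Note that tokens_per_block (the paged-KV-cache block size) moved from 64 to 128, while the request limits themselves are unchanged. A minimal sketch for sanity-checking a downloaded copy of the config against those values, assuming the folder layout from this repo (everything else here is illustrative, not part of the commit):

import json

# Assumes the repo folder was downloaded locally, e.g. with huggingface_hub.snapshot_download.
with open("yi-34b-tp2-awq/config.json") as f:
    cfg = json.load(f)

# Values expected after this commit; adjust if the config changes again.
expected = {
    ("builder_config", "int8"): True,
    ("builder_config", "max_batch_size"): 24,
    ("builder_config", "max_input_len"): 3500,
    ("builder_config", "max_output_len"): 1000,
    ("plugin_config", "paged_kv_cache"): True,
    ("plugin_config", "tokens_per_block"): 128,
    ("plugin_config", "weight_only_groupwise_quant_matmul_plugin"): "float16",
}

for (section, key), want in expected.items():
    got = cfg.get(section, {}).get(key, "<missing>")
    status = "OK  " if got == want else "DIFF"
    print(f"{status} {section}.{key}: expected {want!r}, found {got!r}")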
yi-34b-tp2-awq/llama_float16_tp2_rank0.engine CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9d189be8c8d03648abf44ec18af59312921f42e9df4ad76417066068cc26adee
-size 9681099628
+oid sha256:6db27e514a00d42f74be833f4c93aed531ad2cc0f1f48df0d922c5411bed674b
+size 9681139644
yi-34b-tp2-awq/llama_float16_tp2_rank1.engine CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e433ba1d599675c63d0b1653627ce9e827b7097cec8038cf8a5bc2afe2711c30
-size 9681099628
+oid sha256:ea0a03899eee67eb856d55d1182839cff74804c0083390e47fe3d079db13efe2
+size 9681139644
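Both engine shards are tracked with Git LFS, so the diffs above only change the pointer files (blob sha256 and byte size); the ~9.7 GB binaries are fetched on download. A small sketch for checking that locally downloaded shards match what this commit pushed; the hashes and sizes are copied from the pointers above, and the paths assume the repo folder layout shown here:

import hashlib
import os

# New LFS pointer values for the rebuilt engines (from this commit).
expected = {
    "yi-34b-tp2-awq/llama_float16_tp2_rank0.engine": (
        "6db27e514a00d42f74be833f4c93aed531ad2cc0f1f48df0d922c5411bed674b", 9681139644),
    "yi-34b-tp2-awq/llama_float16_tp2_rank1.engine": (
        "ea0a03899eee67eb856d55d1182839cff74804c0083390e47fe3d079db13efe2", 9681139644),
}

for path, (want_sha, want_size) in expected.items():
    size = os.path.getsize(path)
    sha = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):  # hash in 1 MiB chunks
            sha.update(chunk)
    ok = sha.hexdigest() == want_sha and size == want_size
    print(f"{'OK' if ok else 'MISMATCH'} {path} ({size} bytes)")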
yi-34b-tp2-awq/model.cache CHANGED
Binary files a/yi-34b-tp2-awq/model.cache and b/yi-34b-tp2-awq/model.cache differ