AlexHung29629 committed on
Commit
8fe3e0e
·
verified ·
1 Parent(s): 55da4c7

Upload Llama4ForCausalLM

Browse files
Files changed (3) hide show
  1. config.json +4 -4
  2. generation_config.json +1 -1
  3. model.safetensors +2 -2
config.json CHANGED
@@ -6,7 +6,7 @@
6
  "attention_chunk_size": 8192,
7
  "attention_dropout": 0.0,
8
  "attn_scale": 0.1,
9
- "attn_temperature_tuning": true,
10
  "bos_token_id": 2,
11
  "cache_implementation": "hybrid",
12
  "eos_token_id": 1,
@@ -54,7 +54,7 @@
54
  "num_attention_heads": 16,
55
  "num_experts_per_tok": 1,
56
  "num_hidden_layers": 28,
57
- "num_key_value_heads": 8,
58
  "num_local_experts": 16,
59
  "output_router_logits": false,
60
  "pad_token_id": 0,
@@ -65,8 +65,8 @@
65
  "router_jitter_noise": 0.0,
66
  "tie_word_embeddings": true,
67
  "torch_dtype": "bfloat16",
68
- "transformers_version": "4.52.4",
69
  "use_cache": true,
70
- "use_qk_norm": false,
71
  "vocab_size": 262144
72
  }
 
6
  "attention_chunk_size": 8192,
7
  "attention_dropout": 0.0,
8
  "attn_scale": 0.1,
9
+ "attn_temperature_tuning": 4,
10
  "bos_token_id": 2,
11
  "cache_implementation": "hybrid",
12
  "eos_token_id": 1,
 
54
  "num_attention_heads": 16,
55
  "num_experts_per_tok": 1,
56
  "num_hidden_layers": 28,
57
+ "num_key_value_heads": 16,
58
  "num_local_experts": 16,
59
  "output_router_logits": false,
60
  "pad_token_id": 0,
 
65
  "router_jitter_noise": 0.0,
66
  "tie_word_embeddings": true,
67
  "torch_dtype": "bfloat16",
68
+ "transformers_version": "4.51.3",
69
  "use_cache": true,
70
+ "use_qk_norm": true,
71
  "vocab_size": 262144
72
  }
generation_config.json CHANGED
@@ -4,5 +4,5 @@
4
  "cache_implementation": "hybrid",
5
  "eos_token_id": 1,
6
  "pad_token_id": 0,
7
- "transformers_version": "4.52.4"
8
  }
 
4
  "cache_implementation": "hybrid",
5
  "eos_token_id": 1,
6
  "pad_token_id": 0,
7
+ "transformers_version": "4.51.3"
8
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a6cd045d8249d33cc9f0f85f9fb65acbc6f8844a3cbd9206d262769d97a02b80
3
- size 1417821464
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4d5b9374055ac2ec373f39998f435a4866053127773528bb51e7327ae2e8b76e
3
+ size 1535262008