{
  "architectures": [
    "Qwen3ForCausalLM"
  ],
  "model_type": "qwen3",
  "torch_dtype": "bfloat16",
  "transformers_version": "4.44.2",
  "vocab_size": 151936,
  "use_cache": true,
  "rope_theta": 1000000.0,
  "max_position_embeddings": 131072,
  "thinking_tokens": 512000,
  "_name_or_path": "zenlm/zen-coder-480b-instruct",
  "hidden_size": 5120,
  "num_hidden_layers": 64,
  "num_attention_heads": 40,
  "num_key_value_heads": 8,
  "intermediate_size": 27648,
  "num_experts": 16,
  "num_experts_per_tok": 2,
  "expert_interval": 1,
  "_architecture_type": "moe",
  "_total_params": "480B",
  "_active_params": "30B"
}