Commit d6705a4
Severian committed
1 Parent(s): 9ff96e0

Upload JambaForCausalLM

config.json CHANGED
@@ -1,5 +1,5 @@
 {
-  "_name_or_path": "ai21labs/Jamba-v0.1",
+  "_name_or_path": "isemmanuelolowe/Jamba-4xMoE_slerp",
   "architectures": [
     "JambaForCausalLM"
   ],
@@ -7,10 +7,10 @@
   "attn_layer_offset": 4,
   "attn_layer_period": 8,
   "auto_map": {
-    "AutoConfig": "ai21labs/Jamba-v0.1--configuration_jamba.JambaConfig",
-    "AutoModel": "ai21labs/Jamba-v0.1--modeling_jamba.JambaModel",
-    "AutoModelForCausalLM": "ai21labs/Jamba-v0.1--modeling_jamba.JambaForCausalLM",
-    "AutoModelForSequenceClassification": "ai21labs/Jamba-v0.1--model.JambaForSequenceClassification"
+    "AutoConfig": "isemmanuelolowe/Jamba-4xMoE_slerp--configuration_jamba.JambaConfig",
+    "AutoModel": "isemmanuelolowe/Jamba-4xMoE_slerp--modeling_jamba.JambaModel",
+    "AutoModelForCausalLM": "isemmanuelolowe/Jamba-4xMoE_slerp--modeling_jamba.JambaForCausalLM",
+    "AutoModelForSequenceClassification": "isemmanuelolowe/Jamba-4xMoE_slerp--model.JambaForSequenceClassification"
   },
   "bos_token_id": 1,
   "calc_logits_for_entire_prompt": false,
@@ -31,29 +31,12 @@
   "model_type": "jamba",
   "n_ctx": 262144,
   "num_attention_heads": 32,
-  "num_experts": 16,
+  "num_experts": 4,
   "num_experts_per_tok": 2,
   "num_hidden_layers": 32,
   "num_key_value_heads": 8,
   "output_router_logits": false,
   "pad_token_id": 0,
-  "quantization_config": {
-    "_load_in_4bit": true,
-    "_load_in_8bit": false,
-    "bnb_4bit_compute_dtype": "float32",
-    "bnb_4bit_quant_storage": "uint8",
-    "bnb_4bit_quant_type": "fp4",
-    "bnb_4bit_use_double_quant": false,
-    "llm_int8_enable_fp32_cpu_offload": false,
-    "llm_int8_has_fp16_weight": false,
-    "llm_int8_skip_modules": [
-      "mamba"
-    ],
-    "llm_int8_threshold": 6.0,
-    "load_in_4bit": true,
-    "load_in_8bit": false,
-    "quant_method": "bitsandbytes"
-  },
   "rms_norm_eps": 1e-06,
   "router_aux_loss_coef": 0.001,
   "sliding_window": null,
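The updated auto_map entries point at custom Jamba classes hosted inside the isemmanuelolowe/Jamba-4xMoE_slerp repo, and the quantization_config block has been dropped from config.json. A minimal loading sketch under those assumptions follows; trust_remote_code, device_map, and the BitsAndBytesConfig values (which simply mirror the removed block) are illustrative choices, not part of this commit.

# Minimal loading sketch. Assumptions: transformers and bitsandbytes are installed,
# and the repo id matches the new "_name_or_path" from this commit.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

repo_id = "isemmanuelolowe/Jamba-4xMoE_slerp"

# config.json no longer carries a quantization_config, so 4-bit loading (if wanted)
# has to be requested at load time; these values mirror the removed block
# (fp4 quantization, float32 compute dtype, "mamba" modules left un-quantized).
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="fp4",
    bnb_4bit_compute_dtype=torch.float32,
    bnb_4bit_use_double_quant=False,
    llm_int8_skip_modules=["mamba"],
)

tokenizer = AutoTokenizer.from_pretrained(repo_id)
model = AutoModelForCausalLM.from_pretrained(
    repo_id,
    quantization_config=bnb_config,  # omit to load the full-precision shards instead
    trust_remote_code=True,          # auto_map resolves to modeling_jamba.py in the repo
    device_map="auto",
)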
model-00001-of-00008.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9222ed281e8f2cbf3b531e22ca1514267559e5c408668f3985e944850a8a61ba
+size 4987630232
model-00002-of-00008.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9074b89993cb1c8a4df25b413c15ac811367ab34263e2cfc26aeb3f4b544ce15
+size 4930124392
model-00003-of-00008.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1b72f78ad801bde740c0860faf2d24e2423e0771faae1b791c5e023f9e798999
+size 4944522688
model-00004-of-00008.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a49c767416c0f468462f6f25fcb49ac927647ac8117a248c5d862deff77a9171
+size 4954075928
model-00005-of-00008.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:66d1521e9dfbf5479c2bc589ea1a202a5f5beae8a5c088cb47f2bf92fd3a8ef7
+size 4921178040
model-00006-of-00008.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:87864b5e4d96a9a76d71bc798b67854ba567fbfdf77b9580a9adb58ca9efc19b
+size 4929484872
model-00007-of-00008.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0b73c561783801dc085949ac87eae658d122dd3c7826dc660e10d4b530be547e
+size 4944522776
model-00008-of-00008.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f88e8de536daee5b7da10980e98b4d735efa5a5ab16bd671a4332143aac9a042
+size 889217840
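Each of the eight ADDED shard entries is a Git LFS pointer (version, sha256 oid, size) rather than the weights themselves; the real files are roughly 5 GB each except the last. A rough verification sketch follows: the repo id, filenames, and hashes come from this commit, while the use of hf_hub_download is an assumed retrieval path, not something the commit specifies.

# Sketch: fetch the shards and check them against the sha256 oids in the LFS pointers.
# Assumption: huggingface_hub is installed; only two hashes are spelled out here,
# the remaining shards follow the same pattern using the pointers above.
import hashlib
from huggingface_hub import hf_hub_download

repo_id = "isemmanuelolowe/Jamba-4xMoE_slerp"

expected = {
    "model-00001-of-00008.safetensors": "9222ed281e8f2cbf3b531e22ca1514267559e5c408668f3985e944850a8a61ba",
    "model-00008-of-00008.safetensors": "f88e8de536daee5b7da10980e98b4d735efa5a5ab16bd671a4332143aac9a042",
    # ...copy the oids for shards 2 through 7 from the pointers above
}

for filename, oid in expected.items():
    path = hf_hub_download(repo_id=repo_id, filename=filename)
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        # Hash in 16 MiB chunks to avoid loading a multi-GB shard into memory at once.
        for chunk in iter(lambda: f.read(1 << 24), b""):
            digest.update(chunk)
    status = "OK" if digest.hexdigest() == oid else "MISMATCH"
    print(f"{filename}: {status}")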
model.safetensors.index.json CHANGED
The diff for this file is too large to render.
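model.safetensors.index.json follows the standard sharded-checkpoint index layout: a metadata block with the total size plus a weight_map from parameter names to shard filenames. Since the diff is too large to render, below is a small sketch for inspecting the file locally; the local path and the keys printed are assumptions based on that standard format, not on the unrendered diff.

# Sketch: summarize the sharded-checkpoint index (standard safetensors index format).
# Assumption: the file has already been downloaded to the working directory.
import json
from collections import Counter

with open("model.safetensors.index.json") as f:
    index = json.load(f)

print("total_size (bytes):", index["metadata"]["total_size"])

# weight_map: parameter name -> shard filename; count tensors stored in each shard.
for shard, n_tensors in sorted(Counter(index["weight_map"].values()).items()):
    print(f"{shard}: {n_tensors} tensors")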