mgoin committed on
Commit
f94d93e
1 Parent(s): 7c61824

Upload folder using huggingface_hub

Browse files
config.json CHANGED
@@ -10,7 +10,7 @@
10
  "hidden_size": 6144,
11
  "initializer_range": 0.02,
12
  "intermediate_size": 16384,
13
- "max_position_embeddings": 8192,
14
  "model_type": "mixtral",
15
  "num_attention_heads": 48,
16
  "num_experts_per_tok": 2,
@@ -21,63 +21,63 @@
21
  "quantization_config": {
22
  "activation_scheme": "static",
23
  "ignored_layers": [
24
- "model.layers.9.block_sparse_moe.gate",
25
- "model.layers.21.block_sparse_moe.gate",
26
- "model.layers.17.block_sparse_moe.gate",
27
- "model.layers.34.block_sparse_moe.gate",
28
- "lm_head",
29
- "model.layers.7.block_sparse_moe.gate",
30
- "model.layers.28.block_sparse_moe.gate",
31
  "model.layers.40.block_sparse_moe.gate",
32
- "model.layers.4.block_sparse_moe.gate",
33
- "model.layers.12.block_sparse_moe.gate",
34
- "model.layers.15.block_sparse_moe.gate",
35
- "model.layers.44.block_sparse_moe.gate",
36
- "model.layers.26.block_sparse_moe.gate",
37
- "model.layers.38.block_sparse_moe.gate",
38
- "model.layers.47.block_sparse_moe.gate",
39
- "model.layers.27.block_sparse_moe.gate",
40
- "model.layers.6.block_sparse_moe.gate",
41
- "model.layers.5.block_sparse_moe.gate",
42
  "model.layers.11.block_sparse_moe.gate",
 
 
43
  "model.layers.10.block_sparse_moe.gate",
44
- "model.layers.54.block_sparse_moe.gate",
45
- "model.layers.25.block_sparse_moe.gate",
 
 
46
  "model.layers.1.block_sparse_moe.gate",
 
 
 
 
47
  "model.layers.41.block_sparse_moe.gate",
48
- "model.layers.33.block_sparse_moe.gate",
49
  "model.layers.45.block_sparse_moe.gate",
50
- "model.layers.14.block_sparse_moe.gate",
51
- "model.layers.2.block_sparse_moe.gate",
52
- "model.layers.52.block_sparse_moe.gate",
53
- "model.layers.24.block_sparse_moe.gate",
54
- "model.layers.43.block_sparse_moe.gate",
 
 
 
 
55
  "model.layers.48.block_sparse_moe.gate",
 
 
56
  "model.layers.29.block_sparse_moe.gate",
57
- "model.layers.35.block_sparse_moe.gate",
58
- "model.layers.18.block_sparse_moe.gate",
59
- "model.layers.50.block_sparse_moe.gate",
60
- "model.layers.0.block_sparse_moe.gate",
61
- "model.layers.8.block_sparse_moe.gate",
62
- "model.layers.23.block_sparse_moe.gate",
63
- "model.layers.49.block_sparse_moe.gate",
64
  "model.layers.42.block_sparse_moe.gate",
65
- "model.layers.22.block_sparse_moe.gate",
66
- "model.layers.39.block_sparse_moe.gate",
67
  "model.layers.51.block_sparse_moe.gate",
68
- "model.layers.31.block_sparse_moe.gate",
69
- "model.layers.36.block_sparse_moe.gate",
70
  "model.layers.32.block_sparse_moe.gate",
71
- "model.layers.37.block_sparse_moe.gate",
72
- "model.layers.16.block_sparse_moe.gate",
73
- "model.layers.46.block_sparse_moe.gate",
74
- "model.layers.53.block_sparse_moe.gate",
75
- "model.layers.19.block_sparse_moe.gate",
 
76
  "model.layers.3.block_sparse_moe.gate",
77
- "model.layers.30.block_sparse_moe.gate",
78
- "model.layers.55.block_sparse_moe.gate",
79
- "model.layers.20.block_sparse_moe.gate",
80
- "model.layers.13.block_sparse_moe.gate"
 
 
 
81
  ],
82
  "quant_method": "fp8"
83
  },
 
10
  "hidden_size": 6144,
11
  "initializer_range": 0.02,
12
  "intermediate_size": 16384,
13
+ "max_position_embeddings": 65536,
14
  "model_type": "mixtral",
15
  "num_attention_heads": 48,
16
  "num_experts_per_tok": 2,
 
21
  "quantization_config": {
22
  "activation_scheme": "static",
23
  "ignored_layers": [
24
+ "model.layers.55.block_sparse_moe.gate",
25
+ "model.layers.0.block_sparse_moe.gate",
26
+ "model.layers.36.block_sparse_moe.gate",
27
+ "model.layers.2.block_sparse_moe.gate",
28
+ "model.layers.8.block_sparse_moe.gate",
 
 
29
  "model.layers.40.block_sparse_moe.gate",
 
 
 
 
 
 
 
 
 
 
30
  "model.layers.11.block_sparse_moe.gate",
31
+ "model.layers.15.block_sparse_moe.gate",
32
+ "model.layers.23.block_sparse_moe.gate",
33
  "model.layers.10.block_sparse_moe.gate",
34
+ "model.layers.7.block_sparse_moe.gate",
35
+ "model.layers.38.block_sparse_moe.gate",
36
+ "model.layers.43.block_sparse_moe.gate",
37
+ "model.layers.53.block_sparse_moe.gate",
38
  "model.layers.1.block_sparse_moe.gate",
39
+ "model.layers.37.block_sparse_moe.gate",
40
+ "model.layers.54.block_sparse_moe.gate",
41
+ "model.layers.24.block_sparse_moe.gate",
42
+ "model.layers.28.block_sparse_moe.gate",
43
  "model.layers.41.block_sparse_moe.gate",
44
+ "model.layers.17.block_sparse_moe.gate",
45
  "model.layers.45.block_sparse_moe.gate",
46
+ "model.layers.6.block_sparse_moe.gate",
47
+ "model.layers.20.block_sparse_moe.gate",
48
+ "model.layers.25.block_sparse_moe.gate",
49
+ "model.layers.34.block_sparse_moe.gate",
50
+ "model.layers.12.block_sparse_moe.gate",
51
+ "model.layers.26.block_sparse_moe.gate",
52
+ "model.layers.50.block_sparse_moe.gate",
53
+ "lm_head",
54
+ "model.layers.19.block_sparse_moe.gate",
55
  "model.layers.48.block_sparse_moe.gate",
56
+ "model.layers.33.block_sparse_moe.gate",
57
+ "model.layers.4.block_sparse_moe.gate",
58
  "model.layers.29.block_sparse_moe.gate",
59
+ "model.layers.44.block_sparse_moe.gate",
60
+ "model.layers.16.block_sparse_moe.gate",
 
 
 
 
 
61
  "model.layers.42.block_sparse_moe.gate",
62
+ "model.layers.30.block_sparse_moe.gate",
 
63
  "model.layers.51.block_sparse_moe.gate",
64
+ "model.layers.21.block_sparse_moe.gate",
65
+ "model.layers.18.block_sparse_moe.gate",
66
  "model.layers.32.block_sparse_moe.gate",
67
+ "model.layers.35.block_sparse_moe.gate",
68
+ "model.layers.49.block_sparse_moe.gate",
69
+ "model.layers.14.block_sparse_moe.gate",
70
+ "model.layers.31.block_sparse_moe.gate",
71
+ "model.layers.47.block_sparse_moe.gate",
72
+ "model.layers.22.block_sparse_moe.gate",
73
  "model.layers.3.block_sparse_moe.gate",
74
+ "model.layers.46.block_sparse_moe.gate",
75
+ "model.layers.9.block_sparse_moe.gate",
76
+ "model.layers.13.block_sparse_moe.gate",
77
+ "model.layers.39.block_sparse_moe.gate",
78
+ "model.layers.52.block_sparse_moe.gate",
79
+ "model.layers.5.block_sparse_moe.gate",
80
+ "model.layers.27.block_sparse_moe.gate"
81
  ],
82
  "quant_method": "fp8"
83
  },
model-00001-of-00029.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6a6839dca978613fb1f71e04f8a1e1fc5276f785f96553ad401adc6b6e9ce1de
3
- size 4907575664
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3b9fd1bf1820131291ecc53097e9f01963df9b636dfeacb5cd0d491d892dd0e8
3
+ size 4907575696
model-00002-of-00029.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f1aa5f53050502784836c375b06acb5ba8b1ec590caab38ae15609938c216e53
3
- size 4907601776
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:80c86c023c75d4ea23d9e89ef602dbec3a6c8db837e3fd4656d9c201c615cef3
3
+ size 4907601808
model-00003-of-00029.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c8e7be43e4d252398a894e85b5c6c34cb70904dd58f2ac23233e71990e1b2663
3
- size 4907601776
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b5055964602f29228e5e3715d514b7d490880c84deaf22b855e5714e3da5b2a1
3
+ size 4907601808
model-00004-of-00029.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:51e2eeb8f54100450f4932e7bf6da41c76ca2a3ddddab7a36f2de6cfa1358372
3
- size 4907601776
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:497ddf951a49f27e6a03efbbe0df106d1b7ce351b4008df5caae2877953f05f8
3
+ size 4907601808
model-00005-of-00029.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2f3ac19f4bc32e03f50eab48c0b6231a3a68c127acc5ed0aa47d4a2af39ae47d
3
- size 4907601776
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fd29ac1a2126c2b70bfa5c66344afb1b74a03003dd64390438eb4e89bd66c5f1
3
+ size 4907601808
model-00006-of-00029.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5822e59664e8263b6bebcab8554e2799779db3f55358e1f746331ba0d103d154
3
- size 4907601920
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:76df8b4518e0b9a681f7873eb2d9e447dc76f4b50c0da8ab84616e6bcea0a7d2
3
+ size 4907601952
model-00007-of-00029.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8eba20fd06ef42fd5db554886ead700dd3fe3d25da39e8e0483180210dbd8ed0
3
- size 4907601944
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:32e7e0565cdc76c70e337c8434d897e858090749c23f8a8eb1463dce988cbb6d
3
+ size 4907601976
model-00008-of-00029.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1cbb189b8ea9f501c8b54fda421898626572619eba1fcfbc3516ae2e78e8b7f6
3
- size 4907601944
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5a7fdebfc190def06160df1a29b3caa28874d8de1ee0470a5c16faeea69ca92e
3
+ size 4907601976
model-00009-of-00029.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:693f0167082cf3061ec78f473db2166e842ecd3032d2c8ebc00facfcbd9076ca
3
- size 4907601944
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3a1cd5fbb538b130b3a23aa251ef1cecff8d6087d0d881506dddc9327640988a
3
+ size 4907601976
model-00010-of-00029.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3def56999aa6cc9b2c83fb2e5f291c8e422bed9964884111cc9d9de1472502b1
3
- size 4907601944
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3411f179c7098c112fbc07bce52cbd08e5de0cf68e718fc3486bc7ac0c92511a
3
+ size 4907601976
model-00011-of-00029.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:93263bb445fff535352bc5b87403a9d7615bccd4a2cb1681948aac7bf07cd56b
3
- size 4907601944
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:48e99303ff5e2f842b19c3bf18da8fe846c05ad4ef26eb63d110ff870d6cfe1d
3
+ size 4907601976
model-00012-of-00029.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ca248042a37e625718ff51d2e864a18d216ba7381248b9344a34b42253adcd9d
3
- size 4907601944
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:65e2d921e191feb8441e762a492094bbcc94dcfcac26ca322f8c443da02278a5
3
+ size 4907601976
model-00013-of-00029.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:aafd76541a14f31a88d673cd4b9b9bf84739abea72a1e21d1bfcc8ed7ecc1ed1
3
- size 4907601944
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5512fd75fd75118e95fafee9aff4d0ab1ba651050d542a6d62d9efc238290618
3
+ size 4907601976
model-00014-of-00029.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7a02e4327aef42bb0bf90baafc3b28f292a238b111307453669be7813a15c89b
3
- size 4907601944
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:484bc510c824bacd410f85f874a49951bed8e01b4878ad0f47f3113134db0684
3
+ size 4907601976
model-00015-of-00029.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ac3a24b909d4c61aea1b3f553f643daaef13d96ed7cfcb15219642da4e02b328
3
- size 4907601944
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:88c5f317d94a603029d07879f2ac8b930b3041b787fb5fbd73ebe5556df0bbc0
3
+ size 4907601976
model-00016-of-00029.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6abfd72e6f11f526b02588fce71d598b5d562dd7551fe71246299a5de987f761
3
- size 4907601944
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a7f1cadb9eff927459aeaef7dc9e9327ea34e634cdd8863dd11c5448f8b0f3bf
3
+ size 4907601976
model-00017-of-00029.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:52a131de3a977b6cfce3f6b07670e88d42f1cf34cf86a94b5f578e893aa2eacd
3
- size 4907601944
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cf015a92807ff1fdc144890ec704d0bfc3849eb92063b68fa3d121cda57477dc
3
+ size 4907601976
model-00018-of-00029.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3105f2bf9a02dd8eb70dae40688ae3c2f683a6c9eecc1ad1e08449806c274bb4
3
- size 4907601944
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f3ce405075879cfb31269de08648cbe768c511287a58942c8dd4d58790dcbea4
3
+ size 4907601976
model-00019-of-00029.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c5e3ad75a63414a39e5c4d4179ded0e7a56df3be9d8a8755e85fb7ccdd1228c2
3
- size 4907601944
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6ba0d4a1c5285221b3546c1408c0322fa767e7a46ff81a4d501256245de9c521
3
+ size 4907601976
model-00020-of-00029.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8e50df8f92e2e1e8350244e6d7194040ae2a91f76493a5a86c359ed8e0f113e2
3
- size 4907601944
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6d2fd2dc4feedfa6d884a1b436250aa901ded80fba6cab78bce38aa1c428ad62
3
+ size 4907601976
model-00021-of-00029.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:df105cc17350040fa187e8af5bcb85ba2b33cdb38e7d4915e2e975d354a31e68
3
- size 4970418128
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f38d5833f9d845481d56bfe336d554599ebdf1dcdbee71ad85ef94cb97b9b62d
3
+ size 4970418160
model-00022-of-00029.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1650386ee4277216b9fabb8f9c955fdc453a71b5d3f4c6b9970926a33dfe47bc
3
- size 4995682048
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e2e2614b99ebaf1e7a6b0613fd02098421f5f3a57754f4a2b475633de3415c69
3
+ size 4995682080
model-00023-of-00029.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3e9d79bf0e9b15af2d19700a2ff5a2ebee8a8d8e0b23c09fd26ea3843bd1d9b8
3
- size 4970516552
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cd5c92ac521e109306b960e43fcb6ff74852324518e7eab7985ff0ac49603cd7
3
+ size 4970516584
model-00024-of-00029.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:160786e8ff0ea635822dde189b32d99a0d530239e7d61fb39de0407af66d9a82
3
- size 4907577160
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9b79d06fcd1dce85215273f956fa8aad2460ad3a67df6d317fd8eddda1b66609
3
+ size 4907577184
model-00025-of-00029.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5ba4c19f664771f71e72eb8fc2154cb0604e10c0cfe70453fbfc237382a77c52
3
- size 4907601944
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5184119557e2640e08dd892562b8ffc5a47b4c73390c0f1bef5057a9c6ab43d7
3
+ size 4907601976
model-00026-of-00029.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a7a2c530281337e2943afac5b09834767a016d81a2c9317e7f69017582d85fac
3
- size 4907601944
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:699e2e62245c205ce1a5676a9f4a398dca3928e3a2a6c1cdb849e77ff35e061a
3
+ size 4907601976
model-00027-of-00029.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6dbda58f7f2ffb2f754404877420ee52ffd9a64c20d5e556689a28cba43c8583
3
- size 4907601944
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:16b2d35138e1157153d3bfea0790b3260baae3ce3346e5adecff248f314b7088
3
+ size 4907601976
model-00028-of-00029.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:96963db640a7ed3067c1cc302ac036d1e2463cc5c5e11c68927edb8972c550b3
3
- size 4907601944
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ed08a4b8c2d63bb036758dfd4b2c6d894461ecdb7b836cbf5d009ae0c9f4a89e
3
+ size 4907601976
model-00029-of-00029.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6fbf9de7d52e3d43d4819c922000ce1992106b66391d289769f2dbe28ce641a0
3
- size 3410141576
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:45cfc14555c6e8bbe7656435203c0da1f936890cdd401402d80a12f86036e836
3
+ size 3410141608
model.safetensors.index.json CHANGED
The diff for this file is too large to render. See raw diff