Upload MixtralForCausalLM
Browse files- config.json +36 -0
- generation_config.json +6 -0
- model-00001-of-00034.safetensors +3 -0
- model-00002-of-00034.safetensors +3 -0
- model-00003-of-00034.safetensors +3 -0
- model-00004-of-00034.safetensors +3 -0
- model-00005-of-00034.safetensors +3 -0
- model-00006-of-00034.safetensors +3 -0
- model-00007-of-00034.safetensors +3 -0
- model-00008-of-00034.safetensors +3 -0
- model-00009-of-00034.safetensors +3 -0
- model-00010-of-00034.safetensors +3 -0
- model-00011-of-00034.safetensors +3 -0
- model-00012-of-00034.safetensors +3 -0
- model-00013-of-00034.safetensors +3 -0
- model-00014-of-00034.safetensors +3 -0
- model-00015-of-00034.safetensors +3 -0
- model-00016-of-00034.safetensors +3 -0
- model-00017-of-00034.safetensors +3 -0
- model-00018-of-00034.safetensors +3 -0
- model-00019-of-00034.safetensors +3 -0
- model-00020-of-00034.safetensors +3 -0
- model-00021-of-00034.safetensors +3 -0
- model-00022-of-00034.safetensors +3 -0
- model-00023-of-00034.safetensors +3 -0
- model-00024-of-00034.safetensors +3 -0
- model-00025-of-00034.safetensors +3 -0
- model-00026-of-00034.safetensors +3 -0
- model-00027-of-00034.safetensors +3 -0
- model-00028-of-00034.safetensors +3 -0
- model-00029-of-00034.safetensors +3 -0
- model-00030-of-00034.safetensors +3 -0
- model-00031-of-00034.safetensors +3 -0
- model-00032-of-00034.safetensors +3 -0
- model-00033-of-00034.safetensors +3 -0
- model-00034-of-00034.safetensors +3 -0
- model.safetensors.index.json +0 -0
config.json
ADDED
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"architectures": [
|
3 |
+
"MixtralForCausalLM"
|
4 |
+
],
|
5 |
+
"attention_bias": true,
|
6 |
+
"attention_dropout": 0.0,
|
7 |
+
"bos_token_id": 1,
|
8 |
+
"eos_token_id": 2,
|
9 |
+
"hidden_act": "silu",
|
10 |
+
"hidden_size": 4096,
|
11 |
+
"initializer_range": 0.02,
|
12 |
+
"input_jitter_noise": 0.01,
|
13 |
+
"intermediate_size": 6400,
|
14 |
+
"lm_head_bias": true,
|
15 |
+
"lora_rank": 32,
|
16 |
+
"max_position_embeddings": 4096,
|
17 |
+
"model_type": "mixtral",
|
18 |
+
"num_attention_heads": 32,
|
19 |
+
"num_experts_per_tok": 2,
|
20 |
+
"num_hidden_layers": 32,
|
21 |
+
"num_key_value_heads": 8,
|
22 |
+
"num_local_experts": 16,
|
23 |
+
"output_router_logits": false,
|
24 |
+
"rms_norm_eps": 1e-05,
|
25 |
+
"rope_theta": 10000.0,
|
26 |
+
"router_aux_loss_coef": 0.0,
|
27 |
+
"router_jitter_noise": 0.01,
|
28 |
+
"separate_lora": false,
|
29 |
+
"sliding_window": 2047,
|
30 |
+
"tie_word_embeddings": false,
|
31 |
+
"torch_dtype": "float32",
|
32 |
+
"transformers_version": "4.39.2",
|
33 |
+
"use_cache": true,
|
34 |
+
"use_lora": true,
|
35 |
+
"vocab_size": 32064
|
36 |
+
}
|
generation_config.json
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_from_model_config": true,
|
3 |
+
"bos_token_id": 1,
|
4 |
+
"eos_token_id": 2,
|
5 |
+
"transformers_version": "4.39.2"
|
6 |
+
}
|
model-00001-of-00034.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c8b7e29396bf41490f8b29b0644697dad24cbd775738d95e4f76ffccbc07081c
|
3 |
+
size 4996774312
|
model-00002-of-00034.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ba3d55fbab76673e0ae7d49d835827a792971b978d6d5159c74e742fb17e015c
|
3 |
+
size 4995792256
|
model-00003-of-00034.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a090db57fe9af2df2a797007563372ae17446734736dc8373dda6979653b2fd1
|
3 |
+
size 4995792256
|
model-00004-of-00034.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:028d0db9c7f8400ca70aa387dbc2287db85ff28cae39b529bbf20a50ee0605e9
|
3 |
+
size 4995792264
|
model-00005-of-00034.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d124b61438245317d3e27a6b9bd7db20a6c8b53574a45aea62b0d4e74c20819a
|
3 |
+
size 4995792264
|
model-00006-of-00034.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5ef68989854530655cac1b1c396c1b53f12ba2c7baac833118c0bf33b1d6f55c
|
3 |
+
size 4995792264
|
model-00007-of-00034.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:90c05152cdec05f34c2f2b9ccba0ae037d3150ef894aeb368d0aa2b5392b8cd6
|
3 |
+
size 4995792264
|
model-00008-of-00034.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c2089a859057695133f570f963d876572a3c88a99ddc13fcbd6b0637435959f1
|
3 |
+
size 4995792264
|
model-00009-of-00034.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:61b9cdcfb3f6949cca6758d049efea9967bae77d93dfaf17fb0453067644a118
|
3 |
+
size 4995792264
|
model-00010-of-00034.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:46e99eace2cbe2832f117442b520fa8abb594ab01a46c56f5ca0c095d671a1e7
|
3 |
+
size 4995792264
|
model-00011-of-00034.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:be5034280907f313955768a7c9096ce0d802ad6cfc0ac9a351bb7e6c80e9e851
|
3 |
+
size 4995792296
|
model-00012-of-00034.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e95c29d2fbe83af18031b874354700daa9a9cf135fb66562ffb07424cc3167c2
|
3 |
+
size 4995792328
|
model-00013-of-00034.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e357324901f023e3bcbf97cdafa2d4ffadb9d55f4ed8caa31dd4833134259bf2
|
3 |
+
size 4995792328
|
model-00014-of-00034.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:896bd3d8f94ecc94ec9d1a13035acfc618eb65c80231313db5de8d20d89680f9
|
3 |
+
size 4995792328
|
model-00015-of-00034.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9f1effaa9c65fef970dec3aa6e85dd7ba3a2225fef8cec2bee67befb87cafc71
|
3 |
+
size 4995792328
|
model-00016-of-00034.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:422a95730d8cc4c1a72c57f67a1f083ded87f0eaea6e4f9e1b292d3349641553
|
3 |
+
size 4995792328
|
model-00017-of-00034.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7150c6a32fed198efb4758270e60da5e3a28122c2b6a58fb82ece7012ac324c7
|
3 |
+
size 4995792328
|
model-00018-of-00034.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ffb6cdd3266bb31b474e6d2e0aefca243d50264aeb1ec86b0aae0d8fac9ff499
|
3 |
+
size 4995792328
|
model-00019-of-00034.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:344f392dca7de68153c314eb99bd1f32bb98cf9876bb7955b6678024219aabb8
|
3 |
+
size 4995792328
|
model-00020-of-00034.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1d6ef5687a568a167861712269c9bbeff048479a1f2e3dbf824e619955e72d93
|
3 |
+
size 4995792328
|
model-00021-of-00034.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ab871dbfe7e71d7a98bb3f4b4777fa3ec8443b7e70cab3121a7ad7abbac8ed10
|
3 |
+
size 4995792328
|
model-00022-of-00034.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9100096114aac0dbc586cc8899c1455cdea49253f73dae45c196b2c798aa158f
|
3 |
+
size 4995504816
|
model-00023-of-00034.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e6dc457baed019c026b39fe613b06ac59f48edd8ead0487d891db9cb7114d1d5
|
3 |
+
size 4928600848
|
model-00024-of-00034.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9bd1434508de49d93bc7bed1dcb69a6b225037a949fa87c5dde76b1f68bc7042
|
3 |
+
size 4995792312
|
model-00025-of-00034.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:25781674185e08b059a662c423bb197a2dae9a3642735b519595cf8e814fb9d8
|
3 |
+
size 4995792312
|
model-00026-of-00034.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:00bead8babb9cc66c9b48a667073be889ac027f3f3377b0bc2136969db15b42d
|
3 |
+
size 4995792312
|
model-00027-of-00034.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:225121174d902196fc7c50dbeaf8213d034604c217e3297355178dd14c726d15
|
3 |
+
size 4995792312
|
model-00028-of-00034.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ba686043b7b3a5932b11fcd9a4ee7fa63cbb9d99b2cfb85b5245e3b6961528ba
|
3 |
+
size 4995792328
|
model-00029-of-00034.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1d79e0f792bd5271d1f3982aa012302100b00c66cff4e22180b4a78728b70d77
|
3 |
+
size 4995792328
|
model-00030-of-00034.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c6b9b13c9d752a53a4f0ff4545272fa41851424a680685f999d34b799969f983
|
3 |
+
size 4995792328
|
model-00031-of-00034.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d27100810e318a37522c2a2c6dbc360058f909ac0188aa8286980cf2818ed451
|
3 |
+
size 4995792328
|
model-00032-of-00034.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:45c237522786c39ad0612492fea85aa19adaf48f7912f616fbc9014701228bde
|
3 |
+
size 4995792328
|
model-00033-of-00034.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:74603983130d748b4ac708153d9130942e4a3849833cd4cc78d5eda5f6b7f012
|
3 |
+
size 4995792328
|
model-00034-of-00034.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8142074f97ccd1e00be4ffbf4b2bbfe3c054789575e314db9adcd304640106ad
|
3 |
+
size 2832434048
|
model.safetensors.index.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|