powermove72 committed
Commit a08b0e9 (parent: 22dd144)

Upload folder using huggingface_hub
README.md CHANGED

````diff
@@ -22,7 +22,7 @@ GK-MoE-0.1 is a Mixture of Experts (MoE) made with the following models using [L
 ## 🧩 Configuration
 
 ```yaml
-base_model: argilla/notus-7b-v1
+base_model: GritLM/GritLM-7B
 experts:
 - source_model: argilla/notus-7b-v1
   positive_prompts:
````
config.json CHANGED

```diff
@@ -1,15 +1,26 @@
 {
-  "_name_or_path": "argilla/notus-7b-v1",
+  "_name_or_path": "GritLM/GritLM-7B",
   "architectures": [
     "MixtralForCausalLM"
   ],
   "attention_dropout": 0.0,
+  "auto_map": {
+    "AutoModel": "GritLM/GritLM-7B--modeling_gritlm7b.MistralModel",
+    "AutoModelForCausalLM": "GritLM/GritLM-7B--modeling_gritlm7b.MistralForCausalLM",
+    "AutoModelForSequenceClassification": "GritLM/GritLM-7B--modeling_gritlm7b.MistralForSequenceClassification"
+  },
   "bos_token_id": 1,
   "eos_token_id": 2,
   "hidden_act": "silu",
   "hidden_size": 4096,
+  "id2label": {
+    "0": "LABEL_0"
+  },
   "initializer_range": 0.02,
   "intermediate_size": 14336,
+  "label2id": {
+    "LABEL_0": 0
+  },
   "max_position_embeddings": 32768,
   "model_type": "mixtral",
   "num_attention_heads": 32,
@@ -26,6 +37,6 @@
   "tie_word_embeddings": false,
   "torch_dtype": "bfloat16",
   "transformers_version": "4.41.2",
-  "use_cache": false,
+  "use_cache": true,
   "vocab_size": 32000
 }
```
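The config change retargets `_name_or_path` and `auto_map` at GritLM/GritLM-7B, adds default single-label `id2label`/`label2id` maps, and flips `use_cache` back to `true`, re-enabling the KV cache during generation. A minimal loading sketch, assuming the repo id is `powermove72/GK-MoE-0.1` (the diff itself never names the repository); since `architectures` lists the stock `MixtralForCausalLM`, `trust_remote_code` should not be needed unless you deliberately load the custom GritLM classes from `auto_map`:

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

repo_id = "powermove72/GK-MoE-0.1"  # assumed repo id, not stated in the diff

tokenizer = AutoTokenizer.from_pretrained(repo_id)
model = AutoModelForCausalLM.from_pretrained(
    repo_id,
    torch_dtype=torch.bfloat16,  # matches "torch_dtype": "bfloat16" in config.json
)

# With "use_cache": true, generate() uses the KV cache (the usual default).
inputs = tokenizer("Hello", return_tensors="pt")
out = model.generate(**inputs, max_new_tokens=20)
print(tokenizer.decode(out[0]))
```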
mergekit_moe_config.yml CHANGED

```diff
@@ -1,5 +1,5 @@
 
-base_model: argilla/notus-7b-v1
+base_model: GritLM/GritLM-7B
 experts:
 - source_model: argilla/notus-7b-v1
   positive_prompts:
```
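This is the same `base_model` swap shown in the README: GritLM/GritLM-7B replaces argilla/notus-7b-v1 as the base of the merge, while notus stays on as an expert (the `positive_prompts` values are truncated in this view). Re-running the merge would look something like `mergekit-moe mergekit_moe_config.yml ./out`. As a sketch, the shipped recipe can also be inspected directly, again assuming the repo id above:

```python
import yaml
from huggingface_hub import hf_hub_download

# Assumed repo id; the diff does not name the repository explicitly.
path = hf_hub_download("powermove72/GK-MoE-0.1", "mergekit_moe_config.yml")
with open(path) as f:
    recipe = yaml.safe_load(f)

print(recipe["base_model"])                            # GritLM/GritLM-7B after this commit
print([e["source_model"] for e in recipe["experts"]])  # includes argilla/notus-7b-v1
```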
model-00001-of-00013.safetensors CHANGED

```diff
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d3c7ea3769cc03fb5b7f33accb8c889939297796ea8f630d42a8a19c220a5886
+oid sha256:3747b7732ee3438c6321c10f8f5827bc8e1cb3630c3e9c138ddfe2744d7ca75d
 size 1933849912
```

model-00012-of-00013.safetensors CHANGED

```diff
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ab3ba073bbb0b9526c5aab1d2500d4bfb3feb1c855631cd2e80b0f65537c0a75
+oid sha256:c7d454c3bbc7ef3ce2dc7b4c3eef97c2d66b9782699bbe89e843f82f3ce093cf
 size 1979981568
```

model-00013-of-00013.safetensors CHANGED

```diff
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:996596a2d3cf0ab98b2dc42d041e306aef28406ed77ffdc1067ed166973cdf30
+oid sha256:221cf32ee89d16ae085752473b77d36b4708ef3ed889b3b186df1d183cc4b5d6
 size 1879588416
```
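The re-uploaded shards keep their byte sizes but get new content hashes, consistent with re-running the merge against a different base model. A small standard-library sketch for checking a downloaded shard against its pointer's `oid` (the filename and hash are copied straight from the diff above):

```python
import hashlib

def sha256_of(path: str, chunk_size: int = 1 << 20) -> str:
    """Stream the file so multi-GB shards never need to fit in memory."""
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        while block := f.read(chunk_size):
            digest.update(block)
    return digest.hexdigest()

# New oid of the first shard, from the LFS pointer above.
expected = "3747b7732ee3438c6321c10f8f5827bc8e1cb3630c3e9c138ddfe2744d7ca75d"
assert sha256_of("model-00001-of-00013.safetensors") == expected
```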
special_tokens_map.json CHANGED

```diff
@@ -1,9 +1,4 @@
 {
-  "additional_special_tokens": [
-    "<unk>",
-    "<s>",
-    "</s>"
-  ],
   "bos_token": {
     "content": "<s>",
     "lstrip": false,
```
tokenizer_config.json CHANGED

```diff
@@ -27,22 +27,24 @@
       "special": true
     }
   },
-  "additional_special_tokens": [
-    "<unk>",
-    "<s>",
-    "</s>"
-  ],
+  "additional_special_tokens": [],
   "bos_token": "<s>",
-  "chat_template": "{% for message in messages %}\n{% if message['role'] == 'user' %}\n{{ '<|user|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'system' %}\n{{ '<|system|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'assistant' %}\n{{ '<|assistant|>\n' + message['content'] + eos_token }}\n{% endif %}\n{% if loop.last and add_generation_prompt %}\n{{ '<|assistant|>' }}\n{% endif %}\n{% endfor %}",
+  "chat_template": "{{ bos_token }}{% for message in messages %}\n{% if message['role'] == 'user' %}\n{{ '<|user|>\n' + message['content'] }}\n{% elif message['role'] == 'assistant' %}\n{{ '<|assistant|>\n' + message['content'] + eos_token }}\n{% endif %}\n{% if loop.last and add_generation_prompt %}\n{{ '<|assistant|>' }}\n{% endif %}\n{% endfor %}",
   "clean_up_tokenization_spaces": false,
   "eos_token": "</s>",
   "legacy": true,
+  "max_length": 2048,
   "model_max_length": 1000000000000000019884624838656,
+  "pad_to_multiple_of": null,
   "pad_token": "<s>",
+  "pad_token_type_id": 0,
+  "padding_side": "left",
   "sp_model_kwargs": {},
   "spaces_between_special_tokens": false,
+  "stride": 0,
   "tokenizer_class": "LlamaTokenizer",
-  "truncation_side": "left",
+  "truncation_side": "right",
+  "truncation_strategy": "longest_first",
   "unk_token": "<unk>",
-  "use_default_system_prompt": true
+  "use_default_system_prompt": false
 }
```
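The new tokenizer config empties `additional_special_tokens`, switches to a chat template that prepends `bos_token` once, drops the `system` branch, and no longer appends `eos_token` to user turns; it also sets `padding_side` to `left`, the usual choice for batched decoder-only generation. A sketch of rendering the new template, once more assuming the repo id above:

```python
from transformers import AutoTokenizer

# Assumed repo id, as above.
tok = AutoTokenizer.from_pretrained("powermove72/GK-MoE-0.1")
prompt = tok.apply_chat_template(
    [{"role": "user", "content": "Hi there"}],
    tokenize=False,
    add_generation_prompt=True,
)
print(prompt)
# Expected to look roughly like:
# <s><|user|>
# Hi there
# <|assistant|>
```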