DavidAU committed
Commit 421679d
1 Parent(s): ead78db

Upload folder using huggingface_hub

README.md CHANGED
@@ -6,43 +6,66 @@ tags:
 - merge
 
 ---
-# MN-Three-RCM-Instruct1-2a
+# Nemo-Grand-Gutenburg3
 
 This is a merge of pre-trained language models created using [mergekit](https://github.com/cg123/mergekit).
 
 ## Merge Details
 ### Merge Method
 
-This model was merged using the [DARE](https://arxiv.org/abs/2311.03099) [TIES](https://arxiv.org/abs/2306.01708) merge method using E:/MN-Rocinante-12B-v1.1-Instruct as a base.
+This model was merged using the passthrough merge method.
 
 ### Models Merged
 
 The following models were included in the merge:
-* E:/MN-magnum-v2.5-12b-kto-Instruct
-* E:/MN-12B-Celeste-V1.9-Instruct
+* G:/11B/Lyra4-Gutenberg-12B
+* G:/11B/Lyra-Gutenberg-mistral-nemo-12B
+* G:/11B/mistral-nemo-gutenberg-12B-v4
 
 ### Configuration
 
 The following YAML configuration was used to produce this model:
 
 ```yaml
-# Config 1
-# E:/MN-Rocinante-12B-v1.1-Instruct
-# E:/MN-12B-Celeste-V1.9-Instruct
-# E:/MN-magnum-v2.5-12b-kto-Instruct
-
-models:
-  - model: E:/MN-Rocinante-12B-v1.1-Instruct
-  - model: E:/MN-magnum-v2.5-12b-kto-Instruct
-    parameters:
-      weight: .6
-      density: .8
-  - model: E:/MN-12B-Celeste-V1.9-Instruct
-    parameters:
-      weight: .38
-      density: .6
-merge_method: dare_ties
-tokenizer_source: union
-base_model: E:/MN-Rocinante-12B-v1.1-Instruct
+# NEMO-Mistral version -> Scaled to 40 layers.
+
+# fix one ->
+# Fix 3 point double match @ layers 14,15 all 3 models (remove 2 layers)
+
+# fix 2 ?? - restored fix 1,
+# 30-31 -> all models 2nd group.
+
+# Fix 3??
+# 36-39 -> second last block.
+
+slices:
+  - sources:
+    - model: G:/11B/mistral-nemo-gutenberg-12B-v4
+      layer_range: [0, 16]
+  - sources:
+    - model: G:/11B/Lyra4-Gutenberg-12B
+      layer_range: [12, 24]
+  - sources:
+    - model: G:/11B/Lyra-Gutenberg-mistral-nemo-12B
+      layer_range: [14, 28]
+
+# secondary block
+
+  - sources:
+    - model: G:/11B/mistral-nemo-gutenberg-12B-v4
+      layer_range: [18, 30] # remove layer 30
+  - sources:
+    - model: G:/11B/mistral-nemo-gutenberg-12B-v4
+      layer_range: [31, 32] # remove layer 30
+  - sources:
+    - model: G:/11B/Lyra4-Gutenberg-12B
+      layer_range: [24, 31] # remove layer 31
+  - sources:
+    - model: G:/11B/Lyra4-Gutenberg-12B
+      layer_range: [32, 39] # remove layer 31
+  - sources:
+    - model: G:/11B/Lyra-Gutenberg-mistral-nemo-12B
+      layer_range: [30, 40]
+merge_method: passthrough
 dtype: bfloat16
 ```
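Since this is a passthrough merge with one source per slice, the depth of the resulting model is simply the sum of the sliced layer ranges (mergekit layer ranges are half-open, `[start, end)`). A minimal sanity-check sketch in Python, assuming the committed mergekit_config.yml is available in the working directory:

```python
# Sum the passthrough slice plan to get the merged model's depth.
import yaml

with open("mergekit_config.yml", "r", encoding="utf-8") as f:
    config = yaml.safe_load(f)

total = 0
for block in config["slices"]:
    for source in block["sources"]:
        start, end = source["layer_range"]  # half-open range of donor layers
        print(f"{source['model']}: [{start}, {end}) -> {end - start} layers")
        total += end - start

print("total hidden layers:", total)  # 16+12+14+12+1+7+7+10 = 79
```

That total, 79, is what appears as `num_hidden_layers` in the updated config.json below.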
config.json CHANGED
@@ -1,5 +1,5 @@
 {
-  "_name_or_path": "E:/MN-Rocinante-12B-v1.1-Instruct",
+  "_name_or_path": "G:/11B/Lyra4-Gutenberg-12B",
   "architectures": [
     "MistralForCausalLM"
   ],
@@ -14,7 +14,7 @@
   "max_position_embeddings": 1024000,
   "model_type": "mistral",
   "num_attention_heads": 32,
-  "num_hidden_layers": 63,
+  "num_hidden_layers": 79,
   "num_key_value_heads": 8,
   "rms_norm_eps": 1e-05,
   "rope_theta": 1000000.0,
@@ -22,6 +22,6 @@
   "tie_word_embeddings": false,
   "torch_dtype": "bfloat16",
   "transformers_version": "4.43.3",
-  "use_cache": true,
-  "vocab_size": 131074
+  "use_cache": false,
+  "vocab_size": 131072
 }
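The config diff is what makes the scaled-up stack loadable: `num_hidden_layers` moves from 63 to 79 to match the new slice plan, `vocab_size` changes from 131074 to 131072, and `use_cache` is now shipped disabled. A quick check with transformers (the model path is a placeholder for the local merge output or the hosted repo id):

```python
from transformers import AutoConfig

# Placeholder path; point at the merged checkpoint or its Hugging Face repo id.
cfg = AutoConfig.from_pretrained("path/to/Nemo-Grand-Gutenburg3")

print(cfg.model_type)         # mistral
print(cfg.num_hidden_layers)  # 79
print(cfg.vocab_size)         # 131072
print(cfg.use_cache)          # False as shipped; can be overridden at load time
```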
mergekit_config.yml CHANGED
@@ -1,19 +1,41 @@
-# Config 1
-# E:/MN-Rocinante-12B-v1.1-Instruct
-# E:/MN-12B-Celeste-V1.9-Instruct
-# E:/MN-magnum-v2.5-12b-kto-Instruct
-
-models:
-  - model: E:/MN-Rocinante-12B-v1.1-Instruct
-  - model: E:/MN-magnum-v2.5-12b-kto-Instruct
-    parameters:
-      weight: .6
-      density: .8
-  - model: E:/MN-12B-Celeste-V1.9-Instruct
-    parameters:
-      weight: .38
-      density: .6
-merge_method: dare_ties
-tokenizer_source: union
-base_model: E:/MN-Rocinante-12B-v1.1-Instruct
+# NEMO-Mistral version -> Scaled to 40 layers.
+
+# fix one ->
+# Fix 3 point double match @ layers 14,15 all 3 models (remove 2 layers)
+
+# fix 2 ?? - restored fix 1,
+# 30-31 -> all models 2nd group.
+
+# Fix 3??
+# 36-39 -> second last block.
+
+slices:
+  - sources:
+    - model: G:/11B/mistral-nemo-gutenberg-12B-v4
+      layer_range: [0, 16]
+  - sources:
+    - model: G:/11B/Lyra4-Gutenberg-12B
+      layer_range: [12, 24]
+  - sources:
+    - model: G:/11B/Lyra-Gutenberg-mistral-nemo-12B
+      layer_range: [14, 28]
+
+# secondary block
+
+  - sources:
+    - model: G:/11B/mistral-nemo-gutenberg-12B-v4
+      layer_range: [18, 30] # remove layer 30
+  - sources:
+    - model: G:/11B/mistral-nemo-gutenberg-12B-v4
+      layer_range: [31, 32] # remove layer 30
+  - sources:
+    - model: G:/11B/Lyra4-Gutenberg-12B
+      layer_range: [24, 31] # remove layer 31
+  - sources:
+    - model: G:/11B/Lyra4-Gutenberg-12B
+      layer_range: [32, 39] # remove layer 31
+  - sources:
+    - model: G:/11B/Lyra-Gutenberg-mistral-nemo-12B
+      layer_range: [30, 40]
+merge_method: passthrough
 dtype: bfloat16
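To re-run a recipe like this from the committed config, mergekit exposes a Python entry point alongside its CLI. The sketch below mirrors the usage example in the mergekit README; the output path is a placeholder, and the available MergeOptions fields can vary between mergekit versions, so treat the exact names as an assumption to check against your installed release:

```python
import yaml
from mergekit.config import MergeConfiguration
from mergekit.merge import MergeOptions, run_merge

# Parse the committed merge recipe (placeholder path).
with open("mergekit_config.yml", "r", encoding="utf-8") as f:
    merge_config = MergeConfiguration.model_validate(yaml.safe_load(f))

# Write the merged weights to a local folder; adjust the options for your hardware.
run_merge(
    merge_config,
    out_path="./Nemo-Grand-Gutenburg3",  # placeholder output directory
    options=MergeOptions(cuda=False, copy_tokenizer=True, lazy_unpickle=True),
)
```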
model.safetensors.index.json CHANGED
The diff for this file is too large to render. See raw diff
 
special_tokens_map.json CHANGED
@@ -1,23 +1,30 @@
-{
-  "bos_token": {
-    "content": "<s>",
-    "lstrip": false,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false
-  },
-  "eos_token": {
-    "content": "</s>",
-    "lstrip": false,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false
-  },
-  "unk_token": {
-    "content": "<unk>",
-    "lstrip": false,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false
-  }
-}
+{
+  "bos_token": {
+    "content": "<s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "<pad>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}
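The updated special_tokens_map.json adds an explicit `<pad>` token next to the existing `<s>`, `</s>` and `<unk>` entries. A small sketch to confirm it is picked up after loading (placeholder path again):

```python
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("path/to/Nemo-Grand-Gutenburg3")
print(tok.special_tokens_map)
# Expected to include 'pad_token': '<pad>' alongside the bos/eos/unk entries.
```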
tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json CHANGED
The diff for this file is too large to render. See raw diff