brucethemoose committed
Commit 4e5986b
1 Parent(s): 229a4a7

Upload folder using huggingface_hub

README.md CHANGED
@@ -43,8 +43,5 @@ python /home/alpha/AI/exllamav2/convert.py --in_dir /home/alpha/FastModels/v8/v8
 
 Second pass:
 ```
-python /home/alpha/AI/exllamav2/convert.py --in_dir /home/alpha/FastModels/v8/v8 -o /home/alpha/FastModels/scratch -m /home/alpha/FastModels/v8meas.json --cal_dataset /home/alpha/Documents/stories.parquet -l 12288 -r 26 -ml 32768 -mr 8 -ss 4096 -b 3.1 -hb 6 -cf /home/alpha/FastModels/v8-exl2-31bpw-fiction -nr
+python /home/alpha/AI/exllamav2/convert.py --in_dir /home/alpha/FastModels/v8/v8 -o /home/alpha/FastModels/scratch -m /home/alpha/FastModels/v8meas.json --cal_dataset /home/alpha/Documents/stories.parquet -l 12288 -r 26 -ml 32768 -mr 8 -ss 4096 -b 4.0 -hb 6 -cf /home/alpha/FastModels/v8-exl2-4bpw-fiction -nr
 ```
-
-
-
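
For context, the second pass reuses the measurement file from the first pass (`-m .../v8meas.json`) and writes a 4.0 bpw EXL2 quant to the `-cf` directory. A minimal loading sketch using the exllamav2 Python API (paths and sampler settings are illustrative, and the exact API may differ between exllamav2 versions):

```python
from exllamav2 import ExLlamaV2, ExLlamaV2Config, ExLlamaV2Cache, ExLlamaV2Tokenizer
from exllamav2.generator import ExLlamaV2BaseGenerator, ExLlamaV2Sampler

# Point at the quantized output directory produced by convert.py (-cf ...).
config = ExLlamaV2Config()
config.model_dir = "/home/alpha/FastModels/v8-exl2-4bpw-fiction"
config.prepare()

model = ExLlamaV2(config)
cache = ExLlamaV2Cache(model, lazy=True)   # allocate the KV cache while loading
model.load_autosplit(cache)                # split layers across available GPUs

tokenizer = ExLlamaV2Tokenizer(config)
generator = ExLlamaV2BaseGenerator(model, cache, tokenizer)

settings = ExLlamaV2Sampler.Settings()
settings.temperature = 0.8
settings.top_p = 0.9

print(generator.generate_simple("Once upon a time,", settings, 200))
```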
 
 
 
config.json CHANGED
@@ -4,7 +4,6 @@
     "LlamaForCausalLM"
   ],
   "attention_bias": false,
-  "attention_dropout": 0.0,
   "bos_token_id": 1,
   "eos_token_id": 2,
   "hidden_act": "silu",
@@ -23,7 +22,7 @@
   "rope_theta": 5000000.0,
   "tie_word_embeddings": false,
   "torch_dtype": "bfloat16",
-  "transformers_version": "4.36.2",
+  "transformers_version": "4.35.2",
   "use_cache": true,
   "vocab_size": 64002
 }
mergekit_config.yml CHANGED
@@ -1,51 +1,60 @@
-models:
-  - model: /home/alpha/Storage/Models/Raw/chargoddard_Yi-34B-200K-Llama
-    # No parameters necessary for base model
-  - model: /home/alpha/Storage/Models/Raw/migtissera_Tess-34B-v1.4
-    parameters:
-      weight: [0.23, 0.125, 0.125, 0.125, 0.125, 0.125]
-      density: 0.59
-  - model: /home/alpha/Models/Raw/Mihaiii_Pallas-0.5
-    parameters:
-      weight: [0.23, 0.125, 0.125, 0.125, 0.125, 0.125]
-      density: 0.59
-  - model: /home/alpha//Storage/Models/Raw/bhenrym14_airoboros-3_1-yi-34b-200k
-    parameters:
-      weight: [0.02, 0.106, 0.106, 0.106, 0.106, 0.106]
-      density: 0.59
-  - model: /home/alpha/Storage/Models/Raw/jondurbin_bagel-34b-v0.2
-    # Only the SFT in the main merge since the DPO version seems to have no long context ability at all
-    parameters:
-      weight: [0.02, 0.100, 0.100, 0.100, 0.100, 0.100]
-      density: 0.4
-  - model: /home/alpha/Storage/Models/Raw/kyujinpy_PlatYi-34B-200k-Q-FastChat
-    parameters:
-      weight: [0.02, 0.100, 0.100, 0.100, 0.100, 0.100]
-      density: 0.59
-  #- model: /home/alpha/Storage/Models/Raw/ehartford_dolphin-2.2-yi-34b-200k
-  #  Dolphin 200K seems to be funky according to multiple leaderboards and perplexity tests?
-  #  parameters:
-  #    weight: 0.15
-  #    density: 0.6
-  - model: /home/alpha/Models/Raw/adamo1139_Yi-34B-200K-AEZAKMI-v2
-    parameters:
-      weight: [0.02, 0.110, 0.110, 0.110, 0.110, 0.110]
-      density: 0.59
-  - model: /home/alpha/Storage/Models/Raw/Nous-Capybara-34B
-    parameters:
-      weight: [0.22, 0.126, 0.126, 0.126, 0.126, 0.126]
-      density: 0.59
-  - model: /home/alpha/Storage/Models/Raw/4kmerge
-    parameters:
-      weight: [0.02, 0.108, 0.108, 0.108, 0.108, 0.108]
-      density: 0.5
-  - model: /home/alpha/Models/Raw/migtissera_Tess-M-Creative-v1.0
-    parameters:
-      weight: [0.22, 0.100, 0.100, 0.100, 0.100, 0.10]
-      density: 0.59
-merge_method: dare_ties
-tokenizer_source: union
-base_model: /home/alpha/Storage/Models/Raw/chargoddard_Yi-34B-200K-Llama
-parameters:
-  int8_mask: true
-dtype: bfloat16
+base_model: /home/alpha/Storage/Models/Raw/chargoddard_Yi-34B-200K-Llama
+dtype: bfloat16
+merge_method: dare_ties
+parameters:
+  int8_mask: 1.0
+slices:
+- sources:
+  - layer_range: [0, 60]
+    model: /home/alpha/Storage/Models/Raw/chargoddard_Yi-34B-200K-Llama
+  - layer_range: [0, 60]
+    model: /home/alpha/Storage/Models/Raw/migtissera_Tess-34B-v1.4
+    parameters:
+      density: 0.61
+      weight: [0.22, 0.113, 0.113, 0.113, 0.113, 0.113]
+  - layer_range: [0, 60]
+    model: /home/alpha/Models/Raw/Mihaiii_Pallas-0.5
+    parameters:
+      density: 0.61
+      weight: [0.22, 0.113, 0.113, 0.113, 0.113, 0.113]
+  - layer_range: [0, 60]
+    model: /home/alpha//Storage/Models/Raw/bhenrym14_airoboros-3_1-yi-34b-200k
+    parameters:
+      density: 0.59
+      weight: [0.02, 0.081, 0.081, 0.081, 0.081, 0.081]
+  - layer_range: [0, 60]
+    model: /home/alpha/Storage/Models/Raw/jondurbin_bagel-34b-v0.2
+    parameters:
+      density: 0.4
+      weight: [0.02, 0.093, 0.093, 0.093, 0.093, 0.093]
+  - layer_range: [0, 60]
+    model: /home/alpha/Storage/Models/Raw/kyujinpy_PlatYi-34B-200k-Q-FastChat
+    parameters:
+      density: 0.59
+      weight: [0.02, 0.081, 0.081, 0.081, 0.081, 0.081]
+  - layer_range: [0, 60]
+    model: /home/alpha/Models/Raw/adamo1139_Yi-34B-200K-AEZAKMI-v2
+    parameters:
+      density: 0.59
+      weight: [0.02, 0.096, 0.096, 0.096, 0.096, 0.096]
+  - layer_range: [0, 60]
+    model: /home/alpha/Storage/Models/Raw/Nous-Capybara-34B
+    parameters:
+      density: 0.59
+      weight: [0.21, 0.115, 0.115, 0.115, 0.115, 0.115]
+  - layer_range: [0, 60]
+    model: /home/alpha/FastModels/v8/4kmerge-v2
+    parameters:
+      density: 0.4
+      weight: [0.02, 0.115, 0.115, 0.115, 0.115, 0.115]
+  - layer_range: [0, 60]
+    model: /home/alpha/Models/Raw/migtissera_Tess-M-Creative-v1.0
+    parameters:
+      density: 0.61
+      weight: [0.21, 0.09, 0.09, 0.09, 0.09, 0.09]
+  - layer_range: [0, 60]
+    model: /home/alpha/Models/Raw/TriadParty_deepmoney-34b-200k-base
+    parameters:
+      density: 0.61
+      weight: [0.04, 0.103, 0.103, 0.103, 0.103, 0.103]
+tokenizer_source: union
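
One note on reading the new config: mergekit treats a list value such as `weight: [0.22, 0.113, 0.113, 0.113, 0.113, 0.113]` as a gradient spread across the layer range, so the first entry mainly affects the earliest layers. A rough Python sketch of that idea (an illustration only, not mergekit's actual implementation; `gradient_value` is a made-up helper name):

```python
# Illustrative interpolation of a mergekit-style parameter gradient across layers.
# This is a sketch for intuition, NOT mergekit's real code.

def gradient_value(gradient, layer, num_layers=60):
    """Linearly interpolate a gradient list across num_layers layers."""
    if len(gradient) == 1:
        return gradient[0]
    # Map the layer index to a fractional position along the gradient list.
    pos = layer / max(num_layers - 1, 1) * (len(gradient) - 1)
    lo = int(pos)
    hi = min(lo + 1, len(gradient) - 1)
    frac = pos - lo
    return gradient[lo] * (1 - frac) + gradient[hi] * frac

# Example: the Tess-34B-v1.4 weight gradient from the new config above.
tess_weight = [0.22, 0.113, 0.113, 0.113, 0.113, 0.113]
for layer in (0, 6, 30, 59):
    print(layer, round(gradient_value(tess_weight, layer), 3))
```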
output-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fe7847b6bd38ad0aec4cd13922152747fd2865a39151d395b60bc8d927fbd415
-size 4243474688
+oid sha256:741ec744d2283a49c5bb87fe353ea0b8f9a13104429da8e20232913d44153946
+size 4294169880

output-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a99b7250790c05a7bfcbc65f867ac4e4093d6ec5d12287bd53addf0c938d4b14
-size 4292411376
+oid sha256:512c23152ebd0345ea2dd0c4844ff9b9e63cac2a279f4bfc43acc6f2ba214faf
+size 4256670720

output-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:90b2ba416bb84d138a4398ac16629fcbf02c1ed07086604f7570f6ab54a50ccf
-size 4294489168
+oid sha256:ec4036118c1c00b354f01a9b50a1c00272f95b713a26babebfba4783c097c266
+size 4284799408

output-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:25b4f037a70aeb3f9a7e392082cc09d5c2b9ba19da336922b5d1173df4c28b52
-size 1437881976
+oid sha256:e2f0dffa49ff18c10fdb02fd1add1f85f60d70a7d4c63bc28ac46e831f043545
+size 1433196720
tokenizer.json CHANGED
@@ -69,6 +69,12 @@
     "post_processor": {
       "type": "TemplateProcessing",
       "single": [
+        {
+          "SpecialToken": {
+            "id": "<|startoftext|>",
+            "type_id": 0
+          }
+        },
         {
           "Sequence": {
             "id": "A",
@@ -77,12 +83,24 @@
           }
         }
       ],
       "pair": [
+        {
+          "SpecialToken": {
+            "id": "<|startoftext|>",
+            "type_id": 0
+          }
+        },
         {
           "Sequence": {
             "id": "A",
            "type_id": 0
          }
        },
+        {
+          "SpecialToken": {
+            "id": "<|startoftext|>",
+            "type_id": 1
+          }
+        },
         {
           "Sequence": {
             "id": "B",
@@ -90,7 +108,17 @@
           }
         }
       ],
-      "special_tokens": {}
+      "special_tokens": {
+        "<|startoftext|>": {
+          "id": "<|startoftext|>",
+          "ids": [
+            1
+          ],
+          "tokens": [
+            "<|startoftext|>"
+          ]
+        }
+      }
     },
     "decoder": {
       "type": "Sequence",
tokenizer_config.json CHANGED
@@ -1,6 +1,4 @@
 {
-  "add_bos_token": false,
-  "add_eos_token": false,
   "added_tokens_decoder": {
     "0": {
       "content": "<unk>",