win10 committed on
Commit
92cfa71
1 Parent(s): b026eab

Upload folder using huggingface_hub

Browse files
Files changed (1) hide show
  1. README.md +23 -23
README.md CHANGED
@@ -4,19 +4,19 @@ tags:
4
  - merge
5
  - mergekit
6
  - lazymergekit
7
- - MediaTek-Research/Breeze-7B-32k-Base-v1_0
8
  ---
9
 
10
  # Breeze-13B-32k-Base-v1_0
11
 
12
  Breeze-13B-32k-Base-v1_0 is a merge of the following models using [mergekit](https://github.com/cg123/mergekit):
13
- * [MediaTek-Research/Breeze-7B-32k-Base-v1_0](https://huggingface.co/MediaTek-Research/Breeze-7B-32k-Base-v1_0)
14
- * [MediaTek-Research/Breeze-7B-32k-Base-v1_0](https://huggingface.co/MediaTek-Research/Breeze-7B-32k-Base-v1_0)
15
- * [MediaTek-Research/Breeze-7B-32k-Base-v1_0](https://huggingface.co/MediaTek-Research/Breeze-7B-32k-Base-v1_0)
16
- * [MediaTek-Research/Breeze-7B-32k-Base-v1_0](https://huggingface.co/MediaTek-Research/Breeze-7B-32k-Base-v1_0)
17
- * [MediaTek-Research/Breeze-7B-32k-Base-v1_0](https://huggingface.co/MediaTek-Research/Breeze-7B-32k-Base-v1_0)
18
- * [MediaTek-Research/Breeze-7B-32k-Base-v1_0](https://huggingface.co/MediaTek-Research/Breeze-7B-32k-Base-v1_0)
19
- * [MediaTek-Research/Breeze-7B-32k-Base-v1_0](https://huggingface.co/MediaTek-Research/Breeze-7B-32k-Base-v1_0)
20
 
21
  ## 🧩 Configuration
22
 
@@ -26,53 +26,53 @@ merge_method: linear
26
  slices:
27
  - sources:
28
  - layer_range: [0, 8]
29
- model: MediaTek-Research/Breeze-7B-32k-Base-v1_0
30
  - layer_range: [0, 8]
31
- model: meta-llama/Meta-Llama-3-8B
32
  parameters:
33
  weight: 0
34
  - sources:
35
  - layer_range: [4, 12]
36
- model: MediaTek-Research/Breeze-7B-32k-Base-v1_0
37
  - layer_range: [4, 12]
38
- model: meta-llama/Meta-Llama-3-8B
39
  parameters:
40
  weight: 0
41
  - sources:
42
  - layer_range: [8, 16]
43
- model: MediaTek-Research/Breeze-7B-32k-Base-v1_0
44
  - layer_range: [8, 16]
45
- model: meta-llama/Meta-Llama-3-8B
46
  parameters:
47
  weight: 0
48
  - sources:
49
  - layer_range: [12, 20]
50
- model: MediaTek-Research/Breeze-7B-32k-Base-v1_0
51
  - layer_range: [12, 20]
52
- model: meta-llama/Meta-Llama-3-8B
53
  parameters:
54
  weight: 0
55
  - sources:
56
  - layer_range: [16, 24]
57
- model: MediaTek-Research/Breeze-7B-32k-Base-v1_0
58
  - layer_range: [16, 24]
59
- model: meta-llama/Meta-Llama-3-8B
60
  parameters:
61
  weight: 0
62
  - sources:
63
  - layer_range: [20, 28]
64
- model: MediaTek-Research/Breeze-7B-32k-Base-v1_0
65
  - layer_range: [20, 28]
66
- model: meta-llama/Meta-Llama-3-8B
67
  parameters:
68
  weight: 0
69
  - sources:
70
  - layer_range: [24, 32]
71
- model: MediaTek-Research/Breeze-7B-32k-Base-v1_0
72
  - layer_range: [24, 32]
73
- model: meta-llama/Meta-Llama-3-8B
74
  parameters:
75
  weight: 0
76
- tokenizer_source: union
77
 
78
  ```
 
4
  - merge
5
  - mergekit
6
  - lazymergekit
7
+ - deepseek-ai/deepseek-llm-7b-base
8
  ---
9
 
10
  # Breeze-13B-32k-Base-v1_0
11
 
12
  Breeze-13B-32k-Base-v1_0 is a merge of the following models using [mergekit](https://github.com/cg123/mergekit):
13
+ * [deepseek-ai/deepseek-llm-7b-base](https://huggingface.co/deepseek-ai/deepseek-llm-7b-base)
14
+ * [deepseek-ai/deepseek-llm-7b-base](https://huggingface.co/deepseek-ai/deepseek-llm-7b-base)
15
+ * [deepseek-ai/deepseek-llm-7b-base](https://huggingface.co/deepseek-ai/deepseek-llm-7b-base)
16
+ * [deepseek-ai/deepseek-llm-7b-base](https://huggingface.co/deepseek-ai/deepseek-llm-7b-base)
17
+ * [deepseek-ai/deepseek-llm-7b-base](https://huggingface.co/deepseek-ai/deepseek-llm-7b-base)
18
+ * [deepseek-ai/deepseek-llm-7b-base](https://huggingface.co/deepseek-ai/deepseek-llm-7b-base)
19
+ * [deepseek-ai/deepseek-llm-7b-base](https://huggingface.co/deepseek-ai/deepseek-llm-7b-base)
20
 
21
  ## 🧩 Configuration
22
 
 
26
  slices:
27
  - sources:
28
  - layer_range: [0, 8]
29
+ model: deepseek-ai/deepseek-llm-7b-base
30
  - layer_range: [0, 8]
31
+ model: meta-llama/Meta-Llama-3-8B
32
  parameters:
33
  weight: 0
34
  - sources:
35
  - layer_range: [4, 12]
36
+ model: deepseek-ai/deepseek-llm-7b-base
37
  - layer_range: [4, 12]
38
+ model: meta-llama/Meta-Llama-3-8B
39
  parameters:
40
  weight: 0
41
  - sources:
42
  - layer_range: [8, 16]
43
+ model: deepseek-ai/deepseek-llm-7b-base
44
  - layer_range: [8, 16]
45
+ model: meta-llama/Meta-Llama-3-8B
46
  parameters:
47
  weight: 0
48
  - sources:
49
  - layer_range: [12, 20]
50
+ model: deepseek-ai/deepseek-llm-7b-base
51
  - layer_range: [12, 20]
52
+ model: meta-llama/Meta-Llama-3-8B
53
  parameters:
54
  weight: 0
55
  - sources:
56
  - layer_range: [16, 24]
57
+ model: deepseek-ai/deepseek-llm-7b-base
58
  - layer_range: [16, 24]
59
+ model: meta-llama/Meta-Llama-3-8B
60
  parameters:
61
  weight: 0
62
  - sources:
63
  - layer_range: [20, 28]
64
+ model: deepseek-ai/deepseek-llm-7b-base
65
  - layer_range: [20, 28]
66
+ model: meta-llama/Meta-Llama-3-8B
67
  parameters:
68
  weight: 0
69
  - sources:
70
  - layer_range: [24, 32]
71
+ model: deepseek-ai/deepseek-llm-7b-base
72
  - layer_range: [24, 32]
73
+ model: meta-llama/Meta-Llama-3-8B
74
  parameters:
75
  weight: 0
76
+ tokenizer_source: union
77
 
78
  ```