dataautogpt3 committed on
Commit
2251a6a
1 Parent(s): c2cd618

Upload 36 files

Browse files
README.md ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ base_model:
3
+ - 152334H/miqu-1-70b-sf
4
+ tags:
5
+ - mergekit
6
+ - merge
7
+
8
+ ---
9
+ # miqu-big-fix
10
+
11
+ This is a merge of pre-trained language models created using [mergekit](https://github.com/cg123/mergekit).
12
+
13
+ ## Merge Details
14
+ ### Merge Method
15
+
16
+ This model was merged using the passthrough merge method.
17
+
18
+ ### Models Merged
19
+
20
+ The following models were included in the merge:
21
+ * [152334H/miqu-1-70b-sf](https://huggingface.co/152334H/miqu-1-70b-sf)
22
+
23
+ ### Configuration
24
+
25
+ The following YAML configuration was used to produce this model:
26
+
27
+ ```yaml
28
+ slices:
29
+   - sources:
30
+       - model: 152334H/miqu-1-70b-sf
31
+         layer_range: [0, 80]
32
+   - sources:
33
+       - model: 152334H/miqu-1-70b-sf
34
+         layer_range: [0, 80]
35
+ merge_method: passthrough
36
+ dtype: float16
37
+
38
+ ```
config.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "152334H/miqu-1-70b-sf",
3
+ "architectures": [
4
+ "LlamaForCausalLM"
5
+ ],
6
+ "attention_bias": false,
7
+ "attention_dropout": 0.0,
8
+ "bos_token_id": 1,
9
+ "eos_token_id": 2,
10
+ "hidden_act": "silu",
11
+ "hidden_size": 8192,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 28672,
14
+ "max_position_embeddings": 32764,
15
+ "model_type": "llama",
16
+ "num_attention_heads": 64,
17
+ "num_hidden_layers": 160,
18
+ "num_key_value_heads": 8,
19
+ "pad_token_id": 0,
20
+ "pretraining_tp": 1,
21
+ "rms_norm_eps": 1e-05,
22
+ "rope_scaling": null,
23
+ "rope_theta": 1000000,
24
+ "tie_word_embeddings": false,
25
+ "torch_dtype": "float16",
26
+ "transformers_version": "4.37.2",
27
+ "use_cache": true,
28
+ "vocab_size": 32000
29
+ }
mergekit_config.yml ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ slices:
2
+   - sources:
3
+       - model: 152334H/miqu-1-70b-sf
4
+         layer_range: [0, 80]
5
+   - sources:
6
+       - model: 152334H/miqu-1-70b-sf
7
+         layer_range: [0, 80]
8
+ merge_method: passthrough
9
+ dtype: float16
model-00001-of-00028.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3537c0411f97fc512a65f3229c840c29e83c3195e426952924d5684c7509436d
3
+ size 9986776088
model-00002-of-00028.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:092b0e7b6d22f1bea454bedbf72870614a41e44e3e1d4b054dbb390ab7e939b8
3
+ size 9663912040
model-00003-of-00028.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:20c645dd18d2bb1e41083ca91e7021f3b46ecc711883c0286847a2cdca1dbb89
3
+ size 9663878336
model-00004-of-00028.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1d66902945d9f339a7110675640e6324454cc3ccbb8a22f4d34ac560a336503f
3
+ size 9932281024
model-00005-of-00028.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d727097276e23a9feec14746a5e8664139f0b3da9dd0d98a04c59051e9b2f3be
3
+ size 9932248760
model-00006-of-00028.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1b983a2a502be54a7d6b6120880654daca454763e986eea044697d0e5a69dc14
3
+ size 9798130624
model-00007-of-00028.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f3ad8854e6fc125f2785574ae18ddcaae4bdd38ffc7e14502038696201c4e8f7
3
+ size 9999423400
model-00008-of-00028.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3b1b505a334dd7526faa2a1079ad270a5d9a4c4b17234bf9d6252d0b7381236f
3
+ size 9663878376
model-00009-of-00028.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1fe0f7fe38b7800509c3baa8e28acefebc1581e4d08ced0242651aed1899a071
3
+ size 9932281064
model-00010-of-00028.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:483d70c64be34b248b6403e22f7a306b7e7a839dce68590131f2d1dcecddbe2e
3
+ size 9932282504
model-00011-of-00028.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f6cbf51160352804c622e219675c7db47a12f53100440b151e70addd536d0b78
3
+ size 9798096936
model-00012-of-00028.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:52d6a7110bfcb3e5c5866920e8a9287d21bad136a538a2f5913bf11922f35106
3
+ size 9999457104
model-00013-of-00028.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d1866e4d9f75ec3690cd9f99f41150b9512f0c44e37e9c26146636a30234b7e6
3
+ size 9798129240
model-00014-of-00028.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f18278fbf5780d437868c9e45f7a6d4e13cba4b99fac3ed604c7f4535fbe4fb0
3
+ size 9932248072
model-00015-of-00028.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:731e3c8e8cbd0d92b61f8588b0d4f920cf6dda46d64257f271fa1ae423dd64cf
3
+ size 9798030936
model-00016-of-00028.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8b5042626ad2863dad905f0ed36f465aacd579fb0f9e645b910f3aa9360f20a5
3
+ size 9932381488
model-00017-of-00028.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ae14a1739261f6541faf9569dddd4b239855c8b553015bef0e5740d0b467a965
3
+ size 9999423408
model-00018-of-00028.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:07ad3450b51dfb64c471fabbc92e4832500eeafa1ba27cdfe1fc191e48e01b0c
3
+ size 9663878384
model-00019-of-00028.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:56487d4d72cb058c43196c3d916660ba76ff7d125d33d2e7fc291206d0b2ff3a
3
+ size 9932248072
model-00020-of-00028.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a364fcaacf2b1ac52f9bb04fb03bd97739ea4c3123b68dbe852f38330fc28901
3
+ size 9932315488
model-00021-of-00028.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9dddb428ed53e9397cccf19f2d1bd64bd87ee98c96e85735e067ee0d6459dcce
3
+ size 9798096936
model-00022-of-00028.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7cf66be88d0117a50c3a3845c4b1a4b085158231fe7e9c79a5e635adcaeb47f9
3
+ size 9999489400
model-00023-of-00028.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:86106c0b7e9c00179820bcdf2beecc28e9a8c6940874e6ca3a3ead34b72237f8
3
+ size 9932314072
model-00024-of-00028.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e008bed8e008d5fd4d38f86d52274ae50fb4b1d6098ef7a5b6278b669ab071d6
3
+ size 9798030936
model-00025-of-00028.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:280723ca1fac0ec1b0d05ed4ace04706e35c4b0a2d36ea6b48ba557ec53b875e
3
+ size 9798030936
model-00026-of-00028.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:24653ea6fc75cd137e53154c674b078233f51988c073316f2db24a3734f74fcc
3
+ size 9798162936
model-00027-of-00028.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0ec71790b6f60f64270991b15ebf53231ea9e9bfa1fd9a6086443467310ec7ff
3
+ size 9999423408
model-00028-of-00028.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1fafeca3f066a7632877e7dc5f9cb3d4769e15762cb102b6f6ff02dd5474abef
3
+ size 8443318432
model.safetensors.index.json ADDED
The diff for this file is too large to render. See raw diff
 
special_tokens_map.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": true,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "</s>",
11
+ "lstrip": false,
12
+ "normalized": true,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "<unk>",
18
+ "lstrip": false,
19
+ "normalized": true,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "unk_token": {
24
+ "content": "<unk>",
25
+ "lstrip": false,
26
+ "normalized": true,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ }
30
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
3
+ size 499723
tokenizer_config.json ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": true,
3
+ "add_eos_token": false,
4
+ "added_tokens_decoder": {
5
+ "0": {
6
+ "content": "<unk>",
7
+ "lstrip": false,
8
+ "normalized": true,
9
+ "rstrip": false,
10
+ "single_word": false,
11
+ "special": true
12
+ },
13
+ "1": {
14
+ "content": "<s>",
15
+ "lstrip": false,
16
+ "normalized": true,
17
+ "rstrip": false,
18
+ "single_word": false,
19
+ "special": true
20
+ },
21
+ "2": {
22
+ "content": "</s>",
23
+ "lstrip": false,
24
+ "normalized": true,
25
+ "rstrip": false,
26
+ "single_word": false,
27
+ "special": true
28
+ }
29
+ },
30
+ "bos_token": "<s>",
31
+ "clean_up_tokenization_spaces": false,
32
+ "eos_token": "</s>",
33
+ "legacy": false,
34
+ "model_max_length": 1000000000000000019884624838656,
35
+ "pad_token": "<unk>",
36
+ "sp_model_kwargs": {},
37
+ "spaces_between_special_tokens": false,
38
+ "tokenizer_class": "LlamaTokenizer",
39
+ "unk_token": "<unk>",
40
+ "use_default_system_prompt": false
41
+ }