JeongwonChoi committed
Commit c6e33d9
1 Parent(s): 262cf91

Initial commit
README.md CHANGED
@@ -4,7 +4,7 @@ tags:
 license: cc-by-nc-sa-4.0
 language:
 - ko
-base_model: megastudy/M-SOLAR-10.7B-v1.3
+base_model: LDCC/LDCC-SOLAR-10.7B
 pipeline_tag: text-generation
 ---
 
@@ -16,7 +16,7 @@ pipeline_tag: text-generation
 
 ### **Base Model**
 
-[megastudy/M-SOLAR-10.7B-v1.3](https://huggingface.co/megastudy/M-SOLAR-10.7B-v1.3)
+[LDCC/LDCC-SOLAR-10.7B](https://huggingface.co/LDCC/LDCC-SOLAR-10.7B)
 
 ### **Trained On**
 
@@ -48,13 +48,13 @@ text = """\
 
 ### **[Ko LM Eval Harness](https://github.com/Beomi/ko-lm-evaluation-harness)**
 
-| Task             |      0-shot |       5-shot |      10-shot |      50-shot |
-| :--------------- | ----------: | -----------: | -----------: | -----------: |
-| kobest_boolq     |    0.491356 |     0.632476 |     0.648679 |     0.640638 |
-| kobest_copa      |    0.555845 |     0.589343 |      0.60019 |     0.580294 |
-| kobest_hellaswag |     0.36013 |     0.353449 |     0.364599 |     0.346524 |
-| kobest_sentineg  |    0.448148 |     0.768864 |     0.753904 |     0.646915 |
-| **Average**      | **0.46387** | **0.586033** | **0.591843** | **0.553593** |
+| Task             |       0-shot |       5-shot |     10-shot |      50-shot |
+| :--------------- | -----------: | -----------: | ----------: | -----------: |
+| kobest_boolq     |     0.920118 |      0.92442 |    0.929443 |     0.927317 |
+| kobest_copa      |     0.727263 |     0.778936 |    0.804812 |     0.815761 |
+| kobest_hellaswag |     0.433039 |     0.465922 |    0.459741 |     0.471022 |
+| kobest_sentineg  |     0.764909 |      0.93946 |    0.937002 |     0.931962 |
+| **Average**      | **0.711332** | **0.777185** | **0.78275** | **0.786516** |
 
 ### **[Ko-LLM-Leaderboard](https://huggingface.co/spaces/upstage/open-ko-llm-leaderboard)**
 
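Given the base-model switch and the ChatML-style EOS token introduced by the config and tokenizer changes below, here is a minimal inference sketch. It is not taken from this repo's README: the repo id `JeongwonChoi/model` is a hypothetical placeholder, and it assumes the tokenizer_config.json in this commit (diff too large to render below) ships a ChatML chat template.

```python
# Minimal inference sketch, not from this repo's README.
# "JeongwonChoi/model" is a hypothetical placeholder for this repo's id,
# and a ChatML chat template is assumed in tokenizer_config.json.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

repo_id = "JeongwonChoi/model"  # hypothetical
tokenizer = AutoTokenizer.from_pretrained(repo_id)
model = AutoModelForCausalLM.from_pretrained(
    repo_id, torch_dtype=torch.float16, device_map="auto"
)

# With this commit, eos_token is "<|im_end|>" (id 32000), so generation
# stops at the ChatML turn delimiter rather than "</s>".
prompt = tokenizer.apply_chat_template(
    [{"role": "user", "content": "Hello, please introduce yourself."}],
    tokenize=False,
    add_generation_prompt=True,
)
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
output = model.generate(**inputs, max_new_tokens=128)
print(tokenizer.decode(output[0][inputs["input_ids"].shape[-1]:], skip_special_tokens=True))
```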
config.json CHANGED
@@ -6,7 +6,7 @@
   "attention_bias": false,
   "attention_dropout": 0.0,
   "bos_token_id": 1,
-  "eos_token_id": 2,
+  "eos_token_id": 32000,
   "hidden_act": "silu",
   "hidden_size": 4096,
   "initializer_range": 0.02,
@@ -24,6 +24,6 @@
   "tie_word_embeddings": false,
   "torch_dtype": "float16",
   "transformers_version": "4.36.2",
-  "use_cache": false,
-  "vocab_size": 32000
+  "use_cache": true,
+  "vocab_size": 48000
 }
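These config changes can be sanity-checked without downloading the ~21 GB of weights. A minimal sketch, again with `JeongwonChoi/model` as a hypothetical placeholder for this repo's id:

```python
# Minimal sketch: confirm the updated config values; no weights downloaded.
from transformers import AutoConfig

config = AutoConfig.from_pretrained("JeongwonChoi/model")  # hypothetical id
assert config.eos_token_id == 32000  # was 2 ("</s>") before this commit
assert config.vocab_size == 48000    # expanded from 32000
assert config.use_cache is True      # re-enabled for faster decoding
```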
generation_config.json CHANGED
@@ -1,8 +1,7 @@
 {
   "_from_model_config": true,
   "bos_token_id": 1,
-  "eos_token_id": 2,
+  "eos_token_id": 32000,
   "pad_token_id": 2,
-  "transformers_version": "4.36.2",
-  "use_cache": false
+  "transformers_version": "4.36.2"
 }
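The same check applies to the generation defaults. Note that the `"use_cache": false` override is simply dropped here, so decoding falls back to the library default (true), consistent with config.json above. A minimal sketch:

```python
# Minimal sketch: inspect the generation defaults shipped with this commit.
from transformers import GenerationConfig

gen_config = GenerationConfig.from_pretrained("JeongwonChoi/model")  # hypothetical id
assert gen_config.eos_token_id == 32000  # aligned with config.json
assert gen_config.pad_token_id == 2
```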
model-00001-of-00005.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b28cf54ceb61e81eda534dc50da8a7db3210aa7a82ef73cbc8e5ed1de97e8e23
-size 4943162240
+oid sha256:67551d2d7ab52883d6f177e5045f7881b3e9dd68a679a8c76088b88dc1a75513
+size 4956793616

model-00002-of-00005.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4559cd078d16e996275e4efff6f320365f95dc23584cfe894686dd1043b6ee08
-size 4999819232
+oid sha256:20bbd1f375f5631f02908d3771e3ec6c6e790ec943a48fe8054b336520c4bc3b
+size 4915916080

model-00003-of-00005.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8a14dce327b2a34240acc75b3a5904c464d7c7074e66adf3693eb83fe165337b
-size 4915916080
+oid sha256:e402a67e7f36b8f724001b8f5948f4b37ef0f9819434fff34f2c81c4f0209386
+size 4999819232

model-00004-of-00005.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ba2df5ea05db5f5b57310167deb4db20ec8a26c14336477c3f27e09c3a490dbc
+oid sha256:a6ab237b1ca1d4edbd7129db924f01dee41b8d39b804fffab15835c73a570302
 size 4915916080

model-00005-of-00005.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:011a0c491f2278b4ed59eed185f0bdd56c166466de8d0cd43879b20d77794860
-size 1688284744
+oid sha256:dca4a648d12f9928d38f24dad3e68fce1901370254059922991d60dbf88e8e17
+size 1936797384
model.safetensors.index.json CHANGED
@@ -1,6 +1,6 @@
 {
   "metadata": {
-    "total_size": 21463048192
+    "total_size": 21725192192
   },
   "weight_map": {
     "lm_head.weight": "model-00005-of-00005.safetensors",
@@ -26,7 +26,7 @@
     "model.layers.10.input_layernorm.weight": "model-00002-of-00005.safetensors",
     "model.layers.10.mlp.down_proj.weight": "model-00002-of-00005.safetensors",
     "model.layers.10.mlp.gate_proj.weight": "model-00001-of-00005.safetensors",
-    "model.layers.10.mlp.up_proj.weight": "model-00001-of-00005.safetensors",
+    "model.layers.10.mlp.up_proj.weight": "model-00002-of-00005.safetensors",
     "model.layers.10.post_attention_layernorm.weight": "model-00002-of-00005.safetensors",
     "model.layers.10.self_attn.k_proj.weight": "model-00001-of-00005.safetensors",
     "model.layers.10.self_attn.o_proj.weight": "model-00001-of-00005.safetensors",
@@ -131,11 +131,11 @@
     "model.layers.20.self_attn.o_proj.weight": "model-00002-of-00005.safetensors",
     "model.layers.20.self_attn.q_proj.weight": "model-00002-of-00005.safetensors",
     "model.layers.20.self_attn.v_proj.weight": "model-00002-of-00005.safetensors",
-    "model.layers.21.input_layernorm.weight": "model-00002-of-00005.safetensors",
-    "model.layers.21.mlp.down_proj.weight": "model-00002-of-00005.safetensors",
+    "model.layers.21.input_layernorm.weight": "model-00003-of-00005.safetensors",
+    "model.layers.21.mlp.down_proj.weight": "model-00003-of-00005.safetensors",
     "model.layers.21.mlp.gate_proj.weight": "model-00002-of-00005.safetensors",
     "model.layers.21.mlp.up_proj.weight": "model-00002-of-00005.safetensors",
-    "model.layers.21.post_attention_layernorm.weight": "model-00002-of-00005.safetensors",
+    "model.layers.21.post_attention_layernorm.weight": "model-00003-of-00005.safetensors",
     "model.layers.21.self_attn.k_proj.weight": "model-00002-of-00005.safetensors",
     "model.layers.21.self_attn.o_proj.weight": "model-00002-of-00005.safetensors",
     "model.layers.21.self_attn.q_proj.weight": "model-00002-of-00005.safetensors",
@@ -145,10 +145,10 @@
     "model.layers.22.mlp.gate_proj.weight": "model-00003-of-00005.safetensors",
     "model.layers.22.mlp.up_proj.weight": "model-00003-of-00005.safetensors",
     "model.layers.22.post_attention_layernorm.weight": "model-00003-of-00005.safetensors",
-    "model.layers.22.self_attn.k_proj.weight": "model-00002-of-00005.safetensors",
-    "model.layers.22.self_attn.o_proj.weight": "model-00002-of-00005.safetensors",
-    "model.layers.22.self_attn.q_proj.weight": "model-00002-of-00005.safetensors",
-    "model.layers.22.self_attn.v_proj.weight": "model-00002-of-00005.safetensors",
+    "model.layers.22.self_attn.k_proj.weight": "model-00003-of-00005.safetensors",
+    "model.layers.22.self_attn.o_proj.weight": "model-00003-of-00005.safetensors",
+    "model.layers.22.self_attn.q_proj.weight": "model-00003-of-00005.safetensors",
+    "model.layers.22.self_attn.v_proj.weight": "model-00003-of-00005.safetensors",
     "model.layers.23.input_layernorm.weight": "model-00003-of-00005.safetensors",
     "model.layers.23.mlp.down_proj.weight": "model-00003-of-00005.safetensors",
     "model.layers.23.mlp.gate_proj.weight": "model-00003-of-00005.safetensors",
@@ -250,7 +250,7 @@
     "model.layers.32.self_attn.v_proj.weight": "model-00003-of-00005.safetensors",
     "model.layers.33.input_layernorm.weight": "model-00004-of-00005.safetensors",
     "model.layers.33.mlp.down_proj.weight": "model-00004-of-00005.safetensors",
-    "model.layers.33.mlp.gate_proj.weight": "model-00003-of-00005.safetensors",
+    "model.layers.33.mlp.gate_proj.weight": "model-00004-of-00005.safetensors",
     "model.layers.33.mlp.up_proj.weight": "model-00004-of-00005.safetensors",
     "model.layers.33.post_attention_layernorm.weight": "model-00004-of-00005.safetensors",
     "model.layers.33.self_attn.k_proj.weight": "model-00003-of-00005.safetensors",
@@ -359,7 +359,7 @@
     "model.layers.44.input_layernorm.weight": "model-00005-of-00005.safetensors",
     "model.layers.44.mlp.down_proj.weight": "model-00005-of-00005.safetensors",
     "model.layers.44.mlp.gate_proj.weight": "model-00004-of-00005.safetensors",
-    "model.layers.44.mlp.up_proj.weight": "model-00004-of-00005.safetensors",
+    "model.layers.44.mlp.up_proj.weight": "model-00005-of-00005.safetensors",
     "model.layers.44.post_attention_layernorm.weight": "model-00005-of-00005.safetensors",
     "model.layers.44.self_attn.k_proj.weight": "model-00004-of-00005.safetensors",
     "model.layers.44.self_attn.o_proj.weight": "model-00004-of-00005.safetensors",
special_tokens_map.json CHANGED
@@ -7,7 +7,7 @@
     "single_word": false
   },
   "eos_token": {
-    "content": "</s>",
+    "content": "<|im_end|>",
     "lstrip": false,
     "normalized": false,
     "rstrip": false,
tokenizer.json CHANGED
The diff for this file is too large to render.

tokenizer_config.json CHANGED
The diff for this file is too large to render.