JeongwonChoi committed
Commit c6e33d9
1 Parent(s): 262cf91

Initial commit
README.md CHANGED
@@ -4,7 +4,7 @@ tags:
 license: cc-by-nc-sa-4.0
 language:
 - ko
-base_model: megastudy/M-SOLAR-10.7B-v1.3
+base_model: LDCC/LDCC-SOLAR-10.7B
 pipeline_tag: text-generation
 ---
 
@@ -16,7 +16,7 @@ pipeline_tag: text-generation
 
 ### **Base Model**
 
-[megastudy/M-SOLAR-10.7B-v1.3](https://huggingface.co/megastudy/M-SOLAR-10.7B-v1.3)
+[LDCC/LDCC-SOLAR-10.7B](https://huggingface.co/LDCC/LDCC-SOLAR-10.7B)
 
 ### **Trained On**
 
@@ -48,13 +48,13 @@ text = """\
 
 ### **[Ko LM Eval Harness](https://github.com/Beomi/ko-lm-evaluation-harness)**
 
-| Task             |      0-shot |       5-shot |      10-shot |      50-shot |
-| :--------------- | ----------: | -----------: | -----------: | -----------: |
-| kobest_boolq     |    0.491356 |     0.632476 |     0.648679 |     0.640638 |
-| kobest_copa      |    0.555845 |     0.589343 |      0.60019 |     0.580294 |
-| kobest_hellaswag |     0.36013 |     0.353449 |     0.364599 |     0.346524 |
-| kobest_sentineg  |    0.448148 |     0.768864 |     0.753904 |     0.646915 |
-| **Average**      | **0.46387** | **0.586033** | **0.591843** | **0.553593** |
+| Task             |       0-shot |       5-shot |     10-shot |      50-shot |
+| :--------------- | -----------: | -----------: | ----------: | -----------: |
+| kobest_boolq     |     0.920118 |      0.92442 |    0.929443 |     0.927317 |
+| kobest_copa      |     0.727263 |     0.778936 |    0.804812 |     0.815761 |
+| kobest_hellaswag |     0.433039 |     0.465922 |    0.459741 |     0.471022 |
+| kobest_sentineg  |     0.764909 |      0.93946 |    0.937002 |     0.931962 |
+| **Average**      | **0.711332** | **0.777185** | **0.78275** | **0.786516** |
 
 ### **[Ko-LLM-Leaderboard](https://huggingface.co/spaces/upstage/open-ko-llm-leaderboard)**
 
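Given the base-model switch and the ChatML-style EOS token introduced by the config and tokenizer changes below, here is a minimal inference sketch. It is not taken from this repo's README: the repo id `JeongwonChoi/model` is a hypothetical placeholder, and it assumes the tokenizer_config.json in this commit (diff too large to render below) ships a ChatML chat template.

```python
# Minimal inference sketch, not from this repo's README.
# "JeongwonChoi/model" is a hypothetical placeholder for this repo's id,
# and a ChatML chat template is assumed in tokenizer_config.json.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

repo_id = "JeongwonChoi/model"  # hypothetical
tokenizer = AutoTokenizer.from_pretrained(repo_id)
model = AutoModelForCausalLM.from_pretrained(
    repo_id, torch_dtype=torch.float16, device_map="auto"
)

# With this commit, eos_token is "<|im_end|>" (id 32000), so generation
# stops at the ChatML turn delimiter rather than "</s>".
prompt = tokenizer.apply_chat_template(
    [{"role": "user", "content": "Hello, please introduce yourself."}],
    tokenize=False,
    add_generation_prompt=True,
)
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
output = model.generate(**inputs, max_new_tokens=128)
print(tokenizer.decode(output[0][inputs["input_ids"].shape[-1]:], skip_special_tokens=True))
```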
config.json CHANGED
@@ -6,7 +6,7 @@
   "attention_bias": false,
   "attention_dropout": 0.0,
   "bos_token_id": 1,
-  "eos_token_id": 2,
+  "eos_token_id": 32000,
   "hidden_act": "silu",
   "hidden_size": 4096,
   "initializer_range": 0.02,
@@ -24,6 +24,6 @@
   "tie_word_embeddings": false,
   "torch_dtype": "float16",
   "transformers_version": "4.36.2",
-  "use_cache": false,
-  "vocab_size": 32000
+  "use_cache": true,
+  "vocab_size": 48000
 }
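These config changes can be sanity-checked without downloading the ~21 GB of weights. A minimal sketch, again with `JeongwonChoi/model` as a hypothetical placeholder for this repo's id:

```python
# Minimal sketch: confirm the updated config values; no weights downloaded.
from transformers import AutoConfig

config = AutoConfig.from_pretrained("JeongwonChoi/model")  # hypothetical id
assert config.eos_token_id == 32000  # was 2 ("</s>") before this commit
assert config.vocab_size == 48000    # expanded from 32000
assert config.use_cache is True      # re-enabled for faster decoding
```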
generation_config.json CHANGED
@@ -1,8 +1,7 @@
 {
   "_from_model_config": true,
   "bos_token_id": 1,
-  "eos_token_id": 2,
+  "eos_token_id": 32000,
   "pad_token_id": 2,
-  "transformers_version": "4.36.2",
-  "use_cache": false
+  "transformers_version": "4.36.2"
 }
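The same check applies to the generation defaults. Note that the `"use_cache": false` override is simply dropped here, so decoding falls back to the library default (true), consistent with config.json above. A minimal sketch:

```python
# Minimal sketch: inspect the generation defaults shipped with this commit.
from transformers import GenerationConfig

gen_config = GenerationConfig.from_pretrained("JeongwonChoi/model")  # hypothetical id
assert gen_config.eos_token_id == 32000  # aligned with config.json
assert gen_config.pad_token_id == 2
```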
model-00001-of-00005.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b28cf54ceb61e81eda534dc50da8a7db3210aa7a82ef73cbc8e5ed1de97e8e23
-size 4943162240
+oid sha256:67551d2d7ab52883d6f177e5045f7881b3e9dd68a679a8c76088b88dc1a75513
+size 4956793616

model-00002-of-00005.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4559cd078d16e996275e4efff6f320365f95dc23584cfe894686dd1043b6ee08
-size 4999819232
+oid sha256:20bbd1f375f5631f02908d3771e3ec6c6e790ec943a48fe8054b336520c4bc3b
+size 4915916080

model-00003-of-00005.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8a14dce327b2a34240acc75b3a5904c464d7c7074e66adf3693eb83fe165337b
-size 4915916080
+oid sha256:e402a67e7f36b8f724001b8f5948f4b37ef0f9819434fff34f2c81c4f0209386
+size 4999819232

model-00004-of-00005.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ba2df5ea05db5f5b57310167deb4db20ec8a26c14336477c3f27e09c3a490dbc
+oid sha256:a6ab237b1ca1d4edbd7129db924f01dee41b8d39b804fffab15835c73a570302
 size 4915916080

model-00005-of-00005.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:011a0c491f2278b4ed59eed185f0bdd56c166466de8d0cd43879b20d77794860
-size 1688284744
+oid sha256:dca4a648d12f9928d38f24dad3e68fce1901370254059922991d60dbf88e8e17
+size 1936797384
model.safetensors.index.json CHANGED
@@ -1,6 +1,6 @@
 {
   "metadata": {
-    "total_size": 21463048192
+    "total_size": 21725192192
   },
   "weight_map": {
     "lm_head.weight": "model-00005-of-00005.safetensors",
@@ -26,7 +26,7 @@
     "model.layers.10.input_layernorm.weight": "model-00002-of-00005.safetensors",
     "model.layers.10.mlp.down_proj.weight": "model-00002-of-00005.safetensors",
     "model.layers.10.mlp.gate_proj.weight": "model-00001-of-00005.safetensors",
-    "model.layers.10.mlp.up_proj.weight": "model-00001-of-00005.safetensors",
+    "model.layers.10.mlp.up_proj.weight": "model-00002-of-00005.safetensors",
     "model.layers.10.post_attention_layernorm.weight": "model-00002-of-00005.safetensors",
     "model.layers.10.self_attn.k_proj.weight": "model-00001-of-00005.safetensors",
     "model.layers.10.self_attn.o_proj.weight": "model-00001-of-00005.safetensors",
@@ -131,11 +131,11 @@
     "model.layers.20.self_attn.o_proj.weight": "model-00002-of-00005.safetensors",
     "model.layers.20.self_attn.q_proj.weight": "model-00002-of-00005.safetensors",
     "model.layers.20.self_attn.v_proj.weight": "model-00002-of-00005.safetensors",
-    "model.layers.21.input_layernorm.weight": "model-00002-of-00005.safetensors",
-    "model.layers.21.mlp.down_proj.weight": "model-00002-of-00005.safetensors",
+    "model.layers.21.input_layernorm.weight": "model-00003-of-00005.safetensors",
+    "model.layers.21.mlp.down_proj.weight": "model-00003-of-00005.safetensors",
     "model.layers.21.mlp.gate_proj.weight": "model-00002-of-00005.safetensors",
     "model.layers.21.mlp.up_proj.weight": "model-00002-of-00005.safetensors",
-    "model.layers.21.post_attention_layernorm.weight": "model-00002-of-00005.safetensors",
+    "model.layers.21.post_attention_layernorm.weight": "model-00003-of-00005.safetensors",
     "model.layers.21.self_attn.k_proj.weight": "model-00002-of-00005.safetensors",
     "model.layers.21.self_attn.o_proj.weight": "model-00002-of-00005.safetensors",
     "model.layers.21.self_attn.q_proj.weight": "model-00002-of-00005.safetensors",
@@ -145,10 +145,10 @@
     "model.layers.22.mlp.gate_proj.weight": "model-00003-of-00005.safetensors",
     "model.layers.22.mlp.up_proj.weight": "model-00003-of-00005.safetensors",
     "model.layers.22.post_attention_layernorm.weight": "model-00003-of-00005.safetensors",
-    "model.layers.22.self_attn.k_proj.weight": "model-00002-of-00005.safetensors",
-    "model.layers.22.self_attn.o_proj.weight": "model-00002-of-00005.safetensors",
-    "model.layers.22.self_attn.q_proj.weight": "model-00002-of-00005.safetensors",
-    "model.layers.22.self_attn.v_proj.weight": "model-00002-of-00005.safetensors",
+    "model.layers.22.self_attn.k_proj.weight": "model-00003-of-00005.safetensors",
+    "model.layers.22.self_attn.o_proj.weight": "model-00003-of-00005.safetensors",
+    "model.layers.22.self_attn.q_proj.weight": "model-00003-of-00005.safetensors",
+    "model.layers.22.self_attn.v_proj.weight": "model-00003-of-00005.safetensors",
     "model.layers.23.input_layernorm.weight": "model-00003-of-00005.safetensors",
     "model.layers.23.mlp.down_proj.weight": "model-00003-of-00005.safetensors",
     "model.layers.23.mlp.gate_proj.weight": "model-00003-of-00005.safetensors",
@@ -250,7 +250,7 @@
     "model.layers.32.self_attn.v_proj.weight": "model-00003-of-00005.safetensors",
     "model.layers.33.input_layernorm.weight": "model-00004-of-00005.safetensors",
     "model.layers.33.mlp.down_proj.weight": "model-00004-of-00005.safetensors",
-    "model.layers.33.mlp.gate_proj.weight": "model-00003-of-00005.safetensors",
+    "model.layers.33.mlp.gate_proj.weight": "model-00004-of-00005.safetensors",
     "model.layers.33.mlp.up_proj.weight": "model-00004-of-00005.safetensors",
     "model.layers.33.post_attention_layernorm.weight": "model-00004-of-00005.safetensors",
     "model.layers.33.self_attn.k_proj.weight": "model-00003-of-00005.safetensors",
@@ -359,7 +359,7 @@
     "model.layers.44.input_layernorm.weight": "model-00005-of-00005.safetensors",
     "model.layers.44.mlp.down_proj.weight": "model-00005-of-00005.safetensors",
     "model.layers.44.mlp.gate_proj.weight": "model-00004-of-00005.safetensors",
-    "model.layers.44.mlp.up_proj.weight": "model-00004-of-00005.safetensors",
+    "model.layers.44.mlp.up_proj.weight": "model-00005-of-00005.safetensors",
     "model.layers.44.post_attention_layernorm.weight": "model-00005-of-00005.safetensors",
     "model.layers.44.self_attn.k_proj.weight": "model-00004-of-00005.safetensors",
     "model.layers.44.self_attn.o_proj.weight": "model-00004-of-00005.safetensors",
special_tokens_map.json CHANGED
@@ -7,7 +7,7 @@
     "single_word": false
   },
   "eos_token": {
-    "content": "</s>",
+    "content": "<|im_end|>",
     "lstrip": false,
     "normalized": false,
     "rstrip": false,
tokenizer.json CHANGED
The diff for this file is too large to render.

tokenizer_config.json CHANGED
The diff for this file is too large to render.