Update Nanbeige1.5 8B Chat Model

Browse files

Files changed (10) hide show

added_tokens.json +2 -2
config.json +2 -2
generation_config.json +1 -1
model-00001-of-00004.safetensors +1 -1
model-00002-of-00004.safetensors +1 -1
model-00003-of-00004.safetensors +1 -1
model-00004-of-00004.safetensors +1 -1
special_tokens_map.json +23 -0
tokenizer.json +3 -3
tokenizer_config.json +9 -5

added_tokens.json CHANGED Viewed

@@ -1,5 +1,5 @@
 {
   "<|endoftext|>": 58980,
-  "<|im_end|>": 58978,
-  "<|im_start|>": 58979
 }

 {
   "<|endoftext|>": 58980,
+  "<|im_end|>": 58979,
+  "<|im_start|>": 58978
 }

config.json CHANGED Viewed

@@ -7,7 +7,7 @@
     "AutoModelForCausalLM": "modeling_nanbeige.NanbeigeForCausalLM"
   },
   "bos_token_id": 1,
-  "eos_token_id": 58978,
   "hidden_act": "silu",
   "hidden_size": 4096,
   "initializer_range": 0.02,
@@ -25,7 +25,7 @@
   "rope_theta": 10000.0,
   "tie_word_embeddings": false,
   "torch_dtype": "bfloat16",
-  "transformers_version": "4.37.0",
   "use_cache": true,
   "vocab_size": 59392
 }

     "AutoModelForCausalLM": "modeling_nanbeige.NanbeigeForCausalLM"
   },
   "bos_token_id": 1,
+  "eos_token_id": 58979,
   "hidden_act": "silu",
   "hidden_size": 4096,
   "initializer_range": 0.02,
   "rope_theta": 10000.0,
   "tie_word_embeddings": false,
   "torch_dtype": "bfloat16",
+  "transformers_version": "4.35.0",
   "use_cache": true,
   "vocab_size": 59392
 }

generation_config.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "_from_model_config": true,
   "bos_token_id": 1,
-  "eos_token_id": 58978,
   "max_length": 4096,
   "pad_token_id": 0,
   "do_sample": true,

 {
   "_from_model_config": true,
   "bos_token_id": 1,
+  "eos_token_id": 58979,
   "max_length": 4096,
   "pad_token_id": 0,
   "do_sample": true,

model-00001-of-00004.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:18be302bfb6dea82e57be5a59543363935113c03d3fc6d43e203f1b3c59d10b2
 size 4972539320

 version https://git-lfs.github.com/spec/v1
+oid sha256:5c94b07a7e08d2e4020ddfc5b174af27361a7a99e36eb08da0624fd7d3201b20
 size 4972539320

model-00002-of-00004.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:569bfe51e2b0c021aaf576f5ad0962474b752294b2e1fd6b32db7e465c7a5c97
 size 4957876888

 version https://git-lfs.github.com/spec/v1
+oid sha256:9c080f13d52bd6d74de59eb8144ad92f0ff8d523dd80bb0fdcbc13f6f15f569f
 size 4957876888

model-00003-of-00004.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2512fd0bcf2917726595baf9ab525f9894665b8fcc7aba99074575f8b6a6fd1f
 size 4947390888

 version https://git-lfs.github.com/spec/v1
+oid sha256:188cca2bf9ab4130a11db62adb6189800c25d25086fbf318c82d09a9b2d3d1b4
 size 4947390888

model-00004-of-00004.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5630abbb5e933ba68921255cc8b9692f1ab5337ef61d98b3fbb9d2806e204b14
 size 666919584

 version https://git-lfs.github.com/spec/v1
+oid sha256:37c82939cf09ec1b7c67dd4f06eeae3e151eb6b360ddb76ffefa59081dce5c98
 size 666919584

special_tokens_map.json CHANGED Viewed

@@ -1,4 +1,27 @@
 {
   "bos_token": "<|im_start|>",
   "eos_token": "<|im_end|>",
   "pad_token": {

 {
+  "additional_special_tokens": [
+    {
+      "content": "<|im_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false
+    },
+    {
+      "content": "<|im_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false
+    },
+    {
+      "content": "<|endoftext|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false
+    }
+  ],
   "bos_token": "<|im_start|>",
   "eos_token": "<|im_end|>",
   "pad_token": {

tokenizer.json CHANGED Viewed

@@ -32,7 +32,7 @@
     },
     {
       "id": 58978,
-      "content": "<|im_end|>",
       "single_word": false,
       "lstrip": false,
       "rstrip": false,
@@ -41,7 +41,7 @@
     },
     {
       "id": 58979,
-      "content": "<|im_start|>",
       "single_word": false,
       "lstrip": false,
       "rstrip": false,
@@ -121,7 +121,7 @@
       "<|im_start|>": {
         "id": "<|im_start|>",
         "ids": [
-          58979
         ],
         "tokens": [
           "<|im_start|>"

     },
     {
       "id": 58978,
+      "content": "<|im_start|>",
       "single_word": false,
       "lstrip": false,
       "rstrip": false,
     },
     {
       "id": 58979,
+      "content": "<|im_end|>",
       "single_word": false,
       "lstrip": false,
       "rstrip": false,
       "<|im_start|>": {
         "id": "<|im_start|>",
         "ids": [
+          58978
         ],
         "tokens": [
           "<|im_start|>"

tokenizer_config.json CHANGED Viewed

@@ -1,4 +1,6 @@
 {
   "added_tokens_decoder": {
     "0": {
       "content": "<unk>",
@@ -25,7 +27,7 @@
       "special": true
     },
     "58978": {
-      "content": "<|im_end|>",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
@@ -33,7 +35,7 @@
       "special": true
     },
     "58979": {
-      "content": "<|im_start|>",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
@@ -49,9 +51,11 @@
       "special": true
     }
   },
-  "add_bos_token": false,
-  "add_eos_token": false,
-  "additional_special_tokens": [],
   "bos_token": "<|im_start|>",
   "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\n你是一个名为\"南北阁\"的人工智能助手，正在与人类用户进行交谈。你的目标是以最有帮助和最逻辑的方式回答问题，同时确保内容的安全性。你的回答中不应包含任何有害、政治化、宗教化、不道德、种族主义、非法的内容。请确保你的回答不带有社会偏见，符合社会主义价值观。如果遇到的问题无意义或事实上不连贯，请不要回答错误的内容，而是解释问题为何无效或不连贯。如果你不知道问题的答案，也请勿提供错误的信息。<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if (add_generation_prompt is defined and add_generation_prompt) %}{{ '<|im_start|>assistant\n' }}{% endif %}",
   "clean_up_tokenization_spaces": false,

 {
+  "add_bos_token": false,
+  "add_eos_token": false,
   "added_tokens_decoder": {
     "0": {
       "content": "<unk>",
       "special": true
     },
     "58978": {
+      "content": "<|im_start|>",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
       "special": true
     },
     "58979": {
+      "content": "<|im_end|>",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
       "special": true
     }
   },
+  "additional_special_tokens": [
+    "<|im_start|>",
+    "<|im_end|>",
+    "<|endoftext|>"
+  ],
   "bos_token": "<|im_start|>",
   "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\n你是一个名为\"南北阁\"的人工智能助手，正在与人类用户进行交谈。你的目标是以最有帮助和最逻辑的方式回答问题，同时确保内容的安全性。你的回答中不应包含任何有害、政治化、宗教化、不道德、种族主义、非法的内容。请确保你的回答不带有社会偏见，符合社会主义价值观。如果遇到的问题无意义或事实上不连贯，请不要回答错误的内容，而是解释问题为何无效或不连贯。如果你不知道问题的答案，也请勿提供错误的信息。<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if (add_generation_prompt is defined and add_generation_prompt) %}{{ '<|im_start|>assistant\n' }}{% endif %}",
   "clean_up_tokenization_spaces": false,