zackhlk commited on
Commit
2847905
·
1 Parent(s): f53a00c

Upload 7 files

Browse files
README.md CHANGED
@@ -1,3 +1,39 @@
 
1
  ---
 
 
2
  license: apache-2.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
  ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
  ---
3
+ # For reference on model card metadata, see the spec: https://github.com/huggingface/hub-docs/blob/main/modelcard.md?plain=1
4
+ # Doc / guide: https://huggingface.co/docs/hub/model-cards
5
  license: apache-2.0
6
+ language:
7
+ - zh
8
+ widget:
9
+ - text: >-
10
+ A chat between a curious user and an artificial intelligence assistant.
11
+ The assistant gives helpful, detailed, and polite answers to the user's
12
+ questions. USER: 你好,請問你可以幫我寫一封推薦信嗎? ASSISTANT:
13
+ library_name: transformers
14
+ pipeline_tag: text-generation
15
+ extra_gated_heading: Acknowledge license to accept the repository.
16
+ extra_gated_prompt: Please contact the author for access.
17
+ extra_gated_button_content: Acknowledge license 同意以上內容
18
+ extra_gated_fields:
19
+ Name: text
20
+ Mail: text
21
+ Organization: text
22
+ Country: text
23
+ Any utilization of the Taiwan LLM repository mandates the explicit acknowledgment and attribution to the original author: checkbox
24
+ 使用Taiwan LLM必須明確地承認和歸功於優必達株式會社 Ubitus 以及原始作者: checkbox
25
  ---
26
+
27
+ # Taiwan LLM based on LLaMa2-7b
28
+
29
+ continue pretraining on 20 billion tokens in traditional mandarin and instruction fine-tuning on millions of conversations.
30
+
31
+ This version does NOT include commoncrawl.
32
+
33
+ # 🌟 Checkout New [Taiwan-LLM Demo Chat-UI](http://www.twllm.com) 🌟
34
+
35
+ # Collaboration with Ubitus K.K. 💪💪💪
36
+
37
+ 本項目與 Ubitus K.K. 合作進行。Ubitus 為本項目提供寶貴的技術支持和計算資源。
38
+
39
+ Taiwan LLM v2 is conducted in collaboration with [Ubitus K.K.](http://ubitus.net). Ubitus provides valuable technical support and compute resources for the project.
model-00001-of-00002.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6b95b2b3becc6e0e9464aceff76f1ae535c07b4c721334998d08a006f85ebec8
3
+ size 9976570520
model-00002-of-00002.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:554b83035014c8c4da6909019fb375ae10d2e73cd06c16c0ff001b437fa6cd54
3
+ size 3500294544
special_tokens_map.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "</s>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "<unk>",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "unk_token": {
24
+ "content": "<unk>",
25
+ "lstrip": false,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ }
30
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
3
+ size 499723
tokenizer_config.json ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": true,
3
+ "add_eos_token": false,
4
+ "added_tokens_decoder": {
5
+ "0": {
6
+ "content": "<unk>",
7
+ "lstrip": false,
8
+ "normalized": false,
9
+ "rstrip": false,
10
+ "single_word": false,
11
+ "special": true
12
+ },
13
+ "1": {
14
+ "content": "<s>",
15
+ "lstrip": false,
16
+ "normalized": false,
17
+ "rstrip": false,
18
+ "single_word": false,
19
+ "special": true
20
+ },
21
+ "2": {
22
+ "content": "</s>",
23
+ "lstrip": false,
24
+ "normalized": false,
25
+ "rstrip": false,
26
+ "single_word": false,
27
+ "special": true
28
+ }
29
+ },
30
+ "bos_token": "<s>",
31
+ "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'].strip() %}{% else %}{% set loop_messages = messages %}{% set system_message = '你是人工智慧助理,以下是用戶和人工智能助理之間的對話。你要對用戶的問題提供有用、安全、詳細和禮貌的回答。' %}{% endif %}{{system_message + eos_token}}{% for message in loop_messages %}{% if message['role'] == 'user' %}USER: {{ message['content'].strip() + eos_token }}{% elif message['role'] == 'system' %}{{message['content'].strip() + eos_token}}{% elif message['role'] == 'assistant' %}ASSISTANT: {{ message['content'].strip() + eos_token }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{'ASSISTANT:'}}{% endif %}",
32
+ "clean_up_tokenization_spaces": false,
33
+ "eos_token": "</s>",
34
+ "legacy": false,
35
+ "model_max_length": 4096,
36
+ "pad_token": "<unk>",
37
+ "padding_side": "right",
38
+ "sp_model_kwargs": {},
39
+ "spaces_between_special_tokens": false,
40
+ "tokenizer_class": "LlamaTokenizer",
41
+ "unk_token": "<unk>",
42
+ "use_default_system_prompt": true
43
+ }