satreysa committed on
Commit
4e495fb
·
verified ·
1 Parent(s): 7e14341

Upload folder using huggingface_hub

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ model_jit.onnx.data filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ language:
3
+ - zh
4
+ - en
5
+ tags:
6
+ - glm
7
+ - chatglm
8
+ - thudm
9
+ - ryzenai-npu
10
+ base_model: THUDM/chatglm3-6b
11
+ ---
12
+
13
+ # chatglm3-6b
14
+ - ## Introduction
15
+ This model was created using Quark Quantization, followed by OGA Model Builder, and finalized with post-processing for NPU deployment.
16
+ - ## Quantization Strategy
17
+ - AWQ / Group 128 / Asymmetric / BF16 activations / UINT4 weights
18
+
19
+ - ## Quick Start
20
+ For quickstart, refer to [Ryzen AI documentation](https://ryzenai.docs.amd.com/en/latest/npu_oga.html)
21
+
22
+ #### Evaluation scores
23
+ The perplexity measurement is run on the wikitext-2-raw-v1 (raw data) dataset provided by Hugging Face. Perplexity score measured for prompt length 2k is 29.81679.
24
+
25
+
26
+
27
+ #### License
28
+ Modifications Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
29
+
30
+ Licensed under the Apache License, Version 2.0 (the "License");
31
+ you may not use this file except in compliance with the License.
32
+ You may obtain a copy of the License at
33
+
34
+ http://www.apache.org/licenses/LICENSE-2.0
35
+
36
+ Unless required by applicable law or agreed to in writing, software
37
+ distributed under the License is distributed on an "AS IS" BASIS,
38
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
39
+ See the License for the specific language governing permissions and
40
+ limitations under the License.
chat_template.jinja ADDED
@@ -0,0 +1 @@
 
 
1
+ {% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% else %}{% set loop_messages = messages %}{% set system_message = false %}{% endif %}{% for message in loop_messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if loop.index0 == 0 and system_message != false %}{% set content = '<<SYS>>\n' + system_message + '\n<</SYS>>\n\n' + message['content'] %}{% else %}{% set content = message['content'] %}{% endif %}{% if message['role'] == 'user' %}{{ bos_token + '[INST] ' + content | trim + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ ' ' + content | trim + ' ' + eos_token }}{% endif %}{% endfor %}
config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {}
genai_config.json ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model": {
3
+ "bos_token_id": 1,
4
+ "context_length": 16384,
5
+ "decoder": {
6
+ "session_options": {
7
+ "log_id": "onnxruntime-genai",
8
+ "custom_ops_library": "onnx_custom_ops.dll",
9
+ "custom_allocator": "shared_d3d_xrt",
10
+ "external_data_file": "model_jit.pb.bin",
11
+ "config_entries": {
12
+ "hybrid_opt_max_seq_length": "4096"
13
+ },
14
+ "provider_options": []
15
+ },
16
+ "filename": "model_jit.onnx",
17
+ "head_size": 128,
18
+ "hidden_size": 4096,
19
+ "inputs": {
20
+ "input_ids": "input_ids",
21
+ "attention_mask": "attention_mask",
22
+ "position_ids": "position_ids",
23
+ "past_key_names": "past_key_values.%d.key",
24
+ "past_value_names": "past_key_values.%d.value"
25
+ },
26
+ "outputs": {
27
+ "logits": "logits",
28
+ "present_key_names": "present.%d.key",
29
+ "present_value_names": "present.%d.value"
30
+ },
31
+ "num_attention_heads": 32,
32
+ "num_hidden_layers": 32,
33
+ "num_key_value_heads": 32
34
+ },
35
+ "eos_token_id": 2,
36
+ "pad_token_id": 2,
37
+ "type": "llama",
38
+ "vocab_size": 32016
39
+ },
40
+ "search": {
41
+ "diversity_penalty": 0.0,
42
+ "do_sample": false,
43
+ "early_stopping": true,
44
+ "length_penalty": 1.0,
45
+ "max_length": 16384,
46
+ "min_length": 0,
47
+ "no_repeat_ngram_size": 0,
48
+ "num_beams": 1,
49
+ "num_return_sequences": 1,
50
+ "past_present_share_buffer": true,
51
+ "repetition_penalty": 1.0,
52
+ "temperature": 1.0,
53
+ "top_k": 50,
54
+ "top_p": 1.0
55
+ }
56
+ }
model_jit.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e9425e03136e95f0c1d090540866c814d91d425091d5517c6d7de6dc7cfa9c07
3
+ size 3532849152
model_jit.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9602fcc141d8b1d470368d89965880af52a161e2f82a1c9e5052426a55cd23d3
3
+ size 348600
model_jit.onnx.data ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f77a78d6536d9dd7b5edb7fba3095ad60c7aaad90f22e17e8f62c3047f020db5
3
+ size 3700667648
model_jit.pb.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fcdd505a671d464bdda119f55d2bffbdb0c96d418a5aeb5a0a41a129bdf1256f
3
+ size 7826
rai_config.json ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "max_prompt_length": {
3
+ "1.3.1": 2048,
4
+ "1.4.0": 2048,
5
+ "1.4.1": 2048,
6
+ "1.5.0": 2048,
7
+ "1.5.1": 2048,
8
+ "1.6.0": 4096
9
+ }
10
+
11
+ }
special_tokens_map.json ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "▁<PRE>",
4
+ "▁<MID>",
5
+ "▁<SUF>",
6
+ "▁<EOT>",
7
+ "▁<PRE>",
8
+ "▁<MID>",
9
+ "▁<SUF>",
10
+ "▁<EOT>"
11
+ ],
12
+ "bos_token": {
13
+ "content": "<s>",
14
+ "lstrip": false,
15
+ "normalized": false,
16
+ "rstrip": false,
17
+ "single_word": false
18
+ },
19
+ "eos_token": {
20
+ "content": "</s>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false
25
+ },
26
+ "pad_token": {
27
+ "content": "</s>",
28
+ "lstrip": false,
29
+ "normalized": false,
30
+ "rstrip": false,
31
+ "single_word": false
32
+ },
33
+ "unk_token": {
34
+ "content": "<unk>",
35
+ "lstrip": false,
36
+ "normalized": false,
37
+ "rstrip": false,
38
+ "single_word": false
39
+ }
40
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:45ccb9c8b6b561889acea59191d66986d314e7cbd6a78abc6e49b139ca91c1e6
3
+ size 500058
tokenizer_config.json ADDED
@@ -0,0 +1,89 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": true,
3
+ "add_eos_token": false,
4
+ "added_tokens_decoder": {
5
+ "0": {
6
+ "content": "<unk>",
7
+ "lstrip": false,
8
+ "normalized": false,
9
+ "rstrip": false,
10
+ "single_word": false,
11
+ "special": true
12
+ },
13
+ "1": {
14
+ "content": "<s>",
15
+ "lstrip": false,
16
+ "normalized": false,
17
+ "rstrip": false,
18
+ "single_word": false,
19
+ "special": true
20
+ },
21
+ "2": {
22
+ "content": "</s>",
23
+ "lstrip": false,
24
+ "normalized": false,
25
+ "rstrip": false,
26
+ "single_word": false,
27
+ "special": true
28
+ },
29
+ "32007": {
30
+ "content": "▁<PRE>",
31
+ "lstrip": false,
32
+ "normalized": false,
33
+ "rstrip": false,
34
+ "single_word": false,
35
+ "special": true
36
+ },
37
+ "32008": {
38
+ "content": "▁<SUF>",
39
+ "lstrip": false,
40
+ "normalized": false,
41
+ "rstrip": false,
42
+ "single_word": false,
43
+ "special": true
44
+ },
45
+ "32009": {
46
+ "content": "▁<MID>",
47
+ "lstrip": false,
48
+ "normalized": false,
49
+ "rstrip": false,
50
+ "single_word": false,
51
+ "special": true
52
+ },
53
+ "32010": {
54
+ "content": "▁<EOT>",
55
+ "lstrip": false,
56
+ "normalized": false,
57
+ "rstrip": false,
58
+ "single_word": false,
59
+ "special": true
60
+ }
61
+ },
62
+ "additional_special_tokens": [
63
+ "▁<PRE>",
64
+ "▁<MID>",
65
+ "▁<SUF>",
66
+ "▁<EOT>",
67
+ "▁<PRE>",
68
+ "▁<MID>",
69
+ "▁<SUF>",
70
+ "▁<EOT>"
71
+ ],
72
+ "bos_token": "<s>",
73
+ "clean_up_tokenization_spaces": false,
74
+ "eos_token": "</s>",
75
+ "eot_token": "▁<EOT>",
76
+ "extra_special_tokens": {},
77
+ "fill_token": "<FILL_ME>",
78
+ "legacy": null,
79
+ "middle_token": "▁<MID>",
80
+ "model_max_length": 1000000000000000019884624838656,
81
+ "pad_token": "</s>",
82
+ "padding_side": "left",
83
+ "prefix_token": "▁<PRE>",
84
+ "sp_model_kwargs": {},
85
+ "suffix_token": "▁<SUF>",
86
+ "tokenizer_class": "CodeLlamaTokenizer",
87
+ "unk_token": "<unk>",
88
+ "use_default_system_prompt": false
89
+ }