kvaishnavi
committed on
Commit
•
24fd626
1
Parent(s):
3b2618a
Upload optimized CPU ONNX models
Browse files
- README.md +3 -3
- cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/genai_config.json +2 -3
- cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/phi3-mini-4k-instruct-cpu-int4-rtn-block-32-acc-level-4.onnx +2 -2
- cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/phi3-mini-4k-instruct-cpu-int4-rtn-block-32-acc-level-4.onnx.data +1 -1
- cpu_and_mobile/cpu-int4-rtn-block-32/genai_config.json +2 -3
- cpu_and_mobile/cpu-int4-rtn-block-32/phi3-mini-4k-instruct-cpu-int4-rtn-block-32.onnx +2 -2
- cpu_and_mobile/cpu-int4-rtn-block-32/phi3-mini-4k-instruct-cpu-int4-rtn-block-32.onnx.data +1 -1
README.md
CHANGED
@@ -169,9 +169,9 @@ The table below shows the average throughput of the first 256 tokens generated (
|
|
169 |
| torch | 2.2.0 |
|
170 |
| triton | 2.2.0 |
|
171 |
| onnxruntime-gpu | 1.18.0 |
|
172 |
-
| onnxruntime-genai | 0.2.
|
173 |
-
| onnxruntime-genai-cuda | 0.2.
|
174 |
-
| onnxruntime-genai-directml | 0.2.
|
175 |
| transformers | 4.39.0 |
|
176 |
| bitsandbytes | 0.42.0 |
|
177 |
|
|
|
169 |
| torch | 2.2.0 |
|
170 |
| triton | 2.2.0 |
|
171 |
| onnxruntime-gpu | 1.18.0 |
|
172 |
+
| onnxruntime-genai | 0.2.0 |
|
173 |
+
| onnxruntime-genai-cuda | 0.2.0 |
|
174 |
+
| onnxruntime-genai-directml | 0.2.0 |
|
175 |
| transformers | 4.39.0 |
|
176 |
| bitsandbytes | 0.42.0 |
|
177 |
|
cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/genai_config.json
CHANGED
@@ -13,7 +13,6 @@
|
|
13 |
"inputs": {
|
14 |
"input_ids": "input_ids",
|
15 |
"attention_mask": "attention_mask",
|
16 |
-
"position_ids": "position_ids",
|
17 |
"past_key_names": "past_key_values.%d.key",
|
18 |
"past_value_names": "past_key_values.%d.value"
|
19 |
},
|
@@ -45,10 +44,10 @@
|
|
45 |
"no_repeat_ngram_size": 0,
|
46 |
"num_beams": 1,
|
47 |
"num_return_sequences": 1,
|
48 |
-
"past_present_share_buffer":
|
49 |
"repetition_penalty": 1.0,
|
50 |
"temperature": 1.0,
|
51 |
"top_k": 1,
|
52 |
"top_p": 1.0
|
53 |
}
|
54 |
-
}
|
|
|
13 |
"inputs": {
|
14 |
"input_ids": "input_ids",
|
15 |
"attention_mask": "attention_mask",
|
|
|
16 |
"past_key_names": "past_key_values.%d.key",
|
17 |
"past_value_names": "past_key_values.%d.value"
|
18 |
},
|
|
|
44 |
"no_repeat_ngram_size": 0,
|
45 |
"num_beams": 1,
|
46 |
"num_return_sequences": 1,
|
47 |
+
"past_present_share_buffer": true,
|
48 |
"repetition_penalty": 1.0,
|
49 |
"temperature": 1.0,
|
50 |
"top_k": 1,
|
51 |
"top_p": 1.0
|
52 |
}
|
53 |
+
}
|
cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/phi3-mini-4k-instruct-cpu-int4-rtn-block-32-acc-level-4.onnx
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:385cd1b908a0d2f8634e86d30236f6dbb7ae660eb3943fd1ef5bdc3847326480
|
3 |
+
size 231335
|
cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/phi3-mini-4k-instruct-cpu-int4-rtn-block-32-acc-level-4.onnx.data
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 2722861056
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5db30ce699aee1123cf9045742488db5928006fa618a42cb3c0840322a85ad0f
|
3 |
size 2722861056
|
cpu_and_mobile/cpu-int4-rtn-block-32/genai_config.json
CHANGED
@@ -13,7 +13,6 @@
|
|
13 |
"inputs": {
|
14 |
"input_ids": "input_ids",
|
15 |
"attention_mask": "attention_mask",
|
16 |
-
"position_ids": "position_ids",
|
17 |
"past_key_names": "past_key_values.%d.key",
|
18 |
"past_value_names": "past_key_values.%d.value"
|
19 |
},
|
@@ -45,10 +44,10 @@
|
|
45 |
"no_repeat_ngram_size": 0,
|
46 |
"num_beams": 1,
|
47 |
"num_return_sequences": 1,
|
48 |
-
"past_present_share_buffer":
|
49 |
"repetition_penalty": 1.0,
|
50 |
"temperature": 1.0,
|
51 |
"top_k": 1,
|
52 |
"top_p": 1.0
|
53 |
}
|
54 |
-
}
|
|
|
13 |
"inputs": {
|
14 |
"input_ids": "input_ids",
|
15 |
"attention_mask": "attention_mask",
|
|
|
16 |
"past_key_names": "past_key_values.%d.key",
|
17 |
"past_value_names": "past_key_values.%d.value"
|
18 |
},
|
|
|
44 |
"no_repeat_ngram_size": 0,
|
45 |
"num_beams": 1,
|
46 |
"num_return_sequences": 1,
|
47 |
+
"past_present_share_buffer": true,
|
48 |
"repetition_penalty": 1.0,
|
49 |
"temperature": 1.0,
|
50 |
"top_k": 1,
|
51 |
"top_p": 1.0
|
52 |
}
|
53 |
+
}
|
cpu_and_mobile/cpu-int4-rtn-block-32/phi3-mini-4k-instruct-cpu-int4-rtn-block-32.onnx
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1e1faf7ea6930f63caab12412f4a82c329eaddf6cce365e45c3cd00bb0547be8
|
3 |
+
size 222950
|
cpu_and_mobile/cpu-int4-rtn-block-32/phi3-mini-4k-instruct-cpu-int4-rtn-block-32.onnx.data
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 2722861056
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5db30ce699aee1123cf9045742488db5928006fa618a42cb3c0840322a85ad0f
|
3 |
size 2722861056
|