kvaishnavi
committed on
Commit
•
882b7ff
1
Parent(s):
791e509
Upload optimized CPU ONNX models
Browse files
- README.md +3 -3
- cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/genai_config.json +2 -3
- cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/phi3-mini-128k-instruct-cpu-int4-rtn-block-32-acc-level-4.onnx +2 -2
- cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/phi3-mini-128k-instruct-cpu-int4-rtn-block-32-acc-level-4.onnx.data +1 -1
- cpu_and_mobile/cpu-int4-rtn-block-32/genai_config.json +2 -3
- cpu_and_mobile/cpu-int4-rtn-block-32/phi3-mini-128k-instruct-cpu-int4-rtn-block-32.onnx +2 -2
- cpu_and_mobile/cpu-int4-rtn-block-32/phi3-mini-128k-instruct-cpu-int4-rtn-block-32.onnx.data +1 -1
README.md
CHANGED
@@ -143,9 +143,9 @@ Note: PyTorch compile and Llama.cpp currently do not support the Phi-3 Mini-128K
|
|
143 |
| torch | 2.2.0 |
|
144 |
| triton | 2.2.0 |
|
145 |
| onnxruntime-gpu | 1.18.0 |
|
146 |
-
| onnxruntime-genai | 0.2.
|
147 |
-
| onnxruntime-genai-cuda | 0.2.
|
148 |
-
| onnxruntime-genai-directml | 0.2.
|
149 |
| transformers | 4.39.0 |
|
150 |
| bitsandbytes | 0.42.0 |
|
151 |
|
|
|
143 |
| torch | 2.2.0 |
|
144 |
| triton | 2.2.0 |
|
145 |
| onnxruntime-gpu | 1.18.0 |
|
146 |
+
| onnxruntime-genai | 0.2.0 |
|
147 |
+
| onnxruntime-genai-cuda | 0.2.0 |
|
148 |
+
| onnxruntime-genai-directml | 0.2.0 |
|
149 |
| transformers | 4.39.0 |
|
150 |
| bitsandbytes | 0.42.0 |
|
151 |
|
cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/genai_config.json
CHANGED
@@ -13,7 +13,6 @@
|
|
13 |
"inputs": {
|
14 |
"input_ids": "input_ids",
|
15 |
"attention_mask": "attention_mask",
|
16 |
-
"position_ids": "position_ids",
|
17 |
"past_key_names": "past_key_values.%d.key",
|
18 |
"past_value_names": "past_key_values.%d.value"
|
19 |
},
|
@@ -45,10 +44,10 @@
|
|
45 |
"no_repeat_ngram_size": 0,
|
46 |
"num_beams": 1,
|
47 |
"num_return_sequences": 1,
|
48 |
-
"past_present_share_buffer":
|
49 |
"repetition_penalty": 1.0,
|
50 |
"temperature": 1.0,
|
51 |
"top_k": 1,
|
52 |
"top_p": 1.0
|
53 |
}
|
54 |
-
}
|
|
|
13 |
"inputs": {
|
14 |
"input_ids": "input_ids",
|
15 |
"attention_mask": "attention_mask",
|
|
|
16 |
"past_key_names": "past_key_values.%d.key",
|
17 |
"past_value_names": "past_key_values.%d.value"
|
18 |
},
|
|
|
44 |
"no_repeat_ngram_size": 0,
|
45 |
"num_beams": 1,
|
46 |
"num_return_sequences": 1,
|
47 |
+
"past_present_share_buffer": true,
|
48 |
"repetition_penalty": 1.0,
|
49 |
"temperature": 1.0,
|
50 |
"top_k": 1,
|
51 |
"top_p": 1.0
|
52 |
}
|
53 |
+
}
|
cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/phi3-mini-128k-instruct-cpu-int4-rtn-block-32-acc-level-4.onnx
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f22fca92fd03c5efa368a06f5bb668015d3a36677c01b241dda31af758f3d888
|
3 |
+
size 52137679
|
cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/phi3-mini-128k-instruct-cpu-int4-rtn-block-32-acc-level-4.onnx.data
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 2721288192
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c0a595a4eac2075818630d881e8cb2b8e97cb85ee6a0ff63d68c4b9a9a05a1c9
|
3 |
size 2721288192
|
cpu_and_mobile/cpu-int4-rtn-block-32/genai_config.json
CHANGED
@@ -13,7 +13,6 @@
|
|
13 |
"inputs": {
|
14 |
"input_ids": "input_ids",
|
15 |
"attention_mask": "attention_mask",
|
16 |
-
"position_ids": "position_ids",
|
17 |
"past_key_names": "past_key_values.%d.key",
|
18 |
"past_value_names": "past_key_values.%d.value"
|
19 |
},
|
@@ -45,10 +44,10 @@
|
|
45 |
"no_repeat_ngram_size": 0,
|
46 |
"num_beams": 1,
|
47 |
"num_return_sequences": 1,
|
48 |
-
"past_present_share_buffer":
|
49 |
"repetition_penalty": 1.0,
|
50 |
"temperature": 1.0,
|
51 |
"top_k": 1,
|
52 |
"top_p": 1.0
|
53 |
}
|
54 |
-
}
|
|
|
13 |
"inputs": {
|
14 |
"input_ids": "input_ids",
|
15 |
"attention_mask": "attention_mask",
|
|
|
16 |
"past_key_names": "past_key_values.%d.key",
|
17 |
"past_value_names": "past_key_values.%d.value"
|
18 |
},
|
|
|
44 |
"no_repeat_ngram_size": 0,
|
45 |
"num_beams": 1,
|
46 |
"num_return_sequences": 1,
|
47 |
+
"past_present_share_buffer": true,
|
48 |
"repetition_penalty": 1.0,
|
49 |
"temperature": 1.0,
|
50 |
"top_k": 1,
|
51 |
"top_p": 1.0
|
52 |
}
|
53 |
+
}
|
cpu_and_mobile/cpu-int4-rtn-block-32/phi3-mini-128k-instruct-cpu-int4-rtn-block-32.onnx
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4e0cff7f236dc76ce10815f122541cb503ef38801b9cf6c6cc48e1c3dffe09ca
|
3 |
+
size 52129320
|
cpu_and_mobile/cpu-int4-rtn-block-32/phi3-mini-128k-instruct-cpu-int4-rtn-block-32.onnx.data
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 2721288192
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c0a595a4eac2075818630d881e8cb2b8e97cb85ee6a0ff63d68c4b9a9a05a1c9
|
3 |
size 2721288192
|