Upload optimized CPU ONNX models

Files changed (7) hide show

README.md CHANGED Viewed

@@ -143,9 +143,9 @@ Note: PyTorch compile and Llama.cpp currently do not support the Phi-3 Mini-128K
 | torch                      | 2.2.0    |
 | triton                     | 2.2.0    |
 | onnxruntime-gpu            | 1.18.0   |
-| onnxruntime-genai          | 0.2.0rc4 |
-| onnxruntime-genai-cuda     | 0.2.0rc4 |
-| onnxruntime-genai-directml | 0.2.0rc4 |
 | transformers               | 4.39.0   |
 | bitsandbytes               | 0.42.0   |

 | torch                      | 2.2.0    |
 | triton                     | 2.2.0    |
 | onnxruntime-gpu            | 1.18.0   |
+| onnxruntime-genai          | 0.2.0    |
+| onnxruntime-genai-cuda     | 0.2.0    |
+| onnxruntime-genai-directml | 0.2.0    |
 | transformers               | 4.39.0   |
 | bitsandbytes               | 0.42.0   |

cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/genai_config.json CHANGED Viewed

@@ -13,7 +13,6 @@
             "inputs": {
                 "input_ids": "input_ids",
                 "attention_mask": "attention_mask",
-                "position_ids": "position_ids",
                 "past_key_names": "past_key_values.%d.key",
                 "past_value_names": "past_key_values.%d.value"
             },
@@ -45,10 +44,10 @@
         "no_repeat_ngram_size": 0,
         "num_beams": 1,
         "num_return_sequences": 1,
-        "past_present_share_buffer": false,
         "repetition_penalty": 1.0,
         "temperature": 1.0,
         "top_k": 1,
         "top_p": 1.0
     }
-}

             "inputs": {
                 "input_ids": "input_ids",
                 "attention_mask": "attention_mask",
                 "past_key_names": "past_key_values.%d.key",
                 "past_value_names": "past_key_values.%d.value"
             },
         "no_repeat_ngram_size": 0,
         "num_beams": 1,
         "num_return_sequences": 1,
+        "past_present_share_buffer": true,
         "repetition_penalty": 1.0,
         "temperature": 1.0,
         "top_k": 1,
         "top_p": 1.0
     }
+}

cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/phi3-mini-128k-instruct-cpu-int4-rtn-block-32-acc-level-4.onnx CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ab01ff406f32f83d3954b53976ccc70d070b4186cbd7c46da7b4f6483c18ab9a
-size 52231083

 version https://git-lfs.github.com/spec/v1
+oid sha256:f22fca92fd03c5efa368a06f5bb668015d3a36677c01b241dda31af758f3d888
+size 52137679

cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/phi3-mini-128k-instruct-cpu-int4-rtn-block-32-acc-level-4.onnx.data CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:96f994210878f40a67cb1690e8ff3a94653d84f18886ca7c8ba9c6fc3eec1cd9
 size 2721288192

 version https://git-lfs.github.com/spec/v1
+oid sha256:c0a595a4eac2075818630d881e8cb2b8e97cb85ee6a0ff63d68c4b9a9a05a1c9
 size 2721288192

cpu_and_mobile/cpu-int4-rtn-block-32/genai_config.json CHANGED Viewed

@@ -13,7 +13,6 @@
             "inputs": {
                 "input_ids": "input_ids",
                 "attention_mask": "attention_mask",
-                "position_ids": "position_ids",
                 "past_key_names": "past_key_values.%d.key",
                 "past_value_names": "past_key_values.%d.value"
             },
@@ -45,10 +44,10 @@
         "no_repeat_ngram_size": 0,
         "num_beams": 1,
         "num_return_sequences": 1,
-        "past_present_share_buffer": false,
         "repetition_penalty": 1.0,
         "temperature": 1.0,
         "top_k": 1,
         "top_p": 1.0
     }
-}

             "inputs": {
                 "input_ids": "input_ids",
                 "attention_mask": "attention_mask",
                 "past_key_names": "past_key_values.%d.key",
                 "past_value_names": "past_key_values.%d.value"
             },
         "no_repeat_ngram_size": 0,
         "num_beams": 1,
         "num_return_sequences": 1,
+        "past_present_share_buffer": true,
         "repetition_penalty": 1.0,
         "temperature": 1.0,
         "top_k": 1,
         "top_p": 1.0
     }
+}

cpu_and_mobile/cpu-int4-rtn-block-32/phi3-mini-128k-instruct-cpu-int4-rtn-block-32.onnx CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:cbcb209993e7508380321a5b57f33c24c5c20ae3f9d22f6cc6b51c5f4bdab79a
-size 52219716

 version https://git-lfs.github.com/spec/v1
+oid sha256:4e0cff7f236dc76ce10815f122541cb503ef38801b9cf6c6cc48e1c3dffe09ca
+size 52129320

cpu_and_mobile/cpu-int4-rtn-block-32/phi3-mini-128k-instruct-cpu-int4-rtn-block-32.onnx.data CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:96f994210878f40a67cb1690e8ff3a94653d84f18886ca7c8ba9c6fc3eec1cd9
 size 2721288192

 version https://git-lfs.github.com/spec/v1
+oid sha256:c0a595a4eac2075818630d881e8cb2b8e97cb85ee6a0ff63d68c4b9a9a05a1c9
 size 2721288192