kvaishnavi committed on
Commit
882b7ff
1 Parent(s): 791e509

Upload optimized CPU ONNX models

Browse files
README.md CHANGED
@@ -143,9 +143,9 @@ Note: PyTorch compile and Llama.cpp currently do not support the Phi-3 Mini-128K
143
  | torch | 2.2.0 |
144
  | triton | 2.2.0 |
145
  | onnxruntime-gpu | 1.18.0 |
146
- | onnxruntime-genai | 0.2.0rc4 |
147
- | onnxruntime-genai-cuda | 0.2.0rc4 |
148
- | onnxruntime-genai-directml | 0.2.0rc4 |
149
  | transformers | 4.39.0 |
150
  | bitsandbytes | 0.42.0 |
151
 
 
143
  | torch | 2.2.0 |
144
  | triton | 2.2.0 |
145
  | onnxruntime-gpu | 1.18.0 |
146
+ | onnxruntime-genai | 0.2.0 |
147
+ | onnxruntime-genai-cuda | 0.2.0 |
148
+ | onnxruntime-genai-directml | 0.2.0 |
149
  | transformers | 4.39.0 |
150
  | bitsandbytes | 0.42.0 |
151
 
cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/genai_config.json CHANGED
@@ -13,7 +13,6 @@
13
  "inputs": {
14
  "input_ids": "input_ids",
15
  "attention_mask": "attention_mask",
16
- "position_ids": "position_ids",
17
  "past_key_names": "past_key_values.%d.key",
18
  "past_value_names": "past_key_values.%d.value"
19
  },
@@ -45,10 +44,10 @@
45
  "no_repeat_ngram_size": 0,
46
  "num_beams": 1,
47
  "num_return_sequences": 1,
48
- "past_present_share_buffer": false,
49
  "repetition_penalty": 1.0,
50
  "temperature": 1.0,
51
  "top_k": 1,
52
  "top_p": 1.0
53
  }
54
- }
 
13
  "inputs": {
14
  "input_ids": "input_ids",
15
  "attention_mask": "attention_mask",
 
16
  "past_key_names": "past_key_values.%d.key",
17
  "past_value_names": "past_key_values.%d.value"
18
  },
 
44
  "no_repeat_ngram_size": 0,
45
  "num_beams": 1,
46
  "num_return_sequences": 1,
47
+ "past_present_share_buffer": true,
48
  "repetition_penalty": 1.0,
49
  "temperature": 1.0,
50
  "top_k": 1,
51
  "top_p": 1.0
52
  }
53
+ }
cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/phi3-mini-128k-instruct-cpu-int4-rtn-block-32-acc-level-4.onnx CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ab01ff406f32f83d3954b53976ccc70d070b4186cbd7c46da7b4f6483c18ab9a
3
- size 52231083
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f22fca92fd03c5efa368a06f5bb668015d3a36677c01b241dda31af758f3d888
3
+ size 52137679
cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/phi3-mini-128k-instruct-cpu-int4-rtn-block-32-acc-level-4.onnx.data CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:96f994210878f40a67cb1690e8ff3a94653d84f18886ca7c8ba9c6fc3eec1cd9
3
  size 2721288192
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c0a595a4eac2075818630d881e8cb2b8e97cb85ee6a0ff63d68c4b9a9a05a1c9
3
  size 2721288192
cpu_and_mobile/cpu-int4-rtn-block-32/genai_config.json CHANGED
@@ -13,7 +13,6 @@
13
  "inputs": {
14
  "input_ids": "input_ids",
15
  "attention_mask": "attention_mask",
16
- "position_ids": "position_ids",
17
  "past_key_names": "past_key_values.%d.key",
18
  "past_value_names": "past_key_values.%d.value"
19
  },
@@ -45,10 +44,10 @@
45
  "no_repeat_ngram_size": 0,
46
  "num_beams": 1,
47
  "num_return_sequences": 1,
48
- "past_present_share_buffer": false,
49
  "repetition_penalty": 1.0,
50
  "temperature": 1.0,
51
  "top_k": 1,
52
  "top_p": 1.0
53
  }
54
- }
 
13
  "inputs": {
14
  "input_ids": "input_ids",
15
  "attention_mask": "attention_mask",
 
16
  "past_key_names": "past_key_values.%d.key",
17
  "past_value_names": "past_key_values.%d.value"
18
  },
 
44
  "no_repeat_ngram_size": 0,
45
  "num_beams": 1,
46
  "num_return_sequences": 1,
47
+ "past_present_share_buffer": true,
48
  "repetition_penalty": 1.0,
49
  "temperature": 1.0,
50
  "top_k": 1,
51
  "top_p": 1.0
52
  }
53
+ }
cpu_and_mobile/cpu-int4-rtn-block-32/phi3-mini-128k-instruct-cpu-int4-rtn-block-32.onnx CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cbcb209993e7508380321a5b57f33c24c5c20ae3f9d22f6cc6b51c5f4bdab79a
3
- size 52219716
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4e0cff7f236dc76ce10815f122541cb503ef38801b9cf6c6cc48e1c3dffe09ca
3
+ size 52129320
cpu_and_mobile/cpu-int4-rtn-block-32/phi3-mini-128k-instruct-cpu-int4-rtn-block-32.onnx.data CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:96f994210878f40a67cb1690e8ff3a94653d84f18886ca7c8ba9c6fc3eec1cd9
3
  size 2721288192
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c0a595a4eac2075818630d881e8cb2b8e97cb85ee6a0ff63d68c4b9a9a05a1c9
3
  size 2721288192