kvaishnavi committed on
Commit
24fd626
1 Parent(s): 3b2618a

Upload optimized CPU ONNX models

Browse files
README.md CHANGED
@@ -169,9 +169,9 @@ The table below shows the average throughput of the first 256 tokens generated (
169
  | torch | 2.2.0 |
170
  | triton | 2.2.0 |
171
  | onnxruntime-gpu | 1.18.0 |
172
- | onnxruntime-genai | 0.2.0rc4 |
173
- | onnxruntime-genai-cuda | 0.2.0rc4 |
174
- | onnxruntime-genai-directml | 0.2.0rc4 |
175
  | transformers | 4.39.0 |
176
  | bitsandbytes | 0.42.0 |
177
 
 
169
  | torch | 2.2.0 |
170
  | triton | 2.2.0 |
171
  | onnxruntime-gpu | 1.18.0 |
172
+ | onnxruntime-genai | 0.2.0 |
173
+ | onnxruntime-genai-cuda | 0.2.0 |
174
+ | onnxruntime-genai-directml | 0.2.0 |
175
  | transformers | 4.39.0 |
176
  | bitsandbytes | 0.42.0 |
177
 
cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/genai_config.json CHANGED
@@ -13,7 +13,6 @@
13
  "inputs": {
14
  "input_ids": "input_ids",
15
  "attention_mask": "attention_mask",
16
- "position_ids": "position_ids",
17
  "past_key_names": "past_key_values.%d.key",
18
  "past_value_names": "past_key_values.%d.value"
19
  },
@@ -45,10 +44,10 @@
45
  "no_repeat_ngram_size": 0,
46
  "num_beams": 1,
47
  "num_return_sequences": 1,
48
- "past_present_share_buffer": false,
49
  "repetition_penalty": 1.0,
50
  "temperature": 1.0,
51
  "top_k": 1,
52
  "top_p": 1.0
53
  }
54
- }
 
13
  "inputs": {
14
  "input_ids": "input_ids",
15
  "attention_mask": "attention_mask",
 
16
  "past_key_names": "past_key_values.%d.key",
17
  "past_value_names": "past_key_values.%d.value"
18
  },
 
44
  "no_repeat_ngram_size": 0,
45
  "num_beams": 1,
46
  "num_return_sequences": 1,
47
+ "past_present_share_buffer": true,
48
  "repetition_penalty": 1.0,
49
  "temperature": 1.0,
50
  "top_k": 1,
51
  "top_p": 1.0
52
  }
53
+ }
cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/phi3-mini-4k-instruct-cpu-int4-rtn-block-32-acc-level-4.onnx CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:223c637f8006881de3ed52383249e9e4fd0f23f898254f8c196dec719d2bea86
3
- size 324481
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:385cd1b908a0d2f8634e86d30236f6dbb7ae660eb3943fd1ef5bdc3847326480
3
+ size 231335
cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/phi3-mini-4k-instruct-cpu-int4-rtn-block-32-acc-level-4.onnx.data CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8df5082c99a10a3820d3031d9554241690573273313233166b4643e0ee0eac4f
3
  size 2722861056
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5db30ce699aee1123cf9045742488db5928006fa618a42cb3c0840322a85ad0f
3
  size 2722861056
cpu_and_mobile/cpu-int4-rtn-block-32/genai_config.json CHANGED
@@ -13,7 +13,6 @@
13
  "inputs": {
14
  "input_ids": "input_ids",
15
  "attention_mask": "attention_mask",
16
- "position_ids": "position_ids",
17
  "past_key_names": "past_key_values.%d.key",
18
  "past_value_names": "past_key_values.%d.value"
19
  },
@@ -45,10 +44,10 @@
45
  "no_repeat_ngram_size": 0,
46
  "num_beams": 1,
47
  "num_return_sequences": 1,
48
- "past_present_share_buffer": false,
49
  "repetition_penalty": 1.0,
50
  "temperature": 1.0,
51
  "top_k": 1,
52
  "top_p": 1.0
53
  }
54
- }
 
13
  "inputs": {
14
  "input_ids": "input_ids",
15
  "attention_mask": "attention_mask",
 
16
  "past_key_names": "past_key_values.%d.key",
17
  "past_value_names": "past_key_values.%d.value"
18
  },
 
44
  "no_repeat_ngram_size": 0,
45
  "num_beams": 1,
46
  "num_return_sequences": 1,
47
+ "past_present_share_buffer": true,
48
  "repetition_penalty": 1.0,
49
  "temperature": 1.0,
50
  "top_k": 1,
51
  "top_p": 1.0
52
  }
53
+ }
cpu_and_mobile/cpu-int4-rtn-block-32/phi3-mini-4k-instruct-cpu-int4-rtn-block-32.onnx CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:81819a38d737a306dc1cd152987bd5a7b404ac2d04f243f5bf6569b425bc4538
3
- size 313088
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1e1faf7ea6930f63caab12412f4a82c329eaddf6cce365e45c3cd00bb0547be8
3
+ size 222950
cpu_and_mobile/cpu-int4-rtn-block-32/phi3-mini-4k-instruct-cpu-int4-rtn-block-32.onnx.data CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8df5082c99a10a3820d3031d9554241690573273313233166b4643e0ee0eac4f
3
  size 2722861056
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5db30ce699aee1123cf9045742488db5928006fa618a42cb3c0840322a85ad0f
3
  size 2722861056