smpanaro committed on
Commit
722eedf
1 Parent(s): f554427

Update sequoia mode with transposed value cache and 4:508 input:cache length

Browse files

No change in output, but it should be faster (primarily due to the transposed value cache).

This view is limited to 50 files because it contains too many changes. See raw diff
Files changed (50) hide show
  1. sequoia/Llama-2-7b-hf_chunk1.mlmodelc/analytics/coremldata.bin +1 -1
  2. sequoia/Llama-2-7b-hf_chunk1.mlmodelc/coremldata.bin +1 -1
  3. sequoia/Llama-2-7b-hf_chunk1.mlmodelc/metadata.json +15 -14
  4. sequoia/Llama-2-7b-hf_chunk1.mlmodelc/model.mil +33 -26
  5. sequoia/Llama-2-7b-hf_chunk1.mlmodelc/weights/weight.bin +2 -2
  6. sequoia/Llama-2-7b-hf_chunk10.mlmodelc/analytics/coremldata.bin +1 -1
  7. sequoia/Llama-2-7b-hf_chunk10.mlmodelc/coremldata.bin +1 -1
  8. sequoia/Llama-2-7b-hf_chunk10.mlmodelc/metadata.json +67 -64
  9. sequoia/Llama-2-7b-hf_chunk10.mlmodelc/model.mil +0 -0
  10. sequoia/Llama-2-7b-hf_chunk11.mlmodelc/analytics/coremldata.bin +1 -1
  11. sequoia/Llama-2-7b-hf_chunk11.mlmodelc/coremldata.bin +1 -1
  12. sequoia/Llama-2-7b-hf_chunk11.mlmodelc/metadata.json +67 -64
  13. sequoia/Llama-2-7b-hf_chunk11.mlmodelc/model.mil +0 -0
  14. sequoia/Llama-2-7b-hf_chunk12.mlmodelc/analytics/coremldata.bin +1 -1
  15. sequoia/Llama-2-7b-hf_chunk12.mlmodelc/coremldata.bin +1 -1
  16. sequoia/Llama-2-7b-hf_chunk12.mlmodelc/metadata.json +51 -51
  17. sequoia/Llama-2-7b-hf_chunk12.mlmodelc/model.mil +0 -0
  18. sequoia/Llama-2-7b-hf_chunk2.mlmodelc/analytics/coremldata.bin +1 -1
  19. sequoia/Llama-2-7b-hf_chunk2.mlmodelc/coremldata.bin +1 -1
  20. sequoia/Llama-2-7b-hf_chunk2.mlmodelc/metadata.json +67 -64
  21. sequoia/Llama-2-7b-hf_chunk2.mlmodelc/model.mil +0 -0
  22. sequoia/Llama-2-7b-hf_chunk3.mlmodelc/analytics/coremldata.bin +1 -1
  23. sequoia/Llama-2-7b-hf_chunk3.mlmodelc/coremldata.bin +1 -1
  24. sequoia/Llama-2-7b-hf_chunk3.mlmodelc/metadata.json +67 -64
  25. sequoia/Llama-2-7b-hf_chunk3.mlmodelc/model.mil +0 -0
  26. sequoia/Llama-2-7b-hf_chunk4.mlmodelc/analytics/coremldata.bin +1 -1
  27. sequoia/Llama-2-7b-hf_chunk4.mlmodelc/coremldata.bin +1 -1
  28. sequoia/Llama-2-7b-hf_chunk4.mlmodelc/metadata.json +67 -64
  29. sequoia/Llama-2-7b-hf_chunk4.mlmodelc/model.mil +0 -0
  30. sequoia/Llama-2-7b-hf_chunk5.mlmodelc/analytics/coremldata.bin +1 -1
  31. sequoia/Llama-2-7b-hf_chunk5.mlmodelc/coremldata.bin +1 -1
  32. sequoia/Llama-2-7b-hf_chunk5.mlmodelc/metadata.json +67 -64
  33. sequoia/Llama-2-7b-hf_chunk5.mlmodelc/model.mil +0 -0
  34. sequoia/Llama-2-7b-hf_chunk6.mlmodelc/analytics/coremldata.bin +1 -1
  35. sequoia/Llama-2-7b-hf_chunk6.mlmodelc/coremldata.bin +1 -1
  36. sequoia/Llama-2-7b-hf_chunk6.mlmodelc/metadata.json +67 -64
  37. sequoia/Llama-2-7b-hf_chunk6.mlmodelc/model.mil +0 -0
  38. sequoia/Llama-2-7b-hf_chunk7.mlmodelc/analytics/coremldata.bin +1 -1
  39. sequoia/Llama-2-7b-hf_chunk7.mlmodelc/coremldata.bin +1 -1
  40. sequoia/Llama-2-7b-hf_chunk7.mlmodelc/metadata.json +67 -64
  41. sequoia/Llama-2-7b-hf_chunk7.mlmodelc/model.mil +0 -0
  42. sequoia/Llama-2-7b-hf_chunk8.mlmodelc/analytics/coremldata.bin +1 -1
  43. sequoia/Llama-2-7b-hf_chunk8.mlmodelc/coremldata.bin +1 -1
  44. sequoia/Llama-2-7b-hf_chunk8.mlmodelc/metadata.json +67 -64
  45. sequoia/Llama-2-7b-hf_chunk8.mlmodelc/model.mil +0 -0
  46. sequoia/Llama-2-7b-hf_chunk9.mlmodelc/analytics/coremldata.bin +1 -1
  47. sequoia/Llama-2-7b-hf_chunk9.mlmodelc/coremldata.bin +1 -1
  48. sequoia/Llama-2-7b-hf_chunk9.mlmodelc/metadata.json +67 -64
  49. sequoia/Llama-2-7b-hf_chunk9.mlmodelc/model.mil +0 -0
  50. sequoia/logit-processor.mlmodelc/analytics/coremldata.bin +1 -1
sequoia/Llama-2-7b-hf_chunk1.mlmodelc/analytics/coremldata.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e729e06a5dac91d54425432e10c01d40645eefd035e7d3569e6aaf5acc4a1493
3
  size 243
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e8372d12aa224d728fc434e91b2c1432b7ef69216416bb047c5f7ae2707e4120
3
  size 243
sequoia/Llama-2-7b-hf_chunk1.mlmodelc/coremldata.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1a55bcffcb4e191cd6358ad92d705948cd757010e873528f66b6e21943904acd
3
  size 485
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9d888daf26172f67d0fb48d9f30faca6f62b348e0e571de6855c2a60530aa2bb
3
  size 485
sequoia/Llama-2-7b-hf_chunk1.mlmodelc/metadata.json CHANGED
@@ -138,9 +138,9 @@
138
  "hasShapeFlexibility" : "0",
139
  "isOptional" : "0",
140
  "dataType" : "Int32",
141
- "formattedType" : "MultiArray (Int32 1 × 1)",
142
  "shortDescription" : "",
143
- "shape" : "[1, 1]",
144
  "name" : "input_ids",
145
  "type" : "MultiArray"
146
  },
@@ -165,9 +165,9 @@
165
  "hasShapeFlexibility" : "0",
166
  "isOptional" : "0",
167
  "dataType" : "Float16",
168
- "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 1)",
169
  "shortDescription" : "",
170
- "shape" : "[1, 4096, 1, 1]",
171
  "name" : "x",
172
  "type" : "MultiArray"
173
  },
@@ -175,9 +175,9 @@
175
  "hasShapeFlexibility" : "0",
176
  "isOptional" : "0",
177
  "dataType" : "Float16",
178
- "formattedType" : "MultiArray (Float16 128 × 1)",
179
  "shortDescription" : "",
180
- "shape" : "[128, 1]",
181
  "name" : "cos",
182
  "type" : "MultiArray"
183
  },
@@ -185,9 +185,9 @@
185
  "hasShapeFlexibility" : "0",
186
  "isOptional" : "0",
187
  "dataType" : "Float16",
188
- "formattedType" : "MultiArray (Float16 128 × 1)",
189
  "shortDescription" : "",
190
- "shape" : "[128, 1]",
191
  "name" : "sin",
192
  "type" : "MultiArray"
193
  },
@@ -195,23 +195,24 @@
195
  "hasShapeFlexibility" : "0",
196
  "isOptional" : "0",
197
  "dataType" : "Float16",
198
- "formattedType" : "MultiArray (Float16 1 × 1 × 1 × 512)",
199
  "shortDescription" : "",
200
- "shape" : "[1, 1, 1, 512]",
201
  "name" : "mask",
202
  "type" : "MultiArray"
203
  }
204
  ],
205
  "name" : "input_1_context_512",
206
  "mlProgramOperationTypeHistogram" : {
207
- "Select" : 1,
208
  "Ios18.maximum" : 1,
209
  "Ios18.gather" : 3,
210
  "Ios18.sub" : 3,
211
  "Ios18.transpose" : 1,
212
- "Ios18.less" : 1,
213
  "Ios18.cast" : 2,
214
- "Ios18.expandDims" : 4
 
215
  }
216
  }
217
  ],
@@ -265,7 +266,7 @@
265
  }
266
  ],
267
  "defaultFunctionName" : "input_512_context_512",
268
- "generatedClassName" : "Llama_2_7b_hf_2024_07_02_20_36_17_merged_chunk1",
269
  "userDefinedMetadata" : {
270
 
271
  },
 
138
  "hasShapeFlexibility" : "0",
139
  "isOptional" : "0",
140
  "dataType" : "Int32",
141
+ "formattedType" : "MultiArray (Int32 1 × 4)",
142
  "shortDescription" : "",
143
+ "shape" : "[1, 4]",
144
  "name" : "input_ids",
145
  "type" : "MultiArray"
146
  },
 
165
  "hasShapeFlexibility" : "0",
166
  "isOptional" : "0",
167
  "dataType" : "Float16",
168
+ "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 4)",
169
  "shortDescription" : "",
170
+ "shape" : "[1, 4096, 1, 4]",
171
  "name" : "x",
172
  "type" : "MultiArray"
173
  },
 
175
  "hasShapeFlexibility" : "0",
176
  "isOptional" : "0",
177
  "dataType" : "Float16",
178
+ "formattedType" : "MultiArray (Float16 128 × 4)",
179
  "shortDescription" : "",
180
+ "shape" : "[128, 4]",
181
  "name" : "cos",
182
  "type" : "MultiArray"
183
  },
 
185
  "hasShapeFlexibility" : "0",
186
  "isOptional" : "0",
187
  "dataType" : "Float16",
188
+ "formattedType" : "MultiArray (Float16 128 × 4)",
189
  "shortDescription" : "",
190
+ "shape" : "[128, 4]",
191
  "name" : "sin",
192
  "type" : "MultiArray"
193
  },
 
195
  "hasShapeFlexibility" : "0",
196
  "isOptional" : "0",
197
  "dataType" : "Float16",
198
+ "formattedType" : "MultiArray (Float16 1 × 1 × 4 × 512)",
199
  "shortDescription" : "",
200
+ "shape" : "[1, 1, 4, 512]",
201
  "name" : "mask",
202
  "type" : "MultiArray"
203
  }
204
  ],
205
  "name" : "input_1_context_512",
206
  "mlProgramOperationTypeHistogram" : {
207
+ "Select" : 2,
208
  "Ios18.maximum" : 1,
209
  "Ios18.gather" : 3,
210
  "Ios18.sub" : 3,
211
  "Ios18.transpose" : 1,
212
+ "Ios18.less" : 2,
213
  "Ios18.cast" : 2,
214
+ "Ios18.expandDims" : 4,
215
+ "Tile" : 2
216
  }
217
  }
218
  ],
 
266
  }
267
  ],
268
  "defaultFunctionName" : "input_512_context_512",
269
+ "generatedClassName" : "Llama_2_7b_hf_2024_07_17_19_34_17_merged_chunk1",
270
  "userDefinedMetadata" : {
271
 
272
  },
sequoia/Llama-2-7b-hf_chunk1.mlmodelc/model.mil CHANGED
@@ -1,49 +1,56 @@
1
  program(1.3)
2
- [buildInfo = dict<string, string>({{"coremlc-component-MIL", "3400.34.1"}, {"coremlc-version", "3400.42.1"}})]
3
  {
4
- func input_1_context_512<ios18>(tensor<int32, [1]> full_sequence_length, tensor<int32, [1, 1]> input_ids) {
5
- tensor<int32, [1]> T = const()[name = string("T"), val = tensor<int32, [1]>([1])];
6
  int32 x_axis_0 = const()[name = string("x_axis_0"), val = int32(0)];
7
  int32 x_batch_dims_0 = const()[name = string("x_batch_dims_0"), val = int32(0)];
8
  bool x_validate_indices_0 = const()[name = string("x_validate_indices_0"), val = bool(false)];
9
  tensor<fp16, [32000, 4096]> wte_weight_to_fp16 = const()[name = string("wte_weight_to_fp16"), val = tensor<fp16, [32000, 4096]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64)))];
10
  string input_ids_to_int16_dtype_0 = const()[name = string("input_ids_to_int16_dtype_0"), val = string("int16")];
11
- tensor<int16, [1, 1]> input_ids_to_int16 = cast(dtype = input_ids_to_int16_dtype_0, x = input_ids)[name = string("cast_6")];
12
- tensor<fp16, [1, 1, 4096]> x_cast_fp16_cast_uint16 = gather(axis = x_axis_0, batch_dims = x_batch_dims_0, indices = input_ids_to_int16, validate_indices = x_validate_indices_0, x = wte_weight_to_fp16)[name = string("x_cast_fp16_cast_uint16")];
13
  tensor<int32, [3]> var_16_perm_0 = const()[name = string("op_16_perm_0"), val = tensor<int32, [3]>([0, 2, 1])];
14
  tensor<int32, [1]> var_18_axes_0 = const()[name = string("op_18_axes_0"), val = tensor<int32, [1]>([2])];
15
- tensor<fp16, [1, 4096, 1]> var_16_cast_fp16 = transpose(perm = var_16_perm_0, x = x_cast_fp16_cast_uint16)[name = string("transpose_0")];
16
- tensor<fp16, [1, 4096, 1, 1]> x = expand_dims(axes = var_18_axes_0, x = var_16_cast_fp16)[name = string("op_18_cast_fp16")];
17
  tensor<int32, [1]> pos_offset = sub(x = T, y = full_sequence_length)[name = string("pos_offset")];
18
- tensor<int32, [1]> var_26 = const()[name = string("op_26"), val = tensor<int32, [1]>([0])];
19
- tensor<int32, [1]> input_pos_1 = sub(x = var_26, y = pos_offset)[name = string("input_pos_1")];
20
- tensor<int32, [1]> var_34 = const()[name = string("op_34"), val = tensor<int32, [1]>([0])];
21
- tensor<int32, [1]> input_pos = maximum(x = input_pos_1, y = var_34)[name = string("input_pos")];
22
  int32 var_45 = const()[name = string("op_45"), val = int32(1)];
23
  int32 var_46_batch_dims_0 = const()[name = string("op_46_batch_dims_0"), val = int32(0)];
24
  bool var_46_validate_indices_0 = const()[name = string("op_46_validate_indices_0"), val = bool(false)];
25
  tensor<fp16, [128, 512]> var_44_to_fp16 = const()[name = string("op_44_to_fp16"), val = tensor<fp16, [128, 512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(262144128)))];
26
  string input_pos_to_uint16_dtype_0 = const()[name = string("input_pos_to_uint16_dtype_0"), val = string("uint16")];
27
- tensor<uint16, [1]> input_pos_to_uint16 = cast(dtype = input_pos_to_uint16_dtype_0, x = input_pos)[name = string("cast_5")];
28
- tensor<fp16, [128, 1]> cos = gather(axis = var_45, batch_dims = var_46_batch_dims_0, indices = input_pos_to_uint16, validate_indices = var_46_validate_indices_0, x = var_44_to_fp16)[name = string("op_46_cast_fp16_cast_uint16")];
29
  int32 var_56 = const()[name = string("op_56"), val = int32(1)];
30
  int32 var_57_batch_dims_0 = const()[name = string("op_57_batch_dims_0"), val = int32(0)];
31
  bool var_57_validate_indices_0 = const()[name = string("op_57_validate_indices_0"), val = bool(false)];
32
  tensor<fp16, [128, 512]> var_55_to_fp16 = const()[name = string("op_55_to_fp16"), val = tensor<fp16, [128, 512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(262275264)))];
33
- tensor<fp16, [128, 1]> sin = gather(axis = var_56, batch_dims = var_57_batch_dims_0, indices = input_pos_to_uint16, validate_indices = var_57_validate_indices_0, x = var_55_to_fp16)[name = string("op_57_cast_fp16_cast_uint16")];
34
- tensor<int32, [512]> var_104 = const()[name = string("op_104"), val = tensor<int32, [512]>([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, 312, 313, 314, 315, 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 340, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, 356, 357, 358, 359, 360, 361, 362, 363, 364, 365, 366, 367, 368, 369, 370, 371, 372, 373, 374, 375, 376, 377, 378, 379, 380, 381, 382, 383, 384, 385, 386, 387, 388, 389, 390, 391, 392, 393, 394, 395, 396, 397, 398, 399, 400, 401, 402, 
403, 404, 405, 406, 407, 408, 409, 410, 411, 412, 413, 414, 415, 416, 417, 418, 419, 420, 421, 422, 423, 424, 425, 426, 427, 428, 429, 430, 431, 432, 433, 434, 435, 436, 437, 438, 439, 440, 441, 442, 443, 444, 445, 446, 447, 448, 449, 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, 460, 461, 462, 463, 464, 465, 466, 467, 468, 469, 470, 471, 472, 473, 474, 475, 476, 477, 478, 479, 480, 481, 482, 483, 484, 485, 486, 487, 488, 489, 490, 491, 492, 493, 494, 495, 496, 497, 498, 499, 500, 501, 502, 503, 504, 505, 506, 507, 508, 509, 510, 511])];
35
- int32 var_105 = const()[name = string("op_105"), val = int32(512)];
36
- tensor<int32, [1]> var_107 = sub(x = var_105, y = full_sequence_length)[name = string("op_107")];
37
- tensor<bool, [512]> var_108 = less(x = var_104, y = var_107)[name = string("op_108")];
 
 
 
 
 
 
 
38
  tensor<int32, [1]> expand_dims_0_axes_0 = const()[name = string("expand_dims_0_axes_0"), val = tensor<int32, [1]>([0])];
39
- tensor<bool, [1, 512]> expand_dims_0 = expand_dims(axes = expand_dims_0_axes_0, x = var_108)[name = string("expand_dims_0")];
40
- tensor<fp16, [1, 512]> all_mask_to_fp16 = const()[name = string("all_mask_to_fp16"), val = tensor<fp16, [1, 512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(263455104)))];
41
- tensor<fp16, [1, 512]> m_1_to_fp16 = const()[name = string("m_1_to_fp16"), val = tensor<fp16, [1, 512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(263456192)))];
42
- tensor<fp16, [1, 512]> m_cast_fp16 = select(a = all_mask_to_fp16, b = m_1_to_fp16, cond = expand_dims_0)[name = string("m_cast_fp16")];
43
- tensor<int32, [1]> var_111_axes_0 = const()[name = string("op_111_axes_0"), val = tensor<int32, [1]>([0])];
44
- tensor<fp16, [1, 1, 512]> var_111_cast_fp16 = expand_dims(axes = var_111_axes_0, x = m_cast_fp16)[name = string("op_111_cast_fp16")];
45
- tensor<int32, [1]> var_113_axes_0 = const()[name = string("op_113_axes_0"), val = tensor<int32, [1]>([0])];
46
- tensor<fp16, [1, 1, 1, 512]> mask = expand_dims(axes = var_113_axes_0, x = var_111_cast_fp16)[name = string("op_113_cast_fp16")];
47
  } -> (x, cos, sin, mask);
48
  func input_512_context_512<ios18>(tensor<int32, [1]> full_sequence_length, tensor<int32, [1, 512]> input_ids) {
49
  tensor<int32, [1]> T = const()[name = string("T"), val = tensor<int32, [1]>([512])];
 
1
  program(1.3)
2
+ [buildInfo = dict<string, string>({{"coremlc-component-MIL", "3400.42.1"}, {"coremlc-version", "3400.51.1"}})]
3
  {
4
+ func input_1_context_512<ios18>(tensor<int32, [1]> full_sequence_length, tensor<int32, [1, 4]> input_ids) {
5
+ tensor<int32, [1]> T = const()[name = string("T"), val = tensor<int32, [1]>([4])];
6
  int32 x_axis_0 = const()[name = string("x_axis_0"), val = int32(0)];
7
  int32 x_batch_dims_0 = const()[name = string("x_batch_dims_0"), val = int32(0)];
8
  bool x_validate_indices_0 = const()[name = string("x_validate_indices_0"), val = bool(false)];
9
  tensor<fp16, [32000, 4096]> wte_weight_to_fp16 = const()[name = string("wte_weight_to_fp16"), val = tensor<fp16, [32000, 4096]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64)))];
10
  string input_ids_to_int16_dtype_0 = const()[name = string("input_ids_to_int16_dtype_0"), val = string("int16")];
11
+ tensor<int16, [1, 4]> input_ids_to_int16 = cast(dtype = input_ids_to_int16_dtype_0, x = input_ids)[name = string("cast_6")];
12
+ tensor<fp16, [1, 4, 4096]> x_cast_fp16_cast_uint16 = gather(axis = x_axis_0, batch_dims = x_batch_dims_0, indices = input_ids_to_int16, validate_indices = x_validate_indices_0, x = wte_weight_to_fp16)[name = string("x_cast_fp16_cast_uint16")];
13
  tensor<int32, [3]> var_16_perm_0 = const()[name = string("op_16_perm_0"), val = tensor<int32, [3]>([0, 2, 1])];
14
  tensor<int32, [1]> var_18_axes_0 = const()[name = string("op_18_axes_0"), val = tensor<int32, [1]>([2])];
15
+ tensor<fp16, [1, 4096, 4]> var_16_cast_fp16 = transpose(perm = var_16_perm_0, x = x_cast_fp16_cast_uint16)[name = string("transpose_0")];
16
+ tensor<fp16, [1, 4096, 1, 4]> x = expand_dims(axes = var_18_axes_0, x = var_16_cast_fp16)[name = string("op_18_cast_fp16")];
17
  tensor<int32, [1]> pos_offset = sub(x = T, y = full_sequence_length)[name = string("pos_offset")];
18
+ tensor<int32, [4]> var_26 = const()[name = string("op_26"), val = tensor<int32, [4]>([0, 1, 2, 3])];
19
+ tensor<int32, [4]> input_pos_1 = sub(x = var_26, y = pos_offset)[name = string("input_pos_1")];
20
+ tensor<int32, [4]> var_34 = const()[name = string("op_34"), val = tensor<int32, [4]>([0, 0, 0, 0])];
21
+ tensor<int32, [4]> input_pos = maximum(x = input_pos_1, y = var_34)[name = string("input_pos")];
22
  int32 var_45 = const()[name = string("op_45"), val = int32(1)];
23
  int32 var_46_batch_dims_0 = const()[name = string("op_46_batch_dims_0"), val = int32(0)];
24
  bool var_46_validate_indices_0 = const()[name = string("op_46_validate_indices_0"), val = bool(false)];
25
  tensor<fp16, [128, 512]> var_44_to_fp16 = const()[name = string("op_44_to_fp16"), val = tensor<fp16, [128, 512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(262144128)))];
26
  string input_pos_to_uint16_dtype_0 = const()[name = string("input_pos_to_uint16_dtype_0"), val = string("uint16")];
27
+ tensor<uint16, [4]> input_pos_to_uint16 = cast(dtype = input_pos_to_uint16_dtype_0, x = input_pos)[name = string("cast_5")];
28
+ tensor<fp16, [128, 4]> cos = gather(axis = var_45, batch_dims = var_46_batch_dims_0, indices = input_pos_to_uint16, validate_indices = var_46_validate_indices_0, x = var_44_to_fp16)[name = string("op_46_cast_fp16_cast_uint16")];
29
  int32 var_56 = const()[name = string("op_56"), val = int32(1)];
30
  int32 var_57_batch_dims_0 = const()[name = string("op_57_batch_dims_0"), val = int32(0)];
31
  bool var_57_validate_indices_0 = const()[name = string("op_57_validate_indices_0"), val = bool(false)];
32
  tensor<fp16, [128, 512]> var_55_to_fp16 = const()[name = string("op_55_to_fp16"), val = tensor<fp16, [128, 512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(262275264)))];
33
+ tensor<fp16, [128, 4]> sin = gather(axis = var_56, batch_dims = var_57_batch_dims_0, indices = input_pos_to_uint16, validate_indices = var_57_validate_indices_0, x = var_55_to_fp16)[name = string("op_57_cast_fp16_cast_uint16")];
34
+ tensor<int32, [4, 1]> var_92 = const()[name = string("op_92"), val = tensor<int32, [4, 1]>([[0], [1], [2], [3]])];
35
+ tensor<bool, [4, 1]> var_95 = less(x = var_92, y = pos_offset)[name = string("op_95")];
36
+ tensor<int32, [2]> var_95_after_broadcast_reps_0 = const()[name = string("op_95_after_broadcast_reps_0"), val = tensor<int32, [2]>([1, 512])];
37
+ tensor<bool, [4, 512]> var_95_after_broadcast = tile(reps = var_95_after_broadcast_reps_0, x = var_95)[name = string("op_95_after_broadcast")];
38
+ tensor<fp16, [4, 512]> all_mask_to_fp16 = const()[name = string("all_mask_to_fp16"), val = tensor<fp16, [4, 512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(263455104)))];
39
+ tensor<fp16, [4, 512]> m_1_to_fp16 = const()[name = string("m_1_to_fp16"), val = tensor<fp16, [4, 512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(263459264)))];
40
+ tensor<fp16, [4, 512]> m_3_cast_fp16 = select(a = all_mask_to_fp16, b = m_1_to_fp16, cond = var_95_after_broadcast)[name = string("m_3_cast_fp16")];
41
+ tensor<int32, [512]> var_105 = const()[name = string("op_105"), val = tensor<int32, [512]>([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, 312, 313, 314, 315, 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 340, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, 356, 357, 358, 359, 360, 361, 362, 363, 364, 365, 366, 367, 368, 369, 370, 371, 372, 373, 374, 375, 376, 377, 378, 379, 380, 381, 382, 383, 384, 385, 386, 387, 388, 389, 390, 391, 392, 393, 394, 395, 396, 397, 398, 399, 400, 401, 402, 
403, 404, 405, 406, 407, 408, 409, 410, 411, 412, 413, 414, 415, 416, 417, 418, 419, 420, 421, 422, 423, 424, 425, 426, 427, 428, 429, 430, 431, 432, 433, 434, 435, 436, 437, 438, 439, 440, 441, 442, 443, 444, 445, 446, 447, 448, 449, 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, 460, 461, 462, 463, 464, 465, 466, 467, 468, 469, 470, 471, 472, 473, 474, 475, 476, 477, 478, 479, 480, 481, 482, 483, 484, 485, 486, 487, 488, 489, 490, 491, 492, 493, 494, 495, 496, 497, 498, 499, 500, 501, 502, 503, 504, 505, 506, 507, 508, 509, 510, 511])];
42
+ int32 var_106 = const()[name = string("op_106"), val = int32(512)];
43
+ tensor<int32, [1]> var_108 = sub(x = var_106, y = full_sequence_length)[name = string("op_108")];
44
+ tensor<bool, [512]> var_109 = less(x = var_105, y = var_108)[name = string("op_109")];
45
  tensor<int32, [1]> expand_dims_0_axes_0 = const()[name = string("expand_dims_0_axes_0"), val = tensor<int32, [1]>([0])];
46
+ tensor<bool, [1, 512]> expand_dims_0 = expand_dims(axes = expand_dims_0_axes_0, x = var_109)[name = string("expand_dims_0")];
47
+ tensor<int32, [2]> var_109_after_broadcast_reps_0 = const()[name = string("op_109_after_broadcast_reps_0"), val = tensor<int32, [2]>([4, 1])];
48
+ tensor<bool, [4, 512]> var_109_after_broadcast = tile(reps = var_109_after_broadcast_reps_0, x = expand_dims_0)[name = string("op_109_after_broadcast")];
49
+ tensor<fp16, [4, 512]> m_cast_fp16 = select(a = all_mask_to_fp16, b = m_3_cast_fp16, cond = var_109_after_broadcast)[name = string("m_cast_fp16")];
50
+ tensor<int32, [1]> var_112_axes_0 = const()[name = string("op_112_axes_0"), val = tensor<int32, [1]>([0])];
51
+ tensor<fp16, [1, 4, 512]> var_112_cast_fp16 = expand_dims(axes = var_112_axes_0, x = m_cast_fp16)[name = string("op_112_cast_fp16")];
52
+ tensor<int32, [1]> var_114_axes_0 = const()[name = string("op_114_axes_0"), val = tensor<int32, [1]>([0])];
53
+ tensor<fp16, [1, 1, 4, 512]> mask = expand_dims(axes = var_114_axes_0, x = var_112_cast_fp16)[name = string("op_114_cast_fp16")];
54
  } -> (x, cos, sin, mask);
55
  func input_512_context_512<ios18>(tensor<int32, [1]> full_sequence_length, tensor<int32, [1, 512]> input_ids) {
56
  tensor<int32, [1]> T = const()[name = string("T"), val = tensor<int32, [1]>([512])];
sequoia/Llama-2-7b-hf_chunk1.mlmodelc/weights/weight.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:63ea75c6154c60560d9edb4d2e2f028afa38a3927bb7277b7d01558bc198e965
3
- size 263457280
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a66aa1771f06ceee6e578b7f93444d38b2cb55120a2a84494e7649b4e424a176
3
+ size 263463424
sequoia/Llama-2-7b-hf_chunk10.mlmodelc/analytics/coremldata.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:84e317a82cdf4e96f808f63e77f10098844d47ad522545181edfac4d287c9c92
3
  size 243
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e69e7ad37dd59e97348d395eec9b4c41b7d3ea44d86f613751ae47803a0a2efe
3
  size 243
sequoia/Llama-2-7b-hf_chunk10.mlmodelc/coremldata.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e430d0795ff5c384187174f5718a2c13d0070f5d6a811831e18862497865a86d
3
  size 1037
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d35a0353bcfa501579e07af3718261af3b129b4bec004c1fe6d812a6403a3f5b
3
  size 1037
sequoia/Llama-2-7b-hf_chunk10.mlmodelc/metadata.json CHANGED
@@ -17,9 +17,9 @@
17
  "hasShapeFlexibility" : "0",
18
  "isOptional" : "0",
19
  "dataType" : "Float16",
20
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
21
  "shortDescription" : "",
22
- "shape" : "[1, 32, 128, 511]",
23
  "name" : "new_k_cache_0",
24
  "type" : "MultiArray"
25
  },
@@ -27,9 +27,9 @@
27
  "hasShapeFlexibility" : "0",
28
  "isOptional" : "0",
29
  "dataType" : "Float16",
30
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
31
  "shortDescription" : "",
32
- "shape" : "[1, 32, 128, 511]",
33
  "name" : "new_k_cache_1",
34
  "type" : "MultiArray"
35
  },
@@ -37,9 +37,9 @@
37
  "hasShapeFlexibility" : "0",
38
  "isOptional" : "0",
39
  "dataType" : "Float16",
40
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
41
  "shortDescription" : "",
42
- "shape" : "[1, 32, 128, 511]",
43
  "name" : "new_k_cache_2",
44
  "type" : "MultiArray"
45
  },
@@ -47,9 +47,9 @@
47
  "hasShapeFlexibility" : "0",
48
  "isOptional" : "0",
49
  "dataType" : "Float16",
50
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
51
  "shortDescription" : "",
52
- "shape" : "[1, 32, 128, 511]",
53
  "name" : "new_v_cache_0",
54
  "type" : "MultiArray"
55
  },
@@ -57,9 +57,9 @@
57
  "hasShapeFlexibility" : "0",
58
  "isOptional" : "0",
59
  "dataType" : "Float16",
60
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
61
  "shortDescription" : "",
62
- "shape" : "[1, 32, 128, 511]",
63
  "name" : "new_v_cache_1",
64
  "type" : "MultiArray"
65
  },
@@ -67,9 +67,9 @@
67
  "hasShapeFlexibility" : "0",
68
  "isOptional" : "0",
69
  "dataType" : "Float16",
70
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
71
  "shortDescription" : "",
72
- "shape" : "[1, 32, 128, 511]",
73
  "name" : "new_v_cache_2",
74
  "type" : "MultiArray"
75
  }
@@ -142,9 +142,9 @@
142
  "hasShapeFlexibility" : "0",
143
  "isOptional" : "0",
144
  "dataType" : "Float16",
145
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
146
  "shortDescription" : "",
147
- "shape" : "[1, 32, 128, 511]",
148
  "name" : "new_k_cache_0",
149
  "type" : "MultiArray"
150
  },
@@ -152,9 +152,9 @@
152
  "hasShapeFlexibility" : "0",
153
  "isOptional" : "0",
154
  "dataType" : "Float16",
155
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
156
  "shortDescription" : "",
157
- "shape" : "[1, 32, 128, 511]",
158
  "name" : "new_k_cache_1",
159
  "type" : "MultiArray"
160
  },
@@ -162,9 +162,9 @@
162
  "hasShapeFlexibility" : "0",
163
  "isOptional" : "0",
164
  "dataType" : "Float16",
165
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
166
  "shortDescription" : "",
167
- "shape" : "[1, 32, 128, 511]",
168
  "name" : "new_k_cache_2",
169
  "type" : "MultiArray"
170
  },
@@ -172,9 +172,9 @@
172
  "hasShapeFlexibility" : "0",
173
  "isOptional" : "0",
174
  "dataType" : "Float16",
175
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
176
  "shortDescription" : "",
177
- "shape" : "[1, 32, 128, 511]",
178
  "name" : "new_v_cache_0",
179
  "type" : "MultiArray"
180
  },
@@ -182,9 +182,9 @@
182
  "hasShapeFlexibility" : "0",
183
  "isOptional" : "0",
184
  "dataType" : "Float16",
185
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
186
  "shortDescription" : "",
187
- "shape" : "[1, 32, 128, 511]",
188
  "name" : "new_v_cache_1",
189
  "type" : "MultiArray"
190
  },
@@ -192,9 +192,9 @@
192
  "hasShapeFlexibility" : "0",
193
  "isOptional" : "0",
194
  "dataType" : "Float16",
195
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
196
  "shortDescription" : "",
197
- "shape" : "[1, 32, 128, 511]",
198
  "name" : "new_v_cache_2",
199
  "type" : "MultiArray"
200
  }
@@ -203,14 +203,15 @@
203
  "mlProgramOperationTypeHistogram" : {
204
  "Ios18.constexprLutToDense" : 21,
205
  "Ios18.conv" : 21,
206
- "Ios18.matmul" : 6,
207
  "Ios18.expandDims" : 6,
208
- "Ios18.concat" : 18,
 
209
  "Ios18.add" : 15,
210
  "Ios18.realDiv" : 6,
211
  "Ios18.silu" : 3,
212
  "Ios18.softmax" : 3,
213
  "Ios18.sliceByIndex" : 18,
 
214
  "Ios16.reduceL2Norm" : 6,
215
  "Ios18.squeeze" : 6,
216
  "Ios18.reshape" : 12,
@@ -223,9 +224,9 @@
223
  "hasShapeFlexibility" : "0",
224
  "isOptional" : "0",
225
  "dataType" : "Float16",
226
- "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 1)",
227
  "shortDescription" : "",
228
- "shape" : "[1, 4096, 1, 1]",
229
  "name" : "x",
230
  "type" : "MultiArray"
231
  },
@@ -233,9 +234,9 @@
233
  "hasShapeFlexibility" : "0",
234
  "isOptional" : "0",
235
  "dataType" : "Float16",
236
- "formattedType" : "MultiArray (Float16 128 × 1)",
237
  "shortDescription" : "",
238
- "shape" : "[128, 1]",
239
  "name" : "cos",
240
  "type" : "MultiArray"
241
  },
@@ -243,9 +244,9 @@
243
  "hasShapeFlexibility" : "0",
244
  "isOptional" : "0",
245
  "dataType" : "Float16",
246
- "formattedType" : "MultiArray (Float16 128 × 1)",
247
  "shortDescription" : "",
248
- "shape" : "[128, 1]",
249
  "name" : "sin",
250
  "type" : "MultiArray"
251
  },
@@ -253,9 +254,9 @@
253
  "hasShapeFlexibility" : "0",
254
  "isOptional" : "0",
255
  "dataType" : "Float16",
256
- "formattedType" : "MultiArray (Float16 1 × 1 × 1 × 512)",
257
  "shortDescription" : "",
258
- "shape" : "[1, 1, 1, 512]",
259
  "name" : "mask",
260
  "type" : "MultiArray"
261
  },
@@ -263,9 +264,9 @@
263
  "hasShapeFlexibility" : "0",
264
  "isOptional" : "1",
265
  "dataType" : "Float16",
266
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)?",
267
  "shortDescription" : "",
268
- "shape" : "[1, 32, 128, 511]",
269
  "name" : "k_cache_0",
270
  "type" : "MultiArray"
271
  },
@@ -273,9 +274,9 @@
273
  "hasShapeFlexibility" : "0",
274
  "isOptional" : "1",
275
  "dataType" : "Float16",
276
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)?",
277
  "shortDescription" : "",
278
- "shape" : "[1, 32, 128, 511]",
279
  "name" : "v_cache_0",
280
  "type" : "MultiArray"
281
  },
@@ -283,9 +284,9 @@
283
  "hasShapeFlexibility" : "0",
284
  "isOptional" : "1",
285
  "dataType" : "Float16",
286
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)?",
287
  "shortDescription" : "",
288
- "shape" : "[1, 32, 128, 511]",
289
  "name" : "k_cache_1",
290
  "type" : "MultiArray"
291
  },
@@ -293,9 +294,9 @@
293
  "hasShapeFlexibility" : "0",
294
  "isOptional" : "1",
295
  "dataType" : "Float16",
296
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)?",
297
  "shortDescription" : "",
298
- "shape" : "[1, 32, 128, 511]",
299
  "name" : "v_cache_1",
300
  "type" : "MultiArray"
301
  },
@@ -303,9 +304,9 @@
303
  "hasShapeFlexibility" : "0",
304
  "isOptional" : "1",
305
  "dataType" : "Float16",
306
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)?",
307
  "shortDescription" : "",
308
- "shape" : "[1, 32, 128, 511]",
309
  "name" : "k_cache_2",
310
  "type" : "MultiArray"
311
  },
@@ -313,9 +314,9 @@
313
  "hasShapeFlexibility" : "0",
314
  "isOptional" : "1",
315
  "dataType" : "Float16",
316
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)?",
317
  "shortDescription" : "",
318
- "shape" : "[1, 32, 128, 511]",
319
  "name" : "v_cache_2",
320
  "type" : "MultiArray"
321
  }
@@ -330,9 +331,9 @@
330
  "hasShapeFlexibility" : "0",
331
  "isOptional" : "0",
332
  "dataType" : "Float16",
333
- "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 1)",
334
  "shortDescription" : "",
335
- "shape" : "[1, 4096, 1, 1]",
336
  "name" : "new_x",
337
  "type" : "MultiArray"
338
  },
@@ -340,9 +341,9 @@
340
  "hasShapeFlexibility" : "0",
341
  "isOptional" : "0",
342
  "dataType" : "Float16",
343
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
344
  "shortDescription" : "",
345
- "shape" : "[1, 32, 128, 511]",
346
  "name" : "new_k_cache_0",
347
  "type" : "MultiArray"
348
  },
@@ -350,9 +351,9 @@
350
  "hasShapeFlexibility" : "0",
351
  "isOptional" : "0",
352
  "dataType" : "Float16",
353
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
354
  "shortDescription" : "",
355
- "shape" : "[1, 32, 128, 511]",
356
  "name" : "new_k_cache_1",
357
  "type" : "MultiArray"
358
  },
@@ -360,9 +361,9 @@
360
  "hasShapeFlexibility" : "0",
361
  "isOptional" : "0",
362
  "dataType" : "Float16",
363
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
364
  "shortDescription" : "",
365
- "shape" : "[1, 32, 128, 511]",
366
  "name" : "new_k_cache_2",
367
  "type" : "MultiArray"
368
  },
@@ -370,9 +371,9 @@
370
  "hasShapeFlexibility" : "0",
371
  "isOptional" : "0",
372
  "dataType" : "Float16",
373
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
374
  "shortDescription" : "",
375
- "shape" : "[1, 32, 128, 511]",
376
  "name" : "new_v_cache_0",
377
  "type" : "MultiArray"
378
  },
@@ -380,9 +381,9 @@
380
  "hasShapeFlexibility" : "0",
381
  "isOptional" : "0",
382
  "dataType" : "Float16",
383
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
384
  "shortDescription" : "",
385
- "shape" : "[1, 32, 128, 511]",
386
  "name" : "new_v_cache_1",
387
  "type" : "MultiArray"
388
  },
@@ -390,9 +391,9 @@
390
  "hasShapeFlexibility" : "0",
391
  "isOptional" : "0",
392
  "dataType" : "Float16",
393
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
394
  "shortDescription" : "",
395
- "shape" : "[1, 32, 128, 511]",
396
  "name" : "new_v_cache_2",
397
  "type" : "MultiArray"
398
  }
@@ -401,14 +402,15 @@
401
  "mlProgramOperationTypeHistogram" : {
402
  "Ios18.constexprLutToDense" : 21,
403
  "Ios18.conv" : 21,
404
- "Ios18.matmul" : 6,
405
  "Ios18.expandDims" : 6,
 
406
  "Ios18.concat" : 18,
407
  "Ios18.add" : 15,
408
  "Ios18.realDiv" : 6,
409
  "Ios18.silu" : 3,
410
  "Ios18.softmax" : 3,
411
  "Ios18.sliceByIndex" : 18,
 
412
  "Ios16.reduceL2Norm" : 6,
413
  "Ios18.squeeze" : 6,
414
  "Ios18.reshape" : 12,
@@ -419,14 +421,15 @@
419
  "mlProgramOperationTypeHistogram" : {
420
  "Ios18.constexprLutToDense" : 21,
421
  "Ios18.conv" : 21,
422
- "Ios18.matmul" : 6,
423
  "Ios18.expandDims" : 6,
424
- "Ios18.concat" : 18,
 
425
  "Ios18.add" : 15,
426
  "Ios18.realDiv" : 6,
427
  "Ios18.silu" : 3,
428
  "Ios18.softmax" : 3,
429
  "Ios18.sliceByIndex" : 18,
 
430
  "Ios16.reduceL2Norm" : 6,
431
  "Ios18.squeeze" : 6,
432
  "Ios18.reshape" : 12,
@@ -491,7 +494,7 @@
491
  }
492
  ],
493
  "defaultFunctionName" : "input_512_context_512",
494
- "generatedClassName" : "Llama_2_7b_hf_2024_07_02_20_36_17_merged_chunk10",
495
  "userDefinedMetadata" : {
496
 
497
  },
 
17
  "hasShapeFlexibility" : "0",
18
  "isOptional" : "0",
19
  "dataType" : "Float16",
20
+ "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 508)",
21
  "shortDescription" : "",
22
+ "shape" : "[1, 32, 128, 508]",
23
  "name" : "new_k_cache_0",
24
  "type" : "MultiArray"
25
  },
 
27
  "hasShapeFlexibility" : "0",
28
  "isOptional" : "0",
29
  "dataType" : "Float16",
30
+ "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 508)",
31
  "shortDescription" : "",
32
+ "shape" : "[1, 32, 128, 508]",
33
  "name" : "new_k_cache_1",
34
  "type" : "MultiArray"
35
  },
 
37
  "hasShapeFlexibility" : "0",
38
  "isOptional" : "0",
39
  "dataType" : "Float16",
40
+ "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 508)",
41
  "shortDescription" : "",
42
+ "shape" : "[1, 32, 128, 508]",
43
  "name" : "new_k_cache_2",
44
  "type" : "MultiArray"
45
  },
 
47
  "hasShapeFlexibility" : "0",
48
  "isOptional" : "0",
49
  "dataType" : "Float16",
50
+ "formattedType" : "MultiArray (Float16 1 × 32 × 508 × 128)",
51
  "shortDescription" : "",
52
+ "shape" : "[1, 32, 508, 128]",
53
  "name" : "new_v_cache_0",
54
  "type" : "MultiArray"
55
  },
 
57
  "hasShapeFlexibility" : "0",
58
  "isOptional" : "0",
59
  "dataType" : "Float16",
60
+ "formattedType" : "MultiArray (Float16 1 × 32 × 508 × 128)",
61
  "shortDescription" : "",
62
+ "shape" : "[1, 32, 508, 128]",
63
  "name" : "new_v_cache_1",
64
  "type" : "MultiArray"
65
  },
 
67
  "hasShapeFlexibility" : "0",
68
  "isOptional" : "0",
69
  "dataType" : "Float16",
70
+ "formattedType" : "MultiArray (Float16 1 × 32 × 508 × 128)",
71
  "shortDescription" : "",
72
+ "shape" : "[1, 32, 508, 128]",
73
  "name" : "new_v_cache_2",
74
  "type" : "MultiArray"
75
  }
 
142
  "hasShapeFlexibility" : "0",
143
  "isOptional" : "0",
144
  "dataType" : "Float16",
145
+ "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 508)",
146
  "shortDescription" : "",
147
+ "shape" : "[1, 32, 128, 508]",
148
  "name" : "new_k_cache_0",
149
  "type" : "MultiArray"
150
  },
 
152
  "hasShapeFlexibility" : "0",
153
  "isOptional" : "0",
154
  "dataType" : "Float16",
155
+ "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 508)",
156
  "shortDescription" : "",
157
+ "shape" : "[1, 32, 128, 508]",
158
  "name" : "new_k_cache_1",
159
  "type" : "MultiArray"
160
  },
 
162
  "hasShapeFlexibility" : "0",
163
  "isOptional" : "0",
164
  "dataType" : "Float16",
165
+ "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 508)",
166
  "shortDescription" : "",
167
+ "shape" : "[1, 32, 128, 508]",
168
  "name" : "new_k_cache_2",
169
  "type" : "MultiArray"
170
  },
 
172
  "hasShapeFlexibility" : "0",
173
  "isOptional" : "0",
174
  "dataType" : "Float16",
175
+ "formattedType" : "MultiArray (Float16 1 × 32 × 508 × 128)",
176
  "shortDescription" : "",
177
+ "shape" : "[1, 32, 508, 128]",
178
  "name" : "new_v_cache_0",
179
  "type" : "MultiArray"
180
  },
 
182
  "hasShapeFlexibility" : "0",
183
  "isOptional" : "0",
184
  "dataType" : "Float16",
185
+ "formattedType" : "MultiArray (Float16 1 × 32 × 508 × 128)",
186
  "shortDescription" : "",
187
+ "shape" : "[1, 32, 508, 128]",
188
  "name" : "new_v_cache_1",
189
  "type" : "MultiArray"
190
  },
 
192
  "hasShapeFlexibility" : "0",
193
  "isOptional" : "0",
194
  "dataType" : "Float16",
195
+ "formattedType" : "MultiArray (Float16 1 × 32 × 508 × 128)",
196
  "shortDescription" : "",
197
+ "shape" : "[1, 32, 508, 128]",
198
  "name" : "new_v_cache_2",
199
  "type" : "MultiArray"
200
  }
 
203
  "mlProgramOperationTypeHistogram" : {
204
  "Ios18.constexprLutToDense" : 21,
205
  "Ios18.conv" : 21,
 
206
  "Ios18.expandDims" : 6,
207
+ "Ios18.matmul" : 6,
208
+ "Ios18.concat" : 12,
209
  "Ios18.add" : 15,
210
  "Ios18.realDiv" : 6,
211
  "Ios18.silu" : 3,
212
  "Ios18.softmax" : 3,
213
  "Ios18.sliceByIndex" : 18,
214
+ "Ios18.transpose" : 6,
215
  "Ios16.reduceL2Norm" : 6,
216
  "Ios18.squeeze" : 6,
217
  "Ios18.reshape" : 12,
 
224
  "hasShapeFlexibility" : "0",
225
  "isOptional" : "0",
226
  "dataType" : "Float16",
227
+ "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 4)",
228
  "shortDescription" : "",
229
+ "shape" : "[1, 4096, 1, 4]",
230
  "name" : "x",
231
  "type" : "MultiArray"
232
  },
 
234
  "hasShapeFlexibility" : "0",
235
  "isOptional" : "0",
236
  "dataType" : "Float16",
237
+ "formattedType" : "MultiArray (Float16 128 × 4)",
238
  "shortDescription" : "",
239
+ "shape" : "[128, 4]",
240
  "name" : "cos",
241
  "type" : "MultiArray"
242
  },
 
244
  "hasShapeFlexibility" : "0",
245
  "isOptional" : "0",
246
  "dataType" : "Float16",
247
+ "formattedType" : "MultiArray (Float16 128 × 4)",
248
  "shortDescription" : "",
249
+ "shape" : "[128, 4]",
250
  "name" : "sin",
251
  "type" : "MultiArray"
252
  },
 
254
  "hasShapeFlexibility" : "0",
255
  "isOptional" : "0",
256
  "dataType" : "Float16",
257
+ "formattedType" : "MultiArray (Float16 1 × 1 × 4 × 512)",
258
  "shortDescription" : "",
259
+ "shape" : "[1, 1, 4, 512]",
260
  "name" : "mask",
261
  "type" : "MultiArray"
262
  },
 
264
  "hasShapeFlexibility" : "0",
265
  "isOptional" : "1",
266
  "dataType" : "Float16",
267
+ "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 508)?",
268
  "shortDescription" : "",
269
+ "shape" : "[1, 32, 128, 508]",
270
  "name" : "k_cache_0",
271
  "type" : "MultiArray"
272
  },
 
274
  "hasShapeFlexibility" : "0",
275
  "isOptional" : "1",
276
  "dataType" : "Float16",
277
+ "formattedType" : "MultiArray (Float16 1 × 32 × 508 × 128)?",
278
  "shortDescription" : "",
279
+ "shape" : "[1, 32, 508, 128]",
280
  "name" : "v_cache_0",
281
  "type" : "MultiArray"
282
  },
 
284
  "hasShapeFlexibility" : "0",
285
  "isOptional" : "1",
286
  "dataType" : "Float16",
287
+ "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 508)?",
288
  "shortDescription" : "",
289
+ "shape" : "[1, 32, 128, 508]",
290
  "name" : "k_cache_1",
291
  "type" : "MultiArray"
292
  },
 
294
  "hasShapeFlexibility" : "0",
295
  "isOptional" : "1",
296
  "dataType" : "Float16",
297
+ "formattedType" : "MultiArray (Float16 1 × 32 × 508 × 128)?",
298
  "shortDescription" : "",
299
+ "shape" : "[1, 32, 508, 128]",
300
  "name" : "v_cache_1",
301
  "type" : "MultiArray"
302
  },
 
304
  "hasShapeFlexibility" : "0",
305
  "isOptional" : "1",
306
  "dataType" : "Float16",
307
+ "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 508)?",
308
  "shortDescription" : "",
309
+ "shape" : "[1, 32, 128, 508]",
310
  "name" : "k_cache_2",
311
  "type" : "MultiArray"
312
  },
 
314
  "hasShapeFlexibility" : "0",
315
  "isOptional" : "1",
316
  "dataType" : "Float16",
317
+ "formattedType" : "MultiArray (Float16 1 × 32 × 508 × 128)?",
318
  "shortDescription" : "",
319
+ "shape" : "[1, 32, 508, 128]",
320
  "name" : "v_cache_2",
321
  "type" : "MultiArray"
322
  }
 
331
  "hasShapeFlexibility" : "0",
332
  "isOptional" : "0",
333
  "dataType" : "Float16",
334
+ "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 4)",
335
  "shortDescription" : "",
336
+ "shape" : "[1, 4096, 1, 4]",
337
  "name" : "new_x",
338
  "type" : "MultiArray"
339
  },
 
341
  "hasShapeFlexibility" : "0",
342
  "isOptional" : "0",
343
  "dataType" : "Float16",
344
+ "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 508)",
345
  "shortDescription" : "",
346
+ "shape" : "[1, 32, 128, 508]",
347
  "name" : "new_k_cache_0",
348
  "type" : "MultiArray"
349
  },
 
351
  "hasShapeFlexibility" : "0",
352
  "isOptional" : "0",
353
  "dataType" : "Float16",
354
+ "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 508)",
355
  "shortDescription" : "",
356
+ "shape" : "[1, 32, 128, 508]",
357
  "name" : "new_k_cache_1",
358
  "type" : "MultiArray"
359
  },
 
361
  "hasShapeFlexibility" : "0",
362
  "isOptional" : "0",
363
  "dataType" : "Float16",
364
+ "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 508)",
365
  "shortDescription" : "",
366
+ "shape" : "[1, 32, 128, 508]",
367
  "name" : "new_k_cache_2",
368
  "type" : "MultiArray"
369
  },
 
371
  "hasShapeFlexibility" : "0",
372
  "isOptional" : "0",
373
  "dataType" : "Float16",
374
+ "formattedType" : "MultiArray (Float16 1 × 32 × 508 × 128)",
375
  "shortDescription" : "",
376
+ "shape" : "[1, 32, 508, 128]",
377
  "name" : "new_v_cache_0",
378
  "type" : "MultiArray"
379
  },
 
381
  "hasShapeFlexibility" : "0",
382
  "isOptional" : "0",
383
  "dataType" : "Float16",
384
+ "formattedType" : "MultiArray (Float16 1 × 32 × 508 × 128)",
385
  "shortDescription" : "",
386
+ "shape" : "[1, 32, 508, 128]",
387
  "name" : "new_v_cache_1",
388
  "type" : "MultiArray"
389
  },
 
391
  "hasShapeFlexibility" : "0",
392
  "isOptional" : "0",
393
  "dataType" : "Float16",
394
+ "formattedType" : "MultiArray (Float16 1 × 32 × 508 × 128)",
395
  "shortDescription" : "",
396
+ "shape" : "[1, 32, 508, 128]",
397
  "name" : "new_v_cache_2",
398
  "type" : "MultiArray"
399
  }
 
402
  "mlProgramOperationTypeHistogram" : {
403
  "Ios18.constexprLutToDense" : 21,
404
  "Ios18.conv" : 21,
 
405
  "Ios18.expandDims" : 6,
406
+ "Ios18.matmul" : 6,
407
  "Ios18.concat" : 18,
408
  "Ios18.add" : 15,
409
  "Ios18.realDiv" : 6,
410
  "Ios18.silu" : 3,
411
  "Ios18.softmax" : 3,
412
  "Ios18.sliceByIndex" : 18,
413
+ "Ios18.transpose" : 6,
414
  "Ios16.reduceL2Norm" : 6,
415
  "Ios18.squeeze" : 6,
416
  "Ios18.reshape" : 12,
 
421
  "mlProgramOperationTypeHistogram" : {
422
  "Ios18.constexprLutToDense" : 21,
423
  "Ios18.conv" : 21,
 
424
  "Ios18.expandDims" : 6,
425
+ "Ios18.matmul" : 6,
426
+ "Ios18.concat" : 12,
427
  "Ios18.add" : 15,
428
  "Ios18.realDiv" : 6,
429
  "Ios18.silu" : 3,
430
  "Ios18.softmax" : 3,
431
  "Ios18.sliceByIndex" : 18,
432
+ "Ios18.transpose" : 6,
433
  "Ios16.reduceL2Norm" : 6,
434
  "Ios18.squeeze" : 6,
435
  "Ios18.reshape" : 12,
 
494
  }
495
  ],
496
  "defaultFunctionName" : "input_512_context_512",
497
+ "generatedClassName" : "Llama_2_7b_hf_2024_07_17_19_34_17_merged_chunk10",
498
  "userDefinedMetadata" : {
499
 
500
  },
sequoia/Llama-2-7b-hf_chunk10.mlmodelc/model.mil CHANGED
The diff for this file is too large to render. See raw diff
 
sequoia/Llama-2-7b-hf_chunk11.mlmodelc/analytics/coremldata.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:84e317a82cdf4e96f808f63e77f10098844d47ad522545181edfac4d287c9c92
3
  size 243
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e69e7ad37dd59e97348d395eec9b4c41b7d3ea44d86f613751ae47803a0a2efe
3
  size 243
sequoia/Llama-2-7b-hf_chunk11.mlmodelc/coremldata.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e430d0795ff5c384187174f5718a2c13d0070f5d6a811831e18862497865a86d
3
  size 1037
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d35a0353bcfa501579e07af3718261af3b129b4bec004c1fe6d812a6403a3f5b
3
  size 1037
sequoia/Llama-2-7b-hf_chunk11.mlmodelc/metadata.json CHANGED
@@ -17,9 +17,9 @@
17
  "hasShapeFlexibility" : "0",
18
  "isOptional" : "0",
19
  "dataType" : "Float16",
20
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
21
  "shortDescription" : "",
22
- "shape" : "[1, 32, 128, 511]",
23
  "name" : "new_k_cache_0",
24
  "type" : "MultiArray"
25
  },
@@ -27,9 +27,9 @@
27
  "hasShapeFlexibility" : "0",
28
  "isOptional" : "0",
29
  "dataType" : "Float16",
30
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
31
  "shortDescription" : "",
32
- "shape" : "[1, 32, 128, 511]",
33
  "name" : "new_k_cache_1",
34
  "type" : "MultiArray"
35
  },
@@ -37,9 +37,9 @@
37
  "hasShapeFlexibility" : "0",
38
  "isOptional" : "0",
39
  "dataType" : "Float16",
40
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
41
  "shortDescription" : "",
42
- "shape" : "[1, 32, 128, 511]",
43
  "name" : "new_k_cache_2",
44
  "type" : "MultiArray"
45
  },
@@ -47,9 +47,9 @@
47
  "hasShapeFlexibility" : "0",
48
  "isOptional" : "0",
49
  "dataType" : "Float16",
50
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
51
  "shortDescription" : "",
52
- "shape" : "[1, 32, 128, 511]",
53
  "name" : "new_v_cache_0",
54
  "type" : "MultiArray"
55
  },
@@ -57,9 +57,9 @@
57
  "hasShapeFlexibility" : "0",
58
  "isOptional" : "0",
59
  "dataType" : "Float16",
60
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
61
  "shortDescription" : "",
62
- "shape" : "[1, 32, 128, 511]",
63
  "name" : "new_v_cache_1",
64
  "type" : "MultiArray"
65
  },
@@ -67,9 +67,9 @@
67
  "hasShapeFlexibility" : "0",
68
  "isOptional" : "0",
69
  "dataType" : "Float16",
70
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
71
  "shortDescription" : "",
72
- "shape" : "[1, 32, 128, 511]",
73
  "name" : "new_v_cache_2",
74
  "type" : "MultiArray"
75
  }
@@ -142,9 +142,9 @@
142
  "hasShapeFlexibility" : "0",
143
  "isOptional" : "0",
144
  "dataType" : "Float16",
145
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
146
  "shortDescription" : "",
147
- "shape" : "[1, 32, 128, 511]",
148
  "name" : "new_k_cache_0",
149
  "type" : "MultiArray"
150
  },
@@ -152,9 +152,9 @@
152
  "hasShapeFlexibility" : "0",
153
  "isOptional" : "0",
154
  "dataType" : "Float16",
155
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
156
  "shortDescription" : "",
157
- "shape" : "[1, 32, 128, 511]",
158
  "name" : "new_k_cache_1",
159
  "type" : "MultiArray"
160
  },
@@ -162,9 +162,9 @@
162
  "hasShapeFlexibility" : "0",
163
  "isOptional" : "0",
164
  "dataType" : "Float16",
165
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
166
  "shortDescription" : "",
167
- "shape" : "[1, 32, 128, 511]",
168
  "name" : "new_k_cache_2",
169
  "type" : "MultiArray"
170
  },
@@ -172,9 +172,9 @@
172
  "hasShapeFlexibility" : "0",
173
  "isOptional" : "0",
174
  "dataType" : "Float16",
175
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
176
  "shortDescription" : "",
177
- "shape" : "[1, 32, 128, 511]",
178
  "name" : "new_v_cache_0",
179
  "type" : "MultiArray"
180
  },
@@ -182,9 +182,9 @@
182
  "hasShapeFlexibility" : "0",
183
  "isOptional" : "0",
184
  "dataType" : "Float16",
185
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
186
  "shortDescription" : "",
187
- "shape" : "[1, 32, 128, 511]",
188
  "name" : "new_v_cache_1",
189
  "type" : "MultiArray"
190
  },
@@ -192,9 +192,9 @@
192
  "hasShapeFlexibility" : "0",
193
  "isOptional" : "0",
194
  "dataType" : "Float16",
195
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
196
  "shortDescription" : "",
197
- "shape" : "[1, 32, 128, 511]",
198
  "name" : "new_v_cache_2",
199
  "type" : "MultiArray"
200
  }
@@ -203,14 +203,15 @@
203
  "mlProgramOperationTypeHistogram" : {
204
  "Ios18.constexprLutToDense" : 21,
205
  "Ios18.conv" : 21,
206
- "Ios18.matmul" : 6,
207
  "Ios18.expandDims" : 6,
208
- "Ios18.concat" : 18,
 
209
  "Ios18.add" : 15,
210
  "Ios18.realDiv" : 6,
211
  "Ios18.silu" : 3,
212
  "Ios18.softmax" : 3,
213
  "Ios18.sliceByIndex" : 18,
 
214
  "Ios16.reduceL2Norm" : 6,
215
  "Ios18.squeeze" : 6,
216
  "Ios18.reshape" : 12,
@@ -223,9 +224,9 @@
223
  "hasShapeFlexibility" : "0",
224
  "isOptional" : "0",
225
  "dataType" : "Float16",
226
- "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 1)",
227
  "shortDescription" : "",
228
- "shape" : "[1, 4096, 1, 1]",
229
  "name" : "x",
230
  "type" : "MultiArray"
231
  },
@@ -233,9 +234,9 @@
233
  "hasShapeFlexibility" : "0",
234
  "isOptional" : "0",
235
  "dataType" : "Float16",
236
- "formattedType" : "MultiArray (Float16 128 × 1)",
237
  "shortDescription" : "",
238
- "shape" : "[128, 1]",
239
  "name" : "cos",
240
  "type" : "MultiArray"
241
  },
@@ -243,9 +244,9 @@
243
  "hasShapeFlexibility" : "0",
244
  "isOptional" : "0",
245
  "dataType" : "Float16",
246
- "formattedType" : "MultiArray (Float16 128 × 1)",
247
  "shortDescription" : "",
248
- "shape" : "[128, 1]",
249
  "name" : "sin",
250
  "type" : "MultiArray"
251
  },
@@ -253,9 +254,9 @@
253
  "hasShapeFlexibility" : "0",
254
  "isOptional" : "0",
255
  "dataType" : "Float16",
256
- "formattedType" : "MultiArray (Float16 1 × 1 × 1 × 512)",
257
  "shortDescription" : "",
258
- "shape" : "[1, 1, 1, 512]",
259
  "name" : "mask",
260
  "type" : "MultiArray"
261
  },
@@ -263,9 +264,9 @@
263
  "hasShapeFlexibility" : "0",
264
  "isOptional" : "1",
265
  "dataType" : "Float16",
266
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)?",
267
  "shortDescription" : "",
268
- "shape" : "[1, 32, 128, 511]",
269
  "name" : "k_cache_0",
270
  "type" : "MultiArray"
271
  },
@@ -273,9 +274,9 @@
273
  "hasShapeFlexibility" : "0",
274
  "isOptional" : "1",
275
  "dataType" : "Float16",
276
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)?",
277
  "shortDescription" : "",
278
- "shape" : "[1, 32, 128, 511]",
279
  "name" : "v_cache_0",
280
  "type" : "MultiArray"
281
  },
@@ -283,9 +284,9 @@
283
  "hasShapeFlexibility" : "0",
284
  "isOptional" : "1",
285
  "dataType" : "Float16",
286
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)?",
287
  "shortDescription" : "",
288
- "shape" : "[1, 32, 128, 511]",
289
  "name" : "k_cache_1",
290
  "type" : "MultiArray"
291
  },
@@ -293,9 +294,9 @@
293
  "hasShapeFlexibility" : "0",
294
  "isOptional" : "1",
295
  "dataType" : "Float16",
296
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)?",
297
  "shortDescription" : "",
298
- "shape" : "[1, 32, 128, 511]",
299
  "name" : "v_cache_1",
300
  "type" : "MultiArray"
301
  },
@@ -303,9 +304,9 @@
303
  "hasShapeFlexibility" : "0",
304
  "isOptional" : "1",
305
  "dataType" : "Float16",
306
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)?",
307
  "shortDescription" : "",
308
- "shape" : "[1, 32, 128, 511]",
309
  "name" : "k_cache_2",
310
  "type" : "MultiArray"
311
  },
@@ -313,9 +314,9 @@
313
  "hasShapeFlexibility" : "0",
314
  "isOptional" : "1",
315
  "dataType" : "Float16",
316
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)?",
317
  "shortDescription" : "",
318
- "shape" : "[1, 32, 128, 511]",
319
  "name" : "v_cache_2",
320
  "type" : "MultiArray"
321
  }
@@ -330,9 +331,9 @@
330
  "hasShapeFlexibility" : "0",
331
  "isOptional" : "0",
332
  "dataType" : "Float16",
333
- "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 1)",
334
  "shortDescription" : "",
335
- "shape" : "[1, 4096, 1, 1]",
336
  "name" : "new_x",
337
  "type" : "MultiArray"
338
  },
@@ -340,9 +341,9 @@
340
  "hasShapeFlexibility" : "0",
341
  "isOptional" : "0",
342
  "dataType" : "Float16",
343
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
344
  "shortDescription" : "",
345
- "shape" : "[1, 32, 128, 511]",
346
  "name" : "new_k_cache_0",
347
  "type" : "MultiArray"
348
  },
@@ -350,9 +351,9 @@
350
  "hasShapeFlexibility" : "0",
351
  "isOptional" : "0",
352
  "dataType" : "Float16",
353
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
354
  "shortDescription" : "",
355
- "shape" : "[1, 32, 128, 511]",
356
  "name" : "new_k_cache_1",
357
  "type" : "MultiArray"
358
  },
@@ -360,9 +361,9 @@
360
  "hasShapeFlexibility" : "0",
361
  "isOptional" : "0",
362
  "dataType" : "Float16",
363
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
364
  "shortDescription" : "",
365
- "shape" : "[1, 32, 128, 511]",
366
  "name" : "new_k_cache_2",
367
  "type" : "MultiArray"
368
  },
@@ -370,9 +371,9 @@
370
  "hasShapeFlexibility" : "0",
371
  "isOptional" : "0",
372
  "dataType" : "Float16",
373
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
374
  "shortDescription" : "",
375
- "shape" : "[1, 32, 128, 511]",
376
  "name" : "new_v_cache_0",
377
  "type" : "MultiArray"
378
  },
@@ -380,9 +381,9 @@
380
  "hasShapeFlexibility" : "0",
381
  "isOptional" : "0",
382
  "dataType" : "Float16",
383
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
384
  "shortDescription" : "",
385
- "shape" : "[1, 32, 128, 511]",
386
  "name" : "new_v_cache_1",
387
  "type" : "MultiArray"
388
  },
@@ -390,9 +391,9 @@
390
  "hasShapeFlexibility" : "0",
391
  "isOptional" : "0",
392
  "dataType" : "Float16",
393
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
394
  "shortDescription" : "",
395
- "shape" : "[1, 32, 128, 511]",
396
  "name" : "new_v_cache_2",
397
  "type" : "MultiArray"
398
  }
@@ -401,14 +402,15 @@
401
  "mlProgramOperationTypeHistogram" : {
402
  "Ios18.constexprLutToDense" : 21,
403
  "Ios18.conv" : 21,
404
- "Ios18.matmul" : 6,
405
  "Ios18.expandDims" : 6,
 
406
  "Ios18.concat" : 18,
407
  "Ios18.add" : 15,
408
  "Ios18.realDiv" : 6,
409
  "Ios18.silu" : 3,
410
  "Ios18.softmax" : 3,
411
  "Ios18.sliceByIndex" : 18,
 
412
  "Ios16.reduceL2Norm" : 6,
413
  "Ios18.squeeze" : 6,
414
  "Ios18.reshape" : 12,
@@ -419,14 +421,15 @@
419
  "mlProgramOperationTypeHistogram" : {
420
  "Ios18.constexprLutToDense" : 21,
421
  "Ios18.conv" : 21,
422
- "Ios18.matmul" : 6,
423
  "Ios18.expandDims" : 6,
424
- "Ios18.concat" : 18,
 
425
  "Ios18.add" : 15,
426
  "Ios18.realDiv" : 6,
427
  "Ios18.silu" : 3,
428
  "Ios18.softmax" : 3,
429
  "Ios18.sliceByIndex" : 18,
 
430
  "Ios16.reduceL2Norm" : 6,
431
  "Ios18.squeeze" : 6,
432
  "Ios18.reshape" : 12,
@@ -491,7 +494,7 @@
491
  }
492
  ],
493
  "defaultFunctionName" : "input_512_context_512",
494
- "generatedClassName" : "Llama_2_7b_hf_2024_07_02_20_36_17_merged_chunk11",
495
  "userDefinedMetadata" : {
496
 
497
  },
 
17
  "hasShapeFlexibility" : "0",
18
  "isOptional" : "0",
19
  "dataType" : "Float16",
20
+ "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 508)",
21
  "shortDescription" : "",
22
+ "shape" : "[1, 32, 128, 508]",
23
  "name" : "new_k_cache_0",
24
  "type" : "MultiArray"
25
  },
 
27
  "hasShapeFlexibility" : "0",
28
  "isOptional" : "0",
29
  "dataType" : "Float16",
30
+ "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 508)",
31
  "shortDescription" : "",
32
+ "shape" : "[1, 32, 128, 508]",
33
  "name" : "new_k_cache_1",
34
  "type" : "MultiArray"
35
  },
 
37
  "hasShapeFlexibility" : "0",
38
  "isOptional" : "0",
39
  "dataType" : "Float16",
40
+ "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 508)",
41
  "shortDescription" : "",
42
+ "shape" : "[1, 32, 128, 508]",
43
  "name" : "new_k_cache_2",
44
  "type" : "MultiArray"
45
  },
 
47
  "hasShapeFlexibility" : "0",
48
  "isOptional" : "0",
49
  "dataType" : "Float16",
50
+ "formattedType" : "MultiArray (Float16 1 × 32 × 508 × 128)",
51
  "shortDescription" : "",
52
+ "shape" : "[1, 32, 508, 128]",
53
  "name" : "new_v_cache_0",
54
  "type" : "MultiArray"
55
  },
 
57
  "hasShapeFlexibility" : "0",
58
  "isOptional" : "0",
59
  "dataType" : "Float16",
60
+ "formattedType" : "MultiArray (Float16 1 × 32 × 508 × 128)",
61
  "shortDescription" : "",
62
+ "shape" : "[1, 32, 508, 128]",
63
  "name" : "new_v_cache_1",
64
  "type" : "MultiArray"
65
  },
 
67
  "hasShapeFlexibility" : "0",
68
  "isOptional" : "0",
69
  "dataType" : "Float16",
70
+ "formattedType" : "MultiArray (Float16 1 × 32 × 508 × 128)",
71
  "shortDescription" : "",
72
+ "shape" : "[1, 32, 508, 128]",
73
  "name" : "new_v_cache_2",
74
  "type" : "MultiArray"
75
  }
 
142
  "hasShapeFlexibility" : "0",
143
  "isOptional" : "0",
144
  "dataType" : "Float16",
145
+ "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 508)",
146
  "shortDescription" : "",
147
+ "shape" : "[1, 32, 128, 508]",
148
  "name" : "new_k_cache_0",
149
  "type" : "MultiArray"
150
  },
 
152
  "hasShapeFlexibility" : "0",
153
  "isOptional" : "0",
154
  "dataType" : "Float16",
155
+ "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 508)",
156
  "shortDescription" : "",
157
+ "shape" : "[1, 32, 128, 508]",
158
  "name" : "new_k_cache_1",
159
  "type" : "MultiArray"
160
  },
 
162
  "hasShapeFlexibility" : "0",
163
  "isOptional" : "0",
164
  "dataType" : "Float16",
165
+ "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 508)",
166
  "shortDescription" : "",
167
+ "shape" : "[1, 32, 128, 508]",
168
  "name" : "new_k_cache_2",
169
  "type" : "MultiArray"
170
  },
 
172
  "hasShapeFlexibility" : "0",
173
  "isOptional" : "0",
174
  "dataType" : "Float16",
175
+ "formattedType" : "MultiArray (Float16 1 × 32 × 508 × 128)",
176
  "shortDescription" : "",
177
+ "shape" : "[1, 32, 508, 128]",
178
  "name" : "new_v_cache_0",
179
  "type" : "MultiArray"
180
  },
 
182
  "hasShapeFlexibility" : "0",
183
  "isOptional" : "0",
184
  "dataType" : "Float16",
185
+ "formattedType" : "MultiArray (Float16 1 × 32 × 508 × 128)",
186
  "shortDescription" : "",
187
+ "shape" : "[1, 32, 508, 128]",
188
  "name" : "new_v_cache_1",
189
  "type" : "MultiArray"
190
  },
 
192
  "hasShapeFlexibility" : "0",
193
  "isOptional" : "0",
194
  "dataType" : "Float16",
195
+ "formattedType" : "MultiArray (Float16 1 × 32 × 508 × 128)",
196
  "shortDescription" : "",
197
+ "shape" : "[1, 32, 508, 128]",
198
  "name" : "new_v_cache_2",
199
  "type" : "MultiArray"
200
  }
 
203
  "mlProgramOperationTypeHistogram" : {
204
  "Ios18.constexprLutToDense" : 21,
205
  "Ios18.conv" : 21,
 
206
  "Ios18.expandDims" : 6,
207
+ "Ios18.matmul" : 6,
208
+ "Ios18.concat" : 12,
209
  "Ios18.add" : 15,
210
  "Ios18.realDiv" : 6,
211
  "Ios18.silu" : 3,
212
  "Ios18.softmax" : 3,
213
  "Ios18.sliceByIndex" : 18,
214
+ "Ios18.transpose" : 6,
215
  "Ios16.reduceL2Norm" : 6,
216
  "Ios18.squeeze" : 6,
217
  "Ios18.reshape" : 12,
 
224
  "hasShapeFlexibility" : "0",
225
  "isOptional" : "0",
226
  "dataType" : "Float16",
227
+ "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 4)",
228
  "shortDescription" : "",
229
+ "shape" : "[1, 4096, 1, 4]",
230
  "name" : "x",
231
  "type" : "MultiArray"
232
  },
 
234
  "hasShapeFlexibility" : "0",
235
  "isOptional" : "0",
236
  "dataType" : "Float16",
237
+ "formattedType" : "MultiArray (Float16 128 × 4)",
238
  "shortDescription" : "",
239
+ "shape" : "[128, 4]",
240
  "name" : "cos",
241
  "type" : "MultiArray"
242
  },
 
244
  "hasShapeFlexibility" : "0",
245
  "isOptional" : "0",
246
  "dataType" : "Float16",
247
+ "formattedType" : "MultiArray (Float16 128 × 4)",
248
  "shortDescription" : "",
249
+ "shape" : "[128, 4]",
250
  "name" : "sin",
251
  "type" : "MultiArray"
252
  },
 
254
  "hasShapeFlexibility" : "0",
255
  "isOptional" : "0",
256
  "dataType" : "Float16",
257
+ "formattedType" : "MultiArray (Float16 1 × 1 × 4 × 512)",
258
  "shortDescription" : "",
259
+ "shape" : "[1, 1, 4, 512]",
260
  "name" : "mask",
261
  "type" : "MultiArray"
262
  },
 
264
  "hasShapeFlexibility" : "0",
265
  "isOptional" : "1",
266
  "dataType" : "Float16",
267
+ "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 508)?",
268
  "shortDescription" : "",
269
+ "shape" : "[1, 32, 128, 508]",
270
  "name" : "k_cache_0",
271
  "type" : "MultiArray"
272
  },
 
274
  "hasShapeFlexibility" : "0",
275
  "isOptional" : "1",
276
  "dataType" : "Float16",
277
+ "formattedType" : "MultiArray (Float16 1 × 32 × 508 × 128)?",
278
  "shortDescription" : "",
279
+ "shape" : "[1, 32, 508, 128]",
280
  "name" : "v_cache_0",
281
  "type" : "MultiArray"
282
  },
 
284
  "hasShapeFlexibility" : "0",
285
  "isOptional" : "1",
286
  "dataType" : "Float16",
287
+ "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 508)?",
288
  "shortDescription" : "",
289
+ "shape" : "[1, 32, 128, 508]",
290
  "name" : "k_cache_1",
291
  "type" : "MultiArray"
292
  },
 
294
  "hasShapeFlexibility" : "0",
295
  "isOptional" : "1",
296
  "dataType" : "Float16",
297
+ "formattedType" : "MultiArray (Float16 1 × 32 × 508 × 128)?",
298
  "shortDescription" : "",
299
+ "shape" : "[1, 32, 508, 128]",
300
  "name" : "v_cache_1",
301
  "type" : "MultiArray"
302
  },
 
304
  "hasShapeFlexibility" : "0",
305
  "isOptional" : "1",
306
  "dataType" : "Float16",
307
+ "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 508)?",
308
  "shortDescription" : "",
309
+ "shape" : "[1, 32, 128, 508]",
310
  "name" : "k_cache_2",
311
  "type" : "MultiArray"
312
  },
 
314
  "hasShapeFlexibility" : "0",
315
  "isOptional" : "1",
316
  "dataType" : "Float16",
317
+ "formattedType" : "MultiArray (Float16 1 × 32 × 508 × 128)?",
318
  "shortDescription" : "",
319
+ "shape" : "[1, 32, 508, 128]",
320
  "name" : "v_cache_2",
321
  "type" : "MultiArray"
322
  }
 
331
  "hasShapeFlexibility" : "0",
332
  "isOptional" : "0",
333
  "dataType" : "Float16",
334
+ "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 4)",
335
  "shortDescription" : "",
336
+ "shape" : "[1, 4096, 1, 4]",
337
  "name" : "new_x",
338
  "type" : "MultiArray"
339
  },
 
341
  "hasShapeFlexibility" : "0",
342
  "isOptional" : "0",
343
  "dataType" : "Float16",
344
+ "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 508)",
345
  "shortDescription" : "",
346
+ "shape" : "[1, 32, 128, 508]",
347
  "name" : "new_k_cache_0",
348
  "type" : "MultiArray"
349
  },
 
351
  "hasShapeFlexibility" : "0",
352
  "isOptional" : "0",
353
  "dataType" : "Float16",
354
+ "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 508)",
355
  "shortDescription" : "",
356
+ "shape" : "[1, 32, 128, 508]",
357
  "name" : "new_k_cache_1",
358
  "type" : "MultiArray"
359
  },
 
361
  "hasShapeFlexibility" : "0",
362
  "isOptional" : "0",
363
  "dataType" : "Float16",
364
+ "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 508)",
365
  "shortDescription" : "",
366
+ "shape" : "[1, 32, 128, 508]",
367
  "name" : "new_k_cache_2",
368
  "type" : "MultiArray"
369
  },
 
371
  "hasShapeFlexibility" : "0",
372
  "isOptional" : "0",
373
  "dataType" : "Float16",
374
+ "formattedType" : "MultiArray (Float16 1 × 32 × 508 × 128)",
375
  "shortDescription" : "",
376
+ "shape" : "[1, 32, 508, 128]",
377
  "name" : "new_v_cache_0",
378
  "type" : "MultiArray"
379
  },
 
381
  "hasShapeFlexibility" : "0",
382
  "isOptional" : "0",
383
  "dataType" : "Float16",
384
+ "formattedType" : "MultiArray (Float16 1 × 32 × 508 × 128)",
385
  "shortDescription" : "",
386
+ "shape" : "[1, 32, 508, 128]",
387
  "name" : "new_v_cache_1",
388
  "type" : "MultiArray"
389
  },
 
391
  "hasShapeFlexibility" : "0",
392
  "isOptional" : "0",
393
  "dataType" : "Float16",
394
+ "formattedType" : "MultiArray (Float16 1 × 32 × 508 × 128)",
395
  "shortDescription" : "",
396
+ "shape" : "[1, 32, 508, 128]",
397
  "name" : "new_v_cache_2",
398
  "type" : "MultiArray"
399
  }
 
402
  "mlProgramOperationTypeHistogram" : {
403
  "Ios18.constexprLutToDense" : 21,
404
  "Ios18.conv" : 21,
 
405
  "Ios18.expandDims" : 6,
406
+ "Ios18.matmul" : 6,
407
  "Ios18.concat" : 18,
408
  "Ios18.add" : 15,
409
  "Ios18.realDiv" : 6,
410
  "Ios18.silu" : 3,
411
  "Ios18.softmax" : 3,
412
  "Ios18.sliceByIndex" : 18,
413
+ "Ios18.transpose" : 6,
414
  "Ios16.reduceL2Norm" : 6,
415
  "Ios18.squeeze" : 6,
416
  "Ios18.reshape" : 12,
 
421
  "mlProgramOperationTypeHistogram" : {
422
  "Ios18.constexprLutToDense" : 21,
423
  "Ios18.conv" : 21,
 
424
  "Ios18.expandDims" : 6,
425
+ "Ios18.matmul" : 6,
426
+ "Ios18.concat" : 12,
427
  "Ios18.add" : 15,
428
  "Ios18.realDiv" : 6,
429
  "Ios18.silu" : 3,
430
  "Ios18.softmax" : 3,
431
  "Ios18.sliceByIndex" : 18,
432
+ "Ios18.transpose" : 6,
433
  "Ios16.reduceL2Norm" : 6,
434
  "Ios18.squeeze" : 6,
435
  "Ios18.reshape" : 12,
 
494
  }
495
  ],
496
  "defaultFunctionName" : "input_512_context_512",
497
+ "generatedClassName" : "Llama_2_7b_hf_2024_07_17_19_34_17_merged_chunk11",
498
  "userDefinedMetadata" : {
499
 
500
  },
sequoia/Llama-2-7b-hf_chunk11.mlmodelc/model.mil CHANGED
The diff for this file is too large to render. See raw diff
 
sequoia/Llama-2-7b-hf_chunk12.mlmodelc/analytics/coremldata.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:877129a9d42c3d4d9b1b793d51e152d6fed08881a973bbb5ed4a001571623eb0
3
  size 243
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ebfac06ad6ea250163afbdb1dcff54d9a4efd5c687a99f836a173d45bba0e7e9
3
  size 243
sequoia/Llama-2-7b-hf_chunk12.mlmodelc/coremldata.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7e4186acc6251c3785f2b0af36e33eacfe6b4f78971ae86bda2e885776607d79
3
  size 831
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5015e3121f08174cb761ca5facaf3f027bc6be5ee22d02a1c8a820193ae2e978
3
  size 831
sequoia/Llama-2-7b-hf_chunk12.mlmodelc/metadata.json CHANGED
@@ -17,9 +17,9 @@
17
  "hasShapeFlexibility" : "0",
18
  "isOptional" : "0",
19
  "dataType" : "Float16",
20
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
21
  "shortDescription" : "",
22
- "shape" : "[1, 32, 128, 511]",
23
  "name" : "new_k_cache_0",
24
  "type" : "MultiArray"
25
  },
@@ -27,9 +27,9 @@
27
  "hasShapeFlexibility" : "0",
28
  "isOptional" : "0",
29
  "dataType" : "Float16",
30
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
31
  "shortDescription" : "",
32
- "shape" : "[1, 32, 128, 511]",
33
  "name" : "new_k_cache_1",
34
  "type" : "MultiArray"
35
  },
@@ -37,9 +37,9 @@
37
  "hasShapeFlexibility" : "0",
38
  "isOptional" : "0",
39
  "dataType" : "Float16",
40
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
41
  "shortDescription" : "",
42
- "shape" : "[1, 32, 128, 511]",
43
  "name" : "new_v_cache_0",
44
  "type" : "MultiArray"
45
  },
@@ -47,9 +47,9 @@
47
  "hasShapeFlexibility" : "0",
48
  "isOptional" : "0",
49
  "dataType" : "Float16",
50
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
51
  "shortDescription" : "",
52
- "shape" : "[1, 32, 128, 511]",
53
  "name" : "new_v_cache_1",
54
  "type" : "MultiArray"
55
  }
@@ -122,9 +122,9 @@
122
  "hasShapeFlexibility" : "0",
123
  "isOptional" : "0",
124
  "dataType" : "Float16",
125
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
126
  "shortDescription" : "",
127
- "shape" : "[1, 32, 128, 511]",
128
  "name" : "new_k_cache_0",
129
  "type" : "MultiArray"
130
  },
@@ -132,9 +132,9 @@
132
  "hasShapeFlexibility" : "0",
133
  "isOptional" : "0",
134
  "dataType" : "Float16",
135
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
136
  "shortDescription" : "",
137
- "shape" : "[1, 32, 128, 511]",
138
  "name" : "new_k_cache_1",
139
  "type" : "MultiArray"
140
  },
@@ -142,9 +142,9 @@
142
  "hasShapeFlexibility" : "0",
143
  "isOptional" : "0",
144
  "dataType" : "Float16",
145
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
146
  "shortDescription" : "",
147
- "shape" : "[1, 32, 128, 511]",
148
  "name" : "new_v_cache_0",
149
  "type" : "MultiArray"
150
  },
@@ -152,9 +152,9 @@
152
  "hasShapeFlexibility" : "0",
153
  "isOptional" : "0",
154
  "dataType" : "Float16",
155
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
156
  "shortDescription" : "",
157
- "shape" : "[1, 32, 128, 511]",
158
  "name" : "new_v_cache_1",
159
  "type" : "MultiArray"
160
  }
@@ -163,15 +163,15 @@
163
  "mlProgramOperationTypeHistogram" : {
164
  "Ios18.constexprLutToDense" : 14,
165
  "Ios18.conv" : 14,
166
- "Ios18.matmul" : 6,
167
  "Ios18.expandDims" : 5,
168
- "Ios18.concat" : 14,
 
169
  "Ios18.add" : 10,
170
  "Ios18.realDiv" : 5,
171
  "Ios18.silu" : 2,
172
  "Ios18.softmax" : 2,
173
  "Ios18.sliceByIndex" : 12,
174
- "Ios18.transpose" : 1,
175
  "Ios16.reduceL2Norm" : 5,
176
  "Ios18.squeeze" : 6,
177
  "Ios18.reshape" : 11,
@@ -184,9 +184,9 @@
184
  "hasShapeFlexibility" : "0",
185
  "isOptional" : "0",
186
  "dataType" : "Float16",
187
- "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 1)",
188
  "shortDescription" : "",
189
- "shape" : "[1, 4096, 1, 1]",
190
  "name" : "x",
191
  "type" : "MultiArray"
192
  },
@@ -194,9 +194,9 @@
194
  "hasShapeFlexibility" : "0",
195
  "isOptional" : "0",
196
  "dataType" : "Float16",
197
- "formattedType" : "MultiArray (Float16 128 × 1)",
198
  "shortDescription" : "",
199
- "shape" : "[128, 1]",
200
  "name" : "cos",
201
  "type" : "MultiArray"
202
  },
@@ -204,9 +204,9 @@
204
  "hasShapeFlexibility" : "0",
205
  "isOptional" : "0",
206
  "dataType" : "Float16",
207
- "formattedType" : "MultiArray (Float16 128 × 1)",
208
  "shortDescription" : "",
209
- "shape" : "[128, 1]",
210
  "name" : "sin",
211
  "type" : "MultiArray"
212
  },
@@ -214,9 +214,9 @@
214
  "hasShapeFlexibility" : "0",
215
  "isOptional" : "0",
216
  "dataType" : "Float16",
217
- "formattedType" : "MultiArray (Float16 1 × 1 × 1 × 512)",
218
  "shortDescription" : "",
219
- "shape" : "[1, 1, 1, 512]",
220
  "name" : "mask",
221
  "type" : "MultiArray"
222
  },
@@ -224,9 +224,9 @@
224
  "hasShapeFlexibility" : "0",
225
  "isOptional" : "1",
226
  "dataType" : "Float16",
227
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)?",
228
  "shortDescription" : "",
229
- "shape" : "[1, 32, 128, 511]",
230
  "name" : "k_cache_0",
231
  "type" : "MultiArray"
232
  },
@@ -234,9 +234,9 @@
234
  "hasShapeFlexibility" : "0",
235
  "isOptional" : "1",
236
  "dataType" : "Float16",
237
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)?",
238
  "shortDescription" : "",
239
- "shape" : "[1, 32, 128, 511]",
240
  "name" : "v_cache_0",
241
  "type" : "MultiArray"
242
  },
@@ -244,9 +244,9 @@
244
  "hasShapeFlexibility" : "0",
245
  "isOptional" : "1",
246
  "dataType" : "Float16",
247
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)?",
248
  "shortDescription" : "",
249
- "shape" : "[1, 32, 128, 511]",
250
  "name" : "k_cache_1",
251
  "type" : "MultiArray"
252
  },
@@ -254,9 +254,9 @@
254
  "hasShapeFlexibility" : "0",
255
  "isOptional" : "1",
256
  "dataType" : "Float16",
257
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)?",
258
  "shortDescription" : "",
259
- "shape" : "[1, 32, 128, 511]",
260
  "name" : "v_cache_1",
261
  "type" : "MultiArray"
262
  }
@@ -271,9 +271,9 @@
271
  "hasShapeFlexibility" : "0",
272
  "isOptional" : "0",
273
  "dataType" : "Float16",
274
- "formattedType" : "MultiArray (Float16 1 × 1 × 32000)",
275
  "shortDescription" : "",
276
- "shape" : "[1, 1, 32000]",
277
  "name" : "logits",
278
  "type" : "MultiArray"
279
  },
@@ -281,9 +281,9 @@
281
  "hasShapeFlexibility" : "0",
282
  "isOptional" : "0",
283
  "dataType" : "Float16",
284
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
285
  "shortDescription" : "",
286
- "shape" : "[1, 32, 128, 511]",
287
  "name" : "new_k_cache_0",
288
  "type" : "MultiArray"
289
  },
@@ -291,9 +291,9 @@
291
  "hasShapeFlexibility" : "0",
292
  "isOptional" : "0",
293
  "dataType" : "Float16",
294
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
295
  "shortDescription" : "",
296
- "shape" : "[1, 32, 128, 511]",
297
  "name" : "new_k_cache_1",
298
  "type" : "MultiArray"
299
  },
@@ -301,9 +301,9 @@
301
  "hasShapeFlexibility" : "0",
302
  "isOptional" : "0",
303
  "dataType" : "Float16",
304
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
305
  "shortDescription" : "",
306
- "shape" : "[1, 32, 128, 511]",
307
  "name" : "new_v_cache_0",
308
  "type" : "MultiArray"
309
  },
@@ -311,9 +311,9 @@
311
  "hasShapeFlexibility" : "0",
312
  "isOptional" : "0",
313
  "dataType" : "Float16",
314
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
315
  "shortDescription" : "",
316
- "shape" : "[1, 32, 128, 511]",
317
  "name" : "new_v_cache_1",
318
  "type" : "MultiArray"
319
  }
@@ -322,15 +322,15 @@
322
  "mlProgramOperationTypeHistogram" : {
323
  "Ios18.constexprLutToDense" : 14,
324
  "Ios18.conv" : 14,
325
- "Ios18.matmul" : 6,
326
  "Ios18.expandDims" : 5,
 
327
  "Ios18.concat" : 14,
328
  "Ios18.add" : 10,
329
  "Ios18.realDiv" : 5,
330
  "Ios18.silu" : 2,
331
  "Ios18.softmax" : 2,
332
  "Ios18.sliceByIndex" : 12,
333
- "Ios18.transpose" : 1,
334
  "Ios16.reduceL2Norm" : 5,
335
  "Ios18.squeeze" : 6,
336
  "Ios18.reshape" : 11,
@@ -341,15 +341,15 @@
341
  "mlProgramOperationTypeHistogram" : {
342
  "Ios18.constexprLutToDense" : 14,
343
  "Ios18.conv" : 14,
344
- "Ios18.matmul" : 6,
345
  "Ios18.expandDims" : 5,
346
- "Ios18.concat" : 14,
 
347
  "Ios18.add" : 10,
348
  "Ios18.realDiv" : 5,
349
  "Ios18.silu" : 2,
350
  "Ios18.softmax" : 2,
351
  "Ios18.sliceByIndex" : 12,
352
- "Ios18.transpose" : 1,
353
  "Ios16.reduceL2Norm" : 5,
354
  "Ios18.squeeze" : 6,
355
  "Ios18.reshape" : 11,
@@ -414,7 +414,7 @@
414
  }
415
  ],
416
  "defaultFunctionName" : "input_512_context_512",
417
- "generatedClassName" : "Llama_2_7b_hf_2024_07_02_20_36_17_merged_chunk12",
418
  "userDefinedMetadata" : {
419
 
420
  },
 
17
  "hasShapeFlexibility" : "0",
18
  "isOptional" : "0",
19
  "dataType" : "Float16",
20
+ "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 508)",
21
  "shortDescription" : "",
22
+ "shape" : "[1, 32, 128, 508]",
23
  "name" : "new_k_cache_0",
24
  "type" : "MultiArray"
25
  },
 
27
  "hasShapeFlexibility" : "0",
28
  "isOptional" : "0",
29
  "dataType" : "Float16",
30
+ "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 508)",
31
  "shortDescription" : "",
32
+ "shape" : "[1, 32, 128, 508]",
33
  "name" : "new_k_cache_1",
34
  "type" : "MultiArray"
35
  },
 
37
  "hasShapeFlexibility" : "0",
38
  "isOptional" : "0",
39
  "dataType" : "Float16",
40
+ "formattedType" : "MultiArray (Float16 1 × 32 × 508 × 128)",
41
  "shortDescription" : "",
42
+ "shape" : "[1, 32, 508, 128]",
43
  "name" : "new_v_cache_0",
44
  "type" : "MultiArray"
45
  },
 
47
  "hasShapeFlexibility" : "0",
48
  "isOptional" : "0",
49
  "dataType" : "Float16",
50
+ "formattedType" : "MultiArray (Float16 1 × 32 × 508 × 128)",
51
  "shortDescription" : "",
52
+ "shape" : "[1, 32, 508, 128]",
53
  "name" : "new_v_cache_1",
54
  "type" : "MultiArray"
55
  }
 
122
  "hasShapeFlexibility" : "0",
123
  "isOptional" : "0",
124
  "dataType" : "Float16",
125
+ "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 508)",
126
  "shortDescription" : "",
127
+ "shape" : "[1, 32, 128, 508]",
128
  "name" : "new_k_cache_0",
129
  "type" : "MultiArray"
130
  },
 
132
  "hasShapeFlexibility" : "0",
133
  "isOptional" : "0",
134
  "dataType" : "Float16",
135
+ "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 508)",
136
  "shortDescription" : "",
137
+ "shape" : "[1, 32, 128, 508]",
138
  "name" : "new_k_cache_1",
139
  "type" : "MultiArray"
140
  },
 
142
  "hasShapeFlexibility" : "0",
143
  "isOptional" : "0",
144
  "dataType" : "Float16",
145
+ "formattedType" : "MultiArray (Float16 1 × 32 × 508 × 128)",
146
  "shortDescription" : "",
147
+ "shape" : "[1, 32, 508, 128]",
148
  "name" : "new_v_cache_0",
149
  "type" : "MultiArray"
150
  },
 
152
  "hasShapeFlexibility" : "0",
153
  "isOptional" : "0",
154
  "dataType" : "Float16",
155
+ "formattedType" : "MultiArray (Float16 1 × 32 × 508 × 128)",
156
  "shortDescription" : "",
157
+ "shape" : "[1, 32, 508, 128]",
158
  "name" : "new_v_cache_1",
159
  "type" : "MultiArray"
160
  }
 
163
  "mlProgramOperationTypeHistogram" : {
164
  "Ios18.constexprLutToDense" : 14,
165
  "Ios18.conv" : 14,
 
166
  "Ios18.expandDims" : 5,
167
+ "Ios18.matmul" : 6,
168
+ "Ios18.concat" : 10,
169
  "Ios18.add" : 10,
170
  "Ios18.realDiv" : 5,
171
  "Ios18.silu" : 2,
172
  "Ios18.softmax" : 2,
173
  "Ios18.sliceByIndex" : 12,
174
+ "Ios18.transpose" : 5,
175
  "Ios16.reduceL2Norm" : 5,
176
  "Ios18.squeeze" : 6,
177
  "Ios18.reshape" : 11,
 
184
  "hasShapeFlexibility" : "0",
185
  "isOptional" : "0",
186
  "dataType" : "Float16",
187
+ "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 4)",
188
  "shortDescription" : "",
189
+ "shape" : "[1, 4096, 1, 4]",
190
  "name" : "x",
191
  "type" : "MultiArray"
192
  },
 
194
  "hasShapeFlexibility" : "0",
195
  "isOptional" : "0",
196
  "dataType" : "Float16",
197
+ "formattedType" : "MultiArray (Float16 128 × 4)",
198
  "shortDescription" : "",
199
+ "shape" : "[128, 4]",
200
  "name" : "cos",
201
  "type" : "MultiArray"
202
  },
 
204
  "hasShapeFlexibility" : "0",
205
  "isOptional" : "0",
206
  "dataType" : "Float16",
207
+ "formattedType" : "MultiArray (Float16 128 × 4)",
208
  "shortDescription" : "",
209
+ "shape" : "[128, 4]",
210
  "name" : "sin",
211
  "type" : "MultiArray"
212
  },
 
214
  "hasShapeFlexibility" : "0",
215
  "isOptional" : "0",
216
  "dataType" : "Float16",
217
+ "formattedType" : "MultiArray (Float16 1 × 1 × 4 × 512)",
218
  "shortDescription" : "",
219
+ "shape" : "[1, 1, 4, 512]",
220
  "name" : "mask",
221
  "type" : "MultiArray"
222
  },
 
224
  "hasShapeFlexibility" : "0",
225
  "isOptional" : "1",
226
  "dataType" : "Float16",
227
+ "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 508)?",
228
  "shortDescription" : "",
229
+ "shape" : "[1, 32, 128, 508]",
230
  "name" : "k_cache_0",
231
  "type" : "MultiArray"
232
  },
 
234
  "hasShapeFlexibility" : "0",
235
  "isOptional" : "1",
236
  "dataType" : "Float16",
237
+ "formattedType" : "MultiArray (Float16 1 × 32 × 508 × 128)?",
238
  "shortDescription" : "",
239
+ "shape" : "[1, 32, 508, 128]",
240
  "name" : "v_cache_0",
241
  "type" : "MultiArray"
242
  },
 
244
  "hasShapeFlexibility" : "0",
245
  "isOptional" : "1",
246
  "dataType" : "Float16",
247
+ "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 508)?",
248
  "shortDescription" : "",
249
+ "shape" : "[1, 32, 128, 508]",
250
  "name" : "k_cache_1",
251
  "type" : "MultiArray"
252
  },
 
254
  "hasShapeFlexibility" : "0",
255
  "isOptional" : "1",
256
  "dataType" : "Float16",
257
+ "formattedType" : "MultiArray (Float16 1 × 32 × 508 × 128)?",
258
  "shortDescription" : "",
259
+ "shape" : "[1, 32, 508, 128]",
260
  "name" : "v_cache_1",
261
  "type" : "MultiArray"
262
  }
 
271
  "hasShapeFlexibility" : "0",
272
  "isOptional" : "0",
273
  "dataType" : "Float16",
274
+ "formattedType" : "MultiArray (Float16 1 × 4 × 32000)",
275
  "shortDescription" : "",
276
+ "shape" : "[1, 4, 32000]",
277
  "name" : "logits",
278
  "type" : "MultiArray"
279
  },
 
281
  "hasShapeFlexibility" : "0",
282
  "isOptional" : "0",
283
  "dataType" : "Float16",
284
+ "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 508)",
285
  "shortDescription" : "",
286
+ "shape" : "[1, 32, 128, 508]",
287
  "name" : "new_k_cache_0",
288
  "type" : "MultiArray"
289
  },
 
291
  "hasShapeFlexibility" : "0",
292
  "isOptional" : "0",
293
  "dataType" : "Float16",
294
+ "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 508)",
295
  "shortDescription" : "",
296
+ "shape" : "[1, 32, 128, 508]",
297
  "name" : "new_k_cache_1",
298
  "type" : "MultiArray"
299
  },
 
301
  "hasShapeFlexibility" : "0",
302
  "isOptional" : "0",
303
  "dataType" : "Float16",
304
+ "formattedType" : "MultiArray (Float16 1 × 32 × 508 × 128)",
305
  "shortDescription" : "",
306
+ "shape" : "[1, 32, 508, 128]",
307
  "name" : "new_v_cache_0",
308
  "type" : "MultiArray"
309
  },
 
311
  "hasShapeFlexibility" : "0",
312
  "isOptional" : "0",
313
  "dataType" : "Float16",
314
+ "formattedType" : "MultiArray (Float16 1 × 32 × 508 × 128)",
315
  "shortDescription" : "",
316
+ "shape" : "[1, 32, 508, 128]",
317
  "name" : "new_v_cache_1",
318
  "type" : "MultiArray"
319
  }
 
322
  "mlProgramOperationTypeHistogram" : {
323
  "Ios18.constexprLutToDense" : 14,
324
  "Ios18.conv" : 14,
 
325
  "Ios18.expandDims" : 5,
326
+ "Ios18.matmul" : 6,
327
  "Ios18.concat" : 14,
328
  "Ios18.add" : 10,
329
  "Ios18.realDiv" : 5,
330
  "Ios18.silu" : 2,
331
  "Ios18.softmax" : 2,
332
  "Ios18.sliceByIndex" : 12,
333
+ "Ios18.transpose" : 5,
334
  "Ios16.reduceL2Norm" : 5,
335
  "Ios18.squeeze" : 6,
336
  "Ios18.reshape" : 11,
 
341
  "mlProgramOperationTypeHistogram" : {
342
  "Ios18.constexprLutToDense" : 14,
343
  "Ios18.conv" : 14,
 
344
  "Ios18.expandDims" : 5,
345
+ "Ios18.matmul" : 6,
346
+ "Ios18.concat" : 10,
347
  "Ios18.add" : 10,
348
  "Ios18.realDiv" : 5,
349
  "Ios18.silu" : 2,
350
  "Ios18.softmax" : 2,
351
  "Ios18.sliceByIndex" : 12,
352
+ "Ios18.transpose" : 5,
353
  "Ios16.reduceL2Norm" : 5,
354
  "Ios18.squeeze" : 6,
355
  "Ios18.reshape" : 11,
 
414
  }
415
  ],
416
  "defaultFunctionName" : "input_512_context_512",
417
+ "generatedClassName" : "Llama_2_7b_hf_2024_07_17_19_34_17_merged_chunk12",
418
  "userDefinedMetadata" : {
419
 
420
  },
sequoia/Llama-2-7b-hf_chunk12.mlmodelc/model.mil CHANGED
The diff for this file is too large to render. See raw diff
 
sequoia/Llama-2-7b-hf_chunk2.mlmodelc/analytics/coremldata.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:84e317a82cdf4e96f808f63e77f10098844d47ad522545181edfac4d287c9c92
3
  size 243
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e69e7ad37dd59e97348d395eec9b4c41b7d3ea44d86f613751ae47803a0a2efe
3
  size 243
sequoia/Llama-2-7b-hf_chunk2.mlmodelc/coremldata.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e430d0795ff5c384187174f5718a2c13d0070f5d6a811831e18862497865a86d
3
  size 1037
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d35a0353bcfa501579e07af3718261af3b129b4bec004c1fe6d812a6403a3f5b
3
  size 1037
sequoia/Llama-2-7b-hf_chunk2.mlmodelc/metadata.json CHANGED
@@ -17,9 +17,9 @@
17
  "hasShapeFlexibility" : "0",
18
  "isOptional" : "0",
19
  "dataType" : "Float16",
20
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
21
  "shortDescription" : "",
22
- "shape" : "[1, 32, 128, 511]",
23
  "name" : "new_k_cache_0",
24
  "type" : "MultiArray"
25
  },
@@ -27,9 +27,9 @@
27
  "hasShapeFlexibility" : "0",
28
  "isOptional" : "0",
29
  "dataType" : "Float16",
30
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
31
  "shortDescription" : "",
32
- "shape" : "[1, 32, 128, 511]",
33
  "name" : "new_k_cache_1",
34
  "type" : "MultiArray"
35
  },
@@ -37,9 +37,9 @@
37
  "hasShapeFlexibility" : "0",
38
  "isOptional" : "0",
39
  "dataType" : "Float16",
40
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
41
  "shortDescription" : "",
42
- "shape" : "[1, 32, 128, 511]",
43
  "name" : "new_k_cache_2",
44
  "type" : "MultiArray"
45
  },
@@ -47,9 +47,9 @@
47
  "hasShapeFlexibility" : "0",
48
  "isOptional" : "0",
49
  "dataType" : "Float16",
50
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
51
  "shortDescription" : "",
52
- "shape" : "[1, 32, 128, 511]",
53
  "name" : "new_v_cache_0",
54
  "type" : "MultiArray"
55
  },
@@ -57,9 +57,9 @@
57
  "hasShapeFlexibility" : "0",
58
  "isOptional" : "0",
59
  "dataType" : "Float16",
60
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
61
  "shortDescription" : "",
62
- "shape" : "[1, 32, 128, 511]",
63
  "name" : "new_v_cache_1",
64
  "type" : "MultiArray"
65
  },
@@ -67,9 +67,9 @@
67
  "hasShapeFlexibility" : "0",
68
  "isOptional" : "0",
69
  "dataType" : "Float16",
70
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
71
  "shortDescription" : "",
72
- "shape" : "[1, 32, 128, 511]",
73
  "name" : "new_v_cache_2",
74
  "type" : "MultiArray"
75
  }
@@ -142,9 +142,9 @@
142
  "hasShapeFlexibility" : "0",
143
  "isOptional" : "0",
144
  "dataType" : "Float16",
145
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
146
  "shortDescription" : "",
147
- "shape" : "[1, 32, 128, 511]",
148
  "name" : "new_k_cache_0",
149
  "type" : "MultiArray"
150
  },
@@ -152,9 +152,9 @@
152
  "hasShapeFlexibility" : "0",
153
  "isOptional" : "0",
154
  "dataType" : "Float16",
155
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
156
  "shortDescription" : "",
157
- "shape" : "[1, 32, 128, 511]",
158
  "name" : "new_k_cache_1",
159
  "type" : "MultiArray"
160
  },
@@ -162,9 +162,9 @@
162
  "hasShapeFlexibility" : "0",
163
  "isOptional" : "0",
164
  "dataType" : "Float16",
165
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
166
  "shortDescription" : "",
167
- "shape" : "[1, 32, 128, 511]",
168
  "name" : "new_k_cache_2",
169
  "type" : "MultiArray"
170
  },
@@ -172,9 +172,9 @@
172
  "hasShapeFlexibility" : "0",
173
  "isOptional" : "0",
174
  "dataType" : "Float16",
175
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
176
  "shortDescription" : "",
177
- "shape" : "[1, 32, 128, 511]",
178
  "name" : "new_v_cache_0",
179
  "type" : "MultiArray"
180
  },
@@ -182,9 +182,9 @@
182
  "hasShapeFlexibility" : "0",
183
  "isOptional" : "0",
184
  "dataType" : "Float16",
185
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
186
  "shortDescription" : "",
187
- "shape" : "[1, 32, 128, 511]",
188
  "name" : "new_v_cache_1",
189
  "type" : "MultiArray"
190
  },
@@ -192,9 +192,9 @@
192
  "hasShapeFlexibility" : "0",
193
  "isOptional" : "0",
194
  "dataType" : "Float16",
195
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
196
  "shortDescription" : "",
197
- "shape" : "[1, 32, 128, 511]",
198
  "name" : "new_v_cache_2",
199
  "type" : "MultiArray"
200
  }
@@ -203,14 +203,15 @@
203
  "mlProgramOperationTypeHistogram" : {
204
  "Ios18.constexprLutToDense" : 21,
205
  "Ios18.conv" : 21,
206
- "Ios18.matmul" : 6,
207
  "Ios18.expandDims" : 6,
208
- "Ios18.concat" : 18,
 
209
  "Ios18.add" : 15,
210
  "Ios18.realDiv" : 6,
211
  "Ios18.silu" : 3,
212
  "Ios18.softmax" : 3,
213
  "Ios18.sliceByIndex" : 18,
 
214
  "Ios16.reduceL2Norm" : 6,
215
  "Ios18.squeeze" : 6,
216
  "Ios18.reshape" : 12,
@@ -223,9 +224,9 @@
223
  "hasShapeFlexibility" : "0",
224
  "isOptional" : "0",
225
  "dataType" : "Float16",
226
- "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 1)",
227
  "shortDescription" : "",
228
- "shape" : "[1, 4096, 1, 1]",
229
  "name" : "x",
230
  "type" : "MultiArray"
231
  },
@@ -233,9 +234,9 @@
233
  "hasShapeFlexibility" : "0",
234
  "isOptional" : "0",
235
  "dataType" : "Float16",
236
- "formattedType" : "MultiArray (Float16 128 × 1)",
237
  "shortDescription" : "",
238
- "shape" : "[128, 1]",
239
  "name" : "cos",
240
  "type" : "MultiArray"
241
  },
@@ -243,9 +244,9 @@
243
  "hasShapeFlexibility" : "0",
244
  "isOptional" : "0",
245
  "dataType" : "Float16",
246
- "formattedType" : "MultiArray (Float16 128 × 1)",
247
  "shortDescription" : "",
248
- "shape" : "[128, 1]",
249
  "name" : "sin",
250
  "type" : "MultiArray"
251
  },
@@ -253,9 +254,9 @@
253
  "hasShapeFlexibility" : "0",
254
  "isOptional" : "0",
255
  "dataType" : "Float16",
256
- "formattedType" : "MultiArray (Float16 1 × 1 × 1 × 512)",
257
  "shortDescription" : "",
258
- "shape" : "[1, 1, 1, 512]",
259
  "name" : "mask",
260
  "type" : "MultiArray"
261
  },
@@ -263,9 +264,9 @@
263
  "hasShapeFlexibility" : "0",
264
  "isOptional" : "1",
265
  "dataType" : "Float16",
266
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)?",
267
  "shortDescription" : "",
268
- "shape" : "[1, 32, 128, 511]",
269
  "name" : "k_cache_0",
270
  "type" : "MultiArray"
271
  },
@@ -273,9 +274,9 @@
273
  "hasShapeFlexibility" : "0",
274
  "isOptional" : "1",
275
  "dataType" : "Float16",
276
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)?",
277
  "shortDescription" : "",
278
- "shape" : "[1, 32, 128, 511]",
279
  "name" : "v_cache_0",
280
  "type" : "MultiArray"
281
  },
@@ -283,9 +284,9 @@
283
  "hasShapeFlexibility" : "0",
284
  "isOptional" : "1",
285
  "dataType" : "Float16",
286
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)?",
287
  "shortDescription" : "",
288
- "shape" : "[1, 32, 128, 511]",
289
  "name" : "k_cache_1",
290
  "type" : "MultiArray"
291
  },
@@ -293,9 +294,9 @@
293
  "hasShapeFlexibility" : "0",
294
  "isOptional" : "1",
295
  "dataType" : "Float16",
296
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)?",
297
  "shortDescription" : "",
298
- "shape" : "[1, 32, 128, 511]",
299
  "name" : "v_cache_1",
300
  "type" : "MultiArray"
301
  },
@@ -303,9 +304,9 @@
303
  "hasShapeFlexibility" : "0",
304
  "isOptional" : "1",
305
  "dataType" : "Float16",
306
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)?",
307
  "shortDescription" : "",
308
- "shape" : "[1, 32, 128, 511]",
309
  "name" : "k_cache_2",
310
  "type" : "MultiArray"
311
  },
@@ -313,9 +314,9 @@
313
  "hasShapeFlexibility" : "0",
314
  "isOptional" : "1",
315
  "dataType" : "Float16",
316
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)?",
317
  "shortDescription" : "",
318
- "shape" : "[1, 32, 128, 511]",
319
  "name" : "v_cache_2",
320
  "type" : "MultiArray"
321
  }
@@ -330,9 +331,9 @@
330
  "hasShapeFlexibility" : "0",
331
  "isOptional" : "0",
332
  "dataType" : "Float16",
333
- "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 1)",
334
  "shortDescription" : "",
335
- "shape" : "[1, 4096, 1, 1]",
336
  "name" : "new_x",
337
  "type" : "MultiArray"
338
  },
@@ -340,9 +341,9 @@
340
  "hasShapeFlexibility" : "0",
341
  "isOptional" : "0",
342
  "dataType" : "Float16",
343
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
344
  "shortDescription" : "",
345
- "shape" : "[1, 32, 128, 511]",
346
  "name" : "new_k_cache_0",
347
  "type" : "MultiArray"
348
  },
@@ -350,9 +351,9 @@
350
  "hasShapeFlexibility" : "0",
351
  "isOptional" : "0",
352
  "dataType" : "Float16",
353
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
354
  "shortDescription" : "",
355
- "shape" : "[1, 32, 128, 511]",
356
  "name" : "new_k_cache_1",
357
  "type" : "MultiArray"
358
  },
@@ -360,9 +361,9 @@
360
  "hasShapeFlexibility" : "0",
361
  "isOptional" : "0",
362
  "dataType" : "Float16",
363
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
364
  "shortDescription" : "",
365
- "shape" : "[1, 32, 128, 511]",
366
  "name" : "new_k_cache_2",
367
  "type" : "MultiArray"
368
  },
@@ -370,9 +371,9 @@
370
  "hasShapeFlexibility" : "0",
371
  "isOptional" : "0",
372
  "dataType" : "Float16",
373
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
374
  "shortDescription" : "",
375
- "shape" : "[1, 32, 128, 511]",
376
  "name" : "new_v_cache_0",
377
  "type" : "MultiArray"
378
  },
@@ -380,9 +381,9 @@
380
  "hasShapeFlexibility" : "0",
381
  "isOptional" : "0",
382
  "dataType" : "Float16",
383
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
384
  "shortDescription" : "",
385
- "shape" : "[1, 32, 128, 511]",
386
  "name" : "new_v_cache_1",
387
  "type" : "MultiArray"
388
  },
@@ -390,9 +391,9 @@
390
  "hasShapeFlexibility" : "0",
391
  "isOptional" : "0",
392
  "dataType" : "Float16",
393
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
394
  "shortDescription" : "",
395
- "shape" : "[1, 32, 128, 511]",
396
  "name" : "new_v_cache_2",
397
  "type" : "MultiArray"
398
  }
@@ -401,14 +402,15 @@
401
  "mlProgramOperationTypeHistogram" : {
402
  "Ios18.constexprLutToDense" : 21,
403
  "Ios18.conv" : 21,
404
- "Ios18.matmul" : 6,
405
  "Ios18.expandDims" : 6,
 
406
  "Ios18.concat" : 18,
407
  "Ios18.add" : 15,
408
  "Ios18.realDiv" : 6,
409
  "Ios18.silu" : 3,
410
  "Ios18.softmax" : 3,
411
  "Ios18.sliceByIndex" : 18,
 
412
  "Ios16.reduceL2Norm" : 6,
413
  "Ios18.squeeze" : 6,
414
  "Ios18.reshape" : 12,
@@ -419,14 +421,15 @@
419
  "mlProgramOperationTypeHistogram" : {
420
  "Ios18.constexprLutToDense" : 21,
421
  "Ios18.conv" : 21,
422
- "Ios18.matmul" : 6,
423
  "Ios18.expandDims" : 6,
424
- "Ios18.concat" : 18,
 
425
  "Ios18.add" : 15,
426
  "Ios18.realDiv" : 6,
427
  "Ios18.silu" : 3,
428
  "Ios18.softmax" : 3,
429
  "Ios18.sliceByIndex" : 18,
 
430
  "Ios16.reduceL2Norm" : 6,
431
  "Ios18.squeeze" : 6,
432
  "Ios18.reshape" : 12,
@@ -491,7 +494,7 @@
491
  }
492
  ],
493
  "defaultFunctionName" : "input_512_context_512",
494
- "generatedClassName" : "Llama_2_7b_hf_2024_07_02_20_36_17_merged_chunk2",
495
  "userDefinedMetadata" : {
496
 
497
  },
 
17
  "hasShapeFlexibility" : "0",
18
  "isOptional" : "0",
19
  "dataType" : "Float16",
20
+ "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 508)",
21
  "shortDescription" : "",
22
+ "shape" : "[1, 32, 128, 508]",
23
  "name" : "new_k_cache_0",
24
  "type" : "MultiArray"
25
  },
 
27
  "hasShapeFlexibility" : "0",
28
  "isOptional" : "0",
29
  "dataType" : "Float16",
30
+ "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 508)",
31
  "shortDescription" : "",
32
+ "shape" : "[1, 32, 128, 508]",
33
  "name" : "new_k_cache_1",
34
  "type" : "MultiArray"
35
  },
 
37
  "hasShapeFlexibility" : "0",
38
  "isOptional" : "0",
39
  "dataType" : "Float16",
40
+ "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 508)",
41
  "shortDescription" : "",
42
+ "shape" : "[1, 32, 128, 508]",
43
  "name" : "new_k_cache_2",
44
  "type" : "MultiArray"
45
  },
 
47
  "hasShapeFlexibility" : "0",
48
  "isOptional" : "0",
49
  "dataType" : "Float16",
50
+ "formattedType" : "MultiArray (Float16 1 × 32 × 508 × 128)",
51
  "shortDescription" : "",
52
+ "shape" : "[1, 32, 508, 128]",
53
  "name" : "new_v_cache_0",
54
  "type" : "MultiArray"
55
  },
 
57
  "hasShapeFlexibility" : "0",
58
  "isOptional" : "0",
59
  "dataType" : "Float16",
60
+ "formattedType" : "MultiArray (Float16 1 × 32 × 508 × 128)",
61
  "shortDescription" : "",
62
+ "shape" : "[1, 32, 508, 128]",
63
  "name" : "new_v_cache_1",
64
  "type" : "MultiArray"
65
  },
 
67
  "hasShapeFlexibility" : "0",
68
  "isOptional" : "0",
69
  "dataType" : "Float16",
70
+ "formattedType" : "MultiArray (Float16 1 × 32 × 508 × 128)",
71
  "shortDescription" : "",
72
+ "shape" : "[1, 32, 508, 128]",
73
  "name" : "new_v_cache_2",
74
  "type" : "MultiArray"
75
  }
 
142
  "hasShapeFlexibility" : "0",
143
  "isOptional" : "0",
144
  "dataType" : "Float16",
145
+ "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 508)",
146
  "shortDescription" : "",
147
+ "shape" : "[1, 32, 128, 508]",
148
  "name" : "new_k_cache_0",
149
  "type" : "MultiArray"
150
  },
 
152
  "hasShapeFlexibility" : "0",
153
  "isOptional" : "0",
154
  "dataType" : "Float16",
155
+ "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 508)",
156
  "shortDescription" : "",
157
+ "shape" : "[1, 32, 128, 508]",
158
  "name" : "new_k_cache_1",
159
  "type" : "MultiArray"
160
  },
 
162
  "hasShapeFlexibility" : "0",
163
  "isOptional" : "0",
164
  "dataType" : "Float16",
165
+ "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 508)",
166
  "shortDescription" : "",
167
+ "shape" : "[1, 32, 128, 508]",
168
  "name" : "new_k_cache_2",
169
  "type" : "MultiArray"
170
  },
 
172
  "hasShapeFlexibility" : "0",
173
  "isOptional" : "0",
174
  "dataType" : "Float16",
175
+ "formattedType" : "MultiArray (Float16 1 × 32 × 508 × 128)",
176
  "shortDescription" : "",
177
+ "shape" : "[1, 32, 508, 128]",
178
  "name" : "new_v_cache_0",
179
  "type" : "MultiArray"
180
  },
 
182
  "hasShapeFlexibility" : "0",
183
  "isOptional" : "0",
184
  "dataType" : "Float16",
185
+ "formattedType" : "MultiArray (Float16 1 × 32 × 508 × 128)",
186
  "shortDescription" : "",
187
+ "shape" : "[1, 32, 508, 128]",
188
  "name" : "new_v_cache_1",
189
  "type" : "MultiArray"
190
  },
 
192
  "hasShapeFlexibility" : "0",
193
  "isOptional" : "0",
194
  "dataType" : "Float16",
195
+ "formattedType" : "MultiArray (Float16 1 × 32 × 508 × 128)",
196
  "shortDescription" : "",
197
+ "shape" : "[1, 32, 508, 128]",
198
  "name" : "new_v_cache_2",
199
  "type" : "MultiArray"
200
  }
 
203
  "mlProgramOperationTypeHistogram" : {
204
  "Ios18.constexprLutToDense" : 21,
205
  "Ios18.conv" : 21,
 
206
  "Ios18.expandDims" : 6,
207
+ "Ios18.matmul" : 6,
208
+ "Ios18.concat" : 12,
209
  "Ios18.add" : 15,
210
  "Ios18.realDiv" : 6,
211
  "Ios18.silu" : 3,
212
  "Ios18.softmax" : 3,
213
  "Ios18.sliceByIndex" : 18,
214
+ "Ios18.transpose" : 6,
215
  "Ios16.reduceL2Norm" : 6,
216
  "Ios18.squeeze" : 6,
217
  "Ios18.reshape" : 12,
 
224
  "hasShapeFlexibility" : "0",
225
  "isOptional" : "0",
226
  "dataType" : "Float16",
227
+ "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 4)",
228
  "shortDescription" : "",
229
+ "shape" : "[1, 4096, 1, 4]",
230
  "name" : "x",
231
  "type" : "MultiArray"
232
  },
 
234
  "hasShapeFlexibility" : "0",
235
  "isOptional" : "0",
236
  "dataType" : "Float16",
237
+ "formattedType" : "MultiArray (Float16 128 × 4)",
238
  "shortDescription" : "",
239
+ "shape" : "[128, 4]",
240
  "name" : "cos",
241
  "type" : "MultiArray"
242
  },
 
244
  "hasShapeFlexibility" : "0",
245
  "isOptional" : "0",
246
  "dataType" : "Float16",
247
+ "formattedType" : "MultiArray (Float16 128 × 4)",
248
  "shortDescription" : "",
249
+ "shape" : "[128, 4]",
250
  "name" : "sin",
251
  "type" : "MultiArray"
252
  },
 
254
  "hasShapeFlexibility" : "0",
255
  "isOptional" : "0",
256
  "dataType" : "Float16",
257
+ "formattedType" : "MultiArray (Float16 1 × 1 × 4 × 512)",
258
  "shortDescription" : "",
259
+ "shape" : "[1, 1, 4, 512]",
260
  "name" : "mask",
261
  "type" : "MultiArray"
262
  },
 
264
  "hasShapeFlexibility" : "0",
265
  "isOptional" : "1",
266
  "dataType" : "Float16",
267
+ "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 508)?",
268
  "shortDescription" : "",
269
+ "shape" : "[1, 32, 128, 508]",
270
  "name" : "k_cache_0",
271
  "type" : "MultiArray"
272
  },
 
274
  "hasShapeFlexibility" : "0",
275
  "isOptional" : "1",
276
  "dataType" : "Float16",
277
+ "formattedType" : "MultiArray (Float16 1 × 32 × 508 × 128)?",
278
  "shortDescription" : "",
279
+ "shape" : "[1, 32, 508, 128]",
280
  "name" : "v_cache_0",
281
  "type" : "MultiArray"
282
  },
 
284
  "hasShapeFlexibility" : "0",
285
  "isOptional" : "1",
286
  "dataType" : "Float16",
287
+ "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 508)?",
288
  "shortDescription" : "",
289
+ "shape" : "[1, 32, 128, 508]",
290
  "name" : "k_cache_1",
291
  "type" : "MultiArray"
292
  },
 
294
  "hasShapeFlexibility" : "0",
295
  "isOptional" : "1",
296
  "dataType" : "Float16",
297
+ "formattedType" : "MultiArray (Float16 1 × 32 × 508 × 128)?",
298
  "shortDescription" : "",
299
+ "shape" : "[1, 32, 508, 128]",
300
  "name" : "v_cache_1",
301
  "type" : "MultiArray"
302
  },
 
304
  "hasShapeFlexibility" : "0",
305
  "isOptional" : "1",
306
  "dataType" : "Float16",
307
+ "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 508)?",
308
  "shortDescription" : "",
309
+ "shape" : "[1, 32, 128, 508]",
310
  "name" : "k_cache_2",
311
  "type" : "MultiArray"
312
  },
 
314
  "hasShapeFlexibility" : "0",
315
  "isOptional" : "1",
316
  "dataType" : "Float16",
317
+ "formattedType" : "MultiArray (Float16 1 × 32 × 508 × 128)?",
318
  "shortDescription" : "",
319
+ "shape" : "[1, 32, 508, 128]",
320
  "name" : "v_cache_2",
321
  "type" : "MultiArray"
322
  }
 
331
  "hasShapeFlexibility" : "0",
332
  "isOptional" : "0",
333
  "dataType" : "Float16",
334
+ "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 4)",
335
  "shortDescription" : "",
336
+ "shape" : "[1, 4096, 1, 4]",
337
  "name" : "new_x",
338
  "type" : "MultiArray"
339
  },
 
341
  "hasShapeFlexibility" : "0",
342
  "isOptional" : "0",
343
  "dataType" : "Float16",
344
+ "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 508)",
345
  "shortDescription" : "",
346
+ "shape" : "[1, 32, 128, 508]",
347
  "name" : "new_k_cache_0",
348
  "type" : "MultiArray"
349
  },
 
351
  "hasShapeFlexibility" : "0",
352
  "isOptional" : "0",
353
  "dataType" : "Float16",
354
+ "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 508)",
355
  "shortDescription" : "",
356
+ "shape" : "[1, 32, 128, 508]",
357
  "name" : "new_k_cache_1",
358
  "type" : "MultiArray"
359
  },
 
361
  "hasShapeFlexibility" : "0",
362
  "isOptional" : "0",
363
  "dataType" : "Float16",
364
+ "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 508)",
365
  "shortDescription" : "",
366
+ "shape" : "[1, 32, 128, 508]",
367
  "name" : "new_k_cache_2",
368
  "type" : "MultiArray"
369
  },
 
371
  "hasShapeFlexibility" : "0",
372
  "isOptional" : "0",
373
  "dataType" : "Float16",
374
+ "formattedType" : "MultiArray (Float16 1 × 32 × 508 × 128)",
375
  "shortDescription" : "",
376
+ "shape" : "[1, 32, 508, 128]",
377
  "name" : "new_v_cache_0",
378
  "type" : "MultiArray"
379
  },
 
381
  "hasShapeFlexibility" : "0",
382
  "isOptional" : "0",
383
  "dataType" : "Float16",
384
+ "formattedType" : "MultiArray (Float16 1 × 32 × 508 × 128)",
385
  "shortDescription" : "",
386
+ "shape" : "[1, 32, 508, 128]",
387
  "name" : "new_v_cache_1",
388
  "type" : "MultiArray"
389
  },
 
391
  "hasShapeFlexibility" : "0",
392
  "isOptional" : "0",
393
  "dataType" : "Float16",
394
+ "formattedType" : "MultiArray (Float16 1 × 32 × 508 × 128)",
395
  "shortDescription" : "",
396
+ "shape" : "[1, 32, 508, 128]",
397
  "name" : "new_v_cache_2",
398
  "type" : "MultiArray"
399
  }
 
402
  "mlProgramOperationTypeHistogram" : {
403
  "Ios18.constexprLutToDense" : 21,
404
  "Ios18.conv" : 21,
 
405
  "Ios18.expandDims" : 6,
406
+ "Ios18.matmul" : 6,
407
  "Ios18.concat" : 18,
408
  "Ios18.add" : 15,
409
  "Ios18.realDiv" : 6,
410
  "Ios18.silu" : 3,
411
  "Ios18.softmax" : 3,
412
  "Ios18.sliceByIndex" : 18,
413
+ "Ios18.transpose" : 6,
414
  "Ios16.reduceL2Norm" : 6,
415
  "Ios18.squeeze" : 6,
416
  "Ios18.reshape" : 12,
 
421
  "mlProgramOperationTypeHistogram" : {
422
  "Ios18.constexprLutToDense" : 21,
423
  "Ios18.conv" : 21,
 
424
  "Ios18.expandDims" : 6,
425
+ "Ios18.matmul" : 6,
426
+ "Ios18.concat" : 12,
427
  "Ios18.add" : 15,
428
  "Ios18.realDiv" : 6,
429
  "Ios18.silu" : 3,
430
  "Ios18.softmax" : 3,
431
  "Ios18.sliceByIndex" : 18,
432
+ "Ios18.transpose" : 6,
433
  "Ios16.reduceL2Norm" : 6,
434
  "Ios18.squeeze" : 6,
435
  "Ios18.reshape" : 12,
 
494
  }
495
  ],
496
  "defaultFunctionName" : "input_512_context_512",
497
+ "generatedClassName" : "Llama_2_7b_hf_2024_07_17_19_34_17_merged_chunk2",
498
  "userDefinedMetadata" : {
499
 
500
  },
sequoia/Llama-2-7b-hf_chunk2.mlmodelc/model.mil CHANGED
The diff for this file is too large to render. See raw diff
 
sequoia/Llama-2-7b-hf_chunk3.mlmodelc/analytics/coremldata.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:84e317a82cdf4e96f808f63e77f10098844d47ad522545181edfac4d287c9c92
3
  size 243
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e69e7ad37dd59e97348d395eec9b4c41b7d3ea44d86f613751ae47803a0a2efe
3
  size 243
sequoia/Llama-2-7b-hf_chunk3.mlmodelc/coremldata.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e430d0795ff5c384187174f5718a2c13d0070f5d6a811831e18862497865a86d
3
  size 1037
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d35a0353bcfa501579e07af3718261af3b129b4bec004c1fe6d812a6403a3f5b
3
  size 1037
sequoia/Llama-2-7b-hf_chunk3.mlmodelc/metadata.json CHANGED
@@ -17,9 +17,9 @@
17
  "hasShapeFlexibility" : "0",
18
  "isOptional" : "0",
19
  "dataType" : "Float16",
20
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
21
  "shortDescription" : "",
22
- "shape" : "[1, 32, 128, 511]",
23
  "name" : "new_k_cache_0",
24
  "type" : "MultiArray"
25
  },
@@ -27,9 +27,9 @@
27
  "hasShapeFlexibility" : "0",
28
  "isOptional" : "0",
29
  "dataType" : "Float16",
30
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
31
  "shortDescription" : "",
32
- "shape" : "[1, 32, 128, 511]",
33
  "name" : "new_k_cache_1",
34
  "type" : "MultiArray"
35
  },
@@ -37,9 +37,9 @@
37
  "hasShapeFlexibility" : "0",
38
  "isOptional" : "0",
39
  "dataType" : "Float16",
40
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
41
  "shortDescription" : "",
42
- "shape" : "[1, 32, 128, 511]",
43
  "name" : "new_k_cache_2",
44
  "type" : "MultiArray"
45
  },
@@ -47,9 +47,9 @@
47
  "hasShapeFlexibility" : "0",
48
  "isOptional" : "0",
49
  "dataType" : "Float16",
50
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
51
  "shortDescription" : "",
52
- "shape" : "[1, 32, 128, 511]",
53
  "name" : "new_v_cache_0",
54
  "type" : "MultiArray"
55
  },
@@ -57,9 +57,9 @@
57
  "hasShapeFlexibility" : "0",
58
  "isOptional" : "0",
59
  "dataType" : "Float16",
60
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
61
  "shortDescription" : "",
62
- "shape" : "[1, 32, 128, 511]",
63
  "name" : "new_v_cache_1",
64
  "type" : "MultiArray"
65
  },
@@ -67,9 +67,9 @@
67
  "hasShapeFlexibility" : "0",
68
  "isOptional" : "0",
69
  "dataType" : "Float16",
70
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
71
  "shortDescription" : "",
72
- "shape" : "[1, 32, 128, 511]",
73
  "name" : "new_v_cache_2",
74
  "type" : "MultiArray"
75
  }
@@ -142,9 +142,9 @@
142
  "hasShapeFlexibility" : "0",
143
  "isOptional" : "0",
144
  "dataType" : "Float16",
145
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
146
  "shortDescription" : "",
147
- "shape" : "[1, 32, 128, 511]",
148
  "name" : "new_k_cache_0",
149
  "type" : "MultiArray"
150
  },
@@ -152,9 +152,9 @@
152
  "hasShapeFlexibility" : "0",
153
  "isOptional" : "0",
154
  "dataType" : "Float16",
155
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
156
  "shortDescription" : "",
157
- "shape" : "[1, 32, 128, 511]",
158
  "name" : "new_k_cache_1",
159
  "type" : "MultiArray"
160
  },
@@ -162,9 +162,9 @@
162
  "hasShapeFlexibility" : "0",
163
  "isOptional" : "0",
164
  "dataType" : "Float16",
165
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
166
  "shortDescription" : "",
167
- "shape" : "[1, 32, 128, 511]",
168
  "name" : "new_k_cache_2",
169
  "type" : "MultiArray"
170
  },
@@ -172,9 +172,9 @@
172
  "hasShapeFlexibility" : "0",
173
  "isOptional" : "0",
174
  "dataType" : "Float16",
175
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
176
  "shortDescription" : "",
177
- "shape" : "[1, 32, 128, 511]",
178
  "name" : "new_v_cache_0",
179
  "type" : "MultiArray"
180
  },
@@ -182,9 +182,9 @@
182
  "hasShapeFlexibility" : "0",
183
  "isOptional" : "0",
184
  "dataType" : "Float16",
185
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
186
  "shortDescription" : "",
187
- "shape" : "[1, 32, 128, 511]",
188
  "name" : "new_v_cache_1",
189
  "type" : "MultiArray"
190
  },
@@ -192,9 +192,9 @@
192
  "hasShapeFlexibility" : "0",
193
  "isOptional" : "0",
194
  "dataType" : "Float16",
195
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
196
  "shortDescription" : "",
197
- "shape" : "[1, 32, 128, 511]",
198
  "name" : "new_v_cache_2",
199
  "type" : "MultiArray"
200
  }
@@ -203,14 +203,15 @@
203
  "mlProgramOperationTypeHistogram" : {
204
  "Ios18.constexprLutToDense" : 21,
205
  "Ios18.conv" : 21,
206
- "Ios18.matmul" : 6,
207
  "Ios18.expandDims" : 6,
208
- "Ios18.concat" : 18,
 
209
  "Ios18.add" : 15,
210
  "Ios18.realDiv" : 6,
211
  "Ios18.silu" : 3,
212
  "Ios18.softmax" : 3,
213
  "Ios18.sliceByIndex" : 18,
 
214
  "Ios16.reduceL2Norm" : 6,
215
  "Ios18.squeeze" : 6,
216
  "Ios18.reshape" : 12,
@@ -223,9 +224,9 @@
223
  "hasShapeFlexibility" : "0",
224
  "isOptional" : "0",
225
  "dataType" : "Float16",
226
- "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 1)",
227
  "shortDescription" : "",
228
- "shape" : "[1, 4096, 1, 1]",
229
  "name" : "x",
230
  "type" : "MultiArray"
231
  },
@@ -233,9 +234,9 @@
233
  "hasShapeFlexibility" : "0",
234
  "isOptional" : "0",
235
  "dataType" : "Float16",
236
- "formattedType" : "MultiArray (Float16 128 × 1)",
237
  "shortDescription" : "",
238
- "shape" : "[128, 1]",
239
  "name" : "cos",
240
  "type" : "MultiArray"
241
  },
@@ -243,9 +244,9 @@
243
  "hasShapeFlexibility" : "0",
244
  "isOptional" : "0",
245
  "dataType" : "Float16",
246
- "formattedType" : "MultiArray (Float16 128 × 1)",
247
  "shortDescription" : "",
248
- "shape" : "[128, 1]",
249
  "name" : "sin",
250
  "type" : "MultiArray"
251
  },
@@ -253,9 +254,9 @@
253
  "hasShapeFlexibility" : "0",
254
  "isOptional" : "0",
255
  "dataType" : "Float16",
256
- "formattedType" : "MultiArray (Float16 1 × 1 × 1 × 512)",
257
  "shortDescription" : "",
258
- "shape" : "[1, 1, 1, 512]",
259
  "name" : "mask",
260
  "type" : "MultiArray"
261
  },
@@ -263,9 +264,9 @@
263
  "hasShapeFlexibility" : "0",
264
  "isOptional" : "1",
265
  "dataType" : "Float16",
266
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)?",
267
  "shortDescription" : "",
268
- "shape" : "[1, 32, 128, 511]",
269
  "name" : "k_cache_0",
270
  "type" : "MultiArray"
271
  },
@@ -273,9 +274,9 @@
273
  "hasShapeFlexibility" : "0",
274
  "isOptional" : "1",
275
  "dataType" : "Float16",
276
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)?",
277
  "shortDescription" : "",
278
- "shape" : "[1, 32, 128, 511]",
279
  "name" : "v_cache_0",
280
  "type" : "MultiArray"
281
  },
@@ -283,9 +284,9 @@
283
  "hasShapeFlexibility" : "0",
284
  "isOptional" : "1",
285
  "dataType" : "Float16",
286
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)?",
287
  "shortDescription" : "",
288
- "shape" : "[1, 32, 128, 511]",
289
  "name" : "k_cache_1",
290
  "type" : "MultiArray"
291
  },
@@ -293,9 +294,9 @@
293
  "hasShapeFlexibility" : "0",
294
  "isOptional" : "1",
295
  "dataType" : "Float16",
296
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)?",
297
  "shortDescription" : "",
298
- "shape" : "[1, 32, 128, 511]",
299
  "name" : "v_cache_1",
300
  "type" : "MultiArray"
301
  },
@@ -303,9 +304,9 @@
303
  "hasShapeFlexibility" : "0",
304
  "isOptional" : "1",
305
  "dataType" : "Float16",
306
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)?",
307
  "shortDescription" : "",
308
- "shape" : "[1, 32, 128, 511]",
309
  "name" : "k_cache_2",
310
  "type" : "MultiArray"
311
  },
@@ -313,9 +314,9 @@
313
  "hasShapeFlexibility" : "0",
314
  "isOptional" : "1",
315
  "dataType" : "Float16",
316
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)?",
317
  "shortDescription" : "",
318
- "shape" : "[1, 32, 128, 511]",
319
  "name" : "v_cache_2",
320
  "type" : "MultiArray"
321
  }
@@ -330,9 +331,9 @@
330
  "hasShapeFlexibility" : "0",
331
  "isOptional" : "0",
332
  "dataType" : "Float16",
333
- "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 1)",
334
  "shortDescription" : "",
335
- "shape" : "[1, 4096, 1, 1]",
336
  "name" : "new_x",
337
  "type" : "MultiArray"
338
  },
@@ -340,9 +341,9 @@
340
  "hasShapeFlexibility" : "0",
341
  "isOptional" : "0",
342
  "dataType" : "Float16",
343
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
344
  "shortDescription" : "",
345
- "shape" : "[1, 32, 128, 511]",
346
  "name" : "new_k_cache_0",
347
  "type" : "MultiArray"
348
  },
@@ -350,9 +351,9 @@
350
  "hasShapeFlexibility" : "0",
351
  "isOptional" : "0",
352
  "dataType" : "Float16",
353
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
354
  "shortDescription" : "",
355
- "shape" : "[1, 32, 128, 511]",
356
  "name" : "new_k_cache_1",
357
  "type" : "MultiArray"
358
  },
@@ -360,9 +361,9 @@
360
  "hasShapeFlexibility" : "0",
361
  "isOptional" : "0",
362
  "dataType" : "Float16",
363
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
364
  "shortDescription" : "",
365
- "shape" : "[1, 32, 128, 511]",
366
  "name" : "new_k_cache_2",
367
  "type" : "MultiArray"
368
  },
@@ -370,9 +371,9 @@
370
  "hasShapeFlexibility" : "0",
371
  "isOptional" : "0",
372
  "dataType" : "Float16",
373
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
374
  "shortDescription" : "",
375
- "shape" : "[1, 32, 128, 511]",
376
  "name" : "new_v_cache_0",
377
  "type" : "MultiArray"
378
  },
@@ -380,9 +381,9 @@
380
  "hasShapeFlexibility" : "0",
381
  "isOptional" : "0",
382
  "dataType" : "Float16",
383
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
384
  "shortDescription" : "",
385
- "shape" : "[1, 32, 128, 511]",
386
  "name" : "new_v_cache_1",
387
  "type" : "MultiArray"
388
  },
@@ -390,9 +391,9 @@
390
  "hasShapeFlexibility" : "0",
391
  "isOptional" : "0",
392
  "dataType" : "Float16",
393
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
394
  "shortDescription" : "",
395
- "shape" : "[1, 32, 128, 511]",
396
  "name" : "new_v_cache_2",
397
  "type" : "MultiArray"
398
  }
@@ -401,14 +402,15 @@
401
  "mlProgramOperationTypeHistogram" : {
402
  "Ios18.constexprLutToDense" : 21,
403
  "Ios18.conv" : 21,
404
- "Ios18.matmul" : 6,
405
  "Ios18.expandDims" : 6,
 
406
  "Ios18.concat" : 18,
407
  "Ios18.add" : 15,
408
  "Ios18.realDiv" : 6,
409
  "Ios18.silu" : 3,
410
  "Ios18.softmax" : 3,
411
  "Ios18.sliceByIndex" : 18,
 
412
  "Ios16.reduceL2Norm" : 6,
413
  "Ios18.squeeze" : 6,
414
  "Ios18.reshape" : 12,
@@ -419,14 +421,15 @@
419
  "mlProgramOperationTypeHistogram" : {
420
  "Ios18.constexprLutToDense" : 21,
421
  "Ios18.conv" : 21,
422
- "Ios18.matmul" : 6,
423
  "Ios18.expandDims" : 6,
424
- "Ios18.concat" : 18,
 
425
  "Ios18.add" : 15,
426
  "Ios18.realDiv" : 6,
427
  "Ios18.silu" : 3,
428
  "Ios18.softmax" : 3,
429
  "Ios18.sliceByIndex" : 18,
 
430
  "Ios16.reduceL2Norm" : 6,
431
  "Ios18.squeeze" : 6,
432
  "Ios18.reshape" : 12,
@@ -491,7 +494,7 @@
491
  }
492
  ],
493
  "defaultFunctionName" : "input_512_context_512",
494
- "generatedClassName" : "Llama_2_7b_hf_2024_07_02_20_36_17_merged_chunk3",
495
  "userDefinedMetadata" : {
496
 
497
  },
 
17
  "hasShapeFlexibility" : "0",
18
  "isOptional" : "0",
19
  "dataType" : "Float16",
20
+ "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 508)",
21
  "shortDescription" : "",
22
+ "shape" : "[1, 32, 128, 508]",
23
  "name" : "new_k_cache_0",
24
  "type" : "MultiArray"
25
  },
 
27
  "hasShapeFlexibility" : "0",
28
  "isOptional" : "0",
29
  "dataType" : "Float16",
30
+ "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 508)",
31
  "shortDescription" : "",
32
+ "shape" : "[1, 32, 128, 508]",
33
  "name" : "new_k_cache_1",
34
  "type" : "MultiArray"
35
  },
 
37
  "hasShapeFlexibility" : "0",
38
  "isOptional" : "0",
39
  "dataType" : "Float16",
40
+ "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 508)",
41
  "shortDescription" : "",
42
+ "shape" : "[1, 32, 128, 508]",
43
  "name" : "new_k_cache_2",
44
  "type" : "MultiArray"
45
  },
 
47
  "hasShapeFlexibility" : "0",
48
  "isOptional" : "0",
49
  "dataType" : "Float16",
50
+ "formattedType" : "MultiArray (Float16 1 × 32 × 508 × 128)",
51
  "shortDescription" : "",
52
+ "shape" : "[1, 32, 508, 128]",
53
  "name" : "new_v_cache_0",
54
  "type" : "MultiArray"
55
  },
 
57
  "hasShapeFlexibility" : "0",
58
  "isOptional" : "0",
59
  "dataType" : "Float16",
60
+ "formattedType" : "MultiArray (Float16 1 × 32 × 508 × 128)",
61
  "shortDescription" : "",
62
+ "shape" : "[1, 32, 508, 128]",
63
  "name" : "new_v_cache_1",
64
  "type" : "MultiArray"
65
  },
 
67
  "hasShapeFlexibility" : "0",
68
  "isOptional" : "0",
69
  "dataType" : "Float16",
70
+ "formattedType" : "MultiArray (Float16 1 × 32 × 508 × 128)",
71
  "shortDescription" : "",
72
+ "shape" : "[1, 32, 508, 128]",
73
  "name" : "new_v_cache_2",
74
  "type" : "MultiArray"
75
  }
 
142
  "hasShapeFlexibility" : "0",
143
  "isOptional" : "0",
144
  "dataType" : "Float16",
145
+ "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 508)",
146
  "shortDescription" : "",
147
+ "shape" : "[1, 32, 128, 508]",
148
  "name" : "new_k_cache_0",
149
  "type" : "MultiArray"
150
  },
 
152
  "hasShapeFlexibility" : "0",
153
  "isOptional" : "0",
154
  "dataType" : "Float16",
155
+ "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 508)",
156
  "shortDescription" : "",
157
+ "shape" : "[1, 32, 128, 508]",
158
  "name" : "new_k_cache_1",
159
  "type" : "MultiArray"
160
  },
 
162
  "hasShapeFlexibility" : "0",
163
  "isOptional" : "0",
164
  "dataType" : "Float16",
165
+ "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 508)",
166
  "shortDescription" : "",
167
+ "shape" : "[1, 32, 128, 508]",
168
  "name" : "new_k_cache_2",
169
  "type" : "MultiArray"
170
  },
 
172
  "hasShapeFlexibility" : "0",
173
  "isOptional" : "0",
174
  "dataType" : "Float16",
175
+ "formattedType" : "MultiArray (Float16 1 × 32 × 508 × 128)",
176
  "shortDescription" : "",
177
+ "shape" : "[1, 32, 508, 128]",
178
  "name" : "new_v_cache_0",
179
  "type" : "MultiArray"
180
  },
 
182
  "hasShapeFlexibility" : "0",
183
  "isOptional" : "0",
184
  "dataType" : "Float16",
185
+ "formattedType" : "MultiArray (Float16 1 × 32 × 508 × 128)",
186
  "shortDescription" : "",
187
+ "shape" : "[1, 32, 508, 128]",
188
  "name" : "new_v_cache_1",
189
  "type" : "MultiArray"
190
  },
 
192
  "hasShapeFlexibility" : "0",
193
  "isOptional" : "0",
194
  "dataType" : "Float16",
195
+ "formattedType" : "MultiArray (Float16 1 × 32 × 508 × 128)",
196
  "shortDescription" : "",
197
+ "shape" : "[1, 32, 508, 128]",
198
  "name" : "new_v_cache_2",
199
  "type" : "MultiArray"
200
  }
 
203
  "mlProgramOperationTypeHistogram" : {
204
  "Ios18.constexprLutToDense" : 21,
205
  "Ios18.conv" : 21,
 
206
  "Ios18.expandDims" : 6,
207
+ "Ios18.matmul" : 6,
208
+ "Ios18.concat" : 12,
209
  "Ios18.add" : 15,
210
  "Ios18.realDiv" : 6,
211
  "Ios18.silu" : 3,
212
  "Ios18.softmax" : 3,
213
  "Ios18.sliceByIndex" : 18,
214
+ "Ios18.transpose" : 6,
215
  "Ios16.reduceL2Norm" : 6,
216
  "Ios18.squeeze" : 6,
217
  "Ios18.reshape" : 12,
 
224
  "hasShapeFlexibility" : "0",
225
  "isOptional" : "0",
226
  "dataType" : "Float16",
227
+ "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 4)",
228
  "shortDescription" : "",
229
+ "shape" : "[1, 4096, 1, 4]",
230
  "name" : "x",
231
  "type" : "MultiArray"
232
  },
 
234
  "hasShapeFlexibility" : "0",
235
  "isOptional" : "0",
236
  "dataType" : "Float16",
237
+ "formattedType" : "MultiArray (Float16 128 × 4)",
238
  "shortDescription" : "",
239
+ "shape" : "[128, 4]",
240
  "name" : "cos",
241
  "type" : "MultiArray"
242
  },
 
244
  "hasShapeFlexibility" : "0",
245
  "isOptional" : "0",
246
  "dataType" : "Float16",
247
+ "formattedType" : "MultiArray (Float16 128 × 4)",
248
  "shortDescription" : "",
249
+ "shape" : "[128, 4]",
250
  "name" : "sin",
251
  "type" : "MultiArray"
252
  },
 
254
  "hasShapeFlexibility" : "0",
255
  "isOptional" : "0",
256
  "dataType" : "Float16",
257
+ "formattedType" : "MultiArray (Float16 1 × 1 × 4 × 512)",
258
  "shortDescription" : "",
259
+ "shape" : "[1, 1, 4, 512]",
260
  "name" : "mask",
261
  "type" : "MultiArray"
262
  },
 
264
  "hasShapeFlexibility" : "0",
265
  "isOptional" : "1",
266
  "dataType" : "Float16",
267
+ "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 508)?",
268
  "shortDescription" : "",
269
+ "shape" : "[1, 32, 128, 508]",
270
  "name" : "k_cache_0",
271
  "type" : "MultiArray"
272
  },
 
274
  "hasShapeFlexibility" : "0",
275
  "isOptional" : "1",
276
  "dataType" : "Float16",
277
+ "formattedType" : "MultiArray (Float16 1 × 32 × 508 × 128)?",
278
  "shortDescription" : "",
279
+ "shape" : "[1, 32, 508, 128]",
280
  "name" : "v_cache_0",
281
  "type" : "MultiArray"
282
  },
 
284
  "hasShapeFlexibility" : "0",
285
  "isOptional" : "1",
286
  "dataType" : "Float16",
287
+ "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 508)?",
288
  "shortDescription" : "",
289
+ "shape" : "[1, 32, 128, 508]",
290
  "name" : "k_cache_1",
291
  "type" : "MultiArray"
292
  },
 
294
  "hasShapeFlexibility" : "0",
295
  "isOptional" : "1",
296
  "dataType" : "Float16",
297
+ "formattedType" : "MultiArray (Float16 1 × 32 × 508 × 128)?",
298
  "shortDescription" : "",
299
+ "shape" : "[1, 32, 508, 128]",
300
  "name" : "v_cache_1",
301
  "type" : "MultiArray"
302
  },
 
304
  "hasShapeFlexibility" : "0",
305
  "isOptional" : "1",
306
  "dataType" : "Float16",
307
+ "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 508)?",
308
  "shortDescription" : "",
309
+ "shape" : "[1, 32, 128, 508]",
310
  "name" : "k_cache_2",
311
  "type" : "MultiArray"
312
  },
 
314
  "hasShapeFlexibility" : "0",
315
  "isOptional" : "1",
316
  "dataType" : "Float16",
317
+ "formattedType" : "MultiArray (Float16 1 × 32 × 508 × 128)?",
318
  "shortDescription" : "",
319
+ "shape" : "[1, 32, 508, 128]",
320
  "name" : "v_cache_2",
321
  "type" : "MultiArray"
322
  }
 
331
  "hasShapeFlexibility" : "0",
332
  "isOptional" : "0",
333
  "dataType" : "Float16",
334
+ "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 4)",
335
  "shortDescription" : "",
336
+ "shape" : "[1, 4096, 1, 4]",
337
  "name" : "new_x",
338
  "type" : "MultiArray"
339
  },
 
341
  "hasShapeFlexibility" : "0",
342
  "isOptional" : "0",
343
  "dataType" : "Float16",
344
+ "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 508)",
345
  "shortDescription" : "",
346
+ "shape" : "[1, 32, 128, 508]",
347
  "name" : "new_k_cache_0",
348
  "type" : "MultiArray"
349
  },
 
351
  "hasShapeFlexibility" : "0",
352
  "isOptional" : "0",
353
  "dataType" : "Float16",
354
+ "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 508)",
355
  "shortDescription" : "",
356
+ "shape" : "[1, 32, 128, 508]",
357
  "name" : "new_k_cache_1",
358
  "type" : "MultiArray"
359
  },
 
361
  "hasShapeFlexibility" : "0",
362
  "isOptional" : "0",
363
  "dataType" : "Float16",
364
+ "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 508)",
365
  "shortDescription" : "",
366
+ "shape" : "[1, 32, 128, 508]",
367
  "name" : "new_k_cache_2",
368
  "type" : "MultiArray"
369
  },
 
371
  "hasShapeFlexibility" : "0",
372
  "isOptional" : "0",
373
  "dataType" : "Float16",
374
+ "formattedType" : "MultiArray (Float16 1 × 32 × 508 × 128)",
375
  "shortDescription" : "",
376
+ "shape" : "[1, 32, 508, 128]",
377
  "name" : "new_v_cache_0",
378
  "type" : "MultiArray"
379
  },
 
381
  "hasShapeFlexibility" : "0",
382
  "isOptional" : "0",
383
  "dataType" : "Float16",
384
+ "formattedType" : "MultiArray (Float16 1 × 32 × 508 × 128)",
385
  "shortDescription" : "",
386
+ "shape" : "[1, 32, 508, 128]",
387
  "name" : "new_v_cache_1",
388
  "type" : "MultiArray"
389
  },
 
391
  "hasShapeFlexibility" : "0",
392
  "isOptional" : "0",
393
  "dataType" : "Float16",
394
+ "formattedType" : "MultiArray (Float16 1 × 32 × 508 × 128)",
395
  "shortDescription" : "",
396
+ "shape" : "[1, 32, 508, 128]",
397
  "name" : "new_v_cache_2",
398
  "type" : "MultiArray"
399
  }
 
402
  "mlProgramOperationTypeHistogram" : {
403
  "Ios18.constexprLutToDense" : 21,
404
  "Ios18.conv" : 21,
 
405
  "Ios18.expandDims" : 6,
406
+ "Ios18.matmul" : 6,
407
  "Ios18.concat" : 18,
408
  "Ios18.add" : 15,
409
  "Ios18.realDiv" : 6,
410
  "Ios18.silu" : 3,
411
  "Ios18.softmax" : 3,
412
  "Ios18.sliceByIndex" : 18,
413
+ "Ios18.transpose" : 6,
414
  "Ios16.reduceL2Norm" : 6,
415
  "Ios18.squeeze" : 6,
416
  "Ios18.reshape" : 12,
 
421
  "mlProgramOperationTypeHistogram" : {
422
  "Ios18.constexprLutToDense" : 21,
423
  "Ios18.conv" : 21,
 
424
  "Ios18.expandDims" : 6,
425
+ "Ios18.matmul" : 6,
426
+ "Ios18.concat" : 12,
427
  "Ios18.add" : 15,
428
  "Ios18.realDiv" : 6,
429
  "Ios18.silu" : 3,
430
  "Ios18.softmax" : 3,
431
  "Ios18.sliceByIndex" : 18,
432
+ "Ios18.transpose" : 6,
433
  "Ios16.reduceL2Norm" : 6,
434
  "Ios18.squeeze" : 6,
435
  "Ios18.reshape" : 12,
 
494
  }
495
  ],
496
  "defaultFunctionName" : "input_512_context_512",
497
+ "generatedClassName" : "Llama_2_7b_hf_2024_07_17_19_34_17_merged_chunk3",
498
  "userDefinedMetadata" : {
499
 
500
  },
sequoia/Llama-2-7b-hf_chunk3.mlmodelc/model.mil CHANGED
The diff for this file is too large to render. See raw diff
 
sequoia/Llama-2-7b-hf_chunk4.mlmodelc/analytics/coremldata.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:84e317a82cdf4e96f808f63e77f10098844d47ad522545181edfac4d287c9c92
3
  size 243
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e69e7ad37dd59e97348d395eec9b4c41b7d3ea44d86f613751ae47803a0a2efe
3
  size 243
sequoia/Llama-2-7b-hf_chunk4.mlmodelc/coremldata.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e430d0795ff5c384187174f5718a2c13d0070f5d6a811831e18862497865a86d
3
  size 1037
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d35a0353bcfa501579e07af3718261af3b129b4bec004c1fe6d812a6403a3f5b
3
  size 1037
sequoia/Llama-2-7b-hf_chunk4.mlmodelc/metadata.json CHANGED
@@ -17,9 +17,9 @@
17
  "hasShapeFlexibility" : "0",
18
  "isOptional" : "0",
19
  "dataType" : "Float16",
20
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
21
  "shortDescription" : "",
22
- "shape" : "[1, 32, 128, 511]",
23
  "name" : "new_k_cache_0",
24
  "type" : "MultiArray"
25
  },
@@ -27,9 +27,9 @@
27
  "hasShapeFlexibility" : "0",
28
  "isOptional" : "0",
29
  "dataType" : "Float16",
30
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
31
  "shortDescription" : "",
32
- "shape" : "[1, 32, 128, 511]",
33
  "name" : "new_k_cache_1",
34
  "type" : "MultiArray"
35
  },
@@ -37,9 +37,9 @@
37
  "hasShapeFlexibility" : "0",
38
  "isOptional" : "0",
39
  "dataType" : "Float16",
40
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
41
  "shortDescription" : "",
42
- "shape" : "[1, 32, 128, 511]",
43
  "name" : "new_k_cache_2",
44
  "type" : "MultiArray"
45
  },
@@ -47,9 +47,9 @@
47
  "hasShapeFlexibility" : "0",
48
  "isOptional" : "0",
49
  "dataType" : "Float16",
50
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
51
  "shortDescription" : "",
52
- "shape" : "[1, 32, 128, 511]",
53
  "name" : "new_v_cache_0",
54
  "type" : "MultiArray"
55
  },
@@ -57,9 +57,9 @@
57
  "hasShapeFlexibility" : "0",
58
  "isOptional" : "0",
59
  "dataType" : "Float16",
60
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
61
  "shortDescription" : "",
62
- "shape" : "[1, 32, 128, 511]",
63
  "name" : "new_v_cache_1",
64
  "type" : "MultiArray"
65
  },
@@ -67,9 +67,9 @@
67
  "hasShapeFlexibility" : "0",
68
  "isOptional" : "0",
69
  "dataType" : "Float16",
70
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
71
  "shortDescription" : "",
72
- "shape" : "[1, 32, 128, 511]",
73
  "name" : "new_v_cache_2",
74
  "type" : "MultiArray"
75
  }
@@ -142,9 +142,9 @@
142
  "hasShapeFlexibility" : "0",
143
  "isOptional" : "0",
144
  "dataType" : "Float16",
145
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
146
  "shortDescription" : "",
147
- "shape" : "[1, 32, 128, 511]",
148
  "name" : "new_k_cache_0",
149
  "type" : "MultiArray"
150
  },
@@ -152,9 +152,9 @@
152
  "hasShapeFlexibility" : "0",
153
  "isOptional" : "0",
154
  "dataType" : "Float16",
155
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
156
  "shortDescription" : "",
157
- "shape" : "[1, 32, 128, 511]",
158
  "name" : "new_k_cache_1",
159
  "type" : "MultiArray"
160
  },
@@ -162,9 +162,9 @@
162
  "hasShapeFlexibility" : "0",
163
  "isOptional" : "0",
164
  "dataType" : "Float16",
165
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
166
  "shortDescription" : "",
167
- "shape" : "[1, 32, 128, 511]",
168
  "name" : "new_k_cache_2",
169
  "type" : "MultiArray"
170
  },
@@ -172,9 +172,9 @@
172
  "hasShapeFlexibility" : "0",
173
  "isOptional" : "0",
174
  "dataType" : "Float16",
175
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
176
  "shortDescription" : "",
177
- "shape" : "[1, 32, 128, 511]",
178
  "name" : "new_v_cache_0",
179
  "type" : "MultiArray"
180
  },
@@ -182,9 +182,9 @@
182
  "hasShapeFlexibility" : "0",
183
  "isOptional" : "0",
184
  "dataType" : "Float16",
185
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
186
  "shortDescription" : "",
187
- "shape" : "[1, 32, 128, 511]",
188
  "name" : "new_v_cache_1",
189
  "type" : "MultiArray"
190
  },
@@ -192,9 +192,9 @@
192
  "hasShapeFlexibility" : "0",
193
  "isOptional" : "0",
194
  "dataType" : "Float16",
195
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
196
  "shortDescription" : "",
197
- "shape" : "[1, 32, 128, 511]",
198
  "name" : "new_v_cache_2",
199
  "type" : "MultiArray"
200
  }
@@ -203,14 +203,15 @@
203
  "mlProgramOperationTypeHistogram" : {
204
  "Ios18.constexprLutToDense" : 21,
205
  "Ios18.conv" : 21,
206
- "Ios18.matmul" : 6,
207
  "Ios18.expandDims" : 6,
208
- "Ios18.concat" : 18,
 
209
  "Ios18.add" : 15,
210
  "Ios18.realDiv" : 6,
211
  "Ios18.silu" : 3,
212
  "Ios18.softmax" : 3,
213
  "Ios18.sliceByIndex" : 18,
 
214
  "Ios16.reduceL2Norm" : 6,
215
  "Ios18.squeeze" : 6,
216
  "Ios18.reshape" : 12,
@@ -223,9 +224,9 @@
223
  "hasShapeFlexibility" : "0",
224
  "isOptional" : "0",
225
  "dataType" : "Float16",
226
- "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 1)",
227
  "shortDescription" : "",
228
- "shape" : "[1, 4096, 1, 1]",
229
  "name" : "x",
230
  "type" : "MultiArray"
231
  },
@@ -233,9 +234,9 @@
233
  "hasShapeFlexibility" : "0",
234
  "isOptional" : "0",
235
  "dataType" : "Float16",
236
- "formattedType" : "MultiArray (Float16 128 × 1)",
237
  "shortDescription" : "",
238
- "shape" : "[128, 1]",
239
  "name" : "cos",
240
  "type" : "MultiArray"
241
  },
@@ -243,9 +244,9 @@
243
  "hasShapeFlexibility" : "0",
244
  "isOptional" : "0",
245
  "dataType" : "Float16",
246
- "formattedType" : "MultiArray (Float16 128 × 1)",
247
  "shortDescription" : "",
248
- "shape" : "[128, 1]",
249
  "name" : "sin",
250
  "type" : "MultiArray"
251
  },
@@ -253,9 +254,9 @@
253
  "hasShapeFlexibility" : "0",
254
  "isOptional" : "0",
255
  "dataType" : "Float16",
256
- "formattedType" : "MultiArray (Float16 1 × 1 × 1 × 512)",
257
  "shortDescription" : "",
258
- "shape" : "[1, 1, 1, 512]",
259
  "name" : "mask",
260
  "type" : "MultiArray"
261
  },
@@ -263,9 +264,9 @@
263
  "hasShapeFlexibility" : "0",
264
  "isOptional" : "1",
265
  "dataType" : "Float16",
266
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)?",
267
  "shortDescription" : "",
268
- "shape" : "[1, 32, 128, 511]",
269
  "name" : "k_cache_0",
270
  "type" : "MultiArray"
271
  },
@@ -273,9 +274,9 @@
273
  "hasShapeFlexibility" : "0",
274
  "isOptional" : "1",
275
  "dataType" : "Float16",
276
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)?",
277
  "shortDescription" : "",
278
- "shape" : "[1, 32, 128, 511]",
279
  "name" : "v_cache_0",
280
  "type" : "MultiArray"
281
  },
@@ -283,9 +284,9 @@
283
  "hasShapeFlexibility" : "0",
284
  "isOptional" : "1",
285
  "dataType" : "Float16",
286
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)?",
287
  "shortDescription" : "",
288
- "shape" : "[1, 32, 128, 511]",
289
  "name" : "k_cache_1",
290
  "type" : "MultiArray"
291
  },
@@ -293,9 +294,9 @@
293
  "hasShapeFlexibility" : "0",
294
  "isOptional" : "1",
295
  "dataType" : "Float16",
296
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)?",
297
  "shortDescription" : "",
298
- "shape" : "[1, 32, 128, 511]",
299
  "name" : "v_cache_1",
300
  "type" : "MultiArray"
301
  },
@@ -303,9 +304,9 @@
303
  "hasShapeFlexibility" : "0",
304
  "isOptional" : "1",
305
  "dataType" : "Float16",
306
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)?",
307
  "shortDescription" : "",
308
- "shape" : "[1, 32, 128, 511]",
309
  "name" : "k_cache_2",
310
  "type" : "MultiArray"
311
  },
@@ -313,9 +314,9 @@
313
  "hasShapeFlexibility" : "0",
314
  "isOptional" : "1",
315
  "dataType" : "Float16",
316
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)?",
317
  "shortDescription" : "",
318
- "shape" : "[1, 32, 128, 511]",
319
  "name" : "v_cache_2",
320
  "type" : "MultiArray"
321
  }
@@ -330,9 +331,9 @@
330
  "hasShapeFlexibility" : "0",
331
  "isOptional" : "0",
332
  "dataType" : "Float16",
333
- "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 1)",
334
  "shortDescription" : "",
335
- "shape" : "[1, 4096, 1, 1]",
336
  "name" : "new_x",
337
  "type" : "MultiArray"
338
  },
@@ -340,9 +341,9 @@
340
  "hasShapeFlexibility" : "0",
341
  "isOptional" : "0",
342
  "dataType" : "Float16",
343
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
344
  "shortDescription" : "",
345
- "shape" : "[1, 32, 128, 511]",
346
  "name" : "new_k_cache_0",
347
  "type" : "MultiArray"
348
  },
@@ -350,9 +351,9 @@
350
  "hasShapeFlexibility" : "0",
351
  "isOptional" : "0",
352
  "dataType" : "Float16",
353
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
354
  "shortDescription" : "",
355
- "shape" : "[1, 32, 128, 511]",
356
  "name" : "new_k_cache_1",
357
  "type" : "MultiArray"
358
  },
@@ -360,9 +361,9 @@
360
  "hasShapeFlexibility" : "0",
361
  "isOptional" : "0",
362
  "dataType" : "Float16",
363
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
364
  "shortDescription" : "",
365
- "shape" : "[1, 32, 128, 511]",
366
  "name" : "new_k_cache_2",
367
  "type" : "MultiArray"
368
  },
@@ -370,9 +371,9 @@
370
  "hasShapeFlexibility" : "0",
371
  "isOptional" : "0",
372
  "dataType" : "Float16",
373
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
374
  "shortDescription" : "",
375
- "shape" : "[1, 32, 128, 511]",
376
  "name" : "new_v_cache_0",
377
  "type" : "MultiArray"
378
  },
@@ -380,9 +381,9 @@
380
  "hasShapeFlexibility" : "0",
381
  "isOptional" : "0",
382
  "dataType" : "Float16",
383
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
384
  "shortDescription" : "",
385
- "shape" : "[1, 32, 128, 511]",
386
  "name" : "new_v_cache_1",
387
  "type" : "MultiArray"
388
  },
@@ -390,9 +391,9 @@
390
  "hasShapeFlexibility" : "0",
391
  "isOptional" : "0",
392
  "dataType" : "Float16",
393
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
394
  "shortDescription" : "",
395
- "shape" : "[1, 32, 128, 511]",
396
  "name" : "new_v_cache_2",
397
  "type" : "MultiArray"
398
  }
@@ -401,14 +402,15 @@
401
  "mlProgramOperationTypeHistogram" : {
402
  "Ios18.constexprLutToDense" : 21,
403
  "Ios18.conv" : 21,
404
- "Ios18.matmul" : 6,
405
  "Ios18.expandDims" : 6,
 
406
  "Ios18.concat" : 18,
407
  "Ios18.add" : 15,
408
  "Ios18.realDiv" : 6,
409
  "Ios18.silu" : 3,
410
  "Ios18.softmax" : 3,
411
  "Ios18.sliceByIndex" : 18,
 
412
  "Ios16.reduceL2Norm" : 6,
413
  "Ios18.squeeze" : 6,
414
  "Ios18.reshape" : 12,
@@ -419,14 +421,15 @@
419
  "mlProgramOperationTypeHistogram" : {
420
  "Ios18.constexprLutToDense" : 21,
421
  "Ios18.conv" : 21,
422
- "Ios18.matmul" : 6,
423
  "Ios18.expandDims" : 6,
424
- "Ios18.concat" : 18,
 
425
  "Ios18.add" : 15,
426
  "Ios18.realDiv" : 6,
427
  "Ios18.silu" : 3,
428
  "Ios18.softmax" : 3,
429
  "Ios18.sliceByIndex" : 18,
 
430
  "Ios16.reduceL2Norm" : 6,
431
  "Ios18.squeeze" : 6,
432
  "Ios18.reshape" : 12,
@@ -491,7 +494,7 @@
491
  }
492
  ],
493
  "defaultFunctionName" : "input_512_context_512",
494
- "generatedClassName" : "Llama_2_7b_hf_2024_07_02_20_36_17_merged_chunk4",
495
  "userDefinedMetadata" : {
496
 
497
  },
 
17
  "hasShapeFlexibility" : "0",
18
  "isOptional" : "0",
19
  "dataType" : "Float16",
20
+ "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 508)",
21
  "shortDescription" : "",
22
+ "shape" : "[1, 32, 128, 508]",
23
  "name" : "new_k_cache_0",
24
  "type" : "MultiArray"
25
  },
 
27
  "hasShapeFlexibility" : "0",
28
  "isOptional" : "0",
29
  "dataType" : "Float16",
30
+ "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 508)",
31
  "shortDescription" : "",
32
+ "shape" : "[1, 32, 128, 508]",
33
  "name" : "new_k_cache_1",
34
  "type" : "MultiArray"
35
  },
 
37
  "hasShapeFlexibility" : "0",
38
  "isOptional" : "0",
39
  "dataType" : "Float16",
40
+ "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 508)",
41
  "shortDescription" : "",
42
+ "shape" : "[1, 32, 128, 508]",
43
  "name" : "new_k_cache_2",
44
  "type" : "MultiArray"
45
  },
 
47
  "hasShapeFlexibility" : "0",
48
  "isOptional" : "0",
49
  "dataType" : "Float16",
50
+ "formattedType" : "MultiArray (Float16 1 × 32 × 508 × 128)",
51
  "shortDescription" : "",
52
+ "shape" : "[1, 32, 508, 128]",
53
  "name" : "new_v_cache_0",
54
  "type" : "MultiArray"
55
  },
 
57
  "hasShapeFlexibility" : "0",
58
  "isOptional" : "0",
59
  "dataType" : "Float16",
60
+ "formattedType" : "MultiArray (Float16 1 × 32 × 508 × 128)",
61
  "shortDescription" : "",
62
+ "shape" : "[1, 32, 508, 128]",
63
  "name" : "new_v_cache_1",
64
  "type" : "MultiArray"
65
  },
 
67
  "hasShapeFlexibility" : "0",
68
  "isOptional" : "0",
69
  "dataType" : "Float16",
70
+ "formattedType" : "MultiArray (Float16 1 × 32 × 508 × 128)",
71
  "shortDescription" : "",
72
+ "shape" : "[1, 32, 508, 128]",
73
  "name" : "new_v_cache_2",
74
  "type" : "MultiArray"
75
  }
 
142
  "hasShapeFlexibility" : "0",
143
  "isOptional" : "0",
144
  "dataType" : "Float16",
145
+ "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 508)",
146
  "shortDescription" : "",
147
+ "shape" : "[1, 32, 128, 508]",
148
  "name" : "new_k_cache_0",
149
  "type" : "MultiArray"
150
  },
 
152
  "hasShapeFlexibility" : "0",
153
  "isOptional" : "0",
154
  "dataType" : "Float16",
155
+ "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 508)",
156
  "shortDescription" : "",
157
+ "shape" : "[1, 32, 128, 508]",
158
  "name" : "new_k_cache_1",
159
  "type" : "MultiArray"
160
  },
 
162
  "hasShapeFlexibility" : "0",
163
  "isOptional" : "0",
164
  "dataType" : "Float16",
165
+ "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 508)",
166
  "shortDescription" : "",
167
+ "shape" : "[1, 32, 128, 508]",
168
  "name" : "new_k_cache_2",
169
  "type" : "MultiArray"
170
  },
 
172
  "hasShapeFlexibility" : "0",
173
  "isOptional" : "0",
174
  "dataType" : "Float16",
175
+ "formattedType" : "MultiArray (Float16 1 × 32 × 508 × 128)",
176
  "shortDescription" : "",
177
+ "shape" : "[1, 32, 508, 128]",
178
  "name" : "new_v_cache_0",
179
  "type" : "MultiArray"
180
  },
 
182
  "hasShapeFlexibility" : "0",
183
  "isOptional" : "0",
184
  "dataType" : "Float16",
185
+ "formattedType" : "MultiArray (Float16 1 × 32 × 508 × 128)",
186
  "shortDescription" : "",
187
+ "shape" : "[1, 32, 508, 128]",
188
  "name" : "new_v_cache_1",
189
  "type" : "MultiArray"
190
  },
 
192
  "hasShapeFlexibility" : "0",
193
  "isOptional" : "0",
194
  "dataType" : "Float16",
195
+ "formattedType" : "MultiArray (Float16 1 × 32 × 508 × 128)",
196
  "shortDescription" : "",
197
+ "shape" : "[1, 32, 508, 128]",
198
  "name" : "new_v_cache_2",
199
  "type" : "MultiArray"
200
  }
 
203
  "mlProgramOperationTypeHistogram" : {
204
  "Ios18.constexprLutToDense" : 21,
205
  "Ios18.conv" : 21,
 
206
  "Ios18.expandDims" : 6,
207
+ "Ios18.matmul" : 6,
208
+ "Ios18.concat" : 12,
209
  "Ios18.add" : 15,
210
  "Ios18.realDiv" : 6,
211
  "Ios18.silu" : 3,
212
  "Ios18.softmax" : 3,
213
  "Ios18.sliceByIndex" : 18,
214
+ "Ios18.transpose" : 6,
215
  "Ios16.reduceL2Norm" : 6,
216
  "Ios18.squeeze" : 6,
217
  "Ios18.reshape" : 12,
 
224
  "hasShapeFlexibility" : "0",
225
  "isOptional" : "0",
226
  "dataType" : "Float16",
227
+ "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 4)",
228
  "shortDescription" : "",
229
+ "shape" : "[1, 4096, 1, 4]",
230
  "name" : "x",
231
  "type" : "MultiArray"
232
  },
 
234
  "hasShapeFlexibility" : "0",
235
  "isOptional" : "0",
236
  "dataType" : "Float16",
237
+ "formattedType" : "MultiArray (Float16 128 × 4)",
238
  "shortDescription" : "",
239
+ "shape" : "[128, 4]",
240
  "name" : "cos",
241
  "type" : "MultiArray"
242
  },
 
244
  "hasShapeFlexibility" : "0",
245
  "isOptional" : "0",
246
  "dataType" : "Float16",
247
+ "formattedType" : "MultiArray (Float16 128 × 4)",
248
  "shortDescription" : "",
249
+ "shape" : "[128, 4]",
250
  "name" : "sin",
251
  "type" : "MultiArray"
252
  },
 
254
  "hasShapeFlexibility" : "0",
255
  "isOptional" : "0",
256
  "dataType" : "Float16",
257
+ "formattedType" : "MultiArray (Float16 1 × 1 × 4 × 512)",
258
  "shortDescription" : "",
259
+ "shape" : "[1, 1, 4, 512]",
260
  "name" : "mask",
261
  "type" : "MultiArray"
262
  },
 
264
  "hasShapeFlexibility" : "0",
265
  "isOptional" : "1",
266
  "dataType" : "Float16",
267
+ "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 508)?",
268
  "shortDescription" : "",
269
+ "shape" : "[1, 32, 128, 508]",
270
  "name" : "k_cache_0",
271
  "type" : "MultiArray"
272
  },
 
274
  "hasShapeFlexibility" : "0",
275
  "isOptional" : "1",
276
  "dataType" : "Float16",
277
+ "formattedType" : "MultiArray (Float16 1 × 32 × 508 × 128)?",
278
  "shortDescription" : "",
279
+ "shape" : "[1, 32, 508, 128]",
280
  "name" : "v_cache_0",
281
  "type" : "MultiArray"
282
  },
 
284
  "hasShapeFlexibility" : "0",
285
  "isOptional" : "1",
286
  "dataType" : "Float16",
287
+ "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 508)?",
288
  "shortDescription" : "",
289
+ "shape" : "[1, 32, 128, 508]",
290
  "name" : "k_cache_1",
291
  "type" : "MultiArray"
292
  },
 
294
  "hasShapeFlexibility" : "0",
295
  "isOptional" : "1",
296
  "dataType" : "Float16",
297
+ "formattedType" : "MultiArray (Float16 1 × 32 × 508 × 128)?",
298
  "shortDescription" : "",
299
+ "shape" : "[1, 32, 508, 128]",
300
  "name" : "v_cache_1",
301
  "type" : "MultiArray"
302
  },
 
304
  "hasShapeFlexibility" : "0",
305
  "isOptional" : "1",
306
  "dataType" : "Float16",
307
+ "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 508)?",
308
  "shortDescription" : "",
309
+ "shape" : "[1, 32, 128, 508]",
310
  "name" : "k_cache_2",
311
  "type" : "MultiArray"
312
  },
 
314
  "hasShapeFlexibility" : "0",
315
  "isOptional" : "1",
316
  "dataType" : "Float16",
317
+ "formattedType" : "MultiArray (Float16 1 × 32 × 508 × 128)?",
318
  "shortDescription" : "",
319
+ "shape" : "[1, 32, 508, 128]",
320
  "name" : "v_cache_2",
321
  "type" : "MultiArray"
322
  }
 
331
  "hasShapeFlexibility" : "0",
332
  "isOptional" : "0",
333
  "dataType" : "Float16",
334
+ "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 4)",
335
  "shortDescription" : "",
336
+ "shape" : "[1, 4096, 1, 4]",
337
  "name" : "new_x",
338
  "type" : "MultiArray"
339
  },
 
341
  "hasShapeFlexibility" : "0",
342
  "isOptional" : "0",
343
  "dataType" : "Float16",
344
+ "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 508)",
345
  "shortDescription" : "",
346
+ "shape" : "[1, 32, 128, 508]",
347
  "name" : "new_k_cache_0",
348
  "type" : "MultiArray"
349
  },
 
351
  "hasShapeFlexibility" : "0",
352
  "isOptional" : "0",
353
  "dataType" : "Float16",
354
+ "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 508)",
355
  "shortDescription" : "",
356
+ "shape" : "[1, 32, 128, 508]",
357
  "name" : "new_k_cache_1",
358
  "type" : "MultiArray"
359
  },
 
361
  "hasShapeFlexibility" : "0",
362
  "isOptional" : "0",
363
  "dataType" : "Float16",
364
+ "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 508)",
365
  "shortDescription" : "",
366
+ "shape" : "[1, 32, 128, 508]",
367
  "name" : "new_k_cache_2",
368
  "type" : "MultiArray"
369
  },
 
371
  "hasShapeFlexibility" : "0",
372
  "isOptional" : "0",
373
  "dataType" : "Float16",
374
+ "formattedType" : "MultiArray (Float16 1 × 32 × 508 × 128)",
375
  "shortDescription" : "",
376
+ "shape" : "[1, 32, 508, 128]",
377
  "name" : "new_v_cache_0",
378
  "type" : "MultiArray"
379
  },
 
381
  "hasShapeFlexibility" : "0",
382
  "isOptional" : "0",
383
  "dataType" : "Float16",
384
+ "formattedType" : "MultiArray (Float16 1 × 32 × 508 × 128)",
385
  "shortDescription" : "",
386
+ "shape" : "[1, 32, 508, 128]",
387
  "name" : "new_v_cache_1",
388
  "type" : "MultiArray"
389
  },
 
391
  "hasShapeFlexibility" : "0",
392
  "isOptional" : "0",
393
  "dataType" : "Float16",
394
+ "formattedType" : "MultiArray (Float16 1 × 32 × 508 × 128)",
395
  "shortDescription" : "",
396
+ "shape" : "[1, 32, 508, 128]",
397
  "name" : "new_v_cache_2",
398
  "type" : "MultiArray"
399
  }
 
402
  "mlProgramOperationTypeHistogram" : {
403
  "Ios18.constexprLutToDense" : 21,
404
  "Ios18.conv" : 21,
 
405
  "Ios18.expandDims" : 6,
406
+ "Ios18.matmul" : 6,
407
  "Ios18.concat" : 18,
408
  "Ios18.add" : 15,
409
  "Ios18.realDiv" : 6,
410
  "Ios18.silu" : 3,
411
  "Ios18.softmax" : 3,
412
  "Ios18.sliceByIndex" : 18,
413
+ "Ios18.transpose" : 6,
414
  "Ios16.reduceL2Norm" : 6,
415
  "Ios18.squeeze" : 6,
416
  "Ios18.reshape" : 12,
 
421
  "mlProgramOperationTypeHistogram" : {
422
  "Ios18.constexprLutToDense" : 21,
423
  "Ios18.conv" : 21,
 
424
  "Ios18.expandDims" : 6,
425
+ "Ios18.matmul" : 6,
426
+ "Ios18.concat" : 12,
427
  "Ios18.add" : 15,
428
  "Ios18.realDiv" : 6,
429
  "Ios18.silu" : 3,
430
  "Ios18.softmax" : 3,
431
  "Ios18.sliceByIndex" : 18,
432
+ "Ios18.transpose" : 6,
433
  "Ios16.reduceL2Norm" : 6,
434
  "Ios18.squeeze" : 6,
435
  "Ios18.reshape" : 12,
 
494
  }
495
  ],
496
  "defaultFunctionName" : "input_512_context_512",
497
+ "generatedClassName" : "Llama_2_7b_hf_2024_07_17_19_34_17_merged_chunk4",
498
  "userDefinedMetadata" : {
499
 
500
  },
sequoia/Llama-2-7b-hf_chunk4.mlmodelc/model.mil CHANGED
The diff for this file is too large to render. See raw diff
 
sequoia/Llama-2-7b-hf_chunk5.mlmodelc/analytics/coremldata.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:84e317a82cdf4e96f808f63e77f10098844d47ad522545181edfac4d287c9c92
3
  size 243
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e69e7ad37dd59e97348d395eec9b4c41b7d3ea44d86f613751ae47803a0a2efe
3
  size 243
sequoia/Llama-2-7b-hf_chunk5.mlmodelc/coremldata.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e430d0795ff5c384187174f5718a2c13d0070f5d6a811831e18862497865a86d
3
  size 1037
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d35a0353bcfa501579e07af3718261af3b129b4bec004c1fe6d812a6403a3f5b
3
  size 1037
sequoia/Llama-2-7b-hf_chunk5.mlmodelc/metadata.json CHANGED
@@ -17,9 +17,9 @@
17
  "hasShapeFlexibility" : "0",
18
  "isOptional" : "0",
19
  "dataType" : "Float16",
20
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
21
  "shortDescription" : "",
22
- "shape" : "[1, 32, 128, 511]",
23
  "name" : "new_k_cache_0",
24
  "type" : "MultiArray"
25
  },
@@ -27,9 +27,9 @@
27
  "hasShapeFlexibility" : "0",
28
  "isOptional" : "0",
29
  "dataType" : "Float16",
30
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
31
  "shortDescription" : "",
32
- "shape" : "[1, 32, 128, 511]",
33
  "name" : "new_k_cache_1",
34
  "type" : "MultiArray"
35
  },
@@ -37,9 +37,9 @@
37
  "hasShapeFlexibility" : "0",
38
  "isOptional" : "0",
39
  "dataType" : "Float16",
40
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
41
  "shortDescription" : "",
42
- "shape" : "[1, 32, 128, 511]",
43
  "name" : "new_k_cache_2",
44
  "type" : "MultiArray"
45
  },
@@ -47,9 +47,9 @@
47
  "hasShapeFlexibility" : "0",
48
  "isOptional" : "0",
49
  "dataType" : "Float16",
50
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
51
  "shortDescription" : "",
52
- "shape" : "[1, 32, 128, 511]",
53
  "name" : "new_v_cache_0",
54
  "type" : "MultiArray"
55
  },
@@ -57,9 +57,9 @@
57
  "hasShapeFlexibility" : "0",
58
  "isOptional" : "0",
59
  "dataType" : "Float16",
60
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
61
  "shortDescription" : "",
62
- "shape" : "[1, 32, 128, 511]",
63
  "name" : "new_v_cache_1",
64
  "type" : "MultiArray"
65
  },
@@ -67,9 +67,9 @@
67
  "hasShapeFlexibility" : "0",
68
  "isOptional" : "0",
69
  "dataType" : "Float16",
70
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
71
  "shortDescription" : "",
72
- "shape" : "[1, 32, 128, 511]",
73
  "name" : "new_v_cache_2",
74
  "type" : "MultiArray"
75
  }
@@ -142,9 +142,9 @@
142
  "hasShapeFlexibility" : "0",
143
  "isOptional" : "0",
144
  "dataType" : "Float16",
145
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
146
  "shortDescription" : "",
147
- "shape" : "[1, 32, 128, 511]",
148
  "name" : "new_k_cache_0",
149
  "type" : "MultiArray"
150
  },
@@ -152,9 +152,9 @@
152
  "hasShapeFlexibility" : "0",
153
  "isOptional" : "0",
154
  "dataType" : "Float16",
155
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
156
  "shortDescription" : "",
157
- "shape" : "[1, 32, 128, 511]",
158
  "name" : "new_k_cache_1",
159
  "type" : "MultiArray"
160
  },
@@ -162,9 +162,9 @@
162
  "hasShapeFlexibility" : "0",
163
  "isOptional" : "0",
164
  "dataType" : "Float16",
165
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
166
  "shortDescription" : "",
167
- "shape" : "[1, 32, 128, 511]",
168
  "name" : "new_k_cache_2",
169
  "type" : "MultiArray"
170
  },
@@ -172,9 +172,9 @@
172
  "hasShapeFlexibility" : "0",
173
  "isOptional" : "0",
174
  "dataType" : "Float16",
175
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
176
  "shortDescription" : "",
177
- "shape" : "[1, 32, 128, 511]",
178
  "name" : "new_v_cache_0",
179
  "type" : "MultiArray"
180
  },
@@ -182,9 +182,9 @@
182
  "hasShapeFlexibility" : "0",
183
  "isOptional" : "0",
184
  "dataType" : "Float16",
185
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
186
  "shortDescription" : "",
187
- "shape" : "[1, 32, 128, 511]",
188
  "name" : "new_v_cache_1",
189
  "type" : "MultiArray"
190
  },
@@ -192,9 +192,9 @@
192
  "hasShapeFlexibility" : "0",
193
  "isOptional" : "0",
194
  "dataType" : "Float16",
195
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
196
  "shortDescription" : "",
197
- "shape" : "[1, 32, 128, 511]",
198
  "name" : "new_v_cache_2",
199
  "type" : "MultiArray"
200
  }
@@ -203,14 +203,15 @@
203
  "mlProgramOperationTypeHistogram" : {
204
  "Ios18.constexprLutToDense" : 21,
205
  "Ios18.conv" : 21,
206
- "Ios18.matmul" : 6,
207
  "Ios18.expandDims" : 6,
208
- "Ios18.concat" : 18,
 
209
  "Ios18.add" : 15,
210
  "Ios18.realDiv" : 6,
211
  "Ios18.silu" : 3,
212
  "Ios18.softmax" : 3,
213
  "Ios18.sliceByIndex" : 18,
 
214
  "Ios16.reduceL2Norm" : 6,
215
  "Ios18.squeeze" : 6,
216
  "Ios18.reshape" : 12,
@@ -223,9 +224,9 @@
223
  "hasShapeFlexibility" : "0",
224
  "isOptional" : "0",
225
  "dataType" : "Float16",
226
- "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 1)",
227
  "shortDescription" : "",
228
- "shape" : "[1, 4096, 1, 1]",
229
  "name" : "x",
230
  "type" : "MultiArray"
231
  },
@@ -233,9 +234,9 @@
233
  "hasShapeFlexibility" : "0",
234
  "isOptional" : "0",
235
  "dataType" : "Float16",
236
- "formattedType" : "MultiArray (Float16 128 × 1)",
237
  "shortDescription" : "",
238
- "shape" : "[128, 1]",
239
  "name" : "cos",
240
  "type" : "MultiArray"
241
  },
@@ -243,9 +244,9 @@
243
  "hasShapeFlexibility" : "0",
244
  "isOptional" : "0",
245
  "dataType" : "Float16",
246
- "formattedType" : "MultiArray (Float16 128 × 1)",
247
  "shortDescription" : "",
248
- "shape" : "[128, 1]",
249
  "name" : "sin",
250
  "type" : "MultiArray"
251
  },
@@ -253,9 +254,9 @@
253
  "hasShapeFlexibility" : "0",
254
  "isOptional" : "0",
255
  "dataType" : "Float16",
256
- "formattedType" : "MultiArray (Float16 1 × 1 × 1 × 512)",
257
  "shortDescription" : "",
258
- "shape" : "[1, 1, 1, 512]",
259
  "name" : "mask",
260
  "type" : "MultiArray"
261
  },
@@ -263,9 +264,9 @@
263
  "hasShapeFlexibility" : "0",
264
  "isOptional" : "1",
265
  "dataType" : "Float16",
266
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)?",
267
  "shortDescription" : "",
268
- "shape" : "[1, 32, 128, 511]",
269
  "name" : "k_cache_0",
270
  "type" : "MultiArray"
271
  },
@@ -273,9 +274,9 @@
273
  "hasShapeFlexibility" : "0",
274
  "isOptional" : "1",
275
  "dataType" : "Float16",
276
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)?",
277
  "shortDescription" : "",
278
- "shape" : "[1, 32, 128, 511]",
279
  "name" : "v_cache_0",
280
  "type" : "MultiArray"
281
  },
@@ -283,9 +284,9 @@
283
  "hasShapeFlexibility" : "0",
284
  "isOptional" : "1",
285
  "dataType" : "Float16",
286
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)?",
287
  "shortDescription" : "",
288
- "shape" : "[1, 32, 128, 511]",
289
  "name" : "k_cache_1",
290
  "type" : "MultiArray"
291
  },
@@ -293,9 +294,9 @@
293
  "hasShapeFlexibility" : "0",
294
  "isOptional" : "1",
295
  "dataType" : "Float16",
296
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)?",
297
  "shortDescription" : "",
298
- "shape" : "[1, 32, 128, 511]",
299
  "name" : "v_cache_1",
300
  "type" : "MultiArray"
301
  },
@@ -303,9 +304,9 @@
303
  "hasShapeFlexibility" : "0",
304
  "isOptional" : "1",
305
  "dataType" : "Float16",
306
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)?",
307
  "shortDescription" : "",
308
- "shape" : "[1, 32, 128, 511]",
309
  "name" : "k_cache_2",
310
  "type" : "MultiArray"
311
  },
@@ -313,9 +314,9 @@
313
  "hasShapeFlexibility" : "0",
314
  "isOptional" : "1",
315
  "dataType" : "Float16",
316
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)?",
317
  "shortDescription" : "",
318
- "shape" : "[1, 32, 128, 511]",
319
  "name" : "v_cache_2",
320
  "type" : "MultiArray"
321
  }
@@ -330,9 +331,9 @@
330
  "hasShapeFlexibility" : "0",
331
  "isOptional" : "0",
332
  "dataType" : "Float16",
333
- "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 1)",
334
  "shortDescription" : "",
335
- "shape" : "[1, 4096, 1, 1]",
336
  "name" : "new_x",
337
  "type" : "MultiArray"
338
  },
@@ -340,9 +341,9 @@
340
  "hasShapeFlexibility" : "0",
341
  "isOptional" : "0",
342
  "dataType" : "Float16",
343
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
344
  "shortDescription" : "",
345
- "shape" : "[1, 32, 128, 511]",
346
  "name" : "new_k_cache_0",
347
  "type" : "MultiArray"
348
  },
@@ -350,9 +351,9 @@
350
  "hasShapeFlexibility" : "0",
351
  "isOptional" : "0",
352
  "dataType" : "Float16",
353
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
354
  "shortDescription" : "",
355
- "shape" : "[1, 32, 128, 511]",
356
  "name" : "new_k_cache_1",
357
  "type" : "MultiArray"
358
  },
@@ -360,9 +361,9 @@
360
  "hasShapeFlexibility" : "0",
361
  "isOptional" : "0",
362
  "dataType" : "Float16",
363
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
364
  "shortDescription" : "",
365
- "shape" : "[1, 32, 128, 511]",
366
  "name" : "new_k_cache_2",
367
  "type" : "MultiArray"
368
  },
@@ -370,9 +371,9 @@
370
  "hasShapeFlexibility" : "0",
371
  "isOptional" : "0",
372
  "dataType" : "Float16",
373
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
374
  "shortDescription" : "",
375
- "shape" : "[1, 32, 128, 511]",
376
  "name" : "new_v_cache_0",
377
  "type" : "MultiArray"
378
  },
@@ -380,9 +381,9 @@
380
  "hasShapeFlexibility" : "0",
381
  "isOptional" : "0",
382
  "dataType" : "Float16",
383
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
384
  "shortDescription" : "",
385
- "shape" : "[1, 32, 128, 511]",
386
  "name" : "new_v_cache_1",
387
  "type" : "MultiArray"
388
  },
@@ -390,9 +391,9 @@
390
  "hasShapeFlexibility" : "0",
391
  "isOptional" : "0",
392
  "dataType" : "Float16",
393
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
394
  "shortDescription" : "",
395
- "shape" : "[1, 32, 128, 511]",
396
  "name" : "new_v_cache_2",
397
  "type" : "MultiArray"
398
  }
@@ -401,14 +402,15 @@
401
  "mlProgramOperationTypeHistogram" : {
402
  "Ios18.constexprLutToDense" : 21,
403
  "Ios18.conv" : 21,
404
- "Ios18.matmul" : 6,
405
  "Ios18.expandDims" : 6,
 
406
  "Ios18.concat" : 18,
407
  "Ios18.add" : 15,
408
  "Ios18.realDiv" : 6,
409
  "Ios18.silu" : 3,
410
  "Ios18.softmax" : 3,
411
  "Ios18.sliceByIndex" : 18,
 
412
  "Ios16.reduceL2Norm" : 6,
413
  "Ios18.squeeze" : 6,
414
  "Ios18.reshape" : 12,
@@ -419,14 +421,15 @@
419
  "mlProgramOperationTypeHistogram" : {
420
  "Ios18.constexprLutToDense" : 21,
421
  "Ios18.conv" : 21,
422
- "Ios18.matmul" : 6,
423
  "Ios18.expandDims" : 6,
424
- "Ios18.concat" : 18,
 
425
  "Ios18.add" : 15,
426
  "Ios18.realDiv" : 6,
427
  "Ios18.silu" : 3,
428
  "Ios18.softmax" : 3,
429
  "Ios18.sliceByIndex" : 18,
 
430
  "Ios16.reduceL2Norm" : 6,
431
  "Ios18.squeeze" : 6,
432
  "Ios18.reshape" : 12,
@@ -491,7 +494,7 @@
491
  }
492
  ],
493
  "defaultFunctionName" : "input_512_context_512",
494
- "generatedClassName" : "Llama_2_7b_hf_2024_07_02_20_36_17_merged_chunk5",
495
  "userDefinedMetadata" : {
496
 
497
  },
 
17
  "hasShapeFlexibility" : "0",
18
  "isOptional" : "0",
19
  "dataType" : "Float16",
20
+ "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 508)",
21
  "shortDescription" : "",
22
+ "shape" : "[1, 32, 128, 508]",
23
  "name" : "new_k_cache_0",
24
  "type" : "MultiArray"
25
  },
 
27
  "hasShapeFlexibility" : "0",
28
  "isOptional" : "0",
29
  "dataType" : "Float16",
30
+ "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 508)",
31
  "shortDescription" : "",
32
+ "shape" : "[1, 32, 128, 508]",
33
  "name" : "new_k_cache_1",
34
  "type" : "MultiArray"
35
  },
 
37
  "hasShapeFlexibility" : "0",
38
  "isOptional" : "0",
39
  "dataType" : "Float16",
40
+ "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 508)",
41
  "shortDescription" : "",
42
+ "shape" : "[1, 32, 128, 508]",
43
  "name" : "new_k_cache_2",
44
  "type" : "MultiArray"
45
  },
 
47
  "hasShapeFlexibility" : "0",
48
  "isOptional" : "0",
49
  "dataType" : "Float16",
50
+ "formattedType" : "MultiArray (Float16 1 × 32 × 508 × 128)",
51
  "shortDescription" : "",
52
+ "shape" : "[1, 32, 508, 128]",
53
  "name" : "new_v_cache_0",
54
  "type" : "MultiArray"
55
  },
 
57
  "hasShapeFlexibility" : "0",
58
  "isOptional" : "0",
59
  "dataType" : "Float16",
60
+ "formattedType" : "MultiArray (Float16 1 × 32 × 508 × 128)",
61
  "shortDescription" : "",
62
+ "shape" : "[1, 32, 508, 128]",
63
  "name" : "new_v_cache_1",
64
  "type" : "MultiArray"
65
  },
 
67
  "hasShapeFlexibility" : "0",
68
  "isOptional" : "0",
69
  "dataType" : "Float16",
70
+ "formattedType" : "MultiArray (Float16 1 × 32 × 508 × 128)",
71
  "shortDescription" : "",
72
+ "shape" : "[1, 32, 508, 128]",
73
  "name" : "new_v_cache_2",
74
  "type" : "MultiArray"
75
  }
 
142
  "hasShapeFlexibility" : "0",
143
  "isOptional" : "0",
144
  "dataType" : "Float16",
145
+ "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 508)",
146
  "shortDescription" : "",
147
+ "shape" : "[1, 32, 128, 508]",
148
  "name" : "new_k_cache_0",
149
  "type" : "MultiArray"
150
  },
 
152
  "hasShapeFlexibility" : "0",
153
  "isOptional" : "0",
154
  "dataType" : "Float16",
155
+ "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 508)",
156
  "shortDescription" : "",
157
+ "shape" : "[1, 32, 128, 508]",
158
  "name" : "new_k_cache_1",
159
  "type" : "MultiArray"
160
  },
 
162
  "hasShapeFlexibility" : "0",
163
  "isOptional" : "0",
164
  "dataType" : "Float16",
165
+ "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 508)",
166
  "shortDescription" : "",
167
+ "shape" : "[1, 32, 128, 508]",
168
  "name" : "new_k_cache_2",
169
  "type" : "MultiArray"
170
  },
 
172
  "hasShapeFlexibility" : "0",
173
  "isOptional" : "0",
174
  "dataType" : "Float16",
175
+ "formattedType" : "MultiArray (Float16 1 × 32 × 508 × 128)",
176
  "shortDescription" : "",
177
+ "shape" : "[1, 32, 508, 128]",
178
  "name" : "new_v_cache_0",
179
  "type" : "MultiArray"
180
  },
 
182
  "hasShapeFlexibility" : "0",
183
  "isOptional" : "0",
184
  "dataType" : "Float16",
185
+ "formattedType" : "MultiArray (Float16 1 × 32 × 508 × 128)",
186
  "shortDescription" : "",
187
+ "shape" : "[1, 32, 508, 128]",
188
  "name" : "new_v_cache_1",
189
  "type" : "MultiArray"
190
  },
 
192
  "hasShapeFlexibility" : "0",
193
  "isOptional" : "0",
194
  "dataType" : "Float16",
195
+ "formattedType" : "MultiArray (Float16 1 × 32 × 508 × 128)",
196
  "shortDescription" : "",
197
+ "shape" : "[1, 32, 508, 128]",
198
  "name" : "new_v_cache_2",
199
  "type" : "MultiArray"
200
  }
 
203
  "mlProgramOperationTypeHistogram" : {
204
  "Ios18.constexprLutToDense" : 21,
205
  "Ios18.conv" : 21,
 
206
  "Ios18.expandDims" : 6,
207
+ "Ios18.matmul" : 6,
208
+ "Ios18.concat" : 12,
209
  "Ios18.add" : 15,
210
  "Ios18.realDiv" : 6,
211
  "Ios18.silu" : 3,
212
  "Ios18.softmax" : 3,
213
  "Ios18.sliceByIndex" : 18,
214
+ "Ios18.transpose" : 6,
215
  "Ios16.reduceL2Norm" : 6,
216
  "Ios18.squeeze" : 6,
217
  "Ios18.reshape" : 12,
 
224
  "hasShapeFlexibility" : "0",
225
  "isOptional" : "0",
226
  "dataType" : "Float16",
227
+ "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 4)",
228
  "shortDescription" : "",
229
+ "shape" : "[1, 4096, 1, 4]",
230
  "name" : "x",
231
  "type" : "MultiArray"
232
  },
 
234
  "hasShapeFlexibility" : "0",
235
  "isOptional" : "0",
236
  "dataType" : "Float16",
237
+ "formattedType" : "MultiArray (Float16 128 × 4)",
238
  "shortDescription" : "",
239
+ "shape" : "[128, 4]",
240
  "name" : "cos",
241
  "type" : "MultiArray"
242
  },
 
244
  "hasShapeFlexibility" : "0",
245
  "isOptional" : "0",
246
  "dataType" : "Float16",
247
+ "formattedType" : "MultiArray (Float16 128 × 4)",
248
  "shortDescription" : "",
249
+ "shape" : "[128, 4]",
250
  "name" : "sin",
251
  "type" : "MultiArray"
252
  },
 
254
  "hasShapeFlexibility" : "0",
255
  "isOptional" : "0",
256
  "dataType" : "Float16",
257
+ "formattedType" : "MultiArray (Float16 1 × 1 × 4 × 512)",
258
  "shortDescription" : "",
259
+ "shape" : "[1, 1, 4, 512]",
260
  "name" : "mask",
261
  "type" : "MultiArray"
262
  },
 
264
  "hasShapeFlexibility" : "0",
265
  "isOptional" : "1",
266
  "dataType" : "Float16",
267
+ "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 508)?",
268
  "shortDescription" : "",
269
+ "shape" : "[1, 32, 128, 508]",
270
  "name" : "k_cache_0",
271
  "type" : "MultiArray"
272
  },
 
274
  "hasShapeFlexibility" : "0",
275
  "isOptional" : "1",
276
  "dataType" : "Float16",
277
+ "formattedType" : "MultiArray (Float16 1 × 32 × 508 × 128)?",
278
  "shortDescription" : "",
279
+ "shape" : "[1, 32, 508, 128]",
280
  "name" : "v_cache_0",
281
  "type" : "MultiArray"
282
  },
 
284
  "hasShapeFlexibility" : "0",
285
  "isOptional" : "1",
286
  "dataType" : "Float16",
287
+ "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 508)?",
288
  "shortDescription" : "",
289
+ "shape" : "[1, 32, 128, 508]",
290
  "name" : "k_cache_1",
291
  "type" : "MultiArray"
292
  },
 
294
  "hasShapeFlexibility" : "0",
295
  "isOptional" : "1",
296
  "dataType" : "Float16",
297
+ "formattedType" : "MultiArray (Float16 1 × 32 × 508 × 128)?",
298
  "shortDescription" : "",
299
+ "shape" : "[1, 32, 508, 128]",
300
  "name" : "v_cache_1",
301
  "type" : "MultiArray"
302
  },
 
304
  "hasShapeFlexibility" : "0",
305
  "isOptional" : "1",
306
  "dataType" : "Float16",
307
+ "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 508)?",
308
  "shortDescription" : "",
309
+ "shape" : "[1, 32, 128, 508]",
310
  "name" : "k_cache_2",
311
  "type" : "MultiArray"
312
  },
 
314
  "hasShapeFlexibility" : "0",
315
  "isOptional" : "1",
316
  "dataType" : "Float16",
317
+ "formattedType" : "MultiArray (Float16 1 × 32 × 508 × 128)?",
318
  "shortDescription" : "",
319
+ "shape" : "[1, 32, 508, 128]",
320
  "name" : "v_cache_2",
321
  "type" : "MultiArray"
322
  }
 
331
  "hasShapeFlexibility" : "0",
332
  "isOptional" : "0",
333
  "dataType" : "Float16",
334
+ "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 4)",
335
  "shortDescription" : "",
336
+ "shape" : "[1, 4096, 1, 4]",
337
  "name" : "new_x",
338
  "type" : "MultiArray"
339
  },
 
341
  "hasShapeFlexibility" : "0",
342
  "isOptional" : "0",
343
  "dataType" : "Float16",
344
+ "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 508)",
345
  "shortDescription" : "",
346
+ "shape" : "[1, 32, 128, 508]",
347
  "name" : "new_k_cache_0",
348
  "type" : "MultiArray"
349
  },
 
351
  "hasShapeFlexibility" : "0",
352
  "isOptional" : "0",
353
  "dataType" : "Float16",
354
+ "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 508)",
355
  "shortDescription" : "",
356
+ "shape" : "[1, 32, 128, 508]",
357
  "name" : "new_k_cache_1",
358
  "type" : "MultiArray"
359
  },
 
361
  "hasShapeFlexibility" : "0",
362
  "isOptional" : "0",
363
  "dataType" : "Float16",
364
+ "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 508)",
365
  "shortDescription" : "",
366
+ "shape" : "[1, 32, 128, 508]",
367
  "name" : "new_k_cache_2",
368
  "type" : "MultiArray"
369
  },
 
371
  "hasShapeFlexibility" : "0",
372
  "isOptional" : "0",
373
  "dataType" : "Float16",
374
+ "formattedType" : "MultiArray (Float16 1 × 32 × 508 × 128)",
375
  "shortDescription" : "",
376
+ "shape" : "[1, 32, 508, 128]",
377
  "name" : "new_v_cache_0",
378
  "type" : "MultiArray"
379
  },
 
381
  "hasShapeFlexibility" : "0",
382
  "isOptional" : "0",
383
  "dataType" : "Float16",
384
+ "formattedType" : "MultiArray (Float16 1 × 32 × 508 × 128)",
385
  "shortDescription" : "",
386
+ "shape" : "[1, 32, 508, 128]",
387
  "name" : "new_v_cache_1",
388
  "type" : "MultiArray"
389
  },
 
391
  "hasShapeFlexibility" : "0",
392
  "isOptional" : "0",
393
  "dataType" : "Float16",
394
+ "formattedType" : "MultiArray (Float16 1 × 32 × 508 × 128)",
395
  "shortDescription" : "",
396
+ "shape" : "[1, 32, 508, 128]",
397
  "name" : "new_v_cache_2",
398
  "type" : "MultiArray"
399
  }
 
402
  "mlProgramOperationTypeHistogram" : {
403
  "Ios18.constexprLutToDense" : 21,
404
  "Ios18.conv" : 21,
 
405
  "Ios18.expandDims" : 6,
406
+ "Ios18.matmul" : 6,
407
  "Ios18.concat" : 18,
408
  "Ios18.add" : 15,
409
  "Ios18.realDiv" : 6,
410
  "Ios18.silu" : 3,
411
  "Ios18.softmax" : 3,
412
  "Ios18.sliceByIndex" : 18,
413
+ "Ios18.transpose" : 6,
414
  "Ios16.reduceL2Norm" : 6,
415
  "Ios18.squeeze" : 6,
416
  "Ios18.reshape" : 12,
 
421
  "mlProgramOperationTypeHistogram" : {
422
  "Ios18.constexprLutToDense" : 21,
423
  "Ios18.conv" : 21,
 
424
  "Ios18.expandDims" : 6,
425
+ "Ios18.matmul" : 6,
426
+ "Ios18.concat" : 12,
427
  "Ios18.add" : 15,
428
  "Ios18.realDiv" : 6,
429
  "Ios18.silu" : 3,
430
  "Ios18.softmax" : 3,
431
  "Ios18.sliceByIndex" : 18,
432
+ "Ios18.transpose" : 6,
433
  "Ios16.reduceL2Norm" : 6,
434
  "Ios18.squeeze" : 6,
435
  "Ios18.reshape" : 12,
 
494
  }
495
  ],
496
  "defaultFunctionName" : "input_512_context_512",
497
+ "generatedClassName" : "Llama_2_7b_hf_2024_07_17_19_34_17_merged_chunk5",
498
  "userDefinedMetadata" : {
499
 
500
  },
sequoia/Llama-2-7b-hf_chunk5.mlmodelc/model.mil CHANGED
The diff for this file is too large to render. See raw diff
 
sequoia/Llama-2-7b-hf_chunk6.mlmodelc/analytics/coremldata.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:84e317a82cdf4e96f808f63e77f10098844d47ad522545181edfac4d287c9c92
3
  size 243
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e69e7ad37dd59e97348d395eec9b4c41b7d3ea44d86f613751ae47803a0a2efe
3
  size 243
sequoia/Llama-2-7b-hf_chunk6.mlmodelc/coremldata.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e430d0795ff5c384187174f5718a2c13d0070f5d6a811831e18862497865a86d
3
  size 1037
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d35a0353bcfa501579e07af3718261af3b129b4bec004c1fe6d812a6403a3f5b
3
  size 1037
sequoia/Llama-2-7b-hf_chunk6.mlmodelc/metadata.json CHANGED
@@ -17,9 +17,9 @@
17
  "hasShapeFlexibility" : "0",
18
  "isOptional" : "0",
19
  "dataType" : "Float16",
20
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
21
  "shortDescription" : "",
22
- "shape" : "[1, 32, 128, 511]",
23
  "name" : "new_k_cache_0",
24
  "type" : "MultiArray"
25
  },
@@ -27,9 +27,9 @@
27
  "hasShapeFlexibility" : "0",
28
  "isOptional" : "0",
29
  "dataType" : "Float16",
30
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
31
  "shortDescription" : "",
32
- "shape" : "[1, 32, 128, 511]",
33
  "name" : "new_k_cache_1",
34
  "type" : "MultiArray"
35
  },
@@ -37,9 +37,9 @@
37
  "hasShapeFlexibility" : "0",
38
  "isOptional" : "0",
39
  "dataType" : "Float16",
40
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
41
  "shortDescription" : "",
42
- "shape" : "[1, 32, 128, 511]",
43
  "name" : "new_k_cache_2",
44
  "type" : "MultiArray"
45
  },
@@ -47,9 +47,9 @@
47
  "hasShapeFlexibility" : "0",
48
  "isOptional" : "0",
49
  "dataType" : "Float16",
50
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
51
  "shortDescription" : "",
52
- "shape" : "[1, 32, 128, 511]",
53
  "name" : "new_v_cache_0",
54
  "type" : "MultiArray"
55
  },
@@ -57,9 +57,9 @@
57
  "hasShapeFlexibility" : "0",
58
  "isOptional" : "0",
59
  "dataType" : "Float16",
60
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
61
  "shortDescription" : "",
62
- "shape" : "[1, 32, 128, 511]",
63
  "name" : "new_v_cache_1",
64
  "type" : "MultiArray"
65
  },
@@ -67,9 +67,9 @@
67
  "hasShapeFlexibility" : "0",
68
  "isOptional" : "0",
69
  "dataType" : "Float16",
70
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
71
  "shortDescription" : "",
72
- "shape" : "[1, 32, 128, 511]",
73
  "name" : "new_v_cache_2",
74
  "type" : "MultiArray"
75
  }
@@ -142,9 +142,9 @@
142
  "hasShapeFlexibility" : "0",
143
  "isOptional" : "0",
144
  "dataType" : "Float16",
145
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
146
  "shortDescription" : "",
147
- "shape" : "[1, 32, 128, 511]",
148
  "name" : "new_k_cache_0",
149
  "type" : "MultiArray"
150
  },
@@ -152,9 +152,9 @@
152
  "hasShapeFlexibility" : "0",
153
  "isOptional" : "0",
154
  "dataType" : "Float16",
155
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
156
  "shortDescription" : "",
157
- "shape" : "[1, 32, 128, 511]",
158
  "name" : "new_k_cache_1",
159
  "type" : "MultiArray"
160
  },
@@ -162,9 +162,9 @@
162
  "hasShapeFlexibility" : "0",
163
  "isOptional" : "0",
164
  "dataType" : "Float16",
165
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
166
  "shortDescription" : "",
167
- "shape" : "[1, 32, 128, 511]",
168
  "name" : "new_k_cache_2",
169
  "type" : "MultiArray"
170
  },
@@ -172,9 +172,9 @@
172
  "hasShapeFlexibility" : "0",
173
  "isOptional" : "0",
174
  "dataType" : "Float16",
175
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
176
  "shortDescription" : "",
177
- "shape" : "[1, 32, 128, 511]",
178
  "name" : "new_v_cache_0",
179
  "type" : "MultiArray"
180
  },
@@ -182,9 +182,9 @@
182
  "hasShapeFlexibility" : "0",
183
  "isOptional" : "0",
184
  "dataType" : "Float16",
185
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
186
  "shortDescription" : "",
187
- "shape" : "[1, 32, 128, 511]",
188
  "name" : "new_v_cache_1",
189
  "type" : "MultiArray"
190
  },
@@ -192,9 +192,9 @@
192
  "hasShapeFlexibility" : "0",
193
  "isOptional" : "0",
194
  "dataType" : "Float16",
195
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
196
  "shortDescription" : "",
197
- "shape" : "[1, 32, 128, 511]",
198
  "name" : "new_v_cache_2",
199
  "type" : "MultiArray"
200
  }
@@ -203,14 +203,15 @@
203
  "mlProgramOperationTypeHistogram" : {
204
  "Ios18.constexprLutToDense" : 21,
205
  "Ios18.conv" : 21,
206
- "Ios18.matmul" : 6,
207
  "Ios18.expandDims" : 6,
208
- "Ios18.concat" : 18,
 
209
  "Ios18.add" : 15,
210
  "Ios18.realDiv" : 6,
211
  "Ios18.silu" : 3,
212
  "Ios18.softmax" : 3,
213
  "Ios18.sliceByIndex" : 18,
 
214
  "Ios16.reduceL2Norm" : 6,
215
  "Ios18.squeeze" : 6,
216
  "Ios18.reshape" : 12,
@@ -223,9 +224,9 @@
223
  "hasShapeFlexibility" : "0",
224
  "isOptional" : "0",
225
  "dataType" : "Float16",
226
- "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 1)",
227
  "shortDescription" : "",
228
- "shape" : "[1, 4096, 1, 1]",
229
  "name" : "x",
230
  "type" : "MultiArray"
231
  },
@@ -233,9 +234,9 @@
233
  "hasShapeFlexibility" : "0",
234
  "isOptional" : "0",
235
  "dataType" : "Float16",
236
- "formattedType" : "MultiArray (Float16 128 × 1)",
237
  "shortDescription" : "",
238
- "shape" : "[128, 1]",
239
  "name" : "cos",
240
  "type" : "MultiArray"
241
  },
@@ -243,9 +244,9 @@
243
  "hasShapeFlexibility" : "0",
244
  "isOptional" : "0",
245
  "dataType" : "Float16",
246
- "formattedType" : "MultiArray (Float16 128 × 1)",
247
  "shortDescription" : "",
248
- "shape" : "[128, 1]",
249
  "name" : "sin",
250
  "type" : "MultiArray"
251
  },
@@ -253,9 +254,9 @@
253
  "hasShapeFlexibility" : "0",
254
  "isOptional" : "0",
255
  "dataType" : "Float16",
256
- "formattedType" : "MultiArray (Float16 1 × 1 × 1 × 512)",
257
  "shortDescription" : "",
258
- "shape" : "[1, 1, 1, 512]",
259
  "name" : "mask",
260
  "type" : "MultiArray"
261
  },
@@ -263,9 +264,9 @@
263
  "hasShapeFlexibility" : "0",
264
  "isOptional" : "1",
265
  "dataType" : "Float16",
266
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)?",
267
  "shortDescription" : "",
268
- "shape" : "[1, 32, 128, 511]",
269
  "name" : "k_cache_0",
270
  "type" : "MultiArray"
271
  },
@@ -273,9 +274,9 @@
273
  "hasShapeFlexibility" : "0",
274
  "isOptional" : "1",
275
  "dataType" : "Float16",
276
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)?",
277
  "shortDescription" : "",
278
- "shape" : "[1, 32, 128, 511]",
279
  "name" : "v_cache_0",
280
  "type" : "MultiArray"
281
  },
@@ -283,9 +284,9 @@
283
  "hasShapeFlexibility" : "0",
284
  "isOptional" : "1",
285
  "dataType" : "Float16",
286
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)?",
287
  "shortDescription" : "",
288
- "shape" : "[1, 32, 128, 511]",
289
  "name" : "k_cache_1",
290
  "type" : "MultiArray"
291
  },
@@ -293,9 +294,9 @@
293
  "hasShapeFlexibility" : "0",
294
  "isOptional" : "1",
295
  "dataType" : "Float16",
296
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)?",
297
  "shortDescription" : "",
298
- "shape" : "[1, 32, 128, 511]",
299
  "name" : "v_cache_1",
300
  "type" : "MultiArray"
301
  },
@@ -303,9 +304,9 @@
303
  "hasShapeFlexibility" : "0",
304
  "isOptional" : "1",
305
  "dataType" : "Float16",
306
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)?",
307
  "shortDescription" : "",
308
- "shape" : "[1, 32, 128, 511]",
309
  "name" : "k_cache_2",
310
  "type" : "MultiArray"
311
  },
@@ -313,9 +314,9 @@
313
  "hasShapeFlexibility" : "0",
314
  "isOptional" : "1",
315
  "dataType" : "Float16",
316
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)?",
317
  "shortDescription" : "",
318
- "shape" : "[1, 32, 128, 511]",
319
  "name" : "v_cache_2",
320
  "type" : "MultiArray"
321
  }
@@ -330,9 +331,9 @@
330
  "hasShapeFlexibility" : "0",
331
  "isOptional" : "0",
332
  "dataType" : "Float16",
333
- "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 1)",
334
  "shortDescription" : "",
335
- "shape" : "[1, 4096, 1, 1]",
336
  "name" : "new_x",
337
  "type" : "MultiArray"
338
  },
@@ -340,9 +341,9 @@
340
  "hasShapeFlexibility" : "0",
341
  "isOptional" : "0",
342
  "dataType" : "Float16",
343
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
344
  "shortDescription" : "",
345
- "shape" : "[1, 32, 128, 511]",
346
  "name" : "new_k_cache_0",
347
  "type" : "MultiArray"
348
  },
@@ -350,9 +351,9 @@
350
  "hasShapeFlexibility" : "0",
351
  "isOptional" : "0",
352
  "dataType" : "Float16",
353
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
354
  "shortDescription" : "",
355
- "shape" : "[1, 32, 128, 511]",
356
  "name" : "new_k_cache_1",
357
  "type" : "MultiArray"
358
  },
@@ -360,9 +361,9 @@
360
  "hasShapeFlexibility" : "0",
361
  "isOptional" : "0",
362
  "dataType" : "Float16",
363
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
364
  "shortDescription" : "",
365
- "shape" : "[1, 32, 128, 511]",
366
  "name" : "new_k_cache_2",
367
  "type" : "MultiArray"
368
  },
@@ -370,9 +371,9 @@
370
  "hasShapeFlexibility" : "0",
371
  "isOptional" : "0",
372
  "dataType" : "Float16",
373
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
374
  "shortDescription" : "",
375
- "shape" : "[1, 32, 128, 511]",
376
  "name" : "new_v_cache_0",
377
  "type" : "MultiArray"
378
  },
@@ -380,9 +381,9 @@
380
  "hasShapeFlexibility" : "0",
381
  "isOptional" : "0",
382
  "dataType" : "Float16",
383
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
384
  "shortDescription" : "",
385
- "shape" : "[1, 32, 128, 511]",
386
  "name" : "new_v_cache_1",
387
  "type" : "MultiArray"
388
  },
@@ -390,9 +391,9 @@
390
  "hasShapeFlexibility" : "0",
391
  "isOptional" : "0",
392
  "dataType" : "Float16",
393
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
394
  "shortDescription" : "",
395
- "shape" : "[1, 32, 128, 511]",
396
  "name" : "new_v_cache_2",
397
  "type" : "MultiArray"
398
  }
@@ -401,14 +402,15 @@
401
  "mlProgramOperationTypeHistogram" : {
402
  "Ios18.constexprLutToDense" : 21,
403
  "Ios18.conv" : 21,
404
- "Ios18.matmul" : 6,
405
  "Ios18.expandDims" : 6,
 
406
  "Ios18.concat" : 18,
407
  "Ios18.add" : 15,
408
  "Ios18.realDiv" : 6,
409
  "Ios18.silu" : 3,
410
  "Ios18.softmax" : 3,
411
  "Ios18.sliceByIndex" : 18,
 
412
  "Ios16.reduceL2Norm" : 6,
413
  "Ios18.squeeze" : 6,
414
  "Ios18.reshape" : 12,
@@ -419,14 +421,15 @@
419
  "mlProgramOperationTypeHistogram" : {
420
  "Ios18.constexprLutToDense" : 21,
421
  "Ios18.conv" : 21,
422
- "Ios18.matmul" : 6,
423
  "Ios18.expandDims" : 6,
424
- "Ios18.concat" : 18,
 
425
  "Ios18.add" : 15,
426
  "Ios18.realDiv" : 6,
427
  "Ios18.silu" : 3,
428
  "Ios18.softmax" : 3,
429
  "Ios18.sliceByIndex" : 18,
 
430
  "Ios16.reduceL2Norm" : 6,
431
  "Ios18.squeeze" : 6,
432
  "Ios18.reshape" : 12,
@@ -491,7 +494,7 @@
491
  }
492
  ],
493
  "defaultFunctionName" : "input_512_context_512",
494
- "generatedClassName" : "Llama_2_7b_hf_2024_07_02_20_36_17_merged_chunk6",
495
  "userDefinedMetadata" : {
496
 
497
  },
 
17
  "hasShapeFlexibility" : "0",
18
  "isOptional" : "0",
19
  "dataType" : "Float16",
20
+ "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 508)",
21
  "shortDescription" : "",
22
+ "shape" : "[1, 32, 128, 508]",
23
  "name" : "new_k_cache_0",
24
  "type" : "MultiArray"
25
  },
 
27
  "hasShapeFlexibility" : "0",
28
  "isOptional" : "0",
29
  "dataType" : "Float16",
30
+ "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 508)",
31
  "shortDescription" : "",
32
+ "shape" : "[1, 32, 128, 508]",
33
  "name" : "new_k_cache_1",
34
  "type" : "MultiArray"
35
  },
 
37
  "hasShapeFlexibility" : "0",
38
  "isOptional" : "0",
39
  "dataType" : "Float16",
40
+ "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 508)",
41
  "shortDescription" : "",
42
+ "shape" : "[1, 32, 128, 508]",
43
  "name" : "new_k_cache_2",
44
  "type" : "MultiArray"
45
  },
 
47
  "hasShapeFlexibility" : "0",
48
  "isOptional" : "0",
49
  "dataType" : "Float16",
50
+ "formattedType" : "MultiArray (Float16 1 × 32 × 508 × 128)",
51
  "shortDescription" : "",
52
+ "shape" : "[1, 32, 508, 128]",
53
  "name" : "new_v_cache_0",
54
  "type" : "MultiArray"
55
  },
 
57
  "hasShapeFlexibility" : "0",
58
  "isOptional" : "0",
59
  "dataType" : "Float16",
60
+ "formattedType" : "MultiArray (Float16 1 × 32 × 508 × 128)",
61
  "shortDescription" : "",
62
+ "shape" : "[1, 32, 508, 128]",
63
  "name" : "new_v_cache_1",
64
  "type" : "MultiArray"
65
  },
 
67
  "hasShapeFlexibility" : "0",
68
  "isOptional" : "0",
69
  "dataType" : "Float16",
70
+ "formattedType" : "MultiArray (Float16 1 × 32 × 508 × 128)",
71
  "shortDescription" : "",
72
+ "shape" : "[1, 32, 508, 128]",
73
  "name" : "new_v_cache_2",
74
  "type" : "MultiArray"
75
  }
 
142
  "hasShapeFlexibility" : "0",
143
  "isOptional" : "0",
144
  "dataType" : "Float16",
145
+ "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 508)",
146
  "shortDescription" : "",
147
+ "shape" : "[1, 32, 128, 508]",
148
  "name" : "new_k_cache_0",
149
  "type" : "MultiArray"
150
  },
 
152
  "hasShapeFlexibility" : "0",
153
  "isOptional" : "0",
154
  "dataType" : "Float16",
155
+ "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 508)",
156
  "shortDescription" : "",
157
+ "shape" : "[1, 32, 128, 508]",
158
  "name" : "new_k_cache_1",
159
  "type" : "MultiArray"
160
  },
 
162
  "hasShapeFlexibility" : "0",
163
  "isOptional" : "0",
164
  "dataType" : "Float16",
165
+ "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 508)",
166
  "shortDescription" : "",
167
+ "shape" : "[1, 32, 128, 508]",
168
  "name" : "new_k_cache_2",
169
  "type" : "MultiArray"
170
  },
 
172
  "hasShapeFlexibility" : "0",
173
  "isOptional" : "0",
174
  "dataType" : "Float16",
175
+ "formattedType" : "MultiArray (Float16 1 × 32 × 508 × 128)",
176
  "shortDescription" : "",
177
+ "shape" : "[1, 32, 508, 128]",
178
  "name" : "new_v_cache_0",
179
  "type" : "MultiArray"
180
  },
 
182
  "hasShapeFlexibility" : "0",
183
  "isOptional" : "0",
184
  "dataType" : "Float16",
185
+ "formattedType" : "MultiArray (Float16 1 × 32 × 508 × 128)",
186
  "shortDescription" : "",
187
+ "shape" : "[1, 32, 508, 128]",
188
  "name" : "new_v_cache_1",
189
  "type" : "MultiArray"
190
  },
 
192
  "hasShapeFlexibility" : "0",
193
  "isOptional" : "0",
194
  "dataType" : "Float16",
195
+ "formattedType" : "MultiArray (Float16 1 × 32 × 508 × 128)",
196
  "shortDescription" : "",
197
+ "shape" : "[1, 32, 508, 128]",
198
  "name" : "new_v_cache_2",
199
  "type" : "MultiArray"
200
  }
 
203
  "mlProgramOperationTypeHistogram" : {
204
  "Ios18.constexprLutToDense" : 21,
205
  "Ios18.conv" : 21,
 
206
  "Ios18.expandDims" : 6,
207
+ "Ios18.matmul" : 6,
208
+ "Ios18.concat" : 12,
209
  "Ios18.add" : 15,
210
  "Ios18.realDiv" : 6,
211
  "Ios18.silu" : 3,
212
  "Ios18.softmax" : 3,
213
  "Ios18.sliceByIndex" : 18,
214
+ "Ios18.transpose" : 6,
215
  "Ios16.reduceL2Norm" : 6,
216
  "Ios18.squeeze" : 6,
217
  "Ios18.reshape" : 12,
 
224
  "hasShapeFlexibility" : "0",
225
  "isOptional" : "0",
226
  "dataType" : "Float16",
227
+ "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 4)",
228
  "shortDescription" : "",
229
+ "shape" : "[1, 4096, 1, 4]",
230
  "name" : "x",
231
  "type" : "MultiArray"
232
  },
 
234
  "hasShapeFlexibility" : "0",
235
  "isOptional" : "0",
236
  "dataType" : "Float16",
237
+ "formattedType" : "MultiArray (Float16 128 × 4)",
238
  "shortDescription" : "",
239
+ "shape" : "[128, 4]",
240
  "name" : "cos",
241
  "type" : "MultiArray"
242
  },
 
244
  "hasShapeFlexibility" : "0",
245
  "isOptional" : "0",
246
  "dataType" : "Float16",
247
+ "formattedType" : "MultiArray (Float16 128 × 4)",
248
  "shortDescription" : "",
249
+ "shape" : "[128, 4]",
250
  "name" : "sin",
251
  "type" : "MultiArray"
252
  },
 
254
  "hasShapeFlexibility" : "0",
255
  "isOptional" : "0",
256
  "dataType" : "Float16",
257
+ "formattedType" : "MultiArray (Float16 1 × 1 × 4 × 512)",
258
  "shortDescription" : "",
259
+ "shape" : "[1, 1, 4, 512]",
260
  "name" : "mask",
261
  "type" : "MultiArray"
262
  },
 
264
  "hasShapeFlexibility" : "0",
265
  "isOptional" : "1",
266
  "dataType" : "Float16",
267
+ "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 508)?",
268
  "shortDescription" : "",
269
+ "shape" : "[1, 32, 128, 508]",
270
  "name" : "k_cache_0",
271
  "type" : "MultiArray"
272
  },
 
274
  "hasShapeFlexibility" : "0",
275
  "isOptional" : "1",
276
  "dataType" : "Float16",
277
+ "formattedType" : "MultiArray (Float16 1 × 32 × 508 × 128)?",
278
  "shortDescription" : "",
279
+ "shape" : "[1, 32, 508, 128]",
280
  "name" : "v_cache_0",
281
  "type" : "MultiArray"
282
  },
 
284
  "hasShapeFlexibility" : "0",
285
  "isOptional" : "1",
286
  "dataType" : "Float16",
287
+ "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 508)?",
288
  "shortDescription" : "",
289
+ "shape" : "[1, 32, 128, 508]",
290
  "name" : "k_cache_1",
291
  "type" : "MultiArray"
292
  },
 
294
  "hasShapeFlexibility" : "0",
295
  "isOptional" : "1",
296
  "dataType" : "Float16",
297
+ "formattedType" : "MultiArray (Float16 1 × 32 × 508 × 128)?",
298
  "shortDescription" : "",
299
+ "shape" : "[1, 32, 508, 128]",
300
  "name" : "v_cache_1",
301
  "type" : "MultiArray"
302
  },
 
304
  "hasShapeFlexibility" : "0",
305
  "isOptional" : "1",
306
  "dataType" : "Float16",
307
+ "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 508)?",
308
  "shortDescription" : "",
309
+ "shape" : "[1, 32, 128, 508]",
310
  "name" : "k_cache_2",
311
  "type" : "MultiArray"
312
  },
 
314
  "hasShapeFlexibility" : "0",
315
  "isOptional" : "1",
316
  "dataType" : "Float16",
317
+ "formattedType" : "MultiArray (Float16 1 × 32 × 508 × 128)?",
318
  "shortDescription" : "",
319
+ "shape" : "[1, 32, 508, 128]",
320
  "name" : "v_cache_2",
321
  "type" : "MultiArray"
322
  }
 
331
  "hasShapeFlexibility" : "0",
332
  "isOptional" : "0",
333
  "dataType" : "Float16",
334
+ "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 4)",
335
  "shortDescription" : "",
336
+ "shape" : "[1, 4096, 1, 4]",
337
  "name" : "new_x",
338
  "type" : "MultiArray"
339
  },
 
341
  "hasShapeFlexibility" : "0",
342
  "isOptional" : "0",
343
  "dataType" : "Float16",
344
+ "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 508)",
345
  "shortDescription" : "",
346
+ "shape" : "[1, 32, 128, 508]",
347
  "name" : "new_k_cache_0",
348
  "type" : "MultiArray"
349
  },
 
351
  "hasShapeFlexibility" : "0",
352
  "isOptional" : "0",
353
  "dataType" : "Float16",
354
+ "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 508)",
355
  "shortDescription" : "",
356
+ "shape" : "[1, 32, 128, 508]",
357
  "name" : "new_k_cache_1",
358
  "type" : "MultiArray"
359
  },
 
361
  "hasShapeFlexibility" : "0",
362
  "isOptional" : "0",
363
  "dataType" : "Float16",
364
+ "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 508)",
365
  "shortDescription" : "",
366
+ "shape" : "[1, 32, 128, 508]",
367
  "name" : "new_k_cache_2",
368
  "type" : "MultiArray"
369
  },
 
371
  "hasShapeFlexibility" : "0",
372
  "isOptional" : "0",
373
  "dataType" : "Float16",
374
+ "formattedType" : "MultiArray (Float16 1 × 32 × 508 × 128)",
375
  "shortDescription" : "",
376
+ "shape" : "[1, 32, 508, 128]",
377
  "name" : "new_v_cache_0",
378
  "type" : "MultiArray"
379
  },
 
381
  "hasShapeFlexibility" : "0",
382
  "isOptional" : "0",
383
  "dataType" : "Float16",
384
+ "formattedType" : "MultiArray (Float16 1 × 32 × 508 × 128)",
385
  "shortDescription" : "",
386
+ "shape" : "[1, 32, 508, 128]",
387
  "name" : "new_v_cache_1",
388
  "type" : "MultiArray"
389
  },
 
391
  "hasShapeFlexibility" : "0",
392
  "isOptional" : "0",
393
  "dataType" : "Float16",
394
+ "formattedType" : "MultiArray (Float16 1 × 32 × 508 × 128)",
395
  "shortDescription" : "",
396
+ "shape" : "[1, 32, 508, 128]",
397
  "name" : "new_v_cache_2",
398
  "type" : "MultiArray"
399
  }
 
402
  "mlProgramOperationTypeHistogram" : {
403
  "Ios18.constexprLutToDense" : 21,
404
  "Ios18.conv" : 21,
 
405
  "Ios18.expandDims" : 6,
406
+ "Ios18.matmul" : 6,
407
  "Ios18.concat" : 18,
408
  "Ios18.add" : 15,
409
  "Ios18.realDiv" : 6,
410
  "Ios18.silu" : 3,
411
  "Ios18.softmax" : 3,
412
  "Ios18.sliceByIndex" : 18,
413
+ "Ios18.transpose" : 6,
414
  "Ios16.reduceL2Norm" : 6,
415
  "Ios18.squeeze" : 6,
416
  "Ios18.reshape" : 12,
 
421
  "mlProgramOperationTypeHistogram" : {
422
  "Ios18.constexprLutToDense" : 21,
423
  "Ios18.conv" : 21,
 
424
  "Ios18.expandDims" : 6,
425
+ "Ios18.matmul" : 6,
426
+ "Ios18.concat" : 12,
427
  "Ios18.add" : 15,
428
  "Ios18.realDiv" : 6,
429
  "Ios18.silu" : 3,
430
  "Ios18.softmax" : 3,
431
  "Ios18.sliceByIndex" : 18,
432
+ "Ios18.transpose" : 6,
433
  "Ios16.reduceL2Norm" : 6,
434
  "Ios18.squeeze" : 6,
435
  "Ios18.reshape" : 12,
 
494
  }
495
  ],
496
  "defaultFunctionName" : "input_512_context_512",
497
+ "generatedClassName" : "Llama_2_7b_hf_2024_07_17_19_34_17_merged_chunk6",
498
  "userDefinedMetadata" : {
499
 
500
  },
sequoia/Llama-2-7b-hf_chunk6.mlmodelc/model.mil CHANGED
The diff for this file is too large to render. See raw diff
 
sequoia/Llama-2-7b-hf_chunk7.mlmodelc/analytics/coremldata.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:84e317a82cdf4e96f808f63e77f10098844d47ad522545181edfac4d287c9c92
3
  size 243
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e69e7ad37dd59e97348d395eec9b4c41b7d3ea44d86f613751ae47803a0a2efe
3
  size 243
sequoia/Llama-2-7b-hf_chunk7.mlmodelc/coremldata.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e430d0795ff5c384187174f5718a2c13d0070f5d6a811831e18862497865a86d
3
  size 1037
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d35a0353bcfa501579e07af3718261af3b129b4bec004c1fe6d812a6403a3f5b
3
  size 1037
sequoia/Llama-2-7b-hf_chunk7.mlmodelc/metadata.json CHANGED
@@ -17,9 +17,9 @@
17
  "hasShapeFlexibility" : "0",
18
  "isOptional" : "0",
19
  "dataType" : "Float16",
20
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
21
  "shortDescription" : "",
22
- "shape" : "[1, 32, 128, 511]",
23
  "name" : "new_k_cache_0",
24
  "type" : "MultiArray"
25
  },
@@ -27,9 +27,9 @@
27
  "hasShapeFlexibility" : "0",
28
  "isOptional" : "0",
29
  "dataType" : "Float16",
30
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
31
  "shortDescription" : "",
32
- "shape" : "[1, 32, 128, 511]",
33
  "name" : "new_k_cache_1",
34
  "type" : "MultiArray"
35
  },
@@ -37,9 +37,9 @@
37
  "hasShapeFlexibility" : "0",
38
  "isOptional" : "0",
39
  "dataType" : "Float16",
40
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
41
  "shortDescription" : "",
42
- "shape" : "[1, 32, 128, 511]",
43
  "name" : "new_k_cache_2",
44
  "type" : "MultiArray"
45
  },
@@ -47,9 +47,9 @@
47
  "hasShapeFlexibility" : "0",
48
  "isOptional" : "0",
49
  "dataType" : "Float16",
50
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
51
  "shortDescription" : "",
52
- "shape" : "[1, 32, 128, 511]",
53
  "name" : "new_v_cache_0",
54
  "type" : "MultiArray"
55
  },
@@ -57,9 +57,9 @@
57
  "hasShapeFlexibility" : "0",
58
  "isOptional" : "0",
59
  "dataType" : "Float16",
60
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
61
  "shortDescription" : "",
62
- "shape" : "[1, 32, 128, 511]",
63
  "name" : "new_v_cache_1",
64
  "type" : "MultiArray"
65
  },
@@ -67,9 +67,9 @@
67
  "hasShapeFlexibility" : "0",
68
  "isOptional" : "0",
69
  "dataType" : "Float16",
70
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
71
  "shortDescription" : "",
72
- "shape" : "[1, 32, 128, 511]",
73
  "name" : "new_v_cache_2",
74
  "type" : "MultiArray"
75
  }
@@ -142,9 +142,9 @@
142
  "hasShapeFlexibility" : "0",
143
  "isOptional" : "0",
144
  "dataType" : "Float16",
145
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
146
  "shortDescription" : "",
147
- "shape" : "[1, 32, 128, 511]",
148
  "name" : "new_k_cache_0",
149
  "type" : "MultiArray"
150
  },
@@ -152,9 +152,9 @@
152
  "hasShapeFlexibility" : "0",
153
  "isOptional" : "0",
154
  "dataType" : "Float16",
155
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
156
  "shortDescription" : "",
157
- "shape" : "[1, 32, 128, 511]",
158
  "name" : "new_k_cache_1",
159
  "type" : "MultiArray"
160
  },
@@ -162,9 +162,9 @@
162
  "hasShapeFlexibility" : "0",
163
  "isOptional" : "0",
164
  "dataType" : "Float16",
165
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
166
  "shortDescription" : "",
167
- "shape" : "[1, 32, 128, 511]",
168
  "name" : "new_k_cache_2",
169
  "type" : "MultiArray"
170
  },
@@ -172,9 +172,9 @@
172
  "hasShapeFlexibility" : "0",
173
  "isOptional" : "0",
174
  "dataType" : "Float16",
175
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
176
  "shortDescription" : "",
177
- "shape" : "[1, 32, 128, 511]",
178
  "name" : "new_v_cache_0",
179
  "type" : "MultiArray"
180
  },
@@ -182,9 +182,9 @@
182
  "hasShapeFlexibility" : "0",
183
  "isOptional" : "0",
184
  "dataType" : "Float16",
185
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
186
  "shortDescription" : "",
187
- "shape" : "[1, 32, 128, 511]",
188
  "name" : "new_v_cache_1",
189
  "type" : "MultiArray"
190
  },
@@ -192,9 +192,9 @@
192
  "hasShapeFlexibility" : "0",
193
  "isOptional" : "0",
194
  "dataType" : "Float16",
195
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
196
  "shortDescription" : "",
197
- "shape" : "[1, 32, 128, 511]",
198
  "name" : "new_v_cache_2",
199
  "type" : "MultiArray"
200
  }
@@ -203,14 +203,15 @@
203
  "mlProgramOperationTypeHistogram" : {
204
  "Ios18.constexprLutToDense" : 21,
205
  "Ios18.conv" : 21,
206
- "Ios18.matmul" : 6,
207
  "Ios18.expandDims" : 6,
208
- "Ios18.concat" : 18,
 
209
  "Ios18.add" : 15,
210
  "Ios18.realDiv" : 6,
211
  "Ios18.silu" : 3,
212
  "Ios18.softmax" : 3,
213
  "Ios18.sliceByIndex" : 18,
 
214
  "Ios16.reduceL2Norm" : 6,
215
  "Ios18.squeeze" : 6,
216
  "Ios18.reshape" : 12,
@@ -223,9 +224,9 @@
223
  "hasShapeFlexibility" : "0",
224
  "isOptional" : "0",
225
  "dataType" : "Float16",
226
- "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 1)",
227
  "shortDescription" : "",
228
- "shape" : "[1, 4096, 1, 1]",
229
  "name" : "x",
230
  "type" : "MultiArray"
231
  },
@@ -233,9 +234,9 @@
233
  "hasShapeFlexibility" : "0",
234
  "isOptional" : "0",
235
  "dataType" : "Float16",
236
- "formattedType" : "MultiArray (Float16 128 × 1)",
237
  "shortDescription" : "",
238
- "shape" : "[128, 1]",
239
  "name" : "cos",
240
  "type" : "MultiArray"
241
  },
@@ -243,9 +244,9 @@
243
  "hasShapeFlexibility" : "0",
244
  "isOptional" : "0",
245
  "dataType" : "Float16",
246
- "formattedType" : "MultiArray (Float16 128 × 1)",
247
  "shortDescription" : "",
248
- "shape" : "[128, 1]",
249
  "name" : "sin",
250
  "type" : "MultiArray"
251
  },
@@ -253,9 +254,9 @@
253
  "hasShapeFlexibility" : "0",
254
  "isOptional" : "0",
255
  "dataType" : "Float16",
256
- "formattedType" : "MultiArray (Float16 1 × 1 × 1 × 512)",
257
  "shortDescription" : "",
258
- "shape" : "[1, 1, 1, 512]",
259
  "name" : "mask",
260
  "type" : "MultiArray"
261
  },
@@ -263,9 +264,9 @@
263
  "hasShapeFlexibility" : "0",
264
  "isOptional" : "1",
265
  "dataType" : "Float16",
266
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)?",
267
  "shortDescription" : "",
268
- "shape" : "[1, 32, 128, 511]",
269
  "name" : "k_cache_0",
270
  "type" : "MultiArray"
271
  },
@@ -273,9 +274,9 @@
273
  "hasShapeFlexibility" : "0",
274
  "isOptional" : "1",
275
  "dataType" : "Float16",
276
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)?",
277
  "shortDescription" : "",
278
- "shape" : "[1, 32, 128, 511]",
279
  "name" : "v_cache_0",
280
  "type" : "MultiArray"
281
  },
@@ -283,9 +284,9 @@
283
  "hasShapeFlexibility" : "0",
284
  "isOptional" : "1",
285
  "dataType" : "Float16",
286
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)?",
287
  "shortDescription" : "",
288
- "shape" : "[1, 32, 128, 511]",
289
  "name" : "k_cache_1",
290
  "type" : "MultiArray"
291
  },
@@ -293,9 +294,9 @@
293
  "hasShapeFlexibility" : "0",
294
  "isOptional" : "1",
295
  "dataType" : "Float16",
296
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)?",
297
  "shortDescription" : "",
298
- "shape" : "[1, 32, 128, 511]",
299
  "name" : "v_cache_1",
300
  "type" : "MultiArray"
301
  },
@@ -303,9 +304,9 @@
303
  "hasShapeFlexibility" : "0",
304
  "isOptional" : "1",
305
  "dataType" : "Float16",
306
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)?",
307
  "shortDescription" : "",
308
- "shape" : "[1, 32, 128, 511]",
309
  "name" : "k_cache_2",
310
  "type" : "MultiArray"
311
  },
@@ -313,9 +314,9 @@
313
  "hasShapeFlexibility" : "0",
314
  "isOptional" : "1",
315
  "dataType" : "Float16",
316
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)?",
317
  "shortDescription" : "",
318
- "shape" : "[1, 32, 128, 511]",
319
  "name" : "v_cache_2",
320
  "type" : "MultiArray"
321
  }
@@ -330,9 +331,9 @@
330
  "hasShapeFlexibility" : "0",
331
  "isOptional" : "0",
332
  "dataType" : "Float16",
333
- "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 1)",
334
  "shortDescription" : "",
335
- "shape" : "[1, 4096, 1, 1]",
336
  "name" : "new_x",
337
  "type" : "MultiArray"
338
  },
@@ -340,9 +341,9 @@
340
  "hasShapeFlexibility" : "0",
341
  "isOptional" : "0",
342
  "dataType" : "Float16",
343
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
344
  "shortDescription" : "",
345
- "shape" : "[1, 32, 128, 511]",
346
  "name" : "new_k_cache_0",
347
  "type" : "MultiArray"
348
  },
@@ -350,9 +351,9 @@
350
  "hasShapeFlexibility" : "0",
351
  "isOptional" : "0",
352
  "dataType" : "Float16",
353
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
354
  "shortDescription" : "",
355
- "shape" : "[1, 32, 128, 511]",
356
  "name" : "new_k_cache_1",
357
  "type" : "MultiArray"
358
  },
@@ -360,9 +361,9 @@
360
  "hasShapeFlexibility" : "0",
361
  "isOptional" : "0",
362
  "dataType" : "Float16",
363
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
364
  "shortDescription" : "",
365
- "shape" : "[1, 32, 128, 511]",
366
  "name" : "new_k_cache_2",
367
  "type" : "MultiArray"
368
  },
@@ -370,9 +371,9 @@
370
  "hasShapeFlexibility" : "0",
371
  "isOptional" : "0",
372
  "dataType" : "Float16",
373
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
374
  "shortDescription" : "",
375
- "shape" : "[1, 32, 128, 511]",
376
  "name" : "new_v_cache_0",
377
  "type" : "MultiArray"
378
  },
@@ -380,9 +381,9 @@
380
  "hasShapeFlexibility" : "0",
381
  "isOptional" : "0",
382
  "dataType" : "Float16",
383
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
384
  "shortDescription" : "",
385
- "shape" : "[1, 32, 128, 511]",
386
  "name" : "new_v_cache_1",
387
  "type" : "MultiArray"
388
  },
@@ -390,9 +391,9 @@
390
  "hasShapeFlexibility" : "0",
391
  "isOptional" : "0",
392
  "dataType" : "Float16",
393
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
394
  "shortDescription" : "",
395
- "shape" : "[1, 32, 128, 511]",
396
  "name" : "new_v_cache_2",
397
  "type" : "MultiArray"
398
  }
@@ -401,14 +402,15 @@
401
  "mlProgramOperationTypeHistogram" : {
402
  "Ios18.constexprLutToDense" : 21,
403
  "Ios18.conv" : 21,
404
- "Ios18.matmul" : 6,
405
  "Ios18.expandDims" : 6,
 
406
  "Ios18.concat" : 18,
407
  "Ios18.add" : 15,
408
  "Ios18.realDiv" : 6,
409
  "Ios18.silu" : 3,
410
  "Ios18.softmax" : 3,
411
  "Ios18.sliceByIndex" : 18,
 
412
  "Ios16.reduceL2Norm" : 6,
413
  "Ios18.squeeze" : 6,
414
  "Ios18.reshape" : 12,
@@ -419,14 +421,15 @@
419
  "mlProgramOperationTypeHistogram" : {
420
  "Ios18.constexprLutToDense" : 21,
421
  "Ios18.conv" : 21,
422
- "Ios18.matmul" : 6,
423
  "Ios18.expandDims" : 6,
424
- "Ios18.concat" : 18,
 
425
  "Ios18.add" : 15,
426
  "Ios18.realDiv" : 6,
427
  "Ios18.silu" : 3,
428
  "Ios18.softmax" : 3,
429
  "Ios18.sliceByIndex" : 18,
 
430
  "Ios16.reduceL2Norm" : 6,
431
  "Ios18.squeeze" : 6,
432
  "Ios18.reshape" : 12,
@@ -491,7 +494,7 @@
491
  }
492
  ],
493
  "defaultFunctionName" : "input_512_context_512",
494
- "generatedClassName" : "Llama_2_7b_hf_2024_07_02_20_36_17_merged_chunk7",
495
  "userDefinedMetadata" : {
496
 
497
  },
 
17
  "hasShapeFlexibility" : "0",
18
  "isOptional" : "0",
19
  "dataType" : "Float16",
20
+ "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 508)",
21
  "shortDescription" : "",
22
+ "shape" : "[1, 32, 128, 508]",
23
  "name" : "new_k_cache_0",
24
  "type" : "MultiArray"
25
  },
 
27
  "hasShapeFlexibility" : "0",
28
  "isOptional" : "0",
29
  "dataType" : "Float16",
30
+ "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 508)",
31
  "shortDescription" : "",
32
+ "shape" : "[1, 32, 128, 508]",
33
  "name" : "new_k_cache_1",
34
  "type" : "MultiArray"
35
  },
 
37
  "hasShapeFlexibility" : "0",
38
  "isOptional" : "0",
39
  "dataType" : "Float16",
40
+ "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 508)",
41
  "shortDescription" : "",
42
+ "shape" : "[1, 32, 128, 508]",
43
  "name" : "new_k_cache_2",
44
  "type" : "MultiArray"
45
  },
 
47
  "hasShapeFlexibility" : "0",
48
  "isOptional" : "0",
49
  "dataType" : "Float16",
50
+ "formattedType" : "MultiArray (Float16 1 × 32 × 508 × 128)",
51
  "shortDescription" : "",
52
+ "shape" : "[1, 32, 508, 128]",
53
  "name" : "new_v_cache_0",
54
  "type" : "MultiArray"
55
  },
 
57
  "hasShapeFlexibility" : "0",
58
  "isOptional" : "0",
59
  "dataType" : "Float16",
60
+ "formattedType" : "MultiArray (Float16 1 × 32 × 508 × 128)",
61
  "shortDescription" : "",
62
+ "shape" : "[1, 32, 508, 128]",
63
  "name" : "new_v_cache_1",
64
  "type" : "MultiArray"
65
  },
 
67
  "hasShapeFlexibility" : "0",
68
  "isOptional" : "0",
69
  "dataType" : "Float16",
70
+ "formattedType" : "MultiArray (Float16 1 × 32 × 508 × 128)",
71
  "shortDescription" : "",
72
+ "shape" : "[1, 32, 508, 128]",
73
  "name" : "new_v_cache_2",
74
  "type" : "MultiArray"
75
  }
 
142
  "hasShapeFlexibility" : "0",
143
  "isOptional" : "0",
144
  "dataType" : "Float16",
145
+ "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 508)",
146
  "shortDescription" : "",
147
+ "shape" : "[1, 32, 128, 508]",
148
  "name" : "new_k_cache_0",
149
  "type" : "MultiArray"
150
  },
 
152
  "hasShapeFlexibility" : "0",
153
  "isOptional" : "0",
154
  "dataType" : "Float16",
155
+ "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 508)",
156
  "shortDescription" : "",
157
+ "shape" : "[1, 32, 128, 508]",
158
  "name" : "new_k_cache_1",
159
  "type" : "MultiArray"
160
  },
 
162
  "hasShapeFlexibility" : "0",
163
  "isOptional" : "0",
164
  "dataType" : "Float16",
165
+ "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 508)",
166
  "shortDescription" : "",
167
+ "shape" : "[1, 32, 128, 508]",
168
  "name" : "new_k_cache_2",
169
  "type" : "MultiArray"
170
  },
 
172
  "hasShapeFlexibility" : "0",
173
  "isOptional" : "0",
174
  "dataType" : "Float16",
175
+ "formattedType" : "MultiArray (Float16 1 × 32 × 508 × 128)",
176
  "shortDescription" : "",
177
+ "shape" : "[1, 32, 508, 128]",
178
  "name" : "new_v_cache_0",
179
  "type" : "MultiArray"
180
  },
 
182
  "hasShapeFlexibility" : "0",
183
  "isOptional" : "0",
184
  "dataType" : "Float16",
185
+ "formattedType" : "MultiArray (Float16 1 × 32 × 508 × 128)",
186
  "shortDescription" : "",
187
+ "shape" : "[1, 32, 508, 128]",
188
  "name" : "new_v_cache_1",
189
  "type" : "MultiArray"
190
  },
 
192
  "hasShapeFlexibility" : "0",
193
  "isOptional" : "0",
194
  "dataType" : "Float16",
195
+ "formattedType" : "MultiArray (Float16 1 × 32 × 508 × 128)",
196
  "shortDescription" : "",
197
+ "shape" : "[1, 32, 508, 128]",
198
  "name" : "new_v_cache_2",
199
  "type" : "MultiArray"
200
  }
 
203
  "mlProgramOperationTypeHistogram" : {
204
  "Ios18.constexprLutToDense" : 21,
205
  "Ios18.conv" : 21,
 
206
  "Ios18.expandDims" : 6,
207
+ "Ios18.matmul" : 6,
208
+ "Ios18.concat" : 12,
209
  "Ios18.add" : 15,
210
  "Ios18.realDiv" : 6,
211
  "Ios18.silu" : 3,
212
  "Ios18.softmax" : 3,
213
  "Ios18.sliceByIndex" : 18,
214
+ "Ios18.transpose" : 6,
215
  "Ios16.reduceL2Norm" : 6,
216
  "Ios18.squeeze" : 6,
217
  "Ios18.reshape" : 12,
 
224
  "hasShapeFlexibility" : "0",
225
  "isOptional" : "0",
226
  "dataType" : "Float16",
227
+ "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 4)",
228
  "shortDescription" : "",
229
+ "shape" : "[1, 4096, 1, 4]",
230
  "name" : "x",
231
  "type" : "MultiArray"
232
  },
 
234
  "hasShapeFlexibility" : "0",
235
  "isOptional" : "0",
236
  "dataType" : "Float16",
237
+ "formattedType" : "MultiArray (Float16 128 × 4)",
238
  "shortDescription" : "",
239
+ "shape" : "[128, 4]",
240
  "name" : "cos",
241
  "type" : "MultiArray"
242
  },
 
244
  "hasShapeFlexibility" : "0",
245
  "isOptional" : "0",
246
  "dataType" : "Float16",
247
+ "formattedType" : "MultiArray (Float16 128 × 4)",
248
  "shortDescription" : "",
249
+ "shape" : "[128, 4]",
250
  "name" : "sin",
251
  "type" : "MultiArray"
252
  },
 
254
  "hasShapeFlexibility" : "0",
255
  "isOptional" : "0",
256
  "dataType" : "Float16",
257
+ "formattedType" : "MultiArray (Float16 1 × 1 × 4 × 512)",
258
  "shortDescription" : "",
259
+ "shape" : "[1, 1, 4, 512]",
260
  "name" : "mask",
261
  "type" : "MultiArray"
262
  },
 
264
  "hasShapeFlexibility" : "0",
265
  "isOptional" : "1",
266
  "dataType" : "Float16",
267
+ "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 508)?",
268
  "shortDescription" : "",
269
+ "shape" : "[1, 32, 128, 508]",
270
  "name" : "k_cache_0",
271
  "type" : "MultiArray"
272
  },
 
274
  "hasShapeFlexibility" : "0",
275
  "isOptional" : "1",
276
  "dataType" : "Float16",
277
+ "formattedType" : "MultiArray (Float16 1 × 32 × 508 × 128)?",
278
  "shortDescription" : "",
279
+ "shape" : "[1, 32, 508, 128]",
280
  "name" : "v_cache_0",
281
  "type" : "MultiArray"
282
  },
 
284
  "hasShapeFlexibility" : "0",
285
  "isOptional" : "1",
286
  "dataType" : "Float16",
287
+ "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 508)?",
288
  "shortDescription" : "",
289
+ "shape" : "[1, 32, 128, 508]",
290
  "name" : "k_cache_1",
291
  "type" : "MultiArray"
292
  },
 
294
  "hasShapeFlexibility" : "0",
295
  "isOptional" : "1",
296
  "dataType" : "Float16",
297
+ "formattedType" : "MultiArray (Float16 1 × 32 × 508 × 128)?",
298
  "shortDescription" : "",
299
+ "shape" : "[1, 32, 508, 128]",
300
  "name" : "v_cache_1",
301
  "type" : "MultiArray"
302
  },
 
304
  "hasShapeFlexibility" : "0",
305
  "isOptional" : "1",
306
  "dataType" : "Float16",
307
+ "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 508)?",
308
  "shortDescription" : "",
309
+ "shape" : "[1, 32, 128, 508]",
310
  "name" : "k_cache_2",
311
  "type" : "MultiArray"
312
  },
 
314
  "hasShapeFlexibility" : "0",
315
  "isOptional" : "1",
316
  "dataType" : "Float16",
317
+ "formattedType" : "MultiArray (Float16 1 × 32 × 508 × 128)?",
318
  "shortDescription" : "",
319
+ "shape" : "[1, 32, 508, 128]",
320
  "name" : "v_cache_2",
321
  "type" : "MultiArray"
322
  }
 
331
  "hasShapeFlexibility" : "0",
332
  "isOptional" : "0",
333
  "dataType" : "Float16",
334
+ "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 4)",
335
  "shortDescription" : "",
336
+ "shape" : "[1, 4096, 1, 4]",
337
  "name" : "new_x",
338
  "type" : "MultiArray"
339
  },
 
341
  "hasShapeFlexibility" : "0",
342
  "isOptional" : "0",
343
  "dataType" : "Float16",
344
+ "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 508)",
345
  "shortDescription" : "",
346
+ "shape" : "[1, 32, 128, 508]",
347
  "name" : "new_k_cache_0",
348
  "type" : "MultiArray"
349
  },
 
351
  "hasShapeFlexibility" : "0",
352
  "isOptional" : "0",
353
  "dataType" : "Float16",
354
+ "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 508)",
355
  "shortDescription" : "",
356
+ "shape" : "[1, 32, 128, 508]",
357
  "name" : "new_k_cache_1",
358
  "type" : "MultiArray"
359
  },
 
361
  "hasShapeFlexibility" : "0",
362
  "isOptional" : "0",
363
  "dataType" : "Float16",
364
+ "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 508)",
365
  "shortDescription" : "",
366
+ "shape" : "[1, 32, 128, 508]",
367
  "name" : "new_k_cache_2",
368
  "type" : "MultiArray"
369
  },
 
371
  "hasShapeFlexibility" : "0",
372
  "isOptional" : "0",
373
  "dataType" : "Float16",
374
+ "formattedType" : "MultiArray (Float16 1 × 32 × 508 × 128)",
375
  "shortDescription" : "",
376
+ "shape" : "[1, 32, 508, 128]",
377
  "name" : "new_v_cache_0",
378
  "type" : "MultiArray"
379
  },
 
381
  "hasShapeFlexibility" : "0",
382
  "isOptional" : "0",
383
  "dataType" : "Float16",
384
+ "formattedType" : "MultiArray (Float16 1 × 32 × 508 × 128)",
385
  "shortDescription" : "",
386
+ "shape" : "[1, 32, 508, 128]",
387
  "name" : "new_v_cache_1",
388
  "type" : "MultiArray"
389
  },
 
391
  "hasShapeFlexibility" : "0",
392
  "isOptional" : "0",
393
  "dataType" : "Float16",
394
+ "formattedType" : "MultiArray (Float16 1 × 32 × 508 × 128)",
395
  "shortDescription" : "",
396
+ "shape" : "[1, 32, 508, 128]",
397
  "name" : "new_v_cache_2",
398
  "type" : "MultiArray"
399
  }
 
402
  "mlProgramOperationTypeHistogram" : {
403
  "Ios18.constexprLutToDense" : 21,
404
  "Ios18.conv" : 21,
 
405
  "Ios18.expandDims" : 6,
406
+ "Ios18.matmul" : 6,
407
  "Ios18.concat" : 18,
408
  "Ios18.add" : 15,
409
  "Ios18.realDiv" : 6,
410
  "Ios18.silu" : 3,
411
  "Ios18.softmax" : 3,
412
  "Ios18.sliceByIndex" : 18,
413
+ "Ios18.transpose" : 6,
414
  "Ios16.reduceL2Norm" : 6,
415
  "Ios18.squeeze" : 6,
416
  "Ios18.reshape" : 12,
 
421
  "mlProgramOperationTypeHistogram" : {
422
  "Ios18.constexprLutToDense" : 21,
423
  "Ios18.conv" : 21,
 
424
  "Ios18.expandDims" : 6,
425
+ "Ios18.matmul" : 6,
426
+ "Ios18.concat" : 12,
427
  "Ios18.add" : 15,
428
  "Ios18.realDiv" : 6,
429
  "Ios18.silu" : 3,
430
  "Ios18.softmax" : 3,
431
  "Ios18.sliceByIndex" : 18,
432
+ "Ios18.transpose" : 6,
433
  "Ios16.reduceL2Norm" : 6,
434
  "Ios18.squeeze" : 6,
435
  "Ios18.reshape" : 12,
 
494
  }
495
  ],
496
  "defaultFunctionName" : "input_512_context_512",
497
+ "generatedClassName" : "Llama_2_7b_hf_2024_07_17_19_34_17_merged_chunk7",
498
  "userDefinedMetadata" : {
499
 
500
  },
sequoia/Llama-2-7b-hf_chunk7.mlmodelc/model.mil CHANGED
The diff for this file is too large to render. See raw diff
 
sequoia/Llama-2-7b-hf_chunk8.mlmodelc/analytics/coremldata.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:84e317a82cdf4e96f808f63e77f10098844d47ad522545181edfac4d287c9c92
3
  size 243
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e69e7ad37dd59e97348d395eec9b4c41b7d3ea44d86f613751ae47803a0a2efe
3
  size 243
sequoia/Llama-2-7b-hf_chunk8.mlmodelc/coremldata.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e430d0795ff5c384187174f5718a2c13d0070f5d6a811831e18862497865a86d
3
  size 1037
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d35a0353bcfa501579e07af3718261af3b129b4bec004c1fe6d812a6403a3f5b
3
  size 1037
sequoia/Llama-2-7b-hf_chunk8.mlmodelc/metadata.json CHANGED
@@ -17,9 +17,9 @@
17
  "hasShapeFlexibility" : "0",
18
  "isOptional" : "0",
19
  "dataType" : "Float16",
20
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
21
  "shortDescription" : "",
22
- "shape" : "[1, 32, 128, 511]",
23
  "name" : "new_k_cache_0",
24
  "type" : "MultiArray"
25
  },
@@ -27,9 +27,9 @@
27
  "hasShapeFlexibility" : "0",
28
  "isOptional" : "0",
29
  "dataType" : "Float16",
30
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
31
  "shortDescription" : "",
32
- "shape" : "[1, 32, 128, 511]",
33
  "name" : "new_k_cache_1",
34
  "type" : "MultiArray"
35
  },
@@ -37,9 +37,9 @@
37
  "hasShapeFlexibility" : "0",
38
  "isOptional" : "0",
39
  "dataType" : "Float16",
40
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
41
  "shortDescription" : "",
42
- "shape" : "[1, 32, 128, 511]",
43
  "name" : "new_k_cache_2",
44
  "type" : "MultiArray"
45
  },
@@ -47,9 +47,9 @@
47
  "hasShapeFlexibility" : "0",
48
  "isOptional" : "0",
49
  "dataType" : "Float16",
50
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
51
  "shortDescription" : "",
52
- "shape" : "[1, 32, 128, 511]",
53
  "name" : "new_v_cache_0",
54
  "type" : "MultiArray"
55
  },
@@ -57,9 +57,9 @@
57
  "hasShapeFlexibility" : "0",
58
  "isOptional" : "0",
59
  "dataType" : "Float16",
60
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
61
  "shortDescription" : "",
62
- "shape" : "[1, 32, 128, 511]",
63
  "name" : "new_v_cache_1",
64
  "type" : "MultiArray"
65
  },
@@ -67,9 +67,9 @@
67
  "hasShapeFlexibility" : "0",
68
  "isOptional" : "0",
69
  "dataType" : "Float16",
70
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
71
  "shortDescription" : "",
72
- "shape" : "[1, 32, 128, 511]",
73
  "name" : "new_v_cache_2",
74
  "type" : "MultiArray"
75
  }
@@ -142,9 +142,9 @@
142
  "hasShapeFlexibility" : "0",
143
  "isOptional" : "0",
144
  "dataType" : "Float16",
145
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
146
  "shortDescription" : "",
147
- "shape" : "[1, 32, 128, 511]",
148
  "name" : "new_k_cache_0",
149
  "type" : "MultiArray"
150
  },
@@ -152,9 +152,9 @@
152
  "hasShapeFlexibility" : "0",
153
  "isOptional" : "0",
154
  "dataType" : "Float16",
155
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
156
  "shortDescription" : "",
157
- "shape" : "[1, 32, 128, 511]",
158
  "name" : "new_k_cache_1",
159
  "type" : "MultiArray"
160
  },
@@ -162,9 +162,9 @@
162
  "hasShapeFlexibility" : "0",
163
  "isOptional" : "0",
164
  "dataType" : "Float16",
165
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
166
  "shortDescription" : "",
167
- "shape" : "[1, 32, 128, 511]",
168
  "name" : "new_k_cache_2",
169
  "type" : "MultiArray"
170
  },
@@ -172,9 +172,9 @@
172
  "hasShapeFlexibility" : "0",
173
  "isOptional" : "0",
174
  "dataType" : "Float16",
175
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
176
  "shortDescription" : "",
177
- "shape" : "[1, 32, 128, 511]",
178
  "name" : "new_v_cache_0",
179
  "type" : "MultiArray"
180
  },
@@ -182,9 +182,9 @@
182
  "hasShapeFlexibility" : "0",
183
  "isOptional" : "0",
184
  "dataType" : "Float16",
185
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
186
  "shortDescription" : "",
187
- "shape" : "[1, 32, 128, 511]",
188
  "name" : "new_v_cache_1",
189
  "type" : "MultiArray"
190
  },
@@ -192,9 +192,9 @@
192
  "hasShapeFlexibility" : "0",
193
  "isOptional" : "0",
194
  "dataType" : "Float16",
195
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
196
  "shortDescription" : "",
197
- "shape" : "[1, 32, 128, 511]",
198
  "name" : "new_v_cache_2",
199
  "type" : "MultiArray"
200
  }
@@ -203,14 +203,15 @@
203
  "mlProgramOperationTypeHistogram" : {
204
  "Ios18.constexprLutToDense" : 21,
205
  "Ios18.conv" : 21,
206
- "Ios18.matmul" : 6,
207
  "Ios18.expandDims" : 6,
208
- "Ios18.concat" : 18,
 
209
  "Ios18.add" : 15,
210
  "Ios18.realDiv" : 6,
211
  "Ios18.silu" : 3,
212
  "Ios18.softmax" : 3,
213
  "Ios18.sliceByIndex" : 18,
 
214
  "Ios16.reduceL2Norm" : 6,
215
  "Ios18.squeeze" : 6,
216
  "Ios18.reshape" : 12,
@@ -223,9 +224,9 @@
223
  "hasShapeFlexibility" : "0",
224
  "isOptional" : "0",
225
  "dataType" : "Float16",
226
- "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 1)",
227
  "shortDescription" : "",
228
- "shape" : "[1, 4096, 1, 1]",
229
  "name" : "x",
230
  "type" : "MultiArray"
231
  },
@@ -233,9 +234,9 @@
233
  "hasShapeFlexibility" : "0",
234
  "isOptional" : "0",
235
  "dataType" : "Float16",
236
- "formattedType" : "MultiArray (Float16 128 × 1)",
237
  "shortDescription" : "",
238
- "shape" : "[128, 1]",
239
  "name" : "cos",
240
  "type" : "MultiArray"
241
  },
@@ -243,9 +244,9 @@
243
  "hasShapeFlexibility" : "0",
244
  "isOptional" : "0",
245
  "dataType" : "Float16",
246
- "formattedType" : "MultiArray (Float16 128 × 1)",
247
  "shortDescription" : "",
248
- "shape" : "[128, 1]",
249
  "name" : "sin",
250
  "type" : "MultiArray"
251
  },
@@ -253,9 +254,9 @@
253
  "hasShapeFlexibility" : "0",
254
  "isOptional" : "0",
255
  "dataType" : "Float16",
256
- "formattedType" : "MultiArray (Float16 1 × 1 × 1 × 512)",
257
  "shortDescription" : "",
258
- "shape" : "[1, 1, 1, 512]",
259
  "name" : "mask",
260
  "type" : "MultiArray"
261
  },
@@ -263,9 +264,9 @@
263
  "hasShapeFlexibility" : "0",
264
  "isOptional" : "1",
265
  "dataType" : "Float16",
266
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)?",
267
  "shortDescription" : "",
268
- "shape" : "[1, 32, 128, 511]",
269
  "name" : "k_cache_0",
270
  "type" : "MultiArray"
271
  },
@@ -273,9 +274,9 @@
273
  "hasShapeFlexibility" : "0",
274
  "isOptional" : "1",
275
  "dataType" : "Float16",
276
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)?",
277
  "shortDescription" : "",
278
- "shape" : "[1, 32, 128, 511]",
279
  "name" : "v_cache_0",
280
  "type" : "MultiArray"
281
  },
@@ -283,9 +284,9 @@
283
  "hasShapeFlexibility" : "0",
284
  "isOptional" : "1",
285
  "dataType" : "Float16",
286
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)?",
287
  "shortDescription" : "",
288
- "shape" : "[1, 32, 128, 511]",
289
  "name" : "k_cache_1",
290
  "type" : "MultiArray"
291
  },
@@ -293,9 +294,9 @@
293
  "hasShapeFlexibility" : "0",
294
  "isOptional" : "1",
295
  "dataType" : "Float16",
296
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)?",
297
  "shortDescription" : "",
298
- "shape" : "[1, 32, 128, 511]",
299
  "name" : "v_cache_1",
300
  "type" : "MultiArray"
301
  },
@@ -303,9 +304,9 @@
303
  "hasShapeFlexibility" : "0",
304
  "isOptional" : "1",
305
  "dataType" : "Float16",
306
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)?",
307
  "shortDescription" : "",
308
- "shape" : "[1, 32, 128, 511]",
309
  "name" : "k_cache_2",
310
  "type" : "MultiArray"
311
  },
@@ -313,9 +314,9 @@
313
  "hasShapeFlexibility" : "0",
314
  "isOptional" : "1",
315
  "dataType" : "Float16",
316
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)?",
317
  "shortDescription" : "",
318
- "shape" : "[1, 32, 128, 511]",
319
  "name" : "v_cache_2",
320
  "type" : "MultiArray"
321
  }
@@ -330,9 +331,9 @@
330
  "hasShapeFlexibility" : "0",
331
  "isOptional" : "0",
332
  "dataType" : "Float16",
333
- "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 1)",
334
  "shortDescription" : "",
335
- "shape" : "[1, 4096, 1, 1]",
336
  "name" : "new_x",
337
  "type" : "MultiArray"
338
  },
@@ -340,9 +341,9 @@
340
  "hasShapeFlexibility" : "0",
341
  "isOptional" : "0",
342
  "dataType" : "Float16",
343
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
344
  "shortDescription" : "",
345
- "shape" : "[1, 32, 128, 511]",
346
  "name" : "new_k_cache_0",
347
  "type" : "MultiArray"
348
  },
@@ -350,9 +351,9 @@
350
  "hasShapeFlexibility" : "0",
351
  "isOptional" : "0",
352
  "dataType" : "Float16",
353
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
354
  "shortDescription" : "",
355
- "shape" : "[1, 32, 128, 511]",
356
  "name" : "new_k_cache_1",
357
  "type" : "MultiArray"
358
  },
@@ -360,9 +361,9 @@
360
  "hasShapeFlexibility" : "0",
361
  "isOptional" : "0",
362
  "dataType" : "Float16",
363
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
364
  "shortDescription" : "",
365
- "shape" : "[1, 32, 128, 511]",
366
  "name" : "new_k_cache_2",
367
  "type" : "MultiArray"
368
  },
@@ -370,9 +371,9 @@
370
  "hasShapeFlexibility" : "0",
371
  "isOptional" : "0",
372
  "dataType" : "Float16",
373
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
374
  "shortDescription" : "",
375
- "shape" : "[1, 32, 128, 511]",
376
  "name" : "new_v_cache_0",
377
  "type" : "MultiArray"
378
  },
@@ -380,9 +381,9 @@
380
  "hasShapeFlexibility" : "0",
381
  "isOptional" : "0",
382
  "dataType" : "Float16",
383
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
384
  "shortDescription" : "",
385
- "shape" : "[1, 32, 128, 511]",
386
  "name" : "new_v_cache_1",
387
  "type" : "MultiArray"
388
  },
@@ -390,9 +391,9 @@
390
  "hasShapeFlexibility" : "0",
391
  "isOptional" : "0",
392
  "dataType" : "Float16",
393
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
394
  "shortDescription" : "",
395
- "shape" : "[1, 32, 128, 511]",
396
  "name" : "new_v_cache_2",
397
  "type" : "MultiArray"
398
  }
@@ -401,14 +402,15 @@
401
  "mlProgramOperationTypeHistogram" : {
402
  "Ios18.constexprLutToDense" : 21,
403
  "Ios18.conv" : 21,
404
- "Ios18.matmul" : 6,
405
  "Ios18.expandDims" : 6,
 
406
  "Ios18.concat" : 18,
407
  "Ios18.add" : 15,
408
  "Ios18.realDiv" : 6,
409
  "Ios18.silu" : 3,
410
  "Ios18.softmax" : 3,
411
  "Ios18.sliceByIndex" : 18,
 
412
  "Ios16.reduceL2Norm" : 6,
413
  "Ios18.squeeze" : 6,
414
  "Ios18.reshape" : 12,
@@ -419,14 +421,15 @@
419
  "mlProgramOperationTypeHistogram" : {
420
  "Ios18.constexprLutToDense" : 21,
421
  "Ios18.conv" : 21,
422
- "Ios18.matmul" : 6,
423
  "Ios18.expandDims" : 6,
424
- "Ios18.concat" : 18,
 
425
  "Ios18.add" : 15,
426
  "Ios18.realDiv" : 6,
427
  "Ios18.silu" : 3,
428
  "Ios18.softmax" : 3,
429
  "Ios18.sliceByIndex" : 18,
 
430
  "Ios16.reduceL2Norm" : 6,
431
  "Ios18.squeeze" : 6,
432
  "Ios18.reshape" : 12,
@@ -491,7 +494,7 @@
491
  }
492
  ],
493
  "defaultFunctionName" : "input_512_context_512",
494
- "generatedClassName" : "Llama_2_7b_hf_2024_07_02_20_36_17_merged_chunk8",
495
  "userDefinedMetadata" : {
496
 
497
  },
 
17
  "hasShapeFlexibility" : "0",
18
  "isOptional" : "0",
19
  "dataType" : "Float16",
20
+ "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 508)",
21
  "shortDescription" : "",
22
+ "shape" : "[1, 32, 128, 508]",
23
  "name" : "new_k_cache_0",
24
  "type" : "MultiArray"
25
  },
 
27
  "hasShapeFlexibility" : "0",
28
  "isOptional" : "0",
29
  "dataType" : "Float16",
30
+ "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 508)",
31
  "shortDescription" : "",
32
+ "shape" : "[1, 32, 128, 508]",
33
  "name" : "new_k_cache_1",
34
  "type" : "MultiArray"
35
  },
 
37
  "hasShapeFlexibility" : "0",
38
  "isOptional" : "0",
39
  "dataType" : "Float16",
40
+ "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 508)",
41
  "shortDescription" : "",
42
+ "shape" : "[1, 32, 128, 508]",
43
  "name" : "new_k_cache_2",
44
  "type" : "MultiArray"
45
  },
 
47
  "hasShapeFlexibility" : "0",
48
  "isOptional" : "0",
49
  "dataType" : "Float16",
50
+ "formattedType" : "MultiArray (Float16 1 × 32 × 508 × 128)",
51
  "shortDescription" : "",
52
+ "shape" : "[1, 32, 508, 128]",
53
  "name" : "new_v_cache_0",
54
  "type" : "MultiArray"
55
  },
 
57
  "hasShapeFlexibility" : "0",
58
  "isOptional" : "0",
59
  "dataType" : "Float16",
60
+ "formattedType" : "MultiArray (Float16 1 × 32 × 508 × 128)",
61
  "shortDescription" : "",
62
+ "shape" : "[1, 32, 508, 128]",
63
  "name" : "new_v_cache_1",
64
  "type" : "MultiArray"
65
  },
 
67
  "hasShapeFlexibility" : "0",
68
  "isOptional" : "0",
69
  "dataType" : "Float16",
70
+ "formattedType" : "MultiArray (Float16 1 × 32 × 508 × 128)",
71
  "shortDescription" : "",
72
+ "shape" : "[1, 32, 508, 128]",
73
  "name" : "new_v_cache_2",
74
  "type" : "MultiArray"
75
  }
 
142
  "hasShapeFlexibility" : "0",
143
  "isOptional" : "0",
144
  "dataType" : "Float16",
145
+ "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 508)",
146
  "shortDescription" : "",
147
+ "shape" : "[1, 32, 128, 508]",
148
  "name" : "new_k_cache_0",
149
  "type" : "MultiArray"
150
  },
 
152
  "hasShapeFlexibility" : "0",
153
  "isOptional" : "0",
154
  "dataType" : "Float16",
155
+ "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 508)",
156
  "shortDescription" : "",
157
+ "shape" : "[1, 32, 128, 508]",
158
  "name" : "new_k_cache_1",
159
  "type" : "MultiArray"
160
  },
 
162
  "hasShapeFlexibility" : "0",
163
  "isOptional" : "0",
164
  "dataType" : "Float16",
165
+ "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 508)",
166
  "shortDescription" : "",
167
+ "shape" : "[1, 32, 128, 508]",
168
  "name" : "new_k_cache_2",
169
  "type" : "MultiArray"
170
  },
 
172
  "hasShapeFlexibility" : "0",
173
  "isOptional" : "0",
174
  "dataType" : "Float16",
175
+ "formattedType" : "MultiArray (Float16 1 × 32 × 508 × 128)",
176
  "shortDescription" : "",
177
+ "shape" : "[1, 32, 508, 128]",
178
  "name" : "new_v_cache_0",
179
  "type" : "MultiArray"
180
  },
 
182
  "hasShapeFlexibility" : "0",
183
  "isOptional" : "0",
184
  "dataType" : "Float16",
185
+ "formattedType" : "MultiArray (Float16 1 × 32 × 508 × 128)",
186
  "shortDescription" : "",
187
+ "shape" : "[1, 32, 508, 128]",
188
  "name" : "new_v_cache_1",
189
  "type" : "MultiArray"
190
  },
 
192
  "hasShapeFlexibility" : "0",
193
  "isOptional" : "0",
194
  "dataType" : "Float16",
195
+ "formattedType" : "MultiArray (Float16 1 × 32 × 508 × 128)",
196
  "shortDescription" : "",
197
+ "shape" : "[1, 32, 508, 128]",
198
  "name" : "new_v_cache_2",
199
  "type" : "MultiArray"
200
  }
 
203
  "mlProgramOperationTypeHistogram" : {
204
  "Ios18.constexprLutToDense" : 21,
205
  "Ios18.conv" : 21,
 
206
  "Ios18.expandDims" : 6,
207
+ "Ios18.matmul" : 6,
208
+ "Ios18.concat" : 12,
209
  "Ios18.add" : 15,
210
  "Ios18.realDiv" : 6,
211
  "Ios18.silu" : 3,
212
  "Ios18.softmax" : 3,
213
  "Ios18.sliceByIndex" : 18,
214
+ "Ios18.transpose" : 6,
215
  "Ios16.reduceL2Norm" : 6,
216
  "Ios18.squeeze" : 6,
217
  "Ios18.reshape" : 12,
 
224
  "hasShapeFlexibility" : "0",
225
  "isOptional" : "0",
226
  "dataType" : "Float16",
227
+ "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 4)",
228
  "shortDescription" : "",
229
+ "shape" : "[1, 4096, 1, 4]",
230
  "name" : "x",
231
  "type" : "MultiArray"
232
  },
 
234
  "hasShapeFlexibility" : "0",
235
  "isOptional" : "0",
236
  "dataType" : "Float16",
237
+ "formattedType" : "MultiArray (Float16 128 × 4)",
238
  "shortDescription" : "",
239
+ "shape" : "[128, 4]",
240
  "name" : "cos",
241
  "type" : "MultiArray"
242
  },
 
244
  "hasShapeFlexibility" : "0",
245
  "isOptional" : "0",
246
  "dataType" : "Float16",
247
+ "formattedType" : "MultiArray (Float16 128 × 4)",
248
  "shortDescription" : "",
249
+ "shape" : "[128, 4]",
250
  "name" : "sin",
251
  "type" : "MultiArray"
252
  },
 
254
  "hasShapeFlexibility" : "0",
255
  "isOptional" : "0",
256
  "dataType" : "Float16",
257
+ "formattedType" : "MultiArray (Float16 1 × 1 × 4 × 512)",
258
  "shortDescription" : "",
259
+ "shape" : "[1, 1, 4, 512]",
260
  "name" : "mask",
261
  "type" : "MultiArray"
262
  },
 
264
  "hasShapeFlexibility" : "0",
265
  "isOptional" : "1",
266
  "dataType" : "Float16",
267
+ "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 508)?",
268
  "shortDescription" : "",
269
+ "shape" : "[1, 32, 128, 508]",
270
  "name" : "k_cache_0",
271
  "type" : "MultiArray"
272
  },
 
274
  "hasShapeFlexibility" : "0",
275
  "isOptional" : "1",
276
  "dataType" : "Float16",
277
+ "formattedType" : "MultiArray (Float16 1 × 32 × 508 × 128)?",
278
  "shortDescription" : "",
279
+ "shape" : "[1, 32, 508, 128]",
280
  "name" : "v_cache_0",
281
  "type" : "MultiArray"
282
  },
 
284
  "hasShapeFlexibility" : "0",
285
  "isOptional" : "1",
286
  "dataType" : "Float16",
287
+ "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 508)?",
288
  "shortDescription" : "",
289
+ "shape" : "[1, 32, 128, 508]",
290
  "name" : "k_cache_1",
291
  "type" : "MultiArray"
292
  },
 
294
  "hasShapeFlexibility" : "0",
295
  "isOptional" : "1",
296
  "dataType" : "Float16",
297
+ "formattedType" : "MultiArray (Float16 1 × 32 × 508 × 128)?",
298
  "shortDescription" : "",
299
+ "shape" : "[1, 32, 508, 128]",
300
  "name" : "v_cache_1",
301
  "type" : "MultiArray"
302
  },
 
304
  "hasShapeFlexibility" : "0",
305
  "isOptional" : "1",
306
  "dataType" : "Float16",
307
+ "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 508)?",
308
  "shortDescription" : "",
309
+ "shape" : "[1, 32, 128, 508]",
310
  "name" : "k_cache_2",
311
  "type" : "MultiArray"
312
  },
 
314
  "hasShapeFlexibility" : "0",
315
  "isOptional" : "1",
316
  "dataType" : "Float16",
317
+ "formattedType" : "MultiArray (Float16 1 × 32 × 508 × 128)?",
318
  "shortDescription" : "",
319
+ "shape" : "[1, 32, 508, 128]",
320
  "name" : "v_cache_2",
321
  "type" : "MultiArray"
322
  }
 
331
  "hasShapeFlexibility" : "0",
332
  "isOptional" : "0",
333
  "dataType" : "Float16",
334
+ "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 4)",
335
  "shortDescription" : "",
336
+ "shape" : "[1, 4096, 1, 4]",
337
  "name" : "new_x",
338
  "type" : "MultiArray"
339
  },
 
341
  "hasShapeFlexibility" : "0",
342
  "isOptional" : "0",
343
  "dataType" : "Float16",
344
+ "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 508)",
345
  "shortDescription" : "",
346
+ "shape" : "[1, 32, 128, 508]",
347
  "name" : "new_k_cache_0",
348
  "type" : "MultiArray"
349
  },
 
351
  "hasShapeFlexibility" : "0",
352
  "isOptional" : "0",
353
  "dataType" : "Float16",
354
+ "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 508)",
355
  "shortDescription" : "",
356
+ "shape" : "[1, 32, 128, 508]",
357
  "name" : "new_k_cache_1",
358
  "type" : "MultiArray"
359
  },
 
361
  "hasShapeFlexibility" : "0",
362
  "isOptional" : "0",
363
  "dataType" : "Float16",
364
+ "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 508)",
365
  "shortDescription" : "",
366
+ "shape" : "[1, 32, 128, 508]",
367
  "name" : "new_k_cache_2",
368
  "type" : "MultiArray"
369
  },
 
371
  "hasShapeFlexibility" : "0",
372
  "isOptional" : "0",
373
  "dataType" : "Float16",
374
+ "formattedType" : "MultiArray (Float16 1 × 32 × 508 × 128)",
375
  "shortDescription" : "",
376
+ "shape" : "[1, 32, 508, 128]",
377
  "name" : "new_v_cache_0",
378
  "type" : "MultiArray"
379
  },
 
381
  "hasShapeFlexibility" : "0",
382
  "isOptional" : "0",
383
  "dataType" : "Float16",
384
+ "formattedType" : "MultiArray (Float16 1 × 32 × 508 × 128)",
385
  "shortDescription" : "",
386
+ "shape" : "[1, 32, 508, 128]",
387
  "name" : "new_v_cache_1",
388
  "type" : "MultiArray"
389
  },
 
391
  "hasShapeFlexibility" : "0",
392
  "isOptional" : "0",
393
  "dataType" : "Float16",
394
+ "formattedType" : "MultiArray (Float16 1 × 32 × 508 × 128)",
395
  "shortDescription" : "",
396
+ "shape" : "[1, 32, 508, 128]",
397
  "name" : "new_v_cache_2",
398
  "type" : "MultiArray"
399
  }
 
402
  "mlProgramOperationTypeHistogram" : {
403
  "Ios18.constexprLutToDense" : 21,
404
  "Ios18.conv" : 21,
 
405
  "Ios18.expandDims" : 6,
406
+ "Ios18.matmul" : 6,
407
  "Ios18.concat" : 18,
408
  "Ios18.add" : 15,
409
  "Ios18.realDiv" : 6,
410
  "Ios18.silu" : 3,
411
  "Ios18.softmax" : 3,
412
  "Ios18.sliceByIndex" : 18,
413
+ "Ios18.transpose" : 6,
414
  "Ios16.reduceL2Norm" : 6,
415
  "Ios18.squeeze" : 6,
416
  "Ios18.reshape" : 12,
 
421
  "mlProgramOperationTypeHistogram" : {
422
  "Ios18.constexprLutToDense" : 21,
423
  "Ios18.conv" : 21,
 
424
  "Ios18.expandDims" : 6,
425
+ "Ios18.matmul" : 6,
426
+ "Ios18.concat" : 12,
427
  "Ios18.add" : 15,
428
  "Ios18.realDiv" : 6,
429
  "Ios18.silu" : 3,
430
  "Ios18.softmax" : 3,
431
  "Ios18.sliceByIndex" : 18,
432
+ "Ios18.transpose" : 6,
433
  "Ios16.reduceL2Norm" : 6,
434
  "Ios18.squeeze" : 6,
435
  "Ios18.reshape" : 12,
 
494
  }
495
  ],
496
  "defaultFunctionName" : "input_512_context_512",
497
+ "generatedClassName" : "Llama_2_7b_hf_2024_07_17_19_34_17_merged_chunk8",
498
  "userDefinedMetadata" : {
499
 
500
  },
sequoia/Llama-2-7b-hf_chunk8.mlmodelc/model.mil CHANGED
The diff for this file is too large to render. See raw diff
 
sequoia/Llama-2-7b-hf_chunk9.mlmodelc/analytics/coremldata.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:84e317a82cdf4e96f808f63e77f10098844d47ad522545181edfac4d287c9c92
3
  size 243
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e69e7ad37dd59e97348d395eec9b4c41b7d3ea44d86f613751ae47803a0a2efe
3
  size 243
sequoia/Llama-2-7b-hf_chunk9.mlmodelc/coremldata.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e430d0795ff5c384187174f5718a2c13d0070f5d6a811831e18862497865a86d
3
  size 1037
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d35a0353bcfa501579e07af3718261af3b129b4bec004c1fe6d812a6403a3f5b
3
  size 1037
sequoia/Llama-2-7b-hf_chunk9.mlmodelc/metadata.json CHANGED
@@ -17,9 +17,9 @@
17
  "hasShapeFlexibility" : "0",
18
  "isOptional" : "0",
19
  "dataType" : "Float16",
20
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
21
  "shortDescription" : "",
22
- "shape" : "[1, 32, 128, 511]",
23
  "name" : "new_k_cache_0",
24
  "type" : "MultiArray"
25
  },
@@ -27,9 +27,9 @@
27
  "hasShapeFlexibility" : "0",
28
  "isOptional" : "0",
29
  "dataType" : "Float16",
30
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
31
  "shortDescription" : "",
32
- "shape" : "[1, 32, 128, 511]",
33
  "name" : "new_k_cache_1",
34
  "type" : "MultiArray"
35
  },
@@ -37,9 +37,9 @@
37
  "hasShapeFlexibility" : "0",
38
  "isOptional" : "0",
39
  "dataType" : "Float16",
40
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
41
  "shortDescription" : "",
42
- "shape" : "[1, 32, 128, 511]",
43
  "name" : "new_k_cache_2",
44
  "type" : "MultiArray"
45
  },
@@ -47,9 +47,9 @@
47
  "hasShapeFlexibility" : "0",
48
  "isOptional" : "0",
49
  "dataType" : "Float16",
50
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
51
  "shortDescription" : "",
52
- "shape" : "[1, 32, 128, 511]",
53
  "name" : "new_v_cache_0",
54
  "type" : "MultiArray"
55
  },
@@ -57,9 +57,9 @@
57
  "hasShapeFlexibility" : "0",
58
  "isOptional" : "0",
59
  "dataType" : "Float16",
60
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
61
  "shortDescription" : "",
62
- "shape" : "[1, 32, 128, 511]",
63
  "name" : "new_v_cache_1",
64
  "type" : "MultiArray"
65
  },
@@ -67,9 +67,9 @@
67
  "hasShapeFlexibility" : "0",
68
  "isOptional" : "0",
69
  "dataType" : "Float16",
70
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
71
  "shortDescription" : "",
72
- "shape" : "[1, 32, 128, 511]",
73
  "name" : "new_v_cache_2",
74
  "type" : "MultiArray"
75
  }
@@ -142,9 +142,9 @@
142
  "hasShapeFlexibility" : "0",
143
  "isOptional" : "0",
144
  "dataType" : "Float16",
145
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
146
  "shortDescription" : "",
147
- "shape" : "[1, 32, 128, 511]",
148
  "name" : "new_k_cache_0",
149
  "type" : "MultiArray"
150
  },
@@ -152,9 +152,9 @@
152
  "hasShapeFlexibility" : "0",
153
  "isOptional" : "0",
154
  "dataType" : "Float16",
155
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
156
  "shortDescription" : "",
157
- "shape" : "[1, 32, 128, 511]",
158
  "name" : "new_k_cache_1",
159
  "type" : "MultiArray"
160
  },
@@ -162,9 +162,9 @@
162
  "hasShapeFlexibility" : "0",
163
  "isOptional" : "0",
164
  "dataType" : "Float16",
165
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
166
  "shortDescription" : "",
167
- "shape" : "[1, 32, 128, 511]",
168
  "name" : "new_k_cache_2",
169
  "type" : "MultiArray"
170
  },
@@ -172,9 +172,9 @@
172
  "hasShapeFlexibility" : "0",
173
  "isOptional" : "0",
174
  "dataType" : "Float16",
175
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
176
  "shortDescription" : "",
177
- "shape" : "[1, 32, 128, 511]",
178
  "name" : "new_v_cache_0",
179
  "type" : "MultiArray"
180
  },
@@ -182,9 +182,9 @@
182
  "hasShapeFlexibility" : "0",
183
  "isOptional" : "0",
184
  "dataType" : "Float16",
185
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
186
  "shortDescription" : "",
187
- "shape" : "[1, 32, 128, 511]",
188
  "name" : "new_v_cache_1",
189
  "type" : "MultiArray"
190
  },
@@ -192,9 +192,9 @@
192
  "hasShapeFlexibility" : "0",
193
  "isOptional" : "0",
194
  "dataType" : "Float16",
195
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
196
  "shortDescription" : "",
197
- "shape" : "[1, 32, 128, 511]",
198
  "name" : "new_v_cache_2",
199
  "type" : "MultiArray"
200
  }
@@ -203,14 +203,15 @@
203
  "mlProgramOperationTypeHistogram" : {
204
  "Ios18.constexprLutToDense" : 21,
205
  "Ios18.conv" : 21,
206
- "Ios18.matmul" : 6,
207
  "Ios18.expandDims" : 6,
208
- "Ios18.concat" : 18,
 
209
  "Ios18.add" : 15,
210
  "Ios18.realDiv" : 6,
211
  "Ios18.silu" : 3,
212
  "Ios18.softmax" : 3,
213
  "Ios18.sliceByIndex" : 18,
 
214
  "Ios16.reduceL2Norm" : 6,
215
  "Ios18.squeeze" : 6,
216
  "Ios18.reshape" : 12,
@@ -223,9 +224,9 @@
223
  "hasShapeFlexibility" : "0",
224
  "isOptional" : "0",
225
  "dataType" : "Float16",
226
- "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 1)",
227
  "shortDescription" : "",
228
- "shape" : "[1, 4096, 1, 1]",
229
  "name" : "x",
230
  "type" : "MultiArray"
231
  },
@@ -233,9 +234,9 @@
233
  "hasShapeFlexibility" : "0",
234
  "isOptional" : "0",
235
  "dataType" : "Float16",
236
- "formattedType" : "MultiArray (Float16 128 × 1)",
237
  "shortDescription" : "",
238
- "shape" : "[128, 1]",
239
  "name" : "cos",
240
  "type" : "MultiArray"
241
  },
@@ -243,9 +244,9 @@
243
  "hasShapeFlexibility" : "0",
244
  "isOptional" : "0",
245
  "dataType" : "Float16",
246
- "formattedType" : "MultiArray (Float16 128 × 1)",
247
  "shortDescription" : "",
248
- "shape" : "[128, 1]",
249
  "name" : "sin",
250
  "type" : "MultiArray"
251
  },
@@ -253,9 +254,9 @@
253
  "hasShapeFlexibility" : "0",
254
  "isOptional" : "0",
255
  "dataType" : "Float16",
256
- "formattedType" : "MultiArray (Float16 1 × 1 × 1 × 512)",
257
  "shortDescription" : "",
258
- "shape" : "[1, 1, 1, 512]",
259
  "name" : "mask",
260
  "type" : "MultiArray"
261
  },
@@ -263,9 +264,9 @@
263
  "hasShapeFlexibility" : "0",
264
  "isOptional" : "1",
265
  "dataType" : "Float16",
266
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)?",
267
  "shortDescription" : "",
268
- "shape" : "[1, 32, 128, 511]",
269
  "name" : "k_cache_0",
270
  "type" : "MultiArray"
271
  },
@@ -273,9 +274,9 @@
273
  "hasShapeFlexibility" : "0",
274
  "isOptional" : "1",
275
  "dataType" : "Float16",
276
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)?",
277
  "shortDescription" : "",
278
- "shape" : "[1, 32, 128, 511]",
279
  "name" : "v_cache_0",
280
  "type" : "MultiArray"
281
  },
@@ -283,9 +284,9 @@
283
  "hasShapeFlexibility" : "0",
284
  "isOptional" : "1",
285
  "dataType" : "Float16",
286
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)?",
287
  "shortDescription" : "",
288
- "shape" : "[1, 32, 128, 511]",
289
  "name" : "k_cache_1",
290
  "type" : "MultiArray"
291
  },
@@ -293,9 +294,9 @@
293
  "hasShapeFlexibility" : "0",
294
  "isOptional" : "1",
295
  "dataType" : "Float16",
296
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)?",
297
  "shortDescription" : "",
298
- "shape" : "[1, 32, 128, 511]",
299
  "name" : "v_cache_1",
300
  "type" : "MultiArray"
301
  },
@@ -303,9 +304,9 @@
303
  "hasShapeFlexibility" : "0",
304
  "isOptional" : "1",
305
  "dataType" : "Float16",
306
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)?",
307
  "shortDescription" : "",
308
- "shape" : "[1, 32, 128, 511]",
309
  "name" : "k_cache_2",
310
  "type" : "MultiArray"
311
  },
@@ -313,9 +314,9 @@
313
  "hasShapeFlexibility" : "0",
314
  "isOptional" : "1",
315
  "dataType" : "Float16",
316
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)?",
317
  "shortDescription" : "",
318
- "shape" : "[1, 32, 128, 511]",
319
  "name" : "v_cache_2",
320
  "type" : "MultiArray"
321
  }
@@ -330,9 +331,9 @@
330
  "hasShapeFlexibility" : "0",
331
  "isOptional" : "0",
332
  "dataType" : "Float16",
333
- "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 1)",
334
  "shortDescription" : "",
335
- "shape" : "[1, 4096, 1, 1]",
336
  "name" : "new_x",
337
  "type" : "MultiArray"
338
  },
@@ -340,9 +341,9 @@
340
  "hasShapeFlexibility" : "0",
341
  "isOptional" : "0",
342
  "dataType" : "Float16",
343
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
344
  "shortDescription" : "",
345
- "shape" : "[1, 32, 128, 511]",
346
  "name" : "new_k_cache_0",
347
  "type" : "MultiArray"
348
  },
@@ -350,9 +351,9 @@
350
  "hasShapeFlexibility" : "0",
351
  "isOptional" : "0",
352
  "dataType" : "Float16",
353
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
354
  "shortDescription" : "",
355
- "shape" : "[1, 32, 128, 511]",
356
  "name" : "new_k_cache_1",
357
  "type" : "MultiArray"
358
  },
@@ -360,9 +361,9 @@
360
  "hasShapeFlexibility" : "0",
361
  "isOptional" : "0",
362
  "dataType" : "Float16",
363
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
364
  "shortDescription" : "",
365
- "shape" : "[1, 32, 128, 511]",
366
  "name" : "new_k_cache_2",
367
  "type" : "MultiArray"
368
  },
@@ -370,9 +371,9 @@
370
  "hasShapeFlexibility" : "0",
371
  "isOptional" : "0",
372
  "dataType" : "Float16",
373
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
374
  "shortDescription" : "",
375
- "shape" : "[1, 32, 128, 511]",
376
  "name" : "new_v_cache_0",
377
  "type" : "MultiArray"
378
  },
@@ -380,9 +381,9 @@
380
  "hasShapeFlexibility" : "0",
381
  "isOptional" : "0",
382
  "dataType" : "Float16",
383
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
384
  "shortDescription" : "",
385
- "shape" : "[1, 32, 128, 511]",
386
  "name" : "new_v_cache_1",
387
  "type" : "MultiArray"
388
  },
@@ -390,9 +391,9 @@
390
  "hasShapeFlexibility" : "0",
391
  "isOptional" : "0",
392
  "dataType" : "Float16",
393
- "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 511)",
394
  "shortDescription" : "",
395
- "shape" : "[1, 32, 128, 511]",
396
  "name" : "new_v_cache_2",
397
  "type" : "MultiArray"
398
  }
@@ -401,14 +402,15 @@
401
  "mlProgramOperationTypeHistogram" : {
402
  "Ios18.constexprLutToDense" : 21,
403
  "Ios18.conv" : 21,
404
- "Ios18.matmul" : 6,
405
  "Ios18.expandDims" : 6,
 
406
  "Ios18.concat" : 18,
407
  "Ios18.add" : 15,
408
  "Ios18.realDiv" : 6,
409
  "Ios18.silu" : 3,
410
  "Ios18.softmax" : 3,
411
  "Ios18.sliceByIndex" : 18,
 
412
  "Ios16.reduceL2Norm" : 6,
413
  "Ios18.squeeze" : 6,
414
  "Ios18.reshape" : 12,
@@ -419,14 +421,15 @@
419
  "mlProgramOperationTypeHistogram" : {
420
  "Ios18.constexprLutToDense" : 21,
421
  "Ios18.conv" : 21,
422
- "Ios18.matmul" : 6,
423
  "Ios18.expandDims" : 6,
424
- "Ios18.concat" : 18,
 
425
  "Ios18.add" : 15,
426
  "Ios18.realDiv" : 6,
427
  "Ios18.silu" : 3,
428
  "Ios18.softmax" : 3,
429
  "Ios18.sliceByIndex" : 18,
 
430
  "Ios16.reduceL2Norm" : 6,
431
  "Ios18.squeeze" : 6,
432
  "Ios18.reshape" : 12,
@@ -491,7 +494,7 @@
491
  }
492
  ],
493
  "defaultFunctionName" : "input_512_context_512",
494
- "generatedClassName" : "Llama_2_7b_hf_2024_07_02_20_36_17_merged_chunk9",
495
  "userDefinedMetadata" : {
496
 
497
  },
 
17
  "hasShapeFlexibility" : "0",
18
  "isOptional" : "0",
19
  "dataType" : "Float16",
20
+ "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 508)",
21
  "shortDescription" : "",
22
+ "shape" : "[1, 32, 128, 508]",
23
  "name" : "new_k_cache_0",
24
  "type" : "MultiArray"
25
  },
 
27
  "hasShapeFlexibility" : "0",
28
  "isOptional" : "0",
29
  "dataType" : "Float16",
30
+ "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 508)",
31
  "shortDescription" : "",
32
+ "shape" : "[1, 32, 128, 508]",
33
  "name" : "new_k_cache_1",
34
  "type" : "MultiArray"
35
  },
 
37
  "hasShapeFlexibility" : "0",
38
  "isOptional" : "0",
39
  "dataType" : "Float16",
40
+ "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 508)",
41
  "shortDescription" : "",
42
+ "shape" : "[1, 32, 128, 508]",
43
  "name" : "new_k_cache_2",
44
  "type" : "MultiArray"
45
  },
 
47
  "hasShapeFlexibility" : "0",
48
  "isOptional" : "0",
49
  "dataType" : "Float16",
50
+ "formattedType" : "MultiArray (Float16 1 × 32 × 508 × 128)",
51
  "shortDescription" : "",
52
+ "shape" : "[1, 32, 508, 128]",
53
  "name" : "new_v_cache_0",
54
  "type" : "MultiArray"
55
  },
 
57
  "hasShapeFlexibility" : "0",
58
  "isOptional" : "0",
59
  "dataType" : "Float16",
60
+ "formattedType" : "MultiArray (Float16 1 × 32 × 508 × 128)",
61
  "shortDescription" : "",
62
+ "shape" : "[1, 32, 508, 128]",
63
  "name" : "new_v_cache_1",
64
  "type" : "MultiArray"
65
  },
 
67
  "hasShapeFlexibility" : "0",
68
  "isOptional" : "0",
69
  "dataType" : "Float16",
70
+ "formattedType" : "MultiArray (Float16 1 × 32 × 508 × 128)",
71
  "shortDescription" : "",
72
+ "shape" : "[1, 32, 508, 128]",
73
  "name" : "new_v_cache_2",
74
  "type" : "MultiArray"
75
  }
 
142
  "hasShapeFlexibility" : "0",
143
  "isOptional" : "0",
144
  "dataType" : "Float16",
145
+ "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 508)",
146
  "shortDescription" : "",
147
+ "shape" : "[1, 32, 128, 508]",
148
  "name" : "new_k_cache_0",
149
  "type" : "MultiArray"
150
  },
 
152
  "hasShapeFlexibility" : "0",
153
  "isOptional" : "0",
154
  "dataType" : "Float16",
155
+ "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 508)",
156
  "shortDescription" : "",
157
+ "shape" : "[1, 32, 128, 508]",
158
  "name" : "new_k_cache_1",
159
  "type" : "MultiArray"
160
  },
 
162
  "hasShapeFlexibility" : "0",
163
  "isOptional" : "0",
164
  "dataType" : "Float16",
165
+ "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 508)",
166
  "shortDescription" : "",
167
+ "shape" : "[1, 32, 128, 508]",
168
  "name" : "new_k_cache_2",
169
  "type" : "MultiArray"
170
  },
 
172
  "hasShapeFlexibility" : "0",
173
  "isOptional" : "0",
174
  "dataType" : "Float16",
175
+ "formattedType" : "MultiArray (Float16 1 × 32 × 508 × 128)",
176
  "shortDescription" : "",
177
+ "shape" : "[1, 32, 508, 128]",
178
  "name" : "new_v_cache_0",
179
  "type" : "MultiArray"
180
  },
 
182
  "hasShapeFlexibility" : "0",
183
  "isOptional" : "0",
184
  "dataType" : "Float16",
185
+ "formattedType" : "MultiArray (Float16 1 × 32 × 508 × 128)",
186
  "shortDescription" : "",
187
+ "shape" : "[1, 32, 508, 128]",
188
  "name" : "new_v_cache_1",
189
  "type" : "MultiArray"
190
  },
 
192
  "hasShapeFlexibility" : "0",
193
  "isOptional" : "0",
194
  "dataType" : "Float16",
195
+ "formattedType" : "MultiArray (Float16 1 × 32 × 508 × 128)",
196
  "shortDescription" : "",
197
+ "shape" : "[1, 32, 508, 128]",
198
  "name" : "new_v_cache_2",
199
  "type" : "MultiArray"
200
  }
 
203
  "mlProgramOperationTypeHistogram" : {
204
  "Ios18.constexprLutToDense" : 21,
205
  "Ios18.conv" : 21,
 
206
  "Ios18.expandDims" : 6,
207
+ "Ios18.matmul" : 6,
208
+ "Ios18.concat" : 12,
209
  "Ios18.add" : 15,
210
  "Ios18.realDiv" : 6,
211
  "Ios18.silu" : 3,
212
  "Ios18.softmax" : 3,
213
  "Ios18.sliceByIndex" : 18,
214
+ "Ios18.transpose" : 6,
215
  "Ios16.reduceL2Norm" : 6,
216
  "Ios18.squeeze" : 6,
217
  "Ios18.reshape" : 12,
 
224
  "hasShapeFlexibility" : "0",
225
  "isOptional" : "0",
226
  "dataType" : "Float16",
227
+ "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 4)",
228
  "shortDescription" : "",
229
+ "shape" : "[1, 4096, 1, 4]",
230
  "name" : "x",
231
  "type" : "MultiArray"
232
  },
 
234
  "hasShapeFlexibility" : "0",
235
  "isOptional" : "0",
236
  "dataType" : "Float16",
237
+ "formattedType" : "MultiArray (Float16 128 × 4)",
238
  "shortDescription" : "",
239
+ "shape" : "[128, 4]",
240
  "name" : "cos",
241
  "type" : "MultiArray"
242
  },
 
244
  "hasShapeFlexibility" : "0",
245
  "isOptional" : "0",
246
  "dataType" : "Float16",
247
+ "formattedType" : "MultiArray (Float16 128 × 4)",
248
  "shortDescription" : "",
249
+ "shape" : "[128, 4]",
250
  "name" : "sin",
251
  "type" : "MultiArray"
252
  },
 
254
  "hasShapeFlexibility" : "0",
255
  "isOptional" : "0",
256
  "dataType" : "Float16",
257
+ "formattedType" : "MultiArray (Float16 1 × 1 × 4 × 512)",
258
  "shortDescription" : "",
259
+ "shape" : "[1, 1, 4, 512]",
260
  "name" : "mask",
261
  "type" : "MultiArray"
262
  },
 
264
  "hasShapeFlexibility" : "0",
265
  "isOptional" : "1",
266
  "dataType" : "Float16",
267
+ "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 508)?",
268
  "shortDescription" : "",
269
+ "shape" : "[1, 32, 128, 508]",
270
  "name" : "k_cache_0",
271
  "type" : "MultiArray"
272
  },
 
274
  "hasShapeFlexibility" : "0",
275
  "isOptional" : "1",
276
  "dataType" : "Float16",
277
+ "formattedType" : "MultiArray (Float16 1 × 32 × 508 × 128)?",
278
  "shortDescription" : "",
279
+ "shape" : "[1, 32, 508, 128]",
280
  "name" : "v_cache_0",
281
  "type" : "MultiArray"
282
  },
 
284
  "hasShapeFlexibility" : "0",
285
  "isOptional" : "1",
286
  "dataType" : "Float16",
287
+ "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 508)?",
288
  "shortDescription" : "",
289
+ "shape" : "[1, 32, 128, 508]",
290
  "name" : "k_cache_1",
291
  "type" : "MultiArray"
292
  },
 
294
  "hasShapeFlexibility" : "0",
295
  "isOptional" : "1",
296
  "dataType" : "Float16",
297
+ "formattedType" : "MultiArray (Float16 1 × 32 × 508 × 128)?",
298
  "shortDescription" : "",
299
+ "shape" : "[1, 32, 508, 128]",
300
  "name" : "v_cache_1",
301
  "type" : "MultiArray"
302
  },
 
304
  "hasShapeFlexibility" : "0",
305
  "isOptional" : "1",
306
  "dataType" : "Float16",
307
+ "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 508)?",
308
  "shortDescription" : "",
309
+ "shape" : "[1, 32, 128, 508]",
310
  "name" : "k_cache_2",
311
  "type" : "MultiArray"
312
  },
 
314
  "hasShapeFlexibility" : "0",
315
  "isOptional" : "1",
316
  "dataType" : "Float16",
317
+ "formattedType" : "MultiArray (Float16 1 × 32 × 508 × 128)?",
318
  "shortDescription" : "",
319
+ "shape" : "[1, 32, 508, 128]",
320
  "name" : "v_cache_2",
321
  "type" : "MultiArray"
322
  }
 
331
  "hasShapeFlexibility" : "0",
332
  "isOptional" : "0",
333
  "dataType" : "Float16",
334
+ "formattedType" : "MultiArray (Float16 1 × 4096 × 1 × 4)",
335
  "shortDescription" : "",
336
+ "shape" : "[1, 4096, 1, 4]",
337
  "name" : "new_x",
338
  "type" : "MultiArray"
339
  },
 
341
  "hasShapeFlexibility" : "0",
342
  "isOptional" : "0",
343
  "dataType" : "Float16",
344
+ "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 508)",
345
  "shortDescription" : "",
346
+ "shape" : "[1, 32, 128, 508]",
347
  "name" : "new_k_cache_0",
348
  "type" : "MultiArray"
349
  },
 
351
  "hasShapeFlexibility" : "0",
352
  "isOptional" : "0",
353
  "dataType" : "Float16",
354
+ "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 508)",
355
  "shortDescription" : "",
356
+ "shape" : "[1, 32, 128, 508]",
357
  "name" : "new_k_cache_1",
358
  "type" : "MultiArray"
359
  },
 
361
  "hasShapeFlexibility" : "0",
362
  "isOptional" : "0",
363
  "dataType" : "Float16",
364
+ "formattedType" : "MultiArray (Float16 1 × 32 × 128 × 508)",
365
  "shortDescription" : "",
366
+ "shape" : "[1, 32, 128, 508]",
367
  "name" : "new_k_cache_2",
368
  "type" : "MultiArray"
369
  },
 
371
  "hasShapeFlexibility" : "0",
372
  "isOptional" : "0",
373
  "dataType" : "Float16",
374
+ "formattedType" : "MultiArray (Float16 1 × 32 × 508 × 128)",
375
  "shortDescription" : "",
376
+ "shape" : "[1, 32, 508, 128]",
377
  "name" : "new_v_cache_0",
378
  "type" : "MultiArray"
379
  },
 
381
  "hasShapeFlexibility" : "0",
382
  "isOptional" : "0",
383
  "dataType" : "Float16",
384
+ "formattedType" : "MultiArray (Float16 1 × 32 × 508 × 128)",
385
  "shortDescription" : "",
386
+ "shape" : "[1, 32, 508, 128]",
387
  "name" : "new_v_cache_1",
388
  "type" : "MultiArray"
389
  },
 
391
  "hasShapeFlexibility" : "0",
392
  "isOptional" : "0",
393
  "dataType" : "Float16",
394
+ "formattedType" : "MultiArray (Float16 1 × 32 × 508 × 128)",
395
  "shortDescription" : "",
396
+ "shape" : "[1, 32, 508, 128]",
397
  "name" : "new_v_cache_2",
398
  "type" : "MultiArray"
399
  }
 
402
  "mlProgramOperationTypeHistogram" : {
403
  "Ios18.constexprLutToDense" : 21,
404
  "Ios18.conv" : 21,
 
405
  "Ios18.expandDims" : 6,
406
+ "Ios18.matmul" : 6,
407
  "Ios18.concat" : 18,
408
  "Ios18.add" : 15,
409
  "Ios18.realDiv" : 6,
410
  "Ios18.silu" : 3,
411
  "Ios18.softmax" : 3,
412
  "Ios18.sliceByIndex" : 18,
413
+ "Ios18.transpose" : 6,
414
  "Ios16.reduceL2Norm" : 6,
415
  "Ios18.squeeze" : 6,
416
  "Ios18.reshape" : 12,
 
421
  "mlProgramOperationTypeHistogram" : {
422
  "Ios18.constexprLutToDense" : 21,
423
  "Ios18.conv" : 21,
 
424
  "Ios18.expandDims" : 6,
425
+ "Ios18.matmul" : 6,
426
+ "Ios18.concat" : 12,
427
  "Ios18.add" : 15,
428
  "Ios18.realDiv" : 6,
429
  "Ios18.silu" : 3,
430
  "Ios18.softmax" : 3,
431
  "Ios18.sliceByIndex" : 18,
432
+ "Ios18.transpose" : 6,
433
  "Ios16.reduceL2Norm" : 6,
434
  "Ios18.squeeze" : 6,
435
  "Ios18.reshape" : 12,
 
494
  }
495
  ],
496
  "defaultFunctionName" : "input_512_context_512",
497
+ "generatedClassName" : "Llama_2_7b_hf_2024_07_17_19_34_17_merged_chunk9",
498
  "userDefinedMetadata" : {
499
 
500
  },
sequoia/Llama-2-7b-hf_chunk9.mlmodelc/model.mil CHANGED
The diff for this file is too large to render. See raw diff
 
sequoia/logit-processor.mlmodelc/analytics/coremldata.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:74983de62535c92d990c8a01bc57449459a6ac9a8263109616a338cdee40e8a2
3
  size 243
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:72d9433176e6a80c761281219743bd081fc1f935801c88c62bfff49d064c7d7c
3
  size 243