Vivek Bhakta committed on
Commit
de31d33
1 Parent(s): db6b744

Mixed bit palettization & compiled mlpackages

Browse files
Files changed (37) hide show
  1. mlpackages/TextEncoder.mlmodelc/analytics/coremldata.bin +3 -0
  2. mlpackages/TextEncoder.mlmodelc/coremldata.bin +3 -0
  3. mlpackages/TextEncoder.mlmodelc/metadata.json +82 -0
  4. mlpackages/TextEncoder.mlmodelc/model.mil +0 -0
  5. mlpackages/TextEncoder.mlmodelc/weights/weight.bin +3 -0
  6. mlpackages/TextEncoder2.mlmodelc/analytics/coremldata.bin +3 -0
  7. mlpackages/TextEncoder2.mlmodelc/coremldata.bin +3 -0
  8. mlpackages/TextEncoder2.mlmodelc/metadata.json +82 -0
  9. mlpackages/TextEncoder2.mlmodelc/model.mil +0 -0
  10. mlpackages/TextEncoder2.mlmodelc/weights/weight.bin +3 -0
  11. mlpackages/Unet.mlmodelc/analytics/coremldata.bin +3 -0
  12. mlpackages/Unet.mlmodelc/coremldata.bin +3 -0
  13. mlpackages/Unet.mlmodelc/metadata.json +124 -0
  14. mlpackages/Unet.mlmodelc/model.mil +0 -0
  15. mlpackages/Unet.mlmodelc/weights/weight.bin +3 -0
  16. mlpackages/VAEDecoder.mlmodelc/analytics/coremldata.bin +3 -0
  17. mlpackages/VAEDecoder.mlmodelc/coremldata.bin +3 -0
  18. mlpackages/VAEDecoder.mlmodelc/metadata.json +74 -0
  19. mlpackages/VAEDecoder.mlmodelc/model.mil +0 -0
  20. mlpackages/VAEDecoder.mlmodelc/weights/weight.bin +3 -0
  21. mlpackages/VAEEncoder.mlmodelc/analytics/coremldata.bin +3 -0
  22. mlpackages/VAEEncoder.mlmodelc/coremldata.bin +3 -0
  23. mlpackages/VAEEncoder.mlmodelc/metadata.json +75 -0
  24. mlpackages/VAEEncoder.mlmodelc/model.mil +0 -0
  25. mlpackages/VAEEncoder.mlmodelc/weights/weight.bin +3 -0
  26. mlpackages/merges.txt +0 -0
  27. mlpackages/vocab.json +0 -0
  28. model_index.json +22 -0
  29. scheduler/scheduler_config.json +22 -0
  30. tokenizer/merges.txt +0 -0
  31. tokenizer/special_tokens_map.json +24 -0
  32. tokenizer/tokenizer_config.json +30 -0
  33. tokenizer/vocab.json +0 -0
  34. tokenizer_2/merges.txt +0 -0
  35. tokenizer_2/special_tokens_map.json +24 -0
  36. tokenizer_2/tokenizer_config.json +38 -0
  37. tokenizer_2/vocab.json +0 -0
mlpackages/TextEncoder.mlmodelc/analytics/coremldata.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f732919fa370a1b7b09ec2b227539269b6543149a2b0dbae95cc4cf350e4b697
3
+ size 207
mlpackages/TextEncoder.mlmodelc/coremldata.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9ff26866d8d8fbb4e53a0628f8aab5f7edf1b3ec763a96e6812c8f7fbf4c9827
3
+ size 825
mlpackages/TextEncoder.mlmodelc/metadata.json ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "shortDescription" : "Stable Diffusion generates images conditioned on text and\/or other images as input through the diffusion process. Please refer to https:\/\/arxiv.org\/abs\/2112.10752 for details.",
4
+ "metadataOutputVersion" : "3.0",
5
+ "outputSchema" : [
6
+ {
7
+ "hasShapeFlexibility" : "0",
8
+ "isOptional" : "0",
9
+ "dataType" : "Float32",
10
+ "formattedType" : "MultiArray (Float32)",
11
+ "shortDescription" : "Hidden states after the encoder layers",
12
+ "shape" : "[]",
13
+ "name" : "hidden_embeds",
14
+ "type" : "MultiArray"
15
+ },
16
+ {
17
+ "hasShapeFlexibility" : "0",
18
+ "isOptional" : "0",
19
+ "dataType" : "Float32",
20
+ "formattedType" : "MultiArray (Float32)",
21
+ "shortDescription" : "The version of the `last_hidden_state` output after pooling",
22
+ "shape" : "[]",
23
+ "name" : "pooled_outputs",
24
+ "type" : "MultiArray"
25
+ }
26
+ ],
27
+ "version" : "diffusers\/stable-diffusion-xl-base-1.0",
28
+ "modelParameters" : [
29
+
30
+ ],
31
+ "author" : "Please refer to the Model Card available at huggingface.co\/diffusers\/stable-diffusion-xl-base-1.0",
32
+ "specificationVersion" : 7,
33
+ "storagePrecision" : "Float16",
34
+ "license" : "OpenRAIL (https:\/\/huggingface.co\/spaces\/CompVis\/stable-diffusion-license)",
35
+ "mlProgramOperationTypeHistogram" : {
36
+ "Ios16.cast" : 3,
37
+ "Ios16.mul" : 36,
38
+ "Ios16.layerNorm" : 25,
39
+ "Stack" : 1,
40
+ "Transpose" : 60,
41
+ "Ios16.sigmoid" : 12,
42
+ "Ios16.linear" : 72,
43
+ "Ios16.add" : 37,
44
+ "Ios16.matmul" : 24,
45
+ "Ios16.softmax" : 12,
46
+ "Ios16.gatherNd" : 1,
47
+ "Ios16.gather" : 1,
48
+ "Ios16.reshape" : 120,
49
+ "Ios16.reduceArgmax" : 1
50
+ },
51
+ "computePrecision" : "Mixed (Float16, Float32, Int32)",
52
+ "isUpdatable" : "0",
53
+ "availability" : {
54
+ "macOS" : "13.0",
55
+ "tvOS" : "16.0",
56
+ "watchOS" : "9.0",
57
+ "iOS" : "16.0",
58
+ "macCatalyst" : "16.0"
59
+ },
60
+ "modelType" : {
61
+ "name" : "MLModelType_mlProgram"
62
+ },
63
+ "inputSchema" : [
64
+ {
65
+ "hasShapeFlexibility" : "0",
66
+ "isOptional" : "0",
67
+ "dataType" : "Float32",
68
+ "formattedType" : "MultiArray (Float32 1 × 77)",
69
+ "shortDescription" : "The token ids that represent the input text",
70
+ "shape" : "[1, 77]",
71
+ "name" : "input_ids",
72
+ "type" : "MultiArray"
73
+ }
74
+ ],
75
+ "userDefinedMetadata" : {
76
+ "com.github.apple.coremltools.version" : "7.0b1",
77
+ "com.github.apple.coremltools.source" : "torch==2.0.1+cu117"
78
+ },
79
+ "generatedClassName" : "Stable_Diffusion_version_diffusers_stable_diffusion_xl_base_1_0_text_encoder",
80
+ "method" : "predict"
81
+ }
82
+ ]
mlpackages/TextEncoder.mlmodelc/model.mil ADDED
The diff for this file is too large to render. See raw diff
 
mlpackages/TextEncoder.mlmodelc/weights/weight.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a86533724aadf50c8f5539592d440887a484f60002d7967505c69c7faf4d7797
3
+ size 246145536
mlpackages/TextEncoder2.mlmodelc/analytics/coremldata.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eef66b480388714bb62f2f0f2f97a8953e44acbb00b25f9a9fd63c759f4f0e83
3
+ size 207
mlpackages/TextEncoder2.mlmodelc/coremldata.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9ff26866d8d8fbb4e53a0628f8aab5f7edf1b3ec763a96e6812c8f7fbf4c9827
3
+ size 825
mlpackages/TextEncoder2.mlmodelc/metadata.json ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "shortDescription" : "Stable Diffusion generates images conditioned on text and\/or other images as input through the diffusion process. Please refer to https:\/\/arxiv.org\/abs\/2112.10752 for details.",
4
+ "metadataOutputVersion" : "3.0",
5
+ "outputSchema" : [
6
+ {
7
+ "hasShapeFlexibility" : "0",
8
+ "isOptional" : "0",
9
+ "dataType" : "Float32",
10
+ "formattedType" : "MultiArray (Float32)",
11
+ "shortDescription" : "Hidden states after the encoder layers",
12
+ "shape" : "[]",
13
+ "name" : "hidden_embeds",
14
+ "type" : "MultiArray"
15
+ },
16
+ {
17
+ "hasShapeFlexibility" : "0",
18
+ "isOptional" : "0",
19
+ "dataType" : "Float32",
20
+ "formattedType" : "MultiArray (Float32)",
21
+ "shortDescription" : "The version of the `last_hidden_state` output after pooling",
22
+ "shape" : "[]",
23
+ "name" : "pooled_outputs",
24
+ "type" : "MultiArray"
25
+ }
26
+ ],
27
+ "version" : "diffusers\/stable-diffusion-xl-base-1.0",
28
+ "modelParameters" : [
29
+
30
+ ],
31
+ "author" : "Please refer to the Model Card available at huggingface.co\/diffusers\/stable-diffusion-xl-base-1.0",
32
+ "specificationVersion" : 7,
33
+ "storagePrecision" : "Float16",
34
+ "license" : "OpenRAIL (https:\/\/huggingface.co\/spaces\/CompVis\/stable-diffusion-license)",
35
+ "mlProgramOperationTypeHistogram" : {
36
+ "Ios16.cast" : 3,
37
+ "Ios16.mul" : 32,
38
+ "Ios16.layerNorm" : 65,
39
+ "Stack" : 1,
40
+ "Transpose" : 160,
41
+ "Ios16.linear" : 193,
42
+ "Ios16.add" : 97,
43
+ "Ios16.matmul" : 64,
44
+ "Ios16.gelu" : 32,
45
+ "Ios16.softmax" : 32,
46
+ "Ios16.gatherNd" : 1,
47
+ "Ios16.gather" : 1,
48
+ "Ios16.reshape" : 320,
49
+ "Ios16.reduceArgmax" : 1
50
+ },
51
+ "computePrecision" : "Mixed (Float16, Float32, Int32)",
52
+ "isUpdatable" : "0",
53
+ "availability" : {
54
+ "macOS" : "13.0",
55
+ "tvOS" : "16.0",
56
+ "watchOS" : "9.0",
57
+ "iOS" : "16.0",
58
+ "macCatalyst" : "16.0"
59
+ },
60
+ "modelType" : {
61
+ "name" : "MLModelType_mlProgram"
62
+ },
63
+ "inputSchema" : [
64
+ {
65
+ "hasShapeFlexibility" : "0",
66
+ "isOptional" : "0",
67
+ "dataType" : "Float32",
68
+ "formattedType" : "MultiArray (Float32 1 × 77)",
69
+ "shortDescription" : "The token ids that represent the input text",
70
+ "shape" : "[1, 77]",
71
+ "name" : "input_ids",
72
+ "type" : "MultiArray"
73
+ }
74
+ ],
75
+ "userDefinedMetadata" : {
76
+ "com.github.apple.coremltools.version" : "7.0b1",
77
+ "com.github.apple.coremltools.source" : "torch==2.0.1+cu117"
78
+ },
79
+ "generatedClassName" : "Stable_Diffusion_version_diffusers_stable_diffusion_xl_base_1_0_text_encoder_2",
80
+ "method" : "predict"
81
+ }
82
+ ]
mlpackages/TextEncoder2.mlmodelc/model.mil ADDED
The diff for this file is too large to render. See raw diff
 
mlpackages/TextEncoder2.mlmodelc/weights/weight.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8bd1fc0bcce11cff685648387b0060e0b6ecfce6c34e580e1ae904cae5903363
3
+ size 1389367424
mlpackages/Unet.mlmodelc/analytics/coremldata.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:48f726f7f29091a23eb434bd4febf1f83733c95db26b912dfa671cdccad5d874
3
+ size 243
mlpackages/Unet.mlmodelc/coremldata.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8aea3887ffdc8e059925f3981259e1cd3227b827e5f91edff613c73ac0ea16f6
3
+ size 1338
mlpackages/Unet.mlmodelc/metadata.json ADDED
@@ -0,0 +1,124 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "shortDescription" : "Stable Diffusion generates images conditioned on text or other images as input through the diffusion process. Please refer to https:\/\/arxiv.org\/abs\/2112.10752 for details.",
4
+ "metadataOutputVersion" : "3.0",
5
+ "outputSchema" : [
6
+ {
7
+ "hasShapeFlexibility" : "0",
8
+ "isOptional" : "0",
9
+ "dataType" : "Float32",
10
+ "formattedType" : "MultiArray (Float32)",
11
+ "shortDescription" : "Same shape and dtype as the `sample` input. The predicted noise to facilitate the reverse diffusion (denoising) process",
12
+ "shape" : "[]",
13
+ "name" : "noise_pred",
14
+ "type" : "MultiArray"
15
+ }
16
+ ],
17
+ "version" : "diffusers\/stable-diffusion-xl-base-1.0",
18
+ "modelParameters" : [
19
+
20
+ ],
21
+ "author" : "Please refer to the Model Card available at huggingface.co\/diffusers\/stable-diffusion-xl-base-1.0",
22
+ "specificationVersion" : 7,
23
+ "storagePrecision" : "Mixed (Float16, Palettized (1 bits), Palettized (2 bits), Palettized (4 bits), Palettized (6 bits), Palettized (8 bits))",
24
+ "license" : "OpenRAIL (https:\/\/huggingface.co\/spaces\/CompVis\/stable-diffusion-license)",
25
+ "mlProgramOperationTypeHistogram" : {
26
+ "UpsampleNearestNeighbor" : 2,
27
+ "Ios16.reduceMean" : 512,
28
+ "Ios16.sin" : 2,
29
+ "Ios16.softmax" : 140,
30
+ "Split" : 70,
31
+ "Ios16.add" : 722,
32
+ "Concat" : 14,
33
+ "Ios16.realDiv" : 46,
34
+ "Ios16.square" : 46,
35
+ "ExpandDims" : 6,
36
+ "Ios16.sub" : 256,
37
+ "Ios16.cast" : 1,
38
+ "Ios16.conv" : 794,
39
+ "Ios16.constexprLutToDense" : 775,
40
+ "Ios16.gelu" : 70,
41
+ "Ios16.matmul" : 280,
42
+ "Ios16.batchNorm" : 46,
43
+ "Ios16.reshape" : 675,
44
+ "Ios16.rsqrt" : 210,
45
+ "Ios16.silu" : 38,
46
+ "Ios16.sqrt" : 46,
47
+ "Ios16.mul" : 842,
48
+ "Ios16.cos" : 2,
49
+ "SliceByIndex" : 4
50
+ },
51
+ "computePrecision" : "Mixed (Float16, Float32, Int32)",
52
+ "isUpdatable" : "0",
53
+ "availability" : {
54
+ "macOS" : "13.0",
55
+ "tvOS" : "16.0",
56
+ "visionOS" : "1.0",
57
+ "watchOS" : "9.0",
58
+ "iOS" : "16.0",
59
+ "macCatalyst" : "16.0"
60
+ },
61
+ "modelType" : {
62
+ "name" : "MLModelType_mlProgram"
63
+ },
64
+ "inputSchema" : [
65
+ {
66
+ "hasShapeFlexibility" : "0",
67
+ "isOptional" : "0",
68
+ "dataType" : "Float16",
69
+ "formattedType" : "MultiArray (Float16 2 × 4 × 128 × 128)",
70
+ "shortDescription" : "The low resolution latent feature maps being denoised through reverse diffusion",
71
+ "shape" : "[2, 4, 128, 128]",
72
+ "name" : "sample",
73
+ "type" : "MultiArray"
74
+ },
75
+ {
76
+ "hasShapeFlexibility" : "0",
77
+ "isOptional" : "0",
78
+ "dataType" : "Float16",
79
+ "formattedType" : "MultiArray (Float16 2)",
80
+ "shortDescription" : "A value emitted by the associated scheduler object to condition the model on a given noise schedule",
81
+ "shape" : "[2]",
82
+ "name" : "timestep",
83
+ "type" : "MultiArray"
84
+ },
85
+ {
86
+ "hasShapeFlexibility" : "0",
87
+ "isOptional" : "0",
88
+ "dataType" : "Float16",
89
+ "formattedType" : "MultiArray (Float16 2 × 2048 × 1 × 77)",
90
+ "shortDescription" : "Output embeddings from the associated text_encoder model to condition to generated image on text. A maximum of 77 tokens (~40 words) are allowed. Longer text is truncated. Shorter text does not reduce computation.",
91
+ "shape" : "[2, 2048, 1, 77]",
92
+ "name" : "encoder_hidden_states",
93
+ "type" : "MultiArray"
94
+ },
95
+ {
96
+ "hasShapeFlexibility" : "0",
97
+ "isOptional" : "0",
98
+ "dataType" : "Float16",
99
+ "formattedType" : "MultiArray (Float16 12)",
100
+ "shortDescription" : "",
101
+ "shape" : "[12]",
102
+ "name" : "time_ids",
103
+ "type" : "MultiArray"
104
+ },
105
+ {
106
+ "hasShapeFlexibility" : "0",
107
+ "isOptional" : "0",
108
+ "dataType" : "Float16",
109
+ "formattedType" : "MultiArray (Float16 2 × 1280)",
110
+ "shortDescription" : "",
111
+ "shape" : "[2, 1280]",
112
+ "name" : "text_embeds",
113
+ "type" : "MultiArray"
114
+ }
115
+ ],
116
+ "userDefinedMetadata" : {
117
+ "com.github.apple.coremltools.version" : "7.0b1",
118
+ "com.github.apple.coremltools.source" : "torch==2.0.1+cu117",
119
+ "com.github.apple.ml-stable-diffusion.version" : "1.0.0"
120
+ },
121
+ "generatedClassName" : "recipe_4_50_bit_mixedpalette",
122
+ "method" : "predict"
123
+ }
124
+ ]
mlpackages/Unet.mlmodelc/model.mil ADDED
The diff for this file is too large to render. See raw diff
 
mlpackages/Unet.mlmodelc/weights/weight.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d9219e9fcaa60ff76a31ea84658b485264b50f66147c058e18a7cfd456bb0f3a
3
+ size 1450049728
mlpackages/VAEDecoder.mlmodelc/analytics/coremldata.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bac2854504d7f6bfdc5645982dd965cca9cc8c12b9fdd2493cf50cd583684cc2
3
+ size 207
mlpackages/VAEDecoder.mlmodelc/coremldata.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:766fce2585587fe93a1d32e09bc4d63ad45335ea62239f11b193e481b5888258
3
+ size 773
mlpackages/VAEDecoder.mlmodelc/metadata.json ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "shortDescription" : "Stable Diffusion generates images conditioned on text and\/or other images as input through the diffusion process. Please refer to https:\/\/arxiv.org\/abs\/2112.10752 for details.",
4
+ "metadataOutputVersion" : "3.0",
5
+ "outputSchema" : [
6
+ {
7
+ "hasShapeFlexibility" : "0",
8
+ "isOptional" : "0",
9
+ "dataType" : "Float32",
10
+ "formattedType" : "MultiArray (Float32)",
11
+ "shortDescription" : "Generated image normalized to range [-1, 1]",
12
+ "shape" : "[]",
13
+ "name" : "image",
14
+ "type" : "MultiArray"
15
+ }
16
+ ],
17
+ "version" : "diffusers\/stable-diffusion-xl-base-1.0",
18
+ "modelParameters" : [
19
+
20
+ ],
21
+ "author" : "Please refer to the Model Card available at huggingface.co\/diffusers\/stable-diffusion-xl-base-1.0",
22
+ "specificationVersion" : 7,
23
+ "storagePrecision" : "Float32",
24
+ "license" : "OpenRAIL (https:\/\/huggingface.co\/spaces\/CompVis\/stable-diffusion-license)",
25
+ "mlProgramOperationTypeHistogram" : {
26
+ "Ios16.mul" : 2,
27
+ "Ios16.sqrt" : 30,
28
+ "Ios16.sub" : 30,
29
+ "Transpose" : 6,
30
+ "UpsampleNearestNeighbor" : 3,
31
+ "Ios16.conv" : 36,
32
+ "Ios16.add" : 46,
33
+ "Ios16.linear" : 4,
34
+ "Ios16.matmul" : 2,
35
+ "Ios16.realDiv" : 30,
36
+ "Ios16.reduceMean" : 60,
37
+ "Ios16.softmax" : 1,
38
+ "Ios16.batchNorm" : 29,
39
+ "Ios16.square" : 30,
40
+ "Ios16.reshape" : 65,
41
+ "Ios16.silu" : 29
42
+ },
43
+ "computePrecision" : "Mixed (Float32, Int32)",
44
+ "isUpdatable" : "0",
45
+ "availability" : {
46
+ "macOS" : "13.0",
47
+ "tvOS" : "16.0",
48
+ "watchOS" : "9.0",
49
+ "iOS" : "16.0",
50
+ "macCatalyst" : "16.0"
51
+ },
52
+ "modelType" : {
53
+ "name" : "MLModelType_mlProgram"
54
+ },
55
+ "inputSchema" : [
56
+ {
57
+ "hasShapeFlexibility" : "0",
58
+ "isOptional" : "0",
59
+ "dataType" : "Float32",
60
+ "formattedType" : "MultiArray (Float32 1 × 4 × 128 × 128)",
61
+ "shortDescription" : "The denoised latent embeddings from the unet model after the last step of reverse diffusion",
62
+ "shape" : "[1, 4, 128, 128]",
63
+ "name" : "z",
64
+ "type" : "MultiArray"
65
+ }
66
+ ],
67
+ "userDefinedMetadata" : {
68
+ "com.github.apple.coremltools.version" : "7.0b1",
69
+ "com.github.apple.coremltools.source" : "torch==2.0.1"
70
+ },
71
+ "generatedClassName" : "Stable_Diffusion_version_diffusers_stable_diffusion_xl_base_1_0_vae_decoder",
72
+ "method" : "predict"
73
+ }
74
+ ]
mlpackages/VAEDecoder.mlmodelc/model.mil ADDED
The diff for this file is too large to render. See raw diff
 
mlpackages/VAEDecoder.mlmodelc/weights/weight.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7ade814d6037fb5ba892963be1596c8e37852f96c399101401831f8c07e64bd2
3
+ size 197977216
mlpackages/VAEEncoder.mlmodelc/analytics/coremldata.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0e44297c358d64101602d9abfe4d6c9fb96ddb3b120f84fbb74001aa4312cf93
3
+ size 207
mlpackages/VAEEncoder.mlmodelc/coremldata.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7269e365034d061c3ad035d9a5b7c38864d7db71bee6fb7545c97a4942a865f1
3
+ size 783
mlpackages/VAEEncoder.mlmodelc/metadata.json ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "shortDescription" : "Stable Diffusion generates images conditioned on text and\/or other images as input through the diffusion process. Please refer to https:\/\/arxiv.org\/abs\/2112.10752 for details.",
4
+ "metadataOutputVersion" : "3.0",
5
+ "outputSchema" : [
6
+ {
7
+ "hasShapeFlexibility" : "0",
8
+ "isOptional" : "0",
9
+ "dataType" : "Float32",
10
+ "formattedType" : "MultiArray (Float32)",
11
+ "shortDescription" : "The latent embeddings from the unet model from the input image.",
12
+ "shape" : "[]",
13
+ "name" : "latent",
14
+ "type" : "MultiArray"
15
+ }
16
+ ],
17
+ "version" : "diffusers\/stable-diffusion-xl-base-1.0",
18
+ "modelParameters" : [
19
+
20
+ ],
21
+ "author" : "Please refer to the Model Card available at huggingface.co\/diffusers\/stable-diffusion-xl-base-1.0",
22
+ "specificationVersion" : 7,
23
+ "storagePrecision" : "Float16",
24
+ "license" : "OpenRAIL (https:\/\/huggingface.co\/spaces\/CompVis\/stable-diffusion-license)",
25
+ "mlProgramOperationTypeHistogram" : {
26
+ "Pad" : 3,
27
+ "Ios16.cast" : 1,
28
+ "Ios16.mul" : 2,
29
+ "Ios16.sqrt" : 22,
30
+ "Ios16.sub" : 22,
31
+ "Transpose" : 6,
32
+ "Ios16.conv" : 28,
33
+ "Ios16.add" : 34,
34
+ "Ios16.linear" : 4,
35
+ "Ios16.matmul" : 2,
36
+ "Ios16.realDiv" : 22,
37
+ "Ios16.reduceMean" : 44,
38
+ "Ios16.softmax" : 1,
39
+ "Ios16.batchNorm" : 21,
40
+ "Ios16.square" : 22,
41
+ "Ios16.reshape" : 49,
42
+ "Ios16.silu" : 21
43
+ },
44
+ "computePrecision" : "Mixed (Float16, Float32, Int32)",
45
+ "isUpdatable" : "0",
46
+ "availability" : {
47
+ "macOS" : "13.0",
48
+ "tvOS" : "16.0",
49
+ "watchOS" : "9.0",
50
+ "iOS" : "16.0",
51
+ "macCatalyst" : "16.0"
52
+ },
53
+ "modelType" : {
54
+ "name" : "MLModelType_mlProgram"
55
+ },
56
+ "inputSchema" : [
57
+ {
58
+ "hasShapeFlexibility" : "0",
59
+ "isOptional" : "0",
60
+ "dataType" : "Float16",
61
+ "formattedType" : "MultiArray (Float16 1 × 3 × 1024 × 1024)",
62
+ "shortDescription" : "The input image to base the initial latents on normalized to range [-1, 1]",
63
+ "shape" : "[1, 3, 1024, 1024]",
64
+ "name" : "z",
65
+ "type" : "MultiArray"
66
+ }
67
+ ],
68
+ "userDefinedMetadata" : {
69
+ "com.github.apple.coremltools.version" : "7.0b1",
70
+ "com.github.apple.coremltools.source" : "torch==2.0.1+cu117"
71
+ },
72
+ "generatedClassName" : "Stable_Diffusion_version_diffusers_stable_diffusion_xl_base_1_0_vae_encoder",
73
+ "method" : "predict"
74
+ }
75
+ ]
mlpackages/VAEEncoder.mlmodelc/model.mil ADDED
The diff for this file is too large to render. See raw diff
 
mlpackages/VAEEncoder.mlmodelc/weights/weight.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:329f708df0bae1990a1886007b5ae56cfd9a44e7091e8f822db907a9fc411858
3
+ size 68338112
mlpackages/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
mlpackages/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
model_index.json ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_class_name": "StableDiffusionXLMinimalPipeline",
3
+ "_diffusers_version": "0.27.2",
4
+ "_name_or_path": "stabilityai/stable-diffusion-xl-base-1.0",
5
+ "feature_extractor": [
6
+ null,
7
+ null
8
+ ],
9
+ "force_zeros_for_empty_prompt": true,
10
+ "scheduler": [
11
+ "diffusers",
12
+ "EulerDiscreteScheduler"
13
+ ],
14
+ "tokenizer": [
15
+ "transformers",
16
+ "CLIPTokenizer"
17
+ ],
18
+ "tokenizer_2": [
19
+ "transformers",
20
+ "CLIPTokenizer"
21
+ ]
22
+ }
scheduler/scheduler_config.json ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_class_name": "EulerDiscreteScheduler",
3
+ "_diffusers_version": "0.27.2",
4
+ "beta_end": 0.012,
5
+ "beta_schedule": "scaled_linear",
6
+ "beta_start": 0.00085,
7
+ "clip_sample": false,
8
+ "interpolation_type": "linear",
9
+ "num_train_timesteps": 1000,
10
+ "prediction_type": "epsilon",
11
+ "rescale_betas_zero_snr": false,
12
+ "sample_max_value": 1.0,
13
+ "set_alpha_to_one": false,
14
+ "sigma_max": null,
15
+ "sigma_min": null,
16
+ "skip_prk_steps": true,
17
+ "steps_offset": 1,
18
+ "timestep_spacing": "leading",
19
+ "timestep_type": "discrete",
20
+ "trained_betas": null,
21
+ "use_karras_sigmas": false
22
+ }
tokenizer/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer/special_tokens_map.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<|startoftext|>",
4
+ "lstrip": false,
5
+ "normalized": true,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "<|endoftext|>",
11
+ "lstrip": false,
12
+ "normalized": true,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": "<|endoftext|>",
17
+ "unk_token": {
18
+ "content": "<|endoftext|>",
19
+ "lstrip": false,
20
+ "normalized": true,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ }
24
+ }
tokenizer/tokenizer_config.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "added_tokens_decoder": {
4
+ "49406": {
5
+ "content": "<|startoftext|>",
6
+ "lstrip": false,
7
+ "normalized": true,
8
+ "rstrip": false,
9
+ "single_word": false,
10
+ "special": true
11
+ },
12
+ "49407": {
13
+ "content": "<|endoftext|>",
14
+ "lstrip": false,
15
+ "normalized": true,
16
+ "rstrip": false,
17
+ "single_word": false,
18
+ "special": true
19
+ }
20
+ },
21
+ "bos_token": "<|startoftext|>",
22
+ "clean_up_tokenization_spaces": true,
23
+ "do_lower_case": true,
24
+ "eos_token": "<|endoftext|>",
25
+ "errors": "replace",
26
+ "model_max_length": 77,
27
+ "pad_token": "<|endoftext|>",
28
+ "tokenizer_class": "CLIPTokenizer",
29
+ "unk_token": "<|endoftext|>"
30
+ }
tokenizer/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_2/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_2/special_tokens_map.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<|startoftext|>",
4
+ "lstrip": false,
5
+ "normalized": true,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "<|endoftext|>",
11
+ "lstrip": false,
12
+ "normalized": true,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": "!",
17
+ "unk_token": {
18
+ "content": "<|endoftext|>",
19
+ "lstrip": false,
20
+ "normalized": true,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ }
24
+ }
tokenizer_2/tokenizer_config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "added_tokens_decoder": {
4
+ "0": {
5
+ "content": "!",
6
+ "lstrip": false,
7
+ "normalized": false,
8
+ "rstrip": false,
9
+ "single_word": false,
10
+ "special": true
11
+ },
12
+ "49406": {
13
+ "content": "<|startoftext|>",
14
+ "lstrip": false,
15
+ "normalized": true,
16
+ "rstrip": false,
17
+ "single_word": false,
18
+ "special": true
19
+ },
20
+ "49407": {
21
+ "content": "<|endoftext|>",
22
+ "lstrip": false,
23
+ "normalized": true,
24
+ "rstrip": false,
25
+ "single_word": false,
26
+ "special": true
27
+ }
28
+ },
29
+ "bos_token": "<|startoftext|>",
30
+ "clean_up_tokenization_spaces": true,
31
+ "do_lower_case": true,
32
+ "eos_token": "<|endoftext|>",
33
+ "errors": "replace",
34
+ "model_max_length": 77,
35
+ "pad_token": "!",
36
+ "tokenizer_class": "CLIPTokenizer",
37
+ "unk_token": "<|endoftext|>"
38
+ }
tokenizer_2/vocab.json ADDED
The diff for this file is too large to render. See raw diff