Upload 6 files

Browse files

Files changed (6) hide show

unet/config.json +70 -0
unet/coreml_model.mlmodelc/analytics/coremldata.bin +3 -0
unet/coreml_model.mlmodelc/coremldata.bin +3 -0
unet/coreml_model.mlmodelc/metadata.json +225 -0
unet/coreml_model.mlmodelc/model.mil +0 -0
unet/coreml_model.mlmodelc/weights/weight.bin +3 -0

unet/config.json ADDED Viewed

	@@ -0,0 +1,70 @@

+{
+  "_class_name": "UNet2DConditionModel",
+  "_diffusers_version": "0.19.0.dev0",
+  "act_fn": "silu",
+  "addition_embed_type": "text_time",
+  "addition_embed_type_num_heads": 64,
+  "addition_time_embed_dim": 256,
+  "attention_head_dim": [
+    5,
+    10,
+    20
+  ],
+  "block_out_channels": [
+    320,
+    640,
+    1280
+  ],
+  "center_input_sample": false,
+  "class_embed_type": null,
+  "class_embeddings_concat": false,
+  "conv_in_kernel": 3,
+  "conv_out_kernel": 3,
+  "cross_attention_dim": 2048,
+  "cross_attention_norm": null,
+  "down_block_types": [
+    "DownBlock2D",
+    "CrossAttnDownBlock2D",
+    "CrossAttnDownBlock2D"
+  ],
+  "downsample_padding": 1,
+  "dual_cross_attention": false,
+  "encoder_hid_dim": null,
+  "encoder_hid_dim_type": null,
+  "flip_sin_to_cos": true,
+  "freq_shift": 0,
+  "in_channels": 4,
+  "layers_per_block": 2,
+  "mid_block_only_cross_attention": null,
+  "mid_block_scale_factor": 1,
+  "mid_block_type": "UNetMidBlock2DCrossAttn",
+  "norm_eps": 1e-05,
+  "norm_num_groups": 32,
+  "num_attention_heads": null,
+  "num_class_embeds": null,
+  "only_cross_attention": false,
+  "out_channels": 4,
+  "projection_class_embeddings_input_dim": 2816,
+  "resnet_out_scale_factor": 1.0,
+  "resnet_skip_time_act": false,
+  "resnet_time_scale_shift": "default",
+  "sample_size": 128,
+  "time_cond_proj_dim": null,
+  "time_embedding_act_fn": null,
+  "time_embedding_dim": null,
+  "time_embedding_type": "positional",
+  "timestep_post_act": null,
+  "transformer_layers_per_block": [
+    1,
+    2,
+    10
+  ],
+  "up_block_types": [
+    "CrossAttnUpBlock2D",
+    "CrossAttnUpBlock2D",
+    "UpBlock2D"
+  ],
+  "upcast_attention": null,
+  "use_linear_projection": true
+}

unet/coreml_model.mlmodelc/analytics/coremldata.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b5c09e8cba81e2f8c4c5ff155c5f8a41ae5d2271e6e040c849c6d00f66087346
+size 243

unet/coreml_model.mlmodelc/coremldata.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d57b9bd69997d46bf280e2ef570a2b2e2b3238c24ef6bb55ca8bf25baa2fb5ec
+size 2081

unet/coreml_model.mlmodelc/metadata.json ADDED Viewed

	@@ -0,0 +1,225 @@

+[
+  {
+    "shortDescription" : "Stable Diffusion generates images conditioned on text or other images as input through the diffusion process. Please refer to https:\/\/arxiv.org\/abs\/2112.10752 for details.",
+    "metadataOutputVersion" : "3.0",
+    "outputSchema" : [
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float32",
+        "formattedType" : "MultiArray (Float32 1 × 4 × 128 × 128)",
+        "shortDescription" : "Same shape and dtype as the `sample` input. The predicted noise to facilitate the reverse diffusion (denoising) process",
+        "shape" : "[1, 4, 128, 128]",
+        "name" : "noise_pred",
+        "type" : "MultiArray"
+      }
+    ],
+    "version" : "stabilityai\/stable-diffusion-xl-base-1.0",
+    "modelParameters" : [
+    ],
+    "author" : "Please refer to the Model Card available at huggingface.co\/stabilityai\/stable-diffusion-xl-base-1.0",
+    "specificationVersion" : 7,
+    "storagePrecision" : "Mixed (Float16, Palettized (6 bits))",
+    "license" : "OpenRAIL++-M (https:\/\/huggingface.co\/stabilityai\/stable-diffusion-xl-base-1.0\/blob\/main\/LICENSE.md)",
+    "mlProgramOperationTypeHistogram" : {
+      "UpsampleNearestNeighbor" : 2,
+      "Ios16.reduceMean" : 92,
+      "Ios16.sin" : 2,
+      "Ios16.softmax" : 140,
+      "Split" : 70,
+      "Ios16.add" : 312,
+      "Concat" : 14,
+      "Ios16.realDiv" : 46,
+      "Ios16.square" : 46,
+      "ExpandDims" : 6,
+      "Ios16.sub" : 46,
+      "Ios16.cast" : 1,
+      "Ios16.conv" : 794,
+      "Ios16.constexprLutToDense" : 870,
+      "Ios16.gelu" : 70,
+      "Ios16.layerNorm" : 210,
+      "Ios16.batchNorm" : 46,
+      "Ios16.matmul" : 280,
+      "Ios16.reshape" : 676,
+      "Ios16.silu" : 38,
+      "Ios16.sqrt" : 46,
+      "Ios16.mul" : 212,
+      "Ios16.cos" : 2,
+      "SliceByIndex" : 4
+    },
+    "computePrecision" : "Mixed (Float16, Float32, Int32)",
+    "isUpdatable" : "0",
+    "availability" : {
+      "macOS" : "13.0",
+      "tvOS" : "16.0",
+      "visionOS" : "1.0",
+      "watchOS" : "9.0",
+      "iOS" : "16.0",
+      "macCatalyst" : "16.0"
+    },
+    "modelType" : {
+      "name" : "MLModelType_mlProgram"
+    },
+    "inputSchema" : [
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 4 × 128 × 128)",
+        "shortDescription" : "The low resolution latent feature maps being denoised through reverse diffusion",
+        "shape" : "[1, 4, 128, 128]",
+        "name" : "sample",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1)",
+        "shortDescription" : "A value emitted by the associated scheduler object to condition the model on a given noise schedule",
+        "shape" : "[1]",
+        "name" : "timestep",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 2048 × 1 × 77)",
+        "shortDescription" : "Output embeddings from the associated text_encoder model to condition the generated image on text. A maximum of 77 tokens (~40 words) is allowed. Longer text is truncated. Shorter text does not reduce computation.",
+        "shape" : "[1, 2048, 1, 77]",
+        "name" : "encoder_hidden_states",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 6)",
+        "shortDescription" : "Additional embeddings that if specified are added to the embeddings that are passed along to the UNet blocks.",
+        "shape" : "[1, 6]",
+        "name" : "time_ids",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 1280)",
+        "shortDescription" : "Additional embeddings from text_encoder_2 that if specified are added to the embeddings that are passed along to the UNet blocks.",
+        "shape" : "[1, 1280]",
+        "name" : "text_embeds",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 320 × 128 × 128)",
+        "shortDescription" : "",
+        "shape" : "[1, 320, 128, 128]",
+        "name" : "additional_residual_0",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 320 × 128 × 128)",
+        "shortDescription" : "",
+        "shape" : "[1, 320, 128, 128]",
+        "name" : "additional_residual_1",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 320 × 128 × 128)",
+        "shortDescription" : "",
+        "shape" : "[1, 320, 128, 128]",
+        "name" : "additional_residual_2",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 320 × 64 × 64)",
+        "shortDescription" : "",
+        "shape" : "[1, 320, 64, 64]",
+        "name" : "additional_residual_3",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 640 × 64 × 64)",
+        "shortDescription" : "",
+        "shape" : "[1, 640, 64, 64]",
+        "name" : "additional_residual_4",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 640 × 64 × 64)",
+        "shortDescription" : "",
+        "shape" : "[1, 640, 64, 64]",
+        "name" : "additional_residual_5",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 640 × 32 × 32)",
+        "shortDescription" : "",
+        "shape" : "[1, 640, 32, 32]",
+        "name" : "additional_residual_6",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 1280 × 32 × 32)",
+        "shortDescription" : "",
+        "shape" : "[1, 1280, 32, 32]",
+        "name" : "additional_residual_7",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 1280 × 32 × 32)",
+        "shortDescription" : "",
+        "shape" : "[1, 1280, 32, 32]",
+        "name" : "additional_residual_8",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 1280 × 32 × 32)",
+        "shortDescription" : "",
+        "shape" : "[1, 1280, 32, 32]",
+        "name" : "additional_residual_9",
+        "type" : "MultiArray"
+      }
+    ],
+    "userDefinedMetadata" : {
+      "com.github.apple.ml-stable-diffusion.version" : "1.1.0",
+      "com.github.apple.coremltools.source" : "torch==2.2.0",
+      "com.github.apple.coremltools.version" : "7.2",
+      "com.github.apple.coremltools.source_dialect" : "TorchScript"
+    },
+    "generatedClassName" : "Stable_Diffusion_version_stabilityai_stable_diffusion_xl_base_1_0_control_unet",
+    "method" : "predict"
+  }
+]

unet/coreml_model.mlmodelc/model.mil ADDED Viewed

The diff for this file is too large to render. See raw diff

unet/coreml_model.mlmodelc/weights/weight.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7e4d55b05fb4422fb0c02291c56f2e5a00bd86708f093c273331df221c3e2c5b
+size 1927032512