Add image variation components

#8
by williamberman - opened
feature_extractor/preprocessor_config.json ADDED
@@ -0,0 +1,27 @@
+{
+  "crop_size": {
+    "height": 224,
+    "width": 224
+  },
+  "do_center_crop": true,
+  "do_convert_rgb": true,
+  "do_normalize": true,
+  "do_rescale": true,
+  "do_resize": true,
+  "image_mean": [
+    0.48145466,
+    0.4578275,
+    0.40821073
+  ],
+  "image_processor_type": "CLIPImageProcessor",
+  "image_std": [
+    0.26862954,
+    0.26130258,
+    0.27577711
+  ],
+  "resample": 3,
+  "rescale_factor": 0.00392156862745098,
+  "size": {
+    "shortest_edge": 224
+  }
+}
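
A minimal sketch of how this CLIPImageProcessor config is consumed at inference time, assuming transformers is installed; "your-org/your-repo" is a placeholder for wherever these components are merged:

from transformers import CLIPImageProcessor
from PIL import Image

# Load the config added above; the repo id is a placeholder, not this PR's repo name.
feature_extractor = CLIPImageProcessor.from_pretrained(
    "your-org/your-repo", subfolder="feature_extractor"
)

# Per the config: resize the shortest edge to 224, center-crop to 224x224,
# rescale by 1/255 (0.00392...), and normalize with the CLIP mean/std.
image = Image.open("input.png")
pixel_values = feature_extractor(images=image, return_tensors="pt").pixel_values
# pixel_values.shape == (1, 3, 224, 224)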
image_encoder/config.json ADDED
@@ -0,0 +1,23 @@
+{
+  "_name_or_path": "openai/clip-vit-large-patch14",
+  "architectures": [
+    "CLIPVisionModelWithProjection"
+  ],
+  "attention_dropout": 0.0,
+  "dropout": 0.0,
+  "hidden_act": "quick_gelu",
+  "hidden_size": 1024,
+  "image_size": 224,
+  "initializer_factor": 1.0,
+  "initializer_range": 0.02,
+  "intermediate_size": 4096,
+  "layer_norm_eps": 1e-05,
+  "model_type": "clip_vision_model",
+  "num_attention_heads": 16,
+  "num_channels": 3,
+  "num_hidden_layers": 24,
+  "patch_size": 14,
+  "projection_dim": 768,
+  "torch_dtype": "float32",
+  "transformers_version": "4.25.1"
+}
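
A minimal sketch of loading the vision encoder declared by this config and producing the projected image embedding used for conditioning; the repo id is again a placeholder:

import torch
from transformers import CLIPVisionModelWithProjection

# Placeholder repo id; subfolder matches the directory added in this PR.
image_encoder = CLIPVisionModelWithProjection.from_pretrained(
    "your-org/your-repo", subfolder="image_encoder"
)

with torch.no_grad():
    # pixel_values as produced by the feature extractor above
    pixel_values = torch.randn(1, 3, 224, 224)
    image_embeds = image_encoder(pixel_values).image_embeds
# image_embeds.shape == (1, 768), matching "projection_dim": 768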
image_encoder/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c304e9a1c99835f6c2d355cbb5438e7e6204d4903b2f1b056120be4bd5050f1f
+size 1215996977
model_index.json CHANGED
@@ -1,6 +1,6 @@
 {
   "_class_name": "UnCLIPPipeline",
-  "_diffusers_version": "0.11.0.dev0",
+  "_diffusers_version": "0.13.0.dev0",
   "decoder": [
     "diffusers",
     "UNet2DConditionModel"
@@ -40,5 +40,13 @@
   "tokenizer": [
     "transformers",
     "CLIPTokenizer"
+  ],
+  "feature_extractor": [
+    "transformers",
+    "CLIPImageProcessor"
+  ],
+  "image_encoder": [
+    "transformers",
+    "CLIPVisionModelWithProjection"
   ]
 }
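
With feature_extractor and image_encoder registered in model_index.json, the repo can back an image-variation pipeline. A hedged sketch, assuming diffusers' UnCLIPImageVariationPipeline and a placeholder repo id:

import torch
from diffusers import UnCLIPImageVariationPipeline
from PIL import Image

# Placeholder repo id; substitute the repo this PR targets.
pipe = UnCLIPImageVariationPipeline.from_pretrained(
    "your-org/your-repo", torch_dtype=torch.float16
)
pipe = pipe.to("cuda")

# The pipeline runs the input through the new CLIP feature extractor and
# image encoder, then decodes variations conditioned on the image embedding.
image = Image.open("input.png").convert("RGB")
variations = pipe(image=image, num_images_per_prompt=2).images
variations[0].save("variation.png")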