patrickvonplaten committed
Commit c9c864b
1 Parent(s): 0b1fa07

[Celeba-256] Upload first model

generated_image.png ADDED
model_index.json ADDED
@@ -0,0 +1,16 @@
+{
+  "_class_name": "LatentDiffusionUncondPipeline",
+  "_diffusers_version": "0.0.4",
+  "scheduler": [
+    "diffusers",
+    "DDIMScheduler"
+  ],
+  "unet": [
+    "diffusers",
+    "UNetUnconditionalModel"
+  ],
+  "vqvae": [
+    "diffusers",
+    "VQModel"
+  ]
+}
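model_index.json maps each pipeline component to the library and class that implements it, so the whole pipeline can also be loaded in one call instead of assembling the parts by hand as run.py below does. A minimal sketch, assuming the from_pretrained API of this diffusers version resolves each component from its subfolder:

    from diffusers import LatentDiffusionUncondPipeline

    # Reads model_index.json and loads scheduler, unet, and vqvae from
    # their subfolders; call signatures may differ across versions.
    pipeline = LatentDiffusionUncondPipeline.from_pretrained("./")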
run.py ADDED
@@ -0,0 +1,51 @@
+#!/usr/bin/env python3
+from diffusers import UNetUnconditionalModel, DDIMScheduler, VQModel
+import torch
+import PIL.Image
+import numpy as np
+import tqdm
+
+# load all models
+unet = UNetUnconditionalModel.from_pretrained("./", subfolder="unet")
+vqvae = VQModel.from_pretrained("./", subfolder="vqvae")
+scheduler = DDIMScheduler.from_config("./", subfolder="scheduler")
+
+# move models to GPU if available
+torch_device = "cuda" if torch.cuda.is_available() else "cpu"
+
+unet.to(torch_device)
+vqvae.to(torch_device)
+
+# generate gaussian noise in the latent space to be denoised
+generator = torch.manual_seed(0)
+noise = torch.randn(
+    (1, unet.in_channels, unet.image_size, unet.image_size),
+    generator=generator,
+).to(torch_device)
+
+# set the number of inference steps for DDIM
+scheduler.set_timesteps(num_inference_steps=50)
+
+image = noise
+for t in tqdm.tqdm(scheduler.timesteps):
+    # predict the noise residual for the current latents at timestep t
+    with torch.no_grad():
+        residual = unet(image, t)["sample"]
+
+    # compute the previous latents x_{t-1} according to the DDIM formula
+    prev_image = scheduler.step(residual, t, image, eta=0.0)["prev_sample"]
+
+    # x_{t-1} becomes the input of the next step
+    image = prev_image
+
+# decode the final latents into an image with the VQ-VAE
+with torch.no_grad():
+    image = vqvae.decode(image)
+
+# map from [-1, 1] to [0, 255] and convert to a PIL image
+image_processed = image.cpu().permute(0, 2, 3, 1)
+image_processed = (image_processed + 1.0) * 127.5
+image_processed = image_processed.numpy().astype(np.uint8)
+image_pil = PIL.Image.fromarray(image_processed[0])
+
+image_pil.save("generated_image.png")
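The post-processing at the end assumes the VQ-VAE decoder emits values in [-1, 1]: adding 1.0 and multiplying by 127.5 maps that range onto [0, 255] before the uint8 cast.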
scheduler/scheduler_config.json ADDED
@@ -0,0 +1,11 @@
+{
+  "_class_name": "DDIMScheduler",
+  "_diffusers_version": "0.0.4",
+  "beta_end": 0.0195,
+  "beta_schedule": "scaled_linear",
+  "beta_start": 0.0015,
+  "clip_sample": false,
+  "timestep_values": null,
+  "timesteps": 1000,
+  "trained_betas": null
+}
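For reference, "scaled_linear" means the betas are linear in square-root space rather than directly linear. A sketch of how such a schedule is built from the values above, assuming diffusers' usual scaled-linear construction:

    import torch

    # linspace between sqrt(beta_start) and sqrt(beta_end), then squared,
    # giving 1000 betas running from 0.0015 to 0.0195
    betas = torch.linspace(0.0015 ** 0.5, 0.0195 ** 0.5, 1000) ** 2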
unet/config.json ADDED
@@ -0,0 +1,35 @@
+{
+  "_class_name": "UNetUnconditionalModel",
+  "_diffusers_version": "0.0.4",
+  "attention_resolutions": [
+    8,
+    4,
+    2
+  ],
+  "down_blocks": ["UNetResDownBlock2D", "UNetResAttnDownBlock2D", "UNetResAttnDownBlock2D", "UNetResAttnDownBlock2D"],
+  "up_blocks": ["UNetResAttnUpBlock2D", "UNetResAttnUpBlock2D", "UNetResAttnUpBlock2D", "UNetResUpBlock2D"],
+  "down_block_input_channels": [224, 224, 448, 672],
+  "down_block_output_channels": [224, 448, 672, 896],
+  "context_dim": null,
+  "conv_resample": true,
+  "dims": 2,
+  "dropout": 0,
+  "image_size": 64,
+  "in_channels": 3,
+  "legacy": true,
+  "n_embed": null,
+  "num_classes": null,
+  "num_head_channels": 32,
+  "num_heads": -1,
+  "num_heads_upsample": -1,
+  "num_res_blocks": 2,
+  "out_channels": 3,
+  "resblock_updown": false,
+  "transformer_depth": 1,
+  "use_checkpoint": false,
+  "use_fp16": false,
+  "use_new_attention_order": false,
+  "use_scale_shift_norm": false,
+  "use_spatial_transformer": false,
+  "ldm": true
+}
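In the LDM convention, "attention_resolutions" lists downsampling factors rather than feature-map sizes, so [8, 4, 2] with image_size 64 places self-attention on the 8x8, 16x16, and 32x32 feature maps; this matches the block lists above, where only the first down block and the last up block are attention-free.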
unet/diffusion_model.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7b655ee0d741c2de23be13d7031c8365b7c17f61b5921a42d1173e1e20d48067
+size 1096382177
vqvae/config.json ADDED
@@ -0,0 +1,24 @@
+{
+  "_class_name": "VQModel",
+  "_diffusers_version": "0.0.4",
+  "attn_resolutions": [],
+  "ch": 128,
+  "ch_mult": [
+    1,
+    2,
+    4
+  ],
+  "double_z": false,
+  "dropout": 0.0,
+  "embed_dim": 3,
+  "give_pre_end": false,
+  "in_channels": 3,
+  "n_embed": 8192,
+  "num_res_blocks": 2,
+  "out_ch": 3,
+  "remap": null,
+  "resamp_with_conv": true,
+  "resolution": 256,
+  "sane_index_shape": false,
+  "z_channels": 3
+}
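The ch_mult list [1, 2, 4] implies len(ch_mult) - 1 = 2 downsampling stages, so the 256x256 input is compressed to 64x64 latents with z_channels = 3, which is exactly the image_size and in_channels the UNet config above expects.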
vqvae/diffusion_model.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e383b55bf3faeffafffb49286ae11c41611557c6c2b0dfbf09a0d3ea94590ae8
+size 221364711