Ahsen Khaliq committed
Commit f5dff55
1 Parent(s): fe6483f

Update app.py

Files changed (1)
  1. app.py +42 -38
app.py CHANGED
@@ -78,46 +78,50 @@ def tv_loss(input):
 def range_loss(input):
     return (input - input.clamp(-1, 1)).pow(2).mean([1, 2, 3])
 
-# Model settings
-model_config = model_and_diffusion_defaults()
-model_config.update({
-    'attention_resolutions': '32, 16, 8',
-    'class_cond': False,
-    'diffusion_steps': 1000,
-    'rescale_timesteps': True,
-    'timestep_respacing': '90',  # Modify this value to decrease the number of
-                                 # timesteps.
-    'image_size': 256,
-    'learn_sigma': True,
-    'noise_schedule': 'linear',
-    'num_channels': 256,
-    'num_head_channels': 64,
-    'num_res_blocks': 2,
-    'resblock_updown': True,
-    'use_fp16': True,
-    'use_scale_shift_norm': True,
-})
-# Load models
-device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
-print('Using device:', device)
-model, diffusion = create_model_and_diffusion(**model_config)
-model.load_state_dict(torch.load('256x256_diffusion_uncond.pt', map_location='cpu'))
-model.requires_grad_(False).eval().to(device)
-for name, param in model.named_parameters():
-    if 'qkv' in name or 'norm' in name or 'proj' in name:
-        param.requires_grad_()
-if model_config['use_fp16']:
-    model.convert_to_fp16()
-clip_model = clip.load('ViT-B/16', jit=False)[0].eval().requires_grad_(False).to(device)
-clip_size = clip_model.visual.input_resolution
-normalize = transforms.Normalize(mean=[0.48145466, 0.4578275, 0.40821073],
-                                 std=[0.26862954, 0.26130258, 0.27577711])
-lpips_model = lpips.LPIPS(net='vgg').to(device)
+def inference(text, init_image, skip_timesteps, clip_guidance_scale, tv_scale, range_scale, init_scale, seed, image_prompt, timestep_respacing):
+    # Model settings
+    model_config = model_and_diffusion_defaults()
+    model_config.update({
+        'attention_resolutions': '32, 16, 8',
+        'class_cond': False,
+        'diffusion_steps': 1000,
+        'rescale_timesteps': True,
+        'timestep_respacing': str(timestep_respacing),  # Modify this value to decrease the number of
+                                                        # timesteps.
+        'image_size': 256,
+        'learn_sigma': True,
+        'noise_schedule': 'linear',
+        'num_channels': 256,
+        'num_head_channels': 64,
+        'num_res_blocks': 2,
+        'resblock_updown': True,
+        'use_fp16': True,
+        'use_scale_shift_norm': True,
+    })
+    # Load models
+    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
+    print('Using device:', device)
+    model, diffusion = create_model_and_diffusion(**model_config)
+    model.load_state_dict(torch.load('256x256_diffusion_uncond.pt', map_location='cpu'))
+    model.requires_grad_(False).eval().to(device)
+    for name, param in model.named_parameters():
+        if 'qkv' in name or 'norm' in name or 'proj' in name:
+            param.requires_grad_()
+    if model_config['use_fp16']:
+        model.convert_to_fp16()
+    clip_model = clip.load('ViT-B/16', jit=False)[0].eval().requires_grad_(False).to(device)
+    clip_size = clip_model.visual.input_resolution
+    normalize = transforms.Normalize(mean=[0.48145466, 0.4578275, 0.40821073],
+                                     std=[0.26862954, 0.26130258, 0.27577711])
+    lpips_model = lpips.LPIPS(net='vgg').to(device)
 
-def inference(text, init_image, skip_timesteps, clip_guidance_scale, tv_scale, range_scale, init_scale, seed):
+    # def inference(text, init_image, skip_timesteps, clip_guidance_scale, tv_scale, range_scale, init_scale, seed, image_prompt):
     all_frames = []
     prompts = [text]
-    image_prompts = []
+    if image_prompt:
+        image_prompts = [image_prompt.name]
+    else:
+        image_prompts = []
     batch_size = 1
     clip_guidance_scale = clip_guidance_scale  # Controls how much the image should look like the prompt.
     tv_scale = tv_scale  # Controls the smoothness of the final output.
@@ -217,6 +221,6 @@ def inference(text, init_image, skip_timesteps, clip_guidance_scale, tv_scale, r
 title = "CLIP Guided Diffusion HQ"
 description = "Gradio demo for CLIP Guided Diffusion. To use it, simply add your text, or click one of the examples to load them. Read more at the links below."
 article = "<p style='text-align: center'> By Katherine Crowson (https://github.com/crowsonkb, https://twitter.com/RiversHaveWings). It uses OpenAI's 256x256 unconditional ImageNet diffusion model (https://github.com/openai/guided-diffusion) together with CLIP (https://github.com/openai/CLIP) to connect text prompts with images. | <a href='https://colab.research.google.com/drive/12a_Wrfi2_gwwAuN3VvMTwVMz9TfqctNj' target='_blank'>Colab</a></p>"
-iface = gr.Interface(inference, inputs=["text",gr.inputs.Image(type="file", label='initial image (optional)', optional=True),gr.inputs.Slider(minimum=0, maximum=45, step=1, default=0, label="skip_timesteps"), gr.inputs.Slider(minimum=0, maximum=3000, step=1, default=700, label="clip guidance scale (Controls how much the image should look like the prompt)"), gr.inputs.Slider(minimum=0, maximum=1000, step=1, default=150, label="tv_scale (Controls the smoothness of the final output)"), gr.inputs.Slider(minimum=0, maximum=1000, step=1, default=50, label="range_scale (Controls how far out of range RGB values are allowed to be)"), gr.inputs.Slider(minimum=0, maximum=1000, step=1, default=0, label="init_scale (This enhances the effect of the init image)"), gr.inputs.Number(default=0, label="Seed")], outputs=["image","video"], title=title, description=description, article=article, examples=[["coral reef city by artstation artists", None, 0, 1000, 150, 50, 0, 0]],
+iface = gr.Interface(inference, inputs=["text",gr.inputs.Image(type="file", label='initial image (optional)', optional=True),gr.inputs.Slider(minimum=0, maximum=45, step=1, default=0, label="skip_timesteps"), gr.inputs.Slider(minimum=0, maximum=3000, step=1, default=700, label="clip guidance scale (Controls how much the image should look like the prompt)"), gr.inputs.Slider(minimum=0, maximum=1000, step=1, default=150, label="tv_scale (Controls the smoothness of the final output)"), gr.inputs.Slider(minimum=0, maximum=1000, step=1, default=50, label="range_scale (Controls how far out of range RGB values are allowed to be)"), gr.inputs.Slider(minimum=0, maximum=1000, step=1, default=0, label="init_scale (This enhances the effect of the init image)"), gr.inputs.Number(default=0, label="Seed"), gr.inputs.Image(type="file", label='image prompt (optional)', optional=True), gr.inputs.Slider(minimum=50, maximum=300, step=1, default=90, label="timestep respacing")], outputs=["image","video"], title=title, description=description, article=article, examples=[["coral reef city by artstation artists", None, 0, 1000, 150, 50, 0, 0, None, 90]],
                      enable_queue=True)
 iface.launch()
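
The commit moves model construction inside inference() so that the user-selected "timestep respacing" slider takes effect per request, and it turns the optional Gradio image-prompt upload into a CLIP image-prompt list only when a file was actually provided. Below is a minimal sketch of that pattern, kept separate from the full app.py: the helper names build_diffusion and collect_image_prompts are illustrative and do not appear in the repo, and the import path is assumed to be the standard one from openai/guided-diffusion, which supplies the model_and_diffusion_defaults and create_model_and_diffusion calls seen in the diff.

# Illustrative sketch only; helper names are hypothetical, not part of app.py.
from guided_diffusion.script_util import (  # import path assumed from openai/guided-diffusion
    create_model_and_diffusion,
    model_and_diffusion_defaults,
)


def build_diffusion(timestep_respacing: int):
    """Rebuild the model/diffusion pair so the user-chosen step count is applied."""
    config = model_and_diffusion_defaults()
    config.update({
        'attention_resolutions': '32, 16, 8',
        'class_cond': False,
        'diffusion_steps': 1000,
        'rescale_timesteps': True,
        'timestep_respacing': str(timestep_respacing),  # e.g. 90 -> 90 sampling steps
        'image_size': 256,
        'learn_sigma': True,
        'noise_schedule': 'linear',
        'num_channels': 256,
        'num_head_channels': 64,
        'num_res_blocks': 2,
        'resblock_updown': True,
        'use_fp16': True,
        'use_scale_shift_norm': True,
    })
    return create_model_and_diffusion(**config)


def collect_image_prompts(image_prompt):
    """gr.inputs.Image(type="file", optional=True) passes a file object or None."""
    return [image_prompt.name] if image_prompt is not None else []


# Usage, mirroring the new inference() signature:
#   model, diffusion = build_diffusion(timestep_respacing=90)
#   image_prompts = collect_image_prompts(image_prompt=None)  # -> []

Rebuilding the config each call is slower than loading the model once at import time, but it is what allows timestep_respacing to be a per-request Gradio input rather than a hard-coded '90'.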