JingyeChen committed on
Commit
586de43
1 Parent(s): ea0bceb
Files changed (1) hide show
  1. app.py +8 -8
app.py CHANGED
@@ -65,7 +65,7 @@ m1_model = AutoModelForCausalLM.from_pretrained(
65
  #### import diffusion models
66
  text_encoder = CLIPTextModel.from_pretrained(
67
  'JingyeChen22/textdiffuser2-full-ft', subfolder="text_encoder", ignore_mismatched_sizes=True
68
- ).cuda()
69
  tokenizer = CLIPTokenizer.from_pretrained(
70
  'runwayml/stable-diffusion-v1-5', subfolder="tokenizer"
71
  )
@@ -83,10 +83,10 @@ for c in alphabet:
83
  print(len(tokenizer))
84
  print('***************')
85
 
86
- vae = AutoencoderKL.from_pretrained('runwayml/stable-diffusion-v1-5', subfolder="vae").cuda()
87
  unet = UNet2DConditionModel.from_pretrained(
88
  'JingyeChen22/textdiffuser2-full-ft', subfolder="unet"
89
- ).cuda()
90
  text_encoder.resize_token_embeddings(len(tokenizer))
91
 
92
 
@@ -340,10 +340,10 @@ def text_to_image(prompt,keywords,radio,slider_step,slider_guidance,slider_batch
340
  scheduler = DDPMScheduler.from_pretrained('runwayml/stable-diffusion-v1-5', subfolder="scheduler")
341
  scheduler.set_timesteps(slider_step)
342
  noise = torch.randn((slider_batch, 4, 64, 64)).to("cuda")
343
- input = noise
344
 
345
- encoder_hidden_states_cond = text_encoder(prompts_cond)[0]
346
- encoder_hidden_states_nocond = text_encoder(prompts_nocond)[0]
347
 
348
 
349
  for t in tqdm(scheduler.timesteps):
@@ -355,7 +355,7 @@ def text_to_image(prompt,keywords,radio,slider_step,slider_guidance,slider_batch
355
  del noise_pred_cond
356
  del noise_pred_uncond
357
 
358
- torch.cuda.empty_cache()
359
 
360
  # decode
361
  input = 1 / vae.config.scaling_factor * input
@@ -363,7 +363,7 @@ def text_to_image(prompt,keywords,radio,slider_step,slider_guidance,slider_batch
363
  width, height = 512, 512
364
  results = []
365
  new_image = Image.new('RGB', (2*width, 2*height))
366
- for index, image in enumerate(images.float()):
367
  image = (image / 2 + 0.5).clamp(0, 1).unsqueeze(0)
368
  image = image.cpu().permute(0, 2, 3, 1).numpy()[0]
369
  image = Image.fromarray((image * 255).round().astype("uint8")).convert('RGB')
 
65
  #### import diffusion models
66
  text_encoder = CLIPTextModel.from_pretrained(
67
  'JingyeChen22/textdiffuser2-full-ft', subfolder="text_encoder", ignore_mismatched_sizes=True
68
+ ).half().cuda()
69
  tokenizer = CLIPTokenizer.from_pretrained(
70
  'runwayml/stable-diffusion-v1-5', subfolder="tokenizer"
71
  )
 
83
  print(len(tokenizer))
84
  print('***************')
85
 
86
+ vae = AutoencoderKL.from_pretrained('runwayml/stable-diffusion-v1-5', subfolder="vae").half().cuda()
87
  unet = UNet2DConditionModel.from_pretrained(
88
  'JingyeChen22/textdiffuser2-full-ft', subfolder="unet"
89
+ ).half().cuda()
90
  text_encoder.resize_token_embeddings(len(tokenizer))
91
 
92
 
 
340
  scheduler = DDPMScheduler.from_pretrained('runwayml/stable-diffusion-v1-5', subfolder="scheduler")
341
  scheduler.set_timesteps(slider_step)
342
  noise = torch.randn((slider_batch, 4, 64, 64)).to("cuda")
343
+ input = noise.half()
344
 
345
+ encoder_hidden_states_cond = text_encoder(prompts_cond)[0].half()
346
+ encoder_hidden_states_nocond = text_encoder(prompts_nocond)[0] .half()
347
 
348
 
349
  for t in tqdm(scheduler.timesteps):
 
355
  del noise_pred_cond
356
  del noise_pred_uncond
357
 
358
+ torch.cuda.empty_cache()
359
 
360
  # decode
361
  input = 1 / vae.config.scaling_factor * input
 
363
  width, height = 512, 512
364
  results = []
365
  new_image = Image.new('RGB', (2*width, 2*height))
366
+ for index, image in enumerate(images.cpu().float()):
367
  image = (image / 2 + 0.5).clamp(0, 1).unsqueeze(0)
368
  image = image.cpu().permute(0, 2, 3, 1).numpy()[0]
369
  image = Image.fromarray((image * 255).round().astype("uint8")).convert('RGB')