Ahsen Khaliq committed on
Commit 2b08e86
1 Parent(s): 28b55ac

Update app.py

Files changed (1):
  1. app.py +65 -61
app.py CHANGED
@@ -1,8 +1,6 @@
 import os
-os.system("pip install --upgrade torch==1.9.1+cu111 torchvision==0.10.1+cu111 -f https://download.pytorch.org/whl/torch_stable.html")
 os.system("git clone https://github.com/openai/CLIP")
 os.system("pip install -e ./CLIP")
-os.system("pip install einops ninja scipy numpy Pillow tqdm imageio-ffmpeg imageio")
 import sys
 sys.path.append('./CLIP')
 import io
@@ -105,65 +103,71 @@ zs = torch.randn([10000, G.mapping.z_dim], device=device)
 w_stds = G.mapping(zs, None).std(0)
 
 
-def inference(text,steps,image):
-  all_frames = []
-  target = clip_model.embed_text(text)
-  if image:
-    target = embed_image(TF.to_tensor(image).to(device).unsqueeze(0)).mean(0).squeeze(0)
-  else:
+def inference(text,steps,image,mode):
+  if mode == "CLIP+StyleGAN3":
+    all_frames = []
     target = clip_model.embed_text(text)
-  steps = steps
-  #seed = 2
-  seed = -1
-  if seed == -1:
-    seed = np.random.randint(0,2**32 - 1)
-  tf = Compose([
-    Resize(224),
-    lambda x: torch.clamp((x+1)/2,min=0,max=1),
-  ])
-  torch.manual_seed(seed)
-  timestring = time.strftime('%Y%m%d%H%M%S')
-  with torch.no_grad():
-    qs = []
-    losses = []
-    for _ in range(8):
-      q = (G.mapping(torch.randn([4,G.mapping.z_dim], device=device), None, truncation_psi=0.7) - G.mapping.w_avg) / w_stds
-      images = G.synthesis(q * w_stds + G.mapping.w_avg)
-      embeds = embed_image(images.add(1).div(2))
-      loss = spherical_dist_loss(embeds, target).mean(0)
-      i = torch.argmin(loss)
-      qs.append(q[i])
-      losses.append(loss[i])
-    qs = torch.stack(qs)
-    losses = torch.stack(losses)
-    print(losses)
-    print(losses.shape, qs.shape)
-    i = torch.argmin(losses)
-  q = qs[i].unsqueeze(0)
-  q.requires_grad_()
-  q_ema = q
-  opt = torch.optim.AdamW([q], lr=0.03, betas=(0.0,0.999))
-  loop = tqdm(range(steps))
-  for i in loop:
-    opt.zero_grad()
-    w = q * w_stds
-    image = G.synthesis(w + G.mapping.w_avg, noise_mode='const')
-    embed = embed_image(image.add(1).div(2))
-    loss = spherical_dist_loss(embed, target).mean()
-    loss.backward()
-    opt.step()
-    loop.set_postfix(loss=loss.item(), q_magnitude=q.std().item())
-    q_ema = q_ema * 0.9 + q * 0.1
-    image = G.synthesis(q_ema * w_stds + G.mapping.w_avg, noise_mode='const')
-    pil_image = TF.to_pil_image(image[0].add(1).div(2).clamp(0,1))
-    all_frames.append(pil_image)
-    #os.makedirs(f'samples/{timestring}', exist_ok=True)
-    #pil_image.save(f'samples/{timestring}/{i:04}.jpg')
-  writer = imageio.get_writer('test.mp4', fps=15)
-  for im in all_frames:
-    writer.append_data(np.array(im))
-  writer.close()
-  return pil_image, "test.mp4"
+    if image:
+      target = embed_image(TF.to_tensor(image).to(device).unsqueeze(0)).mean(0).squeeze(0)
+    else:
+      target = clip_model.embed_text(text)
+    steps = steps
+    #seed = 2
+    seed = -1
+    if seed == -1:
+      seed = np.random.randint(0,2**32 - 1)
+    tf = Compose([
+      Resize(224),
+      lambda x: torch.clamp((x+1)/2,min=0,max=1),
+    ])
+    torch.manual_seed(seed)
+    timestring = time.strftime('%Y%m%d%H%M%S')
+    with torch.no_grad():
+      qs = []
+      losses = []
+      for _ in range(8):
+        q = (G.mapping(torch.randn([4,G.mapping.z_dim], device=device), None, truncation_psi=0.7) - G.mapping.w_avg) / w_stds
+        images = G.synthesis(q * w_stds + G.mapping.w_avg)
+        embeds = embed_image(images.add(1).div(2))
+        loss = spherical_dist_loss(embeds, target).mean(0)
+        i = torch.argmin(loss)
+        qs.append(q[i])
+        losses.append(loss[i])
+      qs = torch.stack(qs)
+      losses = torch.stack(losses)
+      print(losses)
+      print(losses.shape, qs.shape)
+      i = torch.argmin(losses)
+    q = qs[i].unsqueeze(0)
+    q.requires_grad_()
+    q_ema = q
+    opt = torch.optim.AdamW([q], lr=0.03, betas=(0.0,0.999))
+    loop = tqdm(range(steps))
+    for i in loop:
+      opt.zero_grad()
+      w = q * w_stds
+      image = G.synthesis(w + G.mapping.w_avg, noise_mode='const')
+      embed = embed_image(image.add(1).div(2))
+      loss = spherical_dist_loss(embed, target).mean()
+      loss.backward()
+      opt.step()
+      loop.set_postfix(loss=loss.item(), q_magnitude=q.std().item())
+      q_ema = q_ema * 0.9 + q * 0.1
+      image = G.synthesis(q_ema * w_stds + G.mapping.w_avg, noise_mode='const')
+      pil_image = TF.to_pil_image(image[0].add(1).div(2).clamp(0,1))
+      all_frames.append(pil_image)
+      #os.makedirs(f'samples/{timestring}', exist_ok=True)
+      #pil_image.save(f'samples/{timestring}/{i:04}.jpg')
+    writer = imageio.get_writer('test.mp4', fps=15)
+    for im in all_frames:
+      writer.append_data(np.array(im))
+    writer.close()
+    return pil_image, "test.mp4"
+  else:
+    os.system("python gen_video.py --output=lerp.mp4 --trunc=1 --seeds=0-31 --grid=4x2 \
+      --network=https://api.ngc.nvidia.com/v2/models/nvidia/research/stylegan3/versions/1/files/stylegan3-r-afhqv2-512x512.pkl")
+    img = Image.new("RGB", (800, 1280), (255, 255, 255))
+    return img, "lerp.mp4"
 
 
 title = "StyleGAN3+CLIP"
@@ -172,7 +176,7 @@ article = "<p style='text-align: center'><a href='https://colab.research.google.
 examples = [['mario',150,None]]
 gr.Interface(
 inference,
-["text",gr.inputs.Slider(minimum=50, maximum=200, step=1, default=150, label="steps"),gr.inputs.Image(type="pil", label="Image (Optional)", optional=True)],
+["text",gr.inputs.Slider(minimum=50, maximum=200, step=1, default=150, label="steps"),gr.inputs.Image(type="pil", label="Image (Optional)", optional=True),gr.inputs.Radio(choices=["CLIP+StyleGAN3","Stylegan3 interpolation"], type="value", default="CLIP+StyleGAN3", label="mode")],
 [gr.outputs.Image(type="pil", label="Output"),"playable_video"],
 title=title,
 description=description,
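
Note: the hunks above call spherical_dist_loss, which is defined elsewhere in app.py and not shown in this diff. As context, here is a minimal self-contained sketch of the loss the optimization loop minimizes, assuming the standard definition from the CLIP-guidance notebooks this Space is based on; the random tensors and the 512-dim size below are illustrative stand-ins, not part of the commit.

import torch
import torch.nn.functional as F

def spherical_dist_loss(x, y):
    # Project both embeddings onto the unit sphere and return the squared
    # geodesic (great-circle) distance between them.
    x = F.normalize(x, dim=-1)
    y = F.normalize(y, dim=-1)
    return (x - y).div(2).norm(dim=-1).arcsin().pow(2).mul(4)

# Illustrative sanity check with random "CLIP-like" embeddings:
# app.py scores candidate latents the same way and keeps the argmin.
embeds = torch.randn(8, 512)   # stand-in for embed_image(...) outputs
target = torch.randn(512)      # stand-in for clip_model.embed_text(text)
losses = spherical_dist_loss(embeds, target)
print(losses.shape, losses.argmin().item())

With the new mode radio, inference(text, steps, image, mode) only runs this CLIP-guided loop when mode == "CLIP+StyleGAN3"; choosing "Stylegan3 interpolation" bypasses it and shells out to gen_video.py to render lerp.mp4 instead.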