TiankaiHang committed
Commit 6c26e0d
1 Parent(s): 29cd0de
Files changed (1)
  1. app.py +31 -10
app.py CHANGED
@@ -109,9 +109,11 @@ def predict(
     random.seed(seed)
     np.random.seed(seed)
     torch.manual_seed(seed)
-    torch.cuda.manual_seed(seed)
-
-    torch.cuda.empty_cache()
+    try:
+        torch.cuda.manual_seed(seed)
+        torch.cuda.empty_cache()
+    except:
+        pass
 
     if isinstance(input_img, str):
         if input_img.startswith("http"):
@@ -129,7 +131,10 @@
         else:
             input_image = ImageOps.fit(input_image, (width, height), method=Image.LANCZOS)
         input_image = 2 * torch.tensor(np.array(input_image)).float() / 255 - 1
-        input_image = rearrange(input_image, "h w c -> 1 c h w").cuda()
+        if torch.cuda.is_available():
+            input_image = rearrange(input_image, "h w c -> 1 c h w").cuda()
+        else:
+            input_image = rearrange(input_image, "h w c -> 1 c h w")
 
     # if PIL Image
     elif isinstance(input_img, Image.Image):
@@ -144,7 +149,10 @@
         else:
             input_image = ImageOps.fit(input_image, (width, height), method=Image.LANCZOS)
         input_image = 2 * torch.tensor(np.array(input_image)).float() / 255 - 1
-        input_image = rearrange(input_image, "h w c -> 1 c h w").cuda()
+        if torch.cuda.is_available():
+            input_image = rearrange(input_image, "h w c -> 1 c h w").cuda()
+        else:
+            input_image = rearrange(input_image, "h w c -> 1 c h w")
     elif isinstance(input_img, dict):
         input_image = input_img["image"].convert("RGB")
         width, height = input_image.size
@@ -158,26 +166,36 @@
         else:
             input_image = ImageOps.fit(input_image, (width, height), method=Image.LANCZOS)
         input_image = 2 * torch.tensor(np.array(input_image)).float() / 255 - 1
-        input_image = rearrange(input_image, "h w c -> 1 c h w").cuda()
+        if torch.cuda.is_available():
+            input_image = rearrange(input_image, "h w c -> 1 c h w").cuda()
+        else:
+            input_image = rearrange(input_image, "h w c -> 1 c h w")
 
     assert input_image is not None
     # print input image size
     print(input_image.shape, factor, width, height)
 
-    with torch.no_grad(), autocast("cuda"):
+    # with torch.no_grad(), autocast("cuda"):
+    with torch.no_grad():
         cond = {}
         cond["c_crossattn"] = [model.get_learned_conditioning([edit])]
         cond["c_concat"] = [model.encode_first_stage(input_image).mode()]
 
         uncond = {}
         if "txt_embed" in additional:
-            uncond["c_crossattn"] = [additional["txt_embed"].cuda().unsqueeze(0)]
+            if torch.cuda.is_available():
+                uncond["c_crossattn"] = [additional["txt_embed"].cuda().unsqueeze(0)]
+            else:
+                uncond["c_crossattn"] = [additional["txt_embed"].unsqueeze(0)]
         else:
             uncond["c_crossattn"] = [null_token]
         if "img_embed" in additional:
             # uncond["c_concat"] = [additional["img_embed"].cuda()]
             # resize to cond["c_concat"][0]
-            uncond["c_concat"] = [additional["img_embed"].cuda()]
+            if torch.cuda.is_available():
+                uncond["c_concat"] = [additional["img_embed"].cuda()]
+            else:
+                uncond["c_concat"] = [additional["img_embed"]]
             uncond["c_concat"][0] = F.interpolate(uncond["c_concat"][0], size=cond["c_concat"][0].shape[-2:], mode="bilinear", align_corners=False)
         else:
             uncond["c_concat"] = [torch.zeros_like(cond["c_concat"][0])]
@@ -269,7 +287,10 @@ def main(ckpt="checkpoints/v1-5-pruned-emaonly-adaption-task-humanalign.ckpt", a
 
     vae_ckpt = None
     model = load_model_from_config(config, ckpt, vae_ckpt)
-    model.eval().cuda()
+    if torch.cuda.is_available():
+        model.eval().cuda()
+    else:
+        model.eval()
 
     model_wrap = K.external.CompVisDenoiser(model)
     model_wrap_cfg = CFGDenoiser(model_wrap)
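
Note: this commit lets the Space fall back to CPU by guarding every hard-coded .cuda() call (and dropping the CUDA-only autocast) behind torch.cuda.is_available(). As a minimal sketch only, and not part of this commit, the same fallback is commonly written by selecting a device once and moving tensors and modules with .to(device); the names below (net, x) are purely illustrative:

import torch

# Pick the target device once; "cuda" only if a GPU is actually available.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Illustrative module and input; .to(device) is effectively a no-op on CPU-only machines.
net = torch.nn.Conv2d(3, 8, kernel_size=3, padding=1).to(device).eval()
x = torch.randn(1, 3, 64, 64, device=device)

with torch.no_grad():
    y = net(x)  # runs on the GPU if present, otherwise on the CPU

print(y.shape, y.device)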