Badr AlKhamissi committed on
Commit 35c104c
1 Parent(s): e8f6bdd

fp32 instead of fp16

Files changed (2)
  1. app.py +1 -1
  2. code/losses.py +11 -20
app.py CHANGED
@@ -30,7 +30,7 @@ from diffusers import StableDiffusionPipeline
  device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
 
  model = None
- model = StableDiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16).to(device)
+ model = StableDiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5").to(device)
 
  from typing import Mapping
  from tqdm import tqdm
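The fp16 → fp32 switch matters when the app falls back to CPU: half-precision weights are only practical on CUDA, and many fp16 kernels are not implemented for CPU tensors. A minimal sketch of a device-conditional load, assuming one wants to keep fp16 on GPU (illustrative only, not part of this commit):

import torch
from diffusers import StableDiffusionPipeline

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# fp16 kernels are effectively CUDA-only, so fall back to fp32 on CPU
dtype = torch.float16 if device.type == "cuda" else torch.float32

model = StableDiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5", torch_dtype=dtype
).to(device)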
code/losses.py CHANGED
@@ -21,8 +21,8 @@ class SDSLoss(nn.Module):
  self.pipe = model
  self.pipe = self.pipe.to(self.device)
 
- self.clip_model = CLIPModel.from_pretrained("openai/clip-vit-large-patch14").to(self.device)
- self.clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-large-patch14")
+ # self.clip_model = CLIPModel.from_pretrained("openai/clip-vit-large-patch14").to(self.device)
+ # self.clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-large-patch14")
 
  # default scheduler: PNDMScheduler(beta_start=0.00085, beta_end=0.012,
  # beta_schedule="scaled_linear", num_train_timesteps=1000)
@@ -35,24 +35,15 @@ class SDSLoss(nn.Module):
  def embed_text(self):
  # tokenizer and embed text
 
- if "jpeg" not in self.cfg.caption:
- text_input = self.pipe.tokenizer(self.cfg.caption, padding="max_length",
- max_length=self.pipe.tokenizer.model_max_length,
- truncation=True, return_tensors="pt")
- uncond_input = self.pipe.tokenizer([""], padding="max_length",
- max_length=text_input.input_ids.shape[-1],
- return_tensors="pt")
- with torch.no_grad():
- text_embeddings = self.pipe.text_encoder(text_input.input_ids.to(self.device))[0]
- uncond_embeddings = self.pipe.text_encoder(uncond_input.input_ids.to(self.device))[0]
- else:
- print(f"> Reading Image {self.cfg.caption}")
- with torch.no_grad():
- image = Image.open(self.cfg.caption)
- inputs = self.clip_processor(images=image, return_tensors="pt").to(self.device)
- img_emb = self.clip_model.get_image_features(**inputs)
- text_embeddings = img_emb
- uncond_embeddings = img_emb
+ text_input = self.pipe.tokenizer(self.cfg.caption, padding="max_length",
+ max_length=self.pipe.tokenizer.model_max_length,
+ truncation=True, return_tensors="pt")
+ uncond_input = self.pipe.tokenizer([""], padding="max_length",
+ max_length=text_input.input_ids.shape[-1],
+ return_tensors="pt")
+ with torch.no_grad():
+ text_embeddings = self.pipe.text_encoder(text_input.input_ids.to(self.device))[0]
+ uncond_embeddings = self.pipe.text_encoder(uncond_input.input_ids.to(self.device))[0]
 
  self.text_embeddings = torch.cat([uncond_embeddings, text_embeddings])
  self.text_embeddings = self.text_embeddings.repeat_interleave(self.cfg.batch_size, 0)
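After this change, embed_text always encodes the caption with the pipeline's own text encoder and stacks the unconditional and conditional embeddings, repeated per batch element. A minimal sketch of how a [uncond; cond] tensor like self.text_embeddings is typically consumed for classifier-free guidance; unet, latents, t, and guidance_scale are assumed names for the surrounding SDS setup, not defined in this commit:

import torch

def guided_noise_pred(unet, latents, t, text_embeddings, guidance_scale=100):
    # Run the UNet once on the doubled batch: first half unconditional, second half conditional.
    latent_model_input = torch.cat([latents] * 2)
    noise_pred = unet(latent_model_input, t, encoder_hidden_states=text_embeddings).sample
    noise_pred_uncond, noise_pred_text = noise_pred.chunk(2)
    # Push the prediction away from the unconditional branch toward the caption.
    return noise_pred_uncond + guidance_scale * (noise_pred_text - noise_pred_uncond)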