gvecchio
/

MatForger

@@ -62,7 +62,8 @@ def postprocess(
         output_type = "np"
     image = image.detach().cpu()
     if output_type == "latent":
         return image
@@ -412,9 +413,7 @@ class MatForgerPipeline(DiffusionPipeline, FromSingleFileMixin):
             raise ValueError(
                 "Provide either `prompt` or `prompt_embeds`. Cannot leave both `prompt` and `prompt_embeds` undefined."
             )
-        elif prompt is not None and (
-            not isinstance(prompt, str) and not isinstance(prompt, list)
-        ):
             raise ValueError(
                 f"`prompt` has to be of type `str` or `list` but is {type(prompt)}"
             )
@@ -671,8 +670,8 @@ class MatForgerPipeline(DiffusionPipeline, FromSingleFileMixin):
         ] = None,
         height: Optional[int] = None,
         width: Optional[int] = None,
-        tileable: bool = True,
-        patched: bool = True,
         num_inference_steps: int = 50,
         timesteps: List[int] = None,
         guidance_scale: float = 7.5,

         output_type = "np"
     image = image.detach().cpu()
+    image = image.to(torch.float32)
     if output_type == "latent":
         return image
             raise ValueError(
                 "Provide either `prompt` or `prompt_embeds`. Cannot leave both `prompt` and `prompt_embeds` undefined."
             )
+        elif prompt is not None and (not isinstance(prompt, (str, list, Image.Image))):
             raise ValueError(
                 f"`prompt` has to be of type `str` or `list` but is {type(prompt)}"
             )
         ] = None,
         height: Optional[int] = None,
         width: Optional[int] = None,
+        tileable: bool = False,
+        patched: bool = False,
         num_inference_steps: int = 50,
         timesteps: List[int] = None,
         guidance_scale: float = 7.5,

prompt_encoder/encoder.py CHANGED Viewed

@@ -1,6 +1,13 @@
-from typing import List, Optional
 from diffusers.configuration_utils import ConfigMixin
 from diffusers.models.modeling_utils import ModelMixin
 from PIL import Image
 from transformers import (
@@ -10,6 +17,15 @@ from transformers import (
     CLIPVisionModelWithProjection,
 )
 class BasePromptEncoder(ModelMixin, ConfigMixin):
     def __init__(self):
@@ -59,16 +75,19 @@ class MaterialPromptEncoder(BasePromptEncoder):
         self,
         prompt,
     ):
-        dtype = type(prompt)
-        if dtype == list:
-            dtype = type(prompt[0])
-        if dtype == str:
-            return self.encode_text(prompt)
-        elif dtype == Image.Image:
-            return self.encode_image(prompt)
-        else:
-            raise NotImplementedError
     def forward(
         self,

+from typing import List, Union, get_args
+import PIL
+import PIL.Jpeg2KImagePlugin
+import PIL.JpegImagePlugin
+import PIL.PngImagePlugin
+import PIL.TiffImagePlugin
+import torch
 from diffusers.configuration_utils import ConfigMixin
+from diffusers.image_processor import PipelineImageInput
 from diffusers.models.modeling_utils import ModelMixin
 from PIL import Image
 from transformers import (
     CLIPVisionModelWithProjection,
 )
+StrInput = Union[str, List[str]]
+ImageInput = Union[
+    PIL.JpegImagePlugin.JpegImageFile,
+    PIL.Jpeg2KImagePlugin.Jpeg2KImageFile,
+    PIL.PngImagePlugin.PngImageFile,
+    PIL.TiffImagePlugin.TiffImageFile,
+]
 class BasePromptEncoder(ModelMixin, ConfigMixin):
     def __init__(self):
         self,
         prompt,
     ):
+        if type(prompt) != list:
+            prompt = [prompt]
+        embs = []
+        for prompt in prompt:
+            if isinstance(prompt, str):
+                embs.append(self.encode_text(prompt))
+            elif type(prompt, get_args(ImageInput)):
+                embs.append(self.encode_image(prompt))
+            else:
+                raise NotImplementedError
+        return torch.cat(embs, dim=0)
     def forward(
         self,