myn0908 committed
Commit 8811405
1 Parent(s): 136e8a5

enhance prompt module
S2I/commons/controller.py CHANGED
@@ -47,31 +47,27 @@ class Sketch2ImageController():
         self.pipe = Sketch2ImagePipeline()
         self.zero_options = zero_options
 
-    def update_canvas(self, use_line, use_eraser):
-        brush_size = 20 if use_eraser else 4
-        _color = "#ffffff" if use_eraser else "#000000"
-        return self.gr.update(brush_radius=brush_size, brush_color=_color, interactive=True)
-
-    def upload_sketch(self, file):
-        _img = Image.open(file.name).convert("L")
-        return self.gr.update(value=_img, source="upload", interactive=True)
-
     @staticmethod
     def pil_image_to_data_uri(img, format="PNG"):
         buffered = BytesIO()
         img.save(buffered, format=format)
         img_str = base64.b64encode(buffered.getvalue()).decode()
         return f"data:image/{format.lower()};base64,{img_str}"
 
-    def artwork(self, options, image, prompt, prompt_template, style_name, seed, val_r, faster, model_name, type_flag):
+    def artwork(self, options, image, prompt, prompt_template, style_name, seed, val_r, faster, model_name, type_flag, prompt_quality):
         self.load_pipeline(zero_options=options)
-        prompt = prompt_template.replace("{prompt}", prompt)
+        prompt_enhanced = self.automatic_enhance_prompt(prompt, prompt_quality)
+        prompt_enhanced = prompt_template.replace("{prompt}", prompt_enhanced)
 
-        if type_flag == 'live-sketch':
-            img = Image.fromarray(np.array(image["composite"])[:, :, -1])
-        elif type_flag == 'url-sketch':
+        # if type_flag == 'live-sketch':
+        #     img = Image.fromarray(np.array(image["composite"])[:, :, -1])
+        # elif type_flag == 'url-sketch':
+        #     img = image["composite"]
+
+        if type_flag == 'URL':
             img = image["composite"]
+        else:
+            img = Image.fromarray(np.array(image["composite"])[:, :, -1])
 
         img = img.convert("RGB")
         img = img.resize((512, 512))
@@ -84,14 +80,13 @@ class Sketch2ImageController():
         noise = torch.randn((1, 4, H // 8, W // 8), device=c_t.device)
 
         with torch.no_grad():
-            output_image = self.pipe.generate(c_t, prompt, r=val_r, noise_map=noise, half_model=faster, model_name=model_name)
+            output_image = self.pipe.generate(c_t, prompt_enhanced, r=val_r, noise_map=noise, half_model=faster, model_name=model_name)
 
         output_pil = F.to_pil_image(output_image[0].cpu() * 0.5 + 0.5)
 
-        if type_flag == 'live-sketch':
-            input_uri = self.pil_image_to_data_uri(Image.fromarray(255 - np.array(img)))
-        else:
-            input_uri = self.pil_image_to_data_uri(img)
+        # if type_flag == 'live-sketch':
+        #     input_uri = self.pil_image_to_data_uri(Image.fromarray(255 - np.array(img)))
+        # else:
+        #     input_uri = self.pil_image_to_data_uri(img)
 
-        return output_pil
-        # , self.gr.update(link=input_uri)
+        return output_pil
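
For readers following the type_flag change above: a minimal sketch of the two input paths the rewritten branch distinguishes, assuming the gradio canvas's "composite" value is an RGBA PIL image with the live strokes in its alpha channel (names below are illustrative, not from the repo):

    import numpy as np
    from PIL import Image

    composite = Image.new("RGBA", (512, 512))       # stand-in for image["composite"]
    alpha = np.array(composite)[:, :, -1]           # live strokes read from the alpha channel
    sketch = Image.fromarray(alpha)                 # grayscale sketch, as in the 'else' branch
    rgb = sketch.convert("RGB").resize((512, 512))  # normalization shared by both branches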
 
S2I/modules/models.py CHANGED
@@ -2,7 +2,7 @@ import torch
 import copy
 import os
 from diffusers import DDPMScheduler
-from transformers import AutoTokenizer, CLIPTextModel
+from transformers import AutoTokenizer, CLIPTextModel, pipeline
 from diffusers import AutoencoderKL, UNet2DConditionModel
 from peft import LoraConfig
 from S2I.modules.utils import sc_vae_encoder_fwd, sc_vae_decoder_fwd, download_models, get_model_path, get_s2i_home
@@ -29,6 +29,8 @@ class PrimaryModel:
         self.global_tokenizer = None
         self.global_text_encoder = None
         self.global_scheduler = None
+        self.global_medium_prompt = None
+        self.global_long_prompt = None
 
     @staticmethod
     def _load_model(path, model_class, unet_mode=False):
@@ -62,9 +64,14 @@ class PrimaryModel:
         sd = torch.load(p_ckpt, map_location="cpu")
         return sd
     def from_pretrained(self, model_name, r):
+
+        if self.global_medium_prompt is None:
+            self.global_medium_prompt = pipeline("summarization", model="gokaygokay/Lamini-Prompt-Enchance", device='cuda')
+
+        if self.global_long_prompt is None:
+            self.global_long_prompt = pipeline("summarization", model="gokaygokay/Lamini-Prompt-Enchance-Long", device='cuda')
+
         if self.global_tokenizer is None:
-            # self.global_tokenizer = AutoTokenizer.from_pretrained(self.backbone_diffusion_path,
-            #                                                       subfolder="tokenizer")
             self.global_tokenizer = AutoTokenizer.from_pretrained("myn0908/stable-diffusion-3", subfolder="tokenizer_2")
 
         if self.global_text_encoder is None:
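
The two new pipeline(...) calls are ordinary transformers summarization pipelines, loaded once and cached on the instance so repeated from_pretrained calls do not reload them. A minimal sketch of the same lazy-initialization pattern, assuming CUDA is available (only the task and model id come from the diff; the enhance() wrapper and module-level cache are illustrative):

    from transformers import pipeline

    _enhancer = None  # module-level cache, mirroring self.global_medium_prompt

    def enhance(prompt):
        global _enhancer
        if _enhancer is None:  # load the summarization model only on first use
            _enhancer = pipeline("summarization", model="gokaygokay/Lamini-Prompt-Enchance", device="cuda")
        result = _enhancer("Enhance the description: " + prompt)
        return result[0]["summary_text"]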
S2I/modules/sketch2image.py CHANGED
@@ -72,6 +72,25 @@ class Sketch2ImagePipeline(PrimaryModel):
         self.global_unet.set_adapters(["default"], weights=[r])
         set_weights_and_activate_adapters(self.global_vae, ["vae_skip"], [r])
 
+    def automatic_enhance_prompt(self, input_prompt, model_choice):
+
+        if model_choice == "short-sentences":
+            result = self.global_medium_prompt("Enhance the description: " + input_prompt)
+            enhanced_text = result[0]['summary_text']
+
+            pattern = r'^.*?of\s+(.*?(?:\.|$))'
+            match = re.match(pattern, enhanced_text, re.IGNORECASE | re.DOTALL)
+
+            if match:
+                remaining_text = enhanced_text[match.end():].strip()
+                modified_sentence = match.group(1).capitalize()
+                enhanced_text = modified_sentence + ' ' + remaining_text
+        else:
+            result = self.global_long_prompt("Enhance the description: " + input_prompt)
+            enhanced_text = result[0]['summary_text']
+
+        return enhanced_text
+
     def _move_to_cpu(self, module):
         module.to("cpu")
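
The short-sentences branch post-processes the summarizer's output with a regex that strips a leading "... of" preamble. A worked example on a hypothetical model output (the pattern and flags are copied from the diff; note the method relies on re being imported at the top of sketch2image.py, which this hunk does not show):

    import re

    enhanced_text = "Enhanced description of a cat sitting on a mat. It is fluffy."
    pattern = r'^.*?of\s+(.*?(?:\.|$))'
    match = re.match(pattern, enhanced_text, re.IGNORECASE | re.DOTALL)
    if match:
        remaining_text = enhanced_text[match.end():].strip()  # 'It is fluffy.'
        modified_sentence = match.group(1).capitalize()       # 'A cat sitting on a mat.'
        enhanced_text = modified_sentence + ' ' + remaining_text
    print(enhanced_text)  # A cat sitting on a mat. It is fluffy.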
 
app.py CHANGED
@@ -118,7 +118,7 @@ def get_meta_from_image(input_img, type_image):
     # Convert the processed image back to PIL Image
     img_pil = Image.fromarray(processed_img.astype('uint8'))
 
-    return img_pil
+    return img_pil, 'URL'
 
 
 with gr.Blocks(css=css, theme="NoCrypt/miku@1.2.1") as demo:
@@ -267,10 +267,20 @@ with gr.Blocks(css=css, theme="NoCrypt/miku@1.2.1") as demo:
             clear_button = gr.Button("Reset Sketch Session", min_width=10, variant='primary')
         with gr.Accordion("S2I Advances Option", open=True):
             with gr.Row():
-                input_type = gr.Radio(
-                    choices=["live-sketch", "url-sketch"],
-                    value="live-sketch",
-                    label="Type Sketch2Image models",
+                # input_type = gr.Radio(
+                #     choices=["live-sketch", "url-sketch"],
+                #     value="live-sketch",
+                #     label="Type Sketch2Image models",
+                #     interactive=True)
+
+                input_type = gr.Textbox(
+                    label="Check URL or Real-time Input",
+                    interactive=True)
+
+                prompt_quality = gr.Radio(
+                    choices=["short-sentences", "long-sentences"],
+                    value="short-sentences",
+                    label="Long/Short of Text Prompt",
                     interactive=True)
 
                 style = gr.Dropdown(
@@ -307,7 +317,7 @@ with gr.Blocks(css=css, theme="NoCrypt/miku@1.2.1") as demo:
         queue=False,
         api_name=False,
     )
-    inputs = [zero_gpu_options, image, prompt, prompt_temp, style, seed, val_r, half_model, model_options, input_type]
+    inputs = [zero_gpu_options, image, prompt, prompt_temp, style, seed, val_r, half_model, model_options, input_type, prompt_quality]
     outputs = [result]
     prompt.submit(fn=assign_gpu, inputs=inputs, outputs=outputs, api_name=False)
@@ -328,8 +338,8 @@ with gr.Blocks(css=css, theme="NoCrypt/miku@1.2.1") as demo:
     val_r.change(assign_gpu, inputs=inputs, outputs=outputs, queue=False, api_name=False)
     run_button.click(fn=assign_gpu, inputs=inputs, outputs=outputs, api_name=False)
     image.change(assign_gpu, inputs=inputs, outputs=outputs, queue=False, api_name=False)
-    url_image.submit(fn=get_meta_from_image, inputs=[url_image, type_image], outputs=[image])
-    url_image.change(fn=get_meta_from_image, inputs=[url_image, type_image], outputs=[image])
+    url_image.submit(fn=get_meta_from_image, inputs=[url_image, type_image], outputs=[image, input_type])
+    url_image.change(fn=get_meta_from_image, inputs=[url_image, type_image], outputs=[image, input_type])
 if __name__ == '__main__':
     demo.queue()
     demo.launch(debug=True, share=False)
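
The get_meta_from_image change works because a gradio event handler that returns a tuple distributes the values across the outputs list in order: here the second value, 'URL', lands in the input_type Textbox, which artwork() later receives as type_flag. A minimal self-contained sketch of the same wiring (the components and the placeholder fetcher below are illustrative, not the repo's):

    import gradio as gr
    from PIL import Image

    def get_meta_from_image(url, type_image):
        img_pil = Image.new("RGB", (512, 512), "white")  # stand-in for the fetched sketch
        return img_pil, 'URL'                            # second value fills the input_type Textbox

    with gr.Blocks() as demo:
        url_image = gr.Textbox(label="Image URL")
        type_image = gr.Textbox(value="RGB", visible=False)
        image = gr.Image(label="Sketch")
        input_type = gr.Textbox(label="Check URL or Real-time Input", interactive=True)
        url_image.submit(fn=get_meta_from_image, inputs=[url_image, type_image], outputs=[image, input_type])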