songweig committed
Commit d908d7c
1 Parent(s): bdf1746

reduce memory footprint

Files changed (2)
  1. app.py +161 -161
  2. models/region_diffusion_xl.py +11 -6
app.py CHANGED
@@ -260,45 +260,114 @@ def main():
260      with gr.Row():
261      gr.Markdown(help_text)
262
263      with gr.Row():
264  -   footnote_examples = [
265      [
266  -   '{"ops":[{"insert":"A close-up 4k dslr photo of a "},{"attributes":{"link":"A cat wearing sunglasses and a bandana around its neck."},"insert":"cat"},{"insert":" riding a scooter. Palm trees in the background."}]}',
267  -   '',
268  -   9,
269      0.3,
270      0.3,
271      0.5,
272  -   3,
273  -   0,
274      None,
275      ],
276      [
277  -   '{"ops":[{"insert":"A cozy "},{"attributes":{"link":"A charming wooden cabin with Christmas decoration, warm light coming out from the windows."},"insert":"cabin"},{"insert":" nestled in a "},{"attributes":{"link":"Towering evergreen trees covered in a thick layer of pristine snow."},"insert":"snowy forest"},{"insert":", and a "},{"attributes":{"link":"A cute snowman wearing a carrot nose, coal eyes, and a colorful scarf, welcoming visitors with a cheerful vibe."},"insert":"snowman"},{"insert":" stands in the yard."}]}',
278      '',
279  -   12,
280  -   0.4,
281  -   0.3,
282      0.5,
283  -   3,
284  -   0,
285  -   None,
286  -   ],
287  -   [
288  -   '{"ops":[{"insert":"A "},{"attributes":{"link":"Happy Kung fu panda art, elder, asian art, volumetric lighting, dramatic scene, ultra detailed, realism, chinese"},"insert":"panda"},{"insert":" standing on a cliff by a waterfall, wildlife photography, photograph, high quality, wildlife, f 1.8, soft focus, 8k, national geographic, award - winning photograph by nick nichols"}]}',
289  -   '',
290  -   5,
291      0.3,
292  -   0,
293  -   0.1,
294  -   4,
295  -   0,
296      None,
297      ],
298      ]
299  -
300  -   gr.Examples(examples=footnote_examples,
301  -   label='Footnote examples',
302      inputs=[
303      text_input,
304      negative_prompt,
@@ -319,55 +388,93 @@ def main():
319      fn=generate,
320      cache_examples=True,
321      examples_per_page=20)
322      # with gr.Row():
323  -   # color_examples = [
324      # [
325  -   # '{"ops":[{"insert":"a beautifule girl with big eye, skin, and long "},{"attributes":{"color":"#04a704"},"insert":"hair"},{"insert":", t-shirt, bursting with vivid color, intricate, elegant, highly detailed, photorealistic, digital painting, artstation, illustration, concept art."}]}',
326  -   # 'lowres, had anatomy, bad hands, cropped, worst quality',
327  -   # 11,
328  -   # 0.5,
329  -   # 0.3,
330  -   # 0.3,
331  -   # 6,
332  -   # 0.5,
333      # None,
334      # ],
335      # [
336  -   # '{"ops":[{"insert":"a beautifule girl with big eye, skin, and long "},{"attributes":{"color":"#ff5df1"},"insert":"hair"},{"insert":", t-shirt, bursting with vivid color, intricate, elegant, highly detailed, photorealistic, digital painting, artstation, illustration, concept art."}]}',
337  -   # 'lowres, had anatomy, bad hands, cropped, worst quality',
338  -   # 11,
339  -   # 0.5,
340  -   # 0.3,
341  -   # 0.3,
342      # 6,
343      # 0.5,
344      # None,
345      # ],
346      # [
347  -   # '{"ops":[{"insert":"a beautifule girl with big eye, skin, and long "},{"attributes":{"color":"#999999"},"insert":"hair"},{"insert":", t-shirt, bursting with vivid color, intricate, elegant, highly detailed, photorealistic, digital painting, artstation, illustration, concept art."}]}',
348  -   # 'lowres, had anatomy, bad hands, cropped, worst quality',
349  -   # 11,
350  -   # 0.5,
351      # 0.3,
352      # 0.3,
353  -   # 6,
354  -   # 0.5,
355      # None,
356      # ],
357      # [
358  -   # '{"ops":[{"insert":"a Gothic "},{"attributes":{"color":"#FD6C9E"},"insert":"church"},{"insert":" in a the sunset with a beautiful landscape in the background."}]}',
359      # '',
360  -   # 10,
361  -   # 0.5,
362  -   # 0.5,
363      # 0.3,
364  -   # 7,
365  -   # 0.5,
366      # None,
367      # ],
368      # ]
369  -   # gr.Examples(examples=color_examples,
370  -   # label='Font color examples',
371      # inputs=[
372      # text_input,
373      # negative_prompt,
@@ -388,113 +495,6 @@ def main():
388      # fn=generate,
389      # cache_examples=True,
390      # examples_per_page=20)
391  -
392  -   with gr.Row():
393  -   style_examples = [
394  -   [
395  -   '{"ops":[{"insert":"a beautiful"},{"attributes":{"font":"mirza"},"insert":" garden"},{"insert":" with a "},{"attributes":{"font":"roboto"},"insert":"snow mountain"},{"insert":" in the background"}]}',
396  -   '',
397  -   10,
398  -   0.6,
399  -   0,
400  -   0.4,
401  -   5,
402  -   0,
403  -   None,
404  -   ],
405  -   [
406  -   '{"ops":[{"insert":"a night"},{"attributes":{"font":"slabo"},"insert":" sky"},{"insert":" filled with stars above a turbulent"},{"attributes":{"font":"roboto"},"insert":" sea"},{"insert":" with giant waves"}]}',
407  -   '',
408  -   2,
409  -   0.6,
410  -   0,
411  -   0,
412  -   6,
413  -   0.5,
414  -   None,
415  -   ],
416  -   ]
417  -   gr.Examples(examples=style_examples,
418  -   label='Font style examples',
419  -   inputs=[
420  -   text_input,
421  -   negative_prompt,
422  -   num_segments,
423  -   segment_threshold,
424  -   inject_interval,
425  -   inject_background,
426  -   seed,
427  -   color_guidance_weight,
428  -   rich_text_input,
429  -   ],
430  -   outputs=[
431  -   plaintext_result,
432  -   richtext_result,
433  -   segments,
434  -   token_map,
435  -   ],
436  -   fn=generate,
437  -   cache_examples=True,
438  -   examples_per_page=20)
439  -
440  -   with gr.Row():
441  -   size_examples = [
442  -   [
443  -   '{"ops": [{"insert": "A pizza with "}, {"attributes": {"size": "60px"}, "insert": "pineapple"}, {"insert": " pepperoni, and mushroom on the top"}]}',
444  -   '',
445  -   5,
446  -   0.3,
447  -   0,
448  -   0,
449  -   3,
450  -   1,
451  -   None,
452  -   ],
453  -   [
454  -   '{"ops": [{"insert": "A pizza with pineapple, "}, {"attributes": {"size": "60px"}, "insert": "pepperoni"}, {"insert": ", and mushroom on the top"}]}',
455  -   '',
456  -   5,
457  -   0.3,
458  -   0,
459  -   0,
460  -   3,
461  -   1,
462  -   None,
463  -   ],
464  -   [
465  -   '{"ops": [{"insert": "A pizza with pineapple, pepperoni, and "}, {"attributes": {"size": "60px"}, "insert": "mushroom"}, {"insert": " on the top"}]}',
466  -   '',
467  -   5,
468  -   0.3,
469  -   0,
470  -   0,
471  -   3,
472  -   1,
473  -   None,
474  -   ],
475  -   ]
476  -   gr.Examples(examples=size_examples,
477  -   label='Font size examples',
478  -   inputs=[
479  -   text_input,
480  -   negative_prompt,
481  -   num_segments,
482  -   segment_threshold,
483  -   inject_interval,
484  -   inject_background,
485  -   seed,
486  -   color_guidance_weight,
487  -   rich_text_input,
488  -   ],
489  -   outputs=[
490  -   plaintext_result,
491  -   richtext_result,
492  -   segments,
493  -   token_map,
494  -   ],
495  -   fn=generate,
496  -   cache_examples=True,
497  -   examples_per_page=20)
498      generate_button.click(fn=lambda: gr.update(visible=False), inputs=None, outputs=share_row, queue=False).then(
499      fn=generate,
500      inputs=[
 
260      with gr.Row():
261      gr.Markdown(help_text)
262
263  +   # with gr.Row():
264  +   # footnote_examples = [
265  +   # [
266  +   # '{"ops":[{"insert":"A close-up 4k dslr photo of a "},{"attributes":{"link":"A cat wearing sunglasses and a bandana around its neck."},"insert":"cat"},{"insert":" riding a scooter. Palm trees in the background."}]}',
267  +   # '',
268  +   # 9,
269  +   # 0.3,
270  +   # 0.3,
271  +   # 0.5,
272  +   # 3,
273  +   # 0,
274  +   # None,
275  +   # ],
276  +   # [
277  +   # '{"ops":[{"insert":"A cozy "},{"attributes":{"link":"A charming wooden cabin with Christmas decoration, warm light coming out from the windows."},"insert":"cabin"},{"insert":" nestled in a "},{"attributes":{"link":"Towering evergreen trees covered in a thick layer of pristine snow."},"insert":"snowy forest"},{"insert":", and a "},{"attributes":{"link":"A cute snowman wearing a carrot nose, coal eyes, and a colorful scarf, welcoming visitors with a cheerful vibe."},"insert":"snowman"},{"insert":" stands in the yard."}]}',
278  +   # '',
279  +   # 12,
280  +   # 0.4,
281  +   # 0.3,
282  +   # 0.5,
283  +   # 3,
284  +   # 0,
285  +   # None,
286  +   # ],
287  +   # [
288  +   # '{"ops":[{"insert":"A "},{"attributes":{"link":"Happy Kung fu panda art, elder, asian art, volumetric lighting, dramatic scene, ultra detailed, realism, chinese"},"insert":"panda"},{"insert":" standing on a cliff by a waterfall, wildlife photography, photograph, high quality, wildlife, f 1.8, soft focus, 8k, national geographic, award - winning photograph by nick nichols"}]}',
289  +   # '',
290  +   # 5,
291  +   # 0.3,
292  +   # 0,
293  +   # 0.1,
294  +   # 4,
295  +   # 0,
296  +   # None,
297  +   # ],
298  +   # ]
299  +
300  +   # gr.Examples(examples=footnote_examples,
301  +   # label='Footnote examples',
302  +   # inputs=[
303  +   # text_input,
304  +   # negative_prompt,
305  +   # num_segments,
306  +   # segment_threshold,
307  +   # inject_interval,
308  +   # inject_background,
309  +   # seed,
310  +   # color_guidance_weight,
311  +   # rich_text_input,
312  +   # ],
313  +   # outputs=[
314  +   # plaintext_result,
315  +   # richtext_result,
316  +   # segments,
317  +   # token_map,
318  +   # ],
319  +   # fn=generate,
320  +   # cache_examples=True,
321  +   # examples_per_page=20)
322      with gr.Row():
323  +   color_examples = [
324  +   # [
325  +   # '{"ops":[{"insert":"a beautifule girl with big eye, skin, and long "},{"attributes":{"color":"#04a704"},"insert":"hair"},{"insert":", t-shirt, bursting with vivid color, intricate, elegant, highly detailed, photorealistic, digital painting, artstation, illustration, concept art."}]}',
326  +   # 'lowres, had anatomy, bad hands, cropped, worst quality',
327  +   # 11,
328  +   # 0.5,
329  +   # 0.3,
330  +   # 0.3,
331  +   # 6,
332  +   # 0.5,
333  +   # None,
334  +   # ],
335  +   # [
336  +   # '{"ops":[{"insert":"a beautifule girl with big eye, skin, and long "},{"attributes":{"color":"#ff5df1"},"insert":"hair"},{"insert":", t-shirt, bursting with vivid color, intricate, elegant, highly detailed, photorealistic, digital painting, artstation, illustration, concept art."}]}',
337  +   # 'lowres, had anatomy, bad hands, cropped, worst quality',
338  +   # 11,
339  +   # 0.5,
340  +   # 0.3,
341  +   # 0.3,
342  +   # 6,
343  +   # 0.5,
344  +   # None,
345  +   # ],
346      [
347  +   '{"ops":[{"insert":"a beautifule girl with big eye, skin, and long "},{"attributes":{"color":"#999999"},"insert":"hair"},{"insert":", t-shirt, bursting with vivid color, intricate, elegant, highly detailed, photorealistic, digital painting, artstation, illustration, concept art."}]}',
348  +   'lowres, had anatomy, bad hands, cropped, worst quality',
349  +   11,
350  +   0.5,
351      0.3,
352      0.3,
353  +   6,
354      0.5,
355      None,
356      ],
357      [
358  +   '{"ops":[{"insert":"a Gothic "},{"attributes":{"color":"#FD6C9E"},"insert":"church"},{"insert":" in a the sunset with a beautiful landscape in the background."}]}',
359      '',
360  +   10,
361  +   0.5,
362      0.5,
363      0.3,
364  +   7,
365  +   0.5,
366      None,
367      ],
368      ]
369  +   gr.Examples(examples=color_examples,
370  +   label='Font color examples',
371      inputs=[
372      text_input,
373      negative_prompt,

388      fn=generate,
389      cache_examples=True,
390      examples_per_page=20)
391  +
392      # with gr.Row():
393  +   # style_examples = [
394      # [
395  +   # '{"ops":[{"insert":"a beautiful"},{"attributes":{"font":"mirza"},"insert":" garden"},{"insert":" with a "},{"attributes":{"font":"roboto"},"insert":"snow mountain"},{"insert":" in the background"}]}',
396  +   # '',
397  +   # 10,
398  +   # 0.6,
399  +   # 0,
400  +   # 0.4,
401  +   # 5,
402  +   # 0,
403      # None,
404      # ],
405      # [
406  +   # '{"ops":[{"insert":"a night"},{"attributes":{"font":"slabo"},"insert":" sky"},{"insert":" filled with stars above a turbulent"},{"attributes":{"font":"roboto"},"insert":" sea"},{"insert":" with giant waves"}]}',
407  +   # '',
408  +   # 2,
409  +   # 0.6,
410  +   # 0,
411  +   # 0,
412      # 6,
413      # 0.5,
414      # None,
415      # ],
416  +   # ]
417  +   # gr.Examples(examples=style_examples,
418  +   # label='Font style examples',
419  +   # inputs=[
420  +   # text_input,
421  +   # negative_prompt,
422  +   # num_segments,
423  +   # segment_threshold,
424  +   # inject_interval,
425  +   # inject_background,
426  +   # seed,
427  +   # color_guidance_weight,
428  +   # rich_text_input,
429  +   # ],
430  +   # outputs=[
431  +   # plaintext_result,
432  +   # richtext_result,
433  +   # segments,
434  +   # token_map,
435  +   # ],
436  +   # fn=generate,
437  +   # cache_examples=True,
438  +   # examples_per_page=20)
439  +
440  +   # with gr.Row():
441  +   # size_examples = [
442      # [
443  +   # '{"ops": [{"insert": "A pizza with "}, {"attributes": {"size": "60px"}, "insert": "pineapple"}, {"insert": " pepperoni, and mushroom on the top"}]}',
444  +   # '',
445  +   # 5,
446      # 0.3,
447  +   # 0,
448  +   # 0,
449  +   # 3,
450  +   # 1,
451  +   # None,
452  +   # ],
453  +   # [
454  +   # '{"ops": [{"insert": "A pizza with pineapple, "}, {"attributes": {"size": "60px"}, "insert": "pepperoni"}, {"insert": ", and mushroom on the top"}]}',
455  +   # '',
456  +   # 5,
457      # 0.3,
458  +   # 0,
459  +   # 0,
460  +   # 3,
461  +   # 1,
462      # None,
463      # ],
464      # [
465  +   # '{"ops": [{"insert": "A pizza with pineapple, pepperoni, and "}, {"attributes": {"size": "60px"}, "insert": "mushroom"}, {"insert": " on the top"}]}',
466      # '',
467  +   # 5,
468      # 0.3,
469  +   # 0,
470  +   # 0,
471  +   # 3,
472  +   # 1,
473      # None,
474      # ],
475      # ]
476  +   # gr.Examples(examples=size_examples,
477  +   # label='Font size examples',
478      # inputs=[
479      # text_input,
480      # negative_prompt,

495      # fn=generate,
496      # cache_examples=True,
497      # examples_per_page=20)
498      generate_button.click(fn=lambda: gr.update(visible=False), inputs=None, outputs=share_row, queue=False).then(
499      fn=generate,
500      inputs=[
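The app.py changes above comment out the footnote, font-style, and font-size example galleries and leave only two font-color examples active. Because every gr.Examples block here uses cache_examples=True, its outputs are pre-generated when the Space starts, so trimming the galleries plausibly reduces both startup time and the memory the demo holds onto. A minimal sketch of the same idea with caching gated behind a flag instead of commenting the blocks out; the RICH_TEXT_CACHE_EXAMPLES variable and register_examples helper are illustrative and not part of the repository:

    import os
    import gradio as gr

    # Hypothetical flag: cache example outputs only when explicitly requested,
    # so the Space does not pre-generate every gallery at startup.
    CACHE_EXAMPLES = os.getenv("RICH_TEXT_CACHE_EXAMPLES", "0") == "1"

    def register_examples(examples, label, inputs, outputs, fn):
        """Register an example gallery; skip output caching unless the flag is set."""
        return gr.Examples(examples=examples,
                           label=label,
                           inputs=inputs,
                           outputs=outputs,
                           fn=fn,
                           cache_examples=CACHE_EXAMPLES,
                           examples_per_page=20)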
models/region_diffusion_xl.py CHANGED
@@ -846,12 +846,16 @@ class RegionDiffusionXL(DiffusionPipeline, FromSingleFileMixin):
846      # apply guidance
847      if use_guidance and t < text_format_dict['guidance_start_step']:
848      with torch.enable_grad():
849      if not latents.requires_grad:
850      latents.requires_grad = True
851      # import ipdb;ipdb.set_trace()
852  -   latents_0 = self.predict_x0(latents, noise_pred, t).to(dtype=latents.dtype)
853      latents_inp = latents_0 / self.vae.config.scaling_factor
854  -   imgs = self.vae.decode(latents_inp.to(dtype=torch.float32)).sample
855      imgs = (imgs / 2 + 0.5).clamp(0, 1)
856      loss_total = 0.
857      for attn_map, rgb_val in zip(text_format_dict['color_obj_atten'], text_format_dict['target_RGB']):

@@ -863,6 +867,7 @@ class RegionDiffusionXL(DiffusionPipeline, FromSingleFileMixin):
863      loss_total.backward()
864      latents = (
865      latents - latents.grad * text_format_dict['color_guidance_weight'] * text_format_dict['color_obj_atten_all']).detach().clone().to(dtype=prompt_embeds.dtype)
866
867      # apply background injection
868      if i == int(inject_background * len(self.scheduler.timesteps)) and inject_background > 0:

@@ -1023,7 +1028,7 @@ class RegionDiffusionXL(DiffusionPipeline, FromSingleFileMixin):
1023     PyTorch Forward hook to save outputs at each forward pass.
1024     """
1025     if 'attn1' in name:
1026  -  modified_args = (args[0], self.self_attention_maps_cur[name])
1027     return modified_args
1028     # cross attention injection
1029     # elif 'attn2' in name:

@@ -1039,7 +1044,7 @@ class RegionDiffusionXL(DiffusionPipeline, FromSingleFileMixin):
1039     PyTorch Forward hook to save outputs at each forward pass.
1040     """
1041     modified_args = (args[0], args[1],
1042  -  self.self_attention_maps_cur[name])
1043     return modified_args
1044     for name, module in self.unet.named_modules():
1045     leaf_name = name.split('.')[-1]

@@ -1077,7 +1082,7 @@ class RegionDiffusionXL(DiffusionPipeline, FromSingleFileMixin):
1077     # activations[name] = out[1][1].detach()
1078     else:
1079     assert out[1][1].shape[-1] != 77
1080  -  activations[name] = out[1][1].detach()
1081
1082     def save_resnet_activations(activations, name, module, inp, out):
1083     r"""

@@ -1087,7 +1092,7 @@ class RegionDiffusionXL(DiffusionPipeline, FromSingleFileMixin):
1087     # out[1] - residual hidden feature
1088     # import ipdb;ipdb.set_trace()
1089     assert out[1].shape[-1] == 64
1090  -  activations[name] = out[1].detach()
1091     attention_dict = collections.defaultdict(list)
1092     for name, module in self.unet.named_modules():
1093     leaf_name = name.split('.')[-1]
 
846      # apply guidance
847      if use_guidance and t < text_format_dict['guidance_start_step']:
848      with torch.enable_grad():
849  +   self.unet.to(device='cpu')
850  +   torch.cuda.empty_cache()
851      if not latents.requires_grad:
852      latents.requires_grad = True
853      # import ipdb;ipdb.set_trace()
854  +   # latents_0 = self.predict_x0(latents, noise_pred, t).to(dtype=latents.dtype)
855  +   latents_0 = self.predict_x0(latents, noise_pred, t).to(dtype=torch.bfloat16)
856      latents_inp = latents_0 / self.vae.config.scaling_factor
857  +   imgs = self.vae.to(dtype=latents_inp.dtype).decode(latents_inp).sample
858  +   # imgs = self.vae.decode(latents_inp.to(dtype=torch.float32)).sample
859      imgs = (imgs / 2 + 0.5).clamp(0, 1)
860      loss_total = 0.
861      for attn_map, rgb_val in zip(text_format_dict['color_obj_atten'], text_format_dict['target_RGB']):

867      loss_total.backward()
868      latents = (
869      latents - latents.grad * text_format_dict['color_guidance_weight'] * text_format_dict['color_obj_atten_all']).detach().clone().to(dtype=prompt_embeds.dtype)
870  +   self.unet.to(device=latents.device)
871
872      # apply background injection
873      if i == int(inject_background * len(self.scheduler.timesteps)) and inject_background > 0:

1028     PyTorch Forward hook to save outputs at each forward pass.
1029     """
1030     if 'attn1' in name:
1031  +  modified_args = (args[0], self.self_attention_maps_cur[name].to(args[0].device))
1032     return modified_args
1033     # cross attention injection
1034     # elif 'attn2' in name:

1044     PyTorch Forward hook to save outputs at each forward pass.
1045     """
1046     modified_args = (args[0], args[1],
1047  +  self.self_attention_maps_cur[name].to(args[0].device))
1048     return modified_args
1049     for name, module in self.unet.named_modules():
1050     leaf_name = name.split('.')[-1]

1082     # activations[name] = out[1][1].detach()
1083     else:
1084     assert out[1][1].shape[-1] != 77
1085  +  activations[name] = out[1][1].detach().cpu()
1086
1087     def save_resnet_activations(activations, name, module, inp, out):
1088     r"""

1092     # out[1] - residual hidden feature
1093     # import ipdb;ipdb.set_trace()
1094     assert out[1].shape[-1] == 64
1095  +  activations[name] = out[1].detach().cpu()
1096     attention_dict = collections.defaultdict(list)
1097     for name, module in self.unet.named_modules():
1098     leaf_name = name.split('.')[-1]
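Taken together, the models/region_diffusion_xl.py changes lower peak GPU memory during the gradient-based color-guidance step: the UNet is parked on the CPU and the CUDA cache is emptied before the VAE decodes the predicted x0, the decode runs in bfloat16 instead of float32, saved self-attention maps are moved onto the hook input's device when reused, and hook activations are stored on the CPU. A minimal, generic sketch of the offload-and-restore part of that pattern; the offload_to_cpu helper and run_color_guidance wrapper are illustrative names, not functions from the repository:

    import contextlib
    import torch

    @contextlib.contextmanager
    def offload_to_cpu(module: torch.nn.Module, device: torch.device):
        """Temporarily move `module` to the CPU so another model can use the freed VRAM."""
        module.to(device="cpu")
        torch.cuda.empty_cache()        # release cached blocks the module was occupying
        try:
            yield
        finally:
            module.to(device=device)    # restore the module for the next denoising step

    def run_color_guidance(pipe, latents, noise_pred, t):
        """Decode the predicted x0 with the VAE while the UNet is offloaded (sketch)."""
        device = latents.device
        with offload_to_cpu(pipe.unet, device), torch.enable_grad():
            if not latents.requires_grad:
                latents.requires_grad = True
            # bfloat16 decode roughly halves VAE activation memory versus float32
            latents_0 = pipe.predict_x0(latents, noise_pred, t).to(dtype=torch.bfloat16)
            imgs = pipe.vae.to(dtype=torch.bfloat16).decode(
                latents_0 / pipe.vae.config.scaling_factor).sample
            return (imgs / 2 + 0.5).clamp(0, 1)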