Spaces:

multimodalart
/

vqgan

Paused

App Files Files Community

apolinario committed on May 5, 2022

Commit

ad1fd8e

•

1 Parent(s): ca37dd4

VQGAN attempt

Browse files

Files changed (3) hide show

app.py +34 -17
flavors.jpg +0 -0
requirements.txt +2 -1

app.py CHANGED Viewed

@@ -32,7 +32,7 @@ import subprocess
 import imageio
 from PIL import ImageFile, Image
 import time
 import hashlib
 from PIL.PngImagePlugin import PngImageFile, PngInfo
@@ -41,6 +41,7 @@ import urllib.request
 from random import randint
 from pathvalidate import sanitize_filename
 from huggingface_hub import hf_hub_download
 device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
 print("Using device:", device)
@@ -75,7 +76,7 @@ perceptor = (
     .requires_grad_(False)
     .to(device)
 )
-def run_all(user_input,num_steps, template, width,height):
     import random
     #if uploaded_file is not None:
         #uploaded_folder = f"{DefaultPaths.root_path}/uploaded"
@@ -89,8 +90,7 @@ def run_all(user_input,num_steps, template, width,height):
         #pass
     #else:
     image_path = None
-    flavor = 'cumin'
     args2 = argparse.Namespace(
             prompt=user_input,
             seed=int(random.randint(0, 2147483647)),
@@ -103,7 +103,7 @@ def run_all(user_input,num_steps, template, width,height):
             template=template,
             vqgan_model='ImageNet 16384',
             seed_image=image_path,
-            image_file="progress.png",
             #frame_dir=intermediary_folder,
          )
     if args2.seed is not None:
@@ -1299,6 +1299,7 @@ def run_all(user_input,num_steps, template, width,height):
                 z_orig = z.tensor.clone()
             z.requires_grad_(True)
             # opt = optim.AdamW(z.parameters(), lr=args.mse_step_size, weight_decay=0.00000000)
             if self.normal_flip_optim == True:
                 if randint(1, 2) == 1:
                     opt = torch.optim.AdamW(
@@ -1430,8 +1431,7 @@ def run_all(user_input,num_steps, template, width,height):
             sys.stdout.write("Iteration {}".format(i) + "\n")
             sys.stdout.flush()
-            if i % args2.update == 0:
                 self.checkin(i, lossAll, x)
             loss = sum(lossAll)
@@ -1493,6 +1493,8 @@ def run_all(user_input,num_steps, template, width,height):
         def run(self, x):
             j = 0
             try:
                 before_start_time = time.perf_counter()
                 total_steps = int(args.max_iterations + args.mse_end) - 1
                 for _ in range(total_steps):
@@ -1516,9 +1518,9 @@ def run_all(user_input,num_steps, template, width,height):
                 import shutil
                 import os
-                image_data = Image.open(args2.image_file)
-                print(image_data)
-                return(image_data)
             except KeyboardInterrupt:
                 pass
@@ -2289,14 +2291,16 @@ def run_all(user_input,num_steps, template, width,height):
                 is_gumbel=is_gumbel,
                 gen_seed=gen_seed,
             )
             mh = ModelHost(args)
             x = 0
             #for x in range(batch_size):
             mh.setup_model(x)
-            last_iter = mh.run(x)
-            return(last_iter)
             #x = x + 1
         if zoom:
@@ -2322,18 +2326,31 @@ def run_all(user_input,num_steps, template, width,height):
 ##################### START GRADIO HERE ############################
 image = gr.outputs.Image(type="pil", label="Your result")
 iface = gr.Interface(
     fn=run_all,
     inputs=[
     gr.inputs.Textbox(label="Prompt - try adding increments to your prompt such as 'oil on canvas', 'a painting', 'a book cover'",default="chalk pastel drawing of a dog wearing a funny hat"),
-    gr.inputs.Slider(label="Steps - more steps can increase quality but will take longer to generate",default=300,maximum=300,minimum=1,step=1),
-    gr.inputs.Dropdown(label="Style",choices=["none","Balanced","Detailed","Consistent Creativity","Realistic","Smooth","Subtle MSE","Hyper Fast Results"]),
     gr.inputs.Radio(label="Width", choices=[32,64,128,256,512],default=256),
     gr.inputs.Radio(label="Height", choices=[32,64,128,256,512],default=256),
     ],
     outputs=image,
-    title="Generate images from text with VQGAN+CLIP",
     #description="<div>By typing a prompt and pressing submit you can generate images based on this prompt. <a href='https://github.com/CompVis/latent-diffusion' target='_blank'>Latent Diffusion</a> is a text-to-image model created by <a href='https://github.com/CompVis' target='_blank'>CompVis</a>, trained on the <a href='https://laion.ai/laion-400-open-dataset/'>LAION-400M dataset.</a><br>This UI to the model was assembled by <a style='color: rgb(245, 158, 11);font-weight:bold' href='https://twitter.com/multimodalart' target='_blank'>@multimodalart</a></div>",
     #article="<h4 style='font-size: 110%;margin-top:.5em'>Biases acknowledgment</h4><div>Despite how impressive being able to turn text into image is, beware of the fact that this model may output content that reinforces or exacerbates societal biases. According to the <a href='https://arxiv.org/abs/2112.10752' target='_blank'>Latent Diffusion paper</a>:<i> \"Deep learning modules tend to reproduce or exacerbate biases that are already present in the data\"</i>. The model was trained on an unfiltered version of the LAION-400M dataset, which scraped non-curated image-text-pairs from the internet (the exception being the removal of illegal content) and is meant to be used for research purposes, such as this one. <a href='https://laion.ai/laion-400-open-dataset/' target='_blank'>You can read more on LAION's website</a></div><h4 style='font-size: 110%;margin-top:1em'>Who owns the images produced by this demo?</h4><div>Definitely not me! Probably you do. I say probably because the Copyright discussion about AI generated art is ongoing. So <a href='https://www.theverge.com/2022/2/21/22944335/us-copyright-office-reject-ai-generated-art-recent-entrance-to-paradise' target='_blank'>it may be the case that everything produced here falls automatically into the public domain</a>. But in any case it is either yours or is in the public domain.</div>"
     )
-iface.launch(enable_queue=True)

 import imageio
 from PIL import ImageFile, Image
 import time
+import base64
 import hashlib
 from PIL.PngImagePlugin import PngImageFile, PngInfo
 from random import randint
 from pathvalidate import sanitize_filename
 from huggingface_hub import hf_hub_download
+import shortuuid
 device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
 print("Using device:", device)
     .requires_grad_(False)
     .to(device)
 )
+def run_all(user_input, num_steps, flavor, markdown, template, width,height):
     import random
     #if uploaded_file is not None:
         #uploaded_folder = f"{DefaultPaths.root_path}/uploaded"
         #pass
     #else:
     image_path = None
+    url = shortuuid.uuid()
     args2 = argparse.Namespace(
             prompt=user_input,
             seed=int(random.randint(0, 2147483647)),
             template=template,
             vqgan_model='ImageNet 16384',
             seed_image=image_path,
+            image_file=f"{url}.png",
             #frame_dir=intermediary_folder,
          )
     if args2.seed is not None:
                 z_orig = z.tensor.clone()
             z.requires_grad_(True)
             # opt = optim.AdamW(z.parameters(), lr=args.mse_step_size, weight_decay=0.00000000)
+            print("Step size inside:", args.step_size)
             if self.normal_flip_optim == True:
                 if randint(1, 2) == 1:
                     opt = torch.optim.AdamW(
             sys.stdout.write("Iteration {}".format(i) + "\n")
             sys.stdout.flush()
+            if i % (args2.iterations-2) == 0:
                 self.checkin(i, lossAll, x)
             loss = sum(lossAll)
         def run(self, x):
             j = 0
             try:
+                print("Step size: ", args.step_size)
+                print("Step MSE size: ", args.mse_step_size)
                 before_start_time = time.perf_counter()
                 total_steps = int(args.max_iterations + args.mse_end) - 1
                 for _ in range(total_steps):
                 import shutil
                 import os
+                #image_data = Image.open(args2.image_file)
+                #os.remove(args2.image_file)
+                #return(image_data)
             except KeyboardInterrupt:
                 pass
                 is_gumbel=is_gumbel,
                 gen_seed=gen_seed,
             )
             mh = ModelHost(args)
             x = 0
             #for x in range(batch_size):
             mh.setup_model(x)
+            mh.run(x)
+            image_data = Image.open(args2.image_file)
+            os.remove(args2.image_file)
+            return(image_data)
+            #return(last_iter)
             #x = x + 1
         if zoom:
 ##################### START GRADIO HERE ############################
 image = gr.outputs.Image(type="pil", label="Your result")
+def cvt_2_base64(file_name):
+    with open(file_name , "rb") as image_file :
+        data = base64.b64encode(image_file.read())
+    return data.decode('utf-8')
+base64image = "data:image/jpg;base64,"+cvt_2_base64('flavors.jpg')
+markdown = gr.Markdown("<img src='"+base64image+"' />")
+def test(raw_input):
+    pass
+setattr(markdown, "requires_permissions", False)
+setattr(markdown, "label", "Flavors")
+setattr(markdown, "preprocess", test)
 iface = gr.Interface(
     fn=run_all,
     inputs=[
     gr.inputs.Textbox(label="Prompt - try adding increments to your prompt such as 'oil on canvas', 'a painting', 'a book cover'",default="chalk pastel drawing of a dog wearing a funny hat"),
+    gr.inputs.Slider(label="Steps - more steps can increase quality but will take longer to generate",default=50,maximum=250,minimum=1,step=1),
+    gr.inputs.Dropdown(label="Flavor",choices=["ginger", "cumin", "holywater", "zynth", "wyvern", "aaron", "moth", "juu", "custom"]),
+    markdown,
+    gr.inputs.Dropdown(label="Style",choices=["Default","Balanced","Detailed","Consistent Creativity","Realistic","Smooth","Subtle MSE","Hyper Fast Results"],default="Hyper Fast Results"),
     gr.inputs.Radio(label="Width", choices=[32,64,128,256,512],default=256),
     gr.inputs.Radio(label="Height", choices=[32,64,128,256,512],default=256),
     ],
     outputs=image,
+    title="Generate images from text with VQGAN+CLIP (Hypertron v2)",
     #description="<div>By typing a prompt and pressing submit you can generate images based on this prompt. <a href='https://github.com/CompVis/latent-diffusion' target='_blank'>Latent Diffusion</a> is a text-to-image model created by <a href='https://github.com/CompVis' target='_blank'>CompVis</a>, trained on the <a href='https://laion.ai/laion-400-open-dataset/'>LAION-400M dataset.</a><br>This UI to the model was assembled by <a style='color: rgb(245, 158, 11);font-weight:bold' href='https://twitter.com/multimodalart' target='_blank'>@multimodalart</a></div>",
     #article="<h4 style='font-size: 110%;margin-top:.5em'>Biases acknowledgment</h4><div>Despite how impressive being able to turn text into image is, beware of the fact that this model may output content that reinforces or exacerbates societal biases. According to the <a href='https://arxiv.org/abs/2112.10752' target='_blank'>Latent Diffusion paper</a>:<i> \"Deep learning modules tend to reproduce or exacerbate biases that are already present in the data\"</i>. The model was trained on an unfiltered version of the LAION-400M dataset, which scraped non-curated image-text-pairs from the internet (the exception being the removal of illegal content) and is meant to be used for research purposes, such as this one. <a href='https://laion.ai/laion-400-open-dataset/' target='_blank'>You can read more on LAION's website</a></div><h4 style='font-size: 110%;margin-top:1em'>Who owns the images produced by this demo?</h4><div>Definitely not me! Probably you do. I say probably because the Copyright discussion about AI generated art is ongoing. So <a href='https://www.theverge.com/2022/2/21/22944335/us-copyright-office-reject-ai-generated-art-recent-entrance-to-paradise' target='_blank'>it may be the case that everything produced here falls automatically into the public domain</a>. But in any case it is either yours or is in the public domain.</div>"
     )
+iface.launch()

flavors.jpg ADDED Viewed

requirements.txt CHANGED Viewed

@@ -25,4 +25,5 @@ pathvalidate
 stegano
 imgtag
 timm
-python-xmp-toolkit

 stegano
 imgtag
 timm
+python-xmp-toolkit
+shortuuid