Commit 666a605 by multimodalart
Parent: c24dac7

Final features

Files changed (1)
  1. app.py +119 -82
app.py CHANGED
@@ -6,14 +6,14 @@ import shutil
 from train_dreambooth import run_training
 from convertosd import convert
 from PIL import Image
+from slugify import slugify
+import requests
 import torch
-
 css = '''
 .instruction{position: absolute; top: 0;right: 0;margin-top: 0px !important}
 .arrow{position: absolute;top: 0;right: -8px;margin-top: -8px !important}
 #component-4, #component-3, #component-10{min-height: 0}
 '''
-shutil.unpack_archive("mix.zip", "mix")
 model_to_load = "multimodalart/sd-fine-tunable"
 maximum_concepts = 3

@@ -34,12 +34,13 @@ def swap_text(option):

 def count_files(*inputs):
     file_counter = 0
+    concept_counter = 0
     for i, input in enumerate(inputs):
         if(i < maximum_concepts-1):
-            if(input):
-                files = inputs[i+(maximum_concepts*2)]
-                for j, tile_temp in enumerate(files):
-                    file_counter+= 1
+            files = inputs[i]
+            if(files):
+                concept_counter+=1
+                file_counter+=len(files)
     uses_custom = inputs[-1]
     type_of_thing = inputs[-4]
     if(uses_custom):
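The flattened `*inputs` convention above is easy to misread, which is what the removed indexing got wrong: `count_files` receives only the file widgets followed by four scalars, so the files for concept `i` sit at `inputs[i]`; the `inputs[i+(maximum_concepts*2)]` offset is only valid inside `train`, whose input list is longer (see the `.change()` and `.click()` wiring later in this diff). An illustrative sketch of both layouts, assuming `maximum_concepts = 3` as in this file (the string values are placeholders, not real widget payloads):

```python
# Hypothetical illustration of the two input layouts this app relies on.
maximum_concepts = 3

# count_files inputs: file_collection + [type_of_thing, steps, perc_txt_encoder, swap_auto_calculated]
count_files_inputs = ["files0", "files1", "files2", "object", 800, 30, False]
assert count_files_inputs[0] == "files0"    # inputs[i] -> files for concept i
assert count_files_inputs[-4] == "object"   # type_of_thing
assert count_files_inputs[-1] is False      # swap_auto_calculated

# train inputs: is_visible + concept_collection + file_collection + the same four scalars
train_inputs = ["vis0", "vis1", "vis2", "prompt0", "prompt1", "prompt2",
                "files0", "files1", "files2", "object", 800, 30, False]
i = 0
assert train_inputs[i + maximum_concepts] == "prompt0"     # prompt for concept i
assert train_inputs[i + maximum_concepts * 2] == "files0"  # files for concept i
```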
@@ -49,9 +50,13 @@ def count_files(*inputs):
         Training_Steps = file_counter*200*2
     else:
         Training_Steps = file_counter*200
-    return(gr.update(visible=True, value=f"You are going to train {file_counter} files for {Training_Steps} steps. This should take around {round(Training_Steps/1.5, 2)} seconds, or {round((Training_Steps/1.5)/3600, 2)}. The T4 GPU costs US$0.60 for 1h, so the estimated costs for this training run should be {round(((Training_Steps/1.5)/3600)*0.6, 2)}"))
-def train(*inputs):
+    return(gr.update(visible=True, value=f"You are going to train {concept_counter} {type_of_thing}(s), with {file_counter} images for {Training_Steps} steps. This should take around {round(Training_Steps/1.5, 2)} seconds, or {round((Training_Steps/1.5)/3600, 2)} hours. As a reminder, the T4 GPU costs US$0.60 for 1h. Once training is over, don't forget to swap the hardware back to CPU."))

+def train(*inputs):
+    if "IS_SHARED_UI" in os.environ:
+        raise gr.Error("This Space only works in duplicated instances")
+    if os.path.exists("output_model"): shutil.rmtree('output_model')
+    if os.path.exists("instance_images"): shutil.rmtree('instance_images')
     if os.path.exists("diffusers_model.zip"): os.remove("diffusers_model.zip")
     if os.path.exists("model.ckpt"): os.remove("model.ckpt")
     file_counter = 0
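Both branches above derive `Training_Steps` from the number of uploaded images; the summary string then converts steps to time and cost using the rates it quotes (roughly 1.5 steps per second and US$0.60/h for the T4). A standalone sketch of that arithmetic, useful for sanity-checking the message:

```python
# Sketch of the estimate shown in the training summary (constants taken from this file).
def training_estimate(file_counter: int, doubled: bool = False) -> tuple[int, float, float]:
    steps = file_counter * 200 * (2 if doubled else 1)   # 200 steps per uploaded image
    seconds = steps / 1.5                                # assumed ~1.5 steps/second on a T4
    cost_usd = (seconds / 3600) * 0.60                   # T4 rate: US$0.60 per hour
    return steps, seconds, cost_usd

steps, seconds, cost = training_estimate(10)
print(f"{steps} steps, ~{seconds/3600:.2f} h, ~US${cost:.2f}")  # 2000 steps, ~0.37 h, ~US$0.22
```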
@@ -61,6 +66,8 @@ def train(*inputs):
             os.makedirs('instance_images',exist_ok=True)
             files = inputs[i+(maximum_concepts*2)]
             prompt = inputs[i+maximum_concepts]
+            if(prompt == "" or prompt == None):
+                raise gr.Error("You forgot to define your concept prompt")
             for j, file_temp in enumerate(files):
                 file = Image.open(file_temp.name)
                 width, height = file.size
@@ -84,64 +91,16 @@ def train(*inputs):
         Train_text_encoder_for = int(inputs[-2])
     else:
         Training_Steps = file_counter*200
-    if(type_of_thing == "person"):
-        class_data_dir = "mix"
-        Train_text_encoder_for=100
-        args_txt_encoder = argparse.Namespace(
-            image_captions_filename = True,
-            train_text_encoder = True,
-            pretrained_model_name_or_path=model_to_load,
-            instance_data_dir="instance_images",
-            class_data_dir=class_data_dir,
-            output_dir="output_model",
-            with_prior_preservation=True,
-            prior_loss_weight=1.0,
-            instance_prompt="",
-            seed=42,
-            resolution=512,
-            mixed_precision="fp16",
-            train_batch_size=1,
-            gradient_accumulation_steps=1,
-            gradient_checkpointing=True,
-            use_8bit_adam=True,
-            learning_rate=2e-6,
-            lr_scheduler="polynomial",
-            lr_warmup_steps=0,
-            max_train_steps=Training_Steps,
-            num_class_images=200
-        )
-        args_unet = argparse.Namespace(
-            image_captions_filename = True,
-            train_only_unet=True,
-            Session_dir="output_model",
-            save_starting_step=0,
-            save_n_steps=0,
-            pretrained_model_name_or_path=model_to_load,
-            instance_data_dir="instance_images",
-            output_dir="output_model",
-            instance_prompt="",
-            seed=42,
-            resolution=512,
-            mixed_precision="fp16",
-            train_batch_size=1,
-            gradient_accumulation_steps=1,
-            gradient_checkpointing=False,
-            use_8bit_adam=True,
-            learning_rate=2e-6,
-            lr_scheduler="polynomial",
-            lr_warmup_steps=0,
-            max_train_steps=Training_Steps
-        )
-        run_training(args_txt_encoder)
-        run_training(args_unet)
-    elif(type_of_thing == "object" or type_of_thing == "style"):
-        if(type_of_thing == "object"):
-            Train_text_encoder_for=30
-        elif(type_of_thing == "style"):
-            Train_text_encoder_for=15
-        class_data_dir = None
-    stptxt = int((Training_Steps*Train_text_encoder_for)/100)
-    args_general = argparse.Namespace(
+    if(type_of_thing == "object"):
+        Train_text_encoder_for=30
+    elif(type_of_thing == "person"):
+        Train_text_encoder_for=60
+    elif(type_of_thing == "style"):
+        Train_text_encoder_for=15
+
+    class_data_dir = None
+    stptxt = int((Training_Steps*Train_text_encoder_for)/100)
+    args_general = argparse.Namespace(
         image_captions_filename = True,
         train_text_encoder = True,
         stop_text_encoder_training = stptxt,
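This refactor collapses the person-specific two-pass training (separate text-encoder and UNet runs with prior preservation against the removed "mix" class set) into a single `args_general` namespace. Building an `argparse.Namespace` by hand like this is a common way to drive a CLI-style entry point such as `run_training` without parsing `argv`; a minimal self-contained sketch of the pattern (the stub and its parameter names are illustrative, not taken from `train_dreambooth`):

```python
import argparse

# Stand-in for a CLI-oriented entry point like run_training(args):
# it reads attributes off `args` exactly as if argparse had parsed argv.
def run_training_stub(args: argparse.Namespace) -> None:
    print(f"training {args.max_train_steps} steps at lr={args.learning_rate}")

# Constructing the namespace directly skips ArgumentParser entirely.
args = argparse.Namespace(learning_rate=2e-6, max_train_steps=800)
run_training_stub(args)  # -> training 800 steps at lr=2e-06
```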
@@ -161,11 +120,11 @@ def train(*inputs):
         lr_scheduler="polynomial",
         lr_warmup_steps = 0,
         max_train_steps=Training_Steps,
-    )
-    run_training(args_general)
+    )
+    run_training(args_general)
     torch.cuda.empty_cache()
     #convert("output_model", "model.ckpt")
-    shutil.rmtree('instance_images')
+    #shutil.rmtree('instance_images')
     shutil.make_archive("diffusers_model", 'zip', "output_model")
     torch.cuda.empty_cache()
     return [gr.update(visible=True, value=["diffusers_model.zip"]), gr.update(visible=True), gr.update(visible=True), gr.update(visible=True)]
@@ -178,8 +137,80 @@ def generate(prompt):
     image = pipe(prompt).images[0]
     return(image)

-def push(path):
-    pass
+def push(model_name, where_to_upload, hf_token):
+    if(not os.path.exists("model.ckpt")):
+        convert("output_model", "model.ckpt")
+    from huggingface_hub import HfApi, HfFolder, CommitOperationAdd
+    from huggingface_hub import create_repo
+    model_name_slug = slugify(model_name)
+    if(where_to_upload == "My personal profile"):
+        api = HfApi()
+        your_username = api.whoami(token=hf_token)["name"]
+        model_id = f"{your_username}/{model_name_slug}"
+    else:
+        model_id = f"sd-dreambooth-library/{model_name_slug}"
+    headers = {"Authorization" : f"Bearer: {hf_token}", "Content-Type": "application/json"}
+    response = requests.post("https://example.com/get-my-account-detail", headers=headers)
+
+    images_upload = os.listdir("instance_images")
+    image_string = ""
+    instance_prompt_list = []
+    previous_instance_prompt = ''
+    for i, image in enumerate(images_upload):
+        instance_prompt = image.split("_")[0]
+        if(instance_prompt != previous_instance_prompt):
+            title_instance_prompt_string = instance_prompt
+            instance_prompt_list.append(instance_prompt)
+        else:
+            title_instance_prompt_string = ''
+        previous_instance_prompt = instance_prompt
+        image_string = f'''
+{title_instance_prompt_string}
+{image_string}![{instance_prompt} {i}](https://huggingface.co/{model_name_slug}/resolve/main/sample_images/{image})
+'''
+    readme_text = f'''---
+license: creativeml-openrail-m
+tags:
+- text-to-image
+---
+### {model_name} Dreambooth model trained by {api.whoami(token=hf_token)["name"]} with [Hugging Face Dreambooth Training Space](https://huggingface.co/spaces/multimodalart/dreambooth-training)
+
+You run your new concept via `diffusers` [Colab Notebook for Inference](https://colab.research.google.com/github/huggingface/notebooks/blob/main/diffusers/sd_dreambooth_inference.ipynb)
+
+Sample pictures of this concept:
+{image_string}
+'''
+    #Save the readme to a file
+    readme_file = open("README.md", "w")
+    readme_file.write(readme_text)
+    readme_file.close()
+    #Save the token identifier to a file
+    text_file = open("token_identifier.txt", "w")
+    text_file.write(', '.join(instance_prompt_list))
+    text_file.close()
+    operations = [
+        CommitOperationAdd(path_in_repo="token_identifier.txt", path_or_fileobj="token_identifier.txt"),
+        CommitOperationAdd(path_in_repo="README.md", path_or_fileobj="README.md"),
+        CommitOperationAdd(path_in_repo=f"model.ckpt",path_or_fileobj="model.ckpt")
+    ]
+    api.create_commit(
+        repo_id=model_id,
+        operations=operations,
+        commit_message=f"Upload the model {model_name}",
+        token=hf_token
+    )
+    api.upload_folder(
+        folder_path="output_model",
+        repo_id=model_id,
+        token=hf_token
+    )
+    api.upload_folder(
+        folder_path="instance_images",
+        path_in_repo="concept_images",
+        repo_id=model_id,
+        token=hf_token
+    )
+    return [gr.update(visible=True, value=f"Successfully uploaded your model. Access it [here](https://huggingface.co/{model_id})"), gr.update(visible=True, value=["diffusers_model.zip", "model.ckpt"])]

 def convert_to_ckpt():
     convert("output_model", "model.ckpt")
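Note that the new `push()` imports `create_repo` but this hunk never calls it, and `api.create_commit` expects the target repo to already exist. If the repo isn't created elsewhere (that isn't visible in this diff), a call like the following would be needed before committing; a sketch under that assumption, with a placeholder repo id and token:

```python
from huggingface_hub import create_repo

# Hypothetical pre-step for push(): create the target repo if it doesn't exist.
# exist_ok=True makes the call idempotent when the repo was already created.
create_repo("username/my-dreambooth-model", token="hf_...", private=False, exist_ok=True)
```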
@@ -200,7 +231,7 @@ with gr.Blocks(css=css) as demo:
     gr.HTML('''
     <div class="gr-prose" style="max-width: 80%">
     <h2>You have successfully cloned the Dreambooth Training Space</h2>
-    <p><a href="#">Now you can attribute a T4 GPU to it</a> (by going to the Settings tab) and run the training below. The GPU will be automatically unassigned after training is over. So you will be billed by the minute between when you activate the GPU and when it finishes training.</p>
+    <p>If you haven't already, attribute a T4 GPU to it (via the Settings tab) and run the training below. You will be billed by the minute between when you activate the GPU until when you turn it off.</p>
     </div>
     ''')
     gr.Markdown("# Dreambooth training")
@@ -258,32 +289,38 @@ with gr.Blocks(css=css) as demo:



-    with gr.Accordion("Advanced Settings", open=False):
-        swap_auto_calculated = gr.Checkbox(label="Use these advanced setting")
-        gr.Markdown("If not checked, the number of steps and % of frozen encoder will be tuned automatically according to the amount of images you upload and whether you are training an `object`, `person` or `style`.")
+    with gr.Accordion("Custom Settings", open=False):
+        swap_auto_calculated = gr.Checkbox(label="Use custom settings")
+        gr.Markdown("If not checked, the number of steps and % of frozen encoder will be tuned automatically according to the amount of images you upload and whether you are training an `object`, `person` or `style` as follows: The number of steps is calculated by number of images uploaded multiplied by 20. The text-encoder is frozen after 10% of the steps for a style, 30% of the steps for an object and is fully trained for persons.")
         steps = gr.Number(label="How many steps", value=800)
         perc_txt_encoder = gr.Number(label="Percentage of the training steps the text-encoder should be trained as well", value=30)

     type_of_thing.change(fn=swap_text, inputs=[type_of_thing], outputs=[thing_description, thing_image_example, things_naming, perc_txt_encoder], queue=False)
     training_summary = gr.Textbox("", visible=False, label="Training Summary")
+    steps.change(fn=count_files, inputs=file_collection+[type_of_thing]+[steps]+[perc_txt_encoder]+[swap_auto_calculated], outputs=[training_summary])
+    perc_txt_encoder.change(fn=count_files, inputs=file_collection+[type_of_thing]+[steps]+[perc_txt_encoder]+[swap_auto_calculated], outputs=[training_summary])
     for file in file_collection:
-        file.change(fn=count_files, inputs=file_collection+[type_of_thing]+[steps]+[perc_txt_encoder]+[swap_auto_calculated], outputs=[training_summary, training_summary])
+        file.change(fn=count_files, inputs=file_collection+[type_of_thing]+[steps]+[perc_txt_encoder]+[swap_auto_calculated], outputs=[training_summary])
     train_btn = gr.Button("Start Training")
     with gr.Box(visible=False) as try_your_model:
-        gr.Markdown("Try your model")
+        gr.Markdown("## Try your model")
         with gr.Row():
             prompt = gr.Textbox(label="Type your prompt")
-            result = gr.Image()
+            result_image = gr.Image()
         generate_button = gr.Button("Generate Image")
     with gr.Box(visible=False) as push_to_hub:
-        gr.Markdown("Push to Hugging Face Hub")
-        model_repo_tag = gr.Textbox(label="Model name or URL", placeholder="username/model_name")
+        gr.Markdown("## Push to Hugging Face Hub")
+        model_name = gr.Textbox(label="Name of your model", placeholder="Tarsila do Amaral Style")
+        where_to_upload = gr.Dropdown(["My personal profile", "Public Library"], label="Upload to")
+        gr.Markdown("[A Hugging Face write access token](https://huggingface.co/settings/tokens), go to \"New token\" -> Role : Write. A regular read token won't work here.")
+        hf_token = gr.Textbox(label="Hugging Face Write Token")
         push_button = gr.Button("Push to the Hub")
     result = gr.File(label="Download the uploaded models in the diffusers format", visible=True)
+    success_message_upload = gr.Markdown(visible=False)
     convert_button = gr.Button("Convert to CKPT", visible=False)

     train_btn.click(fn=train, inputs=is_visible+concept_collection+file_collection+[type_of_thing]+[steps]+[perc_txt_encoder]+[swap_auto_calculated], outputs=[result, try_your_model, push_to_hub, convert_button])
-    generate_button.click(fn=generate, inputs=prompt, outputs=result)
-    push_button.click(fn=push, inputs=model_repo_tag, outputs=[])
+    generate_button.click(fn=generate, inputs=prompt, outputs=result_image)
+    push_button.click(fn=push, inputs=[model_name, where_to_upload, hf_token], outputs=[success_message_upload, result])
     convert_button.click(fn=convert_to_ckpt, inputs=[], outputs=result)
 demo.launch()
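The model card generated by `push()` points users to the `diffusers` inference notebook; loading an uploaded model follows the same pipeline pattern the Space itself uses in `generate()`. A minimal sketch with a placeholder repo id and prompt:

```python
import torch
from diffusers import StableDiffusionPipeline

# Placeholder repo id: substitute the model_id that push() reports.
pipe = StableDiffusionPipeline.from_pretrained(
    "username/my-dreambooth-model", torch_dtype=torch.float16
).to("cuda")
image = pipe("a photo of the trained concept").images[0]  # same call shape as generate()
image.save("sample.png")
```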
 