LoRA-DreamBooth-Training-UI

Running

hysts committed on Jan 25, 2023

Commit

700bc3e

•

1 Parent(s): 3e6e590

Enable removing the GPU after model training is done

Files changed (4) hide show

app.py CHANGED Viewed

@@ -43,7 +43,7 @@ def show_warning(warning_text: str) -> gr.Blocks:
 pipe = InferencePipeline(HF_TOKEN)
-trainer = Trainer()
 with gr.Blocks(css='style.css') as demo:
     if os.getenv('IS_SHARED_UI'):

 pipe = InferencePipeline(HF_TOKEN)
+trainer = Trainer(HF_TOKEN)
 with gr.Blocks(css='style.css') as demo:
     if os.getenv('IS_SHARED_UI'):

app_training.py CHANGED Viewed

@@ -92,9 +92,10 @@ def create_training_demo(trainer: Trainer,
                     - **Note:** Due to [this issue](https://github.com/huggingface/accelerate/issues/944), currently, training will not terminate properly if you use W&B.
                     ''')
-        # TODO currently disabled
         remove_gpu_after_training = gr.Checkbox(
-            label='Remove GPU after training', value=False, interactive=False)
         run_button = gr.Button('Start Training')
         with gr.Box():
@@ -125,12 +126,14 @@ def create_training_demo(trainer: Trainer,
                              use_private_repo,
                              delete_existing_repo,
                              upload_to,
                          ],
                          outputs=output_message)
     return demo
 if __name__ == '__main__':
-    trainer = Trainer()
     demo = create_training_demo(trainer)
     demo.queue(max_size=1).launch(share=False)

                     - **Note:** Due to [this issue](https://github.com/huggingface/accelerate/issues/944), currently, training will not terminate properly if you use W&B.
                     ''')
         remove_gpu_after_training = gr.Checkbox(
+            label='Remove GPU after training',
+            value=False,
+            interactive=bool(os.getenv('SPACE_ID')))
         run_button = gr.Button('Start Training')
         with gr.Box():
                              use_private_repo,
                              delete_existing_repo,
                              upload_to,
+                             remove_gpu_after_training,
                          ],
                          outputs=output_message)
     return demo
 if __name__ == '__main__':
+    hf_token = os.getenv('HF_TOKEN')
+    trainer = Trainer(hf_token)
     demo = create_training_demo(trainer)
     demo.queue(max_size=1).launch(share=False)

requirements.txt CHANGED Viewed

@@ -4,7 +4,7 @@ datasets==2.8.0
 git+https://github.com/huggingface/diffusers@febaf863026bd014b7a14349336544fc109d0f57#egg=diffusers
 ftfy==6.1.1
 gradio==3.14.0
-huggingface-hub==0.11.1
 Pillow==9.4.0
 python-slugify==7.0.0
 tensorboard==2.11.2

 git+https://github.com/huggingface/diffusers@febaf863026bd014b7a14349336544fc109d0f57#egg=diffusers
 ftfy==6.1.1
 gradio==3.14.0
+git+https://github.com/huggingface/huggingface_hub@bdb9d06b5e67269d702860ca60e1cdb106a66c91#egg=huggingface-hub
 Pillow==9.4.0
 python-slugify==7.0.0
 tensorboard==2.11.2

trainer.py CHANGED Viewed

@@ -11,6 +11,7 @@ import gradio as gr
 import PIL.Image
 import slugify
 import torch
 from constants import UploadTarget
@@ -30,6 +31,10 @@ def pad_image(image: PIL.Image.Image) -> PIL.Image.Image:
 class Trainer:
     def prepare_dataset(self, instance_images: list, resolution: int,
                         instance_data_dir: pathlib.Path) -> None:
         shutil.rmtree(instance_data_dir, ignore_errors=True)
@@ -64,6 +69,7 @@ class Trainer:
         use_private_repo: bool,
         delete_existing_repo: bool,
         upload_to: str,
     ) -> str:
         if not torch.cuda.is_available():
             raise gr.Error('CUDA is not available.')
@@ -116,8 +122,7 @@ class Trainer:
         if use_wandb:
             command += ' --report_to wandb'
         if upload_to_hub:
-            hf_token = os.getenv('HF_TOKEN')
-            command += f' --push_to_hub --hub_token {hf_token}'
             if use_private_repo:
                 command += ' --private_repo'
             if delete_existing_repo:
@@ -127,6 +132,12 @@ class Trainer:
         subprocess.run(shlex.split(command))
         with open(output_dir / 'train.sh', 'w') as f:
             command_s = ' '.join(command.split())
             f.write(command_s)

 import PIL.Image
 import slugify
 import torch
+from huggingface_hub import HfApi
 from constants import UploadTarget
 class Trainer:
+    def __init__(self, hf_token: str | None = None):
+        self.hf_token = hf_token
+        self.api = HfApi(token=hf_token)
     def prepare_dataset(self, instance_images: list, resolution: int,
                         instance_data_dir: pathlib.Path) -> None:
         shutil.rmtree(instance_data_dir, ignore_errors=True)
         use_private_repo: bool,
         delete_existing_repo: bool,
         upload_to: str,
+        remove_gpu_after_training: bool,
     ) -> str:
         if not torch.cuda.is_available():
             raise gr.Error('CUDA is not available.')
         if use_wandb:
             command += ' --report_to wandb'
         if upload_to_hub:
+            command += f' --push_to_hub --hub_token {self.hf_token}'
             if use_private_repo:
                 command += ' --private_repo'
             if delete_existing_repo:
         subprocess.run(shlex.split(command))
+        if remove_gpu_after_training:
+            space_id = os.getenv('SPACE_ID')
+            if space_id:
+                self.api.request_space_hardware(repo_id=space_id,
+                                                hardware='cpu-basic')
         with open(output_dir / 'train.sh', 'w') as f:
             command_s = ' '.join(command.split())
             f.write(command_s)