# -*- coding: utf-8 -*-
"""p2_Stable_Diffusion_ProductSnapAI_Training_and_Inference-Martin_Valen.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1ZVSof0szrYoCO_lPNzP9ctTP_sZrUp1G

## Part 2: Fine-tuning Stable Diffusion

To train Stable Diffusion on our images, we can use a technique called fine-tuning, which takes a pre-trained Stable Diffusion model and trains it further on a new dataset. This way we leverage the knowledge the pre-trained model has already learned and adapt it to our data, achieving good results with far less training time. Here we fine-tune a Stable Diffusion model on our own images, which lets us generate new images that are similar in style and content to our original dataset. Once the model is fine-tuned, we can use it for inference, generating new images on demand.
"""

# Clone the diffusers repository and install it along with the text-to-image training requirements.
# %cd (unlike !cd) changes the working directory for the rest of the notebook.
%cd /content/
!git clone https://github.com/huggingface/diffusers.git
!pip install ./diffusers
!pip install -U -r /content/diffusers/examples/text_to_image/requirements.txt

"""Let's configure HuggingFace Accelerate, a library that automatically configures our system to run the training script on the available hardware. You can learn more about Accelerate [here](https://huggingface.co/docs/accelerate/index)."""

!nvidia-smi

!accelerate config default --mixed_precision fp16

"""Time to configure our environment variables. We need to tell the script the model name, the dataset name, and where to output the trained model. The model will be pushed directly to the HuggingFace Hub, which requires logging in to HuggingFace with the access token available in your HuggingFace account settings."""

import os
os.environ['MODEL_NAME'] = 'CompVis/stable-diffusion-v1-2'
os.environ['DATASET_NAME'] = 'Ali-fb/martin_valen_dataset'
os.environ['OUTPUT_DIR'] = 'sd_martin_valen-model-v1-2_400_demo'

from huggingface_hub import notebook_login
notebook_login()
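"""Before launching training, it can help to sanity-check the dataset. The cell below is a minimal sketch (not part of the original notebook): it assumes the dataset exposes `image` and `text` columns, which are the defaults expected by the `train_text_to_image.py` script, so adjust the column names if your dataset differs."""

from datasets import load_dataset

# Load the training split and inspect one image/caption pair before fine-tuning.
preview_ds = load_dataset(os.environ['DATASET_NAME'], split="train")
print(preview_ds)                              # column names and number of rows
example = preview_ds[0]
print(example["text"])                         # caption of the first example (assumed column name)
example["image"].save("dataset_preview.png")   # first image, assumed to be a PIL image column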
"""Run our training script using HuggingFace Accelerate. We'll be passing in our dataset and model, training for 400 steps (selected) over 134 epochs (default). The model will then be automatically pushed to the Hub."""

!accelerate launch diffusers/examples/text_to_image/train_text_to_image.py \
  --pretrained_model_name_or_path=$MODEL_NAME \
  --dataset_name=$DATASET_NAME \
  --use_ema \
  --resolution=512 --center_crop --random_flip \
  --train_batch_size=1 \
  --gradient_accumulation_steps=4 \
  --gradient_checkpointing \
  --mixed_precision="fp16" \
  --max_train_steps=400 \
  --learning_rate=1e-05 \
  --max_grad_norm=1 \
  --push_to_hub \
  --checkpointing_steps=100000 \
  --lr_scheduler="constant" \
  --lr_warmup_steps=0 \
  --output_dir=$OUTPUT_DIR

# Stable Diffusion V1
from diffusers import StableDiffusionPipeline
import torch
from PIL import Image

# Load the fine-tuned model from the local output directory.
model_path = "./sd_martin_valen-model-v1-2_400_demo"
pipe = StableDiffusionPipeline.from_pretrained(model_path, torch_dtype=torch.float16)
pipe.to("cuda")

# Run inference using ChatGPT prompts to acquire 4 image panels
image1 = pipe(prompt="black hoodie with a front half zipper by martin valen").images[0]
image1.save("ProductSnapAI_panel_1.png")

image2 = pipe(prompt="white hoodie with a blue design by martin valen").images[0]
image2.save("ProductSnapAI_panel_2.png")

image3 = pipe(prompt="striped hoodie by martin valen").images[0]
image3.save("ProductSnapAI_panel_3.png")

image4 = pipe(prompt="camouflage hoodie by martin valen").images[0]
image4.save("ProductSnapAI_panel_4.png")

# Image grid helper function from HuggingFace
def image_grid(imgs, rows, cols):
    assert len(imgs) == rows * cols

    w, h = imgs[0].size
    grid = Image.new('RGB', size=(cols * w, rows * h))

    # Paste each image into its cell, filling the grid row by row.
    for i, img in enumerate(imgs):
        grid.paste(img, box=(i % cols * w, i // cols * h))
    return grid

all_images = [image1, image2, image3, image4]
grid = image_grid(all_images, rows=1, cols=4)
grid
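"""The panels above use the pipeline defaults. For reproducible outputs you can pass an explicit seed and tune the standard sampler arguments; the cell below is a minimal sketch (not part of the original notebook), and the seed, step count, and guidance scale are arbitrary example values."""

# Save the combined 1x4 grid to disk.
grid.save("ProductSnapAI_grid.png")

# Regenerate one panel deterministically with a fixed seed and explicit sampler settings.
generator = torch.Generator(device="cuda").manual_seed(42)   # arbitrary seed
image_seeded = pipe(
    prompt="black hoodie with a front half zipper by martin valen",
    num_inference_steps=50,   # denoising steps; more steps trade speed for detail
    guidance_scale=7.5,       # classifier-free guidance strength
    generator=generator,
).images[0]
image_seeded.save("ProductSnapAI_panel_1_seed42.png")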