{ "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "provenance": [], "gpuType": "T4" }, "kernelspec": { "name": "python3", "display_name": "Python 3" }, "language_info": { "name": "python" }, "accelerator": "GPU" }, "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "QV8xk7HmMX-M", "outputId": "f92c1174-5e29-43fa-a54a-4dac3bfe6d59" }, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Cloning into 'diffusers'...\n", "remote: Enumerating objects: 52829, done.\u001b[K\n", "remote: Counting objects: 100% (1298/1298), done.\u001b[K\n", "remote: Compressing objects: 100% (852/852), done.\u001b[K\n", "remote: Total 52829 (delta 594), reused 966 (delta 418), pack-reused 51531\u001b[K\n", "Receiving objects: 100% (52829/52829), 38.59 MiB | 24.11 MiB/s, done.\n", "Resolving deltas: 100% (37517/37517), done.\n", "/content/diffusers\n", " Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n", " Checking if build backend supports build_editable ... \u001b[?25l\u001b[?25hdone\n", " Getting requirements to build editable ... \u001b[?25l\u001b[?25hdone\n", " Preparing editable metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m143.8/143.8 kB\u001b[0m \u001b[31m4.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m92.3/92.3 kB\u001b[0m \u001b[31m7.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.3/6.3 MB\u001b[0m \u001b[31m24.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m63.9/63.9 kB\u001b[0m \u001b[31m7.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m170.1/170.1 kB\u001b[0m \u001b[31m21.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m536.6/536.6 kB\u001b[0m \u001b[31m30.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.6/1.6 MB\u001b[0m \u001b[31m40.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m42.0/42.0 kB\u001b[0m \u001b[31m5.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m280.0/280.0 kB\u001b[0m \u001b[31m28.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m183.4/183.4 kB\u001b[0m \u001b[31m21.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m62.7/62.7 kB\u001b[0m \u001b[31m7.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m74.0/74.0 kB\u001b[0m \u001b[31m10.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.4/1.4 MB\u001b[0m \u001b[31m46.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m44.6/44.6 kB\u001b[0m \u001b[31m5.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m756.0/756.0 kB\u001b[0m \u001b[31m46.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m61.2/61.2 kB\u001b[0m \u001b[31m6.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.2/2.2 MB\u001b[0m \u001b[31m44.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m38.3/38.3 MB\u001b[0m \u001b[31m14.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m116.3/116.3 kB\u001b[0m \u001b[31m15.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m134.8/134.8 kB\u001b[0m \u001b[31m17.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m53.4/53.4 kB\u001b[0m \u001b[31m6.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m202.9/202.9 kB\u001b[0m \u001b[31m26.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m54.5/54.5 kB\u001b[0m \u001b[31m7.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m257.9/257.9 kB\u001b[0m \u001b[31m30.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25h Building editable for diffusers (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n", "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n", "ibis-framework 7.1.0 requires pyarrow<15,>=2, but you have pyarrow 15.0.0 which is incompatible.\u001b[0m\u001b[31m\n", "\u001b[0m" ] } ], "source": [ "!git clone https://github.com/Bhavay-2001/diffusers\n", "%cd diffusers\n", "!pip install -q -e \".[dev]\"" ] }, { "cell_type": "code", "source": [ "!pwd" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "7tNQHp0MascO", "outputId": "0ac02733-6a0f-484f-fd1b-ee58370e5bd8" }, "execution_count": 2, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "/content/diffusers\n" ] } ] }, { "cell_type": "code", "source": [ "import os\n", "# /content/diffusers/src/diffusers/utils/hub_utils.py\n", "from diffusers.src.diffusers.utils.hub_utils import load_or_create_model_card, populate_model_card" ], "metadata": { "id": "kmQMzKuIXFvS" }, "execution_count": 3, "outputs": [] }, { "cell_type": "code", "source": [ "def save_model_card(repo_id: str, image_logs: dict =None, base_model: str=None, repo_folder: str=None):\n", " img_str = \"\"\n", " for i, log in enumerate(image_logs):\n", " images = log[\"images\"]\n", " validation_prompt = log[\"validation_prompt\"]\n", " validation_image = log[\"validation_image\"]\n", " validation_image.save(os.path.join(repo_folder, \"image_control.png\"))\n", " img_str += f\"![img_{i}](./image_{i}.png)\\n\"\n", "\n", " model_description = f\"\"\"\n", " # Textual inversion text2image fine-tuning - {repo_id}\n", " These are textual inversion adaption weights for {base_model}. You can find some example images in the following. \\n\n", " {img_str}\n", " \"\"\"\n", "\n", " model_card = load_or_create_model_card(\n", " repo_id_or_path=repo_id,\n", " from_training=True,\n", " license=\"creativeml-openrail-m\",\n", " base_model=base_model,\n", " model_description=model_description,\n", " inference=True,\n", " )\n", "\n", " tags = [\"stable-diffusion-xl\", \"stable-diffusion-xl-diffusers\", \"text-to-image\", \"diffusers\", \"textual_inversion\"]\n", " model_card = populate_model_card(model_card, tags=tags)\n", "\n", " model_card.save(os.path.join(repo_folder, \"README.md\"))" ], "metadata": { "id": "LiA0ILIdVp91" }, "execution_count": 4, "outputs": [] }, { "cell_type": "code", "source": [ "from diffusers.src.diffusers.utils import load_image\n", "\n", "images = [\n", " load_image(\"https://huggingface.co/datasets/diffusers/docs-images/resolve/main/amused/A%20mushroom%20in%20%5BV%5D%20style.png\")\n", " for _ in range(3)\n", "]\n", "\n", "image_logs = [\n", " dict(\n", " images=[image],\n", " validation_prompt=\"validation_prompt\",\n", " validation_image=image,\n", " )\n", " for image in images\n", "]\n", "\n", "save_model_card(\n", " repo_id=\"Bhavay-2001/textual-inversion\",\n", " image_logs=image_logs,\n", " base_model=\"runwayml/stable-diffusion-v1-5\",\n", " repo_folder=\".\",\n", ")" ], "metadata": { "id": "5UN8yQmYXEYQ" }, "execution_count": 5, "outputs": [] }, { "cell_type": "code", "source": [ "!cat README.md" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "yzaVaH8qfacW", "outputId": "fed30f61-1e39-4d5d-93cf-c31e92bb5950" }, "execution_count": 6, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "---\n", "license: creativeml-openrail-m\n", "library_name: diffusers\n", "tags:\n", "- stable-diffusion-xl\n", "- stable-diffusion-xl-diffusers\n", "- text-to-image\n", "- diffusers\n", "- textual_inversion\n", "inference: true\n", "base_model: runwayml/stable-diffusion-v1-5\n", "---\n", "\n", "\n", "\n", "\n", " # Textual inversion text2image fine-tuning - Bhavay-2001/textual-inversion\n", " These are textual inversion adaption weights for runwayml/stable-diffusion-v1-5. You can find some example images in the following. \n", "\n", " ![img_0](./image_0.png)\n", "![img_1](./image_1.png)\n", "![img_2](./image_2.png)\n", "\n", " \n", "\n", "## Intended uses & limitations\n", "\n", "#### How to use\n", "\n", "```python\n", "# TODO: add an example code snippet for running this diffusion pipeline\n", "```\n", "\n", "#### Limitations and bias\n", "\n", "[TODO: provide examples of latent issues and potential remediations]\n", "\n", "## Training details\n", "\n", "[TODO: describe the data used to train the model]" ] } ] } ] }