diff --git "a/Text_to_Video_with_Diffusers.ipynb" "b/Text_to_Video_with_Diffusers.ipynb" new file mode 100644--- /dev/null +++ "b/Text_to_Video_with_Diffusers.ipynb" @@ -0,0 +1,4118 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + }, + "accelerator": "GPU", + "gpuClass": "standard", + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "9b58f36c14ac45c1b979c2570927ca02": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_8ec76c0b50a841fda853cefbd3b1bc95", + "IPY_MODEL_aabfacaea2884346b62fe4f6fdee6395", + "IPY_MODEL_d1580a8272d64f31ae0214b165e72086" + ], + "layout": "IPY_MODEL_9f3d3401edc74d8da018016683bd9ecd" + } + }, + "8ec76c0b50a841fda853cefbd3b1bc95": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_4f56a98c661a437b80a9d6b1bc493565", + "placeholder": "\u200b", + "style": "IPY_MODEL_10ab1ed7686f49fdbbc7f49253936579", + "value": "100%" + } + }, + "aabfacaea2884346b62fe4f6fdee6395": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": 
"1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_6229ebd6bac84fa1a52928e8a99bfa51", + "max": 25, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_fbcfbfc4fb8c42098e10091777378e75", + "value": 25 + } + }, + "d1580a8272d64f31ae0214b165e72086": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_834306faae8346538a30f97dc2fe6207", + "placeholder": "\u200b", + "style": "IPY_MODEL_130fb2d4ebb74b3ea3e507e3bcdd3f9c", + "value": " 25/25 [00:38<00:00, 1.54s/it]" + } + }, + "9f3d3401edc74d8da018016683bd9ecd": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + 
"grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "4f56a98c661a437b80a9d6b1bc493565": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "10ab1ed7686f49fdbbc7f49253936579": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + 
"_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "6229ebd6bac84fa1a52928e8a99bfa51": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "fbcfbfc4fb8c42098e10091777378e75": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + 
"834306faae8346538a30f97dc2fe6207": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "130fb2d4ebb74b3ea3e507e3bcdd3f9c": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + } + } + } + }, + "cells": [ + { + "cell_type": "markdown", + "source": [ + "# 🧨 Diffusers meets Video\n", + "\n", + "This Colab showcases the new research text-to-video model by Alibaba and its integration with the diffusers library: https://huggingface.co/damo-vilab/text-to-video-ms-1.7b" 
+ ], + "metadata": { + "id": "OE8UPJPBdRKD" + } + }, + { + "cell_type": "code", + "source": [ + "#@title Check your GPU!\n", + "!nvidia-smi" + ], + "metadata": { + "id": "R6fBIouojIYa" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "9bxNLonIVDQP" + }, + "outputs": [], + "source": [ + "#@title Install dependencies\n", + "!pip install torch==2.0.0 git+https://github.com/huggingface/diffusers transformers accelerate imageio[ffmpeg]" + ] + }, + { + "cell_type": "code", + "source": [ + "#@title Setup pipeline\n", + "import torch\n", + "from diffusers import DiffusionPipeline, DPMSolverMultistepScheduler\n", + "from diffusers.utils import export_to_video\n", + "from IPython.display import HTML\n", + "from base64 import b64encode\n", + "\n", + "pipe = DiffusionPipeline.from_pretrained(\"damo-vilab/text-to-video-ms-1.7b\", torch_dtype=torch.float16, variant=\"fp16\")\n", + "pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)\n", + "pipe.enable_model_cpu_offload()\n", + "pipe.enable_vae_slicing()" + ], + "metadata": { + "id": "lNJpGpqaVZTH" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "#@title Generate your video\n", + "prompt = 'Spiderman chatting with a llama' #@param {type:\"string\"}\n", + "video_duration_seconds = 3 #@param {type:\"integer\"}\n", + "num_frames = video_duration_seconds * 10\n", + "video_frames = pipe(prompt, negative_prompt=\"low quality\", num_inference_steps=25, num_frames=num_frames).frames\n", + "video_path = export_to_video(video_frames)" + ], + "metadata": { + "id": "zNGp_304V4ZG", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 49, + "referenced_widgets": [ + "9b58f36c14ac45c1b979c2570927ca02", + "8ec76c0b50a841fda853cefbd3b1bc95", + "aabfacaea2884346b62fe4f6fdee6395", + "d1580a8272d64f31ae0214b165e72086", + "9f3d3401edc74d8da018016683bd9ecd", + 
"4f56a98c661a437b80a9d6b1bc493565", + "10ab1ed7686f49fdbbc7f49253936579", + "6229ebd6bac84fa1a52928e8a99bfa51", + "fbcfbfc4fb8c42098e10091777378e75", + "834306faae8346538a30f97dc2fe6207", + "130fb2d4ebb74b3ea3e507e3bcdd3f9c" + ] + }, + "outputId": "8687a2f8-9498-4828-b63e-ae294000167a" + }, + "execution_count": 27, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + " 0%| | 0/25 [00:00" + ], + "text/html": [ + "" + ] + }, + "metadata": {}, + "execution_count": 28 + } + ] + } + ] +} \ No newline at end of file