Spaces:

teticio
/

audio-diffusion

Runtime error

File size: 6,159 Bytes

{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "5f6c6cc2",
   "metadata": {},
   "source": [
    "<a href=\"https://colab.research.google.com/github/teticio/audio-diffusion/blob/master/notebooks/conditional_generation.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f1935544",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Detect Google Colab: the google.colab package can only be imported there.\n",
    "try:\n",
    "    import google.colab\n",
    "except ImportError:\n",
    "    # Not on Colab: nothing is cloned or installed by this cell.\n",
    "    pass\n",
    "else:\n",
    "    # On Colab: fetch the repository, move into it and install its requirements.\n",
    "    # Kept outside the try block so that only the import probe is guarded and\n",
    "    # unrelated errors (or a keyboard interrupt) are no longer silently swallowed.\n",
    "    !git clone -q https://github.com/teticio/audio-diffusion.git\n",
    "    %cd audio-diffusion\n",
    "    %pip install -q -r requirements.txt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b0e656c9",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import sys\n",
    "\n",
    "# Put the parent of the current working directory first on the import path.\n",
    "parent_dir = os.path.dirname(os.path.abspath(\"\"))\n",
    "sys.path.insert(0, parent_dir)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d448b299",
   "metadata": {},
   "outputs": [],
   "source": [
    "import urllib\n",
    "import urllib.parse\n",
    "\n",
    "import requests\n",
    "import torch\n",
    "from IPython.display import Audio\n",
    "\n",
    "from audiodiffusion import AudioDiffusion\n",
    "from audiodiffusion.audio_encoder import AudioEncoder"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f1548971",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Use CUDA when PyTorch reports it as available, otherwise fall back to the CPU.\n",
    "if torch.cuda.is_available():\n",
    "    device = \"cuda\"\n",
    "else:\n",
    "    device = \"cpu\"\n",
    "\n",
    "# Random number generator created on the selected device.\n",
    "generator = torch.Generator(device=device)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "056f179c",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Build the AudioDiffusion wrapper for the conditional latent model.\n",
    "audio_diffusion = AudioDiffusion(\n",
    "    model_id=\"teticio/conditional-latent-audio-diffusion-512\",\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b4a08500",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the audio encoder whose output is later passed as `encoding` to the generator.\n",
    "audio_encoder = AudioEncoder.from_pretrained(\n",
    "    \"teticio/audio-encoder\",\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "387550ac",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Uncomment for faster (but slightly lower quality) generation\n",
    "#from diffusers import DDIMScheduler\n",
    "#audio_diffusion.pipe.scheduler = DDIMScheduler()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "9936a72f",
   "metadata": {},
   "source": [
    "## Download and encode preview track from Spotify"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "57a9b134",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Get temporary API credentials\n",
    "token_response = requests.get(\n",
    "    \"https://open.spotify.com/get_access_token?reason=transport&productType=embed\",\n",
    "    timeout=30)\n",
    "# Surface the HTTP error itself rather than a confusing KeyError further down\n",
    "token_response.raise_for_status()\n",
    "credentials = token_response.json()\n",
    "headers = {\n",
    "    \"Accept\": \"application/json\",\n",
    "    \"Content-Type\": \"application/json\",\n",
    "    \"Authorization\": \"Bearer \" + credentials[\"accessToken\"]\n",
    "}\n",
    "\n",
    "# Search for tracks\n",
    "search_string = input(\"Search: \")\n",
    "search_response = requests.get(\n",
    "    f\"https://api.spotify.com/v1/search?q={urllib.parse.quote(search_string)}&type=track\",\n",
    "    headers=headers,\n",
    "    timeout=30)\n",
    "search_response.raise_for_status()\n",
    "response = search_response.json()\n",
    "tracks = response[\"tracks\"][\"items\"]\n",
    "if not tracks:\n",
    "    raise ValueError(f\"No tracks found for {search_string!r}\")\n",
    "\n",
    "# List results, numbered from 1\n",
    "for number, track in enumerate(tracks, start=1):\n",
    "    print(f\"{number}. {track['artists'][0]['name']} - {track['name']}\")\n",
    "selection = input(\"Select a track: \")\n",
    "\n",
    "# Validate the choice: 0 or a negative number would otherwise silently index\n",
    "# from the end of the list and pick an unintended track\n",
    "track_index = int(selection) - 1\n",
    "if not 0 <= track_index < len(tracks):\n",
    "    raise ValueError(f\"Selection must be between 1 and {len(tracks)}\")\n",
    "preview_url = tracks[track_index][\"preview_url\"]\n",
    "if not preview_url:\n",
    "    # Without this check a missing preview URL fails inside requests with an obscure error\n",
    "    raise ValueError(\"The selected track has no preview available, please pick another one\")\n",
    "\n",
    "# Download and encode selection\n",
    "r = requests.get(preview_url, stream=True, timeout=30)\n",
    "r.raise_for_status()\n",
    "try:\n",
    "    with open(\"temp.mp3\", \"wb\") as f:\n",
    "        for chunk in r:\n",
    "            f.write(chunk)\n",
    "    encoding = torch.unsqueeze(audio_encoder.encode([\"temp.mp3\"]),\n",
    "                               axis=1).to(device)\n",
    "finally:\n",
    "    # Always remove the temporary download, even if writing or encoding fails\n",
    "    if os.path.exists(\"temp.mp3\"):\n",
    "        os.remove(\"temp.mp3\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "8af863f5",
   "metadata": {},
   "source": [
    "## Conditional Generation\n",
    "Bear in mind that the generative model can only generate music similar to that on which it was trained. The audio encoding will influence the generation within those limitations."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8f119ddd",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Generate ten samples conditioned on the encoded track.\n",
    "for _ in range(10):\n",
    "    # Draw a new seed and print it so that a given sample can be identified later.\n",
    "    seed = generator.seed()\n",
    "    print(f'Seed = {seed}')\n",
    "    generator.manual_seed(seed)\n",
    "\n",
    "    result = audio_diffusion.generate_spectrogram_and_audio(\n",
    "        generator=generator, encoding=encoding)\n",
    "    image, (sample_rate, audio) = result\n",
    "\n",
    "    # Show the spectrogram image followed by a player for the generated audio.\n",
    "    display(image)\n",
    "    display(Audio(audio, rate=sample_rate))\n",
    "\n",
    "    # loop_it returns None when it cannot produce a loop.\n",
    "    loop = AudioDiffusion.loop_it(audio, sample_rate)\n",
    "    if loop is None:\n",
    "        print(\"Unable to determine loop points\")\n",
    "    else:\n",
    "        display(Audio(loop, rate=sample_rate))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d0bd18c0",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "accelerator": "GPU",
  "colab": {
   "provenance": []
  },
  "gpuClass": "standard",
  "kernelspec": {
   "display_name": "huggingface",
   "language": "python",
   "name": "huggingface"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.6"
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {},
   "toc_section_display": true,
   "toc_window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}