Spaces:

berkaygkv
/

curate-me-a-playlist

Sleeping

File size: 9,929 Bytes

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "The autoreload extension is already loaded. To reload it, use:\n",
      "  %reload_ext autoreload\n"
     ]
    }
   ],
   "source": [
    "%load_ext autoreload\n",
    "%autoreload 2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "README.md                 \u001b[34mmodel_checkpoints\u001b[m\u001b[m         recommender.py\n",
      "app.py                    \u001b[34mnotebooks\u001b[m\u001b[m                 requirements.txt\n",
      "\u001b[34mdata\u001b[m\u001b[m                      orchestrate_audio_data.py \u001b[34msrc\u001b[m\u001b[m\n"
     ]
    }
   ],
   "source": [
    "!ls"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import librosa\n",
    "import torch\n",
    "from src import laion_clap\n",
    "from glob import glob\n",
    "import pandas as pd\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "model = laion_clap.CLAP_Module(enable_fusion=False, amodel= 'HTSAT-base')\n",
    "model.load_ckpt(ckpt=\"music_audioset_epoch_15_esc_90.14.pt\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "def load_music_file(file_name):\n",
    "    audio_data, _ = librosa.load(file_name, sr=48000) # sample rate should be 48000\n",
    "    audio_data = audio_data.reshape(1, -1) # Make it (1,T) or (N,T)\n",
    "    # audio_data = torch.from_numpy(int16_to_float32(float32_to_int16(audio_data))).float() # quantize before send it in to the model\n",
    "    with torch.no_grad():\n",
    "        audio_embed = model.get_audio_embedding_from_data(x = audio_data, use_tensor=False)\n",
    "    return audio_embed\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "music_files = glob(\"/Users/berkayg/Codes/music-project/AudioCLIP/data/downloaded_tracks/*.wav\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pickle\n",
    "with open(\"/Users/berkayg/Codes/music-project/laion-clap-project/curate-me-a-playlist/data/vectors/song_names.pkl\", \"rb\") as reader:\n",
    "    ls = pickle.load(reader)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/var/folders/sr/r72219hj06x_1xvw7hhd517h0000gn/T/ipykernel_18860/3009710654.py:2: UserWarning: PySoundFile failed. Trying audioread instead.\n",
      "  audio_data, _ = librosa.load(file_name, sr=48000) # sample rate should be 48000\n",
      "/Users/berkayg/miniforge3/envs/playlist-curator/lib/python3.10/site-packages/librosa/core/audio.py:183: FutureWarning: librosa.core.audio.__audioread_load\n",
      "\tDeprecated as of librosa version 0.10.0.\n",
      "\tIt will be removed in librosa version 1.0.\n",
      "  y, sr_native = __audioread_load(path, offset, duration, dtype)\n"
     ]
    }
   ],
   "source": [
    "music_data = np.zeros((len(music_files), 512), dtype=np.float32)\n",
    "for m in range(music_data.shape[0]):\n",
    "    music_data[m] = load_music_file(music_files[m])\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(1, 512)\n"
     ]
    }
   ],
   "source": [
    "text_data = [\"This audio is a romantic song\"] \n",
    "text_embed = model.get_text_embedding(text_data)\n",
    "print(text_embed.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "song_names = [k.split(\"/\")[-1] for k in music_files]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "torch.Size([100, 1])\n"
     ]
    }
   ],
   "source": [
    "with torch.no_grad():\n",
    "    ranking = torch.tensor(music_data) @ torch.tensor(text_embed).t()\n",
    "    ranking = ranking[:, 0].reshape(-1, 1)\n",
    "print(ranking.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>This audio is a romantic song</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>Coldplay - Charlie Brown.wav</th>\n",
       "      <td>0.400684</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Sam Smith - I'm Not The Only One.wav</th>\n",
       "      <td>0.373561</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Pink Floyd - The Great Gig In The Sky - 2011 Remastered Version.wav</th>\n",
       "      <td>0.371584</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Christina Aguilera - You Lost Me.wav</th>\n",
       "      <td>0.370390</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Lana Del Rey - Yayo.wav</th>\n",
       "      <td>0.370379</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Queen - It's A Hard Life - Remastered 2011.wav</th>\n",
       "      <td>0.348699</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Teoman - Haziran.wav</th>\n",
       "      <td>0.331220</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>John Lennon - Imagine - Remastered 2010.wav</th>\n",
       "      <td>0.330397</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Sleeping At Last - Mars.wav</th>\n",
       "      <td>0.328770</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Adele - Someone Like You.wav</th>\n",
       "      <td>0.325650</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Coldplay - What If.wav</th>\n",
       "      <td>0.315717</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Adamlar - Orda Ortada.wav</th>\n",
       "      <td>0.306465</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Eric Clapton - Autumn Leaves.wav</th>\n",
       "      <td>0.305451</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Premiata Forneria Marconi - Impressioni di settembre.wav</th>\n",
       "      <td>0.295878</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Guthrie Govan - Lost in Rio.wav</th>\n",
       "      <td>0.284883</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                    This audio is a romantic song\n",
       "Coldplay - Charlie Brown.wav                                             0.400684\n",
       "Sam Smith - I'm Not The Only One.wav                                     0.373561\n",
       "Pink Floyd - The Great Gig In The Sky - 2011 Re...                       0.371584\n",
       "Christina Aguilera - You Lost Me.wav                                     0.370390\n",
       "Lana Del Rey - Yayo.wav                                                  0.370379\n",
       "Queen - It's A Hard Life - Remastered 2011.wav                           0.348699\n",
       "Teoman - Haziran.wav                                                     0.331220\n",
       "John Lennon - Imagine - Remastered 2010.wav                              0.330397\n",
       "Sleeping At Last - Mars.wav                                              0.328770\n",
       "Adele - Someone Like You.wav                                             0.325650\n",
       "Coldplay - What If.wav                                                   0.315717\n",
       "Adamlar - Orda Ortada.wav                                                0.306465\n",
       "Eric Clapton - Autumn Leaves.wav                                         0.305451\n",
       "Premiata Forneria Marconi - Impressioni di sett...                       0.295878\n",
       "Guthrie Govan - Lost in Rio.wav                                          0.284883"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.DataFrame(ranking, columns=[text_data[0]], index=song_names).nlargest(15, text_data[0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "playlist-curator",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.13"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}