{ "cells": [ { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "The autoreload extension is already loaded. To reload it, use:\n", " %reload_ext autoreload\n" ] } ], "source": [ "%load_ext autoreload\n", "%autoreload 2" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "README.md \u001b[34mmodel_checkpoints\u001b[m\u001b[m recommender.py\n", "app.py \u001b[34mnotebooks\u001b[m\u001b[m requirements.txt\n", "\u001b[34mdata\u001b[m\u001b[m orchestrate_audio_data.py \u001b[34msrc\u001b[m\u001b[m\n" ] } ], "source": [ "!ls" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "import librosa\n", "import torch\n", "from src import laion_clap\n", "from glob import glob\n", "import pandas as pd\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "model = laion_clap.CLAP_Module(enable_fusion=False, amodel= 'HTSAT-base')\n", "model.load_ckpt(ckpt=\"music_audioset_epoch_15_esc_90.14.pt\")" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [], "source": [ "def load_music_file(file_name, sample_rate=48000):\n", "    \"\"\"Embed one audio file with the globally loaded CLAP `model`.\n", "\n", "    Args:\n", "        file_name: Path of the audio file, read with `librosa.load`.\n", "        sample_rate: Rate the audio is resampled to while loading.\n", "            Defaults to 48000, the rate this notebook feeds to the model.\n", "\n", "    Returns:\n", "        The result of `model.get_audio_embedding_from_data(...,\n", "        use_tensor=False)` for a batch that contains this one clip.\n", "    \"\"\"\n", "    audio_data, _ = librosa.load(file_name, sr=sample_rate)\n", "    # The embedding call takes a batch, so add a leading axis: (1, T).\n", "    audio_data = audio_data.reshape(1, -1)\n", "    with torch.no_grad():  # inference only, no gradients needed\n", "        return model.get_audio_embedding_from_data(x=audio_data, use_tensor=False)\n" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [], "source": [ "# glob() yields matches in arbitrary order; sort so the row order of the\n", "# embeddings (and of anything derived from this list) is stable across runs.\n", "# NOTE(review): machine-specific absolute path; adjust when running elsewhere.\n", "music_files = sorted(glob(\"/Users/berkayg/Codes/music-project/AudioCLIP/data/downloaded_tracks/*.wav\"))" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, 
"outputs": [], "source": [ "import pickle\n", "\n", "# NOTE(review): pickle.load can execute arbitrary code, so only open pickle\n", "# files you created yourself. `ls` is not read by any other cell visible in\n", "# this notebook.\n", "with open(\"/Users/berkayg/Codes/music-project/laion-clap-project/curate-me-a-playlist/data/vectors/song_names.pkl\", \"rb\") as reader:\n", "    ls = pickle.load(reader)\n" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/var/folders/sr/r72219hj06x_1xvw7hhd517h0000gn/T/ipykernel_18860/3009710654.py:2: UserWarning: PySoundFile failed. Trying audioread instead.\n", " audio_data, _ = librosa.load(file_name, sr=48000) # sample rate should be 48000\n", "/Users/berkayg/miniforge3/envs/playlist-curator/lib/python3.10/site-packages/librosa/core/audio.py:183: FutureWarning: librosa.core.audio.__audioread_load\n", "\tDeprecated as of librosa version 0.10.0.\n", "\tIt will be removed in librosa version 1.0.\n", " y, sr_native = __audioread_load(path, offset, duration, dtype)\n" ] } ], "source": [ "EMBED_DIM = 512  # embedding width assumed for every track\n", "\n", "# One row per track, filled in the same order as `music_files`.\n", "music_data = np.zeros((len(music_files), EMBED_DIM), dtype=np.float32)\n", "for row, path in enumerate(music_files):\n", "    # load_music_file returns a single-clip batch; flatten it into the row.\n", "    music_data[row] = load_music_file(path).reshape(-1)\n" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(1, 512)\n" ] } ], "source": [ "# Free-text query that the tracks are ranked against.\n", "text_data = [\"This audio is a romantic song\"]\n", "text_embed = model.get_text_embedding(text_data)\n", "print(text_embed.shape)" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [], "source": [ "# Keep only the file name (last path component) as the display label.\n", "song_names = [path.split(\"/\")[-1] for path in music_files]" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "torch.Size([100, 1])\n" ] } ], "source": [ "# Dot product of every track embedding with the text embedding gives one\n", "# score per track; keep the first query's scores as an (n_tracks, 1) column.\n", "with torch.no_grad():\n", "    # as_tensor wraps the existing arrays instead of copying them.\n", "    ranking = torch.as_tensor(music_data) @ torch.as_tensor(text_embed).t()\n", "    ranking = ranking[:, 0].reshape(-1, 1)\n", "print(ranking.shape)" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "data": { "text/html": [ 
"
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
This audio is a romantic song
Coldplay - Charlie Brown.wav0.400684
Sam Smith - I'm Not The Only One.wav0.373561
Pink Floyd - The Great Gig In The Sky - 2011 Remastered Version.wav0.371584
Christina Aguilera - You Lost Me.wav0.370390
Lana Del Rey - Yayo.wav0.370379
Queen - It's A Hard Life - Remastered 2011.wav0.348699
Teoman - Haziran.wav0.331220
John Lennon - Imagine - Remastered 2010.wav0.330397
Sleeping At Last - Mars.wav0.328770
Adele - Someone Like You.wav0.325650
Coldplay - What If.wav0.315717
Adamlar - Orda Ortada.wav0.306465
Eric Clapton - Autumn Leaves.wav0.305451
Premiata Forneria Marconi - Impressioni di settembre.wav0.295878
Guthrie Govan - Lost in Rio.wav0.284883
\n", "
" ], "text/plain": [ " This audio is a romantic song\n", "Coldplay - Charlie Brown.wav 0.400684\n", "Sam Smith - I'm Not The Only One.wav 0.373561\n", "Pink Floyd - The Great Gig In The Sky - 2011 Re... 0.371584\n", "Christina Aguilera - You Lost Me.wav 0.370390\n", "Lana Del Rey - Yayo.wav 0.370379\n", "Queen - It's A Hard Life - Remastered 2011.wav 0.348699\n", "Teoman - Haziran.wav 0.331220\n", "John Lennon - Imagine - Remastered 2010.wav 0.330397\n", "Sleeping At Last - Mars.wav 0.328770\n", "Adele - Someone Like You.wav 0.325650\n", "Coldplay - What If.wav 0.315717\n", "Adamlar - Orda Ortada.wav 0.306465\n", "Eric Clapton - Autumn Leaves.wav 0.305451\n", "Premiata Forneria Marconi - Impressioni di sett... 0.295878\n", "Guthrie Govan - Lost in Rio.wav 0.284883" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "TOP_K = 15  # number of best-matching tracks to display\n", "query = text_data[0]\n", "\n", "# Hand pandas a plain NumPy array rather than relying on implicit tensor\n", "# conversion; rows are labelled by song name, the column by the query text.\n", "scores = pd.DataFrame(np.asarray(ranking), columns=[query], index=song_names)\n", "scores.nlargest(TOP_K, query)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "playlist-curator", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.13" } }, "nbformat": 4, "nbformat_minor": 2 }