diff --git "a/notebooks/test-model.ipynb" "b/notebooks/test-model.ipynb" new file mode 100644--- /dev/null +++ "b/notebooks/test-model.ipynb" @@ -0,0 +1,204 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "fd262b00", + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import sys\n", + "sys.path.insert(0, os.path.dirname(os.path.abspath(\"\")))" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "id": "d2253762", + "metadata": {}, + "outputs": [], + "source": [ + "import random\n", + "from src.mel import Mel\n", + "from PIL import ImageOps, Image\n", + "from IPython.display import Audio\n", + "from datasets import load_from_disk\n", + "from diffusers import DDPMPipeline, DDIMPipeline, PNDMPipeline" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "293dd2c7", + "metadata": {}, + "outputs": [], + "source": [ + "mel = Mel(x_res=64, y_res=64, hop_length=1024)" + ] + }, + { + "cell_type": "markdown", + "id": "5bdb2648", + "metadata": {}, + "source": [ + "### Run model inference to generate Mel spectrogram" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "aac92f90", + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "9fa5515ab1984c45bf459e9dfa12c3b9", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + " 0%| | 0/1000 [00:00\n", + " \n", + " Your browser does not support the audio element.\n", + " \n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "audio = mel.image_to_audio(ImageOps.grayscale(image))\n", + "Audio(data=audio, rate=mel.get_sample_rate())" + ] + }, + { + "cell_type": "markdown", + "id": "10805113", + "metadata": {}, + "source": [ + "### Compare results with random sample from training set" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "id": "7a366813", + "metadata": {}, + "outputs": [], + "source": [ + "ds = load_from_disk('../data-64')" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "id": "55a29505", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " \n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 38, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "audio = mel.image_to_audio(random.choice(ds['train'])['image'])\n", + "Audio(data=audio, rate=mel.get_sample_rate())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "afb1f699", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "huggingface", + "language": "python", + "name": "huggingface" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.13" + }, + "toc": { + "base_numbering": 1, + "nav_menu": {}, + "number_sections": true, + "sideBar": true, + "skip_h1_title": false, + "title_cell": "Table of Contents", + "title_sidebar": "Contents", + "toc_cell": false, + "toc_position": {}, + "toc_section_display": true, + "toc_window_display": false + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}