{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "### Overview\n", "\n", "This notebook can be used with both a single or multi- speaker corpus and allows the interactive plotting of speaker embeddings linked to underlying audio (see instructions in the repo's speaker_embedding directory)\n", "\n", "Depending on the directory structure used for your corpus, you may need to adjust handling of **speaker_to_utter** and **locations**." ] }, { "cell_type": "code", <<<<<<< HEAD "execution_count": 2, ======= "execution_count": null, >>>>>>> dev "metadata": {}, "outputs": [], "source": [ "import os\n", "import glob\n", "import random\n", "import numpy as np\n", "import torch\n", "import umap\n", "\n", "from TTS.speaker_encoder.model import SpeakerEncoder\n", <<<<<<< HEAD "from TTS.utils.audio import AudioProcessor\n", "from TTS.utils.io import load_config\n", ======= "from TTS.tts.utils.audio import AudioProcessor\n", "from TTS.tts.utils.generic_utils import load_config\n", >>>>>>> dev "\n", "from bokeh.io import output_notebook, show\n", "from bokeh.plotting import figure\n", "from bokeh.models import HoverTool, ColumnDataSource, BoxZoomTool, ResetTool, OpenURL, TapTool\n", "from bokeh.transform import factor_cmap, factor_mark\n", "from bokeh.palettes import Category10" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "For larger sets of speakers, you can use **Category20**, but you need to change it in the **pal** variable too\n", "\n", "List of Bokeh palettes here: http://docs.bokeh.org/en/1.4.0/docs/reference/palettes.html\n", "\n", "**NB:** if you have problems with other palettes, first see https://stackoverflow.com/questions/48333820/why-do-some-bokeh-palettes-raise-a-valueerror-when-used-in-factor-cmap" ] }, { "cell_type": "code", <<<<<<< HEAD "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "
\n", " \n", " Loading BokehJS ...\n", "
" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/javascript": [ "\n", "(function(root) {\n", " function now() {\n", " return new Date();\n", " }\n", "\n", " var force = true;\n", "\n", " if (typeof root._bokeh_onload_callbacks === \"undefined\" || force === true) {\n", " root._bokeh_onload_callbacks = [];\n", " root._bokeh_is_loading = undefined;\n", " }\n", "\n", " var JS_MIME_TYPE = 'application/javascript';\n", " var HTML_MIME_TYPE = 'text/html';\n", " var EXEC_MIME_TYPE = 'application/vnd.bokehjs_exec.v0+json';\n", " var CLASS_NAME = 'output_bokeh rendered_html';\n", "\n", " /**\n", " * Render data to the DOM node\n", " */\n", " function render(props, node) {\n", " var script = document.createElement(\"script\");\n", " node.appendChild(script);\n", " }\n", "\n", " /**\n", " * Handle when an output is cleared or removed\n", " */\n", " function handleClearOutput(event, handle) {\n", " var cell = handle.cell;\n", "\n", " var id = cell.output_area._bokeh_element_id;\n", " var server_id = cell.output_area._bokeh_server_id;\n", " // Clean up Bokeh references\n", " if (id != null && id in Bokeh.index) {\n", " Bokeh.index[id].model.document.clear();\n", " delete Bokeh.index[id];\n", " }\n", "\n", " if (server_id !== undefined) {\n", " // Clean up Bokeh references\n", " var cmd = \"from bokeh.io.state import curstate; print(curstate().uuid_to_server['\" + server_id + \"'].get_sessions()[0].document.roots[0]._id)\";\n", " cell.notebook.kernel.execute(cmd, {\n", " iopub: {\n", " output: function(msg) {\n", " var id = msg.content.text.trim();\n", " if (id in Bokeh.index) {\n", " Bokeh.index[id].model.document.clear();\n", " delete Bokeh.index[id];\n", " }\n", " }\n", " }\n", " });\n", " // Destroy server and session\n", " var cmd = \"import bokeh.io.notebook as ion; ion.destroy_server('\" + server_id + \"')\";\n", " cell.notebook.kernel.execute(cmd);\n", " }\n", " }\n", "\n", " /**\n", " * Handle when a new output is added\n", " */\n", " function handleAddOutput(event, handle) {\n", " var output_area = handle.output_area;\n", " var output = handle.output;\n", "\n", " // limit handleAddOutput to display_data with EXEC_MIME_TYPE content only\n", " if ((output.output_type != \"display_data\") || (!output.data.hasOwnProperty(EXEC_MIME_TYPE))) {\n", " return\n", " }\n", "\n", " var toinsert = output_area.element.find(\".\" + CLASS_NAME.split(' ')[0]);\n", "\n", " if (output.metadata[EXEC_MIME_TYPE][\"id\"] !== undefined) {\n", " toinsert[toinsert.length - 1].firstChild.textContent = output.data[JS_MIME_TYPE];\n", " // store reference to embed id on output_area\n", " output_area._bokeh_element_id = output.metadata[EXEC_MIME_TYPE][\"id\"];\n", " }\n", " if (output.metadata[EXEC_MIME_TYPE][\"server_id\"] !== undefined) {\n", " var bk_div = document.createElement(\"div\");\n", " bk_div.innerHTML = output.data[HTML_MIME_TYPE];\n", " var script_attrs = bk_div.children[0].attributes;\n", " for (var i = 0; i < script_attrs.length; i++) {\n", " toinsert[toinsert.length - 1].firstChild.setAttribute(script_attrs[i].name, script_attrs[i].value);\n", " }\n", " // store reference to server id on output_area\n", " output_area._bokeh_server_id = output.metadata[EXEC_MIME_TYPE][\"server_id\"];\n", " }\n", " }\n", "\n", " function register_renderer(events, OutputArea) {\n", "\n", " function append_mime(data, metadata, element) {\n", " // create a DOM node to render to\n", " var toinsert = this.create_output_subarea(\n", " metadata,\n", " CLASS_NAME,\n", " EXEC_MIME_TYPE\n", " );\n", " this.keyboard_manager.register_events(toinsert);\n", " // Render to node\n", " var props = {data: data, metadata: metadata[EXEC_MIME_TYPE]};\n", " render(props, toinsert[toinsert.length - 1]);\n", " element.append(toinsert);\n", " return toinsert\n", " }\n", "\n", " /* Handle when an output is cleared or removed */\n", " events.on('clear_output.CodeCell', handleClearOutput);\n", " events.on('delete.Cell', handleClearOutput);\n", "\n", " /* Handle when a new output is added */\n", " events.on('output_added.OutputArea', handleAddOutput);\n", "\n", " /**\n", " * Register the mime type and append_mime function with output_area\n", " */\n", " OutputArea.prototype.register_mime_type(EXEC_MIME_TYPE, append_mime, {\n", " /* Is output safe? */\n", " safe: true,\n", " /* Index of renderer in `output_area.display_order` */\n", " index: 0\n", " });\n", " }\n", "\n", " // register the mime type if in Jupyter Notebook environment and previously unregistered\n", " if (root.Jupyter !== undefined) {\n", " var events = require('base/js/events');\n", " var OutputArea = require('notebook/js/outputarea').OutputArea;\n", "\n", " if (OutputArea.prototype.mime_types().indexOf(EXEC_MIME_TYPE) == -1) {\n", " register_renderer(events, OutputArea);\n", " }\n", " }\n", "\n", " \n", " if (typeof (root._bokeh_timeout) === \"undefined\" || force === true) {\n", " root._bokeh_timeout = Date.now() + 5000;\n", " root._bokeh_failed_load = false;\n", " }\n", "\n", " var NB_LOAD_WARNING = {'data': {'text/html':\n", " \"
\\n\"+\n", " \"

\\n\"+\n", " \"BokehJS does not appear to have successfully loaded. If loading BokehJS from CDN, this \\n\"+\n", " \"may be due to a slow or bad network connection. Possible fixes:\\n\"+\n", " \"

\\n\"+\n", " \"\\n\"+\n", " \"\\n\"+\n", " \"from bokeh.resources import INLINE\\n\"+\n", " \"output_notebook(resources=INLINE)\\n\"+\n", " \"\\n\"+\n", " \"
\"}};\n", "\n", " function display_loaded() {\n", " var el = document.getElementById(\"1001\");\n", " if (el != null) {\n", " el.textContent = \"BokehJS is loading...\";\n", " }\n", " if (root.Bokeh !== undefined) {\n", " if (el != null) {\n", " el.textContent = \"BokehJS \" + root.Bokeh.version + \" successfully loaded.\";\n", " }\n", " } else if (Date.now() < root._bokeh_timeout) {\n", " setTimeout(display_loaded, 100)\n", " }\n", " }\n", "\n", "\n", " function run_callbacks() {\n", " try {\n", " root._bokeh_onload_callbacks.forEach(function(callback) {\n", " if (callback != null)\n", " callback();\n", " });\n", " } finally {\n", " delete root._bokeh_onload_callbacks\n", " }\n", " console.debug(\"Bokeh: all callbacks have finished\");\n", " }\n", "\n", " function load_libs(css_urls, js_urls, callback) {\n", " if (css_urls == null) css_urls = [];\n", " if (js_urls == null) js_urls = [];\n", "\n", " root._bokeh_onload_callbacks.push(callback);\n", " if (root._bokeh_is_loading > 0) {\n", " console.debug(\"Bokeh: BokehJS is being loaded, scheduling callback at\", now());\n", " return null;\n", " }\n", " if (js_urls == null || js_urls.length === 0) {\n", " run_callbacks();\n", " return null;\n", " }\n", " console.debug(\"Bokeh: BokehJS not loaded, scheduling load and callback at\", now());\n", " root._bokeh_is_loading = css_urls.length + js_urls.length;\n", "\n", " function on_load() {\n", " root._bokeh_is_loading--;\n", " if (root._bokeh_is_loading === 0) {\n", " console.debug(\"Bokeh: all BokehJS libraries/stylesheets loaded\");\n", " run_callbacks()\n", " }\n", " }\n", "\n", " function on_error() {\n", " console.error(\"failed to load \" + url);\n", " }\n", "\n", " for (var i = 0; i < css_urls.length; i++) {\n", " var url = css_urls[i];\n", " const element = document.createElement(\"link\");\n", " element.onload = on_load;\n", " element.onerror = on_error;\n", " element.rel = \"stylesheet\";\n", " element.type = \"text/css\";\n", " element.href = url;\n", " console.debug(\"Bokeh: injecting link tag for BokehJS stylesheet: \", url);\n", " document.body.appendChild(element);\n", " }\n", "\n", " for (var i = 0; i < js_urls.length; i++) {\n", " var url = js_urls[i];\n", " var element = document.createElement('script');\n", " element.onload = on_load;\n", " element.onerror = on_error;\n", " element.async = false;\n", " element.src = url;\n", " console.debug(\"Bokeh: injecting script tag for BokehJS library: \", url);\n", " document.head.appendChild(element);\n", " }\n", " };var element = document.getElementById(\"1001\");\n", " if (element == null) {\n", " console.error(\"Bokeh: ERROR: autoload.js configured with elementid '1001' but no matching script tag was found. \")\n", " return false;\n", " }\n", "\n", " function inject_raw_css(css) {\n", " const element = document.createElement(\"style\");\n", " element.appendChild(document.createTextNode(css));\n", " document.body.appendChild(element);\n", " }\n", "\n", " \n", " var js_urls = [\"https://cdn.pydata.org/bokeh/release/bokeh-1.4.0.min.js\", \"https://cdn.pydata.org/bokeh/release/bokeh-widgets-1.4.0.min.js\", \"https://cdn.pydata.org/bokeh/release/bokeh-tables-1.4.0.min.js\", \"https://cdn.pydata.org/bokeh/release/bokeh-gl-1.4.0.min.js\"];\n", " var css_urls = [];\n", " \n", "\n", " var inline_js = [\n", " function(Bokeh) {\n", " Bokeh.set_log_level(\"info\");\n", " },\n", " function(Bokeh) {\n", " \n", " \n", " }\n", " ];\n", "\n", " function run_inline_js() {\n", " \n", " if (root.Bokeh !== undefined || force === true) {\n", " \n", " for (var i = 0; i < inline_js.length; i++) {\n", " inline_js[i].call(root, root.Bokeh);\n", " }\n", " if (force === true) {\n", " display_loaded();\n", " }} else if (Date.now() < root._bokeh_timeout) {\n", " setTimeout(run_inline_js, 100);\n", " } else if (!root._bokeh_failed_load) {\n", " console.log(\"Bokeh: BokehJS failed to load within specified timeout.\");\n", " root._bokeh_failed_load = true;\n", " } else if (force !== true) {\n", " var cell = $(document.getElementById(\"1001\")).parents('.cell').data().cell;\n", " cell.output_area.append_execute_result(NB_LOAD_WARNING)\n", " }\n", "\n", " }\n", "\n", " if (root._bokeh_is_loading === 0) {\n", " console.debug(\"Bokeh: BokehJS loaded, going straight to plotting\");\n", " run_inline_js();\n", " } else {\n", " load_libs(css_urls, js_urls, function() {\n", " console.debug(\"Bokeh: BokehJS plotting callback run at\", now());\n", " run_inline_js();\n", " });\n", " }\n", "}(window));" ], "application/vnd.bokehjs_load.v0+json": "\n(function(root) {\n function now() {\n return new Date();\n }\n\n var force = true;\n\n if (typeof root._bokeh_onload_callbacks === \"undefined\" || force === true) {\n root._bokeh_onload_callbacks = [];\n root._bokeh_is_loading = undefined;\n }\n\n \n\n \n if (typeof (root._bokeh_timeout) === \"undefined\" || force === true) {\n root._bokeh_timeout = Date.now() + 5000;\n root._bokeh_failed_load = false;\n }\n\n var NB_LOAD_WARNING = {'data': {'text/html':\n \"
\\n\"+\n \"

\\n\"+\n \"BokehJS does not appear to have successfully loaded. If loading BokehJS from CDN, this \\n\"+\n \"may be due to a slow or bad network connection. Possible fixes:\\n\"+\n \"

\\n\"+\n \"\\n\"+\n \"\\n\"+\n \"from bokeh.resources import INLINE\\n\"+\n \"output_notebook(resources=INLINE)\\n\"+\n \"\\n\"+\n \"
\"}};\n\n function display_loaded() {\n var el = document.getElementById(\"1001\");\n if (el != null) {\n el.textContent = \"BokehJS is loading...\";\n }\n if (root.Bokeh !== undefined) {\n if (el != null) {\n el.textContent = \"BokehJS \" + root.Bokeh.version + \" successfully loaded.\";\n }\n } else if (Date.now() < root._bokeh_timeout) {\n setTimeout(display_loaded, 100)\n }\n }\n\n\n function run_callbacks() {\n try {\n root._bokeh_onload_callbacks.forEach(function(callback) {\n if (callback != null)\n callback();\n });\n } finally {\n delete root._bokeh_onload_callbacks\n }\n console.debug(\"Bokeh: all callbacks have finished\");\n }\n\n function load_libs(css_urls, js_urls, callback) {\n if (css_urls == null) css_urls = [];\n if (js_urls == null) js_urls = [];\n\n root._bokeh_onload_callbacks.push(callback);\n if (root._bokeh_is_loading > 0) {\n console.debug(\"Bokeh: BokehJS is being loaded, scheduling callback at\", now());\n return null;\n }\n if (js_urls == null || js_urls.length === 0) {\n run_callbacks();\n return null;\n }\n console.debug(\"Bokeh: BokehJS not loaded, scheduling load and callback at\", now());\n root._bokeh_is_loading = css_urls.length + js_urls.length;\n\n function on_load() {\n root._bokeh_is_loading--;\n if (root._bokeh_is_loading === 0) {\n console.debug(\"Bokeh: all BokehJS libraries/stylesheets loaded\");\n run_callbacks()\n }\n }\n\n function on_error() {\n console.error(\"failed to load \" + url);\n }\n\n for (var i = 0; i < css_urls.length; i++) {\n var url = css_urls[i];\n const element = document.createElement(\"link\");\n element.onload = on_load;\n element.onerror = on_error;\n element.rel = \"stylesheet\";\n element.type = \"text/css\";\n element.href = url;\n console.debug(\"Bokeh: injecting link tag for BokehJS stylesheet: \", url);\n document.body.appendChild(element);\n }\n\n for (var i = 0; i < js_urls.length; i++) {\n var url = js_urls[i];\n var element = document.createElement('script');\n element.onload = on_load;\n element.onerror = on_error;\n element.async = false;\n element.src = url;\n console.debug(\"Bokeh: injecting script tag for BokehJS library: \", url);\n document.head.appendChild(element);\n }\n };var element = document.getElementById(\"1001\");\n if (element == null) {\n console.error(\"Bokeh: ERROR: autoload.js configured with elementid '1001' but no matching script tag was found. \")\n return false;\n }\n\n function inject_raw_css(css) {\n const element = document.createElement(\"style\");\n element.appendChild(document.createTextNode(css));\n document.body.appendChild(element);\n }\n\n \n var js_urls = [\"https://cdn.pydata.org/bokeh/release/bokeh-1.4.0.min.js\", \"https://cdn.pydata.org/bokeh/release/bokeh-widgets-1.4.0.min.js\", \"https://cdn.pydata.org/bokeh/release/bokeh-tables-1.4.0.min.js\", \"https://cdn.pydata.org/bokeh/release/bokeh-gl-1.4.0.min.js\"];\n var css_urls = [];\n \n\n var inline_js = [\n function(Bokeh) {\n Bokeh.set_log_level(\"info\");\n },\n function(Bokeh) {\n \n \n }\n ];\n\n function run_inline_js() {\n \n if (root.Bokeh !== undefined || force === true) {\n \n for (var i = 0; i < inline_js.length; i++) {\n inline_js[i].call(root, root.Bokeh);\n }\n if (force === true) {\n display_loaded();\n }} else if (Date.now() < root._bokeh_timeout) {\n setTimeout(run_inline_js, 100);\n } else if (!root._bokeh_failed_load) {\n console.log(\"Bokeh: BokehJS failed to load within specified timeout.\");\n root._bokeh_failed_load = true;\n } else if (force !== true) {\n var cell = $(document.getElementById(\"1001\")).parents('.cell').data().cell;\n cell.output_area.append_execute_result(NB_LOAD_WARNING)\n }\n\n }\n\n if (root._bokeh_is_loading === 0) {\n console.debug(\"Bokeh: BokehJS loaded, going straight to plotting\");\n run_inline_js();\n } else {\n load_libs(css_urls, js_urls, function() {\n console.debug(\"Bokeh: BokehJS plotting callback run at\", now());\n run_inline_js();\n });\n }\n}(window));" }, "metadata": {}, "output_type": "display_data" } ], ======= "execution_count": null, "metadata": {}, "outputs": [], >>>>>>> dev "source": [ "output_notebook()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "You should also adjust all the path constants to point at the relevant locations for you locally" ] }, { "cell_type": "code", <<<<<<< HEAD "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "#MODEL_RUN_PATH = \"libritts_360-half-October-31-2019_04+54PM-19d2f5f/\"\n", "MODEL_RUN_PATH = \"libritts_360-half-September-28-2019_10+46AM-8565c50/\"\n", ======= "execution_count": null, "metadata": {}, "outputs": [], "source": [ "MODEL_RUN_PATH = \"/media/erogol/data_ssd/Models/libri_tts/speaker_encoder/libritts_360-half-October-31-2019_04+54PM-19d2f5f/\"\n", >>>>>>> dev "MODEL_PATH = MODEL_RUN_PATH + \"best_model.pth.tar\"\n", "CONFIG_PATH = MODEL_RUN_PATH + \"config.json\"\n", "\n", "# My single speaker locations\n", "#EMBED_PATH = \"/home/neil/main/Projects/TTS3/embeddings/neil14/\"\n", "#AUDIO_PATH = \"/home/neil/data/Projects/NeilTTS/neil14/wavs/\"\n", "\n", "# My multi speaker locations\n", "EMBED_PATH = \"/home/erogol/Data/Libri-TTS/train-clean-360-embed_128/\"\n", <<<<<<< HEAD "AUDIO_PATH = \"datasets/LibriTTS/test-clean/\"" ======= "AUDIO_PATH = \"/home/erogol/Data/Libri-TTS/train-clean-360/\"" >>>>>>> dev ] }, { "cell_type": "code", <<<<<<< HEAD "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "best_model.pth.tar\r\n", "config.json\r\n", "events.out.tfevents.1569660396.erogol-desktop\r\n" ] } ], ======= "execution_count": null, "metadata": {}, "outputs": [], >>>>>>> dev "source": [ "!ls -1 $MODEL_RUN_PATH" ] }, { "cell_type": "code", <<<<<<< HEAD "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " > Setting up Audio Processor...\n", " | > sample_rate:16000\n", " | > num_mels:40\n", " | > min_level_db:-100\n", " | > frame_shift_ms:12.5\n", " | > frame_length_ms:50\n", " | > ref_level_db:20\n", " | > fft_size:1024\n", " | > power:None\n", " | > preemphasis:0.98\n", " | > griffin_lim_iters:None\n", " | > signal_norm:True\n", " | > symmetric_norm:True\n", " | > mel_fmin:0\n", " | > mel_fmax:8000.0\n", " | > spec_gain:20.0\n", " | > stft_pad_mode:reflect\n", " | > max_norm:4.0\n", " | > clip_norm:True\n", " | > do_trim_silence:False\n", " | > trim_db:60\n", " | > do_sound_norm:False\n", " | > stats_path:None\n", " | > hop_length:200\n", " | > win_length:800\n" ] } ], ======= "execution_count": null, "metadata": {}, "outputs": [], >>>>>>> dev "source": [ "CONFIG = load_config(CONFIG_PATH)\n", "ap = AudioProcessor(**CONFIG['audio'])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Bring in the embeddings created by **compute_embeddings.py**" ] }, { "cell_type": "code", <<<<<<< HEAD "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Embeddings found: 0\n" ] } ], ======= "execution_count": null, "metadata": {}, "outputs": [], >>>>>>> dev "source": [ "embed_files = glob.glob(EMBED_PATH+\"/**/*.npy\", recursive=True)\n", "print(f'Embeddings found: {len(embed_files)}')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Check that we did indeed find an embedding" ] }, { "cell_type": "code", <<<<<<< HEAD "execution_count": 8, "metadata": {}, "outputs": [ { "ename": "IndexError", "evalue": "list index out of range", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mIndexError\u001b[0m Traceback (most recent call last)", "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0membed_files\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", "\u001b[0;31mIndexError\u001b[0m: list index out of range" ] } ], ======= "execution_count": null, "metadata": {}, "outputs": [], >>>>>>> dev "source": [ "embed_files[0]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Process the speakers\n", "\n", "Assumes count of **speaker_paths** corresponds to number of speakers (so a corpus in just one directory would be treated like a single speaker and the multiple directories of LibriTTS are treated as distinct speakers)" ] }, { "cell_type": "code", <<<<<<< HEAD "execution_count": 9, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Speaker count: 0\n" ] } ], ======= "execution_count": null, "metadata": {}, "outputs": [], >>>>>>> dev "source": [ "speaker_paths = list(set([os.path.dirname(os.path.dirname(embed_file)) for embed_file in embed_files]))\n", "speaker_to_utter = {}\n", "for embed_file in embed_files:\n", " speaker_path = os.path.dirname(os.path.dirname(embed_file))\n", " try:\n", " speaker_to_utter[speaker_path].append(embed_file)\n", " except:\n", " speaker_to_utter[speaker_path]=[embed_file]\n", "print(f'Speaker count: {len(speaker_paths)}')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Set up the embeddings\n", "\n", "Adjust the number of speakers to select and the number of utterances from each speaker and they will be randomly sampled from the corpus" ] }, { "cell_type": "code", <<<<<<< HEAD "execution_count": 11, "metadata": {}, "outputs": [ { "ename": "ValueError", "evalue": "'a' cannot be empty unless no samples are taken", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 12\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 13\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 14\u001b[0;31m \u001b[0mspeaker_idxs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrandom\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mchoice\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mrange\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mspeaker_paths\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnum_speakers\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mreplace\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m \u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 15\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 16\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mspeaker_num\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mspeaker_idx\u001b[0m \u001b[0;32min\u001b[0m \u001b[0menumerate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mspeaker_idxs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32mmtrand.pyx\u001b[0m in \u001b[0;36mnumpy.random.mtrand.RandomState.choice\u001b[0;34m()\u001b[0m\n", "\u001b[0;31mValueError\u001b[0m: 'a' cannot be empty unless no samples are taken" ] } ], "source": [ "ttsembeds = []\n", ======= "execution_count": null, "metadata": {}, "outputs": [], "source": [ "embeds = []\n", >>>>>>> dev "labels = []\n", "locations = []\n", "\n", "# single speaker \n", "#num_speakers = 1\n", "#num_utters = 1000\n", "\n", "# multi speaker\n", "num_speakers = 10\n", "num_utters = 20\n", "\n", "\n", "speaker_idxs = np.random.choice(range(len(speaker_paths)), num_speakers, replace=False )\n", "\n", "for speaker_num, speaker_idx in enumerate(speaker_idxs):\n", " speaker_path = speaker_paths[speaker_idx]\n", " speakers_utter = speaker_to_utter[speaker_path]\n", " utter_idxs = np.random.randint(0, len(speakers_utter) , num_utters)\n", " for utter_idx in utter_idxs:\n", " embed_path = speaker_to_utter[speaker_path][utter_idx]\n", " embed = np.load(embed_path)\n", " embeds.append(embed)\n", " labels.append(str(speaker_num))\n", <<<<<<< HEAD " #locations.append(embed_path.replace(EMBED_PATH, '').replace('.npy','.wav'))\n", ======= " locations.append(embed_path.replace(EMBED_PATH, '').replace('.npy','.wav'))\n", >>>>>>> dev "embeds = np.concatenate(embeds)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Load embeddings with UMAP" ] }, { "cell_type": "code", <<<<<<< HEAD "execution_count": 12, "metadata": {}, "outputs": [ { "ename": "AttributeError", "evalue": "module 'umap' has no attribute 'UMAP'", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)", "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mmodel\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mumap\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mUMAP\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2\u001b[0m \u001b[0mprojection\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmodel\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit_transform\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0membeds\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;31mAttributeError\u001b[0m: module 'umap' has no attribute 'UMAP'" ] } ], ======= "execution_count": null, "metadata": {}, "outputs": [], >>>>>>> dev "source": [ "model = umap.UMAP()\n", "projection = model.fit_transform(embeds)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Interactively charting the data in Bokeh\n", "\n", "Set up various details for Bokeh to plot the data\n", "\n", "You can use the regular Bokeh [tools](http://docs.bokeh.org/en/1.4.0/docs/user_guide/tools.html?highlight=tools) to explore the data, with reset setting it back to normal\n", "\n", "Once you have started the local server (see cell below) you can then click on plotted points which will open a tab to play the audio for that point, enabling easy exploration of your corpus\n", "\n", "File location in the tooltip is given relative to **AUDIO_PATH**" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "source_wav_stems = ColumnDataSource(\n", " data=dict(\n", " x = projection.T[0].tolist(),\n", " y = projection.T[1].tolist(),\n", " desc=locations,\n", " label=labels\n", " )\n", " )\n", "\n", "hover = HoverTool(\n", " tooltips=[\n", " (\"file\", \"@desc\"),\n", " (\"speaker\", \"@label\"),\n", " ]\n", " )\n", "\n", "# optionally consider adding these to the tooltips if you want additional detail\n", "# for the coordinates: (\"(x,y)\", \"($x, $y)\"),\n", "# for the index of the embedding / wav file: (\"index\", \"$index\"),\n", "\n", "factors = list(set(labels))\n", "pal_size = max(len(factors), 3)\n", "pal = Category10[pal_size]\n", "\n", "p = figure(plot_width=600, plot_height=400, tools=[hover,BoxZoomTool(), ResetTool(), TapTool()])\n", "\n", "\n", "p.circle('x', 'y', source=source_wav_stems, color=factor_cmap('label', palette=pal, factors=factors),)\n", "\n", "url = \"http://localhost:8000/@desc\"\n", "taptool = p.select(type=TapTool)\n", "taptool.callback = OpenURL(url=url)\n", "\n", "show(p)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Local server to serve wav files from corpus\n", "\n", "This is required so that when you click on a data point the hyperlink associated with it will be served the file locally.\n", "\n", "There are other ways to serve this if you prefer and you can also run the commands manually on the command line\n", "\n", "The server will continue to run until stopped. To stop it simply interupt the kernel (ie square button or under Kernel menu)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "%cd $AUDIO_PATH\n", "%pwd\n", "!python -m http.server" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", <<<<<<< HEAD "version": "3.8.5" ======= "version": "3.7.4" >>>>>>> dev } }, "nbformat": 4, "nbformat_minor": 4 }