{ "cells": [ { "cell_type": "code", "execution_count": 14, "id": "71cc7020", "metadata": {}, "outputs": [], "source": [ "from sklearn.cluster import *\n", "from sklearn import metrics\n", "from sklearn.mixture import GaussianMixture # 高斯混合模型\n", "import os\n", "import numpy as np\n", "import librosa\n", "import IPython.display as ipd\n" ] }, { "cell_type": "code", "execution_count": 7, "id": "5409aa96", "metadata": {}, "outputs": [], "source": [ "embs = []\n", "wavnames = []\n", "speaker_wav_root_path = \"dataset/nene\"\n", "for idx, wavname in enumerate(os.listdir(speaker_wav_root_path)):\n", " if wavname.endswith(\".wav\"):\n", " embs.append(np.expand_dims(np.load(f\"{speaker_wav_root_path}/{wavname}.emo.npy\"), axis=0))\n", " wavnames.append(wavname)" ] }, { "cell_type": "code", "execution_count": 17, "id": "8295f174", "metadata": {}, "outputs": [], "source": [ "x = np.concatenate(embs,axis=0)\n", "\n", "# 聚类算法类的数量\n", "n_clusters = 3\n", "model = KMeans(n_clusters=n_clusters, random_state=10)\n", "# 可以自行尝试各种不同的聚类算法\n", "# model = Birch(n_clusters= n_clusters, threshold= 0.2)\n", "# model = SpectralClustering(n_clusters=n_clusters)\n", "# model = AgglomerativeClustering(n_clusters= n_clusters)\n", "\n", "y_predict = model.fit_predict(x)\n", "\n", "def disp(rootpath, wavname):\n", " wav, sr =librosa.load(f\"{rootpath}/{wavname}\", 16000)\n", " display(ipd.Audio(wav, rate=sr))\n", " " ] }, { "cell_type": "code", "execution_count": 18, "id": "25994586", "metadata": { "scrolled": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "类别: 0 本类中样本数量: 963\n", "nen103_251.wav\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/tmp/ipykernel_27702/445880133.py:14: FutureWarning: Pass sr=16000 as keyword args. From version 0.10 passing these as positional arguments will result in an error\n", " wav, sr =librosa.load(f\"{rootpath}/{wavname}\", 16000)\n" ] }, { "data": { "text/html": [ "\n", " \n", " " ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "nen108_046.wav\n" ] }, { "data": { "text/html": [ "\n", " \n", " " ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "nen104_311.wav\n" ] }, { "data": { "text/html": [ "\n", " \n", " " ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "nen115_210.wav\n" ] }, { "data": { "text/html": [ "\n", " \n", " " ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "类别: 1 本类中样本数量: 2940\n", "nen301_005.wav\n" ] }, { "data": { "text/html": [ "\n", " \n", " " ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "nen113_082.wav\n" ] }, { "data": { "text/html": [ "\n", " \n", " " ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "nen005_110.wav\n" ] }, { "data": { "text/html": [ "\n", " \n", " " ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "nen203_047.wav\n" ] }, { "data": { "text/html": [ "\n", " \n", " " ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "类别: 2 本类中样本数量: 1555\n", "nen104_220.wav\n" ] }, { "data": { "text/html": [ "\n", " \n", " " ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "nen103_131.wav\n" ] }, { "data": { "text/html": [ "\n", " \n", " " ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "nen109_068.wav\n" ] }, { "data": { "text/html": [ "\n", " \n", " " ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "nen105_121.wav\n" ] }, { "data": { "text/html": [ "\n", " \n", " " ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "classes=[[] for i in range(y_predict.max()+1)]\n", "\n", "for idx, wavname in enumerate(wavnames):\n", " classes[y_predict[idx]].append(wavname)\n", "\n", "for i in range(y_predict.max()+1):\n", " print(\"类别:\", i, \"本类中样本数量:\", len(classes[i]))\n", " \"\"\"每一个类只预览4条音频\"\"\"\n", " for j in range(4):\n", " print(classes[i][j])\n", " disp(speaker_wav_root_path, classes[i][j])" ] }, { "cell_type": "code", "execution_count": null, "id": "b9731356", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.13" } }, "nbformat": 4, "nbformat_minor": 5 }