{
"cells": [
{
"cell_type": "code",
"execution_count": 14,
"id": "71cc7020",
"metadata": {},
"outputs": [],
"source": [
"from sklearn.cluster import *\n",
"from sklearn import metrics\n",
"from sklearn.mixture import GaussianMixture # 高斯混合模型\n",
"import os\n",
"import numpy as np\n",
"import librosa\n",
"import IPython.display as ipd\n"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "5409aa96",
"metadata": {},
"outputs": [],
"source": [
"embs = []\n",
"wavnames = []\n",
"speaker_wav_root_path = \"dataset/nene\"\n",
"for idx, wavname in enumerate(os.listdir(speaker_wav_root_path)):\n",
" if wavname.endswith(\".wav\"):\n",
" embs.append(np.expand_dims(np.load(f\"{speaker_wav_root_path}/{wavname}.emo.npy\"), axis=0))\n",
" wavnames.append(wavname)"
]
},
{
"cell_type": "code",
"execution_count": 17,
"id": "8295f174",
"metadata": {},
"outputs": [],
"source": [
"x = np.concatenate(embs,axis=0)\n",
"\n",
"# 聚类算法类的数量\n",
"n_clusters = 3\n",
"model = KMeans(n_clusters=n_clusters, random_state=10)\n",
"# 可以自行尝试各种不同的聚类算法\n",
"# model = Birch(n_clusters= n_clusters, threshold= 0.2)\n",
"# model = SpectralClustering(n_clusters=n_clusters)\n",
"# model = AgglomerativeClustering(n_clusters= n_clusters)\n",
"\n",
"y_predict = model.fit_predict(x)\n",
"\n",
"def disp(rootpath, wavname):\n",
" wav, sr =librosa.load(f\"{rootpath}/{wavname}\", 16000)\n",
" display(ipd.Audio(wav, rate=sr))\n",
" "
]
},
{
"cell_type": "code",
"execution_count": 18,
"id": "25994586",
"metadata": {
"scrolled": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"类别: 0 本类中样本数量: 963\n",
"nen103_251.wav\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_27702/445880133.py:14: FutureWarning: Pass sr=16000 as keyword args. From version 0.10 passing these as positional arguments will result in an error\n",
" wav, sr =librosa.load(f\"{rootpath}/{wavname}\", 16000)\n"
]
},
{
"data": {
"text/html": [
"\n",
" \n",
" "
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"nen108_046.wav\n"
]
},
{
"data": {
"text/html": [
"\n",
" \n",
" "
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"nen104_311.wav\n"
]
},
{
"data": {
"text/html": [
"\n",
" \n",
" "
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"nen115_210.wav\n"
]
},
{
"data": {
"text/html": [
"\n",
" \n",
" "
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"类别: 1 本类中样本数量: 2940\n",
"nen301_005.wav\n"
]
},
{
"data": {
"text/html": [
"\n",
" \n",
" "
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"nen113_082.wav\n"
]
},
{
"data": {
"text/html": [
"\n",
" \n",
" "
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"nen005_110.wav\n"
]
},
{
"data": {
"text/html": [
"\n",
" \n",
" "
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"nen203_047.wav\n"
]
},
{
"data": {
"text/html": [
"\n",
" \n",
" "
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"类别: 2 本类中样本数量: 1555\n",
"nen104_220.wav\n"
]
},
{
"data": {
"text/html": [
"\n",
" \n",
" "
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"nen103_131.wav\n"
]
},
{
"data": {
"text/html": [
"\n",
" \n",
" "
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"nen109_068.wav\n"
]
},
{
"data": {
"text/html": [
"\n",
" \n",
" "
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"nen105_121.wav\n"
]
},
{
"data": {
"text/html": [
"\n",
" \n",
" "
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"classes=[[] for i in range(y_predict.max()+1)]\n",
"\n",
"for idx, wavname in enumerate(wavnames):\n",
" classes[y_predict[idx]].append(wavname)\n",
"\n",
"for i in range(y_predict.max()+1):\n",
" print(\"类别:\", i, \"本类中样本数量:\", len(classes[i]))\n",
" \"\"\"每一个类只预览4条音频\"\"\"\n",
" for j in range(4):\n",
" print(classes[i][j])\n",
" disp(speaker_wav_root_path, classes[i][j])"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b9731356",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.13"
}
},
"nbformat": 4,
"nbformat_minor": 5
}