# emotional/emotion_clustering.py
import os

import numpy as np
import librosa
import IPython.display as ipd
from sklearn import metrics
from sklearn.cluster import KMeans, Birch, SpectralClustering, AgglomerativeClustering
from sklearn.mixture import GaussianMixture  # Gaussian mixture model
# Load the pre-extracted emotion embedding for each wav file
embs = []
wavnames = []
speaker_wav_root_path = "na"
for idx, wavname in enumerate(os.listdir(speaker_wav_root_path)):
    if wavname.endswith(".wav"):
        # Each clip is expected to have its embedding saved next to it as "<name>.wav.emo.npy"
        embs.append(np.expand_dims(np.load(f"{speaker_wav_root_path}/{wavname}.emo.npy"), axis=0))
        wavnames.append(wavname)

x = np.concatenate(embs, axis=0)
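# Optional sanity check (an addition, not in the original script): confirm the matrix has
# the expected (num_clips, embedding_dim) shape before clustering.
print("embedding matrix shape:", x.shape)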
# Number of clusters
n_clusters = 15
#model = KMeans(n_clusters=n_clusters, random_state=10)
# Feel free to try different clustering algorithms
model = Birch(n_clusters=n_clusters, threshold=0.2)
#model = SpectralClustering(n_clusters=n_clusters)
#model = AgglomerativeClustering(n_clusters=n_clusters)
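# Another option (a sketch, not in the original script): GaussianMixture is already imported
# above and also supports fit_predict; note it takes n_components instead of n_clusters.
#model = GaussianMixture(n_components=n_clusters, covariance_type="full", random_state=10)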
y_predict = model.fit_predict(x)
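# Optional quality check (an addition, not part of the original script): the silhouette score
# from the already-imported sklearn.metrics gives a rough measure of cluster separation
# (closer to 1.0 is better); handy when comparing algorithms or values of n_clusters.
print("silhouette score:", metrics.silhouette_score(x, y_predict))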
def disp(rootpath, wavname):
    """Play a wav file inline (useful in a notebook for auditioning a cluster)."""
    wav, sr = librosa.load(f"{rootpath}/{wavname}", sr=None)
    ipd.display(ipd.Audio(wav, rate=sr))
# Group the wav filenames by predicted cluster label
classes = [[] for _ in range(y_predict.max() + 1)]
for idx, wavname in enumerate(wavnames):
    classes[y_predict[idx]].append(wavname)
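# Optional overview (an addition for convenience): report how many clips landed in each
# cluster before writing the files, so lopsided clusterings are easy to spot.
for idx, c in enumerate(classes):
    print(f"cluster {idx}: {len(c)} clips")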
# Write each cluster's filenames to a text file and play the clips for auditioning
for idx, c in enumerate(classes):
    with open(f"{speaker_wav_root_path}/cluster{idx}.txt", "w") as f:
        for wavname in c:
            f.write(f"{wavname}\n")
            disp(speaker_wav_root_path, wavname)