ML-with-Rajibul committed
Commit
9910ecc
1 Parent(s): 5f93bf1

Upload 5 files

Files changed (6)
  1. .gitattributes +1 -0
  2. MT.py +34 -0
  3. SER.py +115 -0
  4. X_train.pkl +3 -0
  5. Y_train.pkl +3 -0
  6. speech-emotion-recognition.hdf5 +3 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ speech-emotion-recognition.hdf5 filter=lfs diff=lfs merge=lfs -text
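Note: with this rule in place, a plain clone of the repo stores only an LFS pointer for the model file; running git lfs pull after cloning (with Git LFS installed) fetches the actual speech-emotion-recognition.hdf5 that SER.py loads below.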
MT.py ADDED
@@ -0,0 +1,34 @@
+ import random
+ import spotipy
+ from spotipy.oauth2 import SpotifyClientCredentials
+
+ # Authenticate with the Spotify Web API (client-credentials flow)
+ client_id = '471e06ff0a13445095909029b18c265c'
+ client_secret = 'c0f56895d29f434cbeac4309d0b42d05'
+ client_credentials_manager = SpotifyClientCredentials(client_id=client_id, client_secret=client_secret)
+ sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)
+
+ def search_song_by_emotion(emotion):
+     # Map each emotion to a list of raga search keywords
+     emotion_keywords = {
+         "neutral": ["raga des sarangi", "raga malkauns", "raga bhairav", "raga rageshri"],
+         "surprise": ["raag hameer", "raag kedar", "raga puriya"],
+         "fear": ["raag bilahari", "raag purvi", "raag shudh kalyan", "raag miya ki malhar"],
+         "sad": ["raag yaman sitar", "raga hameer", "raga shyam kalyan"],
+         "angry": ["raag jaijaiwanti", "raag bhairavi", "raga puriya", "raag kafi"],
+         "happy": ["raga hamsadhwani sarod", "raga khamaj", "raga bhupali", "raga bahar"],
+         "disgust": ["raga khamaj", "raga bilaskhani todi", "raga shudh kalyan", "raga puriya"]
+     }
+
+     # Search for a track matching a randomly chosen keyword for this emotion
+     keywords = emotion_keywords.get(emotion.lower(), [])
+     if keywords:
+         keyword = random.choice(keywords)
+         results = sp.search(q=f"track:{keyword}", type="track", limit=1)
+         tracks = results["tracks"]["items"]
+
+         # Return the preview URL of the first matching track, if any
+         if tracks:
+             preview_url = tracks[0]["preview_url"]
+             return preview_url
+     return None
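A minimal usage sketch of MT.py, assuming valid Spotify credentials and network access (the "happy" argument is just an example key from emotion_keywords):

from MT import search_song_by_emotion

preview = search_song_by_emotion("happy")
if preview:
    print("30-second preview:", preview)
else:
    # Spotify returns no preview_url for some tracks, so None is possible
    print("No preview available for this emotion.")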
SER.py ADDED
@@ -0,0 +1,115 @@
+ import pandas as pd
+ import numpy as np
+
+ import librosa
+
+ import sklearn
+ from sklearn.preprocessing import StandardScaler, OneHotEncoder
+ from sklearn.model_selection import train_test_split
+
+ import tensorflow as tf
+ from keras.models import load_model
+
+ import pickle
+
+ sample_rate = 22050
+
+ # Augmentation: add white noise scaled to the signal's peak amplitude
+ def noise(data):
+     noise_value = 0.015 * np.random.uniform() * np.amax(data)
+     data = data + noise_value * np.random.normal(size=data.shape[0])
+     return data
+
+ # Augmentation: time-stretch the signal (rate < 1 slows it down)
+ def stretch(data, rate=0.8):
+     return librosa.effects.time_stretch(data, rate=rate)
+
+ # Augmentation: circularly shift the signal by up to +/- 5000 samples
+ def shift(data):
+     shift_range = int(np.random.uniform(low=-5, high=5) * 1000)
+     return np.roll(data, shift_range)
+
+ # Augmentation: shift the pitch by pitch_factor semitones
+ def pitch(data, sampling_rate, pitch_factor=0.7):
+     return librosa.effects.pitch_shift(data, sr=sampling_rate, n_steps=pitch_factor)
+
+ # Extract a 1-D feature vector: zero-crossing rate, chroma, MFCC, RMS energy,
+ # and mel spectrogram, each averaged across time frames
+ def extract_process(data):
+     sample_rate = 22050
+     output_result = np.array([])
+     mean_zero = np.mean(librosa.feature.zero_crossing_rate(y=data).T, axis=0)
+     output_result = np.hstack((output_result, mean_zero))
+
+     stft_out = np.abs(librosa.stft(data))
+     chroma_stft = np.mean(librosa.feature.chroma_stft(S=stft_out, sr=sample_rate).T, axis=0)
+     output_result = np.hstack((output_result, chroma_stft))
+
+     mfcc_out = np.mean(librosa.feature.mfcc(y=data, sr=sample_rate).T, axis=0)
+     output_result = np.hstack((output_result, mfcc_out))
+
+     root_mean_out = np.mean(librosa.feature.rms(y=data).T, axis=0)
+     output_result = np.hstack((output_result, root_mean_out))
+
+     mel_spectrogram = np.mean(librosa.feature.melspectrogram(y=data, sr=sample_rate).T, axis=0)
+     output_result = np.hstack((output_result, mel_spectrogram))
+
+     return output_result
+
+ # Build a 3-row feature matrix per file: the original clip, a noise-augmented
+ # copy, and a stretched + pitch-shifted copy
+ def export_process(path):
+     data, sample_rate = librosa.load(path, duration=2.5, offset=1)
+
+     output_1 = extract_process(data)
+     result = np.array(output_1)
+
+     noise_out = noise(data)
+     output_2 = extract_process(noise_out)
+     result = np.vstack((result, output_2))
+
+     new_out = stretch(data)
+     stretch_pitch = pitch(new_out, sample_rate)
+     output_3 = extract_process(stretch_pitch)
+     result = np.vstack((result, output_3))
+
+     return result
+
+ # Load the precomputed training features
+ with open('X_train.pkl', 'rb') as f:
+     X_train = pickle.load(f)
+
+ # Load the corresponding emotion labels
+ with open('Y_train.pkl', 'rb') as f:
+     Y_train = pickle.load(f)
+
+ Features = pd.DataFrame(X_train)
+ Features['labels'] = Y_train
+
+ X = Features.iloc[:, :-1].values
+ Y = Features['labels'].values
+
+ # One-hot encode the labels; the fitted encoder is reused at inference time
+ # to map model outputs back to emotion names
+ encoder_label = OneHotEncoder()
+ Y = encoder_label.fit_transform(np.array(Y).reshape(-1, 1)).toarray()
+
+ x_train, x_test, y_train, y_test = train_test_split(X, Y, train_size=0.9, random_state=42, shuffle=True)
+
+ # Standardize features; the same scaler is applied to inference inputs
+ scaler_data = StandardScaler()
+ x_train = scaler_data.fit_transform(x_train)
+ x_test = scaler_data.transform(x_test)
+
+ # Turn an audio file into scaled model input of shape (3, n_features, 1)
+ def preprocess_audio(audio):
+     features = export_process(audio)
+     features = scaler_data.transform(features)
+     return np.expand_dims(features, axis=2)
+
+ # Predict the emotion for preprocessed audio
+ def predict_emotion(preprocessed_audio):
+     model = load_model('speech-emotion-recognition.hdf5')
+     prediction = model.predict(preprocessed_audio)
+     predicted_emotion = encoder_label.inverse_transform(prediction)
+     return predicted_emotion[0]
+
+ # Live emotion recognition: preprocess a recording and return its label
+ def live_emotion_recognition(audio_path):
+     preprocessed_audio = preprocess_audio(audio_path)
+     predicted_emotion = predict_emotion(preprocessed_audio)
+     return predicted_emotion[0]
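A minimal end-to-end sketch combining the two modules, assuming the model's label strings match the emotion_keywords keys in MT.py (the input file name is hypothetical, and importing SER runs the training-data preprocessing above at import time):

from SER import live_emotion_recognition
from MT import search_song_by_emotion

emotion = live_emotion_recognition("recording.wav")  # hypothetical input file
print("Detected emotion:", emotion)
print("Suggested raga preview:", search_song_by_emotion(emotion))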
X_train.pkl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3ca0b38da847556205b8092899f4472153bf65fd3af95055d5df4c51720c44e2
+ size 11165240
Y_train.pkl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ade5a4397190642531e4d909cb18b54aa5f12cae4b587483f8190576ac01c8b0
+ size 48108
speech-emotion-recognition.hdf5 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d9efa37a959fbc465d3a96912383c28cef9a35c3d0cb2065abda86f58c13ee32
+ size 6747280