jfforero committed on
Commit
8339020
1 Parent(s): 13ed879

Upload 3 files

Browse files
Files changed (3) hide show
  1. app (1).py +103 -0
  2. mymodel_SER_LSTM_RAVDESS (1).h5 +3 -0
  3. requirements.txt +12 -0
app (1).py ADDED
@@ -0,0 +1,103 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import numpy as np
3
+ import librosa
4
+ import requests
5
+ from io import BytesIO
6
+ from PIL import Image
7
+ import os
8
+ from tensorflow.keras.models import load_model
9
+ from faster_whisper import WhisperModel
10
+
11
+ # Load the emotion prediction model
12
def load_emotion_model(model_path):
    """Load the Keras emotion classifier stored at ``model_path``.

    Returns the loaded model. If loading raises for any reason, the error
    is printed and ``None`` is returned instead.
    """
    try:
        return load_model(model_path)
    except Exception as exc:
        print("Error loading emotion prediction model:", exc)
    return None
19
+
20
# Path of the serialized emotion classifier passed to load_emotion_model.
# NOTE(review): the weights file uploaded with this commit is listed as
# "mymodel_SER_LSTM_RAVDESS (1).h5" -- confirm a file with the exact name
# below exists, otherwise loading fails and `model` ends up as None.
model_path = 'mymodel_SER_LSTM_RAVDESS.h5'
# `model` is None when load_emotion_model could not load the file.
model = load_emotion_model(model_path)

# Initialize WhisperModel (the "small" model, on CPU, with int8 compute type)
model_size = "small"
model2 = WhisperModel(model_size, device="cpu", compute_type="int8")
26
+
27
+ # Function to transcribe audio
28
def transcribe(wav_filepath):
    """Transcribe the audio file at ``wav_filepath`` with the Whisper model.

    Runs ``model2.transcribe`` with a beam size of 5 and returns the text of
    all segments concatenated, with no separator added between them.
    """
    segments, _info = model2.transcribe(wav_filepath, beam_size=5)
    return "".join(segment.text for segment in segments)
31
+
32
+ # Function to extract MFCC features from audio
33
def extract_mfcc(wav_file_name):
    """Compute a mean MFCC feature vector for an audio file.

    The file is loaded with ``librosa.load`` (default settings), 40 MFCCs
    are computed, and the transposed MFCC matrix is averaged along axis 0.

    Returns the averaged vector, or ``None`` (after printing the error) if
    loading or feature extraction raises.
    """
    try:
        signal, sample_rate = librosa.load(wav_file_name)
        mfcc_frames = librosa.feature.mfcc(y=signal, sr=sample_rate, n_mfcc=40).T
        return np.mean(mfcc_frames, axis=0)
    except Exception as exc:
        print("Error extracting MFCC features:", exc)
        return None
41
+
42
+ # Emotions dictionary
43
# Maps the 1-based class label (argmax index + 1) to its emotion name.
emotions = {
    1: 'neutral',
    2: 'calm',
    3: 'happy',
    4: 'sad',
    5: 'angry',
    6: 'fearful',
    7: 'disgust',
    8: 'surprised',
}
44
+
45
+ # Function to predict emotion from audio
46
def predict_emotion_from_audio(wav_filepath):
    """Predict the emotion expressed in an audio file.

    Args:
        wav_filepath: Path of the audio file to classify.

    Returns:
        The emotion name looked up in ``emotions`` for the highest-scoring
        class, the string ``"Error: Unable to extract features"`` when
        ``extract_mfcc`` returns ``None``, or ``None`` (after printing the
        error) when prediction itself fails.
    """
    try:
        test_point = extract_mfcc(wav_filepath)
        if test_point is None:
            return "Error: Unable to extract features"
        if model is None:
            # The model failed to load at start-up; say so explicitly rather
            # than surfacing an opaque AttributeError from `None.predict`.
            raise RuntimeError("emotion prediction model is not loaded")
        # Pass the shape positionally: the `newshape=` keyword is deprecated
        # in recent NumPy releases, while the positional form works everywhere.
        test_point = np.reshape(test_point, (1, 40, 1))
        predictions = model.predict(test_point)
        # Class indices are 0-based; the keys of `emotions` start at 1.
        predicted_emotion_label = int(np.argmax(predictions[0])) + 1
        return emotions[predicted_emotion_label]
    except Exception as e:
        print("Error predicting emotion:", e)
        return None
59
+
60
# DeepAI API key, read from the environment; None when the variable is unset.
api_key = os.environ.get("DeepAI_api_key")
61
+
62
+ # Function to generate an image using DeepAI Text to Image API
63
def generate_image(api_key, text):
    """Generate an image from ``text`` with the DeepAI Text to Image API.

    Args:
        api_key: DeepAI API key sent in the ``api-key`` header.
        text: Prompt describing the image to generate.

    Returns:
        A ``PIL.Image.Image`` built from the downloaded result, or ``None``
        when the key is missing, a request fails or times out, the response
        carries no ``output_url``, or the downloaded bytes are not an image.
    """
    if not api_key:
        # Without a key the API can only answer with an error; skip the call.
        print("Error generating image: DeepAI API key is not set")
        return None

    url = "https://api.deepai.org/api/text2img"
    headers = {'api-key': api_key}
    # Bound both HTTP calls so a stalled connection cannot hang the request
    # handler indefinitely.
    timeout_seconds = 60
    try:
        response = requests.post(
            url,
            data={'text': text},
            headers=headers,
            timeout=timeout_seconds,
        )
        response.raise_for_status()
        response_data = response.json()
        if not isinstance(response_data, dict) or 'output_url' not in response_data:
            return None
        image_response = requests.get(
            response_data['output_url'],
            timeout=timeout_seconds,
        )
        image_response.raise_for_status()
        return Image.open(BytesIO(image_response.content))
    except (requests.RequestException, ValueError, OSError) as e:
        # RequestException: network/HTTP failures; ValueError: invalid JSON;
        # OSError: payload that PIL cannot read as an image.
        print("Error generating image:", e)
        return None
79
+
80
+
81
+ # Function to get predictions
82
def get_predictions(audio_input):
    """Run the full pipeline for one recording.

    Predicts the emotion, transcribes the speech, and generates an image
    from a prompt made of the emotion name followed by the transcription.

    Args:
        audio_input: Path of the audio file, or ``None`` when no audio was
            supplied.

    Returns:
        A tuple ``(emotion_prediction, transcribed_text, image)``. When
        ``audio_input`` is ``None`` the tuple is ``(None, "", None)``.
        ``image`` is ``None`` when there is nothing to build a prompt from
        or when image generation fails.
    """
    if audio_input is None:
        # Nothing to analyse; avoid handing None to the audio libraries.
        return None, "", None

    emotion_prediction = predict_emotion_from_audio(audio_input)
    transcribed_text = transcribe(audio_input)

    # Only a real emotion name belongs in the prompt: the predictor may
    # return None or an error message, neither of which should reach the
    # image generator (and None cannot be concatenated with a string).
    prompt_emotion = emotion_prediction if emotion_prediction in emotions.values() else None
    prompt_parts = [
        part.strip()
        for part in (prompt_emotion, transcribed_text)
        if part and part.strip()
    ]
    texto_imagen = " ".join(prompt_parts)

    image = generate_image(api_key, texto_imagen) if texto_imagen else None
    return emotion_prediction, transcribed_text, image
88
+
89
+ # Create the Gradio interface
90
# Gradio UI: one audio input (delivered to the callback as a file path) and
# three outputs matching the tuple returned by get_predictions.
interface = gr.Interface(
    fn=get_predictions,
    inputs=gr.Audio(type="filepath", label="Input Audio"),
    outputs=[
        gr.Label(value="Acoustic Prediction", label="Acoustic Prediction"),
        gr.Label(value="Transcribed Text", label="Transcribed Text"),
        gr.Image(label="Generated Image", type='pil'),
    ],
    description="Create an AVE using your voice.",
    title="Affective Virtual Environments",
)

# Start the web server for the interface.
interface.launch()
mymodel_SER_LSTM_RAVDESS (1).h5 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5ad725c49cec0f25f17e1c798f35d3b3e486ffdf2cf97497f2beb99805dc6c8f
3
+ size 976728
requirements.txt ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ kaleido
2
+ numpy
3
+ tensorflow==2.12.0
4
+ gradio
5
+ transformers
6
+ tf-keras
7
+ librosa
8
+ vaderSentiment
9
+ requests
10
+ torch
11
+ sentencepiece
12
+ faster_whisper