Garvitj committed
Commit 9b5af99 · verified · 1 parent: 199605d

Update app.py

Files changed (1)
  1. app.py +10 -2
app.py CHANGED
@@ -15,7 +15,7 @@ from collections import Counter
 import os
 
 # Load necessary models and files
-text_model = load_model('model_for_text_emotion_updated(1).keras') # Load your text emotion model
+text_model = load_model('model_for_text_emotion_updated(1).keras') # Load text emotion model
 with open('tokenizer.json') as json_file:
     tokenizer = tokenizer_from_json(json.load(json_file)) # Tokenizer for text emotion
 audio_model = load_model('my_model.h5') # Load audio emotion model
@@ -44,6 +44,9 @@ def predict_text_emotion(text):
 
 # Extract audio features and predict emotion
 def extract_audio_features(audio_data, sample_rate):
+    if not isinstance(audio_data, np.ndarray):
+        audio_data = np.array(audio_data) # Ensure it's a NumPy array
+
     mfcc = np.mean(librosa.feature.mfcc(y=audio_data, sr=sample_rate).T, axis=0)
     return np.expand_dims(mfcc, axis=0)
 
@@ -111,7 +114,12 @@ def transcribe_and_predict_video(video_path):
     image_emotion = process_video(video_path)
 
     # Predict emotion from audio (sound-based)
-    sample_rate, audio_data = librosa.load(audio_file, sr=None)
+    audio_data, sample_rate = librosa.load(audio_file, sr=None)
+
+    # Debugging print statements
+    print(f"Type of audio_data: {type(audio_data)}") # Ensure audio_data is numpy.ndarray
+    print(f"Sample rate: {sample_rate}")
+
     audio_emotion = predict_audio_emotion(audio_data, sample_rate)
 
     # Combine the detected emotions for final output (you could average them or choose the most common)
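Note on the extract_audio_features change: librosa.feature.mfcc expects y to be a NumPy array, so the added isinstance guard cheaply coerces list-like input before feature extraction. A minimal sketch of what the helper produces, assuming librosa's default of 20 MFCC coefficients; the demo call on one second of silence is illustrative, not from the app:

import numpy as np
import librosa

def extract_audio_features(audio_data, sample_rate):
    # Coerce list-like input; librosa expects an ndarray
    if not isinstance(audio_data, np.ndarray):
        audio_data = np.array(audio_data)
    # mfcc(...) returns (n_mfcc, frames); mean over frames gives (n_mfcc,)
    mfcc = np.mean(librosa.feature.mfcc(y=audio_data, sr=sample_rate).T, axis=0)
    # Add a batch axis for the Keras model: (1, n_mfcc)
    return np.expand_dims(mfcc, axis=0)

features = extract_audio_features(np.zeros(22050), 22050)  # 1 s of silence
print(features.shape)  # (1, 20) with librosa's default n_mfcc=20

Mean-pooling over frames keeps the audio model's input size fixed regardless of clip length.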
 
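Note on the librosa.load fix, the substantive change here: librosa.load returns (y, sr), the sample array first and the sample rate second, so the old unpacking order silently assigned an integer to audio_data and an ndarray to sample_rate (the reversed (rate, data) order is scipy.io.wavfile.read's convention, a likely source of the mix-up). The added debug prints make the types visible at runtime. A minimal sketch of the corrected call, with 'video_audio.wav' standing in as a hypothetical extracted-audio path:

import librosa

# sr=None keeps the file's native sample rate instead of resampling to 22050 Hz
audio_data, sample_rate = librosa.load('video_audio.wav', sr=None)  # hypothetical path
print(type(audio_data))  # <class 'numpy.ndarray'>
print(sample_rate)       # native rate, e.g. 44100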