jfforero committed on
Commit 7a544e4
1 Parent(s): 0cc05bd

Update app.py

Files changed (1):
  app.py  +68 -65
app.py CHANGED
@@ -10,12 +10,12 @@ from faster_whisper import WhisperModel
 
 # Load the emotion prediction model
 def load_emotion_model(model_path):
- try:
- model = load_model(model_path)
- return model
- except Exception as e:
- print("Error loading emotion prediction model:", e)
- return None
+     try:
+         model = load_model(model_path)
+         return model
+     except Exception as e:
+         print("Error loading emotion prediction model:", e)
+         return None
 
 model_path = 'mymodel_SER_LSTM_RAVDESS.h5'
 model = load_emotion_model(model_path)
@@ -26,84 +26,87 @@ model2 = WhisperModel(model_size, device="cpu", compute_type="int8")
 
 # Function to transcribe audio
 def transcribe(wav_filepath):
- segments, _ = model2.transcribe(wav_filepath, beam_size=5)
- return "".join([segment.text for segment in segments])
+     segments, _ = model2.transcribe(wav_filepath, beam_size=5)
+     return "".join([segment.text for segment in segments])
 
 # Function to extract MFCC features from audio
 def extract_mfcc(wav_file_name):
- try:
- y, sr = librosa.load(wav_file_name)
- mfccs = np.mean(librosa.feature.mfcc(y=y, sr=sr, n_mfcc=40).T, axis=0)
- return mfccs
- except Exception as e:
- print("Error extracting MFCC features:", e)
- return None
+     try:
+         y, sr = librosa.load(wav_file_name)
+         mfccs = np.mean(librosa.feature.mfcc(y=y, sr=sr, n_mfcc=40).T, axis=0)
+         return mfccs
+     except Exception as e:
+         print("Error extracting MFCC features:", e)
+         return None
 
 # Emotions dictionary
 emotions = {1: 'neutral', 2: 'calm', 3: 'happy', 4: 'sad', 5: 'angry', 6: 'fearful', 7: 'disgust', 8: 'surprised'}
 
 # Function to predict emotion from audio
 def predict_emotion_from_audio(wav_filepath):
- try:
- test_point = extract_mfcc(wav_filepath)
- if test_point is not None:
- test_point = np.reshape(test_point, newshape=(1, 40, 1))
- predictions = model.predict(test_point)
- predicted_emotion_label = np.argmax(predictions[0]) + 1
- return emotions[predicted_emotion_label]
- else:
- return "Error: Unable to extract features"
- except Exception as e:
- print("Error predicting emotion:", e)
- return None
+     try:
+         test_point = extract_mfcc(wav_filepath)
+         if test_point is not None:
+             test_point = np.reshape(test_point, newshape=(1, 40, 1))
+             predictions = model.predict(test_point)
+             predicted_emotion_label = np.argmax(predictions[0]) + 1
+             return emotions[predicted_emotion_label]
+         else:
+             return "Error: Unable to extract features"
+     except Exception as e:
+         print("Error predicting emotion:", e)
+         return None
 
 api_key = os.getenv("DeepAI_api_key")
 
 # Function to generate an image using DeepAI Text to Image API
- def generate_image(emotion_prediction, transcribed_text, image_folder='huggingface'):
- try:
- url = "https://api.deepai.org/api/image-editor"
- headers = {'api-key': api_key}
- files = {'image': open('TAI_Images/TerraIncognita2.jpg', 'rb'), 'text': emotion_prediction + " " + transcribed_text}
- response = requests.post(url, headers=headers, files=files)
- response_data = response.json()
- if 'output_url' in response_data:
- image_url = response_data['output_url']
- image_response = requests.get(image_url)
- image = Image.open(BytesIO(image_response.content))
- # Save image
- image.save(os.path.join(image_folder, 'generated_image.jpg'))
- return response_data['output_url']
- else:
- return None
- except Exception as e:
- print("Error generating image:", e)
- return None
-
- # Function to get predictions
- def get_predictions(audio_input, huggingface_folder='huggingface'):
- temp_audio_path = gradio.inputs.FilesMixin.get_file(audio_input, save_path='huggingface')
- emotion_prediction = predict_emotion_from_audio(temp_audio_path)
- transcribed_text = transcribe(temp_audio_path)
- texto_imagen = emotion_prediction + transcribed_text
- image = generate_image(api_key, texto_imagen, image_folder=huggingface_folder)
- # Save audio
- os.rename(temp_audio_path, os.path.join(huggingface_folder, 'input_audio.wav'))
- return emotion_prediction, transcribed_text, image
+
+ def generate_image(emotion_prediction, transcribed_text):
+     try:
+         url = "https://api.deepai.org/api/image-editor"
+         headers = {
+             'api-key': api_key
+         }
+         files = {
+             'image': open('TAI_Images/TerraIncognita2.jpg', 'rb'),  # Replace with the actual path to your image file
+             'text': "this"
+             # 'text': emotion_prediction + " " + transcribed_text
+         }
+         response = requests.post(url, headers=headers, files=files)
+         response_data = response.json()
+         if 'output_url' in response_data:
+             image_url = response_data['output_url']  # remove
+             image_response = requests.get(image_url)  # remove
+             image = Image.open(BytesIO(image_response.content))  # remove
+             return response_data['output_url']
+         else:
+             return None
+     except Exception as e:
+         print("Error generating image:", e)
+         return None
+
+ # Function to get predictions
+ def get_predictions(audio_input):
+     emotion_prediction = predict_emotion_from_audio(audio_input)
+     transcribed_text = transcribe(audio_input)
+     texto_imagen = emotion_prediction + transcribed_text
+     image = generate_image(api_key, texto_imagen)
+     return emotion_prediction, transcribed_text, image
 
 # Create the Gradio interface
 interface = gr.Interface(
- fn=get_predictions,
- inputs=gr.Audio(label="Input Audio", type="filepath"),
- outputs=[
- gr.Label("Acoustic Prediction", label="Acoustic Prediction"),
- gr.Label("Transcribed Text", label="Transcribed Text"),
- gr.Image(type='pil', label="Generated Image")
- ],
- title="Affective Virtual Environments",
- description="Create an AVE using your voice."
+     fn=get_predictions,
+     inputs=gr.Audio(label="Input Audio", type="filepath"),
+     outputs=[
+         gr.Label("Acoustic Prediction", label="Acoustic Prediction"),
+         gr.Label("Transcribed Text", label="Transcribed Text"),
+         gr.Image(type='pil', label="Generated Image")
+     ],
+     title="Affective Virtual Environments",
+     description="Create an AVE using your voice."
 )
 
+
 interface.launch()
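Note: the hunk context above shows only `from faster_whisper import WhisperModel`; the rest of the app.py header is outside this diff. For readers following along, the functions in this commit rely on roughly the following imports. This is a hypothetical reconstruction, not part of the commit; in particular, the source of `load_model` (here `tensorflow.keras.models`) is an assumption.

# Hypothetical import block for context -- not part of commit 7a544e4.
import os
from io import BytesIO

import gradio as gr
import librosa
import numpy as np
import requests
from PIL import Image
from tensorflow.keras.models import load_model  # assumed; any Keras-compatible load_model would do
from faster_whisper import WhisperModel         # visible in the hunk context above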
 
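A side note on the updated functions: `get_predictions` calls `generate_image(api_key, texto_imagen)`, while the new `generate_image` takes `(emotion_prediction, transcribed_text)` and already reads `api_key` from the module-level variable, and the request body still sends the placeholder prompt "this". Below is a minimal sketch, not part of this commit, of how the two functions could be wired so that the predicted emotion and the transcript actually reach the DeepAI endpoint; passing the prompt via `data` as a regular form field is an assumption that mirrors the commented-out line in the commit.

# Editorial sketch only -- not the committed code.
def generate_image(emotion_prediction, transcribed_text):
    try:
        url = "https://api.deepai.org/api/image-editor"
        headers = {'api-key': api_key}
        prompt = f"{emotion_prediction} {transcribed_text}"
        # Context manager closes the base-image handle after the request.
        with open('TAI_Images/TerraIncognita2.jpg', 'rb') as base_image:
            response = requests.post(
                url,
                headers=headers,
                files={'image': base_image},
                data={'text': prompt},  # assumed form field, mirroring the commented-out line
            )
        response_data = response.json()
        return response_data.get('output_url')
    except Exception as e:
        print("Error generating image:", e)
        return None


def get_predictions(audio_input):
    emotion_prediction = predict_emotion_from_audio(audio_input)
    transcribed_text = transcribe(audio_input)
    # Pass the two text pieces; the API key is read inside generate_image.
    image_url = generate_image(emotion_prediction, transcribed_text)
    return emotion_prediction, transcribed_text, image_url

This keeps the same return shape as the committed get_predictions (emotion label, transcript, image URL), so the Gradio interface definition would not need to change.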