Terra_Autralis_Ignota

Running

App Files Files Community

jfforero committed on Apr 16, 2024

Commit

1d6f96a

verified ·

1 Parent(s): e2ed0ec

Update app.py

Browse files

Files changed (1) hide show

app.py +68 -1

app.py CHANGED Viewed

@@ -1,3 +1,12 @@
 p = pipeline("automatic-speech-recognition")
 from tensorflow.keras.models import load_model
@@ -46,4 +55,62 @@ def transcribe(audio, state=""):
     time.sleep(3)
     text = p(audio)["text"]
     text = sentiment_vader(text)
-    return text

+import gradio as gr
+import tensorflow as tf
+import numpy as np
+import librosa
+import time
+from transformers import pipeline
+from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
 p = pipeline("automatic-speech-recognition")
 from tensorflow.keras.models import load_model
     time.sleep(3)
     text = p(audio)["text"]
     text = sentiment_vader(text)
+    return text
+# Define functions for acoustic and semantic predictions (predict_emotion_from_audio and transcribe)
+# Combined entry point: runs transcription, image generation and emotion prediction
+def get_predictions(audio_input):
+    """Produce the emotion prediction, transcribed text and image for one audio input.
+
+    Args:
+        audio_input: Passed unchanged to ``transcribe`` and
+            ``predict_emotion_from_audio``.
+
+    Returns:
+        A three-element list ``[emotion_prediction, transcribed_text, image]``.
+        ``image`` is whatever ``generate_image`` returns and may be ``None``.
+    """
+    import os
+
+    # Perform transcription to get the text
+    transcribed_text = transcribe(audio_input)
+    # Prefer a key supplied through the environment so the secret does not
+    # have to live in source control.
+    # SECURITY NOTE(review): the literal fallback below is a credential that
+    # has been committed to the repository. It is kept only so existing
+    # deployments keep working; rotate it and remove the fallback once
+    # DEEPAI_API_KEY is configured.
+    api_key = os.environ.get('DEEPAI_API_KEY', 'dee3e3f2-d5cf-474c-8072-bd6bea47e865')
+    # Generate the image with the transcribed text using DeepAI Text to Image API
+    image = generate_image(api_key, transcribed_text)
+    # Get emotion prediction from audio
+    emotion_prediction = predict_emotion_from_audio(audio_input)
+    return [emotion_prediction, transcribed_text, image]
+# Define a function to generate an image using DeepAI Text to Image API
+def generate_image(api_key, text):
+    """Request an image for ``text`` from the DeepAI text2img endpoint.
+
+    Args:
+        api_key: Value sent in the ``api-key`` request header.
+        text: Prompt sent as the ``text`` form field.
+
+    Returns:
+        A PIL image opened from the bytes downloaded from the response's
+        ``output_url``, or ``None`` when no image could be obtained (request
+        failure, non-JSON or unexpected response, missing ``output_url``, or
+        undecodable image data).
+    """
+    # Function-local imports: the import lines visible at the top of this file
+    # do not bring in requests, PIL.Image or BytesIO, so import them here to
+    # keep the function self-contained.
+    import logging
+    from io import BytesIO
+
+    import requests
+    from PIL import Image
+
+    log = logging.getLogger(__name__)
+    url = "https://api.deepai.org/api/text2img"
+    headers = {'api-key': api_key}
+    try:
+        # A timeout keeps a stalled API call from blocking the UI callback forever.
+        response = requests.post(
+            url,
+            data={
+                'text': text,
+            },
+            headers=headers,
+            timeout=60,
+        )
+        response_data = response.json()
+    except (requests.RequestException, ValueError) as err:
+        # ValueError covers a body that is not valid JSON.
+        log.warning("DeepAI text2img request failed: %s", err)
+        return None
+
+    # Error payloads may lack 'output_url'; guard against non-dict JSON too.
+    image_url = response_data.get('output_url') if isinstance(response_data, dict) else None
+    if not image_url:
+        return None
+
+    try:
+        image_response = requests.get(image_url, timeout=60)
+        # Do not hand an HTTP error page to the image decoder.
+        image_response.raise_for_status()
+        return Image.open(BytesIO(image_response.content))
+    except (requests.RequestException, OSError) as err:
+        # OSError covers PIL failing to identify or read the image data.
+        log.warning("Could not download or decode generated image: %s", err)
+        return None
+# Build the Gradio UI: a single tab holding the audio input, a submit button
+# and one output component per value returned by get_predictions.
+with gr.Blocks() as interface:
+    gr.Markdown("Emotional Machines test: Load or Record an audio file to speech emotion analysis")
+    with gr.Tabs():
+        with gr.Tab("Acoustic and Semantic Predictions"):
+            with gr.Row():
+                input_audio = gr.Audio(label="Input Audio", type="filepath")
+                submit_button = gr.Button("Submit")
+            # Created in the same order as get_predictions' return list:
+            # emotion prediction, transcribed text, generated image.
+            emotion_output = gr.Label(num_top_classes=8)
+            text_output = gr.Label(num_top_classes=4)
+            image_output = gr.Image(type='pil')
+            output_labels = [emotion_output, text_output, image_output]
+    # Clicking the button sends the audio to get_predictions and fills the outputs
+    submit_button.click(get_predictions, inputs=input_audio, outputs=output_labels)
+    # Extra label titled "Transcribed Text"; it is not among the click outputs above
+    transcribed_text_label = gr.Label(label="Transcribed Text")
+# Start serving the interface
+interface.launch()