jfforero committed
Commit 1d6f96a
1 Parent(s): e2ed0ec

Update app.py

Files changed (1)
app.py +68 -1
app.py CHANGED
@@ -1,3 +1,12 @@
+import gradio as gr
+import tensorflow as tf
+import numpy as np
+import librosa
+import time
+from transformers import pipeline
+from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
+
+
 p = pipeline("automatic-speech-recognition")
 
 from tensorflow.keras.models import load_model
@@ -46,4 +55,62 @@ def transcribe(audio, state=""):
     time.sleep(3)
     text = p(audio)["text"]
     text = sentiment_vader(text)
-    return text
+    return text
+
+
+# Define functions for acoustic and semantic predictions (predict_emotion_from_audio and transcribe)
+
+# Create a combined function that calls both models
+def get_predictions(audio_input):
+    # Perform transcription to get the text
+    transcribed_text = transcribe(audio_input)
+
+    # Define the API key for DeepAI Text to Image API
+    api_key = 'dee3e3f2-d5cf-474c-8072-bd6bea47e865'
+
+    # Generate the image with the transcribed text using DeepAI Text to Image API
+    image = generate_image(api_key, transcribed_text)
+
+    # Get emotion prediction from audio
+    emotion_prediction = predict_emotion_from_audio(audio_input)
+
+    return [emotion_prediction, transcribed_text, image]
+
+# Define a function to generate an image using DeepAI Text to Image API
+def generate_image(api_key, text):
+    url = "https://api.deepai.org/api/text2img"
+    headers = {'api-key': api_key}
+    response = requests.post(
+        url,
+        data={
+            'text': text,
+        },
+        headers=headers
+    )
+    response_data = response.json()
+    if 'output_url' in response_data:
+        image_url = response_data['output_url']
+        image_response = requests.get(image_url)
+        image = Image.open(BytesIO(image_response.content))
+        return image
+    else:
+        return None
+
+# Create the Gradio interface for acoustic and semantic predictions
+with gr.Blocks() as interface:
+    gr.Markdown("Emotional Machines test: Load or Record an audio file to speech emotion analysis")
+    with gr.Tabs():
+        with gr.Tab("Acoustic and Semantic Predictions"):
+            with gr.Row():
+                input_audio = gr.Audio(label="Input Audio", type="filepath")
+                submit_button = gr.Button("Submit")
+                output_labels = [gr.Label(num_top_classes=8), gr.Label(num_top_classes=4), gr.Image(type='pil')]
+
+            # Set the function to be called when the button is clicked for acoustic and semantic predictions
+            submit_button.click(get_predictions, inputs=input_audio, outputs=output_labels)
+
+            # Display transcribed text as a label
+            transcribed_text_label = gr.Label(label="Transcribed Text")
+
+# Launch the Gradio interface
+interface.launch()
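
Note: the code added in this commit calls requests.post, requests.get, Image.open, and BytesIO, but no matching imports appear in the diff. Assuming they are not already imported elsewhere in app.py (outside the hunks shown here), a minimal sketch of the imports the new code would need is:

import requests         # HTTP calls to the DeepAI text2img API in generate_image
from io import BytesIO  # wraps the downloaded image bytes for decoding
from PIL import Image   # decodes the generated image into a PIL Image for gr.Image(type='pil')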