import os
import random
from io import BytesIO

import gradio as gr
import numpy as np
import librosa
import requests
from PIL import Image
from tensorflow.keras.models import load_model
from faster_whisper import WhisperModel
# Load the emotion prediction model
def load_emotion_model(model_path):
    try:
        model = load_model(model_path)
        return model
    except Exception as e:
        print("Error loading emotion prediction model:", e)
        return None
model_path = 'mymodel_SER_LSTM_RAVDESS.h5'
model = load_emotion_model(model_path)
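# If the .h5 file is missing or fails to load, model is None and
# predict_emotion_from_audio will log an error and return None.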
# Initialize WhisperModel
model_size = "small"
model2 = WhisperModel(model_size, device="cpu", compute_type="int8")
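# "small" runs comfortably on CPU with int8 quantization; larger checkpoints
# (e.g. "medium") would trade inference speed for transcription accuracy.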
# Function to transcribe audio with faster-whisper
def transcribe(wav_filepath):
    segments, _ = model2.transcribe(wav_filepath, beam_size=5)
    return "".join(segment.text for segment in segments)
# Function to extract MFCC features from audio
def extract_mfcc(wav_file_name):
    try:
        y, sr = librosa.load(wav_file_name)
        mfccs = np.mean(librosa.feature.mfcc(y=y, sr=sr, n_mfcc=40).T, axis=0)
        return mfccs
    except Exception as e:
        print("Error extracting MFCC features:", e)
        return None
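# Averaging the 40 MFCCs over time yields a fixed-length (40,) vector,
# which predict_emotion_from_audio reshapes to (1, 40, 1) for the LSTM.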
# Emotions dictionary
emotions = {1: 'neutral', 2: 'calm', 3: 'happy', 4: 'sad', 5: 'angry', 6: 'fearful', 7: 'disgust', 8: 'surprised'}
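# The indices follow the RAVDESS emotion coding (01=neutral ... 08=surprised),
# which the SER model presumably uses for its output classes.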
# Function to predict emotion from audio
def predict_emotion_from_audio(wav_filepath):
    try:
        test_point = extract_mfcc(wav_filepath)
        if test_point is not None:
            test_point = np.reshape(test_point, newshape=(1, 40, 1))
            predictions = model.predict(test_point)
            predicted_emotion_label = np.argmax(predictions[0]) + 1
            return emotions[predicted_emotion_label]
        else:
            return "Error: Unable to extract features"
    except Exception as e:
        print("Error predicting emotion:", e)
        return None
api_key = os.getenv("DeepAI_api_key")
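# The key is read from the environment (e.g. a Space secret named "DeepAI_api_key");
# without it, generate_image cannot authenticate and returns None.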
# Function to generate an image using the DeepAI Image Editor API
def generate_image(emotion_prediction, transcribed_text):
    try:
        url = "https://api.deepai.org/api/image-editor"
        headers = {
            'api-key': api_key
        }
        # Select a random base image from TerraIncognita0.jpg to TerraIncognita9.jpg
        image_file_path = f'TAI_Images/TerraIncognita{random.randint(0, 9)}.jpg'
        prompt = (
            "Generate 'Patagonian Monsters' with a " + emotion_prediction
            + " attitude, representing the idea of: [" + transcribed_text + "]. "
            "Illustrate this using asemic writings in an old map style."
        )
        with open(image_file_path, 'rb') as image_file:
            files = {
                'image': image_file,
                'text': prompt
            }
            response = requests.post(url, headers=headers, files=files)
        response_data = response.json()
        if 'output_url' in response_data:
            # Download the generated image and return it as a PIL image for Gradio
            image_response = requests.get(response_data['output_url'])
            return Image.open(BytesIO(image_response.content))
        else:
            return None
    except Exception as e:
        print("Error generating image:", e)
        return None
# Function to get predictions
def get_predictions(audio_input):
    emotion_prediction = predict_emotion_from_audio(audio_input)
    transcribed_text = transcribe(audio_input)
    image = generate_image(emotion_prediction, transcribed_text)
    return emotion_prediction, transcribed_text, image
# Create the Gradio interface
interface = gr.Interface(
    fn=get_predictions,
    inputs=gr.Audio(label="Input Audio", type="filepath", sources=["microphone"]),
    outputs=[
        gr.Label(label="Acoustic Prediction"),
        gr.Label(label="Transcribed Text"),
        gr.Image(type='pil', label="Generated Image")
    ],
    title="Affective Virtual Environments",
    description="Create an AVE using your voice."
)

interface.launch()