Spaces:

szili2011
/

sound-to-text-converter

Runtime error

szili2011 committed on Oct 27, 2024

Commit

1d0caf2

verified ·

1 Parent(s): e7fed6c

Create app.py

Files changed (1) hide show

app.py ADDED Viewed

+import os
+import librosa
+import numpy as np
+import tensorflow as tf
+import gradio as gr
+# Load the pre-trained Keras model once, at import time, so that every
+# prediction reuses the same in-memory model. The path is relative, so the
+# .h5 file must be present in the process's working directory.
+model_path = 'sound_to_text_model.h5'
+model = tf.keras.models.load_model(model_path)
+# Turn an audio file into a fixed-length MFCC feature vector
+def extract_features(file_path):
+    """Return the time-averaged MFCCs of the start of an audio file.
+
+    At most the first 2 seconds of ``file_path`` are loaded, 13 MFCCs are
+    computed per frame, and each coefficient is averaged over all frames,
+    so the result has a fixed size regardless of the clip's length.
+    """
+    signal, sample_rate = librosa.load(file_path, duration=2.0)
+    coefficients = librosa.feature.mfcc(y=signal, sr=sample_rate, n_mfcc=13)
+    # Transpose so that axis 0 runs over frames, then average the frames away.
+    frames_first = coefficients.T
+    return np.mean(frames_first, axis=0)
+# Function to predict text from audio
+def predict_sound_text(audio):
+    """Predict the text label for an uploaded audio clip.
+
+    Args:
+        audio: Path of the uploaded file as a string or path-like object
+            (what ``gr.Audio`` passes with ``type="filepath"``), or a
+            file-like object exposing its path as ``.name``. ``None`` means
+            nothing was uploaded.
+
+    Returns:
+        The decoded label when a module-level ``encoder`` is available,
+        otherwise the predicted class index as a string.
+
+    Raises:
+        gr.Error: If no audio file was provided.
+    """
+    if audio is None:
+        raise gr.Error("Please upload an audio file first.")
+    # With type="filepath" Gradio hands over a plain path, which has no
+    # usable ``.name`` (a str lacks it, a Path would return only the
+    # basename). Keep accepting file-like objects for older callers.
+    if isinstance(audio, (str, os.PathLike)):
+        file_path = audio
+    else:
+        file_path = audio.name
+    features = extract_features(file_path)
+    # The model is fed a batch containing this single feature vector.
+    prediction = model.predict(np.array([features]))
+    class_index = int(np.argmax(prediction))
+    # NOTE(review): no ``encoder`` is defined in this file, so the original
+    # lookup raised NameError on every call. Presumably it is the label
+    # encoder fitted at training time -- load it at module level. Until
+    # then, fall back to the raw class index instead of crashing.
+    label_encoder = globals().get("encoder")
+    if label_encoder is None:
+        return str(class_index)
+    label = label_encoder.inverse_transform([class_index])
+    return label[0]
+# Define Gradio interface
+# Gradio 4 replaced the Audio ``source`` argument with ``sources`` (a list),
+# and the old spelling raises TypeError there. Try the current spelling
+# first and fall back to the old one for installs that reject it.
+try:
+    audio_input = gr.Audio(sources=["upload"], type="filepath")
+except TypeError:
+    audio_input = gr.Audio(source="upload", type="filepath")
+interface = gr.Interface(fn=predict_sound_text,
+                         inputs=audio_input,
+                         outputs="text",
+                         title="Audio to Text Converter",
+                         description="Upload an audio file (MP3 format) and get the textual representation.")
+# Launch the interface only when this file is run as a script, so that
+# importing the module does not start the UI.
+if __name__ == "__main__":
+    interface.launch()