RobotJelly committed on
Commit
4f0b2ef
1 Parent(s): afbe314
Files changed (1)
  1. app.py +103 -0
app.py ADDED
@@ -0,0 +1,103 @@
+ import numpy as np
+ import tensorflow as tf
+ import gradio as gr
+ from huggingface_hub import from_pretrained_keras
+
+ classes_names = ['Benjamin_Netanyau', 'Jens_Stoltenberg', 'Julia_Gillard', 'Magaret_Tarcher', 'Nelson_Mandela']
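+ # The spellings above follow the folder names in the 16000_pcm_speeches
+ # dataset (typos included), so they are intentionally left unchanged.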
+ labels = [0, 1, 2, 3, 4]
+ class_labels = {0: 'Benjamin_Netanyau', 1: 'Jens_Stoltenberg', 2: 'Julia_Gillard', 3: 'Magaret_Tarcher', 4: 'Nelson_Mandela'}
+
+ # The sampling rate used in all of the audio samples. Since every clip is
+ # exactly 1 second long, it is also the length of each input in samples.
+ SAMPLING_RATE = 16000
+
+ # Load the pre-trained speaker-recognition CNN from the Hugging Face Hub
+ model = from_pretrained_keras("keras-io/speaker-recognition")
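+ # A quick sanity check (a sketch; the expected input shape follows from the
+ # FFT transform below, which halves a 16000-sample clip to 8000 bins):
+ # model.summary()  # expect an input shape of (None, 8000, 1)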
+
+
+ def path_to_audio(path):
+     """Reads a WAV file and decodes it to a mono, 16 kHz audio tensor."""
+     audio = tf.io.read_file(path)
+     audio, _ = tf.audio.decode_wav(audio, 1, SAMPLING_RATE)
+     return audio
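+ # Minimal usage sketch (the filename is hypothetical; any 1-second, 16 kHz
+ # mono WAV works), kept commented out so it never runs on startup:
+ # wav = path_to_audio("some_sample.wav")
+ # print(wav.shape)  # -> (16000, 1)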
+
+ def audio_to_fft(audio):
+     # tf.signal.fft operates on the innermost dimension, so squeeze out the
+     # trailing channel dimension first and restore it after the FFT.
+     audio = tf.squeeze(audio, axis=-1)
+     fft = tf.signal.fft(
+         tf.cast(tf.complex(real=audio, imag=tf.zeros_like(audio)), tf.complex64)
+     )
+     fft = tf.expand_dims(fft, axis=-1)
+     # Return the magnitude of the first half of the FFT, which holds the
+     # positive frequencies (the spectrum of a real signal is symmetric).
+     return tf.math.abs(fft[:, : (audio.shape[1] // 2), :])
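+ # Shape check for audio_to_fft (a sketch, assuming a batch of 1-second clips),
+ # also kept commented out:
+ # dummy = tf.zeros((1, SAMPLING_RATE, 1))
+ # print(audio_to_fft(dummy).shape)  # -> (1, 8000, 1): positive frequencies only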
+
+
+ def predict(actual_audio_path, actual_label):
+     # Decode the uploaded clip and add a batch dimension
+     path_of_actual_audio = path_to_audio(actual_audio_path)
+     actual_audio = tf.expand_dims(path_of_actual_audio, axis=0)
+     # Get the signal FFT
+     ffts = audio_to_fft(actual_audio)
+     # Predict the speaker and map the class index back to its name
+     y_pred = model.predict(ffts)
+     y_pred = np.argmax(y_pred, axis=-1)
+     return classes_names[y_pred[0]], actual_audio_path
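+ # Example call (the WAV path is hypothetical; actual_label is not used by the
+ # model, it only lets the UI show the ground truth next to the prediction):
+ # predict("260.wav", "Benjamin_Netanyau")  # -> ('Benjamin_Netanyau', '260.wav')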
+
+ # The app takes one audio sample to be recognised, plus the actual speaker's name
+ inputs = [gr.inputs.Audio(source="upload", type="filepath", label="Take audio sample"), gr.inputs.Textbox(label="Actual Speaker")]
+
+ # The app outputs the predicted speaker and plays back the corresponding audio
+ outputs = [gr.outputs.Textbox(label="Predicted Speaker"), gr.outputs.Audio(label="Corresponding Audio")]
+ # It's good practice to pass examples, a description and a title to guide users.
+ # The example paths below come from the original Colab session.
+ examples = [['/content/drive/MyDrive/Downloads/16000_pcm_speeches/audio/Benjamin_Netanyau/260.wav', 'Benjamin_Netanyau'],
+             ['/content/drive/MyDrive/Downloads/16000_pcm_speeches/audio/Jens_Stoltenberg/611.wav', 'Jens_Stoltenberg'],
+             ['/content/drive/MyDrive/Downloads/16000_pcm_speeches/audio/Julia_Gillard/65.wav', 'Julia_Gillard'],
+             ['/content/drive/MyDrive/Downloads/16000_pcm_speeches/audio/Magaret_Tarcher/1083.wav', 'Magaret_Tarcher'],
+             ['/content/drive/MyDrive/Downloads/16000_pcm_speeches/audio/Nelson_Mandela/605.wav', 'Nelson_Mandela']]
+ title = "Speaker Recognition"
+ description = "Select a noisy audio sample from the examples to check whether the model recognises the speaker correctly, even in the presence of noise."
+
+ gr.Interface(fn=predict, inputs=inputs, outputs=outputs, examples=examples, allow_flagging=False, analytics_enabled=False,
+              title=title, description=description, article='Space By: <u><a href="https://github.com/robotjellyzone"><b>Kavya Bisht</b></a></u> \n Based on <a href="https://keras.io/examples/audio/speaker_recognition_using_cnn/"><b>this notebook</b></a>').launch(enable_queue=True, debug=True)