vumichien committed on
Commit
0446ab5
1 Parent(s): 83a8a3e

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +119 -0
app.py ADDED
@@ -0,0 +1,119 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from huggingface_hub import from_pretrained_keras
2
+ import numpy as np
3
+ import tensorflow as tf
4
+ from tensorflow.keras import layers
5
+ import tensorflow_io as tfio
6
+
7
+ import gradio as gr
8
+ import librosa
9
+ import librosa.display
10
+ import matplotlib.pyplot as plt
11
+
12
class MelSpec(layers.Layer):
    """Keras layer that turns raw audio into a dB-scaled mel-spectrogram.

    Args:
        frame_length: STFT window length, in samples.
        frame_step: Hop between successive STFT frames, in samples.
        fft_length: FFT size handed to ``tf.signal.stft``. ``None`` lets
            TensorFlow use the smallest power of two enclosing
            ``frame_length``.
        sampling_rate: Sample rate (Hz) used to build the mel filterbank.
        num_mel_channels: Number of mel bins in the output.
        freq_min: Lower edge (Hz) of the mel filterbank.
        freq_max: Upper edge (Hz) of the mel filterbank.
        **kwargs: Forwarded to ``layers.Layer``.
    """

    def __init__(
        self,
        frame_length=1024,
        frame_step=256,
        fft_length=None,
        sampling_rate=22050,
        num_mel_channels=80,
        freq_min=125,
        freq_max=7600,
        **kwargs,
    ):
        super().__init__(**kwargs)
        self.frame_length = frame_length
        self.frame_step = frame_step
        self.fft_length = fft_length
        self.sampling_rate = sampling_rate
        self.num_mel_channels = num_mel_channels
        self.freq_min = freq_min
        self.freq_max = freq_max

        # tf.signal.stft emits fft_length // 2 + 1 frequency bins, and an unset
        # fft_length resolves to the smallest power of two >= frame_length.
        # Size the filterbank from that effective value so the matmul in
        # call() lines up even when fft_length differs from frame_length.
        if fft_length is None:
            effective_fft_length = 1 << (int(frame_length) - 1).bit_length()
        else:
            effective_fft_length = int(fft_length)

        self.mel_filterbank = tf.signal.linear_to_mel_weight_matrix(
            num_mel_bins=self.num_mel_channels,
            num_spectrogram_bins=effective_fft_length // 2 + 1,
            sample_rate=self.sampling_rate,
            lower_edge_hertz=self.freq_min,
            upper_edge_hertz=self.freq_max,
        )

    def call(self, audio):
        """Compute the log-mel spectrogram of ``audio``.

        Args:
            audio: Tensor whose last axis has size 1 (it is squeezed away
                before the STFT is taken).

        Returns:
            Tensor of dB-scaled mel energies whose last axis has
            ``num_mel_channels`` entries.
        """
        stft = tf.signal.stft(
            tf.squeeze(audio, -1),
            self.frame_length,
            self.frame_step,
            self.fft_length,
            pad_end=True,
        )

        # Taking the magnitude of the STFT output
        magnitude = tf.abs(stft)

        # Project the power spectrum onto the mel filterbank, then convert it
        # to decibels (clipped to an 80 dB dynamic range).
        mel = tf.matmul(tf.square(magnitude), self.mel_filterbank)
        log_mel_spec = tfio.audio.dbscale(mel, top_db=80)
        return log_mel_spec

    def get_config(self):
        """Return the layer config, including every constructor argument."""
        config = super().get_config()
        config.update(
            {
                "frame_length": self.frame_length,
                "frame_step": self.frame_step,
                "fft_length": self.fft_length,
                "sampling_rate": self.sampling_rate,
                "num_mel_channels": self.num_mel_channels,
                "freq_min": self.freq_min,
                "freq_max": self.freq_max,
            }
        )
        return config
72
+
73
# Pretrained Keras model fetched from the Hugging Face Hub; loaded once at
# import time and shared by every request.
model = from_pretrained_keras(
    "keras-io/MelGAN-spectrogram-inversion"
)
74
+
75
def inference(audio, model):
    """Reconstruct a waveform from the mel-spectrogram of an audio file.

    Args:
        audio: Audio source handed to ``librosa.load`` (called with its
            default settings).
        model: Object exposing ``predict``; it receives the batched
            mel-spectrogram.

    Returns:
        Tuple ``(waveform, synthesized, sr)``: the loaded samples, the
        squeezed model output, and the sample rate reported by librosa.
    """
    waveform, sr = librosa.load(audio)

    # MelSpec squeezes the last axis, so give the samples a trailing axis.
    mel = MelSpec()(tf.expand_dims(waveform, axis=-1))

    # The model is fed a batch of one spectrogram.
    batch = tf.expand_dims(mel, axis=0)
    synthesized = model.predict(batch, batch_size=1, verbose=0)
    return waveform, synthesized.squeeze(), sr
83
+
84
def predict(audio, micro):
    """Plot spectrograms of the original audio and of its MelGAN re-synthesis.

    Args:
        audio: Path of the uploaded audio file, or ``None`` if nothing was
            uploaded.
        micro: Path of the microphone recording, or ``None`` if nothing was
            recorded. Used only when ``audio`` is ``None``.

    Returns:
        The matplotlib figure holding both spectrograms (original on top,
        synthesized below).

    Raises:
        ValueError: If neither an upload nor a recording was provided.
    """
    # Prefer the uploaded file and fall back to the microphone recording.
    source_path = audio if audio is not None else micro
    if source_path is None:
        raise ValueError(
            "No audio provided: upload a file or record from the microphone."
        )

    x, x_pred, sr = inference(source_path, model)

    fig, axes = plt.subplots(nrows=2, ncols=1, sharex=True, figsize=(10, 8), dpi=120)
    panels = (
        (x, 'Spectrogram of Original sample audio'),
        (x_pred, 'Spectrogram of synthesis sample audio '),
    )
    for ax, (signal, title) in zip(axes, panels):
        spec_db = librosa.amplitude_to_db(np.abs(librosa.stft(signal)), ref=np.max)
        librosa.display.specshow(spec_db, y_axis='linear', x_axis='time',
                                 sr=sr, ax=ax)
        ax.set(title=title)
        ax.label_outer()

    # Return the figure created above rather than whatever pyplot currently
    # considers "current".
    return fig
101
+
102
# Two alternative audio sources; both hand the callback a file path.
inputs = [
    gr.Audio(source="upload", label='Upload audio file', type="filepath"),
    gr.Audio(source="microphone", label='Record audio from microphone', type="filepath"),
]

# No example clips are bundled with the app.
examples = []

# Wire the prediction callback to the UI, then start serving.
demo = gr.Interface(
    fn=predict,
    inputs=inputs,
    outputs=gr.Plot(),
    examples=examples,
    title="MelGAN-based spectrogram inversion",
    description="Inversion of audio from mel-spectrograms using the MelGAN architecture and feature matching",
    article="Author: <a href=\"https://huggingface.co/vumichien\">Vu Minh Chien</a>. Based on the keras example from <a href=\"https://keras.io/examples/audio/melgan_spectrogram_inversion/\">Darshan Deshpande</a>",
)
demo.launch(debug=False, enable_queue=True)