Spaces:

genaibook
/

audio_visualizations

Running

File size: 4,030 Bytes

import gradio as gr
import matplotlib.pyplot as plt
import numpy as np
import matplotlib.pyplot as plt
from scipy.io.wavfile import write

# Catalogue of demo clips. Each entry is [title shown in the UI, directory
# prefix used to build the paths of that clip's pre-rendered assets].
audios = [
    [label, folder]
    for label, folder in (
        ("Book Example", "speaker"),
        ("Swoosh", "swoosh"),
        ("Knocking", "knocking"),
        ("Forest", "forest"),
        ("Evil Laugh", "evil-laugh"),
        ("Morning", "morning"),
        ("Cinematic", "cinematic"),
    )
]



# Top-level Gradio app; every gr.Tab opened inside this context is one section.
with gr.Blocks() as demo:
    # Tab: one row per clip with its title, a waveform image and a video.
    with gr.Tab(label="Waveforms"):
        gr.Markdown(value="""## Waveforms
                    
In this section, we'll look into the waveforms of multiple audios.

""")
        for clip_title, clip_dir in audios:
            with gr.Row():
                # Narrow column for the clip title.
                with gr.Column(scale=1):
                    gr.Markdown(value=f"### {clip_title}")
                # Two equally wide columns for the image and the video.
                with gr.Column(scale=5):
                    gr.Image(f"{clip_dir}/waveform.png")
                with gr.Column(scale=5):
                    gr.Video(f"{clip_dir}/waveform_video.mp4")
    # Tab: interactive controls for a signal built from up to two sine waves.
    with gr.Tab(label="Understanding Frequencies"):
        gr.Markdown(value="""## Understanding Frequencies
                    """)
        # Input controls; these names are used as event inputs further down.
        freq = gr.Slider(minimum=0, maximum=300, step=20, value=40, label="Frequency")
        freq2 = gr.Slider(minimum=0, maximum=30, step=5, value=0, label="Second Frequency")
        amplitude = gr.Slider(minimum=0.05, maximum=1, step=0.05, value=1, label="Amplitude")

        # Output components: audio player, then the plot, then the trigger button.
        audio = gr.Audio()
        with gr.Row():
            plots = gr.Plot(label="Results")
        with gr.Row():
            button = gr.Button("Create")
        
        # https://github.com/gradio-app/gradio/issues/5469
        @gr.on(inputs=[freq, freq2, amplitude], outputs=[audio, plots])
        def plot_sine(freq: float, freq2: float, a: float):
            """Synthesize one second of a two-tone signal and plot it.

            The signal is the sum of two sine waves at ``freq`` and ``freq2`` Hz.
            The mix is normalized to a peak of 1 and then scaled by ``a``, so
            the amplitude control is reflected in both the audio and the plot.

            Args:
                freq: Frequency of the first sine wave, in Hz.
                freq2: Frequency of the second sine wave, in Hz.
                a: Peak amplitude as a fraction of full scale. Values whose
                    magnitude exceeds 1 are clipped to avoid int16 overflow.

            Returns:
                A ``(audio_data, fig)`` tuple. ``audio_data`` is
                ``(sample_rate, samples)`` with ``samples`` as an int16 array;
                ``fig`` is a Matplotlib figure with the time-domain plot on
                top and the magnitude spectrum (0-300 Hz) below.
            """
            sr = 44100  # samples per second
            max_plot_hz = 300  # upper limit of the spectrum's x-axis
            t = np.arange(sr) / sr  # time vector covering one second

            mix = np.sin(2 * np.pi * freq * t) + np.sin(2 * np.pi * freq2 * t)

            # Normalize the raw mix to [-1, 1] *before* applying the amplitude;
            # normalizing afterwards would cancel it out. When both frequencies
            # are 0 the mix is silent, so skip the division to avoid 0/0 -> NaN.
            peak = np.max(np.abs(mix))
            if peak > 0:
                mix = mix / peak
            scaled = np.clip(a * mix, -1.0, 1.0)

            # Convert to 16-bit integer PCM
            data = (scaled * 32767).astype(np.int16)
            audio_data = (sr, data)

            fig, (ax_waveform, ax_spectrum) = plt.subplots(
                nrows=2, ncols=1, sharex=False
            )

            ax_waveform.plot(t, data)
            ax_waveform.set_xlabel("Time (s)")
            ax_waveform.set_ylabel("Amplitude")
            ax_waveform.set_title("Time domain of the signal")

            # The input is real, so the one-sided FFT carries all the
            # information. Only bins inside the visible x-range are drawn;
            # stemming every bin is slow and the rest is cut off by xlim anyway.
            magnitudes = np.abs(np.fft.rfft(data))
            bin_freqs = np.fft.rfftfreq(len(data), d=1.0 / sr)
            visible = bin_freqs <= max_plot_hz
            ax_spectrum.set_xlim((0, max_plot_hz))
            ax_spectrum.stem(
                bin_freqs[visible],
                magnitudes[visible],
                linefmt="r",
                markerfmt=" ",
                basefmt="-b",
            )
            ax_spectrum.set_xlabel("Frequency (Hz)")
            ax_spectrum.set_title("Frequency domain of the signal")

            fig.tight_layout()
            # Drop pyplot's global reference so figures do not pile up across
            # calls; the Figure object itself stays valid for the caller.
            plt.close(fig)
            return audio_data, fig

        # The button runs the same handler on demand, in addition to @gr.on.
        button.click(plot_sine, inputs=[freq, freq2, amplitude], outputs=[audio, plots])
    with gr.Tab("Spectrograms and Mel Spectrograms"):
        gr.Markdown("""## Waveforms
                    
In this section, we'll look into the waveforms of multiple audios.

""")
        for title, path in audios:
            with gr.Row():
                with gr.Column(scale=1):
                    gr.Markdown(f"### {title}")
                with gr.Column(scale=10):
                    gr.Image(value=f"{path}/waveform.png")
                with gr.Column(scale=10):
                    gr.Image(value=f"{path}/fft.png")
                with gr.Column(scale=10):
                    video = gr.Video(value=f"{path}/waveform_video.mp4")
            with gr.Row():
                with gr.Column(scale=5):
                    gr.Image(value=f"{path}/spectrogram.png")
                with gr.Column(scale=5):
                    gr.Image(value=f"{path}/mel_spectrogram.png")
                
  
# Launch the app (with debug output) only when this file is run as a script.
if __name__ == "__main__":
    demo.launch(debug=True)