import tempfile

import gradio as gr
import torchaudio
from speechbrain.pretrained import SepformerSeparation as separator

# Load the pretrained SepFormer enhancement checkpoint once, at module load,
# so every request reuses the same model object.
# An alternative checkpoint left disabled here is
# "speechbrain/sepformer-wham-enhancement" (with savedir
# "pretrained_models/sepformer-wham-enhancement"); if you switch checkpoints,
# make sure the sample rate used when saving the output matches the new model.
model = separator.from_hparams(
    source="speechbrain/sepformer-dns4-16k-enhancement",
    savedir="pretrained_models/sepformer-dns4-16k-enhancement",
)

def predict_song(audio_path):
    """Denoise one audio file with the loaded SepFormer model.

    Args:
        audio_path: Filesystem path of the recording to enhance, as handed
            over by the Gradio audio input.

    Returns:
        Path (``str``) of a newly written WAV file holding the enhanced
        signal. Each call writes its own file.

    Raises:
        ValueError: If ``audio_path`` is ``None`` or empty, i.e. no audio
            was supplied (presumably what the UI sends when the form is
            submitted without a recording).
    """
    # Fail early with a clear message instead of an obscure error from deep
    # inside the model's file-loading code.
    if not audio_path:
        raise ValueError("No audio provided: upload or record a clip first.")

    est_sources = model.separate_file(path=audio_path)

    # Save at the model's own sample rate rather than a hard-coded one, so the
    # output stays correct if a different checkpoint is loaded. 16 kHz (the
    # previously hard-coded value) is kept as the fallback.
    sample_rate = int(getattr(model.hparams, "sample_rate", 16000))

    # Index 0 on the last axis picks the first estimated source and leaves a
    # 2-D tensor for torchaudio.save (assumed to be (channels, time)).
    enhanced = est_sources[:, :, 0].detach().cpu()

    # Write to a unique temporary file: a fixed file name would be shared by
    # all requests, letting concurrent users overwrite each other's result.
    with tempfile.NamedTemporaryFile(
        prefix="enhanced_", suffix=".wav", delete=False
    ) as tmp:
        output_path = tmp.name

    torchaudio.save(output_path, enhanced, sample_rate)
    return output_path


# Title, description and article text displayed on the demo page.
title = "Denoise Audio Using Sepformer"
description = "Using SepFormer model implemented with SpeechBrain"
# The article links to the model card of the checkpoint this app loads
# (speechbrain/sepformer-dns4-16k-enhancement). "Tham khao" is Vietnamese,
# roughly "see" / "refer to".
article = (
    "Tham khao Hugging Face "
    "[speechbrain/sepformer-dns4-16k-enhancement]"
    "(https://huggingface.co/speechbrain/sepformer-dns4-16k-enhancement)."
)

# Assemble the Gradio interface around predict_song.
demo = gr.Interface(
    # Callback invoked with the submitted recording.
    fn=predict_song,
    # The recording is passed to the callback as a file path.
    inputs=gr.Audio(type="filepath"),
    # The callback returns a single file path, shown through a File component.
    outputs=gr.File(file_count="multiple", file_types=[".wav"]),
    title=title,
    description=description,
    article=article,
)

# Start the Gradio app. There is no `__main__` guard, so this also runs when
# the module is imported, not only when it is executed as a script.
demo.launch()