ClearVoice-SR

Running on Zero

App Files Files Community

ClearVoice-SR / app.py

alibabasglab

Update app.py

ed2aa07 verified about 1 month ago

raw

history blame

2.67 kB

	import os
	import random
	import tempfile

	import gradio as gr
	import soundfile as sf
	import spaces
	import torch
	from clearvoice import ClearVoice

	@spaces.GPU
	def fn_clearvoice_sr(input_wav, apply_se):
	    """Run speech super-resolution on an audio file and return the result path.

	    When ``apply_se`` is truthy the input is first passed through the
	    ``MossFormer2_SE_48K`` speech-enhancement model, and the enhanced file
	    is then fed to the ``MossFormer2_SR_48K`` super-resolution model.
	    Otherwise the input goes straight to the super-resolution model.

	    Args:
	        input_wav: Filesystem path of the audio to process (as supplied by
	            the ``gr.Audio(type="filepath")`` input).
	        apply_se: Whether to run speech enhancement before
	            super-resolution.

	    Returns:
	        Path of a newly created ``.wav`` file, written at 48 kHz, holding
	        the processed audio. Each call gets its own uniquely named file so
	        that concurrent requests cannot overwrite one another's output.

	    Raises:
	        gr.Error: If no input audio was provided.
	    """
	    if not input_wav:
	        # Surface a readable message in the UI instead of an AttributeError.
	        raise gr.Error("Please provide an input audio file.")

	    fs = 48000
	    sr_model = ClearVoice(task='speech_super_resolution', model_names=['MossFormer2_SR_48K'])

	    # Intermediate files live in a per-request scratch directory that is
	    # removed as soon as the super-resolution pass has read them.
	    with tempfile.TemporaryDirectory(prefix='clearvoice_sr_') as work_dir:
	        if apply_se:
	            # Keep the input's own extension for the enhanced copy.
	            stem, ext = os.path.splitext(os.path.basename(input_wav))
	            enhanced_path = os.path.join(work_dir, stem + '_se' + ext)
	            se_model = ClearVoice(task='speech_enhancement', model_names=['MossFormer2_SE_48K'])
	            # online_write=True: the model writes its result to disk itself.
	            se_model(input_path=input_wav, online_write=True, output_path=enhanced_path)
	            input_wav = enhanced_path

	        sr_result = sr_model(input_path=input_wav, online_write=False)

	    # The call may hand back a dict or the waveform directly; for a dict,
	    # use its first value.
	    if isinstance(sr_result, dict):
	        output_wav = next(iter(sr_result.values()))
	    else:
	        output_wav = sr_result

	    # Reserve a unique output name; the file must outlive this function so
	    # the UI can read it, hence delete=False.
	    with tempfile.NamedTemporaryFile(prefix='enhanced_high_res_', suffix='.wav', delete=False) as out_file:
	        output_path = out_file.name

	    # Only the first row is written -- assumes the result is 2-D with the
	    # waveform along the second axis (TODO confirm against ClearVoice).
	    sf.write(output_path, output_wav[0, :], fs)
	    return output_path

	# Top-level container that hosts the tabbed interface built below.
	demo = gr.Blocks()

	# Single-task interface: one audio input plus a checkbox toggling the
	# optional speech-enhancement pre-pass, producing one audio output.
	sr_demo = gr.Interface(
	    fn=fn_clearvoice_sr,
	    inputs=[
	        gr.Audio(label="Input Audio", type="filepath"),
	        gr.Checkbox(label="Apply Speech Enhancement", value=True),
	    ],
	    outputs=[
	        gr.Audio(label="Output Audio", type="filepath"),
	    ],
	    # The anchor is closed with a proper </a> end tag so that the rest of
	    # the title is not swallowed into a second, unterminated link element.
	    title="<a href='https://github.com/modelscope/ClearerVoice-Studio/tree/main/clearvoice' target='_blank'>ClearVoice</a>: Speech Super Resolution",
	    description=(
	        "ClearVoice ([Github Repo](https://github.com/modelscope/ClearerVoice-Studio/tree/main/clearvoice)) is AI-powered and transform low-resolution audio (effective sampling rate ≥ 16 kHz) into crystal-clear, high-resolution audio at 48 kHz. It supports most of audio types. "
	        "To try it, simply upload your audio, or click one of the examples. "
	    ),
	    article=(
	        "<p style='text-align: center'><a href='https://arxiv.org/abs/2206.07293' target='_blank'>FRCRN: Boosting Feature Representation Using Frequency Recurrence for Monaural Speech Enhancement</a> </p>"
	        "<p style='text-align: center'><a href='https://arxiv.org/abs/2312.11825' target='_blank'>MossFormer2: Combining Transformer and RNN-Free Recurrent Network for Enhanced Time-Domain Monaural Speech Separation</a> </p>"
	    ),
	    # Each example row is [input audio path, apply_se flag].
	    examples=[
	        ["examples/mandarin_speech_16kHz.wav", True],
	        ["examples/LJSpeech-001-0001-22k.wav", True],
	        ["examples/LibriTTS_986_129388_24k.wav", True],
	        ["examples/english_speech_48kHz.wav", True],
	    ],
	    cache_examples=True,
	)

	# Mount the interface as the only tab inside the Blocks container.
	with demo:
	    gr.TabbedInterface([sr_demo], ["Task 4: Speech Super Resolution"])

	demo.launch()