Spaces:
Build error
Build error
| import gradio as gr | |
| import torch | |
| import torchaudio | |
| from encoder.utils import convert_audio | |
| from decoder.pretrained import WavTokenizer | |
# Load the WavTokenizer model once at import time so every request reuses it.
config_path = "wavtokenizer_smalldata_frame40_3s_nq1_code4096_dim512_kmeans200_attn.yaml"
model_path = "WavTokenizer_small_600_24k_4096.ckpt"

# Use CUDA when PyTorch reports it as available; otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Build the model from the config/checkpoint pair and move it to the chosen device.
wavtokenizer = WavTokenizer.from_pretrained0802(config_path, model_path).to(device)
def encode_audio(audio_file):
    """Encode an audio file into a space-separated string of discrete codes.

    Args:
        audio_file: Path of the audio file to encode, as supplied by the
            ``gr.Audio(type="filepath")`` input. May be ``None`` or empty
            when the user submits without providing any audio.

    Returns:
        The codes returned by ``wavtokenizer.encode_infer``, flattened and
        joined with single spaces, or an empty string when no audio file
        was provided.
    """
    # Nothing was uploaded: return an empty result instead of letting
    # torchaudio.load fail on a missing path.
    if not audio_file:
        return ""

    # Load and preprocess the audio.
    # NOTE(review): convert_audio(wav, sr, 24000, 1) presumably resamples to
    # 24 kHz and downmixes to one channel - confirm against encoder.utils.
    wav, sr = torchaudio.load(audio_file)
    wav = convert_audio(wav, sr, 24000, 1).to(device)

    # Encode the audio. Gradients are never needed for inference, so disable
    # autograd tracking to avoid holding on to intermediate activations.
    bandwidth_id = torch.tensor([0], device=device)
    with torch.no_grad():
        _, discrete_code = wavtokenizer.encode_infer(wav, bandwidth_id=bandwidth_id)

    # Convert the discrete code to a string representation.
    return ' '.join(map(str, discrete_code.cpu().numpy().flatten()))
# Describe the UI pieces first: an audio upload in, a text box of codes out.
audio_input = gr.Audio(type="filepath")
codes_output = gr.Textbox(label="Discrete Codes")
demo_description = "Upload an audio file to see its WavTokenizer discrete codes. The output shows 40 tokens per second of audio."

# Wire encode_audio between the input and output components.
iface = gr.Interface(
    fn=encode_audio,
    inputs=audio_input,
    outputs=codes_output,
    title="WavTokenizer Encoder Demo",
    description=demo_description,
)

# Start serving the demo.
iface.launch()