Spaces:
Runtime error
Runtime error
| import torch,torchaudio | |
| import os,sys,json | |
| from tqdm import tqdm | |
| #from codeclm_song_v1.codeclm.semantic_extractor.SpeechDecoder_v01.generate import Tango | |
| from generate_septoken import Tango | |
| import kaldiio | |
| from kaldiio import WriteHelper | |
| from audio import AudioFile | |
| def read_wav(fname, sample_rate=48_000): | |
| try: | |
| orig_samples, fs = torchaudio.load(fname) | |
| except: | |
| af = AudioFile(fname) | |
| orig_samples = af.read() | |
| fs = af.samplerate() | |
| orig_samples = orig_samples[0] | |
| if(fs!=sample_rate): | |
| orig_samples = torchaudio.functional.resample(orig_samples, fs, sample_rate) | |
| fs = sample_rate | |
| if orig_samples.shape[0] == 1: | |
| orig_samples = torch.cat([orig_samples, orig_samples], 0) | |
| return orig_samples | |
| if __name__ == "__main__": | |
| # Define Model | |
| json_path = sys.argv[1] | |
| outdir = sys.argv[2] | |
| mus_infos = [] | |
| with open(json_path) as f: | |
| for line in f: | |
| item = json.loads(line) | |
| mus_infos.append(item) | |
| tango = Tango(model_path="./saved/model_septoken/model_2.safetensors") | |
| # Feature extraction loop | |
| # for i in tqdm(range(2000)): | |
| first_time = True | |
| with WriteHelper('ark,scp:{}/token_vocal.ark,{}/token_vocal.scp'.format(outdir, outdir), write_function="pickle") as writer_vocal, WriteHelper('ark,scp:{}/token_bgm.ark,{}/token_bgm.scp'.format(outdir, outdir), write_function="pickle") as writer_bgm: | |
| print('ark,scp:{}/token_vocal.ark,{}/token_vocal.scp'.format(outdir, outdir)) | |
| print('ark,scp:{}/token_bgm.ark,{}/token_bgm.scp'.format(outdir, outdir)) | |
| for item in tqdm(mus_infos): | |
| try: | |
| # if True: | |
| idx = item['idx'] | |
| # print(idx) | |
| if(os.path.exists(item['path'])): | |
| full_path = item['path'] | |
| else: | |
| full_path = '/mnt/share/' + item['path'] | |
| if(os.path.exists(item['vocal_path'])): | |
| vocal_path = item['vocal_path'] | |
| bgm_paths = item['bgm_path'] | |
| else: | |
| vocal_path = '/mnt/share/' + item['vocal_path'] | |
| bgm_paths = ['/mnt/share/' + p for p in item['bgm_path']] | |
| vocal_tensor = read_wav(vocal_path) | |
| # full_tensor = read_wav(full_path) | |
| # length = min(full_tensor.shape[-1], vocal_tensor.shape[-1]) | |
| # full_tensor, vocal_tensor = full_tensor[:, 0:length], vocal_tensor[:, 0:length] | |
| # bgm_tensor = full_tensor - vocal_tensor | |
| bgm_tensor = sum([read_wav(p) for p in bgm_paths]) | |
| codes_vocal, codes_bgm = tango.sound2code(vocal_tensor, bgm_tensor) | |
| writer_vocal(str(idx), codes_vocal.cpu()) | |
| writer_bgm(str(idx), codes_bgm.cpu()) | |
| if(first_time): | |
| first_time = False | |
| print(codes_vocal.shape, codes_bgm.shape) | |
| except: | |
| print(item['vocal_path']) | |
| print(item['bgm_path']) | |
| continue | |
| # idx = item['idx'] | |
| # # print(idx) | |
| # full_path = item['path'] | |
| # vocal_path = item['vocal_path'] | |
| # bgm_paths = item['bgm_path'] | |
| # full_tensor = read_wav(full_path) | |
| # vocal_tensor = read_wav(vocal_path) | |
| # length = min(full_tensor.shape[-1], vocal_tensor.shape[-1]) | |
| # full_tensor, vocal_tensor = full_tensor[:, 0:length], vocal_tensor[:, 0:length] | |
| # bgm_tensor = full_tensor - vocal_tensor | |
| # codes_vocal, codes_bgm = tango.sound2code(vocal_tensor, bgm_tensor) | |
| # writer_vocal(str(idx), codes_vocal.cpu()) | |
| # writer_bgm(str(idx), codes_bgm.cpu()) | |
| # if(first_time): | |
| # first_time = False | |
| # print(codes_vocal.shape, codes_bgm.shape) | |