"""Prepare Bark training data.

Two stages are provided:

* ``prepare_semantics_from_text`` turns random text chunks into semantic
  arrays saved as ``.npy`` files in ``output``.
* ``prepare_wavs_from_semantics`` renders every ``.npy`` file in ``output``
  into a ``.wav`` file in ``output_wav``.
"""
import fnmatch
import os
import random
import uuid

import numpy
from scipy.io import wavfile
from tqdm.auto import tqdm

from bark import text_to_semantic
from bark.api import semantic_to_waveform
from bark.generation import SAMPLE_RATE, load_model
from training.data import load_books, random_split_chunk

# Folder receiving the generated semantic arrays (*.npy).
output = 'training/data/output'
# Folder receiving the waveforms rendered from those arrays (*.wav).
output_wav = 'training/data/output_wav'


def _random_temperature():
    """Return a random temperature in [0.6, 0.8], rounded to 2 decimals."""
    return round(random.uniform(0.6, 0.8), ndigits=2)


def prepare_semantics_from_text(num_generations):
    """Generate semantic arrays from random text chunks and save them.

    Each generation picks a non-empty chunk of text via
    ``random_split_chunk``, converts it with ``text_to_semantic`` at a
    random temperature and stores the result under a random hex name in
    ``output``.

    Args:
        num_generations: How many ``.npy`` files to produce. Pass ``None``
            to keep generating until the process is interrupted.
    """
    loaded_data = load_books(True)

    print('Loading semantics model')
    load_model(use_gpu=True, use_small=False, force_reload=False, model_type='text')

    # makedirs also creates missing parents and tolerates an existing folder.
    os.makedirs(output, exist_ok=True)

    loop = 1
    # The original looped forever and ignored num_generations; honour it now.
    while num_generations is None or loop <= num_generations:
        file_name = os.path.join(output, uuid.uuid4().hex + '.npy')

        # Keep drawing until the stripped chunk actually contains text.
        text = ''
        while not text:
            text = random_split_chunk(loaded_data).strip()

        print(f'{loop} Generating semantics for text:', text)
        loop += 1

        semantics = text_to_semantic(text, temp=_random_temperature())
        numpy.save(file_name, semantics)


def prepare_wavs_from_semantics():
    """Render every semantic ``.npy`` file in ``output`` to a ``.wav`` file.

    Files whose ``.wav`` already exists in ``output_wav`` are skipped, so an
    interrupted run can be resumed by calling this function again.

    Raises:
        FileNotFoundError: If the ``output`` folder does not exist.
    """
    if not os.path.isdir(output):
        raise FileNotFoundError('No \'output\' folder, make sure you run create_data.py first!')

    os.makedirs(output_wav, exist_ok=True)

    print('Loading coarse model')
    load_model(use_gpu=True, use_small=False, force_reload=False, model_type='coarse')
    print('Loading fine model')
    load_model(use_gpu=True, use_small=False, force_reload=False, model_type='fine')

    files = fnmatch.filter(os.listdir(output), '*.npy')
    total = len(files)

    for index, f in tqdm(enumerate(files, start=1), total=total):
        real_name = f.rsplit('.', 1)[0]  # Cut off the extension
        file_name = os.path.join(output, f)
        out_file = os.path.join(output_wav, f'{real_name}.wav')

        # Skip files that were already rendered (resume support) and
        # entries that are not regular files.
        if os.path.isfile(out_file) or not os.path.isfile(file_name):
            continue

        print(f'Processing ({index}/{total}) -> {f}')
        wav = semantic_to_waveform(numpy.load(file_name), temp=_random_temperature())

        # NOTE: the array is written exactly as returned; the PCM16
        # conversion (scale by 32767 and cast to int16) is disabled here.
        wavfile.write(out_file, SAMPLE_RATE, wav)

    print('Done!')