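# Spaces: Runtime error
# (The line above appears to be page-status text captured together with the
# source; it is not part of the program.)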
import argparse | |
from concurrent.futures import ThreadPoolExecutor, as_completed | |
from multiprocessing import cpu_count | |
from pathlib import Path | |
from typing import Any | |
import librosa | |
import pyloudnorm as pyln | |
import soundfile | |
from numpy.typing import NDArray | |
from tqdm import tqdm | |
from config import config | |
from style_bert_vits2.logging import logger | |
from style_bert_vits2.utils.stdout_wrapper import SAFE_STDOUT | |
# Block size, in seconds, handed to the loudness meter in normalize_audio().
# When the meter rejects a clip with this block size, resample() logs it as
# being shorter than this value and leaves the clip un-normalized.
DEFAULT_BLOCK_SIZE: float = 0.400 # seconds
class BlockSizeException(Exception):
    """Signal that loudness could not be measured with DEFAULT_BLOCK_SIZE.

    normalize_audio() raises this in place of the ``ValueError`` coming from
    the loudness meter, so that callers can catch it specifically and skip
    normalization for that clip.
    """
def normalize_audio(
    data: NDArray[Any], sr: int, *, target_loudness: float = -23.0
) -> NDArray[Any]:
    """Loudness-normalize audio samples to a target integrated loudness.

    Args:
        data: Audio samples to normalize.
        sr: Sampling rate of ``data``.
        target_loudness: Loudness the output is normalized to, in the unit
            reported by the meter. Defaults to -23.0.

    Returns:
        The normalized samples. If the measured loudness is negative
        infinity, ``data`` is returned unchanged.

    Raises:
        BlockSizeException: If the meter raises ``ValueError`` while
            measuring ``data`` with a block size of ``DEFAULT_BLOCK_SIZE``.
    """
    meter = pyln.Meter(sr, block_size=DEFAULT_BLOCK_SIZE)  # create BS.1770 meter
    try:
        loudness = meter.integrated_loudness(data)
    except ValueError as e:
        # Chain explicitly so the meter's original error stays in the traceback.
        raise BlockSizeException(e) from e

    if loudness == float("-inf"):
        # NOTE(review): pyloudnorm is expected to report -inf for digital
        # silence. Normalizing from -inf would need an infinite gain and fill
        # the output with NaN/inf, so the samples are passed through instead.
        return data

    return pyln.normalize.loudness(data, loudness, target_loudness)
def resample(
    file: Path,
    input_dir: Path,
    output_dir: Path,
    target_sr: int,
    normalize: bool,
    trim: bool,
) -> None:
    """Convert one audio file to a wav at ``target_sr`` under ``output_dir``.

    The file is loaded at ``target_sr``, optionally loudness-normalized and
    trimmed, and written to ``output_dir`` at the same path it has relative
    to ``input_dir``, with the suffix replaced by ``.wav``.

    Failures never propagate: a file that cannot be loaded, processed or
    written is reported with a warning and skipped.

    Args:
        file: Path of the file to convert; must be located under ``input_dir``.
        input_dir: Root directory that ``file`` is made relative to.
        output_dir: Root directory the converted file is written under.
        target_sr: Sampling rate to load and save the audio at.
        normalize: Whether to loudness-normalize the audio.
        trim: Whether to trim with ``librosa.effects.trim(top_db=30)``.
    """
    # Loading doubles as the check for whether librosa can read the file at
    # all; besides wav it can also read mp3, ogg, flac and others.
    try:
        wav: NDArray[Any]
        sr: int
        wav, sr = librosa.load(file, sr=target_sr)
    except Exception as e:
        logger.warning(f"Cannot load file, so skipping: {file}, {e}")
        return

    # Handled separately from loading so that a failure while processing or
    # writing is not misreported as an unreadable input file.
    try:
        if normalize:
            try:
                wav = normalize_audio(wav, sr)
            except BlockSizeException:
                # Presumably emits a line break so the log entry does not
                # share a line with the caller's progress bar.
                print("")
                logger.info(
                    f"Skip normalize due to less than {DEFAULT_BLOCK_SIZE} second audio: {file}"
                )
        if trim:
            wav, _ = librosa.effects.trim(wav, top_db=30)

        relative_path = file.relative_to(input_dir)
        output_path = output_dir / relative_path.with_suffix(".wav")
        output_path.parent.mkdir(parents=True, exist_ok=True)
        soundfile.write(output_path, wav, sr)
    except Exception as e:
        logger.warning(f"Cannot process or save file, so skipping: {file}, {e}")
if __name__ == "__main__":
    # Command-line entry point: resample every file found under --input_dir
    # into --output_dir, using a pool of worker threads.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--sr",
        type=int,
        default=config.resample_config.sampling_rate,
        help="sampling rate",
    )
    parser.add_argument(
        "--input_dir",
        "-i",
        type=str,
        default=config.resample_config.in_dir,
        help="path to source dir",
    )
    parser.add_argument(
        "--output_dir",
        "-o",
        type=str,
        default=config.resample_config.out_dir,
        help="path to target dir",
    )
    parser.add_argument(
        "--num_processes",
        type=int,
        default=4,
        help="number of worker threads (0 = choose from the CPU count)",
    )
    parser.add_argument(
        "--normalize",
        action="store_true",
        default=False,
        help="loudness normalize audio",
    )
    parser.add_argument(
        "--trim",
        action="store_true",
        default=False,
        help="trim silence (start and end only)",
    )
    args = parser.parse_args()

    # Reject a negative worker count here; otherwise it would only fail
    # later, when the thread pool is constructed.
    if args.num_processes < 0:
        parser.error("--num_processes must be 0 (auto) or a positive integer")

    processes: int
    if args.num_processes == 0:
        # Auto mode: leave two CPUs free when more than four are available,
        # otherwise fall back to a single worker.
        available_cpus = cpu_count()
        processes = available_cpus - 2 if available_cpus > 4 else 1
    else:
        processes = args.num_processes

    input_dir = Path(args.input_dir)
    output_dir = Path(args.output_dir)
    logger.info(f"Resampling {input_dir} to {output_dir}")
    sr = int(args.sr)
    normalize: bool = args.normalize
    trim: bool = args.trim

    # Collect every file: whether each one is a valid audio file is checked
    # later, when librosa tries to load it.
    original_files = [f for f in input_dir.rglob("*") if f.is_file()]

    if not original_files:
        logger.error(f"No files found in {input_dir}")
        raise ValueError(f"No files found in {input_dir}")

    output_dir.mkdir(parents=True, exist_ok=True)

    with ThreadPoolExecutor(max_workers=processes) as executor:
        futures = [
            executor.submit(resample, file, input_dir, output_dir, sr, normalize, trim)
            for file in original_files
        ]
        for future in tqdm(
            as_completed(futures), total=len(original_files), file=SAFE_STDOUT
        ):
            # resample() reports its own failures; result() re-raises anything
            # that still escaped a worker instead of silently discarding it.
            future.result()

    logger.info("Resampling Done!")