# Source: rvc_api/modules/separate.py -- commit b5a064f ("first commit"), uploaded by aryo100
import os
from typing import *
import tqdm
from pydub import AudioSegment
from pydub.silence import split_on_silence
def _load_audio(path: str) -> "AudioSegment":
    """Decode ``path`` with the pydub loader that matches its file extension.

    The extension is compared case-insensitively, so ``clip.WAV`` is treated
    the same as ``clip.wav``.

    Raises:
        ValueError: If the extension is not ``.mp3``, ``.wav`` or ``.flac``.
    """
    extension = os.path.splitext(path)[1].lower()
    if extension == ".mp3":
        return AudioSegment.from_mp3(path)
    if extension == ".wav":
        return AudioSegment.from_wav(path)
    if extension == ".flac":
        return AudioSegment.from_file(path, "flac")
    raise ValueError(
        "Invalid file format. Only MP3, WAV and FLAC files are supported."
    )


def _split_long_chunk(
    chunk: "AudioSegment",
    max_len: int,
    margin: int,
    min_len: Optional[int],
    padding: bool,
) -> "List[AudioSegment]":
    """Cut ``chunk`` into windows of ``max_len + margin`` stepped by ``max_len``.

    A trailing window shorter than ``min_len`` is either appended to the
    window before it (``padding`` true and more than two windows) or dropped.
    When ``min_len`` is ``None`` every window is kept.
    """
    pieces = [
        chunk[start : start + max_len + margin]
        for start in range(0, len(chunk) - margin, max_len)
    ]
    if not pieces:
        # margin >= len(chunk): the range above is empty, so there is nothing
        # to cut. Keep the chunk whole instead of indexing an empty list.
        return [chunk]

    # Without a minimum length there is no notion of a "too short" tail.
    # (Comparing against None here used to raise TypeError.)
    if min_len is None or len(pieces[-1]) >= min_len:
        return pieces

    # NOTE(review): with exactly two windows the short tail is dropped even
    # when padding is requested; `>= 2` may have been intended -- confirm
    # before changing, existing outputs depend on the current behaviour.
    if padding and len(pieces) > 2:
        return pieces[:-2] + [pieces[-2] + pieces[-1]]
    return pieces[:-1]


def separate_audio(
    input: str,
    output: str,
    silence_thresh: int,
    min_silence_len: int = 1000,
    keep_silence: int = 100,
    margin: int = 0,
    padding: bool = False,
    min: Optional[int] = None,
    max: Optional[int] = None,
) -> None:
    """Split audio files on silence and export the pieces as WAV files.

    Each input file is decoded, split with ``pydub.silence.split_on_silence``
    and the resulting chunks are post-processed: chunks not longer than
    ``min`` are prepended to the following chunk, and chunks longer than
    ``max`` are cut into overlapping windows. Every resulting piece is written
    to ``output`` as ``<basename>_<index>.wav``.

    Lengths are compared against ``len()`` of pydub segments, which pydub
    documents as milliseconds.

    Args:
        input: Path to a single audio file, or to a directory whose entries
            are all processed (every entry must be a supported audio file).
        output: Directory that receives the exported chunks; created if
            missing.
        silence_thresh: Forwarded to ``split_on_silence``.
        min_silence_len: Forwarded to ``split_on_silence``.
        keep_silence: Forwarded to ``split_on_silence``.
        margin: Extra length added to every window when a chunk longer than
            ``max`` is cut, so consecutive windows overlap by this amount.
        padding: When cutting a long chunk leaves a final window shorter than
            ``min``, append it to the previous window instead of dropping it
            (only applied when the cut produced more than two windows).
        min: Chunks whose length does not exceed this are merged into the next
            chunk. ``None`` disables the minimum.
        max: Chunks longer than this are cut into windows. ``None`` disables
            cutting.

    Raises:
        ValueError: If ``input`` is neither a file nor a directory, or if a
            file has an extension other than ``.mp3``, ``.wav`` or ``.flac``.
    """
    if os.path.isfile(input):
        files = [input]
    elif os.path.isdir(input):
        # Sorted so the processing order does not depend on the file system.
        files = [os.path.join(input, name) for name in sorted(os.listdir(input))]
    else:
        raise ValueError("input must be a file or directory")

    os.makedirs(output, exist_ok=True)

    for file in files:
        audio = _load_audio(file)

        chunks = split_on_silence(
            audio,
            min_silence_len=min_silence_len,
            silence_thresh=silence_thresh,
            keep_silence=keep_silence,
        )

        output_chunks: List["AudioSegment"] = []
        # Audio that was too short on its own and waits to be prepended to
        # the next chunk.
        pending = None

        for chunk in tqdm.tqdm(chunks):
            if pending is not None:
                chunk = pending + chunk
                pending = None

            if min is not None and len(chunk) <= min:
                pending = chunk
                continue

            if max is not None and len(chunk) > max:
                output_chunks.extend(
                    _split_long_chunk(chunk, max, margin, min, padding)
                )
            else:
                output_chunks.append(chunk)

        # NOTE(review): audio still pending here (a too-short remainder at the
        # end of the file) is not exported -- this matches the long-standing
        # behaviour; confirm whether that is intended.

        basename = os.path.splitext(os.path.basename(file))[0]
        for index, piece in enumerate(output_chunks):
            filepath = os.path.join(output, f"{basename}_{index}.wav")
            piece.export(filepath, format="wav")