# Source: uploaded by jhtonyKoo ("Upload 61 files", commit 2777fde)
""" Utility file for loaders """
import numpy as np
import soundfile as sf
import wave
# Function to convert a length in audio frames into an "HH:MM:SS" time string
def frames_to_time(total_length, sr=44100):
    """Format a length given in audio frames as an "HH:MM:SS" string.

    Args:
        total_length: Length in frames.
        sr: Sample rate, i.e. number of frames per second.

    Returns:
        A zero-padded "HH:MM:SS" string; any fractional second is dropped
        by the ``int()`` conversions.
    """
    elapsed = total_length / sr
    hours = int(elapsed / 3600)
    # Seconds left over once the whole hours have been removed.
    past_hour = elapsed - hours * 3600
    minutes = int(past_hour / 60)
    seconds = int(past_hour - minutes * 60)
    return "{:02d}:{:02d}:{:02d}".format(hours, minutes, seconds)
# Function to convert an "HH:MM:SS" time string into frames or seconds
def time_to_frames(input_time, to_frames=True, sr=44100):
    """Convert an "HH:MM:SS" string into a frame count or a second count.

    Args:
        input_time: Time string with exactly three ':'-separated integer
            fields (hours, minutes, seconds).
        to_frames: If true, return frames (seconds * sr); otherwise seconds.
        sr: Sample rate used for the frame conversion.

    Returns:
        The total number of frames, or of seconds when ``to_frames`` is false.
    """
    fields = input_time.split(':')
    hours, minutes, seconds = fields
    elapsed_seconds = int(hours) * 3600 + int(minutes) * 60 + int(seconds)
    if to_frames:
        return elapsed_seconds * sr
    return elapsed_seconds
# Function to convert a number of seconds into an "HH:MM:SS" time string
def sec_to_time(input_time):
    """Format a duration given in seconds as an "HH:MM:SS" string."""
    # With one "frame" per second the frame formatter works directly on seconds.
    formatted = frames_to_time(input_time, sr=1)
    return formatted
# Function to compute the total length (in frames) of all given wav audio files
def get_total_audio_length(audio_paths):
    """Return the summed length, in frames, of all given wav files.

    Args:
        audio_paths: Iterable of paths to wav files.

    Returns:
        Total number of frames over all files (0 for an empty iterable).
    """
    total_length = 0
    for cur_audio_path in audio_paths:
        # The context manager closes each file as soon as its header is read,
        # so long path lists do not exhaust the process's file descriptors.
        with wave.open(cur_audio_path, 'rb') as cur_wav:
            total_length += cur_wav.getnframes()  # here, length = # of frames
    return total_length
# Function to load the length (in frames) of an input wav audio file
def load_wav_length(audio_path):
    """Return the length of a wav file in frames.

    Args:
        audio_path: Path to the wav file.

    Returns:
        Number of frames reported by the file's header.
    """
    # Use a context manager so the file handle is closed after reading the header.
    with wave.open(audio_path, 'rb') as pt_wav:
        return pt_wav.getnframes()
# Function to load only a selected segment from a 16- or 32-bit wav audio file (stereo channels are de-interleaved)
def load_wav_segment(audio_path, start_point=None, duration=None, axis=1, sample_rate=44100):
    """Load a segment of a 16- or 32-bit wav file as floats scaled to [-1, 1).

    Args:
        audio_path: Path to the wav file.
        start_point: Index of the first frame to read; ``None`` means 0.
        duration: Number of frames to read; ``None`` means the file's total
            frame count (reading stops at the end of the file).
        axis: Axis along which the two channels of a stereo file are stacked
            (1 gives shape (frames, 2), 0 gives shape (2, frames)).
        sample_rate: Sample rate the file is required to have.

    Returns:
        A numpy float array. Stereo files yield a 2-D array with the channels
        split along ``axis``; any other channel count yields the flat 1-D
        sample sequence exactly as read from the file.

    Raises:
        ValueError: If the file's sample rate differs from ``sample_rate`` or
            its sample width is neither 2 nor 4 bytes.
    """
    # A single context-managed handle: the file is closed on every exit path
    # (including the ValueErrors below) and is not opened a second time just
    # to look up its length.
    with wave.open(audio_path, 'rb') as pt_wav:
        if pt_wav.getframerate() != sample_rate:
            raise ValueError(f"ValueError: input audio's sample rate should be {sample_rate}")
        start_point = 0 if start_point is None else start_point
        duration = pt_wav.getnframes() if duration is None else duration
        pt_wav.setpos(start_point)
        raw = pt_wav.readframes(duration)
        sample_width = pt_wav.getsampwidth()
        n_channels = pt_wav.getnchannels()

    if sample_width == 2:
        # 16-bit samples: scale by 2**15 so values fall in [-1, 1).
        X = np.frombuffer(raw, dtype=np.int16) / float(2**15)
    elif sample_width == 4:
        # 32-bit samples: scale by 2**31 so values fall in [-1, 1).
        X = np.frombuffer(raw, dtype=np.int32) / float(2**31)
    else:
        raise ValueError("ValueError: input audio's bit depth should be 16 or 32-bit")

    # exception for stereo channels: samples are interleaved L, R, L, R, ...
    if n_channels == 2:
        X = np.stack((X[::2], X[1::2]), axis=axis)
    return X