from pydub import AudioSegment
import numpy as np

DON_WAV = "./Arcade - Taiko no Tatsujin 2020 Version - Common Sound Effects/Don.wav"
KATSU_WAV = "./Arcade - Taiko no Tatsujin 2020 Version - Common Sound Effects/Katsu.wav"
BALLOON_BANG_WAV = "./Arcade - Taiko no Tatsujin 2020 Version - Common Sound Effects/Balloon.wav"

COURSE = [
    {
        "audio": AudioSegment.empty(),
        "label": "かんたん/梅花(簡單)/Easy",
    },
    {
        "audio": AudioSegment.empty(),
        "label": "ふつう/竹子(普通)/Normal",
    },
    {
        "audio": AudioSegment.empty(),
        "label": "むずかしい/樹(困難)/Hard",
    },
    {
        "audio": AudioSegment.empty(),
        "label": "おに/魔王/Oni",
    },
    {
        "audio": AudioSegment.empty(),
        "label": "裏/Edit",
    },
]

# Simulated hit rate (hits per second) used for drum rolls and balloon notes.
HIT_PER_SEC = 30


def preprocess(data: list, offset: float = 0):
    """Flatten parsed chart events into (wav_path, start_time_in_seconds) pairs."""
    chart = []
    for m in data:
        if m[0] in {1, 3}:  # Don or Big Don
            chart.append((DON_WAV, offset + m[1]))
        elif m[0] in {2, 4}:  # Katsu or Big Katsu
            chart.append((KATSU_WAV, offset + m[1]))
        elif m[0] in {5, 6}:  # Drum Roll or Big Drum Roll: repeat Don from m[1] to m[2]
            count = m[1]
            while count < m[2]:
                chart.append((DON_WAV, offset + count))
                count += 1 / HIT_PER_SEC
        elif m[0] == 7:  # Balloon: hit until the note ends or m[3] hits are reached
            count = m[1]
            balloon_count = 0
            while count < m[2] and balloon_count < m[3]:
                chart.append((DON_WAV, offset + count))
                count += 1 / HIT_PER_SEC
                balloon_count += 1
            if balloon_count >= m[3]:
                # The balloon popped: play the bang right after the final hit.
                chart.append((BALLOON_BANG_WAV, offset + count))
        else:
            raise ValueError(f"Unknown note type {m[0]}: your chart file has some problems.")
    return chart


def resize_audio(file_path: str, target_duration: float, target_amplitude: int):
    """Load a hit sound, trim it to target_duration seconds, and normalise the Katsu clip."""
    audio = AudioSegment.from_wav(file_path)
    audio = audio[: int(target_duration * 1000)]  # pydub slices in milliseconds
    if file_path == DON_WAV or file_path == BALLOON_BANG_WAV:
        return audio
    # Bring the clip to the target loudness (dBFS).
    audio = audio - (audio.dBFS - target_amplitude)
    return audio


def generate_taiko_wav(chart: list, music: str = None):
    """Mix every hit sound into one mono 16-bit track, optionally overlaid on a background song."""
    # Pre-allocate a sample buffer covering the last hit plus a one-second clip.
    # len(AudioSegment) is in milliseconds, start_time is in seconds, the buffer is in samples.
    frame_rate = AudioSegment.from_wav(DON_WAV).frame_rate
    last_end = max(
        start_time + len(resize_audio(file_path, target_duration=1, target_amplitude=-20)) / 1000
        for file_path, start_time in chart
    )
    mixed_audio = np.zeros(int(last_end * frame_rate))
    for file_path, start_time in chart:
        # Assumes the hit WAVs are mono 16-bit, matching the AudioSegment built below.
        audio = resize_audio(file_path, target_duration=0.5, target_amplitude=-20)
        audio_array = np.array(audio.get_array_of_samples())
        start_index = int(start_time * audio.frame_rate)
        end_index = start_index + len(audio_array)
        if len(mixed_audio) < end_index:
            # Grow the buffer if a clip runs past the pre-allocated length.
            mixed_audio = np.pad(mixed_audio, (0, end_index - len(mixed_audio)))
        mixed_audio[start_index:end_index] += audio_array
    # Clamp overlapping hits to the 16-bit range before converting back to audio.
    mixed_audio = np.clip(mixed_audio, -32768, 32767)
    mixed_audio_segment = AudioSegment(
        mixed_audio.astype(np.int16).tobytes(),
        frame_rate=audio.frame_rate,
        sample_width=2,
        channels=1,
    )
    if music is None:
        return mixed_audio_segment
    # Overlay the hit track on the song; the result keeps the song's length.
    background_music = AudioSegment.from_ogg(music)
    return background_music.overlay(mixed_audio_segment)
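

# --- Usage sketch (illustrative only) ---
# A minimal example of the preprocess -> generate_taiko_wav pipeline. The demo
# chart layout (type, start, end, hits), the times in seconds, and the song file
# name "song.ogg" are assumptions for illustration, not data shipped with this script.
if __name__ == "__main__":
    demo_chart = [
        (1, 0.5),           # Don at 0.5 s
        (2, 1.0),           # Katsu at 1.0 s
        (5, 1.5, 2.0),      # drum roll from 1.5 s to 2.0 s
        (7, 2.5, 4.0, 10),  # balloon needing 10 hits between 2.5 s and 4.0 s
    ]
    hits = preprocess(demo_chart, offset=0.0)
    result = generate_taiko_wav(hits)  # pass music="song.ogg" to overlay a song
    result.export("taiko_hits.wav", format="wav")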