"""UltraSinger uses AI to automatically create UltraStar song files""" import copy import getopt import os import sys import re import Levenshtein import librosa from tqdm import tqdm from packaging import version import soundfile as sf from modules import os_helper from modules.Audio.denoise import ffmpeg_reduce_noise from modules.Audio.separation import separate_audio from modules.Audio.vocal_chunks import ( export_chunks_from_transcribed_data, export_chunks_from_ultrastar_data, ) from modules.Audio.silence_processing import remove_silence_from_transcription_data, get_silence_sections from modules.csv_handler import export_transcribed_data_to_csv from modules.Audio.convert_audio import convert_audio_to_mono_wav, convert_wav_to_mp3 from modules.Audio.youtube import ( download_youtube_audio, download_youtube_thumbnail, download_youtube_video, get_youtube_title, ) from modules.DeviceDetection.device_detection import check_gpu_support from modules.console_colors import ( ULTRASINGER_HEAD, blue_highlighted, gold_highlighted, light_blue_highlighted, red_highlighted, ) from modules.Midi import midi_creator from modules.Midi.midi_creator import ( convert_frequencies_to_notes, create_midi_notes_from_pitched_data, most_frequent, ) from modules.Pitcher.pitcher import ( get_frequencies_with_high_confidence, get_pitch_with_crepe_file, ) from modules.Pitcher.pitched_data import PitchedData from modules.Speech_Recognition.hyphenation import hyphenation, language_check, create_hyphenator from modules.Speech_Recognition.Whisper import transcribe_with_whisper from modules.Ultrastar import ultrastar_score_calculator, ultrastar_writer, ultrastar_converter, ultrastar_parser from modules.Ultrastar.ultrastar_txt import UltrastarTxtValue from Settings import Settings from modules.Speech_Recognition.TranscribedData import TranscribedData from modules.plot import plot, plot_spectrogram from modules.musicbrainz_client import get_music_infos settings = Settings() def convert_midi_notes_to_ultrastar_notes(midi_notes: list[str]) -> list[int]: """Convert midi notes to ultrastar notes""" print(f"{ULTRASINGER_HEAD} Creating Ultrastar notes from midi data") ultrastar_note_numbers = [] for i in enumerate(midi_notes): pos = i[0] note_number_librosa = librosa.note_to_midi(midi_notes[pos]) pitch = ultrastar_converter.midi_note_to_ultrastar_note( note_number_librosa ) ultrastar_note_numbers.append(pitch) # todo: Progress? # print( # f"Note: {midi_notes[i]} midi_note: {str(note_number_librosa)} pitch: {str(pitch)}" # ) return ultrastar_note_numbers def pitch_each_chunk_with_crepe(directory: str) -> list[str]: """Pitch each chunk with crepe and return midi notes""" print( f"{ULTRASINGER_HEAD} Pitching each chunk with {blue_highlighted('crepe')}" ) midi_notes = [] for filename in sorted( [f for f in os.listdir(directory) if f.endswith(".wav")], key=lambda x: int(x.split("_")[1]), ): filepath = os.path.join(directory, filename) # todo: stepsize = duration? then when shorter than "it" it should take the duration. Otherwise there a more notes pitched_data = get_pitch_with_crepe_file( filepath, settings.crepe_model_capacity, settings.crepe_step_size, settings.tensorflow_device, ) conf_f = get_frequencies_with_high_confidence( pitched_data.frequencies, pitched_data.confidence ) notes = convert_frequencies_to_notes(conf_f) note = most_frequent(notes)[0][0] midi_notes.append(note) # todo: Progress? 
# print(filename + " f: " + str(mean)) return midi_notes def add_hyphen_to_data(transcribed_data: list[TranscribedData], hyphen_words: list[list[str]]): """Add hyphen to transcribed data return new data list""" new_data = [] for i, data in enumerate(transcribed_data): if not hyphen_words[i]: new_data.append(data) else: chunk_duration = data.end - data.start chunk_duration = chunk_duration / (len(hyphen_words[i])) next_start = data.start for j in enumerate(hyphen_words[i]): hyphenated_word_index = j[0] dup = copy.copy(data) dup.start = next_start next_start = data.end - chunk_duration * ( len(hyphen_words[i]) - 1 - hyphenated_word_index ) dup.end = next_start dup.word = hyphen_words[i][hyphenated_word_index] dup.is_hyphen = True if hyphenated_word_index == len(hyphen_words[i]) - 1: dup.is_word_end = True else: dup.is_word_end = False new_data.append(dup) return new_data def get_bpm_from_data(data, sampling_rate): """Get real bpm from audio data""" onset_env = librosa.onset.onset_strength(y=data, sr=sampling_rate) wav_tempo = librosa.beat.tempo(onset_envelope=onset_env, sr=sampling_rate) print( f"{ULTRASINGER_HEAD} BPM is {blue_highlighted(str(round(wav_tempo[0], 2)))}" ) return wav_tempo[0] def get_bpm_from_file(wav_file: str) -> float: """Get real bpm from audio file""" data, sampling_rate = librosa.load(wav_file, sr=None) return get_bpm_from_data(data, sampling_rate) def correct_words(recognized_words, word_list_file): """Docstring""" with open(word_list_file, "r", encoding="utf-8") as file: text = file.read() word_list = text.split() for i, rec_word in enumerate(recognized_words): if rec_word.word in word_list: continue closest_word = min( word_list, key=lambda x: Levenshtein.distance(rec_word.word, x) ) print(recognized_words[i].word + " - " + closest_word) recognized_words[i].word = closest_word return recognized_words def print_help() -> None: """Print help text""" help_string = """ UltraSinger.py [opt] [mode] [transcription] [pitcher] [extra] [opt] -h This help text. -i Ultrastar.txt audio like .mp3, .wav, youtube link -o Output folder [mode] ## INPUT is audio ## default Creates all # Single file creation selection is in progress, you currently getting all! (-u Create ultrastar txt file) # In Progress (-m Create midi file) # In Progress (-s Create sheet file) # In Progress ## INPUT is ultrastar.txt ## default Creates all # Single selection is in progress, you currently getting all! 
def print_help() -> None:
    """Print help text"""
    help_string = """
    UltraSinger.py [opt] [mode] [transcription] [pitcher] [extra]

    [opt]
    -h      This help text.
    -i      Input: Ultrastar.txt file, audio file (e.g. .mp3, .wav) or a YouTube link
    -o      Output folder

    [mode]
    ## INPUT is audio ##
    default  Creates all

    # Single file creation selection is in progress, you currently get all!
    (-u      Create ultrastar txt file) # In Progress
    (-m      Create midi file) # In Progress
    (-s      Create sheet file) # In Progress

    ## INPUT is ultrastar.txt ##
    default  Creates all

    # Single selection is in progress, you currently get all!
    (-r      Repitch Ultrastar.txt (input has to be audio)) # In Progress
    (-p      Check pitch of Ultrastar.txt input) # In Progress
    (-m      Create midi file) # In Progress

    [transcription]
    # Default is whisper
    --whisper               Multilingual model > tiny|base|small|medium|large-v1|large-v2 >> ((default) is large-v2)
                            English-only model > tiny.en|base.en|small.en|medium.en
    --whisper_align_model   Use other languages model for Whisper provided from huggingface.co
    --language              Override the language detected by whisper, does not affect transcription but steps after transcription
    --whisper_batch_size    Reduce if low on GPU mem >> ((default) is 16)
    --whisper_compute_type  Change to "int8" if low on GPU mem (may reduce accuracy) >> ((default) is "float16" for cuda devices, "int8" for cpu)

    [pitcher]
    # Default is crepe
    --crepe            tiny|full >> ((default) is full)
    --crepe_step_size  unit is milliseconds >> ((default) is 10)

    [extra]
    --hyphenation           True|False >> ((default) is True)
    --disable_separation    True|False >> ((default) is False)
    --disable_karaoke       True|False >> ((default) is False)
    --create_audio_chunks   True|False >> ((default) is False)
    --keep_cache            True|False >> ((default) is False)
    --plot                  True|False >> ((default) is False)
    --format_version        0.3.0|1.0.0|1.1.0 >> ((default) is 1.0.0)

    [device]
    --force_cpu          True|False >> ((default) is False)  All steps will be forced to cpu
    --force_whisper_cpu  True|False >> ((default) is False)  Only whisper will be forced to cpu
    --force_crepe_cpu    True|False >> ((default) is False)  Only crepe will be forced to cpu
    """
    print(help_string)
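
# Illustrative invocations (paths and option values are only examples):
#   python UltraSinger.py -i "input/Artist - Title.mp3" -o "output" --whisper large-v2 --language en
#   python UltraSinger.py -i "https://www.youtube.com/watch?v=..." --force_whisper_cpu True
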
def remove_unecessary_punctuations(transcribed_data: list[TranscribedData]) -> None:
    """Remove unnecessary punctuation from the transcribed data"""
    punctuation = ".,"
    for data in transcribed_data:
        data.word = data.word.translate(
            {ord(char): None for char in punctuation}
        )


def hyphenate_each_word(language: str, transcribed_data: list[TranscribedData]) -> list[list[str]] | None:
    """Hyphenate each word in the transcribed data."""
    lang_region = language_check(language)
    if lang_region is None:
        print(
            f"{ULTRASINGER_HEAD} {red_highlighted('Error in hyphenation for language ')} {blue_highlighted(language)}{red_highlighted(', maybe you want to disable it?')}"
        )
        return None

    hyphenated_word = []
    try:
        hyphenator = create_hyphenator(lang_region)
        for i in tqdm(enumerate(transcribed_data)):
            pos = i[0]
            hyphenated_word.append(
                hyphenation(transcribed_data[pos].word, hyphenator)
            )
    except Exception:
        print(f"{ULTRASINGER_HEAD} {red_highlighted('Error in hyphenation for language ')} {blue_highlighted(language)}{red_highlighted(', maybe you want to disable it?')}")
        return None

    return hyphenated_word
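
# Illustrative example: with an English (en_US) hyphenator, a word like "singing"
# is typically split into ["sing", "ing"]; add_hyphen_to_data() then distributes
# the word's original start/end time evenly across those chunks. The exact splits
# depend on the hyphenation dictionary in use.
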
def print_support() -> None:
    """Print support text"""
    print()
    print(
        f"{ULTRASINGER_HEAD} {gold_highlighted('Do you like UltraSinger? Want it to be even better? Then help with your')} {light_blue_highlighted('support')}{gold_highlighted('!')}"
    )
    print(
        f"{ULTRASINGER_HEAD} See project page -> https://github.com/rakuri255/UltraSinger"
    )
    print(
        f"{ULTRASINGER_HEAD} {gold_highlighted('This will help a lot to keep this project alive and improved.')}"
    )


def print_version() -> None:
    """Print version text"""
    print()
    print(
        f"{ULTRASINGER_HEAD} {gold_highlighted('*****************************')}"
    )
    print(
        f"{ULTRASINGER_HEAD} {gold_highlighted('UltraSinger Version:')} {light_blue_highlighted(settings.APP_VERSION)}"
    )
    print(
        f"{ULTRASINGER_HEAD} {gold_highlighted('*****************************')}"
    )


def run() -> None:
    """The processing function of this program"""
    is_audio = ".txt" not in settings.input_file_path
    ultrastar_class = None
    real_bpm = None
    (title, artist, year, genre) = (None, None, None, None)

    if not is_audio:  # Parse Ultrastar txt
        print(f"{ULTRASINGER_HEAD} {gold_highlighted('re-pitch mode')}")
        (
            basename_without_ext,
            real_bpm,
            song_output,
            ultrastar_audio_input_path,
            ultrastar_class,
        ) = parse_ultrastar_txt()
    elif settings.input_file_path.startswith("https:"):  # Youtube
        print(f"{ULTRASINGER_HEAD} {gold_highlighted('full automatic mode')}")
        (
            basename_without_ext,
            song_output,
            ultrastar_audio_input_path,
            (title, artist, year, genre),
        ) = download_from_youtube()
    else:  # Audio File
        print(f"{ULTRASINGER_HEAD} {gold_highlighted('full automatic mode')}")
        (
            basename_without_ext,
            song_output,
            ultrastar_audio_input_path,
            (title, artist, year, genre),
        ) = infos_from_audio_input_file()

    cache_path = os.path.join(song_output, "cache")
    settings.processing_audio_path = os.path.join(
        cache_path, basename_without_ext + ".wav"
    )
    os_helper.create_folder(cache_path)

    # Separate vocal from audio
    audio_separation_path = separate_vocal_from_audio(
        basename_without_ext, cache_path, ultrastar_audio_input_path
    )
    vocals_path = os.path.join(audio_separation_path, "vocals.wav")
    instrumental_path = os.path.join(audio_separation_path, "no_vocals.wav")

    # Move instrumental and vocals
    if settings.create_karaoke and version.parse(settings.format_version) < version.parse("1.1.0"):
        karaoke_output_path = os.path.join(song_output, basename_without_ext + " [Karaoke].mp3")
        convert_wav_to_mp3(instrumental_path, karaoke_output_path)

    if version.parse(settings.format_version) >= version.parse("1.1.0"):
        instrumental_output_path = os.path.join(song_output, basename_without_ext + " [Instrumental].mp3")
        convert_wav_to_mp3(instrumental_path, instrumental_output_path)
        vocals_output_path = os.path.join(song_output, basename_without_ext + " [Vocals].mp3")
        convert_wav_to_mp3(vocals_path, vocals_output_path)

    if settings.use_separated_vocal:
        input_path = vocals_path
    else:
        input_path = ultrastar_audio_input_path

    # Denoise vocal audio
    denoised_output_path = os.path.join(
        cache_path, basename_without_ext + "_denoised.wav"
    )
    denoise_vocal_audio(input_path, denoised_output_path)

    # Convert to mono audio
    mono_output_path = os.path.join(
        cache_path, basename_without_ext + "_mono.wav"
    )
    convert_audio_to_mono_wav(denoised_output_path, mono_output_path)

    # Mute silence sections
    mute_output_path = os.path.join(
        cache_path, basename_without_ext + "_mute.wav"
    )
    mute_no_singing_parts(mono_output_path, mute_output_path)

    # Define the audio file to process
    settings.processing_audio_path = mute_output_path

    # Audio transcription
    transcribed_data = None
    language = settings.language
    if is_audio:
        detected_language, transcribed_data = transcribe_audio()
        if language is None:
            language = detected_language

        remove_unecessary_punctuations(transcribed_data)

        if settings.hyphenation:
            hyphen_words = hyphenate_each_word(language, transcribed_data)
            if hyphen_words is not None:
                transcribed_data = add_hyphen_to_data(transcribed_data, hyphen_words)

        transcribed_data = remove_silence_from_transcription_data(
            settings.processing_audio_path, transcribed_data
        )

        # todo: do we need to correct words?
        # lyric = 'input/faber_lyric.txt'
        # --corrected_words = correct_words(vosk_speech, lyric)

    # Create audio chunks
    if settings.create_audio_chunks:
        create_audio_chunks(
            cache_path,
            is_audio,
            transcribed_data,
            ultrastar_audio_input_path,
            ultrastar_class,
        )

    # Pitch the audio
    midi_notes, pitched_data, ultrastar_note_numbers = pitch_audio(
        is_audio, transcribed_data, ultrastar_class
    )

    # Create plot
    if settings.create_plot:
        vocals_path = os.path.join(audio_separation_path, "vocals.wav")
        plot_spectrogram(vocals_path, song_output, "vocals.wav")
        plot_spectrogram(settings.processing_audio_path, song_output, "processing audio")
        plot(pitched_data, song_output, transcribed_data, ultrastar_class, midi_notes)

    # Write Ultrastar txt
    if is_audio:
        real_bpm, ultrastar_file_output = create_ultrastar_txt_from_automation(
            basename_without_ext,
            song_output,
            transcribed_data,
            ultrastar_audio_input_path,
            ultrastar_note_numbers,
            language,
            title,
            artist,
            year,
            genre,
        )
    else:
        ultrastar_file_output = create_ultrastar_txt_from_ultrastar_data(
            song_output, ultrastar_class, ultrastar_note_numbers
        )

    # Calc Points
    ultrastar_class, simple_score, accurate_score = calculate_score_points(
        is_audio, pitched_data, ultrastar_class, ultrastar_file_output
    )

    # Add calculated score to Ultrastar txt
    # Todo: Missing Karaoke
    ultrastar_writer.add_score_to_ultrastar_txt(
        ultrastar_file_output, simple_score
    )

    # Midi
    if settings.create_midi:
        create_midi_file(real_bpm, song_output, ultrastar_class, basename_without_ext)

    # Cleanup
    if not settings.keep_cache:
        remove_cache_folder(cache_path)

    # Print Support
    print_support()


def mute_no_singing_parts(mono_output_path, mute_output_path):
    print(f"{ULTRASINGER_HEAD} Mute audio parts with no singing")
    silence_sections = get_silence_sections(mono_output_path)
    y, sr = librosa.load(mono_output_path, sr=None)
    # Mute the parts of the audio with no singing
    for i in silence_sections:
        # Define the time range to mute
        start_time = i[0]  # Start time in seconds
        end_time = i[1]  # End time in seconds

        # Convert time to sample indices
        start_sample = int(start_time * sr)
        end_sample = int(end_time * sr)

        y[start_sample:end_sample] = 0
    sf.write(mute_output_path, y, sr)
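
# Illustrative arithmetic for the muting above: with sr == 22050 Hz, a silence
# section of (1.5, 2.0) seconds maps to samples 33075..44100, which are zeroed.
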
def get_unused_song_output_dir(path: str) -> str:
    """Get an unused song output dir"""
    # check if dir exists and add (i) if it does
    i = 1
    if os_helper.check_if_folder_exists(path):
        path = f"{path} ({i})"
    else:
        return path

    while os_helper.check_if_folder_exists(path):
        path = path.replace(f"({i - 1})", f"({i})")
        i += 1
        if i > 999:
            print(
                f"{ULTRASINGER_HEAD} {red_highlighted('Error: Could not create output folder! (999) is the maximum number of tries.')}"
            )
            sys.exit(1)
    return path


def transcribe_audio() -> tuple[str, list[TranscribedData]]:
    """Transcribe audio with AI"""
    if settings.transcriber == "whisper":
        device = "cpu" if settings.force_whisper_cpu else settings.pytorch_device
        transcribed_data, detected_language = transcribe_with_whisper(
            settings.processing_audio_path,
            settings.whisper_model,
            device,
            settings.whisper_align_model,
            settings.whisper_batch_size,
            settings.whisper_compute_type,
            settings.language,
        )
    else:
        raise NotImplementedError
    return detected_language, transcribed_data


def separate_vocal_from_audio(
    basename_without_ext: str, cache_path: str, ultrastar_audio_input_path: str
) -> str:
    """Separate vocal from audio"""
    audio_separation_path = os.path.join(
        cache_path, "separated", "htdemucs", basename_without_ext
    )
    if settings.use_separated_vocal or settings.create_karaoke:
        separate_audio(ultrastar_audio_input_path, cache_path, settings.pytorch_device)
    return audio_separation_path
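
# Note (descriptive, assumption about modules/Audio/separation.py): separate_audio()
# is expected to write the htdemucs stems below <cache_path>/separated/htdemucs/<basename>/;
# run() later reads "vocals.wav" and "no_vocals.wav" from exactly that folder.
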
def calculate_score_points(
    is_audio: bool, pitched_data: PitchedData, ultrastar_class: UltrastarTxtValue, ultrastar_file_output: str
):
    """Calculate score points"""
    if is_audio:
        ultrastar_class = ultrastar_parser.parse_ultrastar_txt(
            ultrastar_file_output
        )
        (
            simple_score,
            accurate_score,
        ) = ultrastar_score_calculator.calculate_score(
            pitched_data, ultrastar_class
        )
        ultrastar_score_calculator.print_score_calculation(
            simple_score, accurate_score
        )
    else:
        print(
            f"{ULTRASINGER_HEAD} {blue_highlighted('Score of original Ultrastar txt')}"
        )
        (
            simple_score,
            accurate_score,
        ) = ultrastar_score_calculator.calculate_score(
            pitched_data, ultrastar_class
        )
        ultrastar_score_calculator.print_score_calculation(
            simple_score, accurate_score
        )
        print(
            f"{ULTRASINGER_HEAD} {blue_highlighted('Score of re-pitched Ultrastar txt')}"
        )
        ultrastar_class = ultrastar_parser.parse_ultrastar_txt(
            ultrastar_file_output
        )
        (
            simple_score,
            accurate_score,
        ) = ultrastar_score_calculator.calculate_score(
            pitched_data, ultrastar_class
        )
        ultrastar_score_calculator.print_score_calculation(
            simple_score, accurate_score
        )
    return ultrastar_class, simple_score, accurate_score


def create_ultrastar_txt_from_ultrastar_data(
    song_output: str, ultrastar_class: UltrastarTxtValue, ultrastar_note_numbers: list[int]
) -> str:
    """Create Ultrastar txt from Ultrastar data"""
    output_repitched_ultrastar = os.path.join(
        song_output, ultrastar_class.title + ".txt"
    )
    ultrastar_writer.create_repitched_txt_from_ultrastar_data(
        settings.input_file_path,
        ultrastar_note_numbers,
        output_repitched_ultrastar,
    )
    return output_repitched_ultrastar


def create_ultrastar_txt_from_automation(
    basename_without_ext: str,
    song_output: str,
    transcribed_data: list[TranscribedData],
    ultrastar_audio_input_path: str,
    ultrastar_note_numbers: list[int],
    language: str,
    title: str,
    artist: str,
    year: str,
    genre: str,
):
    """Create Ultrastar txt from automation"""
    ultrastar_header = UltrastarTxtValue()
    ultrastar_header.version = settings.format_version
    ultrastar_header.title = basename_without_ext
    ultrastar_header.artist = basename_without_ext
    ultrastar_header.mp3 = basename_without_ext + ".mp3"
    ultrastar_header.audio = basename_without_ext + ".mp3"
    ultrastar_header.vocals = basename_without_ext + " [Vocals].mp3"
    ultrastar_header.instrumental = basename_without_ext + " [Instrumental].mp3"
    ultrastar_header.video = basename_without_ext + ".mp4"
    ultrastar_header.language = language
    cover = basename_without_ext + " [CO].jpg"
    ultrastar_header.cover = (
        cover
        if os_helper.check_file_exists(os.path.join(song_output, cover))
        else None
    )
    ultrastar_header.creator = f"{ultrastar_header.creator} {Settings.APP_VERSION}"
    ultrastar_header.comment = f"{ultrastar_header.comment} {Settings.APP_VERSION}"

    # Additional data
    if title is not None:
        ultrastar_header.title = title
    if artist is not None:
        ultrastar_header.artist = artist
    if year is not None:
        ultrastar_header.year = extract_year(year)
    if genre is not None:
        ultrastar_header.genre = format_separated_string(genre)

    real_bpm = get_bpm_from_file(ultrastar_audio_input_path)
    ultrastar_file_output = os.path.join(
        song_output, basename_without_ext + ".txt"
    )
    ultrastar_writer.create_ultrastar_txt_from_automation(
        transcribed_data,
        ultrastar_note_numbers,
        ultrastar_file_output,
        ultrastar_header,
        real_bpm,
    )
    if settings.create_karaoke and version.parse(settings.format_version) < version.parse("1.1.0"):
        title = basename_without_ext + " [Karaoke]"
        ultrastar_header.title = title
        ultrastar_header.mp3 = title + ".mp3"
        karaoke_output_path = os.path.join(song_output, title)
        karaoke_txt_output_path = karaoke_output_path + ".txt"
        ultrastar_writer.create_ultrastar_txt_from_automation(
            transcribed_data,
            ultrastar_note_numbers,
            karaoke_txt_output_path,
            ultrastar_header,
            real_bpm,
        )
    return real_bpm, ultrastar_file_output


def extract_year(date: str) -> str:
    """Extract a four-digit year from a date string; fall back to the input"""
    match = re.search(r'\b\d{4}\b', date)
    if match:
        return match.group(0)
    else:
        return date


def format_separated_string(data: str) -> str:
    """Normalize a ';' or '/' separated string into a capitalized, comma-separated string"""
    temp = re.sub(r'[;/]', ',', data)
    words = temp.split(',')
    words = [s for s in words if s.strip()]

    for i, word in enumerate(words):
        if "-" not in word:
            words[i] = word.strip().capitalize() + ', '
        else:
            dash_words = word.split('-')
            capitalized_dash_words = [dash_word.strip().capitalize() for dash_word in dash_words]
            formatted_dash_word = '-'.join(capitalized_dash_words) + ', '
            words[i] = formatted_dash_word

    formatted_string = ''.join(words)
    if formatted_string.endswith(', '):
        formatted_string = formatted_string[:-2]
    return formatted_string
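
# Illustrative examples for the two helpers above:
#   extract_year("2023-05-01")                      -> "2023"
#   format_separated_string("pop;rock/indie-folk")  -> "Pop, Rock, Indie-Folk"
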
if fname.endswith("."): fname = fname.rstrip(" .") # Windows does not like trailing periods return fname def download_from_youtube() -> tuple[str, str, str, tuple[str, str, str, str]]: """Download from YouTube""" (artist, title) = get_youtube_title(settings.input_file_path) # Get additional data for song (title_info, artist_info, year_info, genre_info) = get_music_infos(f"{artist} - {title}") if title_info is not None: title = title_info artist = artist_info basename_without_ext = sanitize_filename(f"{artist} - {title}") basename = basename_without_ext + ".mp3" song_output = os.path.join(settings.output_file_path, basename_without_ext) song_output = get_unused_song_output_dir(song_output) os_helper.create_folder(song_output) download_youtube_audio( settings.input_file_path, basename_without_ext, song_output ) download_youtube_video( settings.input_file_path, basename_without_ext, song_output ) download_youtube_thumbnail( settings.input_file_path, basename_without_ext, song_output ) ultrastar_audio_input_path = os.path.join(song_output, basename) return basename_without_ext, song_output, ultrastar_audio_input_path, (title, artist, year_info, genre_info) def parse_ultrastar_txt() -> tuple[str, float, str, str, UltrastarTxtValue]: """Parse Ultrastar txt""" ultrastar_class = ultrastar_parser.parse_ultrastar_txt( settings.input_file_path ) real_bpm = ultrastar_converter.ultrastar_bpm_to_real_bpm( float(ultrastar_class.bpm.replace(",", ".")) ) ultrastar_mp3_name = ultrastar_class.mp3 basename_without_ext = os.path.splitext(ultrastar_mp3_name)[0] dirname = os.path.dirname(settings.input_file_path) ultrastar_audio_input_path = os.path.join(dirname, ultrastar_mp3_name) song_output = os.path.join( settings.output_file_path, ultrastar_class.artist.strip() + " - " + ultrastar_class.title.strip(), ) song_output = get_unused_song_output_dir(str(song_output)) os_helper.create_folder(song_output) return ( str(basename_without_ext), real_bpm, song_output, str(ultrastar_audio_input_path), ultrastar_class, ) def create_midi_file(real_bpm: float, song_output: str, ultrastar_class: UltrastarTxtValue, basename_without_ext: str) -> None: """Create midi file""" print( f"{ULTRASINGER_HEAD} Creating Midi with {blue_highlighted('pretty_midi')}" ) voice_instrument = [ midi_creator.convert_ultrastar_to_midi_instrument(ultrastar_class) ] midi_output = os.path.join(song_output, f"{basename_without_ext}.mid") midi_creator.instruments_to_midi( voice_instrument, real_bpm, midi_output ) def pitch_audio(is_audio: bool, transcribed_data: list[TranscribedData], ultrastar_class: UltrastarTxtValue) -> tuple[ list[str], PitchedData, list[int]]: """Pitch audio""" # todo: chunk pitching as option? 
def download_from_youtube() -> tuple[str, str, str, tuple[str, str, str, str]]:
    """Download from YouTube"""
    (artist, title) = get_youtube_title(settings.input_file_path)

    # Get additional data for song
    (title_info, artist_info, year_info, genre_info) = get_music_infos(f"{artist} - {title}")

    if title_info is not None:
        title = title_info
        artist = artist_info

    basename_without_ext = sanitize_filename(f"{artist} - {title}")
    basename = basename_without_ext + ".mp3"
    song_output = os.path.join(settings.output_file_path, basename_without_ext)
    song_output = get_unused_song_output_dir(song_output)
    os_helper.create_folder(song_output)
    download_youtube_audio(
        settings.input_file_path, basename_without_ext, song_output
    )
    download_youtube_video(
        settings.input_file_path, basename_without_ext, song_output
    )
    download_youtube_thumbnail(
        settings.input_file_path, basename_without_ext, song_output
    )
    ultrastar_audio_input_path = os.path.join(song_output, basename)
    return basename_without_ext, song_output, ultrastar_audio_input_path, (title, artist, year_info, genre_info)


def parse_ultrastar_txt() -> tuple[str, float, str, str, UltrastarTxtValue]:
    """Parse Ultrastar txt"""
    ultrastar_class = ultrastar_parser.parse_ultrastar_txt(
        settings.input_file_path
    )
    real_bpm = ultrastar_converter.ultrastar_bpm_to_real_bpm(
        float(ultrastar_class.bpm.replace(",", "."))
    )
    ultrastar_mp3_name = ultrastar_class.mp3
    basename_without_ext = os.path.splitext(ultrastar_mp3_name)[0]
    dirname = os.path.dirname(settings.input_file_path)
    ultrastar_audio_input_path = os.path.join(dirname, ultrastar_mp3_name)
    song_output = os.path.join(
        settings.output_file_path,
        ultrastar_class.artist.strip() + " - " + ultrastar_class.title.strip(),
    )
    song_output = get_unused_song_output_dir(str(song_output))
    os_helper.create_folder(song_output)

    return (
        str(basename_without_ext),
        real_bpm,
        song_output,
        str(ultrastar_audio_input_path),
        ultrastar_class,
    )


def create_midi_file(real_bpm: float, song_output: str, ultrastar_class: UltrastarTxtValue, basename_without_ext: str) -> None:
    """Create midi file"""
    print(
        f"{ULTRASINGER_HEAD} Creating Midi with {blue_highlighted('pretty_midi')}"
    )
    voice_instrument = [
        midi_creator.convert_ultrastar_to_midi_instrument(ultrastar_class)
    ]
    midi_output = os.path.join(song_output, f"{basename_without_ext}.mid")
    midi_creator.instruments_to_midi(
        voice_instrument, real_bpm, midi_output
    )


def pitch_audio(is_audio: bool, transcribed_data: list[TranscribedData], ultrastar_class: UltrastarTxtValue) -> tuple[list[str], PitchedData, list[int]]:
    """Pitch audio"""
    # todo: chunk pitching as option?
    # midi_notes = pitch_each_chunk_with_crepe(chunk_folder_name)
    device = "cpu" if settings.force_crepe_cpu else settings.tensorflow_device
    pitched_data = get_pitch_with_crepe_file(
        settings.processing_audio_path,
        settings.crepe_model_capacity,
        settings.crepe_step_size,
        device,
    )
    if is_audio:
        start_times = []
        end_times = []
        for i, data in enumerate(transcribed_data):
            start_times.append(data.start)
            end_times.append(data.end)
        midi_notes = create_midi_notes_from_pitched_data(
            start_times, end_times, pitched_data
        )
    else:
        midi_notes = create_midi_notes_from_pitched_data(
            ultrastar_class.startTimes, ultrastar_class.endTimes, pitched_data
        )
    ultrastar_note_numbers = convert_midi_notes_to_ultrastar_notes(midi_notes)

    return midi_notes, pitched_data, ultrastar_note_numbers
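
# Note (illustrative, based on the defaults in the help text): with
# crepe_step_size == 10 ms, get_pitch_with_crepe_file() yields one
# frequency/confidence estimate roughly every 10 ms, and
# create_midi_notes_from_pitched_data() is given each word's start/end time so
# it can reduce the frames inside that window to a single MIDI note per word.
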
def create_audio_chunks(
    cache_path: str,
    is_audio: bool,
    transcribed_data: list[TranscribedData],
    ultrastar_audio_input_path: str,
    ultrastar_class: UltrastarTxtValue,
) -> None:
    """Create audio chunks"""
    audio_chunks_path = os.path.join(
        cache_path, settings.audio_chunk_folder_name
    )
    os_helper.create_folder(audio_chunks_path)
    if is_audio:  # and csv
        csv_filename = os.path.join(audio_chunks_path, "_chunks.csv")
        export_chunks_from_transcribed_data(
            settings.processing_audio_path, transcribed_data, audio_chunks_path
        )
        export_transcribed_data_to_csv(transcribed_data, csv_filename)
    else:
        export_chunks_from_ultrastar_data(
            ultrastar_audio_input_path, ultrastar_class, audio_chunks_path
        )


def denoise_vocal_audio(input_path: str, output_path: str) -> None:
    """Denoise vocal audio"""
    ffmpeg_reduce_noise(input_path, output_path)


def main(argv: list[str]) -> None:
    """Main function"""
    print_version()
    init_settings(argv)
    run()
    sys.exit()


def remove_cache_folder(cache_path: str) -> None:
    """Remove cache folder"""
    os_helper.remove_folder(cache_path)


def init_settings(argv: list[str]) -> None:
    """Init settings"""
    long, short = arg_options()
    opts, args = getopt.getopt(argv, short, long)
    if len(opts) == 0:
        print_help()
        sys.exit()

    # Boolean options are passed as the strings "True"/"False" (see print_help)
    # and are parsed here without eval().
    for opt, arg in opts:
        if opt == "-h":
            print_help()
            sys.exit()
        elif opt in ("-i", "--ifile"):
            settings.input_file_path = arg
        elif opt in ("-o", "--ofile"):
            settings.output_file_path = arg
        elif opt == "--whisper":
            settings.transcriber = "whisper"
            settings.whisper_model = arg
        elif opt == "--whisper_align_model":
            settings.whisper_align_model = arg
        elif opt == "--whisper_batch_size":
            settings.whisper_batch_size = int(arg)
        elif opt == "--whisper_compute_type":
            settings.whisper_compute_type = arg
        elif opt == "--language":
            settings.language = arg
        elif opt == "--crepe":
            settings.crepe_model_capacity = arg
        elif opt == "--crepe_step_size":
            settings.crepe_step_size = int(arg)
        elif opt == "--plot":
            settings.create_plot = arg in ["True", "true"]
        elif opt == "--midi":
            settings.create_midi = arg in ["True", "true"]
        elif opt == "--hyphenation":
            settings.hyphenation = arg in ["True", "true"]
        elif opt == "--disable_separation":
            settings.use_separated_vocal = arg not in ["True", "true"]
        elif opt == "--disable_karaoke":
            settings.create_karaoke = arg not in ["True", "true"]
        elif opt == "--create_audio_chunks":
            settings.create_audio_chunks = arg in ["True", "true"]
        elif opt == "--force_cpu":
            settings.force_cpu = arg in ["True", "true"]
            if settings.force_cpu:
                os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
        elif opt == "--force_whisper_cpu":
            settings.force_whisper_cpu = arg in ["True", "true"]
        elif opt == "--force_crepe_cpu":
            settings.force_crepe_cpu = arg in ["True", "true"]
        elif opt == "--format_version":
            if arg != '0.3.0' and arg != '1.0.0' and arg != '1.1.0':
                print(
                    f"{ULTRASINGER_HEAD} {red_highlighted('Error: Format version')} {blue_highlighted(arg)} {red_highlighted('is not supported.')}"
                )
                sys.exit(1)
            settings.format_version = arg
        elif opt == "--keep_cache":
            settings.keep_cache = arg in ["True", "true"]

    if settings.output_file_path == "":
        if settings.input_file_path.startswith("https:"):
            dirname = os.getcwd()
        else:
            dirname = os.path.dirname(settings.input_file_path)
        settings.output_file_path = os.path.join(dirname, "output")

    if not settings.force_cpu:
        settings.tensorflow_device, settings.pytorch_device = check_gpu_support()


def arg_options():
    short = "hi:o:amv:"
    long = [
        "ifile=",
        "ofile=",
        "crepe=",
        "crepe_step_size=",
        "whisper=",
        "whisper_align_model=",
        "whisper_batch_size=",
        "whisper_compute_type=",
        "language=",
        "plot=",
        "midi=",
        "hyphenation=",
        "disable_separation=",
        "disable_karaoke=",
        "create_audio_chunks=",
        "force_cpu=",
        "force_whisper_cpu=",
        "force_crepe_cpu=",
        "format_version=",
        "keep_cache=",
    ]
    return long, short


if __name__ == "__main__":
    main(sys.argv[1:])
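
# Illustrative programmatic use (equivalent to the CLI entry point above):
#   main(["-i", "input/song.mp3", "-o", "output", "--whisper", "large-v2"])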