"""preprocess_mir1k.py"""
import os
import glob
import re
import json
from typing import Dict, List, Tuple
import numpy as np
from utils.audio import get_audio_file_info, load_audio_file
from utils.midi import midi2note, note_event2midi
from utils.note2event import note2note_event, sort_notes, validate_notes, trim_overlapping_notes
from utils.event2note import event2note_event
from utils.note_event_dataclasses import Note, NoteEvent
from utils.utils import note_event2token2note_event_sanity_check
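
# NOTE (editor's assumption): the commented-out helpers below reference
# MIR_ST500_PROGRAM, which is not defined in this file; it appears to be
# carried over from the companion MIR-ST500 preprocessor. A plausible value,
# consistent with the [100, 129] program list documented in
# preprocess_mir1k_16k below, would be:
#
# MIR_ST500_PROGRAM = [100, 129]  # 100: singing voice, 129: unannotated accompaniment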

# def create_spleeter_audio_stem(vocal_audio_file, accomp_audio_file, mir_st500_id) -> Dict:
#     program = MIR_ST500_PROGRAM
#     is_drum = [0, 0]
#     audio_tracks = []  # multi-channel audio array (C, T)
#     vocal_audio = load_audio_file(vocal_audio_file, dtype=np.int16) / 2**15  # returns bytes
#     audio_tracks.append(vocal_audio.astype(np.float16))
#     accomp_audio = load_audio_file(accomp_audio_file, dtype=np.int16) / 2**15  # returns bytes
#     audio_tracks.append(accomp_audio.astype(np.float16))
#     max_length = max(len(vocal_audio), len(accomp_audio))
#
#     # collate all the audio tracks into a single array
#     n_tracks = 2
#     audio_array = np.zeros((n_tracks, max_length), dtype=np.float16)
#     for j, audio in enumerate(audio_tracks):
#         audio_array[j, :len(audio)] = audio
#
#     stem_content = {
#         'mir_st500_id': mir_st500_id,
#         'program': np.array(program, dtype=np.int64),
#         'is_drum': np.array(is_drum, dtype=np.int64),
#         'n_frames': max_length,  # int
#         'audio_array': audio_array  # (n_tracks, n_frames)
#     }
#     return stem_content

# def create_note_note_event_midi_from_mir1k_annotation(ann, midi_file, mir_st500_id):
#     """
#     Args:
#         ann: List[List[float, float, float]]  # [onset, offset, pitch]
#         mir_st500_id: str
#
#     Returns:
#         notes: List[Note]
#         note_events: List[NoteEvent]
#         midi: List[List[int]]
#     """
#     notes = []
#     for onset, offset, pitch in ann:
#         notes.append(
#             Note(is_drum=False,
#                  program=100,
#                  onset=float(onset),
#                  offset=float(offset),
#                  pitch=int(pitch),
#                  velocity=1))
#     notes = sort_notes(notes)
#     notes = validate_notes(notes)
#     notes = trim_overlapping_notes(notes)
#     note_events = note2note_event(notes)
#
#     # Write midi file
#     note_event2midi(note_events, midi_file)
#     print(f"Created {midi_file}")
#
#     return {  # notes
#         'mir_st500_id': mir_st500_id,
#         'program': MIR_ST500_PROGRAM,
#         'is_drum': [0, 0],
#         'duration_sec': note_events[-1].time,
#         'notes': notes,
#     }, {  # note_events
#         'mir_st500_id': mir_st500_id,
#         'program': MIR_ST500_PROGRAM,
#         'is_drum': [0, 0],
#         'duration_sec': note_events[-1].time,
#         'note_events': note_events,
#     }


def preprocess_mir1k_16k(data_home: os.PathLike, dataset_name='mir1k', sanity_check=False) -> None:
"""
Splits:
- train: index 1 to 400, 346 files (54 files missing)
- test: index 401 to 500, 94 files (6 files missing)
- all: 440 files (60 files missing)
Writes:
- {dataset_name}_{split}_file_list.json: a dictionary with the following keys:
{
index:
{
'mir_st500_id': mir_st500_id,
'n_frames': (int),
'mix_audio_file': 'path/to/mix.wav',
'notes_file': 'path/to/notes.npy',
'note_events_file': 'path/to/note_events.npy',
'midi_file': 'path/to/midi.mid',
'program': List[int], # [100, 129], 100 for singing voice, and 129 for unannotated
'is_drum': List[int], # [0] or [1]
}
}
"""
    # Directory and file paths
    base_dir = os.path.join(data_home, dataset_name + '_yourmt3_16k')
    output_index_dir = os.path.join(data_home, 'yourmt3_indexes')
    os.makedirs(output_index_dir, exist_ok=True)