projectlosangeles committed on
Commit
306d753
·
verified ·
1 Parent(s): a558736

Upload TMIDIX.py

Browse files
Files changed (1) hide show
  1. TMIDIX.py +2295 -56
TMIDIX.py CHANGED
@@ -1,14 +1,12 @@
1
  #! /usr/bin/python3
2
 
3
-
4
  r'''###############################################################################
5
  ###################################################################################
6
  #
7
  #
8
  # Tegridy MIDI X Module (TMIDI X / tee-midi eks)
9
- # Version 1.0
10
  #
11
- # NOTE: TMIDI X Module starts after the partial MIDI.py module @ line 1342
12
  #
13
  # Based upon MIDI.py module v.6.7. by Peter Billam / pjb.com.au
14
  #
@@ -21,19 +19,19 @@ r'''############################################################################
21
  #
22
  ###################################################################################
23
  ###################################################################################
24
- # Copyright 2025 Project Los Angeles / Tegridy Code
25
  #
26
- # Licensed under the Apache License, Version 2.0 (the "License");
27
- # you may not use this file except in compliance with the License.
28
- # You may obtain a copy of the License at
29
  #
30
- # http://www.apache.org/licenses/LICENSE-2.0
31
  #
32
- # Unless required by applicable law or agreed to in writing, software
33
- # distributed under the License is distributed on an "AS IS" BASIS,
34
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
35
- # See the License for the specific language governing permissions and
36
- # limitations under the License.
37
  ###################################################################################
38
  ###################################################################################
39
  #
@@ -48,9 +46,23 @@ r'''############################################################################
48
  # Copyright 2020 Peter Billam
49
  #
50
  ###################################################################################
51
- ###################################################################################'''
 
 
 
 
 
 
 
 
 
 
 
 
 
52
 
53
  import sys, struct, copy
 
54
  Version = '6.7'
55
  VersionDate = '20201120'
56
 
@@ -1440,14 +1452,14 @@ def _encode(events_lol, unknown_callback=None, never_add_eot=False,
1440
  ###################################################################################
1441
  #
1442
  # Tegridy MIDI X Module (TMIDI X / tee-midi eks)
1443
- # Version 1.0
1444
  #
1445
  # Based upon and includes the amazing MIDI.py module v.6.7. by Peter Billam
1446
  # pjb.com.au
1447
  #
1448
  # Project Los Angeles
1449
- # Tegridy Code 2021
1450
- # https://github.com/Tegridy-Code/Project-Los-Angeles
 
1451
  #
1452
  ###################################################################################
1453
  ###################################################################################
@@ -1457,8 +1469,6 @@ import os
1457
 
1458
  import datetime
1459
 
1460
- import copy
1461
-
1462
  from datetime import datetime
1463
 
1464
  import secrets
@@ -1475,12 +1485,13 @@ import multiprocessing
1475
 
1476
  from itertools import zip_longest
1477
  from itertools import groupby
 
1478
  from collections import Counter
 
 
1479
 
1480
  from operator import itemgetter
1481
 
1482
- import sys
1483
-
1484
  from abc import ABC, abstractmethod
1485
 
1486
  from difflib import SequenceMatcher as SM
@@ -1490,6 +1501,21 @@ import math
1490
 
1491
  import matplotlib.pyplot as plt
1492
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1493
  ###################################################################################
1494
  #
1495
  # Original TMIDI Tegridy helper functions
@@ -3842,7 +3868,10 @@ def chordify_score(score,
3842
  else:
3843
  return None
3844
 
3845
- def fix_monophonic_score_durations(monophonic_score):
 
 
 
3846
 
3847
  fixed_score = []
3848
 
@@ -3854,15 +3883,17 @@ def fix_monophonic_score_durations(monophonic_score):
3854
  nmt = monophonic_score[i+1][1]
3855
 
3856
  if note[1]+note[2] >= nmt:
3857
- note_dur = nmt-note[1]-1
3858
  else:
3859
  note_dur = note[2]
3860
 
3861
  new_note = [note[0], note[1], note_dur] + note[3:]
3862
-
3863
- fixed_score.append(new_note)
3864
-
3865
- fixed_score.append(monophonic_score[-1])
 
 
3866
 
3867
  elif type(monophonic_score[0][0]) == int:
3868
 
@@ -3872,15 +3903,17 @@ def fix_monophonic_score_durations(monophonic_score):
3872
  nmt = monophonic_score[i+1][0]
3873
 
3874
  if note[0]+note[1] >= nmt:
3875
- note_dur = nmt-note[0]-1
3876
  else:
3877
  note_dur = note[1]
3878
-
3879
  new_note = [note[0], note_dur] + note[2:]
3880
-
3881
- fixed_score.append(new_note)
3882
-
3883
- fixed_score.append(monophonic_score[-1])
 
 
3884
 
3885
  return fixed_score
3886
 
@@ -4142,15 +4175,16 @@ def tones_chord_to_pitches(tones_chord, base_pitch=60):
4142
  ###################################################################################
4143
 
4144
  def advanced_score_processor(raw_score,
4145
- patches_to_analyze=list(range(129)),
4146
- return_score_analysis=False,
4147
- return_enhanced_score=False,
4148
- return_enhanced_score_notes=False,
4149
- return_enhanced_monophonic_melody=False,
4150
- return_chordified_enhanced_score=False,
4151
- return_chordified_enhanced_score_with_lyrics=False,
4152
- return_score_tones_chords=False,
4153
- return_text_and_lyric_events=False
 
4154
  ):
4155
 
4156
  '''TMIDIX Advanced Score Processor'''
@@ -4179,6 +4213,20 @@ def advanced_score_processor(raw_score,
4179
  basic_single_track_score.append(ev)
4180
  num_tracks += 1
4181
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4182
  basic_single_track_score.sort(key=lambda x: x[4] if x[0] == 'note' else 128, reverse=True)
4183
  basic_single_track_score.sort(key=lambda x: x[1])
4184
 
@@ -4193,7 +4241,7 @@ def advanced_score_processor(raw_score,
4193
  enhanced_single_track_score.append(event)
4194
  num_patch_changes += 1
4195
 
4196
- if event[0] == 'note':
4197
  if event[3] != 9:
4198
  event.extend([patches[event[3]]])
4199
  all_score_patches.extend([patches[event[3]]])
@@ -4693,7 +4741,8 @@ def augment_enhanced_score_notes(enhanced_score_notes,
4693
  ceil_timings=False,
4694
  round_timings=False,
4695
  legacy_timings=True,
4696
- sort_drums_last=False
 
4697
  ):
4698
 
4699
  esn = copy.deepcopy(enhanced_score_notes)
@@ -4736,6 +4785,16 @@ def augment_enhanced_score_notes(enhanced_score_notes,
4736
  e[4] = max(1, min(127, e[4] + pitch_shift))
4737
 
4738
  pe = enhanced_score_notes[i]
 
 
 
 
 
 
 
 
 
 
4739
 
4740
  if full_sorting:
4741
 
@@ -6676,12 +6735,23 @@ def find_next_bar(escore_notes, bar_time, start_note_idx, cur_bar):
6676
  def align_escore_notes_to_bars(escore_notes,
6677
  bar_time=4000,
6678
  trim_durations=False,
6679
- split_durations=False
 
6680
  ):
6681
 
6682
  #=============================================================================
 
 
 
 
 
 
 
 
 
 
6683
 
6684
- aligned_escore_notes = copy.deepcopy(escore_notes)
6685
 
6686
  abs_time = 0
6687
  nidx = 0
@@ -6693,13 +6763,13 @@ def align_escore_notes_to_bars(escore_notes,
6693
 
6694
  while next_bar:
6695
 
6696
- next_bar = find_next_bar(escore_notes, bar_time, nidx, bcount)
6697
 
6698
  if next_bar:
6699
-
6700
- gescore_notes = escore_notes[nidx:next_bar[1]]
6701
  else:
6702
- gescore_notes = escore_notes[nidx:]
6703
 
6704
  original_timings = [delta] + [(b[1]-a[1]) for a, b in zip(gescore_notes[:-1], gescore_notes[1:])]
6705
  adj_timings = adjust_numbers_to_sum(original_timings, bar_time)
@@ -6714,7 +6784,8 @@ def align_escore_notes_to_bars(escore_notes,
6714
  nidx += 1
6715
 
6716
  if next_bar:
6717
- delta = escore_notes[next_bar[1]][1]-escore_notes[next_bar[1]-1][1]
 
6718
  bcount += 1
6719
 
6720
  #=============================================================================
@@ -11125,13 +11196,17 @@ def escore_notes_core(escore_notes, core_len=128):
11125
 
11126
  ###################################################################################
11127
 
11128
- def multiprocessing_wrapper(function, data_list):
11129
 
11130
  with multiprocessing.Pool() as pool:
11131
 
11132
  results = []
11133
 
11134
- for result in tqdm.tqdm(pool.imap_unordered(function, data_list), total=len(data_list)):
 
 
 
 
11135
  results.append(result)
11136
 
11137
  return results
@@ -11182,7 +11257,2171 @@ def rle_decode_ones(encoding, size=(128, 128)):
11182
  return matrix
11183
 
11184
  ###################################################################################
11185
- #
11186
- # This is the end of the TMIDI X Python module
11187
- #
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11188
  ###################################################################################
 
1
  #! /usr/bin/python3
2
 
 
3
  r'''###############################################################################
4
  ###################################################################################
5
  #
6
  #
7
  # Tegridy MIDI X Module (TMIDI X / tee-midi eks)
 
8
  #
9
+ # NOTE: TMIDI X Module starts after the partial MIDI.py module @ line 1450
10
  #
11
  # Based upon MIDI.py module v.6.7. by Peter Billam / pjb.com.au
12
  #
 
19
  #
20
  ###################################################################################
21
  ###################################################################################
22
+ # Copyright 2025 Project Los Angeles / Tegridy Code
23
  #
24
+ # Licensed under the Apache License, Version 2.0 (the "License");
25
+ # you may not use this file except in compliance with the License.
26
+ # You may obtain a copy of the License at
27
  #
28
+ # http://www.apache.org/licenses/LICENSE-2.0
29
  #
30
+ # Unless required by applicable law or agreed to in writing, software
31
+ # distributed under the License is distributed on an "AS IS" BASIS,
32
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
33
+ # See the License for the specific language governing permissions and
34
+ # limitations under the License.
35
  ###################################################################################
36
  ###################################################################################
37
  #
 
46
  # Copyright 2020 Peter Billam
47
  #
48
  ###################################################################################
49
+ ###################################################################################
50
+ '''
51
+
52
+ ###################################################################################
53
+
54
+ __version__ = "25.7.5"
55
+
56
+ print('=' * 70)
57
+ print('TMIDIX Python module')
58
+ print('Version:', __version__)
59
+ print('=' * 70)
60
+ print('Loading module...')
61
+
62
+ ###################################################################################
63
 
64
  import sys, struct, copy
65
+
66
  Version = '6.7'
67
  VersionDate = '20201120'
68
 
 
1452
  ###################################################################################
1453
  #
1454
  # Tegridy MIDI X Module (TMIDI X / tee-midi eks)
 
1455
  #
1456
  # Based upon and includes the amazing MIDI.py module v.6.7. by Peter Billam
1457
  # pjb.com.au
1458
  #
1459
  # Project Los Angeles
1460
+ # Tegridy Code 2025
1461
+ #
1462
+ # https://github.com/Tegridy-Code/Project-Los-Angeles
1463
  #
1464
  ###################################################################################
1465
  ###################################################################################
 
1469
 
1470
  import datetime
1471
 
 
 
1472
  from datetime import datetime
1473
 
1474
  import secrets
 
1485
 
1486
  from itertools import zip_longest
1487
  from itertools import groupby
1488
+
1489
  from collections import Counter
1490
+ from collections import defaultdict
1491
+ from collections import OrderedDict
1492
 
1493
  from operator import itemgetter
1494
 
 
 
1495
  from abc import ABC, abstractmethod
1496
 
1497
  from difflib import SequenceMatcher as SM
 
1501
 
1502
  import matplotlib.pyplot as plt
1503
 
1504
+ import psutil
1505
+
1506
+ import json
1507
+
1508
+ from pathlib import Path
1509
+
1510
+ import shutil
1511
+
1512
+ import hashlib
1513
+
1514
+ from array import array
1515
+
1516
+ from pathlib import Path
1517
+ from fnmatch import fnmatch
1518
+
1519
  ###################################################################################
1520
  #
1521
  # Original TMIDI Tegridy helper functions
 
3868
  else:
3869
  return None
3870
 
3871
+ def fix_monophonic_score_durations(monophonic_score,
3872
+ min_notes_gap=1,
3873
+ min_notes_dur=1
3874
+ ):
3875
 
3876
  fixed_score = []
3877
 
 
3883
  nmt = monophonic_score[i+1][1]
3884
 
3885
  if note[1]+note[2] >= nmt:
3886
+ note_dur = max(1, nmt-note[1]-min_notes_gap)
3887
  else:
3888
  note_dur = note[2]
3889
 
3890
  new_note = [note[0], note[1], note_dur] + note[3:]
3891
+
3892
+ if new_note[2] >= min_notes_dur:
3893
+ fixed_score.append(new_note)
3894
+
3895
+ if monophonic_score[-1][2] >= min_notes_dur:
3896
+ fixed_score.append(monophonic_score[-1])
3897
 
3898
  elif type(monophonic_score[0][0]) == int:
3899
 
 
3903
  nmt = monophonic_score[i+1][0]
3904
 
3905
  if note[0]+note[1] >= nmt:
3906
+ note_dur = max(1, nmt-note[0]-min_notes_gap)
3907
  else:
3908
  note_dur = note[1]
3909
+
3910
  new_note = [note[0], note_dur] + note[2:]
3911
+
3912
+ if new_note[1] >= min_notes_dur:
3913
+ fixed_score.append(new_note)
3914
+
3915
+ if monophonic_score[-1][1] >= min_notes_dur:
3916
+ fixed_score.append(monophonic_score[-1])
3917
 
3918
  return fixed_score
3919
 
 
4175
  ###################################################################################
4176
 
4177
  def advanced_score_processor(raw_score,
4178
+ patches_to_analyze=list(range(129)),
4179
+ return_score_analysis=False,
4180
+ return_enhanced_score=False,
4181
+ return_enhanced_score_notes=False,
4182
+ return_enhanced_monophonic_melody=False,
4183
+ return_chordified_enhanced_score=False,
4184
+ return_chordified_enhanced_score_with_lyrics=False,
4185
+ return_score_tones_chords=False,
4186
+ return_text_and_lyric_events=False,
4187
+ apply_sustain=False
4188
  ):
4189
 
4190
  '''TMIDIX Advanced Score Processor'''
 
4213
  basic_single_track_score.append(ev)
4214
  num_tracks += 1
4215
 
4216
+ for e in basic_single_track_score:
4217
+
4218
+ if e[0] == 'note':
4219
+ e[3] = e[3] % 16
4220
+ e[4] = e[4] % 128
4221
+ e[5] = e[5] % 128
4222
+
4223
+ if e[0] == 'patch_change':
4224
+ e[2] = e[2] % 16
4225
+ e[3] = e[3] % 128
4226
+
4227
+ if apply_sustain:
4228
+ apply_sustain_to_ms_score([1000, basic_single_track_score])
4229
+
4230
  basic_single_track_score.sort(key=lambda x: x[4] if x[0] == 'note' else 128, reverse=True)
4231
  basic_single_track_score.sort(key=lambda x: x[1])
4232
 
 
4241
  enhanced_single_track_score.append(event)
4242
  num_patch_changes += 1
4243
 
4244
+ if event[0] == 'note':
4245
  if event[3] != 9:
4246
  event.extend([patches[event[3]]])
4247
  all_score_patches.extend([patches[event[3]]])
 
4741
  ceil_timings=False,
4742
  round_timings=False,
4743
  legacy_timings=True,
4744
+ sort_drums_last=False,
4745
+ even_timings=False
4746
  ):
4747
 
4748
  esn = copy.deepcopy(enhanced_score_notes)
 
4785
  e[4] = max(1, min(127, e[4] + pitch_shift))
4786
 
4787
  pe = enhanced_score_notes[i]
4788
+
4789
+
4790
+ if even_timings:
4791
+
4792
+ for e in esn:
4793
+ if e[1] % 2 != 0:
4794
+ e[1] += 1
4795
+
4796
+ if e[2] % 2 != 0:
4797
+ e[2] += 1
4798
 
4799
  if full_sorting:
4800
 
 
6735
  def align_escore_notes_to_bars(escore_notes,
6736
  bar_time=4000,
6737
  trim_durations=False,
6738
+ split_durations=False,
6739
+ even_timings=False
6740
  ):
6741
 
6742
  #=============================================================================
6743
+
6744
+ escore = copy.deepcopy(escore_notes)
6745
+
6746
+ if even_timings:
6747
+ for e in escore:
6748
+ if e[1] % 2 != 0:
6749
+ e[1] += 1
6750
+
6751
+ if e[2] % 2 != 0:
6752
+ e[2] += 1
6753
 
6754
+ aligned_escore_notes = copy.deepcopy(escore)
6755
 
6756
  abs_time = 0
6757
  nidx = 0
 
6763
 
6764
  while next_bar:
6765
 
6766
+ next_bar = find_next_bar(escore, bar_time, nidx, bcount)
6767
 
6768
  if next_bar:
6769
+ gescore_notes = escore[nidx:next_bar[1]]
6770
+
6771
  else:
6772
+ gescore_notes = escore[nidx:]
6773
 
6774
  original_timings = [delta] + [(b[1]-a[1]) for a, b in zip(gescore_notes[:-1], gescore_notes[1:])]
6775
  adj_timings = adjust_numbers_to_sum(original_timings, bar_time)
 
6784
  nidx += 1
6785
 
6786
  if next_bar:
6787
+ delta = escore[next_bar[1]][1]-escore[next_bar[1]-1][1]
6788
+
6789
  bcount += 1
6790
 
6791
  #=============================================================================
 
11196
 
11197
  ###################################################################################
11198
 
11199
def multiprocessing_wrapper(function, data_list, verbose=True):

    '''Map function over data_list with a process pool.

    Results are collected in completion order (imap_unordered), so the
    output order generally does not match the input order.

    A tqdm progress bar is shown unless verbose is False.
    '''

    results = []

    with multiprocessing.Pool() as pool:

        unordered_iter = pool.imap_unordered(function, data_list)

        for res in tqdm.tqdm(unordered_iter,
                             total=len(data_list),
                             disable=not verbose
                             ):
            results.append(res)

    return results
 
11257
  return matrix
11258
 
11259
  ###################################################################################
11260
+
11261
def vertical_list_search(list_of_lists, trg_list):

    '''Find every vertical occurrence of trg_list in list_of_lists.

    A match is a run of len(trg_list) consecutive rows where row
    (start + offset) contains trg_list[offset] for every offset.

    Returns a list of row-index lists, one per matching window
    (empty list when either argument is empty).
    '''

    src_list = list_of_lists

    if not src_list or not trg_list:
        return []

    window = len(trg_list)

    # Precompute per-row membership sets for O(1) containment tests.
    memberships = [set(row) for row in src_list]

    matches = []

    for top in range(len(src_list) - window + 1):

        if all(target in memberships[top + off]
               for off, target in enumerate(trg_list)):
            matches.append(list(range(top, top + window)))

    return matches
11288
+
11289
+ ###################################################################################
11290
+
11291
def smooth_values(values, window_size=3):

    '''Smooth a numeric sequence with a centered moving average.

    Each output element is the integer-truncated mean of a window of up
    to window_size neighbours centered on the element; the window is
    clipped at both ends of the sequence.
    '''

    half = window_size // 2
    total = len(values)

    smoothed = []

    for idx in range(total):

        lo = max(0, idx - half)
        hi = min(total, idx + half + 1)

        chunk = values[lo:hi]

        # int() truncates toward zero, matching the original behavior.
        smoothed.append(int(sum(chunk) / len(chunk)))

    return smoothed
11305
+
11306
+ ###################################################################################
11307
+
11308
def is_mostly_wide_peaks_and_valleys(values,
                                     min_range=32,
                                     threshold=0.7,
                                     smoothing_window=5
                                     ):

    '''Heuristic test for a "wavy" sequence.

    Returns True when the smoothed sequence spans at least min_range and
    at least `threshold` (fraction) of its step-to-step moves are
    strictly up or strictly down; flat or empty inputs return False.
    '''

    if not values:
        return False

    smoothed = smooth_values(values, smoothing_window)

    # Too narrow a span cannot be a meaningful peak/valley pattern.
    if max(smoothed) - min(smoothed) < min_range:
        return False

    # A constant sequence has no trend at all.
    if all(v == smoothed[0] for v in smoothed):
        return False

    moves = []

    for prev, cur in zip(smoothed, smoothed[1:]):

        if cur > prev:
            moves.append(1)

        elif cur < prev:
            moves.append(-1)

        else:
            moves.append(0)

    directional = sum(1 for m in moves if m != 0)

    return directional / len(moves) >= threshold
11344
+
11345
+ ###################################################################################
11346
+
11347
def system_memory_utilization(return_dict=False):

    '''Report system RAM utilization via psutil.

    With return_dict=True, returns psutil.virtual_memory() as a plain
    dict; otherwise prints the percent used and GB used and returns None.
    '''

    vmem = psutil.virtual_memory()

    if return_dict:
        return dict(vmem._asdict())

    print('RAM memory % used:', vmem[2])
    print('RAM Used (GB):', vmem[3] / (1024 ** 3))
11356
+ ###################################################################################
11357
+
11358
def system_cpus_utilization(return_dict=False):

    '''Report CPU count and utilization via psutil.

    With return_dict=True, returns {'num_cpus': ..., 'cpus_util': ...};
    otherwise prints both values and returns None.
    '''

    if return_dict:
        return {'num_cpus': psutil.cpu_count(),
                'cpus_util': psutil.cpu_percent()
                }

    print('Number of CPUs:', psutil.cpu_count())
    print('CPUs utilization:', psutil.cpu_percent())
11368
+
11369
+ ###################################################################################
11370
+
11371
def create_files_list(datasets_paths=['./'],
                      files_exts=['.mid', '.midi', '.kar', '.MID', '.MIDI', '.KAR'],
                      max_num_files_per_dir=-1,
                      randomize_dir_files=False,
                      max_total_files=-1,
                      randomize_files_list=True,
                      check_for_dupes=False,
                      use_md5_hashes=False,
                      return_dupes=False,
                      verbose=True
                      ):

    '''Recursively collect files with the given extensions from datasets_paths.

    Args:
        datasets_paths: list of root directories to walk.
        files_exts: accepted file extensions (matched with str.endswith,
            so matching is case-sensitive).
        max_num_files_per_dir: per-directory cap on files considered
            (<= 0 means no cap).
        randomize_dir_files: shuffle each directory's file list before
            applying the per-directory cap.
        max_total_files: cap on the returned list length (<= 0 means no cap).
        randomize_files_list: shuffle the final list before returning.
        check_for_dupes: deduplicate by file name, or by MD5 content hash
            when use_md5_hashes is also True; without it, deduplication is
            by full path only.
        return_dupes: also return the list of skipped duplicate paths.
        verbose: print progress information.

    Returns:
        A list of file paths, or (files, dupes) when return_dupes is True.
    '''

    if verbose:
        print('=' * 70)
        print('Searching for files...')
        print('This may take a while on a large dataset in particular...')
        print('=' * 70)

    files_exts = tuple(files_exts)

    # Insertion-ordered mapping of dedupe key -> file path. A plain dict
    # replaces defaultdict(None), which behaved identically anyway (a None
    # factory never supplies default values).
    filez_set = {}
    dupes_list = []

    for dataset_addr in datasets_paths:

        # Guarded by verbose for consistency with every other progress print.
        if verbose:
            print('=' * 70)
            print('Processing', dataset_addr)
            print('=' * 70)

        for dirpath, dirnames, filenames in tqdm.tqdm(os.walk(dataset_addr), disable=not verbose):

            if randomize_dir_files:
                random.shuffle(filenames)

            if max_num_files_per_dir > 0:
                max_num_files = max_num_files_per_dir

            else:
                max_num_files = len(filenames)

            for file in filenames[:max_num_files]:
                if file.endswith(files_exts):
                    if check_for_dupes:

                        if use_md5_hashes:
                            # Context manager so the handle is closed promptly
                            # (the original leaked one open file per hash).
                            with open(os.path.join(dirpath, file), 'rb') as f:
                                md5_hash = hashlib.md5(f.read()).hexdigest()

                            if md5_hash not in filez_set:
                                filez_set[md5_hash] = os.path.join(dirpath, file)

                            else:
                                dupes_list.append(os.path.join(dirpath, file))

                        else:
                            if file not in filez_set:
                                filez_set[file] = os.path.join(dirpath, file)

                            else:
                                dupes_list.append(os.path.join(dirpath, file))
                    else:
                        fpath = os.path.join(dirpath, file)
                        filez_set[fpath] = fpath

    filez = list(filez_set.values())

    if verbose:
        print('Done!')
        print('=' * 70)

    if filez:
        if randomize_files_list:

            if verbose:
                print('Randomizing file list...')

            random.shuffle(filez)

            if verbose:
                print('Done!')
                print('=' * 70)

        if verbose:
            print('Found', len(filez), 'files.')
            print('Skipped', len(dupes_list), 'duplicate files.')
            print('=' * 70)

    else:
        if verbose:
            print('Could not find any files...')
            print('Please check dataset dirs and files extensions...')
            print('=' * 70)

    if max_total_files > 0:
        if return_dupes:
            return filez[:max_total_files], dupes_list

        else:
            return filez[:max_total_files]

    else:
        if return_dupes:
            return filez, dupes_list

        else:
            return filez
11476
+
11477
+ ###################################################################################
11478
+
11479
def has_consecutive_trend(nums, count):

    '''Return True when nums contains a run of `count` strictly increasing
    OR strictly decreasing consecutive values.

    Runs reset on ties and on direction reversals.
    '''

    if len(nums) < count:
        return False

    up_run = down_run = 1

    for prev, cur in zip(nums, nums[1:]):

        if cur > prev:
            up_run += 1
            down_run = 1

        elif cur < prev:
            down_run += 1
            up_run = 1

        else:
            up_run = down_run = 1

        if count in (up_run, down_run):
            return True

    return False
11503
+
11504
+ ###################################################################################
11505
+
11506
+ def escore_notes_primary_features(escore_notes):
11507
+
11508
+ #=================================================================
11509
+
11510
+ def mean(values):
11511
+ return sum(values) / len(values) if values else None
11512
+
11513
+ def std(values):
11514
+ if not values:
11515
+ return None
11516
+ m = mean(values)
11517
+ return math.sqrt(sum((x - m) ** 2 for x in values) / len(values)) if m is not None else None
11518
+
11519
+ def skew(values):
11520
+ if not values:
11521
+ return None
11522
+ m = mean(values)
11523
+ s = std(values)
11524
+ if s is None or s == 0:
11525
+ return None
11526
+ return sum(((x - m) / s) ** 3 for x in values) / len(values)
11527
+
11528
+ def kurtosis(values):
11529
+ if not values:
11530
+ return None
11531
+ m = mean(values)
11532
+ s = std(values)
11533
+ if s is None or s == 0:
11534
+ return None
11535
+ return sum(((x - m) / s) ** 4 for x in values) / len(values) - 3
11536
+
11537
+ def median(values):
11538
+ if not values:
11539
+ return None
11540
+ srt = sorted(values)
11541
+ n = len(srt)
11542
+ mid = n // 2
11543
+ if n % 2 == 0:
11544
+ return (srt[mid - 1] + srt[mid]) / 2.0
11545
+ return srt[mid]
11546
+
11547
+ def percentile(values, p):
11548
+ if not values:
11549
+ return None
11550
+ srt = sorted(values)
11551
+ n = len(srt)
11552
+ k = (n - 1) * p / 100.0
11553
+ f = int(k)
11554
+ c = k - f
11555
+ if f + 1 < n:
11556
+ return srt[f] * (1 - c) + srt[f + 1] * c
11557
+ return srt[f]
11558
+
11559
+ def diff(values):
11560
+ if not values or len(values) < 2:
11561
+ return []
11562
+ return [values[i + 1] - values[i] for i in range(len(values) - 1)]
11563
+
11564
+ def mad(values):
11565
+ if not values:
11566
+ return None
11567
+ m = median(values)
11568
+ return median([abs(x - m) for x in values])
11569
+
11570
+ def entropy(values):
11571
+ if not values:
11572
+ return None
11573
+ freq = {}
11574
+ for v in values:
11575
+ freq[v] = freq.get(v, 0) + 1
11576
+ total = len(values)
11577
+ ent = 0.0
11578
+ for count in freq.values():
11579
+ p_val = count / total
11580
+ ent -= p_val * math.log2(p_val)
11581
+ return ent
11582
+
11583
+ def mode(values):
11584
+ if not values:
11585
+ return None
11586
+ freq = {}
11587
+ for v in values:
11588
+ freq[v] = freq.get(v, 0) + 1
11589
+ max_count = max(freq.values())
11590
+ modes = [k for k, count in freq.items() if count == max_count]
11591
+ return min(modes)
11592
+
11593
+
11594
+ #=================================================================
11595
+
11596
+ sp_score = solo_piano_escore_notes(escore_notes)
11597
+
11598
+ dscore = delta_score_notes(sp_score)
11599
+
11600
+ seq = []
11601
+
11602
+ for d in dscore:
11603
+ seq.extend([d[1], d[2], d[4]])
11604
+
11605
+ #=================================================================
11606
+
11607
+ n = len(seq)
11608
+ if n % 3 != 0:
11609
+ seq = seq[: n - (n % 3)]
11610
+ arr = [seq[i:i + 3] for i in range(0, len(seq), 3)]
11611
+
11612
+ #=================================================================
11613
+
11614
+ features = {}
11615
+
11616
+ delta_times = [row[0] for row in arr]
11617
+ if delta_times:
11618
+ features['delta_times_mean'] = mean(delta_times)
11619
+ features['delta_times_std'] = std(delta_times)
11620
+ features['delta_times_min'] = min(delta_times)
11621
+ features['delta_times_max'] = max(delta_times)
11622
+ features['delta_times_skew'] = skew(delta_times)
11623
+ features['delta_times_kurtosis'] = kurtosis(delta_times)
11624
+ delta_zero_count = sum(1 for x in delta_times if x == 0)
11625
+ features['delta_times_zero_ratio'] = delta_zero_count / len(delta_times)
11626
+ nonzero_dt = [x for x in delta_times if x != 0]
11627
+ if nonzero_dt:
11628
+ features['delta_times_nonzero_mean'] = mean(nonzero_dt)
11629
+ features['delta_times_nonzero_std'] = std(nonzero_dt)
11630
+ else:
11631
+ features['delta_times_nonzero_mean'] = None
11632
+ features['delta_times_nonzero_std'] = None
11633
+ features['delta_times_mad'] = mad(delta_times)
11634
+ features['delta_times_cv'] = (features['delta_times_std'] / features['delta_times_mean']
11635
+ if features['delta_times_mean'] and features['delta_times_mean'] != 0 else None)
11636
+ features['delta_times_entropy'] = entropy(delta_times)
11637
+ features['delta_times_range'] = max(delta_times) - min(delta_times)
11638
+ features['delta_times_median'] = median(delta_times)
11639
+ features['delta_times_quantile_25'] = percentile(delta_times, 25)
11640
+ features['delta_times_quantile_75'] = percentile(delta_times, 75)
11641
+ if (features['delta_times_quantile_25'] is not None and features['delta_times_quantile_75'] is not None):
11642
+ features['delta_times_iqr'] = features['delta_times_quantile_75'] - features['delta_times_quantile_25']
11643
+ else:
11644
+ features['delta_times_iqr'] = None
11645
+ else:
11646
+ for key in ['delta_times_mean', 'delta_times_std', 'delta_times_min', 'delta_times_max',
11647
+ 'delta_times_skew', 'delta_times_kurtosis', 'delta_times_zero_ratio',
11648
+ 'delta_times_nonzero_mean', 'delta_times_nonzero_std', 'delta_times_mad',
11649
+ 'delta_times_cv', 'delta_times_entropy', 'delta_times_range', 'delta_times_median',
11650
+ 'delta_times_quantile_25', 'delta_times_quantile_75', 'delta_times_iqr']:
11651
+ features[key] = None
11652
+
11653
+ #=================================================================
11654
+
11655
+ durations = [row[1] for row in arr]
11656
+ if durations:
11657
+ features['durations_mean'] = mean(durations)
11658
+ features['durations_std'] = std(durations)
11659
+ features['durations_min'] = min(durations)
11660
+ features['durations_max'] = max(durations)
11661
+ features['durations_skew'] = skew(durations)
11662
+ features['durations_kurtosis'] = kurtosis(durations)
11663
+ features['durations_mad'] = mad(durations)
11664
+ features['durations_cv'] = (features['durations_std'] / features['durations_mean']
11665
+ if features['durations_mean'] and features['durations_mean'] != 0 else None)
11666
+ features['durations_entropy'] = entropy(durations)
11667
+ features['durations_range'] = max(durations) - min(durations)
11668
+ features['durations_median'] = median(durations)
11669
+ features['durations_quantile_25'] = percentile(durations, 25)
11670
+ features['durations_quantile_75'] = percentile(durations, 75)
11671
+ if features['durations_quantile_25'] is not None and features['durations_quantile_75'] is not None:
11672
+ features['durations_iqr'] = features['durations_quantile_75'] - features['durations_quantile_25']
11673
+ else:
11674
+ features['durations_iqr'] = None
11675
+ else:
11676
+ for key in ['durations_mean', 'durations_std', 'durations_min', 'durations_max',
11677
+ 'durations_skew', 'durations_kurtosis', 'durations_mad', 'durations_cv',
11678
+ 'durations_entropy', 'durations_range', 'durations_median', 'durations_quantile_25',
11679
+ 'durations_quantile_75', 'durations_iqr']:
11680
+ features[key] = None
11681
+
11682
+ #=================================================================
11683
+
11684
+ pitches = [row[2] for row in arr]
11685
+ if pitches:
11686
+ features['pitches_mean'] = mean(pitches)
11687
+ features['pitches_std'] = std(pitches)
11688
+ features['pitches_min'] = min(pitches)
11689
+ features['pitches_max'] = max(pitches)
11690
+ features['pitches_skew'] = skew(pitches)
11691
+ features['pitches_kurtosis'] = kurtosis(pitches)
11692
+ features['pitches_range'] = max(pitches) - min(pitches)
11693
+ features['pitches_median'] = median(pitches)
11694
+ features['pitches_quantile_25'] = percentile(pitches, 25)
11695
+ features['pitches_quantile_75'] = percentile(pitches, 75)
11696
+ if len(pitches) > 1:
11697
+ dps = diff(pitches)
11698
+ features['pitches_diff_mean'] = mean(dps)
11699
+ features['pitches_diff_std'] = std(dps)
11700
+ else:
11701
+ features['pitches_diff_mean'] = None
11702
+ features['pitches_diff_std'] = None
11703
+ features['pitches_mad'] = mad(pitches)
11704
+ if len(pitches) > 2:
11705
+ peaks = sum(1 for i in range(1, len(pitches)-1)
11706
+ if pitches[i] > pitches[i-1] and pitches[i] > pitches[i+1])
11707
+ valleys = sum(1 for i in range(1, len(pitches)-1)
11708
+ if pitches[i] < pitches[i-1] and pitches[i] < pitches[i+1])
11709
+ else:
11710
+ peaks, valleys = None, None
11711
+ features['pitches_peak_count'] = peaks
11712
+ features['pitches_valley_count'] = valleys
11713
+ if len(pitches) > 1:
11714
+ x = list(range(len(pitches)))
11715
+ denominator = (len(x) * sum(xi ** 2 for xi in x) - sum(x) ** 2)
11716
+ if denominator != 0:
11717
+ slope = (len(x) * sum(x[i] * pitches[i] for i in range(len(x))) -
11718
+ sum(x) * sum(pitches)) / denominator
11719
+ else:
11720
+ slope = None
11721
+ features['pitches_trend_slope'] = slope
11722
+ else:
11723
+ features['pitches_trend_slope'] = None
11724
+
11725
+ features['pitches_unique_count'] = len(set(pitches))
11726
+ pitch_class_hist = {i: 0 for i in range(12)}
11727
+ for p in pitches:
11728
+ pitch_class_hist[p % 12] += 1
11729
+ total_pitch = len(pitches)
11730
+ for i in range(12):
11731
+ features[f'pitches_pc_{i}'] = (pitch_class_hist[i] / total_pitch) if total_pitch > 0 else None
11732
+
11733
+ max_asc = 0
11734
+ cur_asc = 0
11735
+ max_desc = 0
11736
+ cur_desc = 0
11737
+ for i in range(1, len(pitches)):
11738
+ if pitches[i] > pitches[i-1]:
11739
+ cur_asc += 1
11740
+ max_asc = max(max_asc, cur_asc)
11741
+ cur_desc = 0
11742
+ elif pitches[i] < pitches[i-1]:
11743
+ cur_desc += 1
11744
+ max_desc = max(max_desc, cur_desc)
11745
+ cur_asc = 0
11746
+ else:
11747
+ cur_asc = 0
11748
+ cur_desc = 0
11749
+ features['pitches_max_consecutive_ascending'] = max_asc if pitches else None
11750
+ features['pitches_max_consecutive_descending'] = max_desc if pitches else None
11751
+ p_intervals = diff(pitches)
11752
+ features['pitches_median_diff'] = median(p_intervals) if p_intervals else None
11753
+ if p_intervals:
11754
+ dc = sum(1 for i in range(1, len(p_intervals))
11755
+ if (p_intervals[i] > 0 and p_intervals[i-1] < 0) or (p_intervals[i] < 0 and p_intervals[i-1] > 0))
11756
+ features['pitches_direction_changes'] = dc
11757
+ else:
11758
+ features['pitches_direction_changes'] = None
11759
+ else:
11760
+ for key in (['pitches_mean', 'pitches_std', 'pitches_min', 'pitches_max', 'pitches_skew',
11761
+ 'pitches_kurtosis', 'pitches_range', 'pitches_median', 'pitches_quantile_25',
11762
+ 'pitches_quantile_75', 'pitches_diff_mean', 'pitches_diff_std', 'pitches_mad',
11763
+ 'pitches_peak_count', 'pitches_valley_count', 'pitches_trend_slope',
11764
+ 'pitches_unique_count', 'pitches_max_consecutive_ascending', 'pitches_max_consecutive_descending',
11765
+ 'pitches_median_diff', 'pitches_direction_changes'] +
11766
+ [f'pitches_pc_{i}' for i in range(12)]):
11767
+ features[key] = None
11768
+
11769
+ #=================================================================
11770
+
11771
+ overall = [x for row in arr for x in row]
11772
+ if overall:
11773
+ features['overall_mean'] = mean(overall)
11774
+ features['overall_std'] = std(overall)
11775
+ features['overall_min'] = min(overall)
11776
+ features['overall_max'] = max(overall)
11777
+ features['overall_cv'] = (features['overall_std'] / features['overall_mean']
11778
+ if features['overall_mean'] and features['overall_mean'] != 0 else None)
11779
+ else:
11780
+ for key in ['overall_mean', 'overall_std', 'overall_min', 'overall_max', 'overall_cv']:
11781
+ features[key] = None
11782
+
11783
+ #=================================================================
11784
+
11785
+ onsets = []
11786
+ cumulative = 0
11787
+ for dt in delta_times:
11788
+ onsets.append(cumulative)
11789
+ cumulative += dt
11790
+ if onsets and durations:
11791
+ overall_piece_duration = onsets[-1] + durations[-1]
11792
+ else:
11793
+ overall_piece_duration = None
11794
+ features['overall_piece_duration'] = overall_piece_duration
11795
+ features['overall_notes_density'] = (len(arr) / overall_piece_duration
11796
+ if overall_piece_duration and overall_piece_duration > 0 else None)
11797
+ features['rhythm_ratio'] = (features['durations_mean'] / features['delta_times_mean']
11798
+ if features['delta_times_mean'] and features['delta_times_mean'] != 0 else None)
11799
+ features['overall_sum_delta_times'] = (sum(delta_times) if delta_times else None)
11800
+ features['overall_sum_durations'] = (sum(durations) if durations else None)
11801
+ features['overall_voicing_ratio'] = (sum(durations) / overall_piece_duration
11802
+ if overall_piece_duration and durations else None)
11803
+ features['overall_onset_std'] = std(onsets) if onsets else None
11804
+
11805
+ #=================================================================
11806
+
11807
+ chords_raw = []
11808
+ chords_pc = []
11809
+ current_group = []
11810
+ for i, note in enumerate(arr):
11811
+ dt = note[0]
11812
+ if i == 0:
11813
+ current_group = [i]
11814
+ else:
11815
+ if dt == 0:
11816
+ current_group.append(i)
11817
+ else:
11818
+ if len(current_group) >= 2:
11819
+ chord_notes = [arr[j][2] for j in current_group]
11820
+ chords_raw.append(tuple(sorted(chord_notes)))
11821
+ chords_pc.append(tuple(sorted(set(p % 12 for p in chord_notes))))
11822
+
11823
+ current_group = [i]
11824
+
11825
+ if current_group and len(current_group) >= 2:
11826
+ chord_notes = [arr[j][2] for j in current_group]
11827
+ chords_raw.append(tuple(sorted(chord_notes)))
11828
+ chords_pc.append(tuple(sorted(set(p % 12 for p in chord_notes))))
11829
+
11830
+ if chords_raw:
11831
+ chord_count = len(chords_raw)
11832
+ features['chords_count'] = chord_count
11833
+ features['chords_density'] = (chord_count / overall_piece_duration
11834
+ if overall_piece_duration and chord_count is not None else None)
11835
+ chord_sizes = [len(ch) for ch in chords_raw]
11836
+ features['chords_size_mean'] = mean(chord_sizes)
11837
+ features['chords_size_std'] = std(chord_sizes)
11838
+ features['chords_size_min'] = min(chord_sizes) if chord_sizes else None
11839
+ features['chords_size_max'] = max(chord_sizes) if chord_sizes else None
11840
+ features['chords_unique_raw_count'] = len(set(chords_raw))
11841
+ features['chords_unique_pc_count'] = len(set(chords_pc))
11842
+ features['chords_entropy_raw'] = entropy(chords_raw)
11843
+ features['chords_entropy_pc'] = entropy(chords_pc)
11844
+ if len(chords_raw) > 1:
11845
+ rep_raw = sum(1 for i in range(1, len(chords_raw)) if chords_raw[i] == chords_raw[i - 1])
11846
+ features['chords_repeat_ratio_raw'] = rep_raw / (len(chords_raw) - 1)
11847
+ else:
11848
+ features['chords_repeat_ratio_raw'] = None
11849
+ if len(chords_pc) > 1:
11850
+ rep_pc = sum(1 for i in range(1, len(chords_pc)) if chords_pc[i] == chords_pc[i - 1])
11851
+ features['chords_repeat_ratio_pc'] = rep_pc / (len(chords_pc) - 1)
11852
+ else:
11853
+ features['chords_repeat_ratio_pc'] = None
11854
+ if len(chords_raw) > 1:
11855
+ bigrams_raw = [(chords_raw[i], chords_raw[i + 1]) for i in range(len(chords_raw) - 1)]
11856
+ features['chords_bigram_entropy_raw'] = entropy(bigrams_raw)
11857
+ else:
11858
+ features['chords_bigram_entropy_raw'] = None
11859
+ if len(chords_pc) > 1:
11860
+ bigrams_pc = [(chords_pc[i], chords_pc[i + 1]) for i in range(len(chords_pc) - 1)]
11861
+ features['chords_bigram_entropy_pc'] = entropy(bigrams_pc)
11862
+ else:
11863
+ features['chords_bigram_entropy_pc'] = None
11864
+ features['chords_mode_raw'] = mode(chords_raw)
11865
+ features['chords_mode_pc'] = mode(chords_pc)
11866
+ if chords_pc:
11867
+ pc_sizes = [len(ch) for ch in chords_pc]
11868
+ features['chords_pc_size_mean'] = mean(pc_sizes)
11869
+ else:
11870
+ features['chords_pc_size_mean'] = None
11871
+ else:
11872
+ for key in ['chords_count', 'chords_density', 'chords_size_mean', 'chords_size_std',
11873
+ 'chords_size_min', 'chords_size_max', 'chords_unique_raw_count', 'chords_unique_pc_count',
11874
+ 'chords_entropy_raw', 'chords_entropy_pc', 'chords_repeat_ratio_raw', 'chords_repeat_ratio_pc',
11875
+ 'chords_bigram_entropy_raw', 'chords_bigram_entropy_pc', 'chords_mode_raw', 'chords_mode_pc',
11876
+ 'chords_pc_size_mean']:
11877
+ features[key] = None
11878
+
11879
+ #=================================================================
11880
+
11881
+ if delta_times:
11882
+ med_dt = features['delta_times_median']
11883
+ iqr_dt = features['delta_times_iqr']
11884
+ threshold_a = med_dt + 1.5 * iqr_dt if med_dt is not None and iqr_dt is not None else None
11885
+ threshold_b = percentile(delta_times, 90)
11886
+ if threshold_a is not None and threshold_b is not None:
11887
+ phrase_threshold = max(threshold_a, threshold_b)
11888
+ elif threshold_a is not None:
11889
+ phrase_threshold = threshold_a
11890
+ elif threshold_b is not None:
11891
+ phrase_threshold = threshold_b
11892
+ else:
11893
+ phrase_threshold = None
11894
+ else:
11895
+ phrase_threshold = None
11896
+
11897
+ phrases = []
11898
+ current_phrase = []
11899
+ if onsets:
11900
+ current_phrase.append(0)
11901
+ for i in range(len(onsets) - 1):
11902
+ gap = onsets[i + 1] - onsets[i]
11903
+ if phrase_threshold is not None and gap > phrase_threshold:
11904
+ phrases.append(current_phrase)
11905
+ current_phrase = []
11906
+ current_phrase.append(i + 1)
11907
+ if current_phrase:
11908
+ phrases.append(current_phrase)
11909
+ if phrases:
11910
+ phrase_note_counts = []
11911
+ phrase_durations = []
11912
+ phrase_densities = []
11913
+ phrase_mean_pitches = []
11914
+ phrase_pitch_ranges = []
11915
+ phrase_start_times = []
11916
+ phrase_end_times = []
11917
+ for phrase in phrases:
11918
+ note_count = len(phrase)
11919
+ phrase_note_counts.append(note_count)
11920
+ ph_start = onsets[phrase[0]]
11921
+ ph_end = onsets[phrase[-1]] + durations[phrase[-1]]
11922
+ phrase_start_times.append(ph_start)
11923
+ phrase_end_times.append(ph_end)
11924
+ ph_duration = ph_end - ph_start
11925
+ phrase_durations.append(ph_duration)
11926
+ density = note_count / ph_duration if ph_duration > 0 else None
11927
+ phrase_densities.append(density)
11928
+ ph_pitches = [pitches[i] for i in phrase if i < len(pitches)]
11929
+ phrase_mean_pitches.append(mean(ph_pitches) if ph_pitches else None)
11930
+ phrase_pitch_ranges.append((max(ph_pitches) - min(ph_pitches)) if ph_pitches else None)
11931
+ if len(phrases) > 1:
11932
+ phrase_gaps = []
11933
+ for i in range(len(phrases) - 1):
11934
+ gap = phrase_start_times[i + 1] - phrase_end_times[i]
11935
+ phrase_gaps.append(gap if gap > 0 else 0)
11936
+ else:
11937
+ phrase_gaps = []
11938
+ features['phrases_count'] = len(phrases)
11939
+ features['phrases_avg_note_count'] = mean(phrase_note_counts) if phrase_note_counts else None
11940
+ features['phrases_std_note_count'] = std(phrase_note_counts) if phrase_note_counts else None
11941
+ features['phrases_min_note_count'] = min(phrase_note_counts) if phrase_note_counts else None
11942
+ features['phrases_max_note_count'] = max(phrase_note_counts) if phrase_note_counts else None
11943
+ features['phrases_avg_duration'] = mean(phrase_durations) if phrase_durations else None
11944
+ features['phrases_std_duration'] = std(phrase_durations) if phrase_durations else None
11945
+ features['phrases_min_duration'] = min(phrase_durations) if phrase_durations else None
11946
+ features['phrases_max_duration'] = max(phrase_durations) if phrase_durations else None
11947
+ features['phrases_avg_density'] = mean(phrase_densities) if phrase_densities else None
11948
+ features['phrases_std_density'] = std(phrase_densities) if phrase_densities else None
11949
+ features['phrases_avg_mean_pitch'] = mean(phrase_mean_pitches) if phrase_mean_pitches else None
11950
+ features['phrases_avg_pitch_range'] = mean(phrase_pitch_ranges) if phrase_pitch_ranges else None
11951
+ if phrase_gaps:
11952
+ features['phrases_avg_gap'] = mean(phrase_gaps)
11953
+ features['phrases_std_gap'] = std(phrase_gaps)
11954
+ features['phrases_min_gap'] = min(phrase_gaps)
11955
+ features['phrases_max_gap'] = max(phrase_gaps)
11956
+ else:
11957
+ features['phrases_avg_gap'] = None
11958
+ features['phrases_std_gap'] = None
11959
+ features['phrases_min_gap'] = None
11960
+ features['phrases_max_gap'] = None
11961
+ features['phrases_threshold'] = phrase_threshold
11962
+ else:
11963
+ for key in ['phrases_count', 'phrases_avg_note_count', 'phrases_std_note_count',
11964
+ 'phrases_min_note_count', 'phrases_max_note_count', 'phrases_avg_duration',
11965
+ 'phrases_std_duration', 'phrases_min_duration', 'phrases_max_duration',
11966
+ 'phrases_avg_density', 'phrases_std_density', 'phrases_avg_mean_pitch',
11967
+ 'phrases_avg_pitch_range', 'phrases_avg_gap', 'phrases_std_gap',
11968
+ 'phrases_min_gap', 'phrases_max_gap', 'phrases_threshold']:
11969
+ features[key] = None
11970
+
11971
+ #=================================================================
11972
+
11973
+ return features
11974
+
11975
+ ###################################################################################
11976
+
11977
def winsorized_normalize(data, new_range=(0, 255), clip=1.5):

    """Winsorize *data* with a Tukey-style IQR fence and rescale to *new_range*.

    Values outside [Q1 - clip * IQR, Q3 + clip * IQR] are clipped to the fence
    (bounded by the observed min/max), then linearly mapped onto integers in
    new_range.

    Returns a list of ints the same length as data ([] for empty input).
    """

    new_min, new_max = new_range

    # Robustness fix: an empty input has no percentiles (the original
    # implementation raised IndexError here); return an empty result instead.
    if not data:
        return []

    #=================================================================

    def percentile(values, p):

        # Linear-interpolation percentile (same scheme as numpy's default).
        srt = sorted(values)
        n = len(srt)
        if n == 1:
            return srt[0]
        k = (n - 1) * p / 100.0
        f = int(k)
        c = k - f
        if f + 1 < n:
            return srt[f] * (1 - c) + srt[f + 1] * c

        return srt[f]

    #=================================================================

    q1 = percentile(data, 25)
    q3 = percentile(data, 75)
    iqr = q3 - q1

    lower_bound_w = q1 - clip * iqr
    upper_bound_w = q3 + clip * iqr

    data_min = min(data)
    data_max = max(data)

    # Never widen beyond the data itself.
    effective_low = max(lower_bound_w, data_min)
    effective_high = min(upper_bound_w, data_max)

    #=================================================================

    if effective_high == effective_low:

        # Degenerate fence (e.g. IQR == 0): all-equal data maps to new_min...
        if data_max == data_min:
            return [int(new_min)] * len(data)

        # ...otherwise fall back to plain min-max scaling.
        normalized = [(x - data_min) / (data_max - data_min) for x in data]

        return [int(round(new_min + norm * (new_max - new_min))) for norm in normalized]

    #=================================================================

    clipped = [x if x >= effective_low else effective_low for x in data]
    clipped = [x if x <= effective_high else effective_high for x in clipped]

    normalized = [(x - effective_low) / (effective_high - effective_low) for x in clipped]

    #=================================================================

    return [int(round(new_min + norm * (new_max - new_min))) for norm in normalized]
12034
+
12035
+ ###################################################################################
12036
+
12037
def tokenize_features_to_ints_winsorized(features, new_range=(0, 255), clip=1.5, none_token=-1):

    """Turn a features dict into parallel value/token/normalized-token lists.

    Keys are processed in sorted order. None values become none_token;
    numbers are rounded absolute values; sequences are reduced to their
    mean; anything else is hashed into an 8-digit integer. The token list
    is then winsorize-normalized into new_range.

    Returns (values, tokens, norm_tokens).
    """

    #=================================================================

    def _as_token(v):

        if isinstance(v, (int, float)):
            return int(round(abs(v)))

        if isinstance(v, (list, tuple)):
            return int(round(abs(sum(v) / len(v))))

        # Non-numeric values are mapped through a stable-width hash bucket.
        return int(abs(hash(v)) % (10 ** 8))

    #=================================================================

    values = []
    tokens = []

    for key in sorted(features):

        val = features[key]

        if val is None:
            tokens.append(none_token)
            values.append(none_token)
            continue

        tokens.append(_as_token(val))

        if isinstance(val, (list, tuple)):
            values.append(sum(val) / len(val))

        else:
            values.append(val)

    #=================================================================

    norm_tokens = winsorized_normalize(tokens, new_range, clip)

    #=================================================================

    return values, tokens, norm_tokens
12081
+
12082
+ ###################################################################################
12083
+
12084
def write_jsonl(records_dicts_list,
                file_name='data',
                file_ext='.jsonl',
                file_mode='w',
                line_sep='\n',
                verbose=True
                ):

    """Write a list of dicts to a JSON-Lines file, one record per line.

    file_ext is appended only when file_name has no extension.
    Progress and totals are printed when verbose is True.
    """

    if verbose:
        print('=' * 70)
        print('Writing', len(records_dicts_list), 'records to jsonl file...')
        print('=' * 70)

    if not os.path.splitext(file_name)[1]:
        file_name += file_ext

    l_count = 0

    # Fix: the original called f.close() inside the with-block; the context
    # manager already closes the file, so the explicit close is removed.
    with open(file_name, mode=file_mode) as f:
        for record in tqdm.tqdm(records_dicts_list, disable=not verbose):
            f.write(json.dumps(record) + line_sep)
            l_count += 1

    if verbose:
        print('=' * 70)
        print('Written total of', l_count, 'jsonl records.')
        print('=' * 70)
        print('Done!')
        print('=' * 70)
12115
+
12116
+ ###################################################################################
12117
+
12118
def read_jsonl(file_name='data',
               file_ext='.jsonl',
               verbose=True
               ):

    """Read a JSON-Lines file into a list of records.

    file_ext is appended only when file_name has no extension.
    Corrupted lines are reported (when verbose) and skipped; a
    KeyboardInterrupt returns the records read so far.
    """

    if verbose:
        print('=' * 70)
        print('Reading jsonl file...')
        print('=' * 70)

    if not os.path.splitext(file_name)[1]:
        file_name += file_ext

    records = []
    gl_count = 0

    # Fix: removed the redundant f.close() calls inside the with-block;
    # the context manager closes the file on every exit path.
    with open(file_name, 'r') as f:

        for i, line in tqdm.tqdm(enumerate(f), disable=not verbose):

            try:
                record = json.loads(line)
                records.append(record)
                gl_count += 1

            except KeyboardInterrupt:
                if verbose:
                    print('=' * 70)
                    print('Stopping...')
                    print('=' * 70)

                return records

            except json.JSONDecodeError:
                if verbose:
                    print('=' * 70)
                    print('[ERROR] Line', i, 'is corrupted! Skipping it...')
                    print('=' * 70)

                continue

    if verbose:
        print('=' * 70)
        print('Loaded total of', gl_count, 'jsonl records.')
        print('=' * 70)
        print('Done!')
        print('=' * 70)

    return records
12171
+
12172
+ ###################################################################################
12173
+
12174
def read_jsonl_lines(lines_indexes_list,
                     file_name='data',
                     file_ext='.jsonl',
                     verbose=True
                     ):

    """Read only the specified (0-based) line numbers from a JSON-Lines file.

    Records are returned in file order. file_ext is appended only when
    file_name has no extension. Corrupted lines are reported (when verbose)
    and skipped; a KeyboardInterrupt returns the records read so far.

    Fixes vs. the original: the caller's lines_indexes_list is no longer
    mutated (it was sorted in place), membership tests use a set instead of
    an O(n) list scan per file line, tqdm now honors verbose, and the
    redundant f.close() inside the with-block is gone.
    """

    if verbose:
        print('=' * 70)
        print('Reading jsonl file...')
        print('=' * 70)

    if not os.path.splitext(file_name)[1]:
        file_name += file_ext

    records = []
    l_count = 0

    # Local copy: O(1) membership and no side effect on the caller's list.
    wanted = set(lines_indexes_list)

    with open(file_name, 'r') as f:
        for current_line_number, line in tqdm.tqdm(enumerate(f), disable=not verbose):

            if current_line_number not in wanted:
                continue

            wanted.discard(current_line_number)

            try:
                record = json.loads(line)
                records.append(record)
                l_count += 1

            except KeyboardInterrupt:
                if verbose:
                    print('=' * 70)
                    print('Stopping...')
                    print('=' * 70)

                return records

            except json.JSONDecodeError:
                if verbose:
                    print('=' * 70)
                    print('[ERROR] Line', current_line_number, 'is corrupted! Skipping it...')
                    print('=' * 70)

            # Stop early once every requested line has been seen.
            if not wanted:
                break

    if verbose:
        print('=' * 70)
        print('Loaded total of', l_count, 'jsonl records.')
        print('=' * 70)
        print('Done!')
        print('=' * 70)

    return records
12234
+
12235
+ ###################################################################################
12236
+
12237
def compute_base(x: int, n: int) -> int:

    """Return the smallest base b >= 2 such that x fits in n base-b digits,
    i.e. the smallest b >= 2 with b ** n > x.

    Raises ValueError if x is negative.
    """

    if x < 0:
        raise ValueError("x must be non-negative.")
    if x == 0:
        return 2

    # The float n-th root is only an estimate and can be off by more than
    # one unit for large x, so correct it with a loop rather than the
    # original single `if` bump (which could leave b ** n <= x).
    b = max(2, int(x ** (1 / n)))

    while b ** n <= x:
        b += 1

    return b
12250
+
12251
+ ###################################################################################
12252
+
12253
def encode_int_auto(x: int, n: int) -> tuple[int, list[int]]:

    """Encode x as n big-endian digits in an automatically chosen base.

    The base is the smallest one (>= 2) in which x fits into n digits,
    as computed by compute_base. Returns (base, digits).
    """

    base = compute_base(x, n)

    remaining = x
    reversed_digits = []

    for _ in range(n):
        reversed_digits.append(remaining % base)
        remaining //= base

    reversed_digits.reverse()

    return base, reversed_digits
12263
+
12264
+ ###################################################################################
12265
+
12266
def decode_int_auto(base: int, digits: list[int]) -> int:

    """Decode a big-endian digit list back into an integer.

    Each digit must satisfy 0 <= digit < base; otherwise ValueError is raised.
    An empty digit list decodes to 0.
    """

    result = 0

    for d in digits:
        if not 0 <= d < base:
            raise ValueError(f"Each digit must be in the range 0 to {base - 1}. Invalid digit: {d}")

        result = result * base + d

    return result
12276
+
12277
+ ###################################################################################
12278
+
12279
def encode_int_manual(x, base, n):

    """Encode x as exactly n big-endian digits in the given base.

    If x needs more than n digits, the most significant ones are truncated
    (only the low n digits are kept).
    """

    remaining = x
    reversed_digits = []

    for _ in range(n):
        reversed_digits.append(remaining % base)
        remaining //= base

    return reversed_digits[::-1]
12288
+
12289
+ ###################################################################################
12290
+
12291
def escore_notes_pitches_chords_signature(escore_notes,
                                          max_patch=128,
                                          sort_by_counts=False,
                                          use_full_chords=False
                                          ):

    """Compute a pitches/chords occurrence signature for enhanced score notes.

    Notes with a patch above max_patch are ignored. Each chordified slice
    contributes one token: a single pitch (0-127) or a tones-chord index
    offset by 128; drum pitches (channel 9) get their own token range above
    the chord indices. The count of chords that needed repair is stored
    under key -1.

    Returns a list of [token, count] pairs (insertion order, or descending
    by count when sort_by_counts is True); [] if no notes survive filtering.
    """

    if use_full_chords:
        CHORDS = ALL_CHORDS_FULL

    else:
        CHORDS = ALL_CHORDS_SORTED

    max_patch = max(0, min(128, max_patch))

    escore_notes = [e for e in escore_notes if e[6] <= max_patch]

    if not escore_notes:
        return []

    cscore = chordify_score([1000, escore_notes])

    sig = []
    dsig = []

    # Drum tokens live above the chord-index range.
    drums_offset = len(CHORDS) + 128

    bad_chords_counter = 0

    for c in cscore:

        # Channel 9 (drums) pitches are shifted by 128 to keep them separate.
        all_pitches = [e[4] if e[3] != 9 else e[4] + 128 for e in c]
        chord = sorted(set(all_pitches))

        pitches = sorted([p for p in chord if p < 128], reverse=True)
        drums = [(d + drums_offset) - 128 for d in chord if d > 127]

        if pitches:
            if len(pitches) > 1:
                tones_chord = sorted(set([p % 12 for p in pitches]))

                # Fix: narrowed the original bare `except:` -- list.index
                # raises ValueError for an unrecognized tones chord.
                try:
                    sig_token = CHORDS.index(tones_chord) + 128
                except ValueError:
                    checked_tones_chord = check_and_fix_tones_chord(tones_chord, use_full_chords=use_full_chords)
                    sig_token = CHORDS.index(checked_tones_chord) + 128
                    bad_chords_counter += 1

            else:
                sig_token = pitches[0]

            sig.append(sig_token)

        if drums:
            dsig.extend(drums)

    # Count token occurrences (dict preserves first-seen order).
    sig_p = {}

    for item in sig + dsig:
        sig_p[item] = sig_p.get(item, 0) + 1

    sig_p[-1] = bad_chords_counter

    fsig = [list(v) for v in sig_p.items()]

    if sort_by_counts:
        fsig.sort(key=lambda x: x[1], reverse=True)

    return fsig
12366
+
12367
+ ###################################################################################
12368
+
12369
def compute_sustain_intervals(events):

    """Convert (time, cc_value) sustain-pedal events into pedal-down intervals.

    The pedal counts as down while cc >= 64. If it is still down after the
    last event, the final interval ends at float('inf'). Adjacent/overlapping
    intervals are merged before returning.
    """

    raw_intervals = []
    down_since = None

    for t, cc in events:
        if down_since is None and cc >= 64:
            # Pedal pressed.
            down_since = t
        elif down_since is not None and cc < 64:
            # Pedal released.
            raw_intervals.append((down_since, t))
            down_since = None

    if down_since is not None:
        # Pedal never released: hold to the end of time.
        raw_intervals.append((down_since, float('inf')))

    merged = []

    for start, end in raw_intervals:
        if merged and start <= merged[-1][1]:
            prev_start, prev_end = merged[-1]
            merged[-1] = (prev_start, max(prev_end, end))
        else:
            merged.append((start, end))

    return merged
12397
+
12398
+ ###################################################################################
12399
+
12400
def apply_sustain_to_ms_score(score):

    """Extend note durations in an ms score according to sustain pedal (CC 64).

    A note whose nominal note-off falls strictly inside a pedal-down interval
    on its channel is held until that interval ends. Open-ended pedal
    intervals are capped at the latest note-off in the score. Mutates the
    score in place and returns it.
    """

    # Gather (time, value) pairs of CC 64 events per channel.
    pedal_events = {}

    for track in score[1:]:
        for ev in track:
            if ev[0] == 'control_change' and ev[3] == 64:
                pedal_events.setdefault(ev[2], []).append((ev[1], ev[4]))

    # Pedal-down intervals per channel, from time-sorted events.
    intervals_by_channel = {}

    for channel, evs in pedal_events.items():
        evs.sort(key=lambda x: x[0])
        intervals_by_channel[channel] = compute_sustain_intervals(evs)

    # Latest note-off in the whole score caps open-ended intervals.
    last_note_off = 0

    for track in score[1:]:
        for ev in track:
            if ev[0] == 'note':
                last_note_off = max(last_note_off, ev[1] + ev[2])

    for channel, intervals in intervals_by_channel.items():
        intervals_by_channel[channel] = [
            (start, last_note_off if end == float('inf') else end)
            for start, end in intervals
        ]

    if intervals_by_channel:

        for track in score[1:]:
            for ev in track:
                if ev[0] != 'note':
                    continue

                onset = ev[1]
                note_off = onset + ev[2]

                # First interval that swallows the nominal note-off wins.
                for intv_start, intv_end in intervals_by_channel.get(ev[3], []):
                    if intv_start < note_off < intv_end:
                        note_off = intv_end
                        break

                ev[2] = note_off - onset

    return score
12454
+
12455
+ ###################################################################################
12456
+
12457
def copy_file(src_file: str, trg_dir: str, add_subdir: bool = False, verbose: bool = False):

    """Copy src_file into trg_dir, creating directories as needed.

    When add_subdir is True the file is bucketed into a subdirectory named
    after the first character of its file name. Best-effort: always returns
    None; problems are reported only when verbose is True.
    """

    src_path = Path(src_file)
    target_directory = Path(trg_dir)

    if not src_path.is_file():
        if verbose:
            print("Source file does not exist or is not a file.")

        return None

    target_directory.mkdir(parents=True, exist_ok=True)

    if add_subdir:
        first_letter = src_path.name[0]
        target_directory = target_directory / first_letter
        target_directory.mkdir(parents=True, exist_ok=True)

    destination = target_directory / src_path.name

    try:
        shutil.copy2(src_path, destination)

    # Fix: narrowed the original bare `except:` -- copy failures stay
    # best-effort, but KeyboardInterrupt/SystemExit are no longer swallowed.
    except (OSError, shutil.Error):
        if verbose:
            print('File could not be copied!')

        return None

    if verbose:
        print('File copied!')

    return None
12490
+
12491
+ ###################################################################################
12492
+
12493
def escore_notes_even_timings(escore_notes, in_place=True):

    """Make every note's start time and duration even.

    Odd values at index 1 (start time) and index 2 (duration) are bumped
    up by one.  With in_place=True the input list is mutated and [] is
    returned; otherwise a modified deep copy is returned and the input is
    left untouched.
    """

    notes = escore_notes if in_place else copy.deepcopy(escore_notes)

    for note in notes:
        if note[1] & 1:
            note[1] += 1

        if note[2] & 1:
            note[2] += 1

    return [] if in_place else notes
12516
+
12517
+ ###################################################################################
12518
+
12519
def both_chords(chord1, chord2, merge_threshold=2):

    """Decide whether two adjacent chords should be merged.

    True when their start times differ by at most merge_threshold and at
    least one of the two chords has more than one note.
    """

    if not chord1 or not chord2:
        return False

    close_enough = chord2[0][1] - chord1[0][1] <= merge_threshold

    return close_enough and (len(chord1) > 1 or len(chord2) > 1)
12529
+
12530
def merge_chords(chord1, chord2, sort_drums_last=False):

    """Merge chord2's notes into chord1 (in place) and re-sort the result.

    Notes from chord2 are de-duplicated against each other by their
    (pitch, patch) pair; all notes after the first inherit the first
    note's start time.  With sort_drums_last=True drum notes (patch 128)
    sort after pitched notes.
    """

    merged = chord1
    used_keys = set()

    for note in chord2:
        key = (note[4], note[6])
        if key not in used_keys:
            used_keys.add(key)
            merged.append(note)

    anchor_time = merged[0][1]
    for note in merged[1:]:
        note[1] = anchor_time

    if sort_drums_last:
        key_fn = lambda x: (-x[4], x[6]) if x[6] != 128 else (x[6], -x[4])
    else:
        key_fn = lambda x: (-x[4], x[6])

    merged.sort(key=key_fn)

    return merged
12551
+
12552
def merge_escore_notes(escore_notes, merge_threshold=2, sort_drums_last=False):

    """Merge chords whose start times are within merge_threshold of each
    other and return the flattened merged notes.

    Fixes:
      * the final accumulated chord is now appended after the loop;
        previously it was silently dropped from the output.
      * empty input returns [] instead of raising IndexError.
    """

    if not escore_notes:
        return []

    cscore = chordify_score([1000, escore_notes])

    merged_chords = []
    merged_chord = cscore[0]

    for cchord in cscore[1:]:

        if both_chords(merged_chord, cchord, merge_threshold=merge_threshold):
            merged_chord = merge_chords(merged_chord, cchord, sort_drums_last=sort_drums_last)

        else:
            merged_chords.append(merged_chord)
            merged_chord = cchord

    # Flush the last pending chord (previously lost).
    merged_chords.append(merged_chord)

    return flatten(merged_chords)
12571
+
12572
+ ###################################################################################
12573
+
12574
def solo_piano_escore_notes_tokenized(escore_notes,
                                      compress_start_times=True,
                                      encode_velocities=False,
                                      verbose=False
                                      ):

    """Encode enhanced score notes as a flat solo-piano integer token list.

    The score is reduced to solo piano, re-based to start at time zero and
    delta-encoded (delta times clipped to 127).  Each note then emits:

      * delta-time token            (0..127)
      * duration token  + 128       (128..255)
      * pitch token     + 256       (256..383)
      * velocity token  + 384       only when encode_velocities=True

    With compress_start_times=True the delta-time token is omitted for
    notes that start together with the previous note (chord members), and
    the first note always emits an explicit 0 time token.
    """

    if verbose:
        print('=' * 70)
        print('Encoding MIDI...')

    # Reduce to solo piano, re-base timings, then delta-encode.
    sp_escore_notes = solo_piano_escore_notes(escore_notes)
    zscore = recalculate_score_timings(sp_escore_notes)
    dscore = delta_score_notes(zscore, timings_clip_value=127)

    score = []

    notes_counter = 0
    chords_counter = 1  # the first chord is counted up front

    for i, e in enumerate(dscore):

        dtime = e[1]
        dur = e[2]
        ptc = e[4]
        vel = e[5]

        if compress_start_times:

            if i == 0:
                # First note: always an explicit 0 time token.
                score.extend([0, dur+128, ptc+256])

                if encode_velocities:
                    score.append(vel+384)

            else:
                if dtime == 0:
                    # Chord member: the time token is omitted.
                    score.extend([dur+128, ptc+256])

                else:
                    score.extend([dtime, dur+128, ptc+256])

                if encode_velocities:
                    score.append(vel+384)

                if dtime != 0:
                    chords_counter += 1

        else:
            score.extend([dtime, dur+128, ptc+256])

            if encode_velocities:
                score.append(vel+384)

            if dtime != 0:
                chords_counter += 1

        notes_counter += 1

    if verbose:
        print('Done!')
        print('=' * 70)

        print('Source MIDI composition has', len(zscore), 'notes')
        print('Source MIDI composition has', len([d[1] for d in dscore if d[1] !=0 ])+1, 'chords')
        print('-' * 70)
        print('Encoded sequence has', notes_counter, 'pitches')
        print('Encoded sequence has', chords_counter, 'chords')
        print('-' * 70)
        print('Final encoded sequence has', len(score), 'tokens')
        print('=' * 70)

    return score
12646
+
12647
+ ###################################################################################
12648
+
12649
def equalize_closest_elements_dynamic(seq,
                                      min_val=128,
                                      max_val=256,
                                      splitting_factor=1.5,
                                      tightness_threshold=0.15
                                      ):

    """Replace the largest tight cluster of values in [min_val, max_val]
    with their integer mean.

    Candidate values are clustered by splitting at gaps larger than
    splitting_factor times the median gap.  The biggest cluster (ties
    broken by smaller spread) is equalized only if its spread relative to
    the allowed range is within tightness_threshold.  Always returns a
    copy; the input is never mutated.
    """

    candidates = [(idx, val) for idx, val in enumerate(seq) if min_val <= val <= max_val]

    if len(candidates) < 2:
        return seq.copy()

    by_value = sorted(candidates, key=lambda pair: pair[1])
    values = [v for _, v in by_value]

    gaps = [b - a for a, b in zip(values, values[1:])]

    def _median(data):
        ordered = sorted(data)
        half = len(data) // 2

        if len(data) % 2:
            return ordered[half]

        return (ordered[half - 1] + ordered[half]) / 2.0

    gap_cutoff = splitting_factor * _median(gaps)

    # Split sorted candidates into clusters at unusually large gaps.
    clusters = []
    current = [by_value[0]]

    for pair, gap in zip(by_value[1:], gaps):
        if gap > gap_cutoff:
            clusters.append(current)
            current = []
        current.append(pair)

    clusters.append(current)

    valid = [cluster for cluster in clusters if len(cluster) >= 2]
    if not valid:
        return seq.copy()

    def _spread(cluster):
        vals = [v for _, v in cluster]
        return max(vals) - min(vals)

    # Prefer larger clusters; among equal sizes, the tighter one.
    valid.sort(key=lambda cluster: (len(cluster), -_spread(cluster)), reverse=True)
    chosen = valid[0]

    if _spread(chosen) / (max_val - min_val) > tightness_threshold:
        return seq.copy()

    members = [v for _, v in chosen]
    level = sum(members) // len(members)

    result = list(seq)
    for idx, _ in chosen:
        result[idx] = level

    return result
12722
+
12723
+ ###################################################################################
12724
+
12725
def chunk_list(lst, chunk_size):

    """Split lst into consecutive pieces of at most chunk_size elements."""

    chunks = []
    pos = 0

    while pos < len(lst):
        chunks.append(lst[pos:pos + chunk_size])
        pos += chunk_size

    return chunks
12727
+
12728
+ ###################################################################################
12729
+
12730
def compress_tokens_sequence(seq,
                             min_val=128,
                             max_val=256,
                             group_size=2,
                             splitting_factor=1.5,
                             tightness_threshold=0.15
                             ):

    """Equalize near-identical values in seq, then regroup it so that
    runs of chunks sharing the same leading token store that token once."""

    equalized = equalize_closest_elements_dynamic(seq,
                                                  min_val,
                                                  max_val,
                                                  splitting_factor=splitting_factor,
                                                  tightness_threshold=tightness_threshold
                                                  )

    # Chunk, then order chunks by descending head token and second token.
    groups = sorted(chunk_list(equalized, group_size), key=lambda g: (-g[0], -g[1]))

    # Collapse equal-head runs: [[head]] followed by each chunk's tail.
    regrouped = [[[head]] + [chunk[1:] for chunk in run]
                 for head, run in groupby(groups, key=lambda g: g[0])]

    return flatten(flatten(sorted(regrouped, key=lambda g: -g[1][0])))
12750
+
12751
+ ###################################################################################
12752
+
12753
def merge_adjacent_pairs(values_counts):

    """Collapse consecutive (value, count) pairs whose values differ by 1.

    The surviving value is the one with the larger count (ties keep the
    lower value) and the counts are summed.  Pairs are consumed left to
    right, so each entry participates in at most one merge per call.
    """

    merged = []
    idx = 0
    total = len(values_counts)

    while idx < total:

        if idx + 1 < total:
            val_a, cnt_a = values_counts[idx]
            val_b, cnt_b = values_counts[idx + 1]

            if val_b - val_a == 1:
                winner = val_b if cnt_b > cnt_a else val_a
                merged.append((winner, cnt_a + cnt_b))
                idx += 2
                continue

        merged.append(values_counts[idx])
        idx += 1

    return merged
12783
+
12784
+ ###################################################################################
12785
+
12786
def merge_escore_notes_start_times(escore_notes, num_merges=1):

    """Quantize note start times by repeatedly merging adjacent delta-time
    bins (at most num_merges passes), snapping each non-zero delta to the
    closest surviving bin and shortening durations by num_merges.
    """

    dscore = delta_score_notes(escore_notes)

    nonzero_times = [note[1] for note in dscore if note[1] != 0]
    counts = sorted(Counter(nonzero_times).most_common())

    previous = None
    merges_done = 0

    # Merge neighboring bins until stable or the pass budget is spent.
    while previous != counts:
        previous = counts
        counts = merge_adjacent_pairs(counts)

        merges_done += 1

        if merges_done == num_merges:
            break

    grid = [pair[0] for pair in counts]

    for note in dscore:
        if note[1] > 0:
            note[1] = find_closest_value(grid, note[1])[0]
            note[2] -= num_merges

    return delta_score_to_abs_score(dscore)
12815
+
12816
+ ###################################################################################
12817
+
12818
def multi_instrumental_escore_notes_tokenized(escore_notes, compress_seq=False):

    """Tokenize enhanced score notes into a flat integer sequence.

    Each note contributes a delta-time token (0-255), a duration/velocity
    token (offset 256) and a patch/pitch token (offset 2304; drums on
    channel 9 use pseudo-patch 128).  With compress_seq=True the
    delta-time token is emitted only for the first note or when non-zero.
    """

    tokens = []

    prev_note = escore_notes[0]

    for pos, note in enumerate(escore_notes):

        delta_time = min(255, max(0, note[1] - prev_note[1]))
        duration = min(255, max(0, note[2]))
        channel = min(15, max(0, note[3]))

        # Channel 9 is drums; it gets a dedicated pseudo-patch slot.
        patch = 128 if channel == 9 else min(127, max(0, note[6]))

        pitch = min(127, max(0, note[4]))

        velocity = min(127, max(8, note[5]))
        vel_token = round(velocity / 15) - 1

        dur_vel = (8 * duration) + vel_token
        pat_ptc = (129 * patch) + pitch

        if compress_seq and delta_time == 0 and pos != 0:
            tokens.extend([dur_vel + 256, pat_ptc + 2304])

        else:
            tokens.extend([delta_time, dur_vel + 256, pat_ptc + 2304])

        prev_note = note

    return tokens
12859
+
12860
+ ###################################################################################
12861
+
12862
def merge_counts(data, return_lists=True):

    """Sum counts for duplicate values in a list of (value, count) pairs.

    Returns [[value, total], ...] by default, or [(value, total), ...]
    when return_lists=False.  First-seen order of values is preserved.
    """

    totals = defaultdict(int)

    for value, count in data:
        totals[value] += count

    if return_lists:
        return [[value, total] for value, total in totals.items()]

    return list(totals.items())
12874
+
12875
+ ###################################################################################
12876
+
12877
def convert_escore_notes_pitches_chords_signature(signature, convert_to_full_chords=True):

    """Convert a pitches/chords signature between the sorted and the full
    chord vocabularies.

    Pitch entries (0..127) pass through, chord entries are re-indexed into
    the target vocabulary (repairing chords missing from it), drum entries
    are shifted by the vocabulary size difference, and the bad-chords
    counter entry ([-1, n]) tracks how many chords needed repair.

    Fix: previously raised IndexError when a chord had to be repaired but
    the signature contained no [-1, n] entry; the entry is now created on
    demand.
    """

    if convert_to_full_chords:
        SRC_CHORDS = ALL_CHORDS_SORTED
        TRG_CHORDS = ALL_CHORDS_FULL

    else:
        SRC_CHORDS = ALL_CHORDS_FULL
        TRG_CHORDS = ALL_CHORDS_SORTED

    cdiff = len(TRG_CHORDS) - len(SRC_CHORDS)

    pitches_counts = [c for c in signature if -1 < c[0] < 128]
    chords_counts = [c for c in signature if 127 < c[0] < len(SRC_CHORDS)+128]
    drums_counts = [[c[0]+cdiff, c[1]] for c in signature if len(SRC_CHORDS)+127 < c[0] < len(SRC_CHORDS)+256]
    bad_chords_count = [c for c in signature if c[0] == -1]

    new_chords_counts = []

    for c in chords_counts:
        tones_chord = SRC_CHORDS[c[0]-128]

        if tones_chord not in TRG_CHORDS:
            tones_chord = check_and_fix_tones_chord(tones_chord, use_full_chords=convert_to_full_chords)

            # Guard: the signature may lack a bad-chords entry; create one
            # instead of indexing into an empty list.
            if not bad_chords_count:
                bad_chords_count = [[-1, 0]]

            bad_chords_count[0][1] += 1

        new_chords_counts.append([TRG_CHORDS.index(tones_chord)+128, c[1]])

    return pitches_counts + merge_counts(new_chords_counts) + drums_counts + bad_chords_count
12906
+
12907
+ ###################################################################################
12908
+
12909
def convert_bytes_in_nested_list(lst, encoding='utf-8', errors='ignore'):

    """Recursively decode every bytes object in a (possibly nested) list.

    NOTE: a later definition with the same name (and an extra counting
    option) shadows this one at import time.

    Fix: the recursive call now forwards *encoding* and *errors*; nested
    lists were previously decoded with the defaults regardless of the
    arguments passed by the caller.
    """

    new_list = []

    for item in lst:
        if isinstance(item, list):
            new_list.append(convert_bytes_in_nested_list(item, encoding, errors))

        elif isinstance(item, bytes):
            new_list.append(item.decode(encoding, errors=errors))

        else:
            new_list.append(item)

    return new_list
12924
+
12925
+ ###################################################################################
12926
+
12927
def mult_pitches(pitches, min_oct=4, max_oct=6):

    """Expand the unique tones of *pitches* across octaves [min_oct, max_oct)."""

    unique_tones = sorted(set(p % 12 for p in pitches))

    return [octave * 12 + tone
            for tone in unique_tones
            for octave in range(min_oct, max_oct)]
12938
+
12939
+ ###################################################################################
12940
+
12941
def find_next(pitches, cur_ptc):

    """Return the index of the first pitch that differs from cur_ptc.

    If every pitch equals cur_ptc the last index is returned; an empty
    list yields 0.
    """

    last_index = max(len(pitches) - 1, 0)

    return next((i for i, p in enumerate(pitches) if p != cur_ptc), last_index)
12950
+
12951
+ ###################################################################################
12952
+
12953
def ordered_groups_unsorted(data, key_index):

    """Group consecutive sublists sharing the same value at key_index.

    Returns (key, group_list) tuples in encounter order; non-adjacent runs
    with equal keys stay separate (itertools.groupby semantics).
    """

    return [(key, list(run))
            for key, run in groupby(data, key=lambda row: row[key_index])]
12964
+
12965
+ ###################################################################################
12966
+
12967
def ordered_groups(data, ptc_idx, pat_idx):

    """Group sublists by their (pitch, patch) pair, preserving the order
    in which each pair is first seen."""

    buckets = OrderedDict()

    for row in data:
        buckets.setdefault((row[ptc_idx], row[pat_idx]), []).append(row)

    return list(buckets.items())
12980
+
12981
+ ###################################################################################
12982
+
12983
def merge_melody_notes(escore_notes, pitches_idx=4, max_dur=255, last_dur=128):

    """Collapse consecutive equal-pitch melody notes into single notes.

    Each run of notes sharing a pitch becomes one note lasting until the
    next run starts (capped at max_dur); the final run becomes a single
    note of fixed duration last_dur.

    Fix: returns [] for empty input instead of raising IndexError.
    """

    if not escore_notes:
        return []

    groups = ordered_groups_unsorted(escore_notes, pitches_idx)

    merged_melody_notes = []

    for i, (k, g) in enumerate(groups[:-1]):

        if len(g) == 1:
            merged_melody_notes.extend(g)

        else:
            # Run of repeated pitches: one note from the run's start to the
            # start of the next group, capped at max_dur.
            dur = min(max_dur, groups[i+1][1][0][1] - g[0][1])

            merged_melody_notes.append(['note',
                                        g[0][1],
                                        dur,
                                        g[0][3],
                                        g[0][4],
                                        g[0][5],
                                        g[0][6]
                                        ])

    # The last group always becomes one note with a fixed duration.
    merged_melody_notes.append(['note',
                                groups[-1][1][0][1],
                                last_dur,
                                groups[-1][1][0][3],
                                groups[-1][1][0][4],
                                groups[-1][1][0][5],
                                groups[-1][1][0][6]
                                ])

    return merged_melody_notes
13016
+
13017
+ ###################################################################################
13018
+
13019
def add_expressive_melody_to_enhanced_score_notes(escore_notes,
                                                  melody_start_chord=0,
                                                  melody_prime_pitch=60,
                                                  melody_step=1,
                                                  melody_channel=3,
                                                  melody_patch=40,
                                                  melody_notes_max_duration=255,
                                                  melody_last_note_dur=128,
                                                  melody_clip_max_min_durs=[],
                                                  melody_max_velocity=120,
                                                  acc_max_velocity=90,
                                                  return_melody=False
                                                  ):

    """Derive an expressive melody line from a score and overlay it.

    A lead pitch is chosen for every melody_step-th chord (the multi-octave
    expansion of the chord's tones closest to the previous lead pitch),
    rendered on melody_channel/melody_patch, then merged back into a
    velocity-capped copy of the input score.

    Returns the combined score, or only the (unmerged) melody notes when
    return_melody=True.  The input escore_notes is not modified.
    """

    # NOTE(review): melody_clip_max_min_durs=[] is a mutable default; it is
    # only read here so sharing between calls is harmless, but a None
    # default would be safer.

    score = copy.deepcopy(escore_notes)

    # Cap accompaniment velocities before adding the (louder) melody.
    adjust_score_velocities(score, acc_max_velocity)

    cscore = chordify_score([1000, score])

    melody_pitches = [melody_prime_pitch]

    # Pick one lead pitch per sampled chord.
    for i, c in enumerate(cscore[melody_start_chord:]):

        if i % melody_step == 0:

            pitches = [e[4] for e in c if e[3] != 9]  # ignore drums (channel 9)

            if pitches:
                cptc = find_closest_value(mult_pitches(pitches), melody_pitches[-1])[0]
                melody_pitches.append(cptc)

    song_f = []
    mel_f = []

    idx = 1  # melody_pitches[0] is the prime pitch; real picks start at 1

    for i, c in enumerate(cscore[:-melody_step]):
        pitches = [e[4] for e in c if e[3] != 9]

        if pitches and i >= melody_start_chord and i % melody_step == 0:
            # Melody note lasts until the next sampled chord, capped.
            dur = min(cscore[i+melody_step][0][1] - c[0][1], melody_notes_max_duration)

            mel_f.append(['note',
                          c[0][1],
                          dur,
                          melody_channel,
                          60+(melody_pitches[idx] % 24),  # fold lead pitch into two octaves above C4
                          100 + ((melody_pitches[idx] % 12) * 2),  # pitch-dependent expressive velocity
                          melody_patch
                          ])
            idx += 1

        song_f.extend(c)

    song_f.extend(flatten(cscore[-melody_step:]))

    if len(melody_clip_max_min_durs) == 2:
        for e in mel_f:
            # NOTE(review): durations at or above the first value are set to
            # the second value — confirm this max->min clamp is intended.
            if e[2] >= melody_clip_max_min_durs[0]:
                e[2] = melody_clip_max_min_durs[1]

    adjust_score_velocities(mel_f, melody_max_velocity)

    merged_melody_notes = merge_melody_notes(mel_f,
                                             max_dur=melody_notes_max_duration,
                                             last_dur=melody_last_note_dur
                                             )

    song_f = sorted(merged_melody_notes + song_f,
                    key=lambda x: x[1]
                    )

    if return_melody:
        return mel_f

    else:
        return song_f
13098
+
13099
+ ###################################################################################
13100
+
13101
+ def list_md5_hash(ints_list):
13102
+
13103
+ arr = array('H', ints_list)
13104
+ binary_data = arr.tobytes()
13105
+
13106
+ return hashlib.md5(binary_data).hexdigest()
13107
+
13108
+ ###################################################################################
13109
+
13110
def fix_escore_notes_durations(escore_notes,
                               min_notes_gap=1,
                               min_notes_dur=1,
                               times_idx=1,
                               durs_idx=2,
                               channels_idx=3,
                               pitches_idx=4,
                               patches_idx=6
                               ):

    """Trim overlapping durations within every (pitch, patch) voice.

    Drum notes (channel 9) pass through untouched; all other notes are
    grouped per (pitch, patch) and de-overlapped, then everything is
    re-merged in start-time order.
    """

    drums = []
    pitched = []

    for note in escore_notes:
        (drums if note[channels_idx] == 9 else pitched).append(note)

    fixed_notes = []

    for _, group in ordered_groups(pitched, pitches_idx, patches_idx):

        if len(group) > 2:
            fixed_notes.extend(fix_monophonic_score_durations(group,
                                                              min_notes_gap=min_notes_gap,
                                                              min_notes_dur=min_notes_dur
                                                              ))

        else:
            if len(group) == 2:
                first, second = group

                # Clip the first note so it no longer overlaps the second.
                if first[times_idx] + first[durs_idx] >= second[times_idx]:
                    first[durs_idx] = max(1, second[times_idx] - first[times_idx] - min_notes_gap)

            fixed_notes.extend(group)

    return sorted(fixed_notes + drums, key=lambda note: note[times_idx])
13146
+
13147
+ ###################################################################################
13148
+
13149
def create_nested_chords_tree(chords_list):

    """Build a trie over chords.

    Each node maps a semitone to a child node; complete chords are stored
    in a list under the sentinel key -1 at their terminal node.
    """

    tree = {}

    for chord in chords_list:

        node = tree

        for semitone in chord:
            node = node.setdefault(semitone, {})

        node.setdefault(-1, []).append(chord)

    return tree
13166
+
13167
+ ###################################################################################
13168
+
13169
def get_chords_with_prefix(nested_chords_tree, prefix):

    """Return every chord stored at or below *prefix* in a chords trie.

    Returns [] if the prefix path does not exist.
    """

    node = nested_chords_tree

    for semitone in prefix:
        if semitone not in node:
            return []

        node = node[semitone]

    chords = []

    def _walk(subtree):
        # Chords at this node come first, then children in key order.
        chords.extend(subtree.get(-1, []))

        for key, child in subtree.items():
            if key != -1:
                _walk(child)

    _walk(node)

    return chords
13193
+
13194
+ ###################################################################################
13195
+
13196
def get_chords_by_semitones(chords_list, chord_semitones):

    """Return the chords (sorted, de-duplicated) that contain every
    semitone in chord_semitones."""

    wanted = set(chord_semitones)

    return [sorted(set(chord)) for chord in chords_list if wanted.issubset(chord)]
13209
+
13210
+ ###################################################################################
13211
+
13212
def remove_duplicate_pitches_from_escore_notes(escore_notes,
                                               pitches_idx=4,
                                               patches_idx=6,
                                               return_dupes_count=False
                                               ):

    """Drop notes that duplicate an earlier note's (pitch, patch) pair
    within the same chord.

    Returns the filtered notes, or just the number of removed duplicates
    when return_dupes_count=True.
    """

    chords = chordify_score([1000, escore_notes])

    filtered = []
    dupes = 0

    for chord in chords:

        seen_keys = set()

        for note in chord:
            key = (note[pitches_idx], note[patches_idx])

            if key in seen_keys:
                dupes += 1
            else:
                seen_keys.add(key)
                filtered.append(note)

    if return_dupes_count:
        return dupes

    return filtered
13244
+
13245
+ ###################################################################################
13246
+
13247
def chunks_shuffle(lst,
                   min_len=1,
                   max_len=3,
                   seed=None
                   ):

    """Split lst into random-length chunks (min_len..max_len items),
    shuffle the chunks, and return the flattened result.

    A fixed seed gives a reproducible ordering; elements inside a chunk
    keep their relative order.
    """

    rng = random.Random(seed)

    pieces = []
    start = 0
    total = len(lst)

    while start < total:
        take = min(rng.randint(min_len, max_len), total - start)
        pieces.append(lst[start:start + take])
        start += take

    rng.shuffle(pieces)

    return [item for piece in pieces for item in piece]
13270
+
13271
+ ###################################################################################
13272
+
13273
def convert_bytes_in_nested_list(lst,
                                 encoding='utf-8',
                                 errors='ignore',
                                 return_changed_events_count=False
                                 ):

    """Recursively decode bytes objects in a nested list, optionally
    reporting how many bytes objects were decoded.

    Fixes: the recursive call now forwards *encoding*/*errors* (nested
    lists used to be decoded with the defaults), and decode counts from
    nested lists are now included in the reported total (they were
    previously lost).
    """

    new_list = []

    ce_count = 0

    for item in lst:
        if isinstance(item, list):
            sub_list, sub_count = convert_bytes_in_nested_list(item,
                                                               encoding,
                                                               errors,
                                                               return_changed_events_count=True
                                                               )
            new_list.append(sub_list)
            ce_count += sub_count

        elif isinstance(item, bytes):
            new_list.append(item.decode(encoding, errors=errors))
            ce_count += 1

        else:
            new_list.append(item)

    if return_changed_events_count:
        return new_list, ce_count

    else:
        return new_list
13299
+
13300
+ ###################################################################################
13301
+
13302
def find_deepest_midi_dirs(roots,
                           marker_file="midi_score.mid",
                           suffixes=None,
                           randomize=False,
                           seed=None,
                           verbose=False
                           ):

    """Collect MIDI files from the leaf directories under one or more roots.

    A "leaf" is a directory with no subdirectories.  Leaves are kept only
    if one of their file names matches a marker pattern (fnmatch,
    case-insensitive); with no marker given, every leaf qualifies.  From
    each kept leaf, files whose suffix is in *suffixes* (default
    .mid/.midi/.kar) are collected.

    Returns {leaf_dir_path: [file_paths]}, sorted, or shuffled with the
    given seed when randomize=True.

    NOTE(review): uses tqdm for the progress bar; progress output appears
    only when verbose=True.
    """

    # Accept a single path or any iterable of paths.
    try:
        iter(roots)
        if isinstance(roots, (str, Path)):
            root_list = [roots]
        else:
            root_list = list(roots)

    except TypeError:
        root_list = [roots]

    # Normalize marker pattern(s) to a lowercase list.
    if isinstance(marker_file, (list, tuple)):
        patterns = [p.lower() for p in marker_file if p]

    else:
        patterns = [marker_file.lower()] if marker_file else []

    allowed = {s.lower() for s in (suffixes or ['.mid', '.midi', '.kar'])}

    if verbose:
        print("Settings:")
        print(" Roots:", [str(r) for r in root_list])
        print(" Marker patterns:", patterns or "<no marker filter>")
        print(" Allowed suffixes:", allowed)
        print(f" Randomize={randomize}, Seed={seed}")

    results = defaultdict(list)
    rng = random.Random(seed)

    for root in root_list:

        root_path = Path(root)

        if not root_path.is_dir():
            print(f"Warning: '{root_path}' is not a valid directory, skipping.")
            continue

        if verbose:
            print(f"\nScanning root: {str(root_path)}")

        all_dirs = list(root_path.rglob("*"))
        dirs_iter = tqdm.tqdm(all_dirs, desc=f"Dirs in {root_path.name}", disable=not verbose)

        for dirpath in dirs_iter:
            if not dirpath.is_dir():
                continue

            # Only leaf directories (no subdirectories) are considered.
            children = list(dirpath.iterdir())
            if any(child.is_dir() for child in children):
                if verbose:
                    print(f"Skipping non-leaf: {str(dirpath)}")
                continue

            files = [f for f in children if f.is_file()]
            names = [f.name.lower() for f in files]

            if patterns:
                # fnmatch allows glob-style markers like "*.mscz".
                matched = any(fnmatch(name, pat) for name in names for pat in patterns)
                if not matched:
                    if verbose:
                        print(f"No marker in: {str(dirpath)}")
                    continue

                if verbose:
                    print(f"Marker found in: {str(dirpath)}")

            else:
                if verbose:
                    print(f"Including leaf (no marker): {str(dirpath)}")

            for f in files:
                if f.suffix.lower() in allowed:
                    results[str(dirpath)].append(str(f))

                    if verbose:
                        print(f" Collected: {f.name}")

    all_leaves = list(results.keys())
    if randomize:
        if verbose:
            print("\nShuffling leaf directories")

        rng.shuffle(all_leaves)

    else:
        all_leaves.sort()

    final_dict = {}

    for leaf in all_leaves:
        file_list = results[leaf][:]
        if randomize:
            if verbose:
                print(f"Shuffling files in: {leaf}")

            rng.shuffle(file_list)

        else:
            file_list.sort()

        final_dict[leaf] = file_list

    if verbose:
        print("\nScan complete. Found directories:")
        for d, fl in final_dict.items():
            print(f" {d} -> {len(fl)} files")

    return final_dict
13417
+
13418
+ ###################################################################################
13419
+
13420
+ print('Module loaded!')
13421
+ print('=' * 70)
13422
+ print('Enjoy! :)')
13423
+ print('=' * 70)
13424
+
13425
+ ###################################################################################
13426
+ # This is the end of the TMIDI X Python module
13427
  ###################################################################################