projectlosangeles committed on
Commit
306d753
·
verified ·
1 Parent(s): a558736

Upload TMIDIX.py

Browse files
Files changed (1) hide show
  1. TMIDIX.py +2295 -56
TMIDIX.py CHANGED
@@ -1,14 +1,12 @@
1
  #! /usr/bin/python3
2
 
3
-
4
  r'''###############################################################################
5
  ###################################################################################
6
  #
7
  #
8
  # Tegridy MIDI X Module (TMIDI X / tee-midi eks)
9
- # Version 1.0
10
  #
11
- # NOTE: TMIDI X Module starts after the partial MIDI.py module @ line 1342
12
  #
13
  # Based upon MIDI.py module v.6.7. by Peter Billam / pjb.com.au
14
  #
@@ -21,19 +19,19 @@ r'''############################################################################
21
  #
22
  ###################################################################################
23
  ###################################################################################
24
- # Copyright 2025 Project Los Angeles / Tegridy Code
25
  #
26
- # Licensed under the Apache License, Version 2.0 (the "License");
27
- # you may not use this file except in compliance with the License.
28
- # You may obtain a copy of the License at
29
  #
30
- # http://www.apache.org/licenses/LICENSE-2.0
31
  #
32
- # Unless required by applicable law or agreed to in writing, software
33
- # distributed under the License is distributed on an "AS IS" BASIS,
34
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
35
- # See the License for the specific language governing permissions and
36
- # limitations under the License.
37
  ###################################################################################
38
  ###################################################################################
39
  #
@@ -48,9 +46,23 @@ r'''############################################################################
48
  # Copyright 2020 Peter Billam
49
  #
50
  ###################################################################################
51
- ###################################################################################'''
 
 
 
 
 
 
 
 
 
 
 
 
 
52
 
53
  import sys, struct, copy
 
54
  Version = '6.7'
55
  VersionDate = '20201120'
56
 
@@ -1440,14 +1452,14 @@ def _encode(events_lol, unknown_callback=None, never_add_eot=False,
1440
  ###################################################################################
1441
  #
1442
  # Tegridy MIDI X Module (TMIDI X / tee-midi eks)
1443
- # Version 1.0
1444
  #
1445
  # Based upon and includes the amazing MIDI.py module v.6.7. by Peter Billam
1446
  # pjb.com.au
1447
  #
1448
  # Project Los Angeles
1449
- # Tegridy Code 2021
1450
- # https://github.com/Tegridy-Code/Project-Los-Angeles
 
1451
  #
1452
  ###################################################################################
1453
  ###################################################################################
@@ -1457,8 +1469,6 @@ import os
1457
 
1458
  import datetime
1459
 
1460
- import copy
1461
-
1462
  from datetime import datetime
1463
 
1464
  import secrets
@@ -1475,12 +1485,13 @@ import multiprocessing
1475
 
1476
  from itertools import zip_longest
1477
  from itertools import groupby
 
1478
  from collections import Counter
 
 
1479
 
1480
  from operator import itemgetter
1481
 
1482
- import sys
1483
-
1484
  from abc import ABC, abstractmethod
1485
 
1486
  from difflib import SequenceMatcher as SM
@@ -1490,6 +1501,21 @@ import math
1490
 
1491
  import matplotlib.pyplot as plt
1492
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1493
  ###################################################################################
1494
  #
1495
  # Original TMIDI Tegridy helper functions
@@ -3842,7 +3868,10 @@ def chordify_score(score,
3842
  else:
3843
  return None
3844
 
3845
- def fix_monophonic_score_durations(monophonic_score):
 
 
 
3846
 
3847
  fixed_score = []
3848
 
@@ -3854,15 +3883,17 @@ def fix_monophonic_score_durations(monophonic_score):
3854
  nmt = monophonic_score[i+1][1]
3855
 
3856
  if note[1]+note[2] >= nmt:
3857
- note_dur = nmt-note[1]-1
3858
  else:
3859
  note_dur = note[2]
3860
 
3861
  new_note = [note[0], note[1], note_dur] + note[3:]
3862
-
3863
- fixed_score.append(new_note)
3864
-
3865
- fixed_score.append(monophonic_score[-1])
 
 
3866
 
3867
  elif type(monophonic_score[0][0]) == int:
3868
 
@@ -3872,15 +3903,17 @@ def fix_monophonic_score_durations(monophonic_score):
3872
  nmt = monophonic_score[i+1][0]
3873
 
3874
  if note[0]+note[1] >= nmt:
3875
- note_dur = nmt-note[0]-1
3876
  else:
3877
  note_dur = note[1]
3878
-
3879
  new_note = [note[0], note_dur] + note[2:]
3880
-
3881
- fixed_score.append(new_note)
3882
-
3883
- fixed_score.append(monophonic_score[-1])
 
 
3884
 
3885
  return fixed_score
3886
 
@@ -4142,15 +4175,16 @@ def tones_chord_to_pitches(tones_chord, base_pitch=60):
4142
  ###################################################################################
4143
 
4144
  def advanced_score_processor(raw_score,
4145
- patches_to_analyze=list(range(129)),
4146
- return_score_analysis=False,
4147
- return_enhanced_score=False,
4148
- return_enhanced_score_notes=False,
4149
- return_enhanced_monophonic_melody=False,
4150
- return_chordified_enhanced_score=False,
4151
- return_chordified_enhanced_score_with_lyrics=False,
4152
- return_score_tones_chords=False,
4153
- return_text_and_lyric_events=False
 
4154
  ):
4155
 
4156
  '''TMIDIX Advanced Score Processor'''
@@ -4179,6 +4213,20 @@ def advanced_score_processor(raw_score,
4179
  basic_single_track_score.append(ev)
4180
  num_tracks += 1
4181
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4182
  basic_single_track_score.sort(key=lambda x: x[4] if x[0] == 'note' else 128, reverse=True)
4183
  basic_single_track_score.sort(key=lambda x: x[1])
4184
 
@@ -4193,7 +4241,7 @@ def advanced_score_processor(raw_score,
4193
  enhanced_single_track_score.append(event)
4194
  num_patch_changes += 1
4195
 
4196
- if event[0] == 'note':
4197
  if event[3] != 9:
4198
  event.extend([patches[event[3]]])
4199
  all_score_patches.extend([patches[event[3]]])
@@ -4693,7 +4741,8 @@ def augment_enhanced_score_notes(enhanced_score_notes,
4693
  ceil_timings=False,
4694
  round_timings=False,
4695
  legacy_timings=True,
4696
- sort_drums_last=False
 
4697
  ):
4698
 
4699
  esn = copy.deepcopy(enhanced_score_notes)
@@ -4736,6 +4785,16 @@ def augment_enhanced_score_notes(enhanced_score_notes,
4736
  e[4] = max(1, min(127, e[4] + pitch_shift))
4737
 
4738
  pe = enhanced_score_notes[i]
 
 
 
 
 
 
 
 
 
 
4739
 
4740
  if full_sorting:
4741
 
@@ -6676,12 +6735,23 @@ def find_next_bar(escore_notes, bar_time, start_note_idx, cur_bar):
6676
  def align_escore_notes_to_bars(escore_notes,
6677
  bar_time=4000,
6678
  trim_durations=False,
6679
- split_durations=False
 
6680
  ):
6681
 
6682
  #=============================================================================
 
 
 
 
 
 
 
 
 
 
6683
 
6684
- aligned_escore_notes = copy.deepcopy(escore_notes)
6685
 
6686
  abs_time = 0
6687
  nidx = 0
@@ -6693,13 +6763,13 @@ def align_escore_notes_to_bars(escore_notes,
6693
 
6694
  while next_bar:
6695
 
6696
- next_bar = find_next_bar(escore_notes, bar_time, nidx, bcount)
6697
 
6698
  if next_bar:
6699
-
6700
- gescore_notes = escore_notes[nidx:next_bar[1]]
6701
  else:
6702
- gescore_notes = escore_notes[nidx:]
6703
 
6704
  original_timings = [delta] + [(b[1]-a[1]) for a, b in zip(gescore_notes[:-1], gescore_notes[1:])]
6705
  adj_timings = adjust_numbers_to_sum(original_timings, bar_time)
@@ -6714,7 +6784,8 @@ def align_escore_notes_to_bars(escore_notes,
6714
  nidx += 1
6715
 
6716
  if next_bar:
6717
- delta = escore_notes[next_bar[1]][1]-escore_notes[next_bar[1]-1][1]
 
6718
  bcount += 1
6719
 
6720
  #=============================================================================
@@ -11125,13 +11196,17 @@ def escore_notes_core(escore_notes, core_len=128):
11125
 
11126
  ###################################################################################
11127
 
11128
- def multiprocessing_wrapper(function, data_list):
11129
 
11130
  with multiprocessing.Pool() as pool:
11131
 
11132
  results = []
11133
 
11134
- for result in tqdm.tqdm(pool.imap_unordered(function, data_list), total=len(data_list)):
 
 
 
 
11135
  results.append(result)
11136
 
11137
  return results
@@ -11182,7 +11257,2171 @@ def rle_decode_ones(encoding, size=(128, 128)):
11182
  return matrix
11183
 
11184
  ###################################################################################
11185
- #
11186
- # This is the end of the TMIDI X Python module
11187
- #
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11188
  ###################################################################################
 
1
  #! /usr/bin/python3
2
 
 
3
  r'''###############################################################################
4
  ###################################################################################
5
  #
6
  #
7
  # Tegridy MIDI X Module (TMIDI X / tee-midi eks)
 
8
  #
9
+ # NOTE: TMIDI X Module starts after the partial MIDI.py module @ line 1450
10
  #
11
  # Based upon MIDI.py module v.6.7. by Peter Billam / pjb.com.au
12
  #
 
19
  #
20
  ###################################################################################
21
  ###################################################################################
22
+ # Copyright 2025 Project Los Angeles / Tegridy Code
23
  #
24
+ # Licensed under the Apache License, Version 2.0 (the "License");
25
+ # you may not use this file except in compliance with the License.
26
+ # You may obtain a copy of the License at
27
  #
28
+ # http://www.apache.org/licenses/LICENSE-2.0
29
  #
30
+ # Unless required by applicable law or agreed to in writing, software
31
+ # distributed under the License is distributed on an "AS IS" BASIS,
32
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
33
+ # See the License for the specific language governing permissions and
34
+ # limitations under the License.
35
  ###################################################################################
36
  ###################################################################################
37
  #
 
46
  # Copyright 2020 Peter Billam
47
  #
48
  ###################################################################################
49
+ ###################################################################################
50
+ '''
51
+
52
+ ###################################################################################
53
+
54
+ __version__ = "25.7.5"
55
+
56
+ print('=' * 70)
57
+ print('TMIDIX Python module')
58
+ print('Version:', __version__)
59
+ print('=' * 70)
60
+ print('Loading module...')
61
+
62
+ ###################################################################################
63
 
64
  import sys, struct, copy
65
+
66
  Version = '6.7'
67
  VersionDate = '20201120'
68
 
 
1452
  ###################################################################################
1453
  #
1454
  # Tegridy MIDI X Module (TMIDI X / tee-midi eks)
 
1455
  #
1456
  # Based upon and includes the amazing MIDI.py module v.6.7. by Peter Billam
1457
  # pjb.com.au
1458
  #
1459
  # Project Los Angeles
1460
+ # Tegridy Code 2025
1461
+ #
1462
+ # https://github.com/Tegridy-Code/Project-Los-Angeles
1463
  #
1464
  ###################################################################################
1465
  ###################################################################################
 
1469
 
1470
  import datetime
1471
 
 
 
1472
  from datetime import datetime
1473
 
1474
  import secrets
 
1485
 
1486
  from itertools import zip_longest
1487
  from itertools import groupby
1488
+
1489
  from collections import Counter
1490
+ from collections import defaultdict
1491
+ from collections import OrderedDict
1492
 
1493
  from operator import itemgetter
1494
 
 
 
1495
  from abc import ABC, abstractmethod
1496
 
1497
  from difflib import SequenceMatcher as SM
 
1501
 
1502
  import matplotlib.pyplot as plt
1503
 
1504
+ import psutil
1505
+
1506
+ import json
1507
+
1508
+ from pathlib import Path
1509
+
1510
+ import shutil
1511
+
1512
+ import hashlib
1513
+
1514
+ from array import array
1515
+
1516
+ from pathlib import Path
1517
+ from fnmatch import fnmatch
1518
+
1519
  ###################################################################################
1520
  #
1521
  # Original TMIDI Tegridy helper functions
 
3868
  else:
3869
  return None
3870
 
3871
+ def fix_monophonic_score_durations(monophonic_score,
3872
+ min_notes_gap=1,
3873
+ min_notes_dur=1
3874
+ ):
3875
 
3876
  fixed_score = []
3877
 
 
3883
  nmt = monophonic_score[i+1][1]
3884
 
3885
  if note[1]+note[2] >= nmt:
3886
+ note_dur = max(1, nmt-note[1]-min_notes_gap)
3887
  else:
3888
  note_dur = note[2]
3889
 
3890
  new_note = [note[0], note[1], note_dur] + note[3:]
3891
+
3892
+ if new_note[2] >= min_notes_dur:
3893
+ fixed_score.append(new_note)
3894
+
3895
+ if monophonic_score[-1][2] >= min_notes_dur:
3896
+ fixed_score.append(monophonic_score[-1])
3897
 
3898
  elif type(monophonic_score[0][0]) == int:
3899
 
 
3903
  nmt = monophonic_score[i+1][0]
3904
 
3905
  if note[0]+note[1] >= nmt:
3906
+ note_dur = max(1, nmt-note[0]-min_notes_gap)
3907
  else:
3908
  note_dur = note[1]
3909
+
3910
  new_note = [note[0], note_dur] + note[2:]
3911
+
3912
+ if new_note[1] >= min_notes_dur:
3913
+ fixed_score.append(new_note)
3914
+
3915
+ if monophonic_score[-1][1] >= min_notes_dur:
3916
+ fixed_score.append(monophonic_score[-1])
3917
 
3918
  return fixed_score
3919
 
 
4175
  ###################################################################################
4176
 
4177
  def advanced_score_processor(raw_score,
4178
+ patches_to_analyze=list(range(129)),
4179
+ return_score_analysis=False,
4180
+ return_enhanced_score=False,
4181
+ return_enhanced_score_notes=False,
4182
+ return_enhanced_monophonic_melody=False,
4183
+ return_chordified_enhanced_score=False,
4184
+ return_chordified_enhanced_score_with_lyrics=False,
4185
+ return_score_tones_chords=False,
4186
+ return_text_and_lyric_events=False,
4187
+ apply_sustain=False
4188
  ):
4189
 
4190
  '''TMIDIX Advanced Score Processor'''
 
4213
  basic_single_track_score.append(ev)
4214
  num_tracks += 1
4215
 
4216
+ for e in basic_single_track_score:
4217
+
4218
+ if e[0] == 'note':
4219
+ e[3] = e[3] % 16
4220
+ e[4] = e[4] % 128
4221
+ e[5] = e[5] % 128
4222
+
4223
+ if e[0] == 'patch_change':
4224
+ e[2] = e[2] % 16
4225
+ e[3] = e[3] % 128
4226
+
4227
+ if apply_sustain:
4228
+ apply_sustain_to_ms_score([1000, basic_single_track_score])
4229
+
4230
  basic_single_track_score.sort(key=lambda x: x[4] if x[0] == 'note' else 128, reverse=True)
4231
  basic_single_track_score.sort(key=lambda x: x[1])
4232
 
 
4241
  enhanced_single_track_score.append(event)
4242
  num_patch_changes += 1
4243
 
4244
+ if event[0] == 'note':
4245
  if event[3] != 9:
4246
  event.extend([patches[event[3]]])
4247
  all_score_patches.extend([patches[event[3]]])
 
4741
  ceil_timings=False,
4742
  round_timings=False,
4743
  legacy_timings=True,
4744
+ sort_drums_last=False,
4745
+ even_timings=False
4746
  ):
4747
 
4748
  esn = copy.deepcopy(enhanced_score_notes)
 
4785
  e[4] = max(1, min(127, e[4] + pitch_shift))
4786
 
4787
  pe = enhanced_score_notes[i]
4788
+
4789
+
4790
+ if even_timings:
4791
+
4792
+ for e in esn:
4793
+ if e[1] % 2 != 0:
4794
+ e[1] += 1
4795
+
4796
+ if e[2] % 2 != 0:
4797
+ e[2] += 1
4798
 
4799
  if full_sorting:
4800
 
 
6735
  def align_escore_notes_to_bars(escore_notes,
6736
  bar_time=4000,
6737
  trim_durations=False,
6738
+ split_durations=False,
6739
+ even_timings=False
6740
  ):
6741
 
6742
  #=============================================================================
6743
+
6744
+ escore = copy.deepcopy(escore_notes)
6745
+
6746
+ if even_timings:
6747
+ for e in escore:
6748
+ if e[1] % 2 != 0:
6749
+ e[1] += 1
6750
+
6751
+ if e[2] % 2 != 0:
6752
+ e[2] += 1
6753
 
6754
+ aligned_escore_notes = copy.deepcopy(escore)
6755
 
6756
  abs_time = 0
6757
  nidx = 0
 
6763
 
6764
  while next_bar:
6765
 
6766
+ next_bar = find_next_bar(escore, bar_time, nidx, bcount)
6767
 
6768
  if next_bar:
6769
+ gescore_notes = escore[nidx:next_bar[1]]
6770
+
6771
  else:
6772
+ gescore_notes = escore[nidx:]
6773
 
6774
  original_timings = [delta] + [(b[1]-a[1]) for a, b in zip(gescore_notes[:-1], gescore_notes[1:])]
6775
  adj_timings = adjust_numbers_to_sum(original_timings, bar_time)
 
6784
  nidx += 1
6785
 
6786
  if next_bar:
6787
+ delta = escore[next_bar[1]][1]-escore[next_bar[1]-1][1]
6788
+
6789
  bcount += 1
6790
 
6791
  #=============================================================================
 
11196
 
11197
  ###################################################################################
11198
 
11199
def multiprocessing_wrapper(function, data_list, verbose=True):

    '''Map function over data_list with a process pool.

    Results are collected in completion order (imap_unordered), so the
    output order generally does not match the input order.

    A tqdm progress bar is shown unless verbose is False.
    '''

    results = []

    with multiprocessing.Pool() as pool:

        unordered_iter = pool.imap_unordered(function, data_list)

        for res in tqdm.tqdm(unordered_iter,
                             total=len(data_list),
                             disable=not verbose
                             ):
            results.append(res)

    return results
 
11257
  return matrix
11258
 
11259
  ###################################################################################
11260
+
11261
def vertical_list_search(list_of_lists, trg_list):

    '''Find every vertical occurrence of trg_list in list_of_lists.

    A match is a run of len(trg_list) consecutive rows where row
    (start + offset) contains trg_list[offset] for every offset.

    Returns a list of row-index lists, one per matching window
    (empty list when either argument is empty).
    '''

    src_list = list_of_lists

    if not src_list or not trg_list:
        return []

    window = len(trg_list)

    # Precompute per-row membership sets for O(1) containment tests.
    memberships = [set(row) for row in src_list]

    matches = []

    for top in range(len(src_list) - window + 1):

        if all(target in memberships[top + off]
               for off, target in enumerate(trg_list)):
            matches.append(list(range(top, top + window)))

    return matches
11288
+
11289
+ ###################################################################################
11290
+
11291
def smooth_values(values, window_size=3):

    '''Smooth a numeric sequence with a centered moving average.

    Each output element is the integer-truncated mean of a window of up
    to window_size neighbours centered on the element; the window is
    clipped at both ends of the sequence.
    '''

    half = window_size // 2
    total = len(values)

    smoothed = []

    for idx in range(total):

        lo = max(0, idx - half)
        hi = min(total, idx + half + 1)

        chunk = values[lo:hi]

        # int() truncates toward zero, matching the original behavior.
        smoothed.append(int(sum(chunk) / len(chunk)))

    return smoothed
11305
+
11306
+ ###################################################################################
11307
+
11308
def is_mostly_wide_peaks_and_valleys(values,
                                     min_range=32,
                                     threshold=0.7,
                                     smoothing_window=5
                                     ):

    '''Heuristic test for a "wavy" sequence.

    Returns True when the smoothed sequence spans at least min_range and
    at least `threshold` (fraction) of its step-to-step moves are
    strictly up or strictly down; flat or empty inputs return False.
    '''

    if not values:
        return False

    smoothed = smooth_values(values, smoothing_window)

    # Too narrow a span cannot be a meaningful peak/valley pattern.
    if max(smoothed) - min(smoothed) < min_range:
        return False

    # A constant sequence has no trend at all.
    if all(v == smoothed[0] for v in smoothed):
        return False

    moves = []

    for prev, cur in zip(smoothed, smoothed[1:]):

        if cur > prev:
            moves.append(1)

        elif cur < prev:
            moves.append(-1)

        else:
            moves.append(0)

    directional = sum(1 for m in moves if m != 0)

    return directional / len(moves) >= threshold
11344
+
11345
+ ###################################################################################
11346
+
11347
def system_memory_utilization(return_dict=False):

    '''Report system RAM utilization via psutil.

    With return_dict=True, returns psutil.virtual_memory() as a plain
    dict; otherwise prints the percent used and GB used and returns None.
    '''

    vmem = psutil.virtual_memory()

    if return_dict:
        return dict(vmem._asdict())

    print('RAM memory % used:', vmem[2])
    print('RAM Used (GB):', vmem[3] / (1024 ** 3))
11356
+ ###################################################################################
11357
+
11358
def system_cpus_utilization(return_dict=False):

    '''Report CPU count and utilization via psutil.

    With return_dict=True, returns {'num_cpus': ..., 'cpus_util': ...};
    otherwise prints both values and returns None.
    '''

    if return_dict:
        return {'num_cpus': psutil.cpu_count(),
                'cpus_util': psutil.cpu_percent()
                }

    print('Number of CPUs:', psutil.cpu_count())
    print('CPUs utilization:', psutil.cpu_percent())
11368
+
11369
+ ###################################################################################
11370
+
11371
def create_files_list(datasets_paths=['./'],
                      files_exts=['.mid', '.midi', '.kar', '.MID', '.MIDI', '.KAR'],
                      max_num_files_per_dir=-1,
                      randomize_dir_files=False,
                      max_total_files=-1,
                      randomize_files_list=True,
                      check_for_dupes=False,
                      use_md5_hashes=False,
                      return_dupes=False,
                      verbose=True
                      ):

    '''Recursively collect files with the given extensions from datasets_paths.

    Args:
        datasets_paths: list of root directories to walk.
        files_exts: accepted file extensions (matched with str.endswith,
            so matching is case-sensitive).
        max_num_files_per_dir: per-directory cap on files considered
            (<= 0 means no cap).
        randomize_dir_files: shuffle each directory's file list before
            applying the per-directory cap.
        max_total_files: cap on the returned list length (<= 0 means no cap).
        randomize_files_list: shuffle the final list before returning.
        check_for_dupes: deduplicate by file name, or by MD5 content hash
            when use_md5_hashes is also True; without it, deduplication is
            by full path only.
        return_dupes: also return the list of skipped duplicate paths.
        verbose: print progress information.

    Returns:
        A list of file paths, or (files, dupes) when return_dupes is True.
    '''

    if verbose:
        print('=' * 70)
        print('Searching for files...')
        print('This may take a while on a large dataset in particular...')
        print('=' * 70)

    files_exts = tuple(files_exts)

    # Insertion-ordered mapping of dedupe key -> file path. A plain dict
    # replaces defaultdict(None), which behaved identically anyway (a None
    # factory never supplies default values).
    filez_set = {}
    dupes_list = []

    for dataset_addr in datasets_paths:

        # Guarded by verbose for consistency with every other progress print.
        if verbose:
            print('=' * 70)
            print('Processing', dataset_addr)
            print('=' * 70)

        for dirpath, dirnames, filenames in tqdm.tqdm(os.walk(dataset_addr), disable=not verbose):

            if randomize_dir_files:
                random.shuffle(filenames)

            if max_num_files_per_dir > 0:
                max_num_files = max_num_files_per_dir

            else:
                max_num_files = len(filenames)

            for file in filenames[:max_num_files]:
                if file.endswith(files_exts):
                    if check_for_dupes:

                        if use_md5_hashes:
                            # Context manager so the handle is closed promptly
                            # (the original leaked one open file per hash).
                            with open(os.path.join(dirpath, file), 'rb') as f:
                                md5_hash = hashlib.md5(f.read()).hexdigest()

                            if md5_hash not in filez_set:
                                filez_set[md5_hash] = os.path.join(dirpath, file)

                            else:
                                dupes_list.append(os.path.join(dirpath, file))

                        else:
                            if file not in filez_set:
                                filez_set[file] = os.path.join(dirpath, file)

                            else:
                                dupes_list.append(os.path.join(dirpath, file))
                    else:
                        fpath = os.path.join(dirpath, file)
                        filez_set[fpath] = fpath

    filez = list(filez_set.values())

    if verbose:
        print('Done!')
        print('=' * 70)

    if filez:
        if randomize_files_list:

            if verbose:
                print('Randomizing file list...')

            random.shuffle(filez)

            if verbose:
                print('Done!')
                print('=' * 70)

        if verbose:
            print('Found', len(filez), 'files.')
            print('Skipped', len(dupes_list), 'duplicate files.')
            print('=' * 70)

    else:
        if verbose:
            print('Could not find any files...')
            print('Please check dataset dirs and files extensions...')
            print('=' * 70)

    if max_total_files > 0:
        if return_dupes:
            return filez[:max_total_files], dupes_list

        else:
            return filez[:max_total_files]

    else:
        if return_dupes:
            return filez, dupes_list

        else:
            return filez
11476
+
11477
+ ###################################################################################
11478
+
11479
def has_consecutive_trend(nums, count):

    '''Return True when nums contains a run of `count` strictly increasing
    OR strictly decreasing consecutive values.

    Runs reset on ties and on direction reversals.
    '''

    if len(nums) < count:
        return False

    up_run = down_run = 1

    for prev, cur in zip(nums, nums[1:]):

        if cur > prev:
            up_run += 1
            down_run = 1

        elif cur < prev:
            down_run += 1
            up_run = 1

        else:
            up_run = down_run = 1

        if count in (up_run, down_run):
            return True

    return False
11503
+
11504
+ ###################################################################################
11505
+
11506
+ def escore_notes_primary_features(escore_notes):
11507
+
11508
+ #=================================================================
11509
+
11510
+ def mean(values):
11511
+ return sum(values) / len(values) if values else None
11512
+
11513
+ def std(values):
11514
+ if not values:
11515
+ return None
11516
+ m = mean(values)
11517
+ return math.sqrt(sum((x - m) ** 2 for x in values) / len(values)) if m is not None else None
11518
+
11519
+ def skew(values):
11520
+ if not values:
11521
+ return None
11522
+ m = mean(values)
11523
+ s = std(values)
11524
+ if s is None or s == 0:
11525
+ return None
11526
+ return sum(((x - m) / s) ** 3 for x in values) / len(values)
11527
+
11528
+ def kurtosis(values):
11529
+ if not values:
11530
+ return None
11531
+ m = mean(values)
11532
+ s = std(values)
11533
+ if s is None or s == 0:
11534
+ return None
11535
+ return sum(((x - m) / s) ** 4 for x in values) / len(values) - 3
11536
+
11537
+ def median(values):
11538
+ if not values:
11539
+ return None
11540
+ srt = sorted(values)
11541
+ n = len(srt)
11542
+ mid = n // 2
11543
+ if n % 2 == 0:
11544
+ return (srt[mid - 1] + srt[mid]) / 2.0
11545
+ return srt[mid]
11546
+
11547
+ def percentile(values, p):
11548
+ if not values:
11549
+ return None
11550
+ srt = sorted(values)
11551
+ n = len(srt)
11552
+ k = (n - 1) * p / 100.0
11553
+ f = int(k)
11554
+ c = k - f
11555
+ if f + 1 < n:
11556
+ return srt[f] * (1 - c) + srt[f + 1] * c
11557
+ return srt[f]
11558
+
11559
+ def diff(values):
11560
+ if not values or len(values) < 2:
11561
+ return []
11562
+ return [values[i + 1] - values[i] for i in range(len(values) - 1)]
11563
+
11564
+ def mad(values):
11565
+ if not values:
11566
+ return None
11567
+ m = median(values)
11568
+ return median([abs(x - m) for x in values])
11569
+
11570
+ def entropy(values):
11571
+ if not values:
11572
+ return None
11573
+ freq = {}
11574
+ for v in values:
11575
+ freq[v] = freq.get(v, 0) + 1
11576
+ total = len(values)
11577
+ ent = 0.0
11578
+ for count in freq.values():
11579
+ p_val = count / total
11580
+ ent -= p_val * math.log2(p_val)
11581
+ return ent
11582
+
11583
+ def mode(values):
11584
+ if not values:
11585
+ return None
11586
+ freq = {}
11587
+ for v in values:
11588
+ freq[v] = freq.get(v, 0) + 1
11589
+ max_count = max(freq.values())
11590
+ modes = [k for k, count in freq.items() if count == max_count]
11591
+ return min(modes)
11592
+
11593
+
11594
+ #=================================================================
11595
+
11596
+ sp_score = solo_piano_escore_notes(escore_notes)
11597
+
11598
+ dscore = delta_score_notes(sp_score)
11599
+
11600
+ seq = []
11601
+
11602
+ for d in dscore:
11603
+ seq.extend([d[1], d[2], d[4]])
11604
+
11605
+ #=================================================================
11606
+
11607
+ n = len(seq)
11608
+ if n % 3 != 0:
11609
+ seq = seq[: n - (n % 3)]
11610
+ arr = [seq[i:i + 3] for i in range(0, len(seq), 3)]
11611
+
11612
+ #=================================================================
11613
+
11614
+ features = {}
11615
+
11616
+ delta_times = [row[0] for row in arr]
11617
+ if delta_times:
11618
+ features['delta_times_mean'] = mean(delta_times)
11619
+ features['delta_times_std'] = std(delta_times)
11620
+ features['delta_times_min'] = min(delta_times)
11621
+ features['delta_times_max'] = max(delta_times)
11622
+ features['delta_times_skew'] = skew(delta_times)
11623
+ features['delta_times_kurtosis'] = kurtosis(delta_times)
11624
+ delta_zero_count = sum(1 for x in delta_times if x == 0)
11625
+ features['delta_times_zero_ratio'] = delta_zero_count / len(delta_times)
11626
+ nonzero_dt = [x for x in delta_times if x != 0]
11627
+ if nonzero_dt:
11628
+ features['delta_times_nonzero_mean'] = mean(nonzero_dt)
11629
+ features['delta_times_nonzero_std'] = std(nonzero_dt)
11630
+ else:
11631
+ features['delta_times_nonzero_mean'] = None
11632
+ features['delta_times_nonzero_std'] = None
11633
+ features['delta_times_mad'] = mad(delta_times)
11634
+ features['delta_times_cv'] = (features['delta_times_std'] / features['delta_times_mean']
11635
+ if features['delta_times_mean'] and features['delta_times_mean'] != 0 else None)
11636
+ features['delta_times_entropy'] = entropy(delta_times)
11637
+ features['delta_times_range'] = max(delta_times) - min(delta_times)
11638
+ features['delta_times_median'] = median(delta_times)
11639
+ features['delta_times_quantile_25'] = percentile(delta_times, 25)
11640
+ features['delta_times_quantile_75'] = percentile(delta_times, 75)
11641
+ if (features['delta_times_quantile_25'] is not None and features['delta_times_quantile_75'] is not None):
11642
+ features['delta_times_iqr'] = features['delta_times_quantile_75'] - features['delta_times_quantile_25']
11643
+ else:
11644
+ features['delta_times_iqr'] = None
11645
+ else:
11646
+ for key in ['delta_times_mean', 'delta_times_std', 'delta_times_min', 'delta_times_max',
11647
+ 'delta_times_skew', 'delta_times_kurtosis', 'delta_times_zero_ratio',
11648
+ 'delta_times_nonzero_mean', 'delta_times_nonzero_std', 'delta_times_mad',
11649
+ 'delta_times_cv', 'delta_times_entropy', 'delta_times_range', 'delta_times_median',
11650
+ 'delta_times_quantile_25', 'delta_times_quantile_75', 'delta_times_iqr']:
11651
+ features[key] = None
11652
+
11653
+ #=================================================================
11654
+
11655
+ durations = [row[1] for row in arr]
11656
+ if durations:
11657
+ features['durations_mean'] = mean(durations)
11658
+ features['durations_std'] = std(durations)
11659
+ features['durations_min'] = min(durations)
11660
+ features['durations_max'] = max(durations)
11661
+ features['durations_skew'] = skew(durations)
11662
+ features['durations_kurtosis'] = kurtosis(durations)
11663
+ features['durations_mad'] = mad(durations)
11664
+ features['durations_cv'] = (features['durations_std'] / features['durations_mean']
11665
+ if features['durations_mean'] and features['durations_mean'] != 0 else None)
11666
+ features['durations_entropy'] = entropy(durations)
11667
+ features['durations_range'] = max(durations) - min(durations)
11668
+ features['durations_median'] = median(durations)
11669
+ features['durations_quantile_25'] = percentile(durations, 25)
11670
+ features['durations_quantile_75'] = percentile(durations, 75)
11671
+ if features['durations_quantile_25'] is not None and features['durations_quantile_75'] is not None:
11672
+ features['durations_iqr'] = features['durations_quantile_75'] - features['durations_quantile_25']
11673
+ else:
11674
+ features['durations_iqr'] = None
11675
+ else:
11676
+ for key in ['durations_mean', 'durations_std', 'durations_min', 'durations_max',
11677
+ 'durations_skew', 'durations_kurtosis', 'durations_mad', 'durations_cv',
11678
+ 'durations_entropy', 'durations_range', 'durations_median', 'durations_quantile_25',
11679
+ 'durations_quantile_75', 'durations_iqr']:
11680
+ features[key] = None
11681
+
11682
+ #=================================================================
11683
+
11684
+ pitches = [row[2] for row in arr]
11685
+ if pitches:
11686
+ features['pitches_mean'] = mean(pitches)
11687
+ features['pitches_std'] = std(pitches)
11688
+ features['pitches_min'] = min(pitches)
11689
+ features['pitches_max'] = max(pitches)
11690
+ features['pitches_skew'] = skew(pitches)
11691
+ features['pitches_kurtosis'] = kurtosis(pitches)
11692
+ features['pitches_range'] = max(pitches) - min(pitches)
11693
+ features['pitches_median'] = median(pitches)
11694
+ features['pitches_quantile_25'] = percentile(pitches, 25)
11695
+ features['pitches_quantile_75'] = percentile(pitches, 75)
11696
+ if len(pitches) > 1:
11697
+ dps = diff(pitches)
11698
+ features['pitches_diff_mean'] = mean(dps)
11699
+ features['pitches_diff_std'] = std(dps)
11700
+ else:
11701
+ features['pitches_diff_mean'] = None
11702
+ features['pitches_diff_std'] = None
11703
+ features['pitches_mad'] = mad(pitches)
11704
+ if len(pitches) > 2:
11705
+ peaks = sum(1 for i in range(1, len(pitches)-1)
11706
+ if pitches[i] > pitches[i-1] and pitches[i] > pitches[i+1])
11707
+ valleys = sum(1 for i in range(1, len(pitches)-1)
11708
+ if pitches[i] < pitches[i-1] and pitches[i] < pitches[i+1])
11709
+ else:
11710
+ peaks, valleys = None, None
11711
+ features['pitches_peak_count'] = peaks
11712
+ features['pitches_valley_count'] = valleys
11713
+ if len(pitches) > 1:
11714
+ x = list(range(len(pitches)))
11715
+ denominator = (len(x) * sum(xi ** 2 for xi in x) - sum(x) ** 2)
11716
+ if denominator != 0:
11717
+ slope = (len(x) * sum(x[i] * pitches[i] for i in range(len(x))) -
11718
+ sum(x) * sum(pitches)) / denominator
11719
+ else:
11720
+ slope = None
11721
+ features['pitches_trend_slope'] = slope
11722
+ else:
11723
+ features['pitches_trend_slope'] = None
11724
+
11725
+ features['pitches_unique_count'] = len(set(pitches))
11726
+ pitch_class_hist = {i: 0 for i in range(12)}
11727
+ for p in pitches:
11728
+ pitch_class_hist[p % 12] += 1
11729
+ total_pitch = len(pitches)
11730
+ for i in range(12):
11731
+ features[f'pitches_pc_{i}'] = (pitch_class_hist[i] / total_pitch) if total_pitch > 0 else None
11732
+
11733
+ max_asc = 0
11734
+ cur_asc = 0
11735
+ max_desc = 0
11736
+ cur_desc = 0
11737
+ for i in range(1, len(pitches)):
11738
+ if pitches[i] > pitches[i-1]:
11739
+ cur_asc += 1
11740
+ max_asc = max(max_asc, cur_asc)
11741
+ cur_desc = 0
11742
+ elif pitches[i] < pitches[i-1]:
11743
+ cur_desc += 1
11744
+ max_desc = max(max_desc, cur_desc)
11745
+ cur_asc = 0
11746
+ else:
11747
+ cur_asc = 0
11748
+ cur_desc = 0
11749
+ features['pitches_max_consecutive_ascending'] = max_asc if pitches else None
11750
+ features['pitches_max_consecutive_descending'] = max_desc if pitches else None
11751
+ p_intervals = diff(pitches)
11752
+ features['pitches_median_diff'] = median(p_intervals) if p_intervals else None
11753
+ if p_intervals:
11754
+ dc = sum(1 for i in range(1, len(p_intervals))
11755
+ if (p_intervals[i] > 0 and p_intervals[i-1] < 0) or (p_intervals[i] < 0 and p_intervals[i-1] > 0))
11756
+ features['pitches_direction_changes'] = dc
11757
+ else:
11758
+ features['pitches_direction_changes'] = None
11759
+ else:
11760
+ for key in (['pitches_mean', 'pitches_std', 'pitches_min', 'pitches_max', 'pitches_skew',
11761
+ 'pitches_kurtosis', 'pitches_range', 'pitches_median', 'pitches_quantile_25',
11762
+ 'pitches_quantile_75', 'pitches_diff_mean', 'pitches_diff_std', 'pitches_mad',
11763
+ 'pitches_peak_count', 'pitches_valley_count', 'pitches_trend_slope',
11764
+ 'pitches_unique_count', 'pitches_max_consecutive_ascending', 'pitches_max_consecutive_descending',
11765
+ 'pitches_median_diff', 'pitches_direction_changes'] +
11766
+ [f'pitches_pc_{i}' for i in range(12)]):
11767
+ features[key] = None
11768
+
11769
+ #=================================================================
11770
+
11771
+ overall = [x for row in arr for x in row]
11772
+ if overall:
11773
+ features['overall_mean'] = mean(overall)
11774
+ features['overall_std'] = std(overall)
11775
+ features['overall_min'] = min(overall)
11776
+ features['overall_max'] = max(overall)
11777
+ features['overall_cv'] = (features['overall_std'] / features['overall_mean']
11778
+ if features['overall_mean'] and features['overall_mean'] != 0 else None)
11779
+ else:
11780
+ for key in ['overall_mean', 'overall_std', 'overall_min', 'overall_max', 'overall_cv']:
11781
+ features[key] = None
11782
+
11783
+ #=================================================================
11784
+
11785
+ onsets = []
11786
+ cumulative = 0
11787
+ for dt in delta_times:
11788
+ onsets.append(cumulative)
11789
+ cumulative += dt
11790
+ if onsets and durations:
11791
+ overall_piece_duration = onsets[-1] + durations[-1]
11792
+ else:
11793
+ overall_piece_duration = None
11794
+ features['overall_piece_duration'] = overall_piece_duration
11795
+ features['overall_notes_density'] = (len(arr) / overall_piece_duration
11796
+ if overall_piece_duration and overall_piece_duration > 0 else None)
11797
+ features['rhythm_ratio'] = (features['durations_mean'] / features['delta_times_mean']
11798
+ if features['delta_times_mean'] and features['delta_times_mean'] != 0 else None)
11799
+ features['overall_sum_delta_times'] = (sum(delta_times) if delta_times else None)
11800
+ features['overall_sum_durations'] = (sum(durations) if durations else None)
11801
+ features['overall_voicing_ratio'] = (sum(durations) / overall_piece_duration
11802
+ if overall_piece_duration and durations else None)
11803
+ features['overall_onset_std'] = std(onsets) if onsets else None
11804
+
11805
+ #=================================================================
11806
+
11807
+ chords_raw = []
11808
+ chords_pc = []
11809
+ current_group = []
11810
+ for i, note in enumerate(arr):
11811
+ dt = note[0]
11812
+ if i == 0:
11813
+ current_group = [i]
11814
+ else:
11815
+ if dt == 0:
11816
+ current_group.append(i)
11817
+ else:
11818
+ if len(current_group) >= 2:
11819
+ chord_notes = [arr[j][2] for j in current_group]
11820
+ chords_raw.append(tuple(sorted(chord_notes)))
11821
+ chords_pc.append(tuple(sorted(set(p % 12 for p in chord_notes))))
11822
+
11823
+ current_group = [i]
11824
+
11825
+ if current_group and len(current_group) >= 2:
11826
+ chord_notes = [arr[j][2] for j in current_group]
11827
+ chords_raw.append(tuple(sorted(chord_notes)))
11828
+ chords_pc.append(tuple(sorted(set(p % 12 for p in chord_notes))))
11829
+
11830
+ if chords_raw:
11831
+ chord_count = len(chords_raw)
11832
+ features['chords_count'] = chord_count
11833
+ features['chords_density'] = (chord_count / overall_piece_duration
11834
+ if overall_piece_duration and chord_count is not None else None)
11835
+ chord_sizes = [len(ch) for ch in chords_raw]
11836
+ features['chords_size_mean'] = mean(chord_sizes)
11837
+ features['chords_size_std'] = std(chord_sizes)
11838
+ features['chords_size_min'] = min(chord_sizes) if chord_sizes else None
11839
+ features['chords_size_max'] = max(chord_sizes) if chord_sizes else None
11840
+ features['chords_unique_raw_count'] = len(set(chords_raw))
11841
+ features['chords_unique_pc_count'] = len(set(chords_pc))
11842
+ features['chords_entropy_raw'] = entropy(chords_raw)
11843
+ features['chords_entropy_pc'] = entropy(chords_pc)
11844
+ if len(chords_raw) > 1:
11845
+ rep_raw = sum(1 for i in range(1, len(chords_raw)) if chords_raw[i] == chords_raw[i - 1])
11846
+ features['chords_repeat_ratio_raw'] = rep_raw / (len(chords_raw) - 1)
11847
+ else:
11848
+ features['chords_repeat_ratio_raw'] = None
11849
+ if len(chords_pc) > 1:
11850
+ rep_pc = sum(1 for i in range(1, len(chords_pc)) if chords_pc[i] == chords_pc[i - 1])
11851
+ features['chords_repeat_ratio_pc'] = rep_pc / (len(chords_pc) - 1)
11852
+ else:
11853
+ features['chords_repeat_ratio_pc'] = None
11854
+ if len(chords_raw) > 1:
11855
+ bigrams_raw = [(chords_raw[i], chords_raw[i + 1]) for i in range(len(chords_raw) - 1)]
11856
+ features['chords_bigram_entropy_raw'] = entropy(bigrams_raw)
11857
+ else:
11858
+ features['chords_bigram_entropy_raw'] = None
11859
+ if len(chords_pc) > 1:
11860
+ bigrams_pc = [(chords_pc[i], chords_pc[i + 1]) for i in range(len(chords_pc) - 1)]
11861
+ features['chords_bigram_entropy_pc'] = entropy(bigrams_pc)
11862
+ else:
11863
+ features['chords_bigram_entropy_pc'] = None
11864
+ features['chords_mode_raw'] = mode(chords_raw)
11865
+ features['chords_mode_pc'] = mode(chords_pc)
11866
+ if chords_pc:
11867
+ pc_sizes = [len(ch) for ch in chords_pc]
11868
+ features['chords_pc_size_mean'] = mean(pc_sizes)
11869
+ else:
11870
+ features['chords_pc_size_mean'] = None
11871
+ else:
11872
+ for key in ['chords_count', 'chords_density', 'chords_size_mean', 'chords_size_std',
11873
+ 'chords_size_min', 'chords_size_max', 'chords_unique_raw_count', 'chords_unique_pc_count',
11874
+ 'chords_entropy_raw', 'chords_entropy_pc', 'chords_repeat_ratio_raw', 'chords_repeat_ratio_pc',
11875
+ 'chords_bigram_entropy_raw', 'chords_bigram_entropy_pc', 'chords_mode_raw', 'chords_mode_pc',
11876
+ 'chords_pc_size_mean']:
11877
+ features[key] = None
11878
+
11879
+ #=================================================================
11880
+
11881
+ if delta_times:
11882
+ med_dt = features['delta_times_median']
11883
+ iqr_dt = features['delta_times_iqr']
11884
+ threshold_a = med_dt + 1.5 * iqr_dt if med_dt is not None and iqr_dt is not None else None
11885
+ threshold_b = percentile(delta_times, 90)
11886
+ if threshold_a is not None and threshold_b is not None:
11887
+ phrase_threshold = max(threshold_a, threshold_b)
11888
+ elif threshold_a is not None:
11889
+ phrase_threshold = threshold_a
11890
+ elif threshold_b is not None:
11891
+ phrase_threshold = threshold_b
11892
+ else:
11893
+ phrase_threshold = None
11894
+ else:
11895
+ phrase_threshold = None
11896
+
11897
+ phrases = []
11898
+ current_phrase = []
11899
+ if onsets:
11900
+ current_phrase.append(0)
11901
+ for i in range(len(onsets) - 1):
11902
+ gap = onsets[i + 1] - onsets[i]
11903
+ if phrase_threshold is not None and gap > phrase_threshold:
11904
+ phrases.append(current_phrase)
11905
+ current_phrase = []
11906
+ current_phrase.append(i + 1)
11907
+ if current_phrase:
11908
+ phrases.append(current_phrase)
11909
+ if phrases:
11910
+ phrase_note_counts = []
11911
+ phrase_durations = []
11912
+ phrase_densities = []
11913
+ phrase_mean_pitches = []
11914
+ phrase_pitch_ranges = []
11915
+ phrase_start_times = []
11916
+ phrase_end_times = []
11917
+ for phrase in phrases:
11918
+ note_count = len(phrase)
11919
+ phrase_note_counts.append(note_count)
11920
+ ph_start = onsets[phrase[0]]
11921
+ ph_end = onsets[phrase[-1]] + durations[phrase[-1]]
11922
+ phrase_start_times.append(ph_start)
11923
+ phrase_end_times.append(ph_end)
11924
+ ph_duration = ph_end - ph_start
11925
+ phrase_durations.append(ph_duration)
11926
+ density = note_count / ph_duration if ph_duration > 0 else None
11927
+ phrase_densities.append(density)
11928
+ ph_pitches = [pitches[i] for i in phrase if i < len(pitches)]
11929
+ phrase_mean_pitches.append(mean(ph_pitches) if ph_pitches else None)
11930
+ phrase_pitch_ranges.append((max(ph_pitches) - min(ph_pitches)) if ph_pitches else None)
11931
+ if len(phrases) > 1:
11932
+ phrase_gaps = []
11933
+ for i in range(len(phrases) - 1):
11934
+ gap = phrase_start_times[i + 1] - phrase_end_times[i]
11935
+ phrase_gaps.append(gap if gap > 0 else 0)
11936
+ else:
11937
+ phrase_gaps = []
11938
+ features['phrases_count'] = len(phrases)
11939
+ features['phrases_avg_note_count'] = mean(phrase_note_counts) if phrase_note_counts else None
11940
+ features['phrases_std_note_count'] = std(phrase_note_counts) if phrase_note_counts else None
11941
+ features['phrases_min_note_count'] = min(phrase_note_counts) if phrase_note_counts else None
11942
+ features['phrases_max_note_count'] = max(phrase_note_counts) if phrase_note_counts else None
11943
+ features['phrases_avg_duration'] = mean(phrase_durations) if phrase_durations else None
11944
+ features['phrases_std_duration'] = std(phrase_durations) if phrase_durations else None
11945
+ features['phrases_min_duration'] = min(phrase_durations) if phrase_durations else None
11946
+ features['phrases_max_duration'] = max(phrase_durations) if phrase_durations else None
11947
+ features['phrases_avg_density'] = mean(phrase_densities) if phrase_densities else None
11948
+ features['phrases_std_density'] = std(phrase_densities) if phrase_densities else None
11949
+ features['phrases_avg_mean_pitch'] = mean(phrase_mean_pitches) if phrase_mean_pitches else None
11950
+ features['phrases_avg_pitch_range'] = mean(phrase_pitch_ranges) if phrase_pitch_ranges else None
11951
+ if phrase_gaps:
11952
+ features['phrases_avg_gap'] = mean(phrase_gaps)
11953
+ features['phrases_std_gap'] = std(phrase_gaps)
11954
+ features['phrases_min_gap'] = min(phrase_gaps)
11955
+ features['phrases_max_gap'] = max(phrase_gaps)
11956
+ else:
11957
+ features['phrases_avg_gap'] = None
11958
+ features['phrases_std_gap'] = None
11959
+ features['phrases_min_gap'] = None
11960
+ features['phrases_max_gap'] = None
11961
+ features['phrases_threshold'] = phrase_threshold
11962
+ else:
11963
+ for key in ['phrases_count', 'phrases_avg_note_count', 'phrases_std_note_count',
11964
+ 'phrases_min_note_count', 'phrases_max_note_count', 'phrases_avg_duration',
11965
+ 'phrases_std_duration', 'phrases_min_duration', 'phrases_max_duration',
11966
+ 'phrases_avg_density', 'phrases_std_density', 'phrases_avg_mean_pitch',
11967
+ 'phrases_avg_pitch_range', 'phrases_avg_gap', 'phrases_std_gap',
11968
+ 'phrases_min_gap', 'phrases_max_gap', 'phrases_threshold']:
11969
+ features[key] = None
11970
+
11971
+ #=================================================================
11972
+
11973
+ return features
11974
+
11975
+ ###################################################################################
11976
+
11977
def winsorized_normalize(data, new_range=(0, 255), clip=1.5):

    """Winsorize *data* with a Tukey-style IQR fence and rescale to *new_range*.

    Values outside [Q1 - clip * IQR, Q3 + clip * IQR] are clipped to the fence
    (bounded by the observed min/max), then linearly mapped onto integers in
    new_range.

    Returns a list of ints the same length as data ([] for empty input).
    """

    new_min, new_max = new_range

    # Robustness fix: an empty input has no percentiles (the original
    # implementation raised IndexError here); return an empty result instead.
    if not data:
        return []

    #=================================================================

    def percentile(values, p):

        # Linear-interpolation percentile (same scheme as numpy's default).
        srt = sorted(values)
        n = len(srt)
        if n == 1:
            return srt[0]
        k = (n - 1) * p / 100.0
        f = int(k)
        c = k - f
        if f + 1 < n:
            return srt[f] * (1 - c) + srt[f + 1] * c

        return srt[f]

    #=================================================================

    q1 = percentile(data, 25)
    q3 = percentile(data, 75)
    iqr = q3 - q1

    lower_bound_w = q1 - clip * iqr
    upper_bound_w = q3 + clip * iqr

    data_min = min(data)
    data_max = max(data)

    # Never widen beyond the data itself.
    effective_low = max(lower_bound_w, data_min)
    effective_high = min(upper_bound_w, data_max)

    #=================================================================

    if effective_high == effective_low:

        # Degenerate fence (e.g. IQR == 0): all-equal data maps to new_min...
        if data_max == data_min:
            return [int(new_min)] * len(data)

        # ...otherwise fall back to plain min-max scaling.
        normalized = [(x - data_min) / (data_max - data_min) for x in data]

        return [int(round(new_min + norm * (new_max - new_min))) for norm in normalized]

    #=================================================================

    clipped = [x if x >= effective_low else effective_low for x in data]
    clipped = [x if x <= effective_high else effective_high for x in clipped]

    normalized = [(x - effective_low) / (effective_high - effective_low) for x in clipped]

    #=================================================================

    return [int(round(new_min + norm * (new_max - new_min))) for norm in normalized]
12034
+
12035
+ ###################################################################################
12036
+
12037
def tokenize_features_to_ints_winsorized(features, new_range=(0, 255), clip=1.5, none_token=-1):

    """Turn a features dict into parallel value/token/normalized-token lists.

    Keys are processed in sorted order. None values become none_token;
    numbers are rounded absolute values; sequences are reduced to their
    mean; anything else is hashed into an 8-digit integer. The token list
    is then winsorize-normalized into new_range.

    Returns (values, tokens, norm_tokens).
    """

    #=================================================================

    def _as_token(v):

        if isinstance(v, (int, float)):
            return int(round(abs(v)))

        if isinstance(v, (list, tuple)):
            return int(round(abs(sum(v) / len(v))))

        # Non-numeric values are mapped through a stable-width hash bucket.
        return int(abs(hash(v)) % (10 ** 8))

    #=================================================================

    values = []
    tokens = []

    for key in sorted(features):

        val = features[key]

        if val is None:
            tokens.append(none_token)
            values.append(none_token)
            continue

        tokens.append(_as_token(val))

        if isinstance(val, (list, tuple)):
            values.append(sum(val) / len(val))

        else:
            values.append(val)

    #=================================================================

    norm_tokens = winsorized_normalize(tokens, new_range, clip)

    #=================================================================

    return values, tokens, norm_tokens
12081
+
12082
+ ###################################################################################
12083
+
12084
def write_jsonl(records_dicts_list,
                file_name='data',
                file_ext='.jsonl',
                file_mode='w',
                line_sep='\n',
                verbose=True
                ):

    """Write a list of dicts to a JSON-Lines file, one record per line.

    file_ext is appended only when file_name has no extension.
    Progress and totals are printed when verbose is True.
    """

    if verbose:
        print('=' * 70)
        print('Writing', len(records_dicts_list), 'records to jsonl file...')
        print('=' * 70)

    if not os.path.splitext(file_name)[1]:
        file_name += file_ext

    l_count = 0

    # Fix: the original called f.close() inside the with-block; the context
    # manager already closes the file, so the explicit close is removed.
    with open(file_name, mode=file_mode) as f:
        for record in tqdm.tqdm(records_dicts_list, disable=not verbose):
            f.write(json.dumps(record) + line_sep)
            l_count += 1

    if verbose:
        print('=' * 70)
        print('Written total of', l_count, 'jsonl records.')
        print('=' * 70)
        print('Done!')
        print('=' * 70)
12115
+
12116
+ ###################################################################################
12117
+
12118
def read_jsonl(file_name='data',
               file_ext='.jsonl',
               verbose=True
               ):

    """Read a JSON-Lines file into a list of records.

    file_ext is appended only when file_name has no extension.
    Corrupted lines are reported (when verbose) and skipped; a
    KeyboardInterrupt returns the records read so far.
    """

    if verbose:
        print('=' * 70)
        print('Reading jsonl file...')
        print('=' * 70)

    if not os.path.splitext(file_name)[1]:
        file_name += file_ext

    records = []
    gl_count = 0

    # Fix: removed the redundant f.close() calls inside the with-block;
    # the context manager closes the file on every exit path.
    with open(file_name, 'r') as f:

        for i, line in tqdm.tqdm(enumerate(f), disable=not verbose):

            try:
                record = json.loads(line)
                records.append(record)
                gl_count += 1

            except KeyboardInterrupt:
                if verbose:
                    print('=' * 70)
                    print('Stopping...')
                    print('=' * 70)

                return records

            except json.JSONDecodeError:
                if verbose:
                    print('=' * 70)
                    print('[ERROR] Line', i, 'is corrupted! Skipping it...')
                    print('=' * 70)

                continue

    if verbose:
        print('=' * 70)
        print('Loaded total of', gl_count, 'jsonl records.')
        print('=' * 70)
        print('Done!')
        print('=' * 70)

    return records
12171
+
12172
+ ###################################################################################
12173
+
12174
def read_jsonl_lines(lines_indexes_list,
                     file_name='data',
                     file_ext='.jsonl',
                     verbose=True
                     ):

    """Read only the specified (0-based) line numbers from a JSON-Lines file.

    Records are returned in file order. file_ext is appended only when
    file_name has no extension. Corrupted lines are reported (when verbose)
    and skipped; a KeyboardInterrupt returns the records read so far.

    Fixes vs. the original: the caller's lines_indexes_list is no longer
    mutated (it was sorted in place), membership tests use a set instead of
    an O(n) list scan per file line, tqdm now honors verbose, and the
    redundant f.close() inside the with-block is gone.
    """

    if verbose:
        print('=' * 70)
        print('Reading jsonl file...')
        print('=' * 70)

    if not os.path.splitext(file_name)[1]:
        file_name += file_ext

    records = []
    l_count = 0

    # Local copy: O(1) membership and no side effect on the caller's list.
    wanted = set(lines_indexes_list)

    with open(file_name, 'r') as f:
        for current_line_number, line in tqdm.tqdm(enumerate(f), disable=not verbose):

            if current_line_number not in wanted:
                continue

            wanted.discard(current_line_number)

            try:
                record = json.loads(line)
                records.append(record)
                l_count += 1

            except KeyboardInterrupt:
                if verbose:
                    print('=' * 70)
                    print('Stopping...')
                    print('=' * 70)

                return records

            except json.JSONDecodeError:
                if verbose:
                    print('=' * 70)
                    print('[ERROR] Line', current_line_number, 'is corrupted! Skipping it...')
                    print('=' * 70)

            # Stop early once every requested line has been seen.
            if not wanted:
                break

    if verbose:
        print('=' * 70)
        print('Loaded total of', l_count, 'jsonl records.')
        print('=' * 70)
        print('Done!')
        print('=' * 70)

    return records
12234
+
12235
+ ###################################################################################
12236
+
12237
def compute_base(x: int, n: int) -> int:

    """Return the smallest base b >= 2 such that x fits in n base-b digits,
    i.e. the smallest b >= 2 with b ** n > x.

    Raises ValueError if x is negative.
    """

    if x < 0:
        raise ValueError("x must be non-negative.")
    if x == 0:
        return 2

    # The float n-th root is only an estimate and can be off by more than
    # one unit for large x, so correct it with a loop rather than the
    # original single `if` bump (which could leave b ** n <= x).
    b = max(2, int(x ** (1 / n)))

    while b ** n <= x:
        b += 1

    return b
12250
+
12251
+ ###################################################################################
12252
+
12253
def encode_int_auto(x: int, n: int) -> tuple[int, list[int]]:

    """Encode x as n big-endian digits in an automatically chosen base.

    The base is the smallest one (>= 2) in which x fits into n digits,
    as computed by compute_base. Returns (base, digits).
    """

    base = compute_base(x, n)

    remaining = x
    reversed_digits = []

    for _ in range(n):
        reversed_digits.append(remaining % base)
        remaining //= base

    reversed_digits.reverse()

    return base, reversed_digits
12263
+
12264
+ ###################################################################################
12265
+
12266
def decode_int_auto(base: int, digits: list[int]) -> int:

    """Decode a big-endian digit list back into an integer.

    Each digit must satisfy 0 <= digit < base; otherwise ValueError is raised.
    An empty digit list decodes to 0.
    """

    result = 0

    for d in digits:
        if not 0 <= d < base:
            raise ValueError(f"Each digit must be in the range 0 to {base - 1}. Invalid digit: {d}")

        result = result * base + d

    return result
12276
+
12277
+ ###################################################################################
12278
+
12279
def encode_int_manual(x, base, n):

    """Encode x as exactly n big-endian digits in the given base.

    If x needs more than n digits, the most significant ones are truncated
    (only the low n digits are kept).
    """

    remaining = x
    reversed_digits = []

    for _ in range(n):
        reversed_digits.append(remaining % base)
        remaining //= base

    return reversed_digits[::-1]
12288
+
12289
+ ###################################################################################
12290
+
12291
def escore_notes_pitches_chords_signature(escore_notes,
                                          max_patch=128,
                                          sort_by_counts=False,
                                          use_full_chords=False
                                          ):

    """Compute a pitches/chords occurrence signature for enhanced score notes.

    Notes with a patch above max_patch are ignored. Each chordified slice
    contributes one token: a single pitch (0-127) or a tones-chord index
    offset by 128; drum pitches (channel 9) get their own token range above
    the chord indices. The count of chords that needed repair is stored
    under key -1.

    Returns a list of [token, count] pairs (insertion order, or descending
    by count when sort_by_counts is True); [] if no notes survive filtering.
    """

    if use_full_chords:
        CHORDS = ALL_CHORDS_FULL

    else:
        CHORDS = ALL_CHORDS_SORTED

    max_patch = max(0, min(128, max_patch))

    escore_notes = [e for e in escore_notes if e[6] <= max_patch]

    if not escore_notes:
        return []

    cscore = chordify_score([1000, escore_notes])

    sig = []
    dsig = []

    # Drum tokens live above the chord-index range.
    drums_offset = len(CHORDS) + 128

    bad_chords_counter = 0

    for c in cscore:

        # Channel 9 (drums) pitches are shifted by 128 to keep them separate.
        all_pitches = [e[4] if e[3] != 9 else e[4] + 128 for e in c]
        chord = sorted(set(all_pitches))

        pitches = sorted([p for p in chord if p < 128], reverse=True)
        drums = [(d + drums_offset) - 128 for d in chord if d > 127]

        if pitches:
            if len(pitches) > 1:
                tones_chord = sorted(set([p % 12 for p in pitches]))

                # Fix: narrowed the original bare `except:` -- list.index
                # raises ValueError for an unrecognized tones chord.
                try:
                    sig_token = CHORDS.index(tones_chord) + 128
                except ValueError:
                    checked_tones_chord = check_and_fix_tones_chord(tones_chord, use_full_chords=use_full_chords)
                    sig_token = CHORDS.index(checked_tones_chord) + 128
                    bad_chords_counter += 1

            else:
                sig_token = pitches[0]

            sig.append(sig_token)

        if drums:
            dsig.extend(drums)

    # Count token occurrences (dict preserves first-seen order).
    sig_p = {}

    for item in sig + dsig:
        sig_p[item] = sig_p.get(item, 0) + 1

    sig_p[-1] = bad_chords_counter

    fsig = [list(v) for v in sig_p.items()]

    if sort_by_counts:
        fsig.sort(key=lambda x: x[1], reverse=True)

    return fsig
12366
+
12367
+ ###################################################################################
12368
+
12369
def compute_sustain_intervals(events):

    """Convert (time, cc_value) sustain-pedal events into pedal-down intervals.

    The pedal counts as down while cc >= 64. If it is still down after the
    last event, the final interval ends at float('inf'). Adjacent/overlapping
    intervals are merged before returning.
    """

    raw_intervals = []
    down_since = None

    for t, cc in events:
        if down_since is None and cc >= 64:
            # Pedal pressed.
            down_since = t
        elif down_since is not None and cc < 64:
            # Pedal released.
            raw_intervals.append((down_since, t))
            down_since = None

    if down_since is not None:
        # Pedal never released: hold to the end of time.
        raw_intervals.append((down_since, float('inf')))

    merged = []

    for start, end in raw_intervals:
        if merged and start <= merged[-1][1]:
            prev_start, prev_end = merged[-1]
            merged[-1] = (prev_start, max(prev_end, end))
        else:
            merged.append((start, end))

    return merged
12397
+
12398
+ ###################################################################################
12399
+
12400
def apply_sustain_to_ms_score(score):

    """Extend note durations in an ms score according to sustain pedal (CC 64).

    A note whose nominal note-off falls strictly inside a pedal-down interval
    on its channel is held until that interval ends. Open-ended pedal
    intervals are capped at the latest note-off in the score. Mutates the
    score in place and returns it.
    """

    # Gather (time, value) pairs of CC 64 events per channel.
    pedal_events = {}

    for track in score[1:]:
        for ev in track:
            if ev[0] == 'control_change' and ev[3] == 64:
                pedal_events.setdefault(ev[2], []).append((ev[1], ev[4]))

    # Pedal-down intervals per channel, from time-sorted events.
    intervals_by_channel = {}

    for channel, evs in pedal_events.items():
        evs.sort(key=lambda x: x[0])
        intervals_by_channel[channel] = compute_sustain_intervals(evs)

    # Latest note-off in the whole score caps open-ended intervals.
    last_note_off = 0

    for track in score[1:]:
        for ev in track:
            if ev[0] == 'note':
                last_note_off = max(last_note_off, ev[1] + ev[2])

    for channel, intervals in intervals_by_channel.items():
        intervals_by_channel[channel] = [
            (start, last_note_off if end == float('inf') else end)
            for start, end in intervals
        ]

    if intervals_by_channel:

        for track in score[1:]:
            for ev in track:
                if ev[0] != 'note':
                    continue

                onset = ev[1]
                note_off = onset + ev[2]

                # First interval that swallows the nominal note-off wins.
                for intv_start, intv_end in intervals_by_channel.get(ev[3], []):
                    if intv_start < note_off < intv_end:
                        note_off = intv_end
                        break

                ev[2] = note_off - onset

    return score
12454
+
12455
+ ###################################################################################
12456
+
12457
def copy_file(src_file: str, trg_dir: str, add_subdir: bool = False, verbose: bool = False):

    """Copy src_file into trg_dir, creating directories as needed.

    When add_subdir is True the file is bucketed into a subdirectory named
    after the first character of its file name. Best-effort: always returns
    None; problems are reported only when verbose is True.
    """

    src_path = Path(src_file)
    target_directory = Path(trg_dir)

    if not src_path.is_file():
        if verbose:
            print("Source file does not exist or is not a file.")

        return None

    target_directory.mkdir(parents=True, exist_ok=True)

    if add_subdir:
        first_letter = src_path.name[0]
        target_directory = target_directory / first_letter
        target_directory.mkdir(parents=True, exist_ok=True)

    destination = target_directory / src_path.name

    try:
        shutil.copy2(src_path, destination)

    # Fix: narrowed the original bare `except:` -- copy failures stay
    # best-effort, but KeyboardInterrupt/SystemExit are no longer swallowed.
    except (OSError, shutil.Error):
        if verbose:
            print('File could not be copied!')

        return None

    if verbose:
        print('File copied!')

    return None
12490
+
12491
+ ###################################################################################
12492
+
12493
def escore_notes_even_timings(escore_notes, in_place=True):

    """Make every note's start time and duration even.

    Odd values at index 1 (start time) and index 2 (duration) are bumped
    up by one.  With in_place=True the input list is mutated and [] is
    returned; otherwise a modified deep copy is returned and the input is
    left untouched.
    """

    notes = escore_notes if in_place else copy.deepcopy(escore_notes)

    for note in notes:
        if note[1] & 1:
            note[1] += 1

        if note[2] & 1:
            note[2] += 1

    return [] if in_place else notes
12516
+
12517
+ ###################################################################################
12518
+
12519
def both_chords(chord1, chord2, merge_threshold=2):

    """Decide whether two adjacent chords should be merged.

    True when their start times differ by at most merge_threshold and at
    least one of the two chords has more than one note.
    """

    if not chord1 or not chord2:
        return False

    close_enough = chord2[0][1] - chord1[0][1] <= merge_threshold

    return close_enough and (len(chord1) > 1 or len(chord2) > 1)
12529
+
12530
def merge_chords(chord1, chord2, sort_drums_last=False):

    """Merge chord2's notes into chord1 (in place) and re-sort the result.

    Notes from chord2 are de-duplicated against each other by their
    (pitch, patch) pair; all notes after the first inherit the first
    note's start time.  With sort_drums_last=True drum notes (patch 128)
    sort after pitched notes.
    """

    merged = chord1
    used_keys = set()

    for note in chord2:
        key = (note[4], note[6])
        if key not in used_keys:
            used_keys.add(key)
            merged.append(note)

    anchor_time = merged[0][1]
    for note in merged[1:]:
        note[1] = anchor_time

    if sort_drums_last:
        key_fn = lambda x: (-x[4], x[6]) if x[6] != 128 else (x[6], -x[4])
    else:
        key_fn = lambda x: (-x[4], x[6])

    merged.sort(key=key_fn)

    return merged
12551
+
12552
def merge_escore_notes(escore_notes, merge_threshold=2, sort_drums_last=False):

    """Merge chords whose start times are within merge_threshold of each
    other and return the flattened merged notes.

    Fixes:
      * the final accumulated chord is now appended after the loop;
        previously it was silently dropped from the output.
      * empty input returns [] instead of raising IndexError.
    """

    if not escore_notes:
        return []

    cscore = chordify_score([1000, escore_notes])

    merged_chords = []
    merged_chord = cscore[0]

    for cchord in cscore[1:]:

        if both_chords(merged_chord, cchord, merge_threshold=merge_threshold):
            merged_chord = merge_chords(merged_chord, cchord, sort_drums_last=sort_drums_last)

        else:
            merged_chords.append(merged_chord)
            merged_chord = cchord

    # Flush the last pending chord (previously lost).
    merged_chords.append(merged_chord)

    return flatten(merged_chords)
12571
+
12572
+ ###################################################################################
12573
+
12574
def solo_piano_escore_notes_tokenized(escore_notes,
                                      compress_start_times=True,
                                      encode_velocities=False,
                                      verbose=False
                                      ):

    """Encode enhanced score notes as a flat solo-piano integer token list.

    The score is reduced to solo piano, re-based to start at time zero and
    delta-encoded (delta times clipped to 127).  Each note then emits:

      * delta-time token            (0..127)
      * duration token  + 128       (128..255)
      * pitch token     + 256       (256..383)
      * velocity token  + 384       only when encode_velocities=True

    With compress_start_times=True the delta-time token is omitted for
    notes that start together with the previous note (chord members), and
    the first note always emits an explicit 0 time token.
    """

    if verbose:
        print('=' * 70)
        print('Encoding MIDI...')

    # Reduce to solo piano, re-base timings, then delta-encode.
    sp_escore_notes = solo_piano_escore_notes(escore_notes)
    zscore = recalculate_score_timings(sp_escore_notes)
    dscore = delta_score_notes(zscore, timings_clip_value=127)

    score = []

    notes_counter = 0
    chords_counter = 1  # the first chord is counted up front

    for i, e in enumerate(dscore):

        dtime = e[1]
        dur = e[2]
        ptc = e[4]
        vel = e[5]

        if compress_start_times:

            if i == 0:
                # First note: always an explicit 0 time token.
                score.extend([0, dur+128, ptc+256])

                if encode_velocities:
                    score.append(vel+384)

            else:
                if dtime == 0:
                    # Chord member: the time token is omitted.
                    score.extend([dur+128, ptc+256])

                else:
                    score.extend([dtime, dur+128, ptc+256])

                if encode_velocities:
                    score.append(vel+384)

                if dtime != 0:
                    chords_counter += 1

        else:
            score.extend([dtime, dur+128, ptc+256])

            if encode_velocities:
                score.append(vel+384)

            if dtime != 0:
                chords_counter += 1

        notes_counter += 1

    if verbose:
        print('Done!')
        print('=' * 70)

        print('Source MIDI composition has', len(zscore), 'notes')
        print('Source MIDI composition has', len([d[1] for d in dscore if d[1] !=0 ])+1, 'chords')
        print('-' * 70)
        print('Encoded sequence has', notes_counter, 'pitches')
        print('Encoded sequence has', chords_counter, 'chords')
        print('-' * 70)
        print('Final encoded sequence has', len(score), 'tokens')
        print('=' * 70)

    return score
12646
+
12647
+ ###################################################################################
12648
+
12649
def equalize_closest_elements_dynamic(seq,
                                      min_val=128,
                                      max_val=256,
                                      splitting_factor=1.5,
                                      tightness_threshold=0.15
                                      ):

    """Replace the largest tight cluster of values in [min_val, max_val]
    with their integer mean.

    Candidate values are clustered by splitting at gaps larger than
    splitting_factor times the median gap.  The biggest cluster (ties
    broken by smaller spread) is equalized only if its spread relative to
    the allowed range is within tightness_threshold.  Always returns a
    copy; the input is never mutated.
    """

    candidates = [(idx, val) for idx, val in enumerate(seq) if min_val <= val <= max_val]

    if len(candidates) < 2:
        return seq.copy()

    by_value = sorted(candidates, key=lambda pair: pair[1])
    values = [v for _, v in by_value]

    gaps = [b - a for a, b in zip(values, values[1:])]

    def _median(data):
        ordered = sorted(data)
        half = len(data) // 2

        if len(data) % 2:
            return ordered[half]

        return (ordered[half - 1] + ordered[half]) / 2.0

    gap_cutoff = splitting_factor * _median(gaps)

    # Split sorted candidates into clusters at unusually large gaps.
    clusters = []
    current = [by_value[0]]

    for pair, gap in zip(by_value[1:], gaps):
        if gap > gap_cutoff:
            clusters.append(current)
            current = []
        current.append(pair)

    clusters.append(current)

    valid = [cluster for cluster in clusters if len(cluster) >= 2]
    if not valid:
        return seq.copy()

    def _spread(cluster):
        vals = [v for _, v in cluster]
        return max(vals) - min(vals)

    # Prefer larger clusters; among equal sizes, the tighter one.
    valid.sort(key=lambda cluster: (len(cluster), -_spread(cluster)), reverse=True)
    chosen = valid[0]

    if _spread(chosen) / (max_val - min_val) > tightness_threshold:
        return seq.copy()

    members = [v for _, v in chosen]
    level = sum(members) // len(members)

    result = list(seq)
    for idx, _ in chosen:
        result[idx] = level

    return result
12722
+
12723
+ ###################################################################################
12724
+
12725
def chunk_list(lst, chunk_size):

    """Split lst into consecutive pieces of at most chunk_size elements."""

    chunks = []
    pos = 0

    while pos < len(lst):
        chunks.append(lst[pos:pos + chunk_size])
        pos += chunk_size

    return chunks
12727
+
12728
+ ###################################################################################
12729
+
12730
def compress_tokens_sequence(seq,
                             min_val=128,
                             max_val=256,
                             group_size=2,
                             splitting_factor=1.5,
                             tightness_threshold=0.15
                             ):

    """Equalize near-identical values in seq, then regroup it so that
    runs of chunks sharing the same leading token store that token once."""

    equalized = equalize_closest_elements_dynamic(seq,
                                                  min_val,
                                                  max_val,
                                                  splitting_factor=splitting_factor,
                                                  tightness_threshold=tightness_threshold
                                                  )

    # Chunk, then order chunks by descending head token and second token.
    groups = sorted(chunk_list(equalized, group_size), key=lambda g: (-g[0], -g[1]))

    # Collapse equal-head runs: [[head]] followed by each chunk's tail.
    regrouped = [[[head]] + [chunk[1:] for chunk in run]
                 for head, run in groupby(groups, key=lambda g: g[0])]

    return flatten(flatten(sorted(regrouped, key=lambda g: -g[1][0])))
12750
+
12751
+ ###################################################################################
12752
+
12753
def merge_adjacent_pairs(values_counts):

    """Collapse consecutive (value, count) pairs whose values differ by 1.

    The surviving value is the one with the larger count (ties keep the
    lower value) and the counts are summed.  Pairs are consumed left to
    right, so each entry participates in at most one merge per call.
    """

    merged = []
    idx = 0
    total = len(values_counts)

    while idx < total:

        if idx + 1 < total:
            val_a, cnt_a = values_counts[idx]
            val_b, cnt_b = values_counts[idx + 1]

            if val_b - val_a == 1:
                winner = val_b if cnt_b > cnt_a else val_a
                merged.append((winner, cnt_a + cnt_b))
                idx += 2
                continue

        merged.append(values_counts[idx])
        idx += 1

    return merged
12783
+
12784
+ ###################################################################################
12785
+
12786
def merge_escore_notes_start_times(escore_notes, num_merges=1):

    """Quantize note start times by repeatedly merging adjacent delta-time
    bins (at most num_merges passes), snapping each non-zero delta to the
    closest surviving bin and shortening durations by num_merges.
    """

    dscore = delta_score_notes(escore_notes)

    nonzero_times = [note[1] for note in dscore if note[1] != 0]
    counts = sorted(Counter(nonzero_times).most_common())

    previous = None
    merges_done = 0

    # Merge neighboring bins until stable or the pass budget is spent.
    while previous != counts:
        previous = counts
        counts = merge_adjacent_pairs(counts)

        merges_done += 1

        if merges_done == num_merges:
            break

    grid = [pair[0] for pair in counts]

    for note in dscore:
        if note[1] > 0:
            note[1] = find_closest_value(grid, note[1])[0]
            note[2] -= num_merges

    return delta_score_to_abs_score(dscore)
12815
+
12816
+ ###################################################################################
12817
+
12818
def multi_instrumental_escore_notes_tokenized(escore_notes, compress_seq=False):

    """Tokenize enhanced score notes into a flat integer sequence.

    Each note contributes a delta-time token (0-255), a duration/velocity
    token (offset 256) and a patch/pitch token (offset 2304; drums on
    channel 9 use pseudo-patch 128).  With compress_seq=True the
    delta-time token is emitted only for the first note or when non-zero.
    """

    tokens = []

    prev_note = escore_notes[0]

    for pos, note in enumerate(escore_notes):

        delta_time = min(255, max(0, note[1] - prev_note[1]))
        duration = min(255, max(0, note[2]))
        channel = min(15, max(0, note[3]))

        # Channel 9 is drums; it gets a dedicated pseudo-patch slot.
        patch = 128 if channel == 9 else min(127, max(0, note[6]))

        pitch = min(127, max(0, note[4]))

        velocity = min(127, max(8, note[5]))
        vel_token = round(velocity / 15) - 1

        dur_vel = (8 * duration) + vel_token
        pat_ptc = (129 * patch) + pitch

        if compress_seq and delta_time == 0 and pos != 0:
            tokens.extend([dur_vel + 256, pat_ptc + 2304])

        else:
            tokens.extend([delta_time, dur_vel + 256, pat_ptc + 2304])

        prev_note = note

    return tokens
12859
+
12860
+ ###################################################################################
12861
+
12862
def merge_counts(data, return_lists=True):

    """Sum counts for duplicate values in a list of (value, count) pairs.

    Returns [[value, total], ...] by default, or [(value, total), ...]
    when return_lists=False.  First-seen order of values is preserved.
    """

    totals = defaultdict(int)

    for value, count in data:
        totals[value] += count

    if return_lists:
        return [[value, total] for value, total in totals.items()]

    return list(totals.items())
12874
+
12875
+ ###################################################################################
12876
+
12877
def convert_escore_notes_pitches_chords_signature(signature, convert_to_full_chords=True):

    """Convert a pitches/chords signature between the sorted and the full
    chord vocabularies.

    Pitch entries (0..127) pass through, chord entries are re-indexed into
    the target vocabulary (repairing chords missing from it), drum entries
    are shifted by the vocabulary size difference, and the bad-chords
    counter entry ([-1, n]) tracks how many chords needed repair.

    Fix: previously raised IndexError when a chord had to be repaired but
    the signature contained no [-1, n] entry; the entry is now created on
    demand.
    """

    if convert_to_full_chords:
        SRC_CHORDS = ALL_CHORDS_SORTED
        TRG_CHORDS = ALL_CHORDS_FULL

    else:
        SRC_CHORDS = ALL_CHORDS_FULL
        TRG_CHORDS = ALL_CHORDS_SORTED

    cdiff = len(TRG_CHORDS) - len(SRC_CHORDS)

    pitches_counts = [c for c in signature if -1 < c[0] < 128]
    chords_counts = [c for c in signature if 127 < c[0] < len(SRC_CHORDS)+128]
    drums_counts = [[c[0]+cdiff, c[1]] for c in signature if len(SRC_CHORDS)+127 < c[0] < len(SRC_CHORDS)+256]
    bad_chords_count = [c for c in signature if c[0] == -1]

    new_chords_counts = []

    for c in chords_counts:
        tones_chord = SRC_CHORDS[c[0]-128]

        if tones_chord not in TRG_CHORDS:
            tones_chord = check_and_fix_tones_chord(tones_chord, use_full_chords=convert_to_full_chords)

            # Guard: the signature may lack a bad-chords entry; create one
            # instead of indexing into an empty list.
            if not bad_chords_count:
                bad_chords_count = [[-1, 0]]

            bad_chords_count[0][1] += 1

        new_chords_counts.append([TRG_CHORDS.index(tones_chord)+128, c[1]])

    return pitches_counts + merge_counts(new_chords_counts) + drums_counts + bad_chords_count
12906
+
12907
+ ###################################################################################
12908
+
12909
def convert_bytes_in_nested_list(lst, encoding='utf-8', errors='ignore'):

    """Recursively decode every bytes object in a (possibly nested) list.

    NOTE: a later definition with the same name (and an extra counting
    option) shadows this one at import time.

    Fix: the recursive call now forwards *encoding* and *errors*; nested
    lists were previously decoded with the defaults regardless of the
    arguments passed by the caller.
    """

    new_list = []

    for item in lst:
        if isinstance(item, list):
            new_list.append(convert_bytes_in_nested_list(item, encoding, errors))

        elif isinstance(item, bytes):
            new_list.append(item.decode(encoding, errors=errors))

        else:
            new_list.append(item)

    return new_list
12924
+
12925
+ ###################################################################################
12926
+
12927
def mult_pitches(pitches, min_oct=4, max_oct=6):

    """Expand the unique tones of *pitches* across octaves [min_oct, max_oct)."""

    unique_tones = sorted(set(p % 12 for p in pitches))

    return [octave * 12 + tone
            for tone in unique_tones
            for octave in range(min_oct, max_oct)]
12938
+
12939
+ ###################################################################################
12940
+
12941
def find_next(pitches, cur_ptc):

    """Return the index of the first pitch that differs from cur_ptc.

    If every pitch equals cur_ptc the last index is returned; an empty
    list yields 0.
    """

    last_index = max(len(pitches) - 1, 0)

    return next((i for i, p in enumerate(pitches) if p != cur_ptc), last_index)
12950
+
12951
+ ###################################################################################
12952
+
12953
def ordered_groups_unsorted(data, key_index):

    """Group consecutive sublists sharing the same value at key_index.

    Returns (key, group_list) tuples in encounter order; non-adjacent runs
    with equal keys stay separate (itertools.groupby semantics).
    """

    return [(key, list(run))
            for key, run in groupby(data, key=lambda row: row[key_index])]
12964
+
12965
+ ###################################################################################
12966
+
12967
def ordered_groups(data, ptc_idx, pat_idx):

    """Group sublists by their (pitch, patch) pair, preserving the order
    in which each pair is first seen."""

    buckets = OrderedDict()

    for row in data:
        buckets.setdefault((row[ptc_idx], row[pat_idx]), []).append(row)

    return list(buckets.items())
12980
+
12981
+ ###################################################################################
12982
+
12983
def merge_melody_notes(escore_notes, pitches_idx=4, max_dur=255, last_dur=128):

    """Collapse consecutive equal-pitch melody notes into single notes.

    Each run of notes sharing a pitch becomes one note lasting until the
    next run starts (capped at max_dur); the final run becomes a single
    note of fixed duration last_dur.

    Fix: returns [] for empty input instead of raising IndexError.
    """

    if not escore_notes:
        return []

    groups = ordered_groups_unsorted(escore_notes, pitches_idx)

    merged_melody_notes = []

    for i, (k, g) in enumerate(groups[:-1]):

        if len(g) == 1:
            merged_melody_notes.extend(g)

        else:
            # Run of repeated pitches: one note from the run's start to the
            # start of the next group, capped at max_dur.
            dur = min(max_dur, groups[i+1][1][0][1] - g[0][1])

            merged_melody_notes.append(['note',
                                        g[0][1],
                                        dur,
                                        g[0][3],
                                        g[0][4],
                                        g[0][5],
                                        g[0][6]
                                        ])

    # The last group always becomes one note with a fixed duration.
    merged_melody_notes.append(['note',
                                groups[-1][1][0][1],
                                last_dur,
                                groups[-1][1][0][3],
                                groups[-1][1][0][4],
                                groups[-1][1][0][5],
                                groups[-1][1][0][6]
                                ])

    return merged_melody_notes
13016
+
13017
+ ###################################################################################
13018
+
13019
def add_expressive_melody_to_enhanced_score_notes(escore_notes,
                                                  melody_start_chord=0,
                                                  melody_prime_pitch=60,
                                                  melody_step=1,
                                                  melody_channel=3,
                                                  melody_patch=40,
                                                  melody_notes_max_duration=255,
                                                  melody_last_note_dur=128,
                                                  melody_clip_max_min_durs=[],
                                                  melody_max_velocity=120,
                                                  acc_max_velocity=90,
                                                  return_melody=False
                                                  ):

    """Derive an expressive melody line from a score and overlay it.

    A lead pitch is chosen for every melody_step-th chord (the multi-octave
    expansion of the chord's tones closest to the previous lead pitch),
    rendered on melody_channel/melody_patch, then merged back into a
    velocity-capped copy of the input score.

    Returns the combined score, or only the (unmerged) melody notes when
    return_melody=True.  The input escore_notes is not modified.
    """

    # NOTE(review): melody_clip_max_min_durs=[] is a mutable default; it is
    # only read here so sharing between calls is harmless, but a None
    # default would be safer.

    score = copy.deepcopy(escore_notes)

    # Cap accompaniment velocities before adding the (louder) melody.
    adjust_score_velocities(score, acc_max_velocity)

    cscore = chordify_score([1000, score])

    melody_pitches = [melody_prime_pitch]

    # Pick one lead pitch per sampled chord.
    for i, c in enumerate(cscore[melody_start_chord:]):

        if i % melody_step == 0:

            pitches = [e[4] for e in c if e[3] != 9]  # ignore drums (channel 9)

            if pitches:
                cptc = find_closest_value(mult_pitches(pitches), melody_pitches[-1])[0]
                melody_pitches.append(cptc)

    song_f = []
    mel_f = []

    idx = 1  # melody_pitches[0] is the prime pitch; real picks start at 1

    for i, c in enumerate(cscore[:-melody_step]):
        pitches = [e[4] for e in c if e[3] != 9]

        if pitches and i >= melody_start_chord and i % melody_step == 0:
            # Melody note lasts until the next sampled chord, capped.
            dur = min(cscore[i+melody_step][0][1] - c[0][1], melody_notes_max_duration)

            mel_f.append(['note',
                          c[0][1],
                          dur,
                          melody_channel,
                          60+(melody_pitches[idx] % 24),  # fold lead pitch into two octaves above C4
                          100 + ((melody_pitches[idx] % 12) * 2),  # pitch-dependent expressive velocity
                          melody_patch
                          ])
            idx += 1

        song_f.extend(c)

    song_f.extend(flatten(cscore[-melody_step:]))

    if len(melody_clip_max_min_durs) == 2:
        for e in mel_f:
            # NOTE(review): durations at or above the first value are set to
            # the second value — confirm this max->min clamp is intended.
            if e[2] >= melody_clip_max_min_durs[0]:
                e[2] = melody_clip_max_min_durs[1]

    adjust_score_velocities(mel_f, melody_max_velocity)

    merged_melody_notes = merge_melody_notes(mel_f,
                                             max_dur=melody_notes_max_duration,
                                             last_dur=melody_last_note_dur
                                             )

    song_f = sorted(merged_melody_notes + song_f,
                    key=lambda x: x[1]
                    )

    if return_melody:
        return mel_f

    else:
        return song_f
13098
+
13099
+ ###################################################################################
13100
+
13101
+ def list_md5_hash(ints_list):
13102
+
13103
+ arr = array('H', ints_list)
13104
+ binary_data = arr.tobytes()
13105
+
13106
+ return hashlib.md5(binary_data).hexdigest()
13107
+
13108
+ ###################################################################################
13109
+
13110
def fix_escore_notes_durations(escore_notes,
                               min_notes_gap=1,
                               min_notes_dur=1,
                               times_idx=1,
                               durs_idx=2,
                               channels_idx=3,
                               pitches_idx=4,
                               patches_idx=6
                               ):

    """Trim overlapping durations within every (pitch, patch) voice.

    Drum notes (channel 9) pass through untouched; all other notes are
    grouped per (pitch, patch) and de-overlapped, then everything is
    re-merged in start-time order.
    """

    drums = []
    pitched = []

    for note in escore_notes:
        (drums if note[channels_idx] == 9 else pitched).append(note)

    fixed_notes = []

    for _, group in ordered_groups(pitched, pitches_idx, patches_idx):

        if len(group) > 2:
            fixed_notes.extend(fix_monophonic_score_durations(group,
                                                              min_notes_gap=min_notes_gap,
                                                              min_notes_dur=min_notes_dur
                                                              ))

        else:
            if len(group) == 2:
                first, second = group

                # Clip the first note so it no longer overlaps the second.
                if first[times_idx] + first[durs_idx] >= second[times_idx]:
                    first[durs_idx] = max(1, second[times_idx] - first[times_idx] - min_notes_gap)

            fixed_notes.extend(group)

    return sorted(fixed_notes + drums, key=lambda note: note[times_idx])
13146
+
13147
+ ###################################################################################
13148
+
13149
def create_nested_chords_tree(chords_list):

    """Build a trie over chords.

    Each node maps a semitone to a child node; complete chords are stored
    in a list under the sentinel key -1 at their terminal node.
    """

    tree = {}

    for chord in chords_list:

        node = tree

        for semitone in chord:
            node = node.setdefault(semitone, {})

        node.setdefault(-1, []).append(chord)

    return tree
13166
+
13167
+ ###################################################################################
13168
+
13169
def get_chords_with_prefix(nested_chords_tree, prefix):

    """Return every chord stored at or below *prefix* in a chords trie.

    Returns [] if the prefix path does not exist.
    """

    node = nested_chords_tree

    for semitone in prefix:
        if semitone not in node:
            return []

        node = node[semitone]

    chords = []

    def _walk(subtree):
        # Chords at this node come first, then children in key order.
        chords.extend(subtree.get(-1, []))

        for key, child in subtree.items():
            if key != -1:
                _walk(child)

    _walk(node)

    return chords
13193
+
13194
+ ###################################################################################
13195
+
13196
def get_chords_by_semitones(chords_list, chord_semitones):

    """Return the chords (sorted, de-duplicated) that contain every
    semitone in chord_semitones."""

    wanted = set(chord_semitones)

    return [sorted(set(chord)) for chord in chords_list if wanted.issubset(chord)]
13209
+
13210
+ ###################################################################################
13211
+
13212
def remove_duplicate_pitches_from_escore_notes(escore_notes,
                                               pitches_idx=4,
                                               patches_idx=6,
                                               return_dupes_count=False
                                               ):

    """Drop notes that duplicate an earlier note's (pitch, patch) pair
    within the same chord.

    Returns the filtered notes, or just the number of removed duplicates
    when return_dupes_count=True.
    """

    chords = chordify_score([1000, escore_notes])

    filtered = []
    dupes = 0

    for chord in chords:

        seen_keys = set()

        for note in chord:
            key = (note[pitches_idx], note[patches_idx])

            if key in seen_keys:
                dupes += 1
            else:
                seen_keys.add(key)
                filtered.append(note)

    if return_dupes_count:
        return dupes

    return filtered
13244
+
13245
+ ###################################################################################
13246
+
13247
def chunks_shuffle(lst,
                   min_len=1,
                   max_len=3,
                   seed=None
                   ):

    """Split lst into random-length chunks (min_len..max_len items),
    shuffle the chunks, and return the flattened result.

    A fixed seed gives a reproducible ordering; elements inside a chunk
    keep their relative order.
    """

    rng = random.Random(seed)

    pieces = []
    start = 0
    total = len(lst)

    while start < total:
        take = min(rng.randint(min_len, max_len), total - start)
        pieces.append(lst[start:start + take])
        start += take

    rng.shuffle(pieces)

    return [item for piece in pieces for item in piece]
13270
+
13271
+ ###################################################################################
13272
+
13273
def convert_bytes_in_nested_list(lst,
                                 encoding='utf-8',
                                 errors='ignore',
                                 return_changed_events_count=False
                                 ):

    """Recursively decode bytes objects in a nested list, optionally
    reporting how many bytes objects were decoded.

    Fixes: the recursive call now forwards *encoding*/*errors* (nested
    lists used to be decoded with the defaults), and decode counts from
    nested lists are now included in the reported total (they were
    previously lost).
    """

    new_list = []

    ce_count = 0

    for item in lst:
        if isinstance(item, list):
            sub_list, sub_count = convert_bytes_in_nested_list(item,
                                                               encoding,
                                                               errors,
                                                               return_changed_events_count=True
                                                               )
            new_list.append(sub_list)
            ce_count += sub_count

        elif isinstance(item, bytes):
            new_list.append(item.decode(encoding, errors=errors))
            ce_count += 1

        else:
            new_list.append(item)

    if return_changed_events_count:
        return new_list, ce_count

    else:
        return new_list
13299
+
13300
+ ###################################################################################
13301
+
13302
def find_deepest_midi_dirs(roots,
                           marker_file="midi_score.mid",
                           suffixes=None,
                           randomize=False,
                           seed=None,
                           verbose=False
                           ):

    """Collect MIDI files from the leaf directories under one or more roots.

    A "leaf" is a directory with no subdirectories.  Leaves are kept only
    if one of their file names matches a marker pattern (fnmatch,
    case-insensitive); with no marker given, every leaf qualifies.  From
    each kept leaf, files whose suffix is in *suffixes* (default
    .mid/.midi/.kar) are collected.

    Returns {leaf_dir_path: [file_paths]}, sorted, or shuffled with the
    given seed when randomize=True.

    NOTE(review): uses tqdm for the progress bar; progress output appears
    only when verbose=True.
    """

    # Accept a single path or any iterable of paths.
    try:
        iter(roots)
        if isinstance(roots, (str, Path)):
            root_list = [roots]
        else:
            root_list = list(roots)

    except TypeError:
        root_list = [roots]

    # Normalize marker pattern(s) to a lowercase list.
    if isinstance(marker_file, (list, tuple)):
        patterns = [p.lower() for p in marker_file if p]

    else:
        patterns = [marker_file.lower()] if marker_file else []

    allowed = {s.lower() for s in (suffixes or ['.mid', '.midi', '.kar'])}

    if verbose:
        print("Settings:")
        print(" Roots:", [str(r) for r in root_list])
        print(" Marker patterns:", patterns or "<no marker filter>")
        print(" Allowed suffixes:", allowed)
        print(f" Randomize={randomize}, Seed={seed}")

    results = defaultdict(list)
    rng = random.Random(seed)

    for root in root_list:

        root_path = Path(root)

        if not root_path.is_dir():
            print(f"Warning: '{root_path}' is not a valid directory, skipping.")
            continue

        if verbose:
            print(f"\nScanning root: {str(root_path)}")

        all_dirs = list(root_path.rglob("*"))
        dirs_iter = tqdm.tqdm(all_dirs, desc=f"Dirs in {root_path.name}", disable=not verbose)

        for dirpath in dirs_iter:
            if not dirpath.is_dir():
                continue

            # Only leaf directories (no subdirectories) are considered.
            children = list(dirpath.iterdir())
            if any(child.is_dir() for child in children):
                if verbose:
                    print(f"Skipping non-leaf: {str(dirpath)}")
                continue

            files = [f for f in children if f.is_file()]
            names = [f.name.lower() for f in files]

            if patterns:
                # fnmatch allows glob-style markers like "*.mscz".
                matched = any(fnmatch(name, pat) for name in names for pat in patterns)
                if not matched:
                    if verbose:
                        print(f"No marker in: {str(dirpath)}")
                    continue

                if verbose:
                    print(f"Marker found in: {str(dirpath)}")

            else:
                if verbose:
                    print(f"Including leaf (no marker): {str(dirpath)}")

            for f in files:
                if f.suffix.lower() in allowed:
                    results[str(dirpath)].append(str(f))

                    if verbose:
                        print(f" Collected: {f.name}")

    all_leaves = list(results.keys())
    if randomize:
        if verbose:
            print("\nShuffling leaf directories")

        rng.shuffle(all_leaves)

    else:
        all_leaves.sort()

    final_dict = {}

    for leaf in all_leaves:
        file_list = results[leaf][:]
        if randomize:
            if verbose:
                print(f"Shuffling files in: {leaf}")

            rng.shuffle(file_list)

        else:
            file_list.sort()

        final_dict[leaf] = file_list

    if verbose:
        print("\nScan complete. Found directories:")
        for d, fl in final_dict.items():
            print(f" {d} -> {len(fl)} files")

    return final_dict
13417
+
13418
+ ###################################################################################
13419
+
13420
+ print('Module loaded!')
13421
+ print('=' * 70)
13422
+ print('Enjoy! :)')
13423
+ print('=' * 70)
13424
+
13425
+ ###################################################################################
13426
+ # This is the end of the TMIDI X Python module
13427
  ###################################################################################