File size: 35,283 Bytes
9bedde5
ac7919b
9bedde5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ac7919b
9bedde5
 
 
 
 
 
 
 
ac7919b
9bedde5
 
 
 
 
 
 
ac7919b
9bedde5
 
 
 
 
ac7919b
9bedde5
 
 
ac7919b
9bedde5
 
 
 
 
 
 
 
 
ac7919b
9bedde5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ac7919b
 
 
9bedde5
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
"""UltraSinger uses AI to automatically create UltraStar song files"""

import copy
import getopt
import os
import sys
import re

import Levenshtein
import librosa

from tqdm import tqdm
from packaging import version

import soundfile as sf

from modules import os_helper
from modules.Audio.denoise import ffmpeg_reduce_noise
from modules.Audio.separation import separate_audio
from modules.Audio.vocal_chunks import (
    export_chunks_from_transcribed_data,
    export_chunks_from_ultrastar_data,
)
from modules.Audio.silence_processing import remove_silence_from_transcription_data, get_silence_sections
from modules.csv_handler import export_transcribed_data_to_csv
from modules.Audio.convert_audio import convert_audio_to_mono_wav, convert_wav_to_mp3
from modules.Audio.youtube import (
    download_youtube_audio,
    download_youtube_thumbnail,
    download_youtube_video,
    get_youtube_title,
)
from modules.DeviceDetection.device_detection import check_gpu_support
from modules.console_colors import (
    ULTRASINGER_HEAD,
    blue_highlighted,
    gold_highlighted,
    light_blue_highlighted,
    red_highlighted,
)
from modules.Midi import midi_creator
from modules.Midi.midi_creator import (
    convert_frequencies_to_notes,
    create_midi_notes_from_pitched_data,
    most_frequent,
)
from modules.Pitcher.pitcher import (
    get_frequencies_with_high_confidence,
    get_pitch_with_crepe_file,
)
from modules.Pitcher.pitched_data import PitchedData
from modules.Speech_Recognition.hyphenation import hyphenation, language_check, create_hyphenator
from modules.Speech_Recognition.Whisper import transcribe_with_whisper
from modules.Ultrastar import ultrastar_score_calculator, ultrastar_writer, ultrastar_converter, ultrastar_parser
from modules.Ultrastar.ultrastar_txt import UltrastarTxtValue
from Settings import Settings
from modules.Speech_Recognition.TranscribedData import TranscribedData
from modules.plot import plot, plot_spectrogram
from modules.musicbrainz_client import get_music_infos

# Module-wide settings instance; the functions below read their options from it
# and run() stores the path of the audio file being processed on it.
settings = Settings()


def convert_midi_notes_to_ultrastar_notes(midi_notes: list[str]) -> list[int]:
    """Translate note names into Ultrastar pitch numbers.

    Each entry of ``midi_notes`` is converted to a midi note number with
    ``librosa.note_to_midi`` and then mapped to an Ultrastar pitch by
    ``ultrastar_converter.midi_note_to_ultrastar_note``. The result has one
    pitch per input note, in the same order.
    """
    print(f"{ULTRASINGER_HEAD} Creating Ultrastar notes from midi data")

    # todo: Progress?
    return [
        ultrastar_converter.midi_note_to_ultrastar_note(
            librosa.note_to_midi(midi_note)
        )
        for midi_note in midi_notes
    ]


def pitch_each_chunk_with_crepe(directory: str) -> list[str]:
    """Pitch every ``.wav`` chunk in ``directory`` and return one note per chunk.

    The chunk files are processed in the numeric order of the second
    ``_``-separated field of their file name. For every chunk the most
    frequent note among the high-confidence crepe frequencies is kept.
    """
    print(
        f"{ULTRASINGER_HEAD} Pitching each chunk with {blue_highlighted('crepe')}"
    )

    def chunk_number(name):
        # File names carry their position as the second "_"-separated field.
        return int(name.split("_")[1])

    wav_files = [name for name in os.listdir(directory) if name.endswith(".wav")]
    wav_files.sort(key=chunk_number)

    midi_notes = []
    for wav_file in wav_files:
        # todo: stepsize = duration? then when shorter than "it" it should take the duration. Otherwise there a more notes
        pitched_data = get_pitch_with_crepe_file(
            os.path.join(directory, wav_file),
            settings.crepe_model_capacity,
            settings.crepe_step_size,
            settings.tensorflow_device,
        )
        confident_frequencies = get_frequencies_with_high_confidence(
            pitched_data.frequencies, pitched_data.confidence
        )
        chunk_notes = convert_frequencies_to_notes(confident_frequencies)

        # todo: Progress?
        midi_notes.append(most_frequent(chunk_notes)[0][0])

    return midi_notes


def add_hyphen_to_data(transcribed_data: list[TranscribedData], hyphen_words: list[list[str]]):
    """Split hyphenated words into one entry per syllable and return a new list.

    ``hyphen_words[i]`` holds the syllables for ``transcribed_data[i]``; a
    falsy value means the word is kept as it is. A hyphenated word is
    replaced by shallow copies of its entry, one per syllable, that share the
    word's time span in equal parts. Every copy is flagged ``is_hyphen`` and
    only the last one is flagged ``is_word_end``.
    """
    hyphenated_data = []

    for index, entry in enumerate(transcribed_data):
        syllables = hyphen_words[index]
        if not syllables:
            hyphenated_data.append(entry)
            continue

        syllable_count = len(syllables)
        syllable_duration = (entry.end - entry.start) / syllable_count

        segment_start = entry.start
        for position, syllable in enumerate(syllables):
            # Measure from the word end so the last syllable ends exactly there.
            remaining = syllable_count - 1 - position
            segment_end = entry.end - syllable_duration * remaining

            part = copy.copy(entry)
            part.start = segment_start
            part.end = segment_end
            part.word = syllable
            part.is_hyphen = True
            part.is_word_end = remaining == 0
            hyphenated_data.append(part)

            segment_start = segment_end

    return hyphenated_data


def get_bpm_from_data(data, sampling_rate):
    """Estimate the tempo of audio samples, print it and return it.

    The onset strength envelope of ``data`` is computed with librosa and fed
    to librosa's tempo estimator; the first element of the estimate is
    returned.
    """
    onset_env = librosa.onset.onset_strength(y=data, sr=sampling_rate)

    # librosa 0.10 moved the estimator to librosa.feature.tempo and deprecated
    # librosa.beat.tempo; use the new location when this librosa has it.
    estimate_tempo = getattr(librosa.feature, "tempo", None) or librosa.beat.tempo
    wav_tempo = estimate_tempo(onset_envelope=onset_env, sr=sampling_rate)

    print(
        f"{ULTRASINGER_HEAD} BPM is {blue_highlighted(str(round(wav_tempo[0], 2)))}"
    )
    return wav_tempo[0]


def get_bpm_from_file(wav_file: str) -> float:
    """Load ``wav_file`` at its native sampling rate and return its bpm."""
    # sr=None keeps the sampling rate stored in the file.
    samples_and_rate = librosa.load(wav_file, sr=None)
    return get_bpm_from_data(*samples_and_rate)


def correct_words(recognized_words, word_list_file):
    """Replace recognized words that are not in a word list by their closest match.

    ``word_list_file`` is read as UTF-8 text and split on whitespace. Every
    item of ``recognized_words`` whose ``word`` is missing from that list gets
    the list entry with the smallest Levenshtein distance (the first one on a
    tie); each replacement is printed as ``old - new``.

    The items are modified in place and the same ``recognized_words`` object is
    returned. If the file contains no words nothing is changed.
    """
    with open(word_list_file, "r", encoding="utf-8") as file:
        word_list = file.read().split()

    # Without candidates min() below would raise ValueError.
    if not word_list:
        return recognized_words

    # Set for the membership test; the list keeps its order for tie-breaking.
    known_words = set(word_list)

    for rec_word in recognized_words:
        spoken_word = rec_word.word
        if spoken_word in known_words:
            continue

        closest_word = min(
            word_list,
            key=lambda candidate: Levenshtein.distance(spoken_word, candidate),
        )
        print(spoken_word + " - " + closest_word)
        rec_word.word = closest_word
    return recognized_words


def print_help() -> None:
    """Write the command line usage overview to stdout."""
    usage_text = """
    UltraSinger.py [opt] [mode] [transcription] [pitcher] [extra]
    
    [opt]
    -h      This help text.
    -i      Ultrastar.txt
            audio like .mp3, .wav, youtube link
    -o      Output folder
    
    [mode]
    ## INPUT is audio ##
    default  Creates all
    
    # Single file creation selection is in progress, you currently getting all!
    (-u      Create ultrastar txt file) # In Progress
    (-m      Create midi file) # In Progress
    (-s      Create sheet file) # In Progress
    
    ## INPUT is ultrastar.txt ##
    default  Creates all

    # Single selection is in progress, you currently getting all!
    (-r      repitch Ultrastar.txt (input has to be audio)) # In Progress
    (-p      Check pitch of Ultrastar.txt input) # In Progress
    (-m      Create midi file) # In Progress

    [transcription]
    # Default is whisper
    --whisper               Multilingual model > tiny|base|small|medium|large-v1|large-v2  >> ((default) is large-v2
                            English-only model > tiny.en|base.en|small.en|medium.en
    --whisper_align_model   Use other languages model for Whisper provided from huggingface.co
    --language              Override the language detected by whisper, does not affect transcription but steps after transcription
    --whisper_batch_size    Reduce if low on GPU mem >> ((default) is 16)
    --whisper_compute_type  Change to "int8" if low on GPU mem (may reduce accuracy) >> ((default) is "float16" for cuda devices, "int8" for cpu)
    
    [pitcher]
    # Default is crepe
    --crepe            tiny|full >> ((default) is full)
    --crepe_step_size  unit is miliseconds >> ((default) is 10)
    
    [extra]
    --hyphenation           True|False >> ((default) is True)
    --disable_separation    True|False >> ((default) is False)
    --disable_karaoke       True|False >> ((default) is False)
    --create_audio_chunks   True|False >> ((default) is False)
    --keep_cache            True|False >> ((default) is False)
    --plot                  True|False >> ((default) is False)
    --format_version        0.3.0|1.0.0|1.1.0 >> ((default) is 1.0.0)
    
    [device]
    --force_cpu             True|False >> ((default) is False)  All steps will be forced to cpu
    --force_whisper_cpu     True|False >> ((default) is False)  Only whisper will be forced to cpu
    --force_crepe_cpu       True|False >> ((default) is False)  Only crepe will be forced to cpu
    """
    print(usage_text)


def remove_unecessary_punctuations(transcribed_data: list[TranscribedData]) -> None:
    """Delete every period and comma from the words of the transcription.

    The ``word`` attribute of each item is rewritten in place; nothing is
    returned.
    """
    # Build the deletion table once instead of once per word.
    removal_table = str.maketrans("", "", ".,")
    for data in transcribed_data:
        data.word = data.word.translate(removal_table)


def hyphenate_each_word(language: str, transcribed_data: list[TranscribedData]) -> list[list[str]] | None:
    """Hyphenate each word in the transcribed data.

    Returns one ``hyphenation(...)`` result per item of ``transcribed_data``,
    in the same order. Returns ``None`` after printing an error message when
    ``language_check`` does not know the language or when creating the
    hyphenator or hyphenating a word raises an exception.
    """
    error_message = f"{ULTRASINGER_HEAD} {red_highlighted('Error in hyphenation for language ')} {blue_highlighted(language)}{red_highlighted(', maybe you want to disable it?')}"

    lang_region = language_check(language)
    if lang_region is None:
        print(error_message)
        return None

    try:
        hyphenator = create_hyphenator(lang_region)
        # Iterating the list itself (not enumerate) lets tqdm see its length
        # and show a real progress bar.
        return [
            hyphenation(data.word, hyphenator)
            for data in tqdm(transcribed_data)
        ]
    except Exception:
        # Best effort: any hyphenation failure disables hyphenation for this
        # run, but KeyboardInterrupt / SystemExit are no longer swallowed.
        print(error_message)
        return None


def print_support() -> None:
    """Print an empty line followed by the three-line call for support."""
    print()
    support_lines = (
        f"{ULTRASINGER_HEAD} {gold_highlighted('Do you like UltraSinger? Want it to be even better? Then help with your')} {light_blue_highlighted('support')}{gold_highlighted('!')}",
        f"{ULTRASINGER_HEAD} See project page -> https://github.com/rakuri255/UltraSinger",
        f"{ULTRASINGER_HEAD} {gold_highlighted('This will help a lot to keep this project alive and improved.')}",
    )
    for support_line in support_lines:
        print(support_line)

def print_version() -> None:
    """Print an empty line and the application version framed by two star lines."""
    print()
    border = f"{ULTRASINGER_HEAD} {gold_highlighted('*****************************')}"
    version_line = f"{ULTRASINGER_HEAD} {gold_highlighted('UltraSinger Version:')} {light_blue_highlighted(settings.APP_VERSION)}"
    for banner_line in (border, version_line, border):
        print(banner_line)

def run() -> None:
    """Run the whole conversion pipeline for ``settings.input_file_path``.

    Steps, in order:
      1. Work out the input kind (Ultrastar txt, YouTube link or audio file)
         and prepare the song output folder.
      2. Prepare the audio inside a ``cache`` folder: vocal separation,
         optional karaoke / instrumental / vocals mp3 export, denoising,
         mono conversion and muting of the parts without singing.
      3. For audio input: transcribe, strip punctuation, optionally hyphenate
         and remove silence from the transcription.
      4. Optionally export audio chunks, then pitch the audio.
      5. Optionally plot, write the Ultrastar txt, calculate the score and add
         it to the txt, optionally create a midi file.
      6. Remove the cache folder unless it should be kept and print the
         support text.
    """
    # NOTE(review): substring test, so any path that merely contains ".txt"
    # (not only one ending in it) is handled as an Ultrastar txt.
    is_audio = ".txt" not in settings.input_file_path
    ultrastar_class = None
    real_bpm = None
    (title, artist, year, genre) = (None, None, None, None)

    if not is_audio:  # Parse Ultrastar txt
        print(
            f"{ULTRASINGER_HEAD} {gold_highlighted('re-pitch mode')}"
        )
        (
            basename_without_ext,
            real_bpm,
            song_output,
            ultrastar_audio_input_path,
            ultrastar_class,
        ) = parse_ultrastar_txt()
    # NOTE(review): only "https:" links are recognised here; a plain "http:"
    # link falls through to the audio file branch.
    elif settings.input_file_path.startswith("https:"):  # Youtube
        print(
            f"{ULTRASINGER_HEAD} {gold_highlighted('full automatic mode')}"
        )
        (
            basename_without_ext,
            song_output,
            ultrastar_audio_input_path,
            (title, artist, year, genre)
        ) = download_from_youtube()
    else:  # Audio File
        print(
            f"{ULTRASINGER_HEAD} {gold_highlighted('full automatic mode')}"
        )
        (
            basename_without_ext,
            song_output,
            ultrastar_audio_input_path,
            (title, artist, year, genre)
        ) = infos_from_audio_input_file()

    # All intermediate files live in a "cache" folder inside the song output.
    cache_path = os.path.join(song_output, "cache")
    settings.processing_audio_path = os.path.join(
        cache_path, basename_without_ext + ".wav"
    )
    os_helper.create_folder(cache_path)

    # Separate vocal from audio
    audio_separation_path = separate_vocal_from_audio(
        basename_without_ext, cache_path, ultrastar_audio_input_path
    )
    vocals_path = os.path.join(audio_separation_path, "vocals.wav")
    instrumental_path = os.path.join(audio_separation_path, "no_vocals.wav")

    # Move instrumental and vocals
    # Formats older than 1.1.0 get a separate "[Karaoke]" mp3 of the instrumental.
    if settings.create_karaoke and version.parse(settings.format_version) < version.parse("1.1.0"):
        karaoke_output_path = os.path.join(song_output, basename_without_ext + " [Karaoke].mp3")
        convert_wav_to_mp3(instrumental_path, karaoke_output_path)

    # From format 1.1.0 on both stems are exported as mp3.
    # NOTE(review): separate_vocal_from_audio only runs the separation when
    # use_separated_vocal or create_karaoke is set, yet this branch converts
    # the stems regardless - confirm they exist when both options are off.
    if version.parse(settings.format_version) >= version.parse("1.1.0"):
        instrumental_output_path = os.path.join(song_output, basename_without_ext + " [Instrumental].mp3")
        convert_wav_to_mp3(instrumental_path, instrumental_output_path)
        vocals_output_path = os.path.join(song_output, basename_without_ext + " [Vocals].mp3")
        convert_wav_to_mp3(vocals_path, vocals_output_path)

    # Continue with the separated vocals, or with the untouched input audio.
    if settings.use_separated_vocal:
        input_path = vocals_path
    else:
        input_path = ultrastar_audio_input_path

    # Denoise vocal audio
    denoised_output_path = os.path.join(
        cache_path, basename_without_ext + "_denoised.wav"
    )
    denoise_vocal_audio(input_path, denoised_output_path)

    # Convert to mono audio
    mono_output_path = os.path.join(
        cache_path, basename_without_ext + "_mono.wav"
    )
    convert_audio_to_mono_wav(denoised_output_path, mono_output_path)

    # Mute silence sections
    mute_output_path = os.path.join(
        cache_path, basename_without_ext + "_mute.wav"
    )
    mute_no_singing_parts(mono_output_path, mute_output_path)

    # Define the audio file to process
    settings.processing_audio_path = mute_output_path

    # Audio transcription
    # Only audio input is transcribed; in re-pitch mode transcribed_data stays None.
    transcribed_data = None
    language = settings.language
    if is_audio:
        detected_language, transcribed_data = transcribe_audio()
        # A language given in the settings wins over the detected one.
        if language is None:
            language = detected_language

        remove_unecessary_punctuations(transcribed_data)

        if settings.hyphenation:
            hyphen_words = hyphenate_each_word(language, transcribed_data)
            # None means hyphenation failed; keep the words unsplit then.
            if hyphen_words is not None:
                transcribed_data = add_hyphen_to_data(transcribed_data, hyphen_words)

        transcribed_data = remove_silence_from_transcription_data(
            settings.processing_audio_path, transcribed_data
        )

        # todo: do we need to correct words?
        # lyric = 'input/faber_lyric.txt'
        # --corrected_words = correct_words(vosk_speech, lyric)

    # Create audio chunks
    if settings.create_audio_chunks:
        create_audio_chunks(
            cache_path,
            is_audio,
            transcribed_data,
            ultrastar_audio_input_path,
            ultrastar_class,
        )

    # Pitch the audio
    midi_notes, pitched_data, ultrastar_note_numbers = pitch_audio(
        is_audio, transcribed_data, ultrastar_class
    )

    # Create plot
    if settings.create_plot:
        # Same path as the vocals_path computed after the separation step.
        vocals_path = os.path.join(audio_separation_path, "vocals.wav")
        plot_spectrogram(vocals_path, song_output, "vocals.wav")
        plot_spectrogram(settings.processing_audio_path, song_output, "processing audio")
        plot(pitched_data, song_output, transcribed_data, ultrastar_class, midi_notes)

    # Write Ultrastar txt
    if is_audio:
        # For audio input the bpm is measured while the txt is created.
        real_bpm, ultrastar_file_output = create_ultrastar_txt_from_automation(
            basename_without_ext,
            song_output,
            transcribed_data,
            ultrastar_audio_input_path,
            ultrastar_note_numbers,
            language,
            title,
            artist,
            year,
            genre
        )
    else:
        ultrastar_file_output = create_ultrastar_txt_from_ultrastar_data(
            song_output, ultrastar_class, ultrastar_note_numbers
        )

    # Calc Points
    ultrastar_class, simple_score, accurate_score = calculate_score_points(
        is_audio, pitched_data, ultrastar_class, ultrastar_file_output
    )

    # Add calculated score to Ultrastar txt #Todo: Missing Karaoke
    ultrastar_writer.add_score_to_ultrastar_txt(
        ultrastar_file_output, simple_score
    )

    # Midi
    if settings.create_midi:
        create_midi_file(real_bpm, song_output, ultrastar_class, basename_without_ext)

    # Cleanup
    if not settings.keep_cache:
        remove_cache_folder(cache_path)

    # Print Support
    print_support()


def mute_no_singing_parts(mono_output_path, mute_output_path):
    """Write a copy of the audio in which every silence section is zeroed.

    The sections come from ``get_silence_sections(mono_output_path)``; the
    first two values of each section are used as start and end time. The audio
    is loaded at its native sampling rate and the result is written to
    ``mute_output_path`` with the same rate.
    """
    print(
        f"{ULTRASINGER_HEAD} Mute audio parts with no singing"
    )
    silence_sections = get_silence_sections(mono_output_path)
    samples, sample_rate = librosa.load(mono_output_path, sr=None)

    for section in silence_sections:
        # Times are multiplied by the sampling rate to get sample positions.
        first_sample = int(section[0] * sample_rate)
        last_sample = int(section[1] * sample_rate)
        samples[first_sample:last_sample] = 0

    sf.write(mute_output_path, samples, sample_rate)


def get_unused_song_output_dir(path: str) -> str:
    """Return ``path`` if no such folder exists, else the first free ``path (i)``.

    The counter ``i`` runs from 1 to 999. When all of those folders exist an
    error is printed and the program exits with status 1.
    """
    if not os_helper.check_if_folder_exists(path):
        return path

    # Build each candidate from the untouched base path. Rewriting the previous
    # candidate with str.replace would also change a "(1)" that is part of the
    # song name or of a parent folder.
    for i in range(1, 1000):
        candidate = f"{path} ({i})"
        if not os_helper.check_if_folder_exists(candidate):
            return candidate

    print(
        f"{ULTRASINGER_HEAD} {red_highlighted('Error: Could not create output folder! (999) is the maximum number of tries.')}"
    )
    sys.exit(1)


def transcribe_audio() -> tuple[str, list[TranscribedData]]:
    """Transcribe ``settings.processing_audio_path`` with the configured transcriber.

    Returns:
        The detected language and the transcribed data, in that order.

    Raises:
        NotImplementedError: if ``settings.transcriber`` is not ``"whisper"``.
    """
    if settings.transcriber != "whisper":
        raise NotImplementedError(
            f"Transcriber '{settings.transcriber}' is not supported"
        )

    # Whisper can be forced onto the cpu independently of the other steps.
    device = "cpu" if settings.force_whisper_cpu else settings.pytorch_device
    transcribed_data, detected_language = transcribe_with_whisper(
        settings.processing_audio_path,
        settings.whisper_model,
        device,
        settings.whisper_align_model,
        settings.whisper_batch_size,
        settings.whisper_compute_type,
        settings.language,
    )
    return detected_language, transcribed_data


def separate_vocal_from_audio(
        basename_without_ext: str, cache_path: str, ultrastar_audio_input_path: str
) -> str:
    """Run the audio separation if it is needed and return its output folder.

    The returned folder is ``<cache_path>/separated/htdemucs/<basename>``. The
    separation itself only runs when the separated vocal is used or a karaoke
    file is requested; the path is returned either way.
    """
    separation_folder = os.path.join(
        cache_path, "separated", "htdemucs", basename_without_ext
    )

    if not (settings.use_separated_vocal or settings.create_karaoke):
        return separation_folder

    separate_audio(ultrastar_audio_input_path, cache_path, settings.pytorch_device)
    return separation_folder

def calculate_score_points(
    is_audio: bool, pitched_data: PitchedData, ultrastar_class: UltrastarTxtValue, ultrastar_file_output: str
):
    """Score the written Ultrastar txt against the pitched data.

    For Ultrastar txt input (``is_audio`` is false) the score of the given
    ``ultrastar_class`` is calculated and printed first, so it can be compared
    with the re-pitched result. In both modes ``ultrastar_file_output`` is then
    parsed, scored and printed.

    Returns the parsed output txt together with its simple and accurate score.
    """
    if not is_audio:
        print(
            f"{ULTRASINGER_HEAD} {blue_highlighted('Score of original Ultrastar txt')}"
        )
        original_scores = ultrastar_score_calculator.calculate_score(
            pitched_data, ultrastar_class
        )
        original_simple_score, original_accurate_score = original_scores
        ultrastar_score_calculator.print_score_calculation(
            original_simple_score, original_accurate_score
        )
        print(
            f"{ULTRASINGER_HEAD} {blue_highlighted('Score of re-pitched Ultrastar txt')}"
        )

    # From here on both modes score the file that was just written.
    ultrastar_class = ultrastar_parser.parse_ultrastar_txt(
        ultrastar_file_output
    )
    scores = ultrastar_score_calculator.calculate_score(
        pitched_data, ultrastar_class
    )
    simple_score, accurate_score = scores
    ultrastar_score_calculator.print_score_calculation(
        simple_score, accurate_score
    )
    return ultrastar_class, simple_score, accurate_score


def create_ultrastar_txt_from_ultrastar_data(
    song_output: str, ultrastar_class: UltrastarTxtValue, ultrastar_note_numbers: list[int]
) -> str:
    """Write the re-pitched Ultrastar txt and return its path.

    The file is named after the song title and placed in ``song_output``; it
    is created from the input txt (``settings.input_file_path``) and the new
    note numbers.
    """
    repitched_file_name = ultrastar_class.title + ".txt"
    repitched_txt_path = os.path.join(song_output, repitched_file_name)

    ultrastar_writer.create_repitched_txt_from_ultrastar_data(
        settings.input_file_path,
        ultrastar_note_numbers,
        repitched_txt_path,
    )
    return repitched_txt_path


def create_ultrastar_txt_from_automation(
    basename_without_ext: str,
    song_output: str,
    transcribed_data: list[TranscribedData],
    ultrastar_audio_input_path: str,
    ultrastar_note_numbers: list[int],
    language: str,
    title: str,
    artist: str,
    year: str,
    genre: str
):
    """Write the Ultrastar txt for an automatically processed song.

    The header is filled from ``basename_without_ext``; ``title``, ``artist``,
    ``year`` and ``genre`` replace or extend it when they are not ``None``.
    The bpm is measured from ``ultrastar_audio_input_path``. For karaoke with
    a format version below 1.1.0 a second ``[Karaoke]`` txt is written.

    Returns the measured bpm and the path of the main txt file.
    """
    audio_file_name = basename_without_ext + ".mp3"
    cover_file_name = basename_without_ext + " [CO].jpg"

    header = UltrastarTxtValue()
    header.version = settings.format_version
    # Fall back to the file name when no title / artist was supplied.
    header.title = basename_without_ext if title is None else title
    header.artist = basename_without_ext if artist is None else artist
    header.mp3 = audio_file_name
    header.audio = audio_file_name
    header.vocals = basename_without_ext + " [Vocals].mp3"
    header.instrumental = basename_without_ext + " [Instrumental].mp3"
    header.video = basename_without_ext + ".mp4"
    header.language = language

    # Only reference the cover when the file is present in the song folder.
    if os_helper.check_file_exists(os.path.join(song_output, cover_file_name)):
        header.cover = cover_file_name
    else:
        header.cover = None

    header.creator = f"{header.creator} {Settings.APP_VERSION}"
    header.comment = f"{header.comment} {Settings.APP_VERSION}"

    # Additional data
    if year is not None:
        header.year = extract_year(year)
    if genre is not None:
        header.genre = format_separated_string(genre)

    real_bpm = get_bpm_from_file(ultrastar_audio_input_path)
    ultrastar_file_output = os.path.join(
        song_output, basename_without_ext + ".txt"
    )
    ultrastar_writer.create_ultrastar_txt_from_automation(
        transcribed_data,
        ultrastar_note_numbers,
        ultrastar_file_output,
        header,
        real_bpm,
    )

    if settings.create_karaoke and version.parse(settings.format_version) < version.parse("1.1.0"):
        # The karaoke txt reuses the header with title and mp3 swapped out.
        karaoke_title = basename_without_ext + " [Karaoke]"
        header.title = karaoke_title
        header.mp3 = karaoke_title + ".mp3"
        karaoke_txt_output_path = os.path.join(song_output, karaoke_title) + ".txt"
        ultrastar_writer.create_ultrastar_txt_from_automation(
            transcribed_data,
            ultrastar_note_numbers,
            karaoke_txt_output_path,
            header,
            real_bpm,
        )

    return real_bpm, ultrastar_file_output

def extract_year(date: str) -> str:
    """Return the first standalone four-digit number in ``date``.

    If ``date`` contains no such number it is returned unchanged.
    """
    year_match = re.search(r'\b\d{4}\b', date)
    return year_match.group(0) if year_match else date

def format_separated_string(data: str) -> str:
    """Normalise a list given as text into ``"Aaa, Bbb-Ccc, Ddd"`` form.

    ``;`` and ``/`` count as separators just like ``,``. Blank entries are
    dropped, every entry is stripped and capitalised (each part of a
    ``-``-joined entry separately) and the entries are joined with ``", "``.
    """
    entries = re.sub(r'[;/]', ',', data).split(',')

    formatted_entries = []
    for entry in entries:
        if not entry.strip():
            continue
        # An entry without "-" is a single part, so one code path covers both.
        capitalized_parts = (part.strip().capitalize() for part in entry.split('-'))
        formatted_entries.append('-'.join(capitalized_parts))

    return ', '.join(formatted_entries)

def infos_from_audio_input_file() -> tuple[str, str, str, tuple[str, str, str, str]]:
    """Prepare the song folder for a local audio file and collect its song infos.

    Artist and title are taken from an ``"Artist - Title"`` file name when it
    has that form, and are replaced by the result of ``get_music_infos`` when
    that returns a title. The input file is copied into a new, unused song
    folder and renamed to ``"<artist> - <title><ext>"`` if both are known.

    Returns:
        The base name without extension, the song folder, the path of the
        copied audio file and a ``(title, artist, year, genre)`` tuple whose
        entries may be ``None``.
    """
    original_basename = os.path.basename(settings.input_file_path)
    basename_without_ext, extension = os.path.splitext(original_basename)
    basename = original_basename

    artist, title = None, None
    if " - " in basename_without_ext:
        artist, title = basename_without_ext.split(" - ", 1)

    # Get additional data for song
    # Re-joining artist and title gives the file name back, so the bare file
    # name is the search string in both cases.
    (title_info, artist_info, year_info, genre_info) = get_music_infos(basename_without_ext)

    if title_info is not None:
        title = title_info
        artist = artist_info

    if artist is not None and title is not None:
        # The looked-up names can contain characters that are not allowed in
        # file names; sanitize them like the YouTube download path does.
        basename_without_ext = sanitize_filename(f"{artist} - {title}")
        basename = f"{basename_without_ext}{extension}"

    song_output = os.path.join(settings.output_file_path, basename_without_ext)
    song_output = get_unused_song_output_dir(song_output)
    os_helper.create_folder(song_output)
    os_helper.copy(settings.input_file_path, song_output)
    os_helper.rename(os.path.join(song_output, original_basename), os.path.join(song_output, basename))
    ultrastar_audio_input_path = os.path.join(song_output, basename)
    return basename_without_ext, song_output, ultrastar_audio_input_path, (title, artist, year_info, genre_info)


# Pairs of (characters to replace, replacement) applied to file names.
FILENAME_REPLACEMENTS = (('?:"', ""), ("<", "("), (">", ")"), ("/\\|*", "-"))

# The same replacements as a single-pass translation table.
_FILENAME_TRANSLATION = str.maketrans(
    {char: new for old, new in FILENAME_REPLACEMENTS for char in old}
)


def sanitize_filename(fname: str) -> str:
    """Return ``fname`` with characters that are invalid in file names replaced.

    ``?``, ``:`` and ``"`` are removed, ``<`` / ``>`` become ``(`` / ``)`` and
    ``/``, ``\\``, ``|`` and ``*`` become ``-``. Trailing spaces and periods are
    stripped afterwards.
    """
    # Windows accepts neither a trailing period nor a trailing space, so both
    # are stripped whichever of them comes last.
    return fname.translate(_FILENAME_TRANSLATION).rstrip(" .")


def download_from_youtube() -> tuple[str, str, str, tuple[str, str, str, str]]:
    """Fetch a song from the link in ``settings.input_file_path``.

    Artist and title come from ``get_youtube_title`` and are overridden by the
    result of ``get_music_infos`` when that lookup returns a title. A fresh
    output folder named after the sanitized ``<artist> - <title>`` is created
    and the audio, video and thumbnail download helpers are run against it.

    Returns:
        A tuple ``(basename_without_ext, song_output, ultrastar_audio_input_path,
        (title, artist, year, genre))`` where the audio path points at
        ``<basename_without_ext>.mp3`` inside the output folder.
    """
    artist, title = get_youtube_title(settings.input_file_path)

    # Get additional data for song
    found_title, found_artist, year_info, genre_info = get_music_infos(f"{artist} - {title}")
    if found_title is not None:
        artist, title = found_artist, found_title

    basename_without_ext = sanitize_filename(f"{artist} - {title}")
    song_output = get_unused_song_output_dir(
        os.path.join(settings.output_file_path, basename_without_ext)
    )
    os_helper.create_folder(song_output)

    # All three helpers share the same call signature.
    for fetch in (
        download_youtube_audio,
        download_youtube_video,
        download_youtube_thumbnail,
    ):
        fetch(settings.input_file_path, basename_without_ext, song_output)

    ultrastar_audio_input_path = os.path.join(song_output, basename_without_ext + ".mp3")
    song_infos = (title, artist, year_info, genre_info)
    return basename_without_ext, song_output, ultrastar_audio_input_path, song_infos


def parse_ultrastar_txt() -> tuple[str, float, str, str, UltrastarTxtValue]:
    """Read the Ultrastar txt in ``settings.input_file_path`` and prepare its output folder.

    Returns:
        A tuple ``(basename_without_ext, real_bpm, song_output,
        ultrastar_audio_input_path, ultrastar_class)``. The base name is taken
        from the mp3 name stored in the txt, and the audio path is that mp3
        name resolved next to the txt file.
    """
    ultrastar_class = ultrastar_parser.parse_ultrastar_txt(settings.input_file_path)

    # The BPM text may use a decimal comma; normalize it before converting.
    bpm_text = ultrastar_class.bpm.replace(",", ".")
    real_bpm = ultrastar_converter.ultrastar_bpm_to_real_bpm(float(bpm_text))

    mp3_name = ultrastar_class.mp3
    basename_without_ext, _ = os.path.splitext(mp3_name)
    txt_folder = os.path.dirname(settings.input_file_path)
    ultrastar_audio_input_path = os.path.join(txt_folder, mp3_name)

    folder_name = ultrastar_class.artist.strip() + " - " + ultrastar_class.title.strip()
    song_output = get_unused_song_output_dir(
        str(os.path.join(settings.output_file_path, folder_name))
    )
    os_helper.create_folder(song_output)

    return (
        str(basename_without_ext),
        real_bpm,
        song_output,
        str(ultrastar_audio_input_path),
        ultrastar_class,
    )


def create_midi_file(real_bpm: float,
                     song_output: str,
                     ultrastar_class: UltrastarTxtValue,
                     basename_without_ext: str) -> None:
    """Write ``<basename_without_ext>.mid`` into ``song_output``.

    Args:
        real_bpm: Tempo passed on to the midi writer.
        song_output: Folder that receives the midi file.
        ultrastar_class: Ultrastar data converted into the single voice instrument.
        basename_without_ext: File name of the midi file without extension.
    """
    print(f"{ULTRASINGER_HEAD} Creating Midi with {blue_highlighted('pretty_midi')}")

    instrument = midi_creator.convert_ultrastar_to_midi_instrument(ultrastar_class)
    midi_path = os.path.join(song_output, basename_without_ext + ".mid")
    midi_creator.instruments_to_midi([instrument], real_bpm, midi_path)


def pitch_audio(
    is_audio: bool,
    transcribed_data: list[TranscribedData],
    ultrastar_class: UltrastarTxtValue,
) -> tuple[list[str], PitchedData, list[int]]:
    """Pitch the processing audio and derive notes for each timed segment.

    Args:
        is_audio: When true, segment times come from ``transcribed_data``;
            otherwise from ``ultrastar_class``.
        transcribed_data: Transcribed segments providing ``start`` and ``end``.
        ultrastar_class: Ultrastar data providing ``startTimes`` and ``endTimes``.

    Returns:
        A tuple ``(midi_notes, pitched_data, ultrastar_note_numbers)``.
    """
    # TODO: chunk pitching as option?
    device = settings.tensorflow_device
    if settings.force_crepe_cpu:
        device = "cpu"

    pitched_data = get_pitch_with_crepe_file(
        settings.processing_audio_path,
        settings.crepe_model_capacity,
        settings.crepe_step_size,
        device,
    )

    if is_audio:
        start_times = [segment.start for segment in transcribed_data]
        end_times = [segment.end for segment in transcribed_data]
    else:
        start_times = ultrastar_class.startTimes
        end_times = ultrastar_class.endTimes

    midi_notes = create_midi_notes_from_pitched_data(start_times, end_times, pitched_data)
    ultrastar_note_numbers = convert_midi_notes_to_ultrastar_notes(midi_notes)
    return midi_notes, pitched_data, ultrastar_note_numbers


def create_audio_chunks(
    cache_path: str,
    is_audio: bool,
    transcribed_data: list[TranscribedData],
    ultrastar_audio_input_path: str,
    ultrastar_class: UltrastarTxtValue
) -> None:
    """Export audio chunks into the chunk folder below ``cache_path``.

    Args:
        cache_path: Cache folder that contains the chunk folder.
        is_audio: When true, chunks are cut from the processing audio using
            ``transcribed_data`` and the data is also written to ``_chunks.csv``;
            otherwise chunks are cut from the Ultrastar audio.
        transcribed_data: Transcribed segments (used when ``is_audio`` is true).
        ultrastar_audio_input_path: Audio file of the Ultrastar song.
        ultrastar_class: Ultrastar data (used when ``is_audio`` is false).
    """
    chunk_dir = os.path.join(cache_path, settings.audio_chunk_folder_name)
    os_helper.create_folder(chunk_dir)

    if not is_audio:
        export_chunks_from_ultrastar_data(
            ultrastar_audio_input_path, ultrastar_class, chunk_dir
        )
        return

    # Audio input: export the chunks and a csv describing them.
    export_chunks_from_transcribed_data(
        settings.processing_audio_path, transcribed_data, chunk_dir
    )
    export_transcribed_data_to_csv(
        transcribed_data, os.path.join(chunk_dir, "_chunks.csv")
    )

def denoise_vocal_audio(input_path: str, output_path: str) -> None:
    """Denoise vocal audio.

    Delegates to ``ffmpeg_reduce_noise``.

    Args:
        input_path: Path passed to ``ffmpeg_reduce_noise`` as its input.
        output_path: Path passed to ``ffmpeg_reduce_noise`` as its output.
    """
    ffmpeg_reduce_noise(input_path, output_path)


def main(argv: list[str]) -> None:
    """Run the command line program.

    Prints the version, applies ``argv`` to the settings via
    ``init_settings``, calls ``run`` and finally exits the interpreter.

    Args:
        argv: Command line arguments to parse.
    """
    print_version()
    init_settings(argv)
    run()
    sys.exit()

def remove_cache_folder(cache_path: str) -> None:
    """Remove the cache folder.

    Delegates to ``os_helper.remove_folder``.

    Args:
        cache_path: Path of the cache folder to remove.
    """
    os_helper.remove_folder(cache_path)

def _parse_bool_option(opt: str, arg: str) -> bool:
    """Convert the textual value of a boolean command line option to a bool.

    Accepts ``true``/``false``, ``1``/``0``, ``yes``/``no`` and ``on``/``off``
    in any letter case. Any other value prints an error and exits with
    status 1.

    Args:
        opt: Name of the option, used in the error message.
        arg: Value given for the option.

    Returns:
        The parsed boolean.
    """
    normalized = arg.strip().lower()
    if normalized in ("true", "1", "yes", "on"):
        return True
    if normalized in ("false", "0", "no", "off"):
        return False
    print(
        f"{ULTRASINGER_HEAD} {red_highlighted('Error: Value')} {blue_highlighted(arg)} {red_highlighted('is not a valid boolean for option')} {blue_highlighted(opt)}"
    )
    sys.exit(1)


def init_settings(argv: list[str]) -> None:
    """Apply the command line arguments to the global ``settings``.

    Prints the help and exits when no option is given or ``-h`` is passed.
    After parsing, a missing output path defaults to an ``output`` folder
    (in the current working directory for ``https:`` inputs, otherwise next
    to the input file), and the GPU devices are detected unless CPU use is
    forced.

    Args:
        argv: Command line arguments without the program name.

    Raises:
        getopt.GetoptError: If an option is unknown or lacks its argument.
        ValueError: If a numeric option value is not an integer.
    """
    long, short = arg_options()
    opts, _ = getopt.getopt(argv, short, long)
    if not opts:
        print_help()
        sys.exit()

    # Option names are compared with ``==``. ``opt in ("--name")`` is a
    # substring test on a plain string (the parentheses do not make a tuple),
    # which e.g. let ``-m`` match ``--midi``.
    # Boolean values are parsed explicitly instead of relying on the
    # truthiness of the raw string (``"False"`` is truthy) or on ``eval``.
    for opt, arg in opts:
        if opt == "-h":
            print_help()
            sys.exit()
        elif opt in ("-i", "--ifile"):
            settings.input_file_path = arg
        elif opt in ("-o", "--ofile"):
            settings.output_file_path = arg
        elif opt == "--whisper":
            settings.transcriber = "whisper"
            settings.whisper_model = arg
        elif opt == "--whisper_align_model":
            settings.whisper_align_model = arg
        elif opt == "--whisper_batch_size":
            settings.whisper_batch_size = int(arg)
        elif opt == "--whisper_compute_type":
            settings.whisper_compute_type = arg
        elif opt == "--language":
            settings.language = arg
        elif opt == "--crepe":
            settings.crepe_model_capacity = arg
        elif opt == "--crepe_step_size":
            settings.crepe_step_size = int(arg)
        elif opt == "--plot":
            settings.create_plot = _parse_bool_option(opt, arg)
        elif opt == "--midi":
            settings.create_midi = _parse_bool_option(opt, arg)
        elif opt == "--hyphenation":
            settings.hyphenation = _parse_bool_option(opt, arg)
        elif opt == "--disable_separation":
            settings.use_separated_vocal = not _parse_bool_option(opt, arg)
        elif opt == "--disable_karaoke":
            settings.create_karaoke = not _parse_bool_option(opt, arg)
        elif opt == "--create_audio_chunks":
            settings.create_audio_chunks = _parse_bool_option(opt, arg)
        elif opt == "--force_cpu":
            settings.force_cpu = _parse_bool_option(opt, arg)
            if settings.force_cpu:
                # Hide all CUDA devices from the ML frameworks.
                os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
        elif opt == "--force_whisper_cpu":
            settings.force_whisper_cpu = _parse_bool_option(opt, arg)
        elif opt == "--force_crepe_cpu":
            settings.force_crepe_cpu = _parse_bool_option(opt, arg)
        elif opt == "--format_version":
            if arg not in ("0.3.0", "1.0.0", "1.1.0"):
                print(
                    f"{ULTRASINGER_HEAD} {red_highlighted('Error: Format version')} {blue_highlighted(arg)} {red_highlighted('is not supported.')}"
                )
                sys.exit(1)
            settings.format_version = arg
        elif opt == "--keep_cache":
            # Declared as a flag without argument, so ``arg`` is empty and the
            # mere presence of the option means "keep". A value is still
            # honoured should the option ever be declared with one.
            settings.keep_cache = True if arg == "" else _parse_bool_option(opt, arg)

    if settings.output_file_path == "":
        if settings.input_file_path.startswith("https:"):
            dirname = os.getcwd()
        else:
            dirname = os.path.dirname(settings.input_file_path)
        settings.output_file_path = os.path.join(dirname, "output")

    if not settings.force_cpu:
        settings.tensorflow_device, settings.pytorch_device = check_gpu_support()


def arg_options() -> tuple[list[str], str]:
    """Return the ``getopt`` option specification as ``(long, short)``.

    Long options that take a value end with ``=``; ``keep_cache`` is a plain
    flag.
    """
    options_with_value = (
        "ifile",
        "ofile",
        "crepe",
        "crepe_step_size",
        "whisper",
        "whisper_align_model",
        "whisper_batch_size",
        "whisper_compute_type",
        "language",
        "plot",
        "midi",
        "hyphenation",
        "disable_separation",
        "disable_karaoke",
        "create_audio_chunks",
        "force_cpu",
        "force_whisper_cpu",
        "force_crepe_cpu",
        "format_version",
    )
    flags = ("keep_cache",)

    long = [name + "=" for name in options_with_value]
    long.extend(flags)
    short = "hi:o:amv:"
    return long, short


# Script entry point: hand the command line arguments (without the program
# name) to main when this file is executed directly.
if __name__ == "__main__":
    main(sys.argv[1:])