DataBassist committed on
Commit
dae5b5d
โ€ข
1 Parent(s): 9551e0c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +62 -101
app.py CHANGED
@@ -1,137 +1,98 @@
1
- import os # ํŒŒ์ผ ๋ฐ ๋””๋ ‰ํ† ๋ฆฌ ์ž‘์—…์„ ์œ„ํ•œ ๋ชจ๋“ˆ
2
- import torch # ๋”ฅ๋Ÿฌ๋‹ ํ”„๋ ˆ์ž„์›Œํฌ PyTorch
3
- import librosa # ์˜ค๋””์˜ค ์ฒ˜๋ฆฌ๋ฅผ ์œ„ํ•œ ๋ชจ๋“ˆ
4
- import binascii # ์ด์ง„ ๋ฐ์ดํ„ฐ๋ฅผ ๋‹ค๋ฃจ๋Š” ๋ชจ๋“ˆ
5
- import warnings # ๊ฒฝ๊ณ  ๋ฉ”์‹œ์ง€๋ฅผ ์ถœ๋ ฅํ•˜๋Š” ๋ชจ๋“ˆ
6
- import midi2audio # MIDI ํŒŒ์ผ์„ WAV ํŒŒ์ผ๋กœ ๋ณ€ํ™˜ํ•˜๋Š” ๋ชจ๋“ˆ
7
- import numpy as np # ๋‹ค์ฐจ์› ๋ฐฐ์—ด์„ ๋‹ค๋ฃจ๋Š” ๋ชจ๋“ˆ
8
- import pytube as pt # YouTube ๋™์˜์ƒ์„ ๋‹ค์šด๋กœ๋“œํ•˜๋Š” ๋ชจ๋“ˆ
9
- import gradio as gr # ์ธํ„ฐ๋ž™ํ‹ฐ๋ธŒํ•œ UI๋ฅผ ๋งŒ๋“ค๊ธฐ ์œ„ํ•œ ๋ชจ๋“ˆ
10
- import soundfile as sf # ์‚ฌ์šด๋“œ ํŒŒ์ผ์„ ๋‹ค๋ฃจ๋Š” ๋ชจ๋“ˆ
11
- from transformers import Pop2PianoForConditionalGeneration, Pop2PianoProcessor # Pop2Piano ๋ชจ๋ธ๊ณผ ์ „์ฒ˜๋ฆฌ๊ธฐ
12
 
13
- yt_video_dir = "./yt_dir" # YouTube ๋™์˜์ƒ ๋‹ค์šด๋กœ๋“œ ๋””๋ ‰ํ† ๋ฆฌ ๊ฒฝ๋กœ
14
- outputs_dir = "./midi_wav_outputs" # MIDI ๋ฐ WAV ํŒŒ์ผ ์ถœ๋ ฅ ๋””๋ ‰ํ† ๋ฆฌ ๊ฒฝ๋กœ
15
- os.makedirs(outputs_dir, exist_ok=True) # ์ถœ๋ ฅ ๋””๋ ‰ํ† ๋ฆฌ ์ƒ์„ฑ (์ด๋ฏธ ์กด์žฌํ•˜๋Š” ๊ฒฝ์šฐ ๋ฌด์‹œ)
16
- os.makedirs(yt_video_dir, exist_ok=True) # YouTube ๋™์˜์ƒ ๋‹ค์šด๋กœ๋“œ ๋””๋ ‰ํ† ๋ฆฌ ์ƒ์„ฑ (์ด๋ฏธ ์กด์žฌํ•˜๋Š” ๊ฒฝ์šฐ ๋ฌด์‹œ)
 
17
 
18
- device = "cuda" if torch.cuda.is_available() else "cpu" # CUDA๊ฐ€ ์‚ฌ์šฉ ๊ฐ€๋Šฅํ•œ ๊ฒฝ์šฐ GPU๋ฅผ ์‚ฌ์šฉํ•˜๊ณ , ๊ทธ๋ ‡์ง€ ์•Š์€ ๊ฒฝ์šฐ CPU๋ฅผ ์‚ฌ์šฉ
19
- model = Pop2PianoForConditionalGeneration.from_pretrained("sweetcocoa/pop2piano").to(device) # ์‚ฌ์ „ ํ•™์Šต๋œ Pop2Piano ๋ชจ๋ธ ๋กœ๋“œ
20
- processor = Pop2PianoProcessor.from_pretrained("sweetcocoa/pop2piano") # ์‚ฌ์ „ ํ•™์Šต๋œ Pop2Piano ์ „์ฒ˜๋ฆฌ๊ธฐ ๋กœ๋“œ
21
- composers = model.generation_config.composer_to_feature_token.keys() # ์ž‘๊ณก๊ฐ€ ๋ชฉ๋ก ๊ฐ€์ ธ์˜ค๊ธฐ
 
22
 
 
23
  def get_audio_from_yt_video(yt_link):
24
  try:
25
- yt = pt.YouTube(yt_link) # YouTube ๋™์˜์ƒ ๊ฐ์ฒด ์ƒ์„ฑ
26
- t = yt.streams.filter(only_audio=True) # ์˜ค๋””์˜ค ์ŠคํŠธ๋ฆผ ํ•„ํ„ฐ๋ง
27
- filename = os.path.join(yt_video_dir, binascii.hexlify(os.urandom(8)).decode() + ".mp4") # ๋žœ๋ค ํŒŒ์ผ ์ด๋ฆ„ ์ƒ์„ฑ
28
- t[0].download(filename=filename) # ๋™์˜์ƒ ๋‹ค์šด๋กœ๋“œ
29
  except:
30
- warnings.warn(f"Video Not Found at {yt_link}") # ๊ฒฝ๊ณ  ๋ฉ”์‹œ์ง€ ์ถœ๋ ฅ
31
  filename = None
32
-
33
  return filename, filename
34
-
35
- def inference(file_uploaded, composer):
36
- waveform, sr = librosa.load(file_uploaded, sr=None) # ํŒŒ์ผ์—์„œ ์˜ค๋””์˜ค ๋ฐ์ดํ„ฐ์™€ ์ƒ˜ํ”Œ๋ง ์ฃผํŒŒ์ˆ˜ ๋กœ๋“œ
37
-
38
- inputs = processor(audio=waveform, sampling_rate=sr, return_tensors="pt").to(device) # ์ž…๋ ฅ ๋ฐ์ดํ„ฐ ์ „์ฒ˜๋ฆฌ
39
- model_output = model.generate(input_features=inputs["input_features"], composer=composer) # ๋ชจ๋ธ์— ์ž…๋ ฅํ•˜์—ฌ ์ถœ๋ ฅ ์ƒ์„ฑ
40
- tokenizer_output = processor.batch_decode(token_ids=model_output.to("cpu"), feature_extractor_output=inputs.to("cpu"))["pretty_midi_objects"] # ํ† ํฐ ๋””์ฝ”๋”ฉ
41
-
42
- return prepare_output_file(tokenizer_output, sr) # ์ถœ๋ ฅ ํŒŒ์ผ ์ค€๋น„ ํ•จ์ˆ˜ ํ˜ธ์ถœ
43
 
44
- def prepare_output_file(tokenizer_output, sr):
45
- output_file_name = "output_" + binascii.hexlify(os.urandom(8)).decode() # ๋žœ๋ค ์ถœ๋ ฅ ํŒŒ์ผ ์ด๋ฆ„ ์ƒ์„ฑ
46
- midi_output = os.path.join(outputs_dir, output_file_name + ".mid") # MIDI ์ถœ๋ ฅ ํŒŒ์ผ ๊ฒฝ๋กœ
47
-
48
- tokenizer_output[0].write(midi_output) # MIDI ํŒŒ์ผ ์ž‘์„ฑ
49
-
50
- wav_output = midi_output.replace(".mid", ".wav") # WAV ์ถœ๋ ฅ ํŒŒ์ผ ๊ฒฝ๋กœ
51
- midi2audio.FluidSynth().midi_to_audio(midi_output, wav_output) # MIDI๋ฅผ WAV๋กœ ๋ณ€ํ™˜
52
-
53
- return wav_output, wav_output, midi_output # WAV ๋ฐ MIDI ํŒŒ์ผ ๊ฒฝ๋กœ ๋ฐ˜ํ™˜
54
 
55
- def get_stereo(pop_path, midi, pop_scale=0.5):
56
- pop_y, sr = librosa.load(pop_path, sr=None) # ํŒ ์Œ์•… ํŒŒ์ผ ๋กœ๋“œ
57
- midi_y, _ = librosa.load(midi.name, sr=None) # MIDI ํŒŒ์ผ ๋กœ๋“œ
58
 
59
- if len(pop_y) > len(midi_y):
60
- midi_y = np.pad(midi_y, (0, len(pop_y) - len(midi_y))) # MIDI ๊ธธ์ด๋ฅผ ํŒ ์Œ์•… ๊ธธ์ด์— ๋งž์ถค
61
- elif len(pop_y) < len(midi_y):
62
- pop_y = np.pad(pop_y, (0, -len(pop_y) + len(midi_y))) # ํŒ ์Œ์•… ๊ธธ์ด๋ฅผ MIDI ๊ธธ์ด์— ๋งž์ถค
63
- stereo = np.stack((midi_y, pop_y * pop_scale)) # ์Šคํ…Œ๋ ˆ์˜ค ๋ฏน์Šค ์ƒ์„ฑ
64
-
65
- stereo_mix_path = pop_path.replace("output", "output_stereo_mix") # ์Šคํ…Œ๋ ˆ์˜ค ๋ฏน์Šค ํŒŒ์ผ ๊ฒฝ๋กœ
66
- sf.write(file=stereo_mix_path, data=stereo.T, samplerate=sr, format="wav") # ์Šคํ…Œ๋ ˆ์˜ค ๋ฏน์Šค ํŒŒ์ผ ์ž‘์„ฑ
67
-
68
- return stereo_mix_path, stereo_mix_path # ์Šคํ…Œ๋ ˆ์˜ค ๋ฏน์Šค ํŒŒ์ผ ๊ฒฝ๋กœ ๋ฐ˜ํ™˜
69
 
70
- block = gr.Blocks("Taithrah/Minimal") # Gradio ๋ธ”๋ก ์ƒ์„ฑ
 
71
 
72
  with block:
73
  gr.HTML(
74
  """
75
  <div style="text-align: center; max-width: 800px; margin: 0 auto;">
76
- <div
77
- style="
78
- display: inline-flex;
79
- align-items: center;
80
- gap: 0.8rem;
81
- font-size: 1.75rem;
82
- "
83
- >
84
- <h1 style="font-weight: 900; margin-bottom: 12px;">
85
- ๐ŸŽน Pop2Piano : ํ”ผ์•„๋…ธ ์ปค๋ฒ„๊ณก ์ƒ์„ฑ๊ธฐ ๐ŸŽน
86
- </h1>
87
- </div>
88
  <p style="margin-bottom: 12px; font-size: 90%">
89
- A demo for Pop2Piano: Pop Audio-based Piano Cover Generation. <br>
90
- Please select the composer (Arranger) and upload the pop audio or enter the YouTube link and then click Generate.
91
  </p>
92
  </div>
93
  """
94
  )
95
  with gr.Group():
96
- with gr.Row(equal_height=True):
97
  with gr.Column():
98
  file_uploaded = gr.Audio(label="์˜ค๋””์˜ค ์—…๋กœ๋“œ", type="filepath")
99
  with gr.Column():
100
  with gr.Row():
101
  yt_link = gr.Textbox(label="์œ ํŠœ๋ธŒ ๋งํฌ๋ฅผ ์ž…๋ ฅํ•˜์„ธ์š”.", autofocus=True, lines=3)
102
  yt_btn = gr.Button("์œ ํŠœ๋ธŒ ๋งํฌ์—์„œ ์˜ค๋””์˜ค๋ฅผ ๋‹ค์šด ๋ฐ›์Šต๋‹ˆ๋‹ค.", size="lg")
103
-
104
- yt_audio_path = gr.Audio(label="์œ ํŠœ๋ธŒ ๋™์˜์ƒ์—์„œ ์ถ”์ถœํ•œ ์˜ค๋””์˜ค", interactive=False)
105
- yt_btn.click(get_audio_from_yt_video, inputs=[yt_link], outputs=[yt_audio_path, file_uploaded])
106
 
107
  with gr.Group():
108
  with gr.Column():
109
- composer = gr.Dropdown(label="Arranger", choices=composers, value="composer1")
110
  generate_btn = gr.Button("๋‚˜๋งŒ์˜ ํ”ผ์•„๋…ธ ์ปค๋ฒ„๊ณก ๋งŒ๋“ค๊ธฐ๐ŸŽน๐ŸŽต")
111
-
112
-
113
-
114
- with gr.Row().style(mobile_collapse=False, equal_height=True):
115
  wav_output2 = gr.File(label="๋‚˜๋งŒ์˜ ํ”ผ์•„๋…ธ ์ปค๋ฒ„๊ณก์„ ๋‹ค์šด๋กœ๋“œ (.wav)")
116
  wav_output1 = gr.Audio(label="๋‚˜๋งŒ์˜ ํ”ผ์•„๋…ธ ์ปค๋ฒ„๊ณก ๋“ฃ๊ธฐ")
117
- midi_output = gr.File(label="์ƒ์„ฑํ•œ MIDI ํŒŒ์ผ ๋‹ค์šด๋กœ๋“œ (.mid)")
118
- generate_btn.click(inference,
119
- inputs=[file_uploaded, composer],
120
- outputs=[wav_output1, wav_output2, midi_output])
121
-
122
-
123
-
124
-
125
-
126
- gr.HTML(
127
- """
128
- <div class="footer">
129
- <center><p><a href="http://sweetcocoa.github.io/pop2piano_samples" style="text-decoration: underline;" target="_blank">Project Page</a>
130
- <center><a href="https://huggingface.co/docs/transformers/main/model_doc/pop2piano" style="text-decoration: underline;" target="_blank">HuggingFace Model Docs</a>
131
- <center><a href="https://github.com/sweetcocoa/pop2piano" style="text-decoration: underline;" target="_blank">Github</a>
132
- </p>
133
- </div>
134
- """
135
- )
136
 
137
- block.launch(debug=False)
 
1
+ import os
2
+ import torch
3
+ import librosa
4
+ import binascii
5
+ import warnings
6
+ import midi2audio # MIDI ํŒŒ์ผ์„ WAV ํŒŒ์ผ๋กœ ๋ณ€ํ™˜
7
+ import numpy as np
8
+ import pytube as pt # YouTube ๋น„๋””์˜ค๋ฅผ ์˜ค๋””์˜ค๋กœ ๋‹ค์šด๋กœ๋“œ
9
+ import gradio as gr
10
+ import soundfile as sf
11
+ from transformers import Pop2PianoForConditionalGeneration, Pop2PianoProcessor
12
 
13
+ # ๋””๋ ‰ํ† ๋ฆฌ ์ƒ์„ฑ
14
+ yt_video_dir = "./yt_dir" # ์œ ํŠœ๋ธŒ ๋น„๋””์˜ค ๋‹ค์šด๋กœ๋“œ ๊ฒฝ๋กœ
15
+ outputs_dir = "./midi_wav_outputs" # ์ถœ๋ ฅ ํŒŒ์ผ ๊ฒฝ๋กœ
16
+ os.makedirs(outputs_dir, exist_ok=True)
17
+ os.makedirs(yt_video_dir, exist_ok=True)
18
 
19
+ # ๋ชจ๋ธ ์„ค์ •
20
+ device = "cuda" if torch.cuda.is_available() else "cpu"
21
+ model = Pop2PianoForConditionalGeneration.from_pretrained("sweetcocoa/pop2piano").to(device)
22
+ processor = Pop2PianoProcessor.from_pretrained("sweetcocoa/pop2piano")
23
+ composers = model.generation_config.composer_to_feature_token.keys()
24
 
25
+ # ์œ ํŠœ๋ธŒ ๋น„๋””์˜ค์—์„œ ์˜ค๋””์˜ค ์ถ”์ถœ ํ•จ์ˆ˜
26
  def get_audio_from_yt_video(yt_link):
27
  try:
28
+ yt = pt.YouTube(yt_link)
29
+ t = yt.streams.filter(only_audio=True)
30
+ filename = os.path.join(yt_video_dir, binascii.hexlify(os.urandom(8)).decode() + ".mp4")
31
+ t[0].download(filename=filename)
32
  except:
33
+ warnings.warn(f"Video Not Found at {yt_link}")
34
  filename = None
35
+
36
  return filename, filename
 
 
 
 
 
 
 
 
 
37
 
38
+ # ๋ชจ๋ธ ์ถ”๋ก  ํ•จ์ˆ˜
39
+ def inference(file_uploaded, composer):
40
+ waveform, sr = librosa.load(file_uploaded, sr=None)
41
+ inputs = processor(audio=waveform, sampling_rate=sr, return_tensors="pt").to(device)
42
+ model_output = model.generate(input_features=inputs["input_features"], composer=composer)
43
+ tokenizer_output = processor.batch_decode(token_ids=model_output.to("cpu"), feature_extractor_output=inputs.to("cpu"))["pretty_midi_objects"]
 
 
 
 
44
 
45
+ return prepare_output_file(tokenizer_output, sr)
 
 
46
 
47
+ # ์ถœ๋ ฅ ํŒŒ์ผ ์ค€๋น„ ํ•จ์ˆ˜
48
+ def prepare_output_file(tokenizer_output, sr):
49
+ output_file_name = "output_" + binascii.hexlify(os.urandom(8)).decode()
50
+ midi_output = os.path.join(outputs_dir, output_file_name + ".mid")
51
+ tokenizer_output[0].write(midi_output)
52
+ wav_output = midi_output.replace(".mid", ".wav")
53
+ midi2audio.FluidSynth().midi_to_audio(midi_output, wav_output)
54
+
55
+ return wav_output, wav_output, midi_output
 
56
 
57
+ # Gradio UI ์„ค์ •
58
+ block = gr.Blocks(theme="Taithrah/Minimal")
59
 
60
  with block:
61
  gr.HTML(
62
  """
63
  <div style="text-align: center; max-width: 800px; margin: 0 auto;">
64
+ <h1 style="font-weight: 900; margin-bottom: 12px;">
65
+ ๐ŸŽน Pop2Piano : ํ”ผ์•„๋…ธ ์ปค๋ฒ„๊ณก ์ƒ์„ฑ๊ธฐ ๐ŸŽน
66
+ </h1>
 
 
 
 
 
 
 
 
 
67
  <p style="margin-bottom: 12px; font-size: 90%">
68
+ Pop2Piano ๋ฐ๋ชจ: ํŒ ์˜ค๋””์˜ค ๊ธฐ๋ฐ˜ ํ”ผ์•„๋…ธ ์ปค๋ฒ„๊ณก ์ƒ์„ฑ. <br>
69
+ ์ž‘๊ณก๊ฐ€(ํŽธ๊ณก์ž)๋ฅผ ์„ ํƒํ•˜๊ณ  ํŒ ์˜ค๋””์˜ค๋ฅผ ์—…๋กœ๋“œํ•˜๊ฑฐ๋‚˜ ์œ ํŠœ๋ธŒ ๋งํฌ๋ฅผ ์ž…๋ ฅํ•œ ํ›„ ์ƒ์„ฑ ๋ฒ„ํŠผ์„ ํด๋ฆญํ•˜์„ธ์š”.
70
  </p>
71
  </div>
72
  """
73
  )
74
  with gr.Group():
75
+ with gr.Row():
76
  with gr.Column():
77
  file_uploaded = gr.Audio(label="์˜ค๋””์˜ค ์—…๋กœ๋“œ", type="filepath")
78
  with gr.Column():
79
  with gr.Row():
80
  yt_link = gr.Textbox(label="์œ ํŠœ๋ธŒ ๋งํฌ๋ฅผ ์ž…๋ ฅํ•˜์„ธ์š”.", autofocus=True, lines=3)
81
  yt_btn = gr.Button("์œ ํŠœ๋ธŒ ๋งํฌ์—์„œ ์˜ค๋””์˜ค๋ฅผ ๋‹ค์šด ๋ฐ›์Šต๋‹ˆ๋‹ค.", size="lg")
82
+ yt_audio_path = gr.Audio(label="์œ ํŠœ๋ธŒ ๋™์˜์ƒ์—์„œ ์ถ”์ถœํ•œ ์˜ค๋””์˜ค", interactive=False)
83
+ yt_btn.click(get_audio_from_yt_video, inputs=[yt_link], outputs=[yt_audio_path, file_uploaded])
 
84
 
85
  with gr.Group():
86
  with gr.Column():
87
+ composer = gr.Dropdown(label="ํŽธ๊ณก์ž", choices=composers, value="composer1")
88
  generate_btn = gr.Button("๋‚˜๋งŒ์˜ ํ”ผ์•„๋…ธ ์ปค๋ฒ„๊ณก ๋งŒ๋“ค๊ธฐ๐ŸŽน๐ŸŽต")
89
+ with gr.Row():
 
 
 
90
  wav_output2 = gr.File(label="๋‚˜๋งŒ์˜ ํ”ผ์•„๋…ธ ์ปค๋ฒ„๊ณก์„ ๋‹ค์šด๋กœ๋“œ (.wav)")
91
  wav_output1 = gr.Audio(label="๋‚˜๋งŒ์˜ ํ”ผ์•„๋…ธ ์ปค๋ฒ„๊ณก ๋“ฃ๊ธฐ")
92
+ midi_output = gr.File(label="์ƒ์„ฑํ•œ midi ํŒŒ์ผ ๋‹ค์šด๋กœ๋“œ (.mid)")
93
+ generate_btn.click(
94
+ inference,
95
+ inputs=[file_uploaded, composer],
96
+ outputs=[wav_output1, wav_output2, midi_output])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
97
 
98
+ block.launch(debug=False)