sweetcocoa committed on
Commit
db4880c
1 Parent(s): 3a31819

formatting and remove midi2audio

Browse files
Files changed (2) hide show
  1. app.py +78 -43
  2. requirements.txt +0 -1
app.py CHANGED
@@ -1,13 +1,15 @@
1
  import os
2
- import torch
3
- import librosa
4
  import binascii
5
  import warnings
6
- import midi2audio # to convert midi to wav
 
 
7
  import numpy as np
8
- import pytube as pt # to download the youtube videos as audios
9
  import gradio as gr
10
- import soundfile as sf # to make the stereo mix
 
 
11
  from transformers import Pop2PianoForConditionalGeneration, Pop2PianoProcessor
12
 
13
 
@@ -28,37 +30,41 @@ def get_audio_from_yt_video(yt_link):
28
  t = yt.streams.filter(only_audio=True)
29
  filename = os.path.join(yt_video_dir, binascii.hexlify(os.urandom(8)).decode() + ".mp4")
30
  t[0].download(filename=filename)
31
- except:
32
- warnings.warn(f"Video Not Found at {yt_link}")
33
  filename = None
34
-
35
  return filename, filename
36
-
 
37
  def inference(file_uploaded, composer):
38
  # to save the native sampling rate of the file, sr=None is used, but this can cause some silent errors where the
39
  # generated output will not be upto the desired quality. If that happens please consider switching sr to 44100 Hz.
40
- waveform, sr = librosa.load(file_uploaded, sr=None)
41
-
42
  inputs = processor(audio=waveform, sampling_rate=sr, return_tensors="pt").to(device)
43
  model_output = model.generate(input_features=inputs["input_features"], composer=composer)
44
- tokenizer_output = processor.batch_decode(token_ids=model_output.to("cpu"), feature_extractor_output=inputs.to("cpu"))["pretty_midi_objects"]
 
 
 
 
45
 
46
- return prepare_output_file(tokenizer_output, sr)
47
 
48
- def prepare_output_file(tokenizer_output, sr):
49
  # Add some random values so that no two file names are same
50
  output_file_name = "output_" + binascii.hexlify(os.urandom(8)).decode()
51
  midi_output = os.path.join(outputs_dir, output_file_name + ".mid")
52
-
53
- # write the .mid file
54
  tokenizer_output[0].write(midi_output)
55
-
56
- # convert .mid file to .wav using `midi2audio`
57
- wav_output = midi_output.replace(".mid", ".wav")
58
- midi2audio.FluidSynth().midi_to_audio(midi_output, wav_output)
59
-
60
  return wav_output, wav_output, midi_output
61
 
 
62
  def get_stereo(pop_path, midi, pop_scale=0.5):
63
  pop_y, sr = librosa.load(pop_path, sr=None)
64
  midi_y, _ = librosa.load(midi.name, sr=None)
@@ -68,10 +74,15 @@ def get_stereo(pop_path, midi, pop_scale=0.5):
68
  elif len(pop_y) < len(midi_y):
69
  pop_y = np.pad(pop_y, (0, -len(pop_y) + len(midi_y)))
70
  stereo = np.stack((midi_y, pop_y * pop_scale))
71
-
72
- stereo_mix_path = pop_path.replace("output", "output_stereo_mix")
73
- sf.write(file=stereo_mix_path, data=stereo.T, samplerate=sr, format="wav",)
74
-
 
 
 
 
 
75
  return stereo_mix_path, stereo_mix_path
76
 
77
 
@@ -108,12 +119,20 @@ with block:
108
  file_uploaded = gr.Audio(label="Upload an audio", type="filepath")
109
  with gr.Column():
110
  with gr.Row():
111
- yt_link = gr.Textbox(label="Enter YouTube Link of the Video", autofocus=True, lines=3)
 
 
112
  yt_btn = gr.Button("Download Audio from YouTube Link", size="lg")
113
 
114
- yt_audio_path = gr.Audio(label="Audio Extracted from the YouTube Video", interactive=False)
115
- yt_btn.click(get_audio_from_yt_video, inputs=[yt_link], outputs=[yt_audio_path, file_uploaded])
116
-
 
 
 
 
 
 
117
  with gr.Group():
118
  with gr.Column():
119
  composer = gr.Dropdown(label="Arranger", choices=composers, value="composer1")
@@ -123,32 +142,48 @@ with block:
123
  wav_output2 = gr.File(label="Download the Generated MIDI (.wav)")
124
  wav_output1 = gr.Audio(label="Listen to the Generated MIDI")
125
  midi_output = gr.File(label="Download the Generated MIDI (.mid)")
126
- generate_btn.click(inference,
127
- inputs=[file_uploaded, composer],
128
- outputs=[wav_output1, wav_output2, midi_output])
129
-
 
 
130
  with gr.Group():
131
  gr.HTML(
132
  """
133
  <div> <h3> <center> Get the Stereo Mix from the Pop Music and Generated MIDI </h3> </div>
134
  """
135
  )
136
- pop_scale = gr.Slider(0, 1, value=0.5, label="Choose the ratio between Pop and MIDI", info="1.0 = Only Pop, 0.0=Only MIDI", interactive=True),
 
 
 
 
 
 
 
 
 
137
  stereo_btn = gr.Button("Get Stereo Mix")
138
  with gr.Row():
139
  stereo_mix1 = gr.Audio(label="Listen to the Stereo Mix")
140
  stereo_mix2 = gr.File(label="Download the Stereo Mix")
141
-
142
- stereo_btn.click(get_stereo, inputs=[file_uploaded, wav_output2, pop_scale[0]], outputs=[stereo_mix1, stereo_mix2])
143
-
 
 
 
 
144
  with gr.Group():
145
- gr.Examples([
146
- ["./examples/custom_song.mp3", "composer1"],
147
- ],
 
148
  fn=inference,
149
  inputs=[file_uploaded, composer],
150
  outputs=[wav_output1, wav_output2, midi_output],
151
- cache_examples=True
152
  )
153
  gr.HTML(
154
  """
@@ -157,7 +192,7 @@ with block:
157
  </div>
158
  """
159
  )
160
-
161
  gr.HTML(
162
  """
163
  <div class="footer">
@@ -169,4 +204,4 @@ with block:
169
  """
170
  )
171
 
172
- block.launch(debug=False)
 
1
  import os
 
 
2
  import binascii
3
  import warnings
4
+
5
+ import torch
6
+ import librosa
7
  import numpy as np
8
+ import pytube as pt # to download the youtube videos as audios
9
  import gradio as gr
10
+ import soundfile as sf # to make the stereo mix
11
+
12
+ from pytube.exceptions import VideoUnavailable
13
  from transformers import Pop2PianoForConditionalGeneration, Pop2PianoProcessor
14
 
15
 
 
30
  t = yt.streams.filter(only_audio=True)
31
  filename = os.path.join(yt_video_dir, binascii.hexlify(os.urandom(8)).decode() + ".mp4")
32
  t[0].download(filename=filename)
33
+ except VideoUnavailable as e:
34
+ warnings.warn(f"Video Not Found at {yt_link} ({e})")
35
  filename = None
36
+
37
  return filename, filename
38
+
39
+
40
  def inference(file_uploaded, composer):
41
  # to save the native sampling rate of the file, sr=None is used, but this can cause some silent errors where the
42
  # generated output will not be upto the desired quality. If that happens please consider switching sr to 44100 Hz.
43
+ waveform, sr = librosa.load(file_uploaded, sr=None)
44
+
45
  inputs = processor(audio=waveform, sampling_rate=sr, return_tensors="pt").to(device)
46
  model_output = model.generate(input_features=inputs["input_features"], composer=composer)
47
+ tokenizer_output = processor.batch_decode(
48
+ token_ids=model_output.to("cpu"), feature_extractor_output=inputs.to("cpu")
49
+ )["pretty_midi_objects"]
50
+
51
+ return prepare_output_file(tokenizer_output, sr)
52
 
 
53
 
54
+ def prepare_output_file(tokenizer_output, sr:int):
55
  # Add some random values so that no two file names are same
56
  output_file_name = "output_" + binascii.hexlify(os.urandom(8)).decode()
57
  midi_output = os.path.join(outputs_dir, output_file_name + ".mid")
58
+
59
+ # write the .mid and its wav files
60
  tokenizer_output[0].write(midi_output)
61
+ midi_wav:np.ndarray = tokenizer_output[0].fluidsynth(sr)
62
+ wav_output:str = midi_output.replace(".mid", ".wav")
63
+ sf.write(wav_output, midi_wav, samplerate=sr)
64
+
 
65
  return wav_output, wav_output, midi_output
66
 
67
+
68
  def get_stereo(pop_path, midi, pop_scale=0.5):
69
  pop_y, sr = librosa.load(pop_path, sr=None)
70
  midi_y, _ = librosa.load(midi.name, sr=None)
 
74
  elif len(pop_y) < len(midi_y):
75
  pop_y = np.pad(pop_y, (0, -len(pop_y) + len(midi_y)))
76
  stereo = np.stack((midi_y, pop_y * pop_scale))
77
+
78
+ stereo_mix_path = pop_path.replace("output", "output_stereo_mix")
79
+ sf.write(
80
+ file=stereo_mix_path,
81
+ data=stereo.T,
82
+ samplerate=sr,
83
+ format="wav",
84
+ )
85
+
86
  return stereo_mix_path, stereo_mix_path
87
 
88
 
 
119
  file_uploaded = gr.Audio(label="Upload an audio", type="filepath")
120
  with gr.Column():
121
  with gr.Row():
122
+ yt_link = gr.Textbox(
123
+ label="Enter YouTube Link of the Video", autofocus=True, lines=3
124
+ )
125
  yt_btn = gr.Button("Download Audio from YouTube Link", size="lg")
126
 
127
+ yt_audio_path = gr.Audio(
128
+ label="Audio Extracted from the YouTube Video", interactive=False
129
+ )
130
+ yt_btn.click(
131
+ get_audio_from_yt_video,
132
+ inputs=[yt_link],
133
+ outputs=[yt_audio_path, file_uploaded],
134
+ )
135
+
136
  with gr.Group():
137
  with gr.Column():
138
  composer = gr.Dropdown(label="Arranger", choices=composers, value="composer1")
 
142
  wav_output2 = gr.File(label="Download the Generated MIDI (.wav)")
143
  wav_output1 = gr.Audio(label="Listen to the Generated MIDI")
144
  midi_output = gr.File(label="Download the Generated MIDI (.mid)")
145
+ generate_btn.click(
146
+ inference,
147
+ inputs=[file_uploaded, composer],
148
+ outputs=[wav_output1, wav_output2, midi_output],
149
+ )
150
+
151
  with gr.Group():
152
  gr.HTML(
153
  """
154
  <div> <h3> <center> Get the Stereo Mix from the Pop Music and Generated MIDI </h3> </div>
155
  """
156
  )
157
+ pop_scale = (
158
+ gr.Slider(
159
+ 0,
160
+ 1,
161
+ value=0.5,
162
+ label="Choose the ratio between Pop and MIDI",
163
+ info="1.0 = Only Pop, 0.0=Only MIDI",
164
+ interactive=True,
165
+ ),
166
+ )
167
  stereo_btn = gr.Button("Get Stereo Mix")
168
  with gr.Row():
169
  stereo_mix1 = gr.Audio(label="Listen to the Stereo Mix")
170
  stereo_mix2 = gr.File(label="Download the Stereo Mix")
171
+
172
+ stereo_btn.click(
173
+ get_stereo,
174
+ inputs=[file_uploaded, wav_output2, pop_scale[0]],
175
+ outputs=[stereo_mix1, stereo_mix2],
176
+ )
177
+
178
  with gr.Group():
179
+ gr.Examples(
180
+ [
181
+ ["./examples/custom_song.mp3", "composer1"],
182
+ ],
183
  fn=inference,
184
  inputs=[file_uploaded, composer],
185
  outputs=[wav_output1, wav_output2, midi_output],
186
+ cache_examples=True,
187
  )
188
  gr.HTML(
189
  """
 
192
  </div>
193
  """
194
  )
195
+
196
  gr.HTML(
197
  """
198
  <div class="footer">
 
204
  """
205
  )
206
 
207
+ block.launch(debug=False)
requirements.txt CHANGED
@@ -4,7 +4,6 @@ pretty-midi==0.2.9
4
  essentia==2.1b6.dev1034
5
  pyFluidSynth==1.3.0
6
  git+https://github.com/huggingface/transformers
7
- midi2audio
8
  pytube
9
  gradio
10
  resampy
 
4
  essentia==2.1b6.dev1034
5
  pyFluidSynth==1.3.0
6
  git+https://github.com/huggingface/transformers
 
7
  pytube
8
  gradio
9
  resampy