susnato committed on
Commit
c8ce7ce
1 Parent(s): 09e9bd4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +49 -20
app.py CHANGED
@@ -1,12 +1,13 @@
1
  import os
2
  import torch
3
- import shutil
4
  import librosa
5
  import binascii
6
  import warnings
7
- import midi2audio
 
8
  import pytube as pt # to download the youtube videos as audios
9
  import gradio as gr
 
10
  from transformers import Pop2PianoForConditionalGeneration, Pop2PianoProcessor
11
 
12
 
@@ -31,8 +32,19 @@ def get_audio_from_yt_video(yt_link):
31
  filename = None
32
 
33
  return filename, filename
 
 
 
 
 
 
 
 
 
 
 
34
 
35
- def prepare_output_file(tokenizer_output):
36
  # Add some random values so that no two file names are same
37
  output_file_name = "output_" + binascii.hexlify(os.urandom(8)).decode()
38
  midi_output = os.path.join(outputs_dir, output_file_name + ".mid")
@@ -43,19 +55,23 @@ def prepare_output_file(tokenizer_output):
43
  # convert .mid file to .wav using `midi2audio`
44
  wav_output = midi_output.replace(".mid", ".wav")
45
  midi2audio.FluidSynth().midi_to_audio(midi_output, wav_output)
46
-
47
  return wav_output, wav_output, midi_output
 
 
 
 
 
 
 
 
 
 
48
 
49
- def inference(file_uploaded, composer):
50
- # to save the native sampling rate of the file, sr=None is used, but this can cause some silent errors where the
51
- # generated output will not be upto the desired quality. If that happens please consider switching sr to 44100 Hz.
52
- waveform, sr = librosa.load(file_uploaded, sr=None)
53
 
54
- inputs = processor(audio=waveform, sampling_rate=sr, return_tensors="pt").to(device)
55
- model_output = model.generate(input_features=inputs["input_features"], composer=composer)
56
- tokenizer_output = processor.batch_decode(token_ids=model_output.to("cpu"), feature_extractor_output=inputs.to("cpu"))["pretty_midi_objects"]
57
-
58
- return prepare_output_file(tokenizer_output)
59
 
60
 
61
  # Thanks a lot to "https://huggingface.co/Taithrah" for this theme.
@@ -100,14 +116,30 @@ with block:
100
  with gr.Group():
101
  with gr.Column():
102
  composer = gr.Dropdown(label="Arranger", choices=composers, value="composer1")
103
- btn = gr.Button("Generate")
104
 
105
  with gr.Group():
106
  with gr.Row().style(mobile_collapse=False, equal_height=True):
107
  wav_output2 = gr.File(label="Download the Generated MIDI (.wav)")
108
  wav_output1 = gr.Audio(label="Listen to the Generated MIDI")
109
  midi_output = gr.File(label="Download the Generated MIDI (.mid)")
110
- btn.click(inference, inputs=[file_uploaded, composer], outputs=[wav_output1, wav_output2, midi_output])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
111
 
112
  with gr.Group():
113
  gr.Examples([
@@ -124,7 +156,6 @@ with block:
124
  """
125
  <div class="footer">
126
  <center>The design for this Space is taken from <a href="https://huggingface.co/spaces/NoCrypt/miku"> NoCrypt/miku </a>
127
- </p>
128
  </div>
129
  """
130
  )
@@ -134,7 +165,7 @@ with block:
134
  <div class="footer">
135
  <center><p><a href="http://sweetcocoa.github.io/pop2piano_samples" style="text-decoration: underline;" target="_blank">Project Page</a>
136
  <center><a href="https://huggingface.co/docs/transformers/main/model_doc/pop2piano" style="text-decoration: underline;" target="_blank">HuggingFace Model Docs</a>
137
- <center><a href="http://sweetcocoa.github.io/pop2piano_samples" style="text-decoration: underline;" target="_blank">Github</a>
138
 
139
 
140
  </p>
@@ -142,6 +173,4 @@ with block:
142
  """
143
  )
144
 
145
- block.launch(debug=False)
146
- shutil.rmtree("./midi_wav_outputs")
147
- shutil.rmtree("./yt_dir")
 
1
  import os
2
  import torch
 
3
  import librosa
4
  import binascii
5
  import warnings
6
+ import midi2audio # to convert midi to wav
7
+ import numpy as np
8
  import pytube as pt # to download the youtube videos as audios
9
  import gradio as gr
10
+ import soundfile as sf # to make the stereo mix
11
  from transformers import Pop2PianoForConditionalGeneration, Pop2PianoProcessor
12
 
13
 
 
32
  filename = None
33
 
34
  return filename, filename
35
+
36
def inference(file_uploaded, composer):
    """Generate a piano arrangement from an audio file and render its output files.

    Args:
        file_uploaded: Path of the audio file to load with ``librosa``.
        composer: Arranger name forwarded to ``model.generate``.

    Returns:
        Whatever ``prepare_output_file`` returns for the decoded MIDI objects.
    """
    # sr=None keeps the file's native sampling rate. This can silently lower
    # the quality of the generated output; if that happens, consider loading
    # at 44100 Hz instead.
    audio, native_sr = librosa.load(file_uploaded, sr=None)

    # Feature extraction happens on `device`; decoding below is done on CPU.
    features = processor(audio=audio, sampling_rate=native_sr, return_tensors="pt")
    features = features.to(device)
    generated_ids = model.generate(
        input_features=features["input_features"],
        composer=composer,
    )

    decoded = processor.batch_decode(
        token_ids=generated_ids.to("cpu"),
        feature_extractor_output=features.to("cpu"),
    )
    return prepare_output_file(decoded["pretty_midi_objects"], native_sr)
46
 
47
+ def prepare_output_file(tokenizer_output, sr):
48
  # Add some random values so that no two file names are same
49
  output_file_name = "output_" + binascii.hexlify(os.urandom(8)).decode()
50
  midi_output = os.path.join(outputs_dir, output_file_name + ".mid")
 
55
  # convert .mid file to .wav using `midi2audio`
56
  wav_output = midi_output.replace(".mid", ".wav")
57
  midi2audio.FluidSynth().midi_to_audio(midi_output, wav_output)
58
+
59
  return wav_output, wav_output, midi_output
60
+
61
def get_stereo(pop_path, midi, pop_scale=0.99):
    """Write a two-channel mix of the pop track and the generated piano audio.

    Channel 0 holds the generated audio and channel 1 holds the pop track
    multiplied by ``pop_scale``. The shorter signal is zero-padded at the end
    so both channels have the same length.

    Args:
        pop_path: Path of the original (pop) audio file.
        midi: The rendered audio of the generated MIDI, either as an object
            exposing the file path in ``.name`` or as a plain path string.
        pop_scale: Gain applied to the pop channel.

    Returns:
        The path of the written ``.wav`` mix, twice (one value per output
        component wired to this callback).

    Raises:
        ValueError: If either input is missing.
    """
    if not pop_path or midi is None:
        raise ValueError(
            "Both the original audio and the generated audio are required to build the stereo mix."
        )

    # Accept both file-like objects (with `.name`) and plain path strings.
    midi_path = getattr(midi, "name", midi)

    pop_y, sr = librosa.load(pop_path, sr=None)
    # Load the generated audio at the pop track's rate: the mix is written
    # with a single `samplerate`, so both channels must share it or they
    # would play back at different speeds.
    midi_y, _ = librosa.load(midi_path, sr=sr)

    # Zero-pad whichever signal is shorter so the two can be stacked.
    target_len = max(len(pop_y), len(midi_y))
    pop_y = np.pad(pop_y, (0, target_len - len(pop_y)))
    midi_y = np.pad(midi_y, (0, target_len - len(midi_y)))
    stereo = np.stack((midi_y, pop_y * pop_scale))

    # Build the output name from the file's stem only. A blind
    # str.replace("output", ...) on the whole path returns the path unchanged
    # when "output" is absent (overwriting the source file) and rewrites
    # directory names when it is present there.
    directory, base_name = os.path.split(pop_path)
    stem, _ = os.path.splitext(base_name)
    stereo_mix_path = os.path.join(directory, stem + "_stereo_mix.wav")

    # soundfile expects (frames, channels), hence the transpose.
    sf.write(file=stereo_mix_path, data=stereo.T, samplerate=sr, format="wav")

    return stereo_mix_path, stereo_mix_path
 
 
 
 
75
 
76
 
77
  # Thanks a lot to "https://huggingface.co/Taithrah" for this theme.
 
116
  with gr.Group():
117
  with gr.Column():
118
  composer = gr.Dropdown(label="Arranger", choices=composers, value="composer1")
119
+ generate_btn = gr.Button("Generate")
120
 
121
  with gr.Group():
122
  with gr.Row().style(mobile_collapse=False, equal_height=True):
123
  wav_output2 = gr.File(label="Download the Generated MIDI (.wav)")
124
  wav_output1 = gr.Audio(label="Listen to the Generated MIDI")
125
  midi_output = gr.File(label="Download the Generated MIDI (.mid)")
126
+ generate_btn.click(inference,
127
+ inputs=[file_uploaded, composer],
128
+ outputs=[wav_output1, wav_output2, midi_output])
129
+
130
+ with gr.Group():
131
+ gr.HTML(
132
+ """
133
+ <div> <h3> <center> Get the Stereo Mix from the Pop Music and Generated MIDI </h3> </div>
134
+ """
135
+ )
136
+ gr.Slider(0, 1, value=0.5, label="Choose the ratio between Pop and MIDI", info="1.0 = Only Pop, 0.0=Only MIDI"),
137
+ stereo_btn = gr.Button("Get Stereo Mix")
138
+ with gr.Row():
139
+ stereo_mix1 = gr.Audio(label="Listen to the Stereo Mix")
140
+ stereo_mix2 = gr.File(label="Download the Stereo Mix")
141
+
142
+ stereo_btn.click(get_stereo, inputs=[file_uploaded, wav_output2], outputs=[stereo_mix1, stereo_mix2])
143
 
144
  with gr.Group():
145
  gr.Examples([
 
156
  """
157
  <div class="footer">
158
  <center>The design for this Space is taken from <a href="https://huggingface.co/spaces/NoCrypt/miku"> NoCrypt/miku </a>
 
159
  </div>
160
  """
161
  )
 
165
  <div class="footer">
166
  <center><p><a href="http://sweetcocoa.github.io/pop2piano_samples" style="text-decoration: underline;" target="_blank">Project Page</a>
167
  <center><a href="https://huggingface.co/docs/transformers/main/model_doc/pop2piano" style="text-decoration: underline;" target="_blank">HuggingFace Model Docs</a>
168
+ <center><a href="https://github.com/sweetcocoa/pop2piano" style="text-decoration: underline;" target="_blank">Github</a>
169
 
170
 
171
  </p>
 
173
  """
174
  )
175
 
176
+ block.launch(debug=False)