Spaces:

susnato
/

pop2piano_dev

Runtime error

App Files Files Community

susnato committed on Sep 4, 2023

Commit

c8ce7ce

•

1 Parent(s): 09e9bd4

Update app.py

Browse files

Files changed (1) hide show

app.py +49 -20

app.py CHANGED Viewed

@@ -1,12 +1,13 @@
 import os
 import torch
-import shutil
 import librosa
 import binascii
 import warnings
-import midi2audio
 import pytube as pt    # to download the youtube videos as audios
 import gradio as gr
 from transformers import Pop2PianoForConditionalGeneration, Pop2PianoProcessor
@@ -31,8 +32,19 @@ def get_audio_from_yt_video(yt_link):
         filename = None
     return filename, filename
-def prepare_output_file(tokenizer_output):
     # Add some random values so that no two file names are same
     output_file_name = "output_" + binascii.hexlify(os.urandom(8)).decode()
     midi_output = os.path.join(outputs_dir, output_file_name + ".mid")
@@ -43,19 +55,23 @@ def prepare_output_file(tokenizer_output):
     # convert .mid file to .wav using `midi2audio`
     wav_output = midi_output.replace(".mid", ".wav")
     midi2audio.FluidSynth().midi_to_audio(midi_output, wav_output)
     return wav_output, wav_output, midi_output
-def inference(file_uploaded, composer):  # pre-commit version: audio file + composer in, result of prepare_output_file out
-    # sr=None makes librosa keep the file's native sampling rate, but this can cause silent errors where the
-    # generated output is not up to the desired quality. If that happens, consider switching sr to 44100 Hz.
-    waveform, sr = librosa.load(file_uploaded, sr=None)
-    inputs = processor(audio=waveform, sampling_rate=sr, return_tensors="pt").to(device)  # features moved to the module-level `device`
-    model_output = model.generate(input_features=inputs["input_features"], composer=composer)
-    tokenizer_output = processor.batch_decode(token_ids=model_output.to("cpu"), feature_extractor_output=inputs.to("cpu"))["pretty_midi_objects"]  # decoding is done on CPU
-    return prepare_output_file(tokenizer_output)  # the sampling rate is not forwarded in this version
 # Thanks a lot to "https://huggingface.co/Taithrah" for this theme.
@@ -100,14 +116,30 @@ with block:
     with gr.Group():
         with gr.Column():
             composer = gr.Dropdown(label="Arranger", choices=composers, value="composer1")
-            btn = gr.Button("Generate")
     with gr.Group():
         with gr.Row().style(mobile_collapse=False, equal_height=True):
             wav_output2 = gr.File(label="Download the Generated MIDI (.wav)")
             wav_output1 = gr.Audio(label="Listen to the Generated MIDI")
             midi_output = gr.File(label="Download the Generated MIDI (.mid)")
-            btn.click(inference, inputs=[file_uploaded, composer], outputs=[wav_output1, wav_output2, midi_output])
     with gr.Group():
         gr.Examples([
@@ -124,7 +156,6 @@ with block:
             """
         <div class="footer">
                     <center>The design for this Space is taken from <a href="https://huggingface.co/spaces/NoCrypt/miku"> NoCrypt/miku </a>
-                    </p>
         </div>
         """
         )
@@ -134,7 +165,7 @@ with block:
         <div class="footer">
                     <center><p><a href="http://sweetcocoa.github.io/pop2piano_samples" style="text-decoration: underline;" target="_blank">Project Page</a>
                     <center><a href="https://huggingface.co/docs/transformers/main/model_doc/pop2piano" style="text-decoration: underline;" target="_blank">HuggingFace Model Docs</a>
-                    <center><a href="http://sweetcocoa.github.io/pop2piano_samples" style="text-decoration: underline;" target="_blank">Github</a>
                     </p>
@@ -142,6 +173,4 @@ with block:
         """
         )
-block.launch(debug=False)
-shutil.rmtree("./midi_wav_outputs")
-shutil.rmtree("./yt_dir")

 import os
 import torch
 import librosa
 import binascii
 import warnings
+import midi2audio      # to convert midi to wav
+import numpy as np
 import pytube as pt    # to download the youtube videos as audios
 import gradio as gr
+import soundfile as sf # to make the stereo mix
 from transformers import Pop2PianoForConditionalGeneration, Pop2PianoProcessor
         filename = None
     return filename, filename
+def inference(file_uploaded, composer):
+    # to save the native sampling rate of the file, sr=None is used, but this can cause some silent errors where the
+    # generated output will not be upto the desired quality. If that happens please consider switching sr to 44100 Hz.
+    waveform, sr = librosa.load(file_uploaded, sr=None)
+    inputs = processor(audio=waveform, sampling_rate=sr, return_tensors="pt").to(device)
+    model_output = model.generate(input_features=inputs["input_features"], composer=composer)
+    tokenizer_output = processor.batch_decode(token_ids=model_output.to("cpu"), feature_extractor_output=inputs.to("cpu"))["pretty_midi_objects"]
+    return prepare_output_file(tokenizer_output, sr)
+def prepare_output_file(tokenizer_output, sr):
     # Add some random values so that no two file names are same
     output_file_name = "output_" + binascii.hexlify(os.urandom(8)).decode()
     midi_output = os.path.join(outputs_dir, output_file_name + ".mid")
     # convert .mid file to .wav using `midi2audio`
     wav_output = midi_output.replace(".mid", ".wav")
     midi2audio.FluidSynth().midi_to_audio(midi_output, wav_output)
     return wav_output, wav_output, midi_output
+def get_stereo(pop_path, midi, pop_scale=0.99):
+    pop_y, sr = librosa.load(pop_path, sr=None)
+    midi_y, _ = librosa.load(midi.name, sr=None)
+    if len(pop_y) > len(midi_y):
+        midi_y = np.pad(midi_y, (0, len(pop_y) - len(midi_y)))
+    elif len(pop_y) < len(midi_y):
+        pop_y = np.pad(pop_y, (0, -len(pop_y) + len(midi_y)))
+    stereo = np.stack((midi_y, pop_y * pop_scale))
+    stereo_mix_path = pop_path.replace("output", "output_stereo_mix")
+    sf.write(file=stereo_mix_path, data=stereo.T, samplerate=sr, format="wav",)
+    return stereo_mix_path, stereo_mix_path
 # Thanks a lot to "https://huggingface.co/Taithrah" for this theme.
     with gr.Group():
         with gr.Column():
             composer = gr.Dropdown(label="Arranger", choices=composers, value="composer1")
+            generate_btn = gr.Button("Generate")
     with gr.Group():
         with gr.Row().style(mobile_collapse=False, equal_height=True):
             wav_output2 = gr.File(label="Download the Generated MIDI (.wav)")
             wav_output1 = gr.Audio(label="Listen to the Generated MIDI")
             midi_output = gr.File(label="Download the Generated MIDI (.mid)")
+            generate_btn.click(inference,
+                               inputs=[file_uploaded, composer],
+                               outputs=[wav_output1, wav_output2, midi_output])
+    with gr.Group():
+        gr.HTML(
+            """
+            <div> <h3> <center> Get the Stereo Mix from the Pop Music and Generated MIDI </h3> </div>
+            """
+        )
+        gr.Slider(0, 1, value=0.5, label="Choose the ratio between Pop and MIDI", info="1.0 = Only Pop, 0.0=Only MIDI"),
+        stereo_btn = gr.Button("Get Stereo Mix")
+        with gr.Row():
+            stereo_mix1 = gr.Audio(label="Listen to the Stereo Mix")
+            stereo_mix2 = gr.File(label="Download the Stereo Mix")
+        stereo_btn.click(get_stereo, inputs=[file_uploaded, wav_output2], outputs=[stereo_mix1, stereo_mix2])
     with gr.Group():
         gr.Examples([
             """
         <div class="footer">
                     <center>The design for this Space is taken from <a href="https://huggingface.co/spaces/NoCrypt/miku"> NoCrypt/miku </a>
         </div>
         """
         )
         <div class="footer">
                     <center><p><a href="http://sweetcocoa.github.io/pop2piano_samples" style="text-decoration: underline;" target="_blank">Project Page</a>
                     <center><a href="https://huggingface.co/docs/transformers/main/model_doc/pop2piano" style="text-decoration: underline;" target="_blank">HuggingFace Model Docs</a>
+                    <center><a href="https://github.com/sweetcocoa/pop2piano" style="text-decoration: underline;" target="_blank">Github</a>
                     </p>
         """
         )
+block.launch(debug=False)