Spaces:
Sleeping
Sleeping
sweetcocoa
committed on
Commit
•
5332e66
1
Parent(s):
d8a0f82
update libs
Browse files
- README.md +3 -1
- app.py +20 -17
- pyproject.toml +4 -0
- requirements.txt +2 -2
- utils.py +48 -2
README.md
CHANGED
@@ -1,8 +1,10 @@
|
|
1 |
---
|
2 |
title: Pop2Piano Demo
|
3 |
emoji: 🎹
|
|
|
|
|
4 |
sdk: gradio
|
5 |
-
sdk_version:
|
6 |
app_file: app.py
|
7 |
pinned: true
|
8 |
---
|
|
|
1 |
---
|
2 |
title: Pop2Piano Demo
|
3 |
emoji: 🎹
|
4 |
+
python_version: 3.10
|
5 |
+
models: ["sweetcocoa/pop2piano"]
|
6 |
sdk: gradio
|
7 |
+
sdk_version: 4.39.0
|
8 |
app_file: app.py
|
9 |
pinned: true
|
10 |
---
|
app.py
CHANGED
@@ -1,18 +1,15 @@
|
|
1 |
-
import os
|
2 |
import binascii
|
3 |
-
import
|
4 |
|
5 |
import gradio as gr
|
6 |
import librosa
|
7 |
import numpy as np
|
8 |
-
import torch
|
9 |
import pretty_midi
|
10 |
-
import
|
11 |
-
|
12 |
-
from pytube.exceptions import VideoUnavailable
|
13 |
from transformers import Pop2PianoForConditionalGeneration, Pop2PianoProcessor
|
14 |
|
15 |
-
from utils import mp3_write, normalize
|
16 |
|
17 |
yt_video_dir = "./yt_dir"
|
18 |
outputs_dir = "./midi_wav_outputs"
|
@@ -26,14 +23,20 @@ composers = model.generation_config.composer_to_feature_token.keys()
|
|
26 |
|
27 |
|
28 |
def get_audio_from_yt_video(yt_link: str):
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
37 |
|
38 |
return filename, filename
|
39 |
|
@@ -82,7 +85,7 @@ block = gr.Blocks()
|
|
82 |
with block:
|
83 |
gr.HTML(
|
84 |
"""
|
85 |
-
<div style="text-align: center; max-width:
|
86 |
<div
|
87 |
style="
|
88 |
display: inline-flex;
|
@@ -131,7 +134,7 @@ with block:
|
|
131 |
<div> <h3> <center> Listen to the generated MIDI. </h3> </div>
|
132 |
"""
|
133 |
)
|
134 |
-
with gr.Row(
|
135 |
stereo_mix1 = gr.Audio(label="Listen to the Stereo Mix")
|
136 |
wav_output1 = gr.Audio(label="Listen to the Generated MIDI")
|
137 |
|
|
|
|
|
1 |
import binascii
|
2 |
+
import os
|
3 |
|
4 |
import gradio as gr
|
5 |
import librosa
|
6 |
import numpy as np
|
|
|
7 |
import pretty_midi
|
8 |
+
import torch
|
9 |
+
import yt_dlp
|
|
|
10 |
from transformers import Pop2PianoForConditionalGeneration, Pop2PianoProcessor
|
11 |
|
12 |
+
from utils import cli_to_api, mp3_write, normalize
|
13 |
|
14 |
yt_video_dir = "./yt_dir"
|
15 |
outputs_dir = "./midi_wav_outputs"
|
|
|
23 |
|
24 |
|
25 |
def get_audio_from_yt_video(yt_link: str):
|
26 |
+
filename = binascii.hexlify(os.urandom(8)).decode() + ".mp3"
|
27 |
+
filename = os.path.join(yt_video_dir, filename)
|
28 |
+
yt_opt = cli_to_api(
|
29 |
+
[
|
30 |
+
"--extract-audio",
|
31 |
+
"--audio-format",
|
32 |
+
"mp3",
|
33 |
+
"--restrict-filenames",
|
34 |
+
"-o",
|
35 |
+
filename,
|
36 |
+
]
|
37 |
+
)
|
38 |
+
with yt_dlp.YoutubeDL(yt_opt) as ydl:
|
39 |
+
ydl.download([yt_link])
|
40 |
|
41 |
return filename, filename
|
42 |
|
|
|
85 |
with block:
|
86 |
gr.HTML(
|
87 |
"""
|
88 |
+
<div style="text-align: center; max-width: 400px; margin: 0 auto;">
|
89 |
<div
|
90 |
style="
|
91 |
display: inline-flex;
|
|
|
134 |
<div> <h3> <center> Listen to the generated MIDI. </h3> </div>
|
135 |
"""
|
136 |
)
|
137 |
+
with gr.Row(equal_height=True):
|
138 |
stereo_mix1 = gr.Audio(label="Listen to the Stereo Mix")
|
139 |
wav_output1 = gr.Audio(label="Listen to the Generated MIDI")
|
140 |
|
pyproject.toml
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[tool.ruff]
|
2 |
+
line-length=100
|
3 |
+
select = ["F", "I"]
|
4 |
+
ignore = []
|
requirements.txt
CHANGED
@@ -4,7 +4,7 @@ pretty-midi==0.2.9
|
|
4 |
essentia==2.1b6.dev1034
|
5 |
pyFluidSynth==1.3.0
|
6 |
transformers
|
7 |
-
|
8 |
-
gradio
|
9 |
resampy
|
10 |
pydub
|
|
|
4 |
essentia==2.1b6.dev1034
|
5 |
pyFluidSynth==1.3.0
|
6 |
transformers
|
7 |
+
yt-dlp>=2024.7.25
|
8 |
+
gradio
|
9 |
resampy
|
10 |
pydub
|
utils.py
CHANGED
@@ -1,5 +1,7 @@
|
|
1 |
import numpy as np
|
2 |
import pydub
|
|
|
|
|
3 |
|
4 |
|
5 |
def mp3_write(f: str, sr: int, x: np.ndarray, normalized: bool = False):
|
@@ -8,14 +10,58 @@ def mp3_write(f: str, sr: int, x: np.ndarray, normalized: bool = False):
|
|
8 |
y = np.int16(x * 2**15)
|
9 |
else:
|
10 |
y = np.int16(x)
|
11 |
-
song = pydub.AudioSegment(
|
|
|
|
|
12 |
song.export(f, format="mp3", bitrate="256k")
|
13 |
|
14 |
|
15 |
-
def normalize(
|
|
|
|
|
16 |
max_y -= eps
|
17 |
min_y += eps
|
18 |
amax = audio.max()
|
19 |
amin = audio.min()
|
20 |
audio = (max_y - min_y) * (audio - amin) / (amax - amin) + min_y
|
21 |
return audio
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
import numpy as np
|
2 |
import pydub
|
3 |
+
import yt_dlp
|
4 |
+
import yt_dlp.options
|
5 |
|
6 |
|
7 |
def mp3_write(f: str, sr: int, x: np.ndarray, normalized: bool = False):
|
|
|
10 |
y = np.int16(x * 2**15)
|
11 |
else:
|
12 |
y = np.int16(x)
|
13 |
+
song = pydub.AudioSegment(
|
14 |
+
y.tobytes(), frame_rate=sr, sample_width=2, channels=channels
|
15 |
+
)
|
16 |
song.export(f, format="mp3", bitrate="256k")
|
17 |
|
18 |
|
19 |
+
def normalize(
|
20 |
+
audio: np.ndarray, min_y: float = -1.0, max_y: float = 1.0, eps: float = 1e-8
|
21 |
+
):
|
22 |
max_y -= eps
|
23 |
min_y += eps
|
24 |
amax = audio.max()
|
25 |
amin = audio.min()
|
26 |
audio = (max_y - min_y) * (audio - amin) / (amax - amin) + min_y
|
27 |
return audio
|
28 |
+
|
29 |
+
|
30 |
+
# yt_dlp script copied from https://github.com/yt-dlp/yt-dlp/blob/28d485714fef88937c82635438afba5db81f9089/devscripts/cli_to_api.py
|
31 |
+
create_parser = yt_dlp.options.create_parser
|
32 |
+
|
33 |
+
|
34 |
+
def parse_patched_options(opts):
|
35 |
+
patched_parser = create_parser()
|
36 |
+
patched_parser.defaults.update(
|
37 |
+
{
|
38 |
+
"ignoreerrors": False,
|
39 |
+
"retries": 0,
|
40 |
+
"fragment_retries": 0,
|
41 |
+
"extract_flat": False,
|
42 |
+
"concat_playlist": "never",
|
43 |
+
}
|
44 |
+
)
|
45 |
+
yt_dlp.options.create_parser = lambda: patched_parser
|
46 |
+
try:
|
47 |
+
return yt_dlp.parse_options(opts)
|
48 |
+
finally:
|
49 |
+
yt_dlp.options.create_parser = create_parser
|
50 |
+
|
51 |
+
|
52 |
+
default_opts = parse_patched_options([]).ydl_opts
|
53 |
+
|
54 |
+
|
55 |
+
def cli_to_api(opts, cli_defaults=False):
|
56 |
+
opts = (yt_dlp.parse_options if cli_defaults else parse_patched_options)(
|
57 |
+
opts
|
58 |
+
).ydl_opts
|
59 |
+
|
60 |
+
diff = {k: v for k, v in opts.items() if default_opts[k] != v}
|
61 |
+
if "postprocessors" in diff:
|
62 |
+
diff["postprocessors"] = [
|
63 |
+
pp
|
64 |
+
for pp in diff["postprocessors"]
|
65 |
+
if pp not in default_opts["postprocessors"]
|
66 |
+
]
|
67 |
+
return diff
|