Spaces:
Runtime error
Runtime error
pengdaqian
committed on
Commit
•
737aeb3
1
Parent(s):
27d3bc5
fix
Browse files- app.py +70 -20
- music/__init__.py +0 -0
- music/search.py +90 -0
- requirements.txt +2 -0
app.py
CHANGED
@@ -1,3 +1,4 @@
|
|
|
|
1 |
from vits.models import SynthesizerInfer
|
2 |
from omegaconf import OmegaConf
|
3 |
import torchcrepe
|
@@ -8,6 +9,7 @@ import gradio as gr
|
|
8 |
import librosa
|
9 |
import numpy as np
|
10 |
import soundfile
|
|
|
11 |
|
12 |
import logging
|
13 |
|
@@ -78,7 +80,6 @@ model.to(device)
|
|
78 |
|
79 |
|
80 |
def svc_change(argswave, argsspk):
|
81 |
-
|
82 |
argsppg = "svc_tmp.ppg.npy"
|
83 |
os.system(f"python whisper/inference.py -w {argswave} -p {argsppg}")
|
84 |
|
@@ -132,7 +133,7 @@ def svc_change(argswave, argsspk):
|
|
132 |
sub_pit = pit[cut_s:cut_e].unsqueeze(0).to(device)
|
133 |
sub_len = torch.LongTensor([cut_e - cut_s]).to(device)
|
134 |
sub_har = source[:, :, cut_s *
|
135 |
-
|
136 |
sub_out = model.inference(sub_ppg, sub_pit, spk, sub_len, sub_har)
|
137 |
sub_out = sub_out[0, 0].data.cpu().detach().numpy()
|
138 |
|
@@ -170,31 +171,80 @@ def svc_main(sid, input_audio):
|
|
170 |
audio = librosa.to_mono(audio.transpose(1, 0))
|
171 |
if sampling_rate != 16000:
|
172 |
audio = librosa.resample(audio, orig_sr=sampling_rate, target_sr=16000)
|
173 |
-
if
|
174 |
-
audio = audio[:16000*100]
|
175 |
wav_path = "temp.wav"
|
176 |
soundfile.write(wav_path, audio, 16000, format="wav")
|
177 |
out_audio = svc_change(wav_path, f"configs/singers/singer00{sid}.npy")
|
178 |
return "Success", (48000, out_audio)
|
179 |
|
180 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
181 |
app = gr.Blocks()
|
182 |
with app:
|
183 |
-
|
184 |
-
|
185 |
-
|
186 |
-
|
187 |
-
|
188 |
-
|
189 |
-
|
190 |
-
|
191 |
-
|
192 |
-
|
193 |
-
|
194 |
-
|
195 |
-
|
196 |
-
|
197 |
-
|
198 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
199 |
|
200 |
app.launch()
|
|
|
1 |
+
# The search helpers are defined in music/search.py; the package has no
# music/music.py module, so importing from music.music fails at startup.
from music.search import get_random_spit, get_albums
|
2 |
from vits.models import SynthesizerInfer
|
3 |
from omegaconf import OmegaConf
|
4 |
import torchcrepe
|
|
|
9 |
import librosa
|
10 |
import numpy as np
|
11 |
import soundfile
|
12 |
+
import random
|
13 |
|
14 |
import logging
|
15 |
|
|
|
80 |
|
81 |
|
82 |
def svc_change(argswave, argsspk):
|
|
|
83 |
argsppg = "svc_tmp.ppg.npy"
|
84 |
os.system(f"python whisper/inference.py -w {argswave} -p {argsppg}")
|
85 |
|
|
|
133 |
sub_pit = pit[cut_s:cut_e].unsqueeze(0).to(device)
|
134 |
sub_len = torch.LongTensor([cut_e - cut_s]).to(device)
|
135 |
sub_har = source[:, :, cut_s *
|
136 |
+
hop_size:cut_e * hop_size].to(device)
|
137 |
sub_out = model.inference(sub_ppg, sub_pit, spk, sub_len, sub_har)
|
138 |
sub_out = sub_out[0, 0].data.cpu().detach().numpy()
|
139 |
|
|
|
171 |
audio = librosa.to_mono(audio.transpose(1, 0))
|
172 |
if sampling_rate != 16000:
|
173 |
audio = librosa.resample(audio, orig_sr=sampling_rate, target_sr=16000)
|
174 |
+
if len(audio) > 16000 * 100:
|
175 |
+
audio = audio[:16000 * 100]
|
176 |
wav_path = "temp.wav"
|
177 |
soundfile.write(wav_path, audio, 16000, format="wav")
|
178 |
out_audio = svc_change(wav_path, f"configs/singers/singer00{sid}.npy")
|
179 |
return "Success", (48000, out_audio)
|
180 |
|
181 |
|
182 |
+
def auto_search(name):
    """Search for music matching *name* and fetch a clip of one random hit.

    Args:
        name: Keywords typed by the user (song or artist name).

    Returns:
        Whatever ``get_random_spit`` returns for the randomly chosen search
        result, or ``None`` when *name* is blank or the search produced no
        usable result.
    """
    # Nothing to search for: skip the network round-trip entirely.
    if not name or not name.strip():
        return None

    config = {'logfilepath': 'musicdl.log', 'savedir': 'downloaded', 'search_size_per_source': 5, 'proxies': {}}
    albums = get_albums(keywords=name, config=config)
    if not albums:
        # random.choice() raises IndexError on an empty sequence; report the
        # miss and hand back an empty result instead of crashing the handler.
        logging.warning("auto_search: no usable search result for %r", name)
        return None

    return get_random_spit(random.choice(albums))
|
188 |
+
|
189 |
+
|
190 |
app = gr.Blocks()
|
191 |
with app:
|
192 |
+
title = "Singer Voice Clone 0.1 Demo"
|
193 |
+
desc = """ small singer voice clone Demo App. <br />
|
194 |
+
Enter keywords auto search music to clone or upload music yourself
|
195 |
+
It's just a simplified demo, you can use more advanced features optimize music quality <br />"""
|
196 |
+
tutorial_link = "https://docs.cworld.ai"
|
197 |
+
|
198 |
+
gr.HTML(
|
199 |
+
f"""
|
200 |
+
<div style="text-align: center; margin: 0 auto;">
|
201 |
+
<div
|
202 |
+
style="
|
203 |
+
display: inline-flex;
|
204 |
+
align-items: center;
|
205 |
+
gap: 0.8rem;
|
206 |
+
font-size: 1.75rem;
|
207 |
+
"
|
208 |
+
>
|
209 |
+
<svg height="100%" stroke-miterlimit="10" style="fill-rule:nonzero;clip-rule:evenodd;stroke-linecap:round;stroke-linejoin:round;" version="1.1" viewBox="0 0 100 100" width="100%" xml:space="preserve" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
|
210 |
+
<defs/>
|
211 |
+
<clipPath id="ArtboardFrame">
|
212 |
+
<rect height="100" width="100" x="0" y="0"/>
|
213 |
+
</clipPath>
|
214 |
+
<g clip-path="url(#ArtboardFrame)" id="SvgjsG2907">
|
215 |
+
<g opacity="1">
|
216 |
+
<g opacity="1">
|
217 |
+
<path d="M49.5597 6.74187C73.4486 6.74187 92.893 26.1863 92.893 50.0752C92.893 73.9641 73.4486 93.4085 49.5597 93.4085C25.6708 93.4085 6.22637 73.9641 6.22637 50.0752C6.22637 26.1863 25.6708 6.74187 49.5597 6.74187M49.5597 0.075206C21.893 0.075206-0.440293 22.4085-0.440293 50.0752C-0.440293 77.7419 21.893 100.075 49.5597 100.075C77.2264 100.075 99.5597 77.7419 99.5597 50.0752C99.5597 22.4085 77.2264 0.075206 49.5597 0.075206L49.5597 0.075206Z" fill="#111111" fill-rule="nonzero" opacity="1" stroke="none"/>
|
218 |
+
<path d="M55.1153 77.853L44.0042 77.853L44.0042 72.2974C44.0042 69.1863 46.4486 66.7419 49.5597 66.7419L49.5597 66.7419C52.6708 66.7419 55.1153 69.1863 55.1153 72.2974L55.1153 77.853Z" fill="#111111" fill-rule="nonzero" opacity="1" stroke="none"/>
|
219 |
+
<path d="M21.7819 33.4085L32.893 33.4085L32.893 33.4085L32.893 55.6308L32.893 55.6308L21.7819 55.6308L21.7819 55.6308L21.7819 33.4085L21.7819 33.4085Z" fill="#111111" fill-rule="nonzero" opacity="1" stroke="none"/>
|
220 |
+
<path d="M66.2264 33.4085L77.3375 33.4085L77.3375 33.4085L77.3375 55.6308L77.3375 55.6308L66.2264 55.6308L66.2264 55.6308L66.2264 33.4085L66.2264 33.4085Z" fill="#111111" fill-rule="nonzero" opacity="1" stroke="none"/>
|
221 |
+
</g>
|
222 |
+
</g>
|
223 |
+
</g>
|
224 |
+
</svg>
|
225 |
+
<h1 style="font-weight: 900; margin-bottom: 7px;margin-top:5px">
|
226 |
+
{title}
|
227 |
+
</h1>
|
228 |
+
</div>
|
229 |
+
<p style="margin-bottom: 10px; font-size: 94%; line-height: 23px;">
|
230 |
+
{desc}
|
231 |
+
There is the <a href="{tutorial_link}"> tutorial </a>
|
232 |
+
</p>
|
233 |
+
</div>
|
234 |
+
"""
|
235 |
+
)
|
236 |
+
|
237 |
+
# Singer selector; the chosen value is passed to svc_main as its first input.
sid = gr.Dropdown(label="Singer", choices=["22", "33", "47", "51"], value="47")
|
238 |
+
|
239 |
+
# Keyword box + button for the automatic music search.
vc_input2 = gr.Textbox(label="Music Name")
|
240 |
+
vc_search = gr.Button("Auto Search", variant="primary")
|
241 |
+
# Audio component: filled by the search below, or by a manual upload.
vc_input3 = gr.Audio(label="Upload Music Yourself")
|
242 |
+
|
243 |
+
# "Auto Search" feeds the keyword text to auto_search and writes its result
# into the audio component above.
vc_search.click(auto_search, [vc_input2], [vc_input3])
|
244 |
+
|
245 |
+
vc_submit = gr.Button("Convert", variant="primary")
|
246 |
+
vc_output1 = gr.Textbox(label="Run Status")
|
247 |
+
vc_output2 = gr.Audio(label="Result Audio")
|
248 |
+
# "Convert" calls svc_main(sid, audio) and shows its two outputs
# (status text, result audio).
vc_submit.click(svc_main, [sid, vc_input3], [vc_output1, vc_output2])
|
249 |
|
250 |
app.launch()
|
music/__init__.py
ADDED
File without changes
|
music/search.py
ADDED
@@ -0,0 +1,90 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os.path
|
2 |
+
import random
|
3 |
+
|
4 |
+
from musicdl import musicdl
|
5 |
+
from musicdl.modules import Downloader
|
6 |
+
from pydub import AudioSegment
|
7 |
+
|
8 |
+
def is_integer(string):
    """Parse *string* as a non-negative integer, falling back to 0.

    Despite the name, this returns the parsed value rather than a boolean.

    Args:
        string: Text that may consist solely of decimal digits.

    Returns:
        ``int(string)`` when *string* is made up only of decimal digits,
        otherwise ``0`` (including for empty, signed or fractional text).
    """
    # str.isdecimal() is used instead of str.isdigit(): isdigit() is also true
    # for characters such as "²", which int() rejects with ValueError.
    return int(string) if string.isdecimal() else 0
|
13 |
+
|
14 |
+
|
15 |
+
def is_numeric(string):
    """Tell whether *string* is an unsigned decimal number.

    Accepted forms are ``"123"`` and ``"12.5"``: digits only, optionally with
    exactly one dot that has digits on both sides.

    Args:
        string: Text to inspect.

    Returns:
        ``True`` if *string* matches one of the accepted forms, else ``False``.
    """
    whole, dot, fraction = string.partition('.')
    # str.isdecimal() rather than str.isdigit(): isdigit() also accepts
    # characters such as "²" that float() cannot parse.
    if not dot:
        return whole.isdecimal()
    # A second dot ends up in `fraction` and makes isdecimal() fail.
    return whole.isdecimal() and fraction.isdecimal()
|
23 |
+
|
24 |
+
|
25 |
+
def time_to_seconds(time_string):
    """Convert a colon-separated duration into a number of seconds.

    Accepts ``"HH:MM:SS"``, ``"MM:SS"`` or ``"SS"``. Fields that are not plain
    digit strings count as 0 (see ``is_integer``).

    Args:
        time_string: Duration text with up to three colon-separated fields.

    Returns:
        Total duration in seconds as an ``int``.

    Raises:
        ValueError: If *time_string* has more than three fields.
    """
    fields = time_string.split(':')
    if len(fields) > 3:
        raise ValueError(f"expected at most 'HH:MM:SS', got {time_string!r}")
    # Right-align shorter forms so "3:45" is read as 0 h, 3 min, 45 s instead
    # of failing the three-way unpack below.
    fields = ['0'] * (3 - len(fields)) + fields
    hours, minutes, seconds = (is_integer(field) for field in fields)
    return hours * 3600 + minutes * 60 + seconds
|
29 |
+
|
30 |
+
|
31 |
+
def size_to_int(size_string):
    """Convert a size string such as ``"3.5MB"`` into a byte count.

    The last two characters are taken as the unit and everything before them
    as the number.

    Args:
        size_string: Size text ending in a two-character unit.

    Returns:
        The size in bytes as an ``int``. Units other than KB/MB/GB/TB leave
        the number unscaled. When the numeric part cannot be parsed, the
        float ``5.1 * 1024 * 1024`` is returned instead.
    """
    # Split off the trailing two-character unit from the numeric prefix.
    number_text, unit = size_string[:-2], size_string[-2:]
    if not is_numeric(number_text):
        # Unparseable size: fall back to a fixed value of 5.1 * 1024 * 1024.
        return 5.1 * 1024 * 1024

    # Multiplier that converts each known unit to bytes.
    bytes_per_unit = {
        'KB': 1024,
        'MB': 1024 * 1024,
        'GB': 1024 * 1024 * 1024,
        'TB': 1024 * 1024 * 1024 * 1024,
    }
    # Truncate to an integer number of bytes.
    return int(float(number_text) * bytes_per_unit.get(unit, 1))
|
47 |
+
|
48 |
+
|
49 |
+
def get_albums(keywords, config):
    """Search several music sources and keep the results worth downloading.

    A result is kept only if its song name has not been kept already, its
    extension is ``mp3``, its reported size is at most 5 * 1024 * 1024 bytes
    and its reported duration is at most 300 seconds.

    Args:
        keywords: Search text forwarded to the musicdl client.
        config: Configuration passed to ``musicdl.musicdl``.

    Returns:
        List of the accepted result dicts, in the order they were found.
    """
    sources = [
        'kugou', 'kuwo', 'qqmusic', 'qianqian', 'fivesing',
        'netease', 'migu', 'joox', 'yiting',
    ]
    hits_by_source = musicdl.musicdl(config=config).search(keywords, sources)

    seen_names = set()
    accepted = []
    for hits in hits_by_source.values():
        if len(hits) == 0:
            continue
        for hit in hits:
            # Checks run in this order and stop at the first one that rejects.
            rejected = (
                hit['songname'] in seen_names
                or hit['ext'] != 'mp3'
                or size_to_int(hit['filesize']) > 5 * 1024 * 1024
                or time_to_seconds(hit['duration']) > 300
            )
            if rejected:
                continue
            seen_names.add(hit['songname'])
            accepted.append(hit)
    return accepted
|
74 |
+
|
75 |
+
|
76 |
+
def get_random_spit(songinfo):
    """Download a song and trim the saved file to its middle 30 seconds.

    The downloaded mp3 is overwritten in place with the excerpt. Songs
    shorter than 30 seconds are kept whole.

    Args:
        songinfo: Search-result dict; its ``savedir``, ``savename`` and
            ``ext`` entries determine where the file is stored.

    Returns:
        Path of the trimmed mp3 file.
    """
    Downloader(songinfo).start()
    save_path = os.path.join(songinfo["savedir"], f"{songinfo['savename']}.{songinfo['ext']}")
    song = AudioSegment.from_mp3(save_path)

    # pydub does things in milliseconds
    length = len(song)
    half_window = 15 * 1000
    middle = length // 2
    # Clamp the 30 s window to the bounds of the track.
    left_idx = max(middle - half_window, 0)
    right_idx = min(middle + half_window, length)

    song[left_idx:right_idx].export(save_path, format="mp3")
    # Callers use the return value as the clip location; without this the
    # function would implicitly return None.
    return save_path
|
requirements.txt
CHANGED
@@ -13,3 +13,5 @@ torchcrepe
|
|
13 |
transformers
|
14 |
tqdm
|
15 |
librosa
|
|
|
|
|
|
13 |
transformers
|
14 |
tqdm
|
15 |
librosa
|
16 |
+
pydub
|
17 |
+
musicdl
|