pengdaqian committed on
Commit
737aeb3
1 Parent(s): 27d3bc5
Files changed (4) hide show
  1. app.py +70 -20
  2. music/__init__.py +0 -0
  3. music/search.py +90 -0
  4. requirements.txt +2 -0
app.py CHANGED
@@ -1,3 +1,4 @@
 
1
  from vits.models import SynthesizerInfer
2
  from omegaconf import OmegaConf
3
  import torchcrepe
@@ -8,6 +9,7 @@ import gradio as gr
8
  import librosa
9
  import numpy as np
10
  import soundfile
 
11
 
12
  import logging
13
 
@@ -78,7 +80,6 @@ model.to(device)
78
 
79
 
80
  def svc_change(argswave, argsspk):
81
-
82
  argsppg = "svc_tmp.ppg.npy"
83
  os.system(f"python whisper/inference.py -w {argswave} -p {argsppg}")
84
 
@@ -132,7 +133,7 @@ def svc_change(argswave, argsspk):
132
  sub_pit = pit[cut_s:cut_e].unsqueeze(0).to(device)
133
  sub_len = torch.LongTensor([cut_e - cut_s]).to(device)
134
  sub_har = source[:, :, cut_s *
135
- hop_size:cut_e * hop_size].to(device)
136
  sub_out = model.inference(sub_ppg, sub_pit, spk, sub_len, sub_har)
137
  sub_out = sub_out[0, 0].data.cpu().detach().numpy()
138
 
@@ -170,31 +171,80 @@ def svc_main(sid, input_audio):
170
  audio = librosa.to_mono(audio.transpose(1, 0))
171
  if sampling_rate != 16000:
172
  audio = librosa.resample(audio, orig_sr=sampling_rate, target_sr=16000)
173
- if (len(audio) > 16000*100):
174
- audio = audio[:16000*100]
175
  wav_path = "temp.wav"
176
  soundfile.write(wav_path, audio, 16000, format="wav")
177
  out_audio = svc_change(wav_path, f"configs/singers/singer00{sid}.npy")
178
  return "Success", (48000, out_audio)
179
 
180
 
 
 
 
 
 
 
 
 
181
  app = gr.Blocks()
182
  with app:
183
- with gr.Tabs():
184
- with gr.TabItem("sovits 5.0"):
185
- gr.Markdown(value="""
186
- 基于开源数据:Multi-Singer
187
-
188
- https://github.com/Multi-Singer/Multi-Singer.github.io
189
-
190
- [轻度伴奏可以无需去伴奏]就能直接进行歌声转换的SVC库
191
- """)
192
- sid = gr.Dropdown(label="音色", choices=[
193
- "22", "33", "47", "51"], value="47")
194
- vc_input3 = gr.Audio(label="上传音频")
195
- vc_submit = gr.Button("转换", variant="primary")
196
- vc_output1 = gr.Textbox(label="状态信息")
197
- vc_output2 = gr.Audio(label="转换音频")
198
- vc_submit.click(svc_main, [sid, vc_input3], [vc_output1, vc_output2])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
199
 
200
  app.launch()
 
1
# The search helpers live in music/search.py (the package has no music.py).
from music.search import get_random_spit, get_albums
2
  from vits.models import SynthesizerInfer
3
  from omegaconf import OmegaConf
4
  import torchcrepe
 
9
  import librosa
10
  import numpy as np
11
  import soundfile
12
+ import random
13
 
14
  import logging
15
 
 
80
 
81
 
82
  def svc_change(argswave, argsspk):
 
83
  argsppg = "svc_tmp.ppg.npy"
84
  os.system(f"python whisper/inference.py -w {argswave} -p {argsppg}")
85
 
 
133
  sub_pit = pit[cut_s:cut_e].unsqueeze(0).to(device)
134
  sub_len = torch.LongTensor([cut_e - cut_s]).to(device)
135
  sub_har = source[:, :, cut_s *
136
+ hop_size:cut_e * hop_size].to(device)
137
  sub_out = model.inference(sub_ppg, sub_pit, spk, sub_len, sub_har)
138
  sub_out = sub_out[0, 0].data.cpu().detach().numpy()
139
 
 
171
  audio = librosa.to_mono(audio.transpose(1, 0))
172
  if sampling_rate != 16000:
173
  audio = librosa.resample(audio, orig_sr=sampling_rate, target_sr=16000)
174
+ if len(audio) > 16000 * 100:
175
+ audio = audio[:16000 * 100]
176
  wav_path = "temp.wav"
177
  soundfile.write(wav_path, audio, 16000, format="wav")
178
  out_audio = svc_change(wav_path, f"configs/singers/singer00{sid}.npy")
179
  return "Success", (48000, out_audio)
180
 
181
 
182
def auto_search(name):
    """Search for a song by keyword and return the path of a downloaded clip.

    One of the matching songs is picked at random and handed to
    ``get_random_spit``, whose result is returned unchanged.

    Args:
        name: Search keywords typed by the user (song title, artist, ...).

    Returns:
        Whatever ``get_random_spit`` returns for the chosen song, or ``None``
        when the keywords are blank or the search yields no usable song.
    """
    # Blank keywords cannot produce a meaningful search; bail out early.
    if not name or not name.strip():
        return None

    config = {'logfilepath': 'musicdl.log', 'savedir': 'downloaded', 'search_size_per_source': 5, 'proxies': {}}
    albums = get_albums(keywords=name, config=config)
    if not albums:
        # random.choice() raises IndexError on an empty sequence, so report
        # "nothing found" explicitly instead of crashing the click handler.
        logging.warning("auto_search: no usable song found for %r", name)
        return None

    album = random.choice(albums)
    return get_random_spit(album)
188
+
189
+
190
  app = gr.Blocks()
191
  with app:
192
+ title = "Singer Voice Clone 0.1 Demo"
193
+ desc = """ small singer voice clone Demo App. <br />
194
+ Enter keywords auto search music to clone or upload music yourself
195
+ It's just a simplified demo, you can use more advanced features optimize music quality <br />"""
196
+ tutorial_link = "https://docs.cworld.ai"
197
+
198
+ gr.HTML(
199
+ f"""
200
+ <div style="text-align: center; margin: 0 auto;">
201
+ <div
202
+ style="
203
+ display: inline-flex;
204
+ align-items: center;
205
+ gap: 0.8rem;
206
+ font-size: 1.75rem;
207
+ "
208
+ >
209
+ <svg height="100%" stroke-miterlimit="10" style="fill-rule:nonzero;clip-rule:evenodd;stroke-linecap:round;stroke-linejoin:round;" version="1.1" viewBox="0 0 100 100" width="100%" xml:space="preserve" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
210
+ <defs/>
211
+ <clipPath id="ArtboardFrame">
212
+ <rect height="100" width="100" x="0" y="0"/>
213
+ </clipPath>
214
+ <g clip-path="url(#ArtboardFrame)" id="SvgjsG2907">
215
+ <g opacity="1">
216
+ <g opacity="1">
217
+ <path d="M49.5597 6.74187C73.4486 6.74187 92.893 26.1863 92.893 50.0752C92.893 73.9641 73.4486 93.4085 49.5597 93.4085C25.6708 93.4085 6.22637 73.9641 6.22637 50.0752C6.22637 26.1863 25.6708 6.74187 49.5597 6.74187M49.5597 0.075206C21.893 0.075206-0.440293 22.4085-0.440293 50.0752C-0.440293 77.7419 21.893 100.075 49.5597 100.075C77.2264 100.075 99.5597 77.7419 99.5597 50.0752C99.5597 22.4085 77.2264 0.075206 49.5597 0.075206L49.5597 0.075206Z" fill="#111111" fill-rule="nonzero" opacity="1" stroke="none"/>
218
+ <path d="M55.1153 77.853L44.0042 77.853L44.0042 72.2974C44.0042 69.1863 46.4486 66.7419 49.5597 66.7419L49.5597 66.7419C52.6708 66.7419 55.1153 69.1863 55.1153 72.2974L55.1153 77.853Z" fill="#111111" fill-rule="nonzero" opacity="1" stroke="none"/>
219
+ <path d="M21.7819 33.4085L32.893 33.4085L32.893 33.4085L32.893 55.6308L32.893 55.6308L21.7819 55.6308L21.7819 55.6308L21.7819 33.4085L21.7819 33.4085Z" fill="#111111" fill-rule="nonzero" opacity="1" stroke="none"/>
220
+ <path d="M66.2264 33.4085L77.3375 33.4085L77.3375 33.4085L77.3375 55.6308L77.3375 55.6308L66.2264 55.6308L66.2264 55.6308L66.2264 33.4085L66.2264 33.4085Z" fill="#111111" fill-rule="nonzero" opacity="1" stroke="none"/>
221
+ </g>
222
+ </g>
223
+ </g>
224
+ </svg>
225
+ <h1 style="font-weight: 900; margin-bottom: 7px;margin-top:5px">
226
+ {title}
227
+ </h1>
228
+ </div>
229
+ <p style="margin-bottom: 10px; font-size: 94%; line-height: 23px;">
230
+ {desc}
231
+ There is the <a href="{tutorial_link}"> tutorial </a>
232
+ </p>
233
+ </div>
234
+ """
235
+ )
236
+
237
+ sid = gr.Dropdown(label="Singer", choices=["22", "33", "47", "51"], value="47")
238
+
239
+ vc_input2 = gr.Textbox(label="Music Name")
240
+ vc_search = gr.Button("Auto Search", variant="primary")
241
+ vc_input3 = gr.Audio(label="Upload Music Yourself")
242
+
243
+ vc_search.click(auto_search, [vc_input2], [vc_input3])
244
+
245
+ vc_submit = gr.Button("Convert", variant="primary")
246
+ vc_output1 = gr.Textbox(label="Run Status")
247
+ vc_output2 = gr.Audio(label="Result Audio")
248
+ vc_submit.click(svc_main, [sid, vc_input3], [vc_output1, vc_output2])
249
 
250
  app.launch()
music/__init__.py ADDED
File without changes
music/search.py ADDED
@@ -0,0 +1,90 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os.path
2
+ import random
3
+
4
+ from musicdl import musicdl
5
+ from musicdl.modules import Downloader
6
+ from pydub import AudioSegment
7
+
8
def is_integer(string):
    """Parse a string of decimal digits into an int, defaulting to 0.

    Despite the predicate-style name, this returns the parsed value rather
    than a bool.

    Args:
        string: Text to parse, e.g. one field of a colon-separated duration.

    Returns:
        ``int(string)`` when ``string`` consists solely of decimal digits,
        otherwise 0 (empty strings, signs, placeholders such as ``"-"``).
    """
    # isdecimal() rather than isdigit(): isdigit() also accepts characters
    # such as superscript "²" that int() rejects with ValueError.
    return int(string) if string.isdecimal() else 0
13
+
14
+
15
def is_numeric(string):
    """Report whether ``string`` is a plain, unsigned decimal number.

    Accepted forms are ``"12"`` and ``"1.5"``. Signs, exponents, empty
    strings and numbers with a missing integer or fractional part
    (``".5"``, ``"5."``) are rejected.

    Args:
        string: Text to check.

    Returns:
        ``True`` if the text has one of the accepted forms, else ``False``.
    """
    whole, dot, fraction = string.partition('.')
    # isdecimal() rather than isdigit(): isdigit() accepts characters such as
    # superscript "²" that float() cannot parse.
    if not dot:
        return string.isdecimal()
    # A second '.' ends up inside ``fraction`` and fails isdecimal().
    return whole.isdecimal() and fraction.isdecimal()
23
+
24
+
25
def time_to_seconds(time_string):
    """Convert a colon-separated duration into a total number of seconds.

    Accepts ``hh:mm:ss`` as well as the shorter ``mm:ss`` and ``ss`` forms.
    Each field is parsed with ``is_integer``, so a non-numeric field counts
    as 0.

    Args:
        time_string: Duration text such as ``"00:04:12"``.

    Returns:
        The duration in whole seconds.

    Raises:
        ValueError: If the text has more than three colon-separated fields.
    """
    fields = time_string.split(':')
    if len(fields) > 3:
        raise ValueError(f"unsupported duration format: {time_string!r}")

    # Base-60 fold: every field shifts the running total up by one unit, so
    # the same loop handles one, two or three fields.
    total_seconds = 0
    for field in fields:
        total_seconds = total_seconds * 60 + is_integer(field)
    return total_seconds
29
+
30
+
31
def size_to_int(size_string):
    """Convert a human-readable file size such as ``"4.5MB"`` into bytes.

    Units ``B``, ``KB``, ``MB``, ``GB`` and ``TB`` are recognised
    case-insensitively (1 KB = 1024 bytes); whitespace around the number is
    ignored. Text without a recognised unit is read as a byte count.

    Args:
        size_string: Size text, e.g. ``"512KB"`` or ``"3.2 MB"``.

    Returns:
        The size in bytes as an int. When the numeric part cannot be parsed
        (for example a placeholder like ``"-MB"``) or is negative, a sentinel
        of 5.1 MiB is returned.
    """
    import math

    # Deliberately just above 5 MiB so that unknown sizes count as "too big".
    unknown_size = int(5.1 * 1024 * 1024)
    # Longest suffixes first so that "MB" is not mistaken for a bare "B".
    unit_factors = {
        'TB': 1024 ** 4,
        'GB': 1024 ** 3,
        'MB': 1024 ** 2,
        'KB': 1024,
        'B': 1,
    }

    text = size_string.strip().upper()
    number, factor = text, 1
    for unit, unit_factor in unit_factors.items():
        if text.endswith(unit):
            number, factor = text[:-len(unit)], unit_factor
            break

    try:
        value = float(number)
    except ValueError:
        return unknown_size
    # float() also parses "inf"/"nan" and signed values; none is a real size.
    if not math.isfinite(value) or value < 0:
        return unknown_size
    return int(value * factor)
47
+
48
+
49
def get_albums(keywords, config, max_size_bytes=5 * 1024 * 1024, max_duration_seconds=300):
    """Search several music sources and return small, short, unique MP3 hits.

    Args:
        keywords: Search text passed to the musicdl client.
        config: Configuration dict passed to ``musicdl.musicdl``.
        max_size_bytes: Entries whose ``filesize`` exceeds this are dropped.
        max_duration_seconds: Entries whose ``duration`` exceeds this are
            dropped.

    Returns:
        A list of the search-result entries that are MP3s, within both
        limits, and whose ``songname`` has not been seen earlier in the
        results. The list is empty when nothing qualifies.
    """
    target_srcs = [
        'kugou', 'kuwo', 'qqmusic', 'qianqian', 'fivesing',
        'netease', 'migu', 'joox', 'yiting',
    ]
    client = musicdl.musicdl(config=config)
    # NOTE(review): presumably maps each source name to a list of song-info
    # dicts; only the values are used here. Confirm against musicdl.
    results = client.search(keywords, target_srcs)

    seen_names = set()
    valid_albums = []
    for albums in results.values():
        for album in albums:
            name = album['songname']
            if name in seen_names or album['ext'] != 'mp3':
                continue
            if size_to_int(album['filesize']) > max_size_bytes:
                continue
            try:
                duration = time_to_seconds(album['duration'])
            except ValueError:
                # One malformed duration must not abort the whole search.
                continue
            if duration > max_duration_seconds:
                continue
            seen_names.add(name)
            valid_albums.append(album)
    return valid_albums
74
+
75
+
76
def get_random_spit(songinfo):
    """Download a song and trim the saved file to its middle 30 seconds.

    The file is overwritten in place with the trimmed clip. Songs shorter
    than 30 seconds are kept whole.

    Args:
        songinfo: Song-info dict; this function reads its ``savedir``,
            ``savename`` and ``ext`` keys and passes the whole dict to
            ``Downloader``.

    Returns:
        The path of the trimmed audio file.
    """
    # NOTE(review): assumes start() blocks until the file is on disk and
    # that a failed download surfaces when the file is opened below.
    d = Downloader(songinfo)
    d.start()
    save_path = os.path.join(songinfo["savedir"], f"{songinfo['savename']}.{songinfo['ext']}")
    song = AudioSegment.from_mp3(save_path)

    # pydub does things in milliseconds
    length = len(song)
    half_clip_ms = 15 * 1000
    centre = length // 2
    # Clamp the window so short songs do not produce out-of-range indices.
    left_idx = max(centre - half_clip_ms, 0)
    right_idx = min(centre + half_clip_ms, length)

    middle_30s = song[left_idx:right_idx]
    # export() returns the open output file handle; close it so that it is
    # not leaked.
    exported = middle_30s.export(save_path, format="mp3")
    exported.close()
    # The caller uses this path as the result; it was previously missing.
    return save_path
requirements.txt CHANGED
@@ -13,3 +13,5 @@ torchcrepe
13
  transformers
14
  tqdm
15
  librosa
 
 
 
13
  transformers
14
  tqdm
15
  librosa
16
+ pydub
17
+ musicdl