dodo12

Runtime error

App Files Files Community

pengdaqian committed on May 12, 2023

Commit

737aeb3

•

1 Parent(s): 27d3bc5

fix

Browse files

Files changed (4) hide show

app.py +70 -20
music/__init__.py +0 -0
music/search.py +90 -0
requirements.txt +2 -0

app.py CHANGED Viewed

@@ -1,3 +1,4 @@
 from vits.models import SynthesizerInfer
 from omegaconf import OmegaConf
 import torchcrepe
@@ -8,6 +9,7 @@ import gradio as gr
 import librosa
 import numpy as np
 import soundfile
 import logging
@@ -78,7 +80,6 @@ model.to(device)
 def svc_change(argswave, argsspk):
     argsppg = "svc_tmp.ppg.npy"
     os.system(f"python whisper/inference.py -w {argswave} -p {argsppg}")
@@ -132,7 +133,7 @@ def svc_change(argswave, argsspk):
             sub_pit = pit[cut_s:cut_e].unsqueeze(0).to(device)
             sub_len = torch.LongTensor([cut_e - cut_s]).to(device)
             sub_har = source[:, :, cut_s *
-                             hop_size:cut_e * hop_size].to(device)
             sub_out = model.inference(sub_ppg, sub_pit, spk, sub_len, sub_har)
             sub_out = sub_out[0, 0].data.cpu().detach().numpy()
@@ -170,31 +171,80 @@ def svc_main(sid, input_audio):
         audio = librosa.to_mono(audio.transpose(1, 0))
     if sampling_rate != 16000:
         audio = librosa.resample(audio, orig_sr=sampling_rate, target_sr=16000)
-    if (len(audio) > 16000*100):
-        audio = audio[:16000*100]
     wav_path = "temp.wav"
     soundfile.write(wav_path, audio, 16000, format="wav")
     out_audio = svc_change(wav_path, f"configs/singers/singer00{sid}.npy")
     return "Success", (48000, out_audio)
 app = gr.Blocks()
 with app:
-    with gr.Tabs():
-        with gr.TabItem("sovits 5.0"):
-            gr.Markdown(value="""
-                基于开源数据:Multi-Singer
-                https://github.com/Multi-Singer/Multi-Singer.github.io
-                [轻度伴奏可以无需去伴奏]就能直接进行歌声转换的SVC库
-                """)
-            sid = gr.Dropdown(label="音色", choices=[
-                              "22", "33", "47", "51"], value="47")
-            vc_input3 = gr.Audio(label="上传音频")
-            vc_submit = gr.Button("转换", variant="primary")
-            vc_output1 = gr.Textbox(label="状态信息")
-            vc_output2 = gr.Audio(label="转换音频")
-        vc_submit.click(svc_main, [sid, vc_input3], [vc_output1, vc_output2])
     app.launch()

+from music.music import get_random_spit, get_albums
 from vits.models import SynthesizerInfer
 from omegaconf import OmegaConf
 import torchcrepe
 import librosa
 import numpy as np
 import soundfile
+import random
 import logging
 def svc_change(argswave, argsspk):
     argsppg = "svc_tmp.ppg.npy"
     os.system(f"python whisper/inference.py -w {argswave} -p {argsppg}")
             sub_pit = pit[cut_s:cut_e].unsqueeze(0).to(device)
             sub_len = torch.LongTensor([cut_e - cut_s]).to(device)
             sub_har = source[:, :, cut_s *
+                                   hop_size:cut_e * hop_size].to(device)
             sub_out = model.inference(sub_ppg, sub_pit, spk, sub_len, sub_har)
             sub_out = sub_out[0, 0].data.cpu().detach().numpy()
         audio = librosa.to_mono(audio.transpose(1, 0))
     if sampling_rate != 16000:
         audio = librosa.resample(audio, orig_sr=sampling_rate, target_sr=16000)
+    if len(audio) > 16000 * 100:
+        audio = audio[:16000 * 100]
     wav_path = "temp.wav"
     soundfile.write(wav_path, audio, 16000, format="wav")
     out_audio = svc_change(wav_path, f"configs/singers/singer00{sid}.npy")
     return "Success", (48000, out_audio)
def auto_search(name):
    """Search for music by keyword, download one random match, return its path.

    Args:
        name: keyword string passed to ``get_albums``.

    Returns:
        Whatever ``get_random_spit`` returns for the chosen result; the UI
        feeds this value into the audio input component, so it is expected
        to be the local path of the downloaded clip.

    Raises:
        gr.Error: if the search produced no usable result.
    """
    config = {'logfilepath': 'musicdl.log', 'savedir': 'downloaded', 'search_size_per_source': 5, 'proxies': {}}
    albums = get_albums(keywords=name, config=config)
    if not albums:
        # random.choice() on an empty list raises a bare IndexError, which
        # tells the user nothing; surface a readable message instead.
        raise gr.Error(f"No suitable music found for '{name}'")
    return get_random_spit(random.choice(albums))
 app = gr.Blocks()
 with app:
+    title = "Singer Voice Clone 0.1 Demo"
+    desc = """ small singer voice clone Demo App. <br />
+               Enter keywords auto search music to clone or upload music yourself
+               It's just a simplified demo, you can use more advanced features optimize music quality <br />"""
+    tutorial_link = "https://docs.cworld.ai"
+    gr.HTML(
+        f"""
+            <div style="text-align: center; margin: 0 auto;">
+              <div
+                style="
+                  display: inline-flex;
+                  align-items: center;
+                  gap: 0.8rem;
+                  font-size: 1.75rem;
+                "
+              >
+                <svg height="100%" stroke-miterlimit="10" style="fill-rule:nonzero;clip-rule:evenodd;stroke-linecap:round;stroke-linejoin:round;" version="1.1" viewBox="0 0 100 100" width="100%" xml:space="preserve" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+                    <defs/>
+                    <clipPath id="ArtboardFrame">
+                    <rect height="100" width="100" x="0" y="0"/>
+                    </clipPath>
+                    <g clip-path="url(#ArtboardFrame)" id="SvgjsG2907">
+                    <g opacity="1">
+                    <g opacity="1">
+                    <path d="M49.5597 6.74187C73.4486 6.74187 92.893 26.1863 92.893 50.0752C92.893 73.9641 73.4486 93.4085 49.5597 93.4085C25.6708 93.4085 6.22637 73.9641 6.22637 50.0752C6.22637 26.1863 25.6708 6.74187 49.5597 6.74187M49.5597 0.075206C21.893 0.075206-0.440293 22.4085-0.440293 50.0752C-0.440293 77.7419 21.893 100.075 49.5597 100.075C77.2264 100.075 99.5597 77.7419 99.5597 50.0752C99.5597 22.4085 77.2264 0.075206 49.5597 0.075206L49.5597 0.075206Z" fill="#111111" fill-rule="nonzero" opacity="1" stroke="none"/>
+                    <path d="M55.1153 77.853L44.0042 77.853L44.0042 72.2974C44.0042 69.1863 46.4486 66.7419 49.5597 66.7419L49.5597 66.7419C52.6708 66.7419 55.1153 69.1863 55.1153 72.2974L55.1153 77.853Z" fill="#111111" fill-rule="nonzero" opacity="1" stroke="none"/>
+                    <path d="M21.7819 33.4085L32.893 33.4085L32.893 33.4085L32.893 55.6308L32.893 55.6308L21.7819 55.6308L21.7819 55.6308L21.7819 33.4085L21.7819 33.4085Z" fill="#111111" fill-rule="nonzero" opacity="1" stroke="none"/>
+                    <path d="M66.2264 33.4085L77.3375 33.4085L77.3375 33.4085L77.3375 55.6308L77.3375 55.6308L66.2264 55.6308L66.2264 55.6308L66.2264 33.4085L66.2264 33.4085Z" fill="#111111" fill-rule="nonzero" opacity="1" stroke="none"/>
+                    </g>
+                    </g>
+                    </g>
+                </svg>
+                <h1 style="font-weight: 900; margin-bottom: 7px;margin-top:5px">
+                  {title}
+                </h1>
+              </div>
+              <p style="margin-bottom: 10px; font-size: 94%; line-height: 23px;">
+                {desc}
+                There is the <a href="{tutorial_link}"> tutorial </a>
+              </p>
+            </div>
+        """
+    )
+    sid = gr.Dropdown(label="Singer", choices=["22", "33", "47", "51"], value="47")
+    vc_input2 = gr.Textbox(label="Music Name")
+    vc_search = gr.Button("Auto Search", variant="primary")
+    vc_input3 = gr.Audio(label="Upload Music Yourself")
+    vc_search.click(auto_search, [vc_input2], [vc_input3])
+    vc_submit = gr.Button("Convert", variant="primary")
+    vc_output1 = gr.Textbox(label="Run Status")
+    vc_output2 = gr.Audio(label="Result Audio")
+    vc_submit.click(svc_main, [sid, vc_input3], [vc_output1, vc_output2])
     app.launch()

music/__init__.py ADDED Viewed

File without changes

music/search.py ADDED Viewed

	@@ -0,0 +1,90 @@

+import os.path
+import random
+from musicdl import musicdl
+from musicdl.modules import Downloader
+from pydub import AudioSegment
def is_integer(string):
    """Parse a string of decimal digits into an int, falling back to 0.

    Despite the ``is_`` prefix this does not return a bool: it returns the
    parsed value when ``string`` consists solely of decimal digits, and 0
    for anything else (empty string, signs, whitespace, letters, ...).

    ``str.isdecimal`` is used rather than ``str.isdigit`` because the latter
    also accepts characters such as superscript digits ("\u00b2") that
    ``int`` cannot convert, which would raise ``ValueError``.
    """
    return int(string) if string.isdecimal() else 0
def is_numeric(string):
    """Return True if ``string`` is a plain, unsigned decimal number.

    Accepted forms are ``"123"`` and ``"12.5"``: one or more decimal digits,
    optionally followed by a single dot and one or more decimal digits.
    Signs, exponents, whitespace, a leading or trailing dot, and the empty
    string are all rejected.

    ``str.isdecimal`` is used rather than ``str.isdigit`` so that every
    accepted string is also convertible with ``float``; ``isdigit`` lets
    through characters such as superscript digits that ``float`` rejects.
    """
    integer_part, dot, decimal_part = string.partition('.')
    if not integer_part.isdecimal():
        return False
    if not dot:
        # No decimal point at all: a plain integer string.
        return True
    # A second dot would end up inside decimal_part and fail this check.
    return decimal_part.isdecimal()
def time_to_seconds(time_string):
    """Convert a colon-separated duration into a number of seconds.

    Accepts ``"HH:MM:SS"`` as well as the shorter ``"MM:SS"`` and ``"SS"``
    forms; missing leading fields count as zero. Each field is parsed with
    ``is_integer``, so a field that is not a plain digit string counts as 0.

    Raises:
        ValueError: if ``time_string`` has more than three fields.
    """
    fields = [is_integer(field) for field in time_string.split(':')]
    if len(fields) > 3:
        raise ValueError(
            f"expected at most 3 ':'-separated fields, got {len(fields)}: {time_string!r}"
        )
    # Left-pad with zeros so "MM:SS" and "SS" line up with h/m/s.
    hours, minutes, seconds = [0] * (3 - len(fields)) + fields
    return hours * 3600 + minutes * 60 + seconds
def size_to_int(size_string):
    """Convert a human-readable size such as ``"3.5MB"`` into bytes.

    The last two characters are taken as the unit and everything before
    them as the number. Units KB, MB, GB and TB (any letter case) scale the
    number by powers of 1024; any other two-character suffix leaves the
    number unscaled. Surrounding whitespace is ignored.

    Returns:
        int: the size in bytes. If the numeric part does not pass
        ``is_numeric``, a sentinel just above 5 MiB is returned so that the
        "larger than 5 MiB" filter in ``get_albums`` rejects the entry.
    """
    text = size_string.strip()
    # Drop the trailing two-character unit; what is left should be the number.
    number_part = text[:-2].strip()
    if not is_numeric(number_part):
        # int() keeps the return type consistent with the normal path.
        return int(5.1 * 1024 * 1024)
    multipliers = {
        'KB': 1024,
        'MB': 1024 ** 2,
        'GB': 1024 ** 3,
        'TB': 1024 ** 4,
    }
    unit = text[-2:].upper()
    # Scale to bytes, then truncate to a whole number of bytes.
    return int(float(number_part) * multipliers.get(unit, 1))
def get_albums(keywords, config):
    """Search several music sources and return the usable results.

    A result is kept only if its ``songname`` has not been kept already,
    its ``ext`` is ``'mp3'``, its ``filesize`` is at most 5 MiB and its
    ``duration`` is at most 300 seconds.

    Args:
        keywords: search text handed to the musicdl client.
        config: configuration dict handed to ``musicdl.musicdl``.

    Returns:
        list: the result dicts that passed every check, in the order found.
    """
    sources = [
        'kugou', 'kuwo', 'qqmusic', 'qianqian', 'fivesing',
        'netease', 'migu', 'joox', 'yiting',
    ]
    searcher = musicdl.musicdl(config=config)
    hits_by_source = searcher.search(keywords, sources)

    max_bytes = 5 * 1024 * 1024
    max_seconds = 300
    seen_names = set()
    accepted = []
    for hits in hits_by_source.values():
        for hit in hits:
            # Checks run left to right and stop at the first failure, so the
            # size/duration parsers are only called when actually needed.
            keep = (
                hit['songname'] not in seen_names
                and hit['ext'] == 'mp3'
                and size_to_int(hit['filesize']) <= max_bytes
                and time_to_seconds(hit['duration']) <= max_seconds
            )
            if keep:
                seen_names.add(hit['songname'])
                accepted.append(hit)
    return accepted
def get_random_spit(songinfo):
    """Download a song and trim the saved file to its middle 30 seconds.

    Despite the name, nothing here is random: the clip is always centred on
    the midpoint of the track. Tracks shorter than 30 seconds are kept whole.
    The trimmed audio overwrites the downloaded file.

    Args:
        songinfo: search-result dict passed to ``Downloader``; its
            ``"savedir"``, ``"savename"`` and ``"ext"`` entries determine
            where the file is expected on disk. The file is read as mp3.

    Returns:
        str: path of the trimmed mp3 file.
    """
    downloader = Downloader(songinfo)
    downloader.start()
    save_path = os.path.join(songinfo["savedir"], f"{songinfo['savename']}.{songinfo['ext']}")
    song = AudioSegment.from_mp3(save_path)
    # pydub does things in milliseconds
    length = len(song)
    half_window = 15 * 1000
    midpoint = length // 2
    # Clamp the 30 s window to the bounds of the track.
    left_idx = max(midpoint - half_window, 0)
    right_idx = min(midpoint + half_window, length)
    middle_30s = song[left_idx:right_idx]
    middle_30s.export(save_path, format="mp3")
    # Callers use the return value as the path of the clip.
    return save_path

requirements.txt CHANGED Viewed

@@ -13,3 +13,5 @@ torchcrepe
 transformers
 tqdm
 librosa

 transformers
 tqdm
 librosa
+pydub
+musicdl