Add MOS prediction
Browse files
app.py
CHANGED
@@ -1,4 +1,6 @@
|
|
1 |
import os
|
|
|
|
|
2 |
import numpy as np
|
3 |
import gradio as gr
|
4 |
import pyopenjtalk
|
@@ -156,7 +158,14 @@ def gen_song(model_name, spk, texts, durs, pitchs):
|
|
156 |
spk_embed = np.load(singer_embeddings[model_name][spk])
|
157 |
output_dict = svs(batch, lids=lid, spembs=spk_embed)
|
158 |
wav_info = output_dict["wav"].cpu().numpy()
|
159 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
160 |
|
161 |
|
162 |
# SP: silence, AP: aspirate.
|
@@ -276,6 +285,7 @@ Music score usually includes lyrics, as well as duration and pitch of each word
|
|
276 |
with gr.Column(variant="panel"):
|
277 |
gened_song = gr.Audio(label="Generated Song", type="numpy")
|
278 |
run_status = gr.Textbox(label="Running Status")
|
|
|
279 |
|
280 |
gr.Examples(
|
281 |
examples=examples,
|
@@ -300,7 +310,7 @@ Music score usually includes lyrics, as well as duration and pitch of each word
|
|
300 |
generate.click(
|
301 |
fn=gen_song,
|
302 |
inputs=[model_name, singer, lyrics, duration, pitch],
|
303 |
-
outputs=[gened_song, run_status],
|
304 |
)
|
305 |
|
306 |
demo.launch()
|
|
|
1 |
import os
|
2 |
+
import torch
|
3 |
+
import librosa
|
4 |
import numpy as np
|
5 |
import gradio as gr
|
6 |
import pyopenjtalk
|
|
|
158 |
spk_embed = np.load(singer_embeddings[model_name][spk])
|
159 |
output_dict = svs(batch, lids=lid, spembs=spk_embed)
|
160 |
wav_info = output_dict["wav"].cpu().numpy()
|
161 |
+
|
162 |
+
# MOS prediction: the waveform is resampled to 16 kHz before being scored by the predictor
|
163 |
+
predictor = torch.hub.load("South-Twilight/SingMOS:v0.2.0", "singing_ssl_mos", trust_repo=True)
|
164 |
+
wav_mos = librosa.resample(wav_info, orig_sr=fs, target_sr=16000)
|
165 |
+
wav_mos = torch.from_numpy(wav_mos).unsqueeze(0)
|
166 |
+
len_mos = torch.tensor([wav_mos.shape[1]])
|
167 |
+
score = predictor(wav_mos, len_mos)
|
168 |
+
return (fs, wav_info), "success!", round(score.item(), 2)
|
169 |
|
170 |
|
171 |
# SP: silence, AP: aspirate.
|
|
|
285 |
with gr.Column(variant="panel"):
|
286 |
gened_song = gr.Audio(label="Generated Song", type="numpy")
|
287 |
run_status = gr.Textbox(label="Running Status")
|
288 |
+
pred_mos = gr.Textbox(label=" Pseudo MOS")
|
289 |
|
290 |
gr.Examples(
|
291 |
examples=examples,
|
|
|
310 |
generate.click(
|
311 |
fn=gen_song,
|
312 |
inputs=[model_name, singer, lyrics, duration, pitch],
|
313 |
+
outputs=[gened_song, run_status, pred_mos],
|
314 |
)
|
315 |
|
316 |
demo.launch()
|