TangRain committed on
Commit
8db2907
·
1 Parent(s): 9794c19

add mos prediction

Browse files
Files changed (1) hide show
  1. app.py +12 -2
app.py CHANGED
@@ -1,4 +1,6 @@
1
  import os
 
 
2
  import numpy as np
3
  import gradio as gr
4
  import pyopenjtalk
@@ -156,7 +158,14 @@ def gen_song(model_name, spk, texts, durs, pitchs):
156
  spk_embed = np.load(singer_embeddings[model_name][spk])
157
  output_dict = svs(batch, lids=lid, spembs=spk_embed)
158
  wav_info = output_dict["wav"].cpu().numpy()
159
- return (fs, wav_info), "success!"
 
 
 
 
 
 
 
160
 
161
 
162
  # SP: silence, AP: aspirate.
@@ -276,6 +285,7 @@ Music score usually includes lyrics, as well as duration and pitch of each word
276
  with gr.Column(variant="panel"):
277
  gened_song = gr.Audio(label="Generated Song", type="numpy")
278
  run_status = gr.Textbox(label="Running Status")
 
279
 
280
  gr.Examples(
281
  examples=examples,
@@ -300,7 +310,7 @@ Music score usually includes lyrics, as well as duration and pitch of each word
300
  generate.click(
301
  fn=gen_song,
302
  inputs=[model_name, singer, lyrics, duration, pitch],
303
- outputs=[gened_song, run_status],
304
  )
305
 
306
  demo.launch()
 
1
  import os
2
+ import torch
3
+ import librosa
4
  import numpy as np
5
  import gradio as gr
6
  import pyopenjtalk
 
158
  spk_embed = np.load(singer_embeddings[model_name][spk])
159
  output_dict = svs(batch, lids=lid, spembs=spk_embed)
160
  wav_info = output_dict["wav"].cpu().numpy()
161
+
162
+ # mos prediction with sr=16k
163
+ predictor = torch.hub.load("South-Twilight/SingMOS:v0.2.0", "singing_ssl_mos", trust_repo=True)
164
+ wav_mos = librosa.resample(wav_info, orig_sr=fs, target_sr=16000)
165
+ wav_mos = torch.from_numpy(wav_mos).unsqueeze(0)
166
+ len_mos = torch.tensor([wav_mos.shape[1]])
167
+ score = predictor(wav_mos, len_mos)
168
+ return (fs, wav_info), "success!", round(score.item(), 2)
169
 
170
 
171
  # SP: silence, AP: aspirate.
 
285
  with gr.Column(variant="panel"):
286
  gened_song = gr.Audio(label="Generated Song", type="numpy")
287
  run_status = gr.Textbox(label="Running Status")
288
+ pred_mos = gr.Textbox(label=" Pseudo MOS")
289
 
290
  gr.Examples(
291
  examples=examples,
 
310
  generate.click(
311
  fn=gen_song,
312
  inputs=[model_name, singer, lyrics, duration, pitch],
313
+ outputs=[gened_song, run_status, pred_mos],
314
  )
315
 
316
  demo.launch()