ylacombe HF staff committed on
Commit
f251112
1 Parent(s): 372ba01

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +45 -10
app.py CHANGED
@@ -15,7 +15,12 @@ def _grab_best_device(use_gpu=True):
15
  device = _grab_best_device()
16
 
17
  HUB_PATH = "ylacombe/vits_vctk_welsh_male"
18
- pipe = pipeline("text-to-speech", model=HUB_PATH, device=0)
 
 
 
 
 
19
 
20
  title = "# 🐶 VITS"
21
 
@@ -23,17 +28,29 @@ description = """
23
 
24
  """
25
 
26
- num_speakers = pipe.model.config.num_speakers
 
27
 
28
  # Inference
29
- def generate_audio(text):
 
 
 
 
 
 
 
30
 
31
  out = []
32
- for i in range(num_speakers):
33
  forward_params = {"speaker_id": i}
34
- output = pipe(text, forward_params=forward_params)
35
- out.append((output["sampling_rate"], output["audio"].squeeze()))
 
 
 
36
 
 
37
  return out
38
 
39
 
@@ -45,13 +62,31 @@ with gr.Blocks() as demo_blocks:
45
  with gr.Column():
46
  inp_text = gr.Textbox(label="Input Text", info="What would you like bark to synthesise?")
47
  btn = gr.Button("Generate Audio!")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
 
49
  with gr.Column():
50
  outputs = []
51
- for i in range(num_speakers):
52
- out_audio = gr.Audio(type="numpy", autoplay=False, label=f"Generated Audio {i}", show_label=True)
53
  outputs.append(out_audio)
54
 
55
- btn.click(generate_audio, [inp_text], outputs)
 
56
 
57
- demo_blocks.launch()
 
15
  device = _grab_best_device()
16
 
17
  HUB_PATH = "ylacombe/vits_vctk_welsh_male"
18
+
19
+
20
+ pipe_dict = {
21
+ "current_model": "ylacombe/vits_vctk_welsh_male",
22
+ "pipe": pipeline("text-to-speech", model=HUB_PATH, device=0),
23
+ }
24
 
25
  title = "# 🐶 VITS"
26
 
 
28
 
29
  """
30
 
31
+ max_speakers = 15
32
+
33
 
34
  # Inference
35
def generate_audio(text, model_id):
    """Synthesise `text` once per speaker of the selected VITS model.

    Reloads the TTS pipeline if `model_id` differs from the currently
    loaded model, then returns a list of exactly `max_speakers` gr.Audio
    components: one visible clip per speaker (capped at `max_speakers`),
    padded with hidden components so the list always matches the UI slots.
    """
    # Lazily swap the pipeline only when the dropdown selection changed.
    if pipe_dict["current_model"] != model_id:
        gr.Warning("Model has changed - loading new model")
        # TODO(review): hard-coded GPU 0 — confirm the host always has a GPU.
        pipe_dict["pipe"] = pipeline("text-to-speech", model=model_id, device=0)
        pipe_dict["current_model"] = model_id

    num_speakers = pipe_dict["pipe"].model.config.num_speakers
    # Clamp to the number of pre-allocated UI slots.
    shown = min(num_speakers, max_speakers)

    out = []
    for speaker_id in range(shown):
        # Distinct names for the pipeline result vs. the gr.Audio component
        # (the original rebound `output` for both).
        result = pipe_dict["pipe"](text, forward_params={"speaker_id": speaker_id})
        out.append(
            gr.Audio(
                value=(result["sampling_rate"], result["audio"].squeeze()),
                type="numpy",
                autoplay=False,
                label=f"Generated Audio - speaker {speaker_id}",
                show_label=True,
                visible=True,
            )
        )

    # Pad with hidden components using the clamped count: the original used
    # `max_speakers - num_speakers`, which goes negative (and only works via
    # Python's empty negative list-repeat) when a model exceeds the cap.
    out.extend([gr.Audio(visible=False)] * (max_speakers - shown))
    return out
55
 
56
 
 
62
  with gr.Column():
63
  inp_text = gr.Textbox(label="Input Text", info="What would you like bark to synthesise?")
64
  btn = gr.Button("Generate Audio!")
65
+ model_id = gr.Dropdown(
66
+ [
67
+ "ylacombe/vits_vctk_welsh_male",
68
+ "ylacombe/vits_vctk_welsh_female",
69
+ "ylacombe/vits_ljs_welsh_male",
70
+ "ylacombe/vits_ljs_welsh_female",
71
+ "ylacombe/vits_vctk_irish_male",
72
+ "ylacombe/vits_vctk_scottish_female",
73
+ "ylacombe/vits_ljs_irish_male",
74
+ "ylacombe/vits_ljs_scottish_female",
75
+ "ylacombe/mms-tam-finetuned-multispeaker",
76
+ "ylacombe/mms-spa-finetuned-chilean-multispeaker",
77
+ ],
78
+ value="ylacombe/vits_vctk_welsh_male",
79
+ label="Model",
80
+ info="Model you want to test",
81
+ )
82
 
83
  with gr.Column():
84
  outputs = []
85
+ for i in range(max_speakers):
86
+ out_audio = gr.Audio(type="numpy", autoplay=False, label=f"Generated Audio - speaker {i}", show_label=True, visible=False)
87
  outputs.append(out_audio)
88
 
89
+ btn.click(generate_audio, [inp_text, model_id], outputs)
90
+
91
 
92
+ demo_blocks.launch(debug=True)