Florian Lux committed on
Commit
49696ae
•
1 Parent(s): c8c05d4

try it one more time with speaker

Browse files
Files changed (3) hide show
  1. .gitignore +2 -1
  2. app.py +13 -11
  3. packages.txt +1 -0
.gitignore CHANGED
@@ -11,4 +11,5 @@ audios/
11
  *playground*
12
  *.json
13
  .tmp/
14
- .vscode/
 
 
11
  *playground*
12
  *.json
13
  .tmp/
14
+ .vscode/
15
+ Models/
app.py CHANGED
@@ -1,15 +1,14 @@
1
  import os
2
 
3
- import gdown
4
  import gradio as gr
5
  import numpy as np
6
  import torch
7
 
8
  from InferenceInterfaces.Meta_FastSpeech2 import Meta_FastSpeech2
9
- import os
10
 
11
  os.system("pip uninstall -y gradio")
12
- os.system("pip install gradio==2.7.5")
 
13
 
14
  def float2pcm(sig, dtype='int16'):
15
  """
@@ -30,16 +29,10 @@ def float2pcm(sig, dtype='int16'):
30
  class TTS_Interface:
31
 
32
  def __init__(self):
33
- os.makedirs("Models/HiFiGAN_combined", exist_ok=True)
34
- os.makedirs("Models/FastSpeech2_Meta", exist_ok=True)
35
- if not os.path.exists("Models/FastSpeech2_Meta/best.pt"):
36
- gdown.download(id="1-AhjmCR6DDI6rtzPIn9ksOxQyHKf6CbG", output="Models/FastSpeech2_Meta/best.pt")
37
- if not os.path.exists("Models/HiFiGAN_combined/best.pt"):
38
- gdown.download(id="1-5sP-0JDUvKTjxhO3hUVJgArSUjuhU6P", output="Models/HiFiGAN_combined/best.pt")
39
  self.device = "cuda" if torch.cuda.is_available() else "cpu"
40
  self.model = Meta_FastSpeech2(device=self.device)
41
 
42
- def read(self, prompt, language):
43
  language_id_lookup = {
44
  "English" : "en",
45
  "German" : "de",
@@ -52,6 +45,11 @@ class TTS_Interface:
52
  "French" : "fr"
53
  }
54
  self.model.set_language(language_id_lookup[language])
 
 
 
 
 
55
  wav = self.model(prompt)
56
  return 48000, float2pcm(wav.cpu().numpy())
57
 
@@ -69,7 +67,11 @@ iface = gr.Interface(fn=meta_model.read,
69
  'Russian',
70
  'Hungarian',
71
  'Dutch',
72
- 'French'], type="value", default='English', label="Language Selection")],
 
 
 
 
73
  outputs=gr.outputs.Audio(type="numpy", label=None),
74
  layout="vertical",
75
  title="IMS Toucan Multilingual Multispeaker Demo",
 
1
  import os
2
 
 
3
  import gradio as gr
4
  import numpy as np
5
  import torch
6
 
7
  from InferenceInterfaces.Meta_FastSpeech2 import Meta_FastSpeech2
 
8
 
9
  os.system("pip uninstall -y gradio")
10
+ os.system("pip install gradio==2.7.5.2")
11
+
12
 
13
  def float2pcm(sig, dtype='int16'):
14
  """
 
29
  class TTS_Interface:
30
 
31
  def __init__(self):
 
 
 
 
 
 
32
  self.device = "cuda" if torch.cuda.is_available() else "cpu"
33
  self.model = Meta_FastSpeech2(device=self.device)
34
 
35
+ def read(self, prompt, language, path_to_audio):
36
  language_id_lookup = {
37
  "English" : "en",
38
  "German" : "de",
 
45
  "French" : "fr"
46
  }
47
  self.model.set_language(language_id_lookup[language])
48
+ if path_to_audio is not None:
49
+ try:
50
+ self.model.set_utterance_embedding(path_to_audio)
51
+ except RuntimeError:
52
+ pass
53
  wav = self.model(prompt)
54
  return 48000, float2pcm(wav.cpu().numpy())
55
 
 
67
  'Russian',
68
  'Hungarian',
69
  'Dutch',
70
+ 'French'], type="value", default='English', label="Language Selection"),
71
+ gr.inputs.Audio(source="microphone",
72
+ optional=True,
73
+ label="Make the TTS imitate your Voice (optional, press once to start recording and again to stop)",
74
+ type="filepath")],
75
  outputs=gr.outputs.Audio(type="numpy", label=None),
76
  layout="vertical",
77
  title="IMS Toucan Multilingual Multispeaker Demo",
packages.txt CHANGED
@@ -1,2 +1,3 @@
1
  libsndfile1
2
  espeak-ng
 
 
1
  libsndfile1
2
  espeak-ng
3
+ ffmpeg