Ionut-Bostan committed on
Commit d197937
Parent: c6ab084

allowing model to synthesize samples using the CPU

Files changed (6)
  1. .DS_Store +0 -0
  2. app.py +16 -3
  3. output/.DS_Store +0 -0
  4. output/ckpt/.DS_Store +0 -0
  5. utils/.DS_Store +0 -0
  6. utils/model.py +5 -5
.DS_Store CHANGED
Binary files a/.DS_Store and b/.DS_Store differ
 
app.py CHANGED
@@ -1,7 +1,20 @@
 import gradio as gr
+import subprocess
 
-def greet(name):
-    return "Hello " + name + "!!"
+predefined_texts = ["Example text 1", "Example text 2", "Example text 3"]
 
-iface = gr.Interface(fn=greet, inputs="text", outputs="text")
+
+def synthesize_speech(text, speaker_id):
+    command = f"python3 synthesize.py --text '{text}' --bert_embed 1 --speaker_id {speaker_id} --restore_step 900000 --mode single -p config/EmoV_DB/preprocess.yaml -m config/EmoV_DB/model.yaml -t config/EmoV_DB/train.yaml"
+    output = subprocess.check_output(command, shell=True)
+    # Replace this with the path of the generated audio file
+    audio_file = 'output_file_path'
+    return audio_file
+
+
+iface = gr.Interface(fn=synthesize_speech,
+                     inputs=[gr.inputs.Dropdown(choices=predefined_texts, label="Select a text"),
+                             gr.inputs.Slider(minimum=0, maximum=10, step=1, default=0, label="Speaker ID")],
+                     outputs=gr.outputs.Audio(type="file"),
+                     title="Text-to-Speech Demo")
 iface.launch()
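
Note: the new synthesize_speech interpolates user text into a shell string, runs it with shell=True, and returns a hard-coded placeholder path. As a minimal sketch (not part of this commit, assuming the same synthesize.py flags shown in the diff above), the call can be expressed as an argument list so quotes in the text cannot break the command:

import subprocess

def synthesize_speech_safe(text, speaker_id):
    # Hypothetical variant of the commit's function; same CLI flags as the diff above.
    command = [
        "python3", "synthesize.py",
        "--text", text,
        "--bert_embed", "1",
        "--speaker_id", str(speaker_id),
        "--restore_step", "900000",
        "--mode", "single",
        "-p", "config/EmoV_DB/preprocess.yaml",
        "-m", "config/EmoV_DB/model.yaml",
        "-t", "config/EmoV_DB/train.yaml",
    ]
    subprocess.run(command, check=True)  # raises CalledProcessError if synthesis fails
    return "output_file_path"  # placeholder path, exactly as in the commit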
output/.DS_Store CHANGED
Binary files a/output/.DS_Store and b/output/.DS_Store differ
 
output/ckpt/.DS_Store CHANGED
Binary files a/output/ckpt/.DS_Store and b/output/ckpt/.DS_Store differ
 
utils/.DS_Store CHANGED
Binary files a/utils/.DS_Store and b/utils/.DS_Store differ
 
utils/model.py CHANGED
@@ -17,8 +17,8 @@ def get_model(args, configs, device, train=False):
             train_config["path"]["ckpt_path"],
             "{}.pth.tar".format(args.restore_step),
         )
-        ckpt = torch.load(ckpt_path)
-        model.load_state_dict(ckpt["model"], strict=False)
+        ckpt = torch.load(ckpt_path, map_location=device)
+        model.load_state_dict(ckpt["model"])
 
     if train:
         scheduled_optim = ScheduledOptim(
@@ -50,7 +50,7 @@ def get_vocoder(config, device):
             )
     elif speaker == "universal":
         vocoder = torch.hub.load(
-            "descriptinc/melgan-neurips", "load_melgan", "multi_speaker"
+            "descriptinc/melgan-neurips", "load_melgan", "multi_speaker", map_location=device
         )
         vocoder.mel2wav.eval()
         vocoder.mel2wav.to(device)
@@ -60,9 +60,9 @@ def get_vocoder(config, device):
         config = hifigan.AttrDict(config)
         vocoder = hifigan.Generator(config)
         if speaker == "LJSpeech":
-            ckpt = torch.load("hifigan/generator_LJSpeech.pth.tar")
+            ckpt = torch.load("hifigan/generator_LJSpeech.pth.tar", map_location=device)
         elif speaker == "universal":
-            ckpt = torch.load("hifigan/generator_universal.pth.tar")
+            ckpt = torch.load("hifigan/generator_universal.pth.tar", map_location=device)
         vocoder.load_state_dict(ckpt["generator"])
         vocoder.eval()
         vocoder.remove_weight_norm()
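
Note: the map_location additions above are what let these checkpoints load on a machine without CUDA, matching the commit message. A minimal sketch of the pattern (not from the repo; the checkpoint path is illustrative):

import torch

# Pick the GPU when present, otherwise fall back to CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# map_location remaps tensors that were saved from a GPU onto `device`,
# so a CUDA-trained checkpoint can be loaded for CPU-only inference.
ckpt = torch.load("output/ckpt/900000.pth.tar", map_location=device)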