Ahsen Khaliq commited on
Commit
7f7f412
1 Parent(s): 5af8374

Update demo_cli.py

Browse files
Files changed (1) hide show
  1. demo_cli.py +12 -18
demo_cli.py CHANGED
@@ -20,26 +20,21 @@ if __name__ == '__main__':
20
  formatter_class=argparse.ArgumentDefaultsHelpFormatter
21
  )
22
  parser.add_argument("-e", "--enc_model_fpath", type=Path,
23
- default="encoder/saved_models/pretrained.pt",
24
  help="Path to a saved encoder")
25
  parser.add_argument("-s", "--syn_model_fpath", type=Path,
26
- default="synthesizer/saved_models/pretrained/pretrained.pt",
27
  help="Path to a saved synthesizer")
28
  parser.add_argument("-v", "--voc_model_fpath", type=Path,
29
- default="vocoder/saved_models/pretrained/pretrained.pt",
30
  help="Path to a saved vocoder")
31
- parser.add_argument("--cpu", action="store_true", help=\
32
- "If True, processing is done on CPU, even when a GPU is available.")
33
- parser.add_argument("--no_sound", action="store_true", help=\
34
- "If True, audio won't be played.")
35
- parser.add_argument("--seed", type=int, default=None, help=\
36
- "Optional random number seed value to make toolbox deterministic.")
37
- parser.add_argument("--no_mp3_support", action="store_true", help=\
38
- "If True, disallows loading mp3 files to prevent audioread errors when ffmpeg is not installed.")
39
  parser.add_argument("-audio", "--audio_path", type=Path, required = True,
40
  help="Path to a audio file")
41
- parser.add_argument("--text", type=str, required = True, help=\
42
- "Text Input")
43
  args = parser.parse_args()
44
  print_args(args, parser)
45
  if not args.no_sound:
@@ -95,7 +90,7 @@ if __name__ == '__main__':
95
  # The sampling rate is the number of values (samples) recorded per second, it is set to
96
  # 16000 for the encoder. Creating an array of length <sampling_rate> will always correspond
97
  # to an audio of 1 second.
98
- print("\tTesting the encoder...")
99
  encoder.embed_utterance(np.zeros(encoder.sampling_rate))
100
 
101
  # Create a dummy embedding. You would normally use the embedding that encoder.embed_utterance
@@ -109,7 +104,7 @@ if __name__ == '__main__':
109
  # illustrate that
110
  embeds = [embed, np.zeros(speaker_embedding_size)]
111
  texts = ["test 1", "test 2"]
112
- print("\tTesting the synthesizer... (loading the model will output a lot of text)")
113
  mels = synthesizer.synthesize_spectrograms(texts, embeds)
114
 
115
  # The vocoder synthesizes one waveform at a time, but it's more efficient for long ones. We
@@ -118,7 +113,7 @@ if __name__ == '__main__':
118
  # The vocoder can take a callback function to display the generation. More on that later. For
119
  # now we'll simply hide it like this:
120
  no_action = lambda *args: None
121
- print("\tTesting the vocoder...")
122
  # For the sake of making this test short, we'll pass a short target length. The target length
123
  # is the length of the wav segments that are processed in parallel. E.g. for audio sampled
124
  # at 16000 Hertz, a target length of 8000 means that the target audio will be cut in chunks of
@@ -139,8 +134,7 @@ if __name__ == '__main__':
139
  # while True:
140
  try:
141
  # Get the reference audio filepath
142
- message = "Reference voice: enter an audio filepath of a voice to be cloned (mp3, " \
143
- "wav, m4a, flac, ...):\n"
144
  in_fpath = args.audio_path
145
 
146
  if in_fpath.suffix.lower() == ".mp3" and args.no_mp3_support:
 
20
  formatter_class=argparse.ArgumentDefaultsHelpFormatter
21
  )
22
  parser.add_argument("-e", "--enc_model_fpath", type=Path,
23
+ default="encpretrained.pt",
24
  help="Path to a saved encoder")
25
  parser.add_argument("-s", "--syn_model_fpath", type=Path,
26
+ default="synpretrained.pt",
27
  help="Path to a saved synthesizer")
28
  parser.add_argument("-v", "--voc_model_fpath", type=Path,
29
+ default="vocpretrained.pt",
30
  help="Path to a saved vocoder")
31
+ parser.add_argument("--cpu", action="store_true", help=\
+ "If True, processing is done on CPU, even when a GPU is available.")
32
+ parser.add_argument("--no_sound", action="store_true", help=\
+ "If True, audio won't be played.")
33
+ parser.add_argument("--seed", type=int, default=None, help=\
+ "Optional random number seed value to make toolbox deterministic.")
34
+ parser.add_argument("--no_mp3_support", action="store_true", help=\
+ "If True, disallows loading mp3 files to prevent audioread errors when ffmpeg is not installed.")
 
 
 
 
35
  parser.add_argument("-audio", "--audio_path", type=Path, required = True,
36
  help="Path to a audio file")
37
+ parser.add_argument("--text", type=str, required = True, help=\
+ "Text Input")
 
38
  args = parser.parse_args()
39
  print_args(args, parser)
40
  if not args.no_sound:
 
90
  # The sampling rate is the number of values (samples) recorded per second, it is set to
91
  # 16000 for the encoder. Creating an array of length <sampling_rate> will always correspond
92
  # to an audio of 1 second.
93
+ print(" Testing the encoder...")
94
  encoder.embed_utterance(np.zeros(encoder.sampling_rate))
95
 
96
  # Create a dummy embedding. You would normally use the embedding that encoder.embed_utterance
 
104
  # illustrate that
105
  embeds = [embed, np.zeros(speaker_embedding_size)]
106
  texts = ["test 1", "test 2"]
107
+ print(" Testing the synthesizer... (loading the model will output a lot of text)")
108
  mels = synthesizer.synthesize_spectrograms(texts, embeds)
109
 
110
  # The vocoder synthesizes one waveform at a time, but it's more efficient for long ones. We
 
113
  # The vocoder can take a callback function to display the generation. More on that later. For
114
  # now we'll simply hide it like this:
115
  no_action = lambda *args: None
116
+ print(" Testing the vocoder...")
117
  # For the sake of making this test short, we'll pass a short target length. The target length
118
  # is the length of the wav segments that are processed in parallel. E.g. for audio sampled
119
  # at 16000 Hertz, a target length of 8000 means that the target audio will be cut in chunks of
 
134
  # while True:
135
  try:
136
  # Get the reference audio filepath
137
+ message = "Reference voice: enter an audio filepath of a voice to be cloned (mp3, " \
+ "wav, m4a, flac, ...):\n"
 
138
  in_fpath = args.audio_path
139
 
140
  if in_fpath.suffix.lower() == ".mp3" and args.no_mp3_support: