jbetker committed on
Commit
9acce23
1 Parent(s): a4cda68
.gitignore CHANGED
@@ -129,7 +129,6 @@ dmypy.json
129
  .pyre/
130
 
131
  .idea/*
132
- tortoise/.models/*
133
- tortoise/random_voices/*
134
  .custom/*
135
  results/*
 
129
  .pyre/
130
 
131
  .idea/*
132
+ .models/*
 
133
  .custom/*
134
  results/*
tortoise/models/arch_util.py CHANGED
@@ -290,7 +290,7 @@ class AudioMiniEncoder(nn.Module):
290
 
291
  class TorchMelSpectrogram(nn.Module):
292
  def __init__(self, filter_length=1024, hop_length=256, win_length=1024, n_mel_channels=80, mel_fmin=0, mel_fmax=8000,
293
- sampling_rate=22050, normalize=False, mel_norm_file='data/mel_norms.pth'):
294
  super().__init__()
295
  # These are the default tacotron values for the MEL spectrogram.
296
  self.filter_length = filter_length
 
290
 
291
  class TorchMelSpectrogram(nn.Module):
292
  def __init__(self, filter_length=1024, hop_length=256, win_length=1024, n_mel_channels=80, mel_fmin=0, mel_fmax=8000,
293
+ sampling_rate=22050, normalize=False, mel_norm_file='tortoise/data/mel_norms.pth'):
294
  super().__init__()
295
  # These are the default tacotron values for the MEL spectrogram.
296
  self.filter_length = filter_length
tortoise/read.py CHANGED
@@ -28,7 +28,7 @@ def split_and_recombine_text(texts, desired_length=200, max_len=300):
28
 
29
  if __name__ == '__main__':
30
  parser = argparse.ArgumentParser()
31
- parser.add_argument('--textfile', type=str, help='A file containing the text to read.', default="data/riding_hood.txt")
32
  parser.add_argument('--voice', type=str, help='Selects the voice to use for generation. See options in voices/ directory (and add your own!) '
33
  'Use the & character to join two voices together. Use a comma to perform inference on multiple voices.', default='pat')
34
  parser.add_argument('--output_path', type=str, help='Where to store outputs.', default='../results/longform/')
 
28
 
29
  if __name__ == '__main__':
30
  parser = argparse.ArgumentParser()
31
+ parser.add_argument('--textfile', type=str, help='A file containing the text to read.', default="tortoise/data/riding_hood.txt")
32
  parser.add_argument('--voice', type=str, help='Selects the voice to use for generation. See options in voices/ directory (and add your own!) '
33
  'Use the & character to join two voices together. Use a comma to perform inference on multiple voices.', default='pat')
34
  parser.add_argument('--output_path', type=str, help='Where to store outputs.', default='../results/longform/')
tortoise/utils/audio.py CHANGED
@@ -82,10 +82,10 @@ def dynamic_range_decompression(x, C=1):
82
 
83
 
84
  def get_voices():
85
- subs = os.listdir('voices')
86
  voices = {}
87
  for sub in subs:
88
- subj = os.path.join('voices', sub)
89
  if os.path.isdir(subj):
90
  voices[sub] = list(glob(f'{subj}/*.wav')) + list(glob(f'{subj}/*.mp3')) + list(glob(f'{subj}/*.pth'))
91
  return voices
 
82
 
83
 
84
  def get_voices():
85
+ subs = os.listdir('tortoise/voices')
86
  voices = {}
87
  for sub in subs:
88
+ subj = os.path.join('tortoise/voices', sub)
89
  if os.path.isdir(subj):
90
  voices[sub] = list(glob(f'{subj}/*.wav')) + list(glob(f'{subj}/*.mp3')) + list(glob(f'{subj}/*.pth'))
91
  return voices
tortoise/utils/tokenizer.py CHANGED
@@ -164,7 +164,7 @@ def lev_distance(s1, s2):
164
  return distances[-1]
165
 
166
  class VoiceBpeTokenizer:
167
- def __init__(self, vocab_file='data/tokenizer.json'):
168
  if vocab_file is not None:
169
  self.tokenizer = Tokenizer.from_file(vocab_file)
170
 
 
164
  return distances[-1]
165
 
166
  class VoiceBpeTokenizer:
167
+ def __init__(self, vocab_file='tortoise/data/tokenizer.json'):
168
  if vocab_file is not None:
169
  self.tokenizer = Tokenizer.from_file(vocab_file)
170