jbetker committed on
Commit
9db06e1
1 Parent(s): cdc26b5

param improvements from investigation

Browse files
Files changed (2) hide show
  1. eval_multiple.py +4 -3
  2. sweep.py +7 -6
eval_multiple.py CHANGED
@@ -7,7 +7,7 @@ from utils.audio import load_audio
7
 
8
  if __name__ == '__main__':
9
  fname = 'Y:\\libritts\\test-clean\\transcribed-brief-w2v.tsv'
10
- outpath = 'D:\\tmp\\tortoise-tts-eval\\redo_outlier'
11
  outpath_real = 'D:\\tmp\\tortoise-tts-eval\\real'
12
 
13
  os.makedirs(outpath, exist_ok=True)
@@ -24,8 +24,9 @@ if __name__ == '__main__':
24
  path = os.path.join(os.path.dirname(fname), line[1])
25
  cond_audio = load_audio(path, 22050)
26
  torchaudio.save(os.path.join(outpath_real, os.path.basename(line[1])), cond_audio, 22050)
27
- sample = tts.tts(transcript, [cond_audio, cond_audio], num_autoregressive_samples=256, k=1, diffusion_iterations=200, cond_free=False,
28
- top_k=None, top_p=.95, typical_sampling=False, temperature=.7, length_penalty=.5, repetition_penalty=1)
 
29
  down = torchaudio.functional.resample(sample, 24000, 22050)
30
  fout_path = os.path.join(outpath, os.path.basename(line[1]))
31
  torchaudio.save(fout_path, down.squeeze(0), 22050)
 
7
 
8
  if __name__ == '__main__':
9
  fname = 'Y:\\libritts\\test-clean\\transcribed-brief-w2v.tsv'
10
+ outpath = 'D:\\tmp\\tortoise-tts-eval\\attempt_best'
11
  outpath_real = 'D:\\tmp\\tortoise-tts-eval\\real'
12
 
13
  os.makedirs(outpath, exist_ok=True)
 
24
  path = os.path.join(os.path.dirname(fname), line[1])
25
  cond_audio = load_audio(path, 22050)
26
  torchaudio.save(os.path.join(outpath_real, os.path.basename(line[1])), cond_audio, 22050)
27
+ sample = tts.tts(transcript, [cond_audio, cond_audio], num_autoregressive_samples=512, k=1,
28
+ repetition_penalty=2.0, length_penalty=2, temperature=.5, top_p=.5,
29
+ diffusion_temperature=.7, cond_free_k=2, diffusion_iterations=400)
30
  down = torchaudio.functional.resample(sample, 24000, 22050)
31
  fout_path = os.path.join(outpath, os.path.basename(line[1]))
32
  torchaudio.save(fout_path, down.squeeze(0), 22050)
sweep.py CHANGED
@@ -25,18 +25,18 @@ def permutations(args):
25
 
26
  if __name__ == '__main__':
27
  fname = 'Y:\\libritts\\test-clean\\transcribed-brief-w2v.tsv'
28
- outpath_base = 'D:\\tmp\\tortoise-tts-eval\\std_sweep_diffusion'
29
  outpath_real = 'D:\\tmp\\tortoise-tts-eval\\real'
30
 
31
  arg_ranges = {
32
- 'diffusion_temperature': [.5, .7, 1],
33
- 'cond_free_k': [.5, 1, 2],
34
  }
35
  cfgs = permutations(arg_ranges)
36
  shuffle(cfgs)
37
 
38
  for cfg in cfgs:
39
- outpath = os.path.join(outpath_base, f'{cfg["cond_free_k"]}_{cfg["diffusion_temperature"]}')
40
  os.makedirs(outpath, exist_ok=True)
41
  os.makedirs(outpath_real, exist_ok=True)
42
  with open(fname, 'r', encoding='utf-8') as f:
@@ -51,8 +51,9 @@ if __name__ == '__main__':
51
  path = os.path.join(os.path.dirname(fname), line[1])
52
  cond_audio = load_audio(path, 22050)
53
  torchaudio.save(os.path.join(outpath_real, os.path.basename(line[1])), cond_audio, 22050)
54
- sample = tts.tts(transcript, [cond_audio, cond_audio], num_autoregressive_samples=256, k=1, diffusion_iterations=200, cond_free=False,
55
- repetition_penalty=1.5, length_penalty=2, temperature=.9, top_p=.9)
 
56
  down = torchaudio.functional.resample(sample, 24000, 22050)
57
  fout_path = os.path.join(outpath, os.path.basename(line[1]))
58
  torchaudio.save(fout_path, down.squeeze(0), 22050)
 
25
 
26
  if __name__ == '__main__':
27
  fname = 'Y:\\libritts\\test-clean\\transcribed-brief-w2v.tsv'
28
+ outpath_base = 'D:\\tmp\\tortoise-tts-eval\\std_sweep3'
29
  outpath_real = 'D:\\tmp\\tortoise-tts-eval\\real'
30
 
31
  arg_ranges = {
32
+ 'top_p': [.3,.4,.5,.6],
33
+ 'temperature': [.5, .6],
34
  }
35
  cfgs = permutations(arg_ranges)
36
  shuffle(cfgs)
37
 
38
  for cfg in cfgs:
39
+ outpath = os.path.join(outpath_base, f'{cfg["top_p"]}_{cfg["temperature"]}')
40
  os.makedirs(outpath, exist_ok=True)
41
  os.makedirs(outpath_real, exist_ok=True)
42
  with open(fname, 'r', encoding='utf-8') as f:
 
51
  path = os.path.join(os.path.dirname(fname), line[1])
52
  cond_audio = load_audio(path, 22050)
53
  torchaudio.save(os.path.join(outpath_real, os.path.basename(line[1])), cond_audio, 22050)
54
+ sample = tts.tts(transcript, [cond_audio, cond_audio], num_autoregressive_samples=256, k=1, diffusion_iterations=200,
55
+ repetition_penalty=2.0, length_penalty=2, temperature=.5, top_p=.5,
56
+ diffusion_temperature=.7, cond_free_k=2, **cfg)
57
  down = torchaudio.functional.resample(sample, 24000, 22050)
58
  fout_path = os.path.join(outpath, os.path.basename(line[1]))
59
  torchaudio.save(fout_path, down.squeeze(0), 22050)