mrq committed on
Commit
faa8da1
·
1 Parent(s): 02beb1d

Modified the logic that determines valid voice folders; voices may now also live in subdirectories of the voice folder (for example, ./voices/SH/james/ will be named SH/james).

Browse files
Files changed (2) hide show
  1. src/utils.py +47 -5
  2. src/webui.py +2 -5
src/utils.py CHANGED
@@ -32,6 +32,7 @@ import gradio as gr
32
  import gradio.utils
33
  import pandas as pd
34
 
 
35
  from datetime import datetime
36
  from datetime import timedelta
37
 
@@ -1709,7 +1710,7 @@ def transcribe_dataset( voice, language=None, skip_existings=False, progress=Non
1709
 
1710
  results = {}
1711
 
1712
- files = sorted( get_voices(load_latents=False)[voice] )
1713
  indir = f'./training/{voice}/'
1714
  infile = f'{indir}/whisper.json'
1715
 
@@ -2104,9 +2105,15 @@ def prepare_dataset( voice, use_segments=False, text_length=0, audio_length=0, p
2104
  phn_file = jobs['phonemize'][0][i]
2105
  normalized = jobs['phonemize'][1][i]
2106
 
2107
- phonemized = valle_phonemize( normalized )
2108
- open(phn_file, 'w', encoding='utf-8').write(" ".join(phonemized))
2109
- print("Phonemized:", phn_file)
 
 
 
 
 
 
2110
 
2111
  training_joined = "\n".join(lines['training'])
2112
  validation_joined = "\n".join(lines['validation'])
@@ -2431,12 +2438,47 @@ def import_voices(files, saveAs=None, progress=None):
2431
  def relative_paths( dirs ):
2432
  return [ './' + os.path.relpath( d ).replace("\\", "/") for d in dirs ]
2433
 
 
 
 
 
 
 
 
 
 
 
2434
  def get_voice_list(dir=get_voice_dir(), append_defaults=False):
2435
  defaults = [ "random", "microphone" ]
2436
  os.makedirs(dir, exist_ok=True)
2437
- res = sorted([d for d in os.listdir(dir) if d not in defaults and os.path.isdir(os.path.join(dir, d)) and len(os.listdir(os.path.join(dir, d))) > 0 ])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2438
  if append_defaults:
2439
  res = res + defaults
 
2440
  return res
2441
 
2442
  def get_valle_models(dir="./training/"):
 
32
  import gradio.utils
33
  import pandas as pd
34
 
35
+ from glob import glob
36
  from datetime import datetime
37
  from datetime import timedelta
38
 
 
1710
 
1711
  results = {}
1712
 
1713
+ files = get_voice(voice, load_latents=False)
1714
  indir = f'./training/{voice}/'
1715
  infile = f'{indir}/whisper.json'
1716
 
 
2105
  phn_file = jobs['phonemize'][0][i]
2106
  normalized = jobs['phonemize'][1][i]
2107
 
2108
+ try:
2109
+ phonemized = valle_phonemize( normalized )
2110
+ open(phn_file, 'w', encoding='utf-8').write(" ".join(phonemized))
2111
+ print("Phonemized:", phn_file)
2112
+ except Exception as e:
2113
+ message = f"Failed to phonemize: {phn_file}: {normalized}"
2114
+ messages.append(message)
2115
+ print(message)
2116
+
2117
 
2118
  training_joined = "\n".join(lines['training'])
2119
  validation_joined = "\n".join(lines['validation'])
 
2438
  def relative_paths( dirs ):
2439
  return [ './' + os.path.relpath( d ).replace("\\", "/") for d in dirs ]
2440
 
2441
def get_voice( name, dir=None, load_latents=True ):
    """Return the sorted list of files that make up the voice ``name``.

    Args:
        name: Voice name relative to ``dir``; may contain a subdirectory,
            e.g. ``"SH/james"``.
        dir: Root folder that holds the voices. When omitted, the result of
            ``get_voice_dir()`` is looked up at call time.
        load_latents: When true, ``*.pth`` files are included alongside the
            ``*.wav`` / ``*.mp3`` / ``*.flac`` audio files.

    Returns:
        A sorted list of matching paths. The list is empty when the folder
        does not exist or holds no matching files, so callers can always
        iterate over or take ``len()`` of the result.
    """
    # The module-level import only brings in `glob` itself, so fetch the
    # escape helper locally.
    from glob import escape as glob_escape

    if dir is None:
        dir = get_voice_dir()

    subj = f'{dir}/{name}'
    if not os.path.isdir(subj):
        return []

    extensions = [ 'wav', 'mp3', 'flac' ]
    if load_latents:
        extensions.append('pth')

    # Escape the folder part so that names containing glob metacharacters
    # ("[", "]", "*", "?") are matched literally instead of as a pattern.
    pattern_root = glob_escape(subj)

    voice = []
    for ext in extensions:
        voice.extend(glob(f'{pattern_root}/*.{ext}'))
    return sorted( voice )


def get_voice_list(dir=None, append_defaults=False):
    """Return the sorted names of all usable voices found under ``dir``.

    A folder counts as a voice when ``get_voice`` finds at least one file in
    it. A folder without such files is searched one level deeper, and each
    qualifying subfolder is reported as ``"<folder>/<subfolder>"``.

    Args:
        dir: Root folder that holds the voices; created if missing. When
            omitted, the result of ``get_voice_dir()`` is looked up at call
            time.
        append_defaults: When true, the built-in entries ``"random"`` and
            ``"microphone"`` are appended after the sorted voice names.

    Returns:
        A list of voice names.
    """
    defaults = [ "random", "microphone" ]
    if dir is None:
        dir = get_voice_dir()
    os.makedirs(dir, exist_ok=True)

    res = []
    for name in os.listdir(dir):
        # Folders named like a built-in entry are never listed as voices.
        if name in defaults or not os.path.isdir(f'{dir}/{name}'):
            continue

        if get_voice( name, dir=dir ):
            res.append(name)
            continue

        # No voice files directly inside: look for voices one level down.
        # An empty folder simply yields nothing here.
        for subdir in os.listdir(f'{dir}/{name}'):
            nested = f'{name}/{subdir}'
            if os.path.isdir(f'{dir}/{nested}') and get_voice( nested, dir=dir ):
                res.append(nested)

    res.sort()

    if append_defaults:
        res = res + defaults

    return res
2483
 
2484
  def get_valle_models(dir="./training/"):
src/webui.py CHANGED
@@ -201,7 +201,7 @@ def diarize_dataset( voice, progress=gr.Progress(track_tqdm=False) ):
201
  pipeline = Pipeline.from_pretrained("pyannote/speaker-diarization", use_auth_token=args.hf_token)
202
 
203
  messages = []
204
- files = sorted( get_voices(load_latents=False)[voice] )
205
  for file in enumerate_progress(files, desc="Iterating through voice files", progress=progress):
206
  diarization = pipeline(file)
207
  for turn, _, speaker in diarization.itertracks(yield_label=True):
@@ -217,15 +217,12 @@ def prepare_all_datasets( language, validation_text_length, validation_audio_len
217
  messages = []
218
  voices = get_voice_list()
219
 
220
- """
221
- for voice in voices:
222
- message = prepare_dataset_proxy(voice, **kwargs)
223
- messages.append(message)
224
  """
225
  for voice in voices:
226
  print("Processing:", voice)
227
  message = transcribe_dataset( voice=voice, language=language, skip_existings=skip_existings, progress=progress )
228
  messages.append(message)
 
229
 
230
  if slice_audio:
231
  for voice in voices:
 
201
  pipeline = Pipeline.from_pretrained("pyannote/speaker-diarization", use_auth_token=args.hf_token)
202
 
203
  messages = []
204
+ files = get_voice(voice, load_latents=False)
205
  for file in enumerate_progress(files, desc="Iterating through voice files", progress=progress):
206
  diarization = pipeline(file)
207
  for turn, _, speaker in diarization.itertracks(yield_label=True):
 
217
  messages = []
218
  voices = get_voice_list()
219
 
 
 
 
 
220
  """
221
  for voice in voices:
222
  print("Processing:", voice)
223
  message = transcribe_dataset( voice=voice, language=language, skip_existings=skip_existings, progress=progress )
224
  messages.append(message)
225
+ """
226
 
227
  if slice_audio:
228
  for voice in voices: