Spaces:
Build error
Build error
mrq
committed on
Commit
·
faa8da1
1
Parent(s):
02beb1d
Modified the logic that determines valid voice folders; subdirectories within a voice folder are now also allowed (for example, ./voices/SH/james/ will be named SH/james).
Browse files
- src/utils.py +47 -5
- src/webui.py +2 -5
src/utils.py
CHANGED
@@ -32,6 +32,7 @@ import gradio as gr
|
|
32 |
import gradio.utils
|
33 |
import pandas as pd
|
34 |
|
|
|
35 |
from datetime import datetime
|
36 |
from datetime import timedelta
|
37 |
|
@@ -1709,7 +1710,7 @@ def transcribe_dataset( voice, language=None, skip_existings=False, progress=Non
|
|
1709 |
|
1710 |
results = {}
|
1711 |
|
1712 |
-
files =
|
1713 |
indir = f'./training/{voice}/'
|
1714 |
infile = f'{indir}/whisper.json'
|
1715 |
|
@@ -2104,9 +2105,15 @@ def prepare_dataset( voice, use_segments=False, text_length=0, audio_length=0, p
|
|
2104 |
phn_file = jobs['phonemize'][0][i]
|
2105 |
normalized = jobs['phonemize'][1][i]
|
2106 |
|
2107 |
-
|
2108 |
-
|
2109 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
2110 |
|
2111 |
training_joined = "\n".join(lines['training'])
|
2112 |
validation_joined = "\n".join(lines['validation'])
|
@@ -2431,12 +2438,47 @@ def import_voices(files, saveAs=None, progress=None):
|
|
2431 |
def relative_paths( dirs ):
|
2432 |
return [ './' + os.path.relpath( d ).replace("\\", "/") for d in dirs ]
|
2433 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2434 |
def get_voice_list(dir=get_voice_dir(), append_defaults=False):
|
2435 |
defaults = [ "random", "microphone" ]
|
2436 |
os.makedirs(dir, exist_ok=True)
|
2437 |
-
res = sorted([d for d in os.listdir(dir) if d not in defaults and os.path.isdir(os.path.join(dir, d)) and len(os.listdir(os.path.join(dir, d))) > 0 ])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2438 |
if append_defaults:
|
2439 |
res = res + defaults
|
|
|
2440 |
return res
|
2441 |
|
2442 |
def get_valle_models(dir="./training/"):
|
|
|
32 |
import gradio.utils
|
33 |
import pandas as pd
|
34 |
|
35 |
+
from glob import glob
|
36 |
from datetime import datetime
|
37 |
from datetime import timedelta
|
38 |
|
|
|
1710 |
|
1711 |
results = {}
|
1712 |
|
1713 |
+
files = get_voice(voice, load_latents=False)
|
1714 |
indir = f'./training/{voice}/'
|
1715 |
infile = f'{indir}/whisper.json'
|
1716 |
|
|
|
2105 |
phn_file = jobs['phonemize'][0][i]
|
2106 |
normalized = jobs['phonemize'][1][i]
|
2107 |
|
2108 |
+
try:
|
2109 |
+
phonemized = valle_phonemize( normalized )
|
2110 |
+
open(phn_file, 'w', encoding='utf-8').write(" ".join(phonemized))
|
2111 |
+
print("Phonemized:", phn_file)
|
2112 |
+
except Exception as e:
|
2113 |
+
message = f"Failed to phonemize: {phn_file}: {normalized}"
|
2114 |
+
messages.append(message)
|
2115 |
+
print(message)
|
2116 |
+
|
2117 |
|
2118 |
training_joined = "\n".join(lines['training'])
|
2119 |
validation_joined = "\n".join(lines['validation'])
|
|
|
2438 |
def relative_paths( dirs ):
|
2439 |
return [ './' + os.path.relpath( d ).replace("\\", "/") for d in dirs ]
|
2440 |
|
2441 |
+
def get_voice( name, dir=get_voice_dir(), load_latents=True ):
|
2442 |
+
subj = f'{dir}/{name}/'
|
2443 |
+
if not os.path.isdir(subj):
|
2444 |
+
return
|
2445 |
+
|
2446 |
+
voice = list(glob(f'{subj}/*.wav')) + list(glob(f'{subj}/*.mp3')) + list(glob(f'{subj}/*.flac'))
|
2447 |
+
if load_latents:
|
2448 |
+
voice = voice + list(glob(f'{subj}/*.pth'))
|
2449 |
+
return sorted( voice )
|
2450 |
+
|
2451 |
def get_voice_list(dir=get_voice_dir(), append_defaults=False):
|
2452 |
defaults = [ "random", "microphone" ]
|
2453 |
os.makedirs(dir, exist_ok=True)
|
2454 |
+
#res = sorted([d for d in os.listdir(dir) if d not in defaults and os.path.isdir(os.path.join(dir, d)) and len(os.listdir(os.path.join(dir, d))) > 0 ])
|
2455 |
+
|
2456 |
+
res = []
|
2457 |
+
for name in os.listdir(dir):
|
2458 |
+
if name in defaults:
|
2459 |
+
continue
|
2460 |
+
if not os.path.isdir(f'{dir}/{name}'):
|
2461 |
+
continue
|
2462 |
+
if len(os.listdir(os.path.join(dir, name))) == 0:
|
2463 |
+
continue
|
2464 |
+
files = get_voice( name, dir=dir )
|
2465 |
+
|
2466 |
+
if len(files) > 0:
|
2467 |
+
res.append(name)
|
2468 |
+
else:
|
2469 |
+
for subdir in os.listdir(f'{dir}/{name}'):
|
2470 |
+
if not os.path.isdir(f'{dir}/{name}/{subdir}'):
|
2471 |
+
continue
|
2472 |
+
files = get_voice( f'{name}/{subdir}', dir=dir )
|
2473 |
+
if len(files) == 0:
|
2474 |
+
continue
|
2475 |
+
res.append(f'{name}/{subdir}')
|
2476 |
+
|
2477 |
+
res = sorted(res)
|
2478 |
+
|
2479 |
if append_defaults:
|
2480 |
res = res + defaults
|
2481 |
+
|
2482 |
return res
|
2483 |
|
2484 |
def get_valle_models(dir="./training/"):
|
src/webui.py
CHANGED
@@ -201,7 +201,7 @@ def diarize_dataset( voice, progress=gr.Progress(track_tqdm=False) ):
|
|
201 |
pipeline = Pipeline.from_pretrained("pyannote/speaker-diarization", use_auth_token=args.hf_token)
|
202 |
|
203 |
messages = []
|
204 |
-
files =
|
205 |
for file in enumerate_progress(files, desc="Iterating through voice files", progress=progress):
|
206 |
diarization = pipeline(file)
|
207 |
for turn, _, speaker in diarization.itertracks(yield_label=True):
|
@@ -217,15 +217,12 @@ def prepare_all_datasets( language, validation_text_length, validation_audio_len
|
|
217 |
messages = []
|
218 |
voices = get_voice_list()
|
219 |
|
220 |
-
"""
|
221 |
-
for voice in voices:
|
222 |
-
message = prepare_dataset_proxy(voice, **kwargs)
|
223 |
-
messages.append(message)
|
224 |
"""
|
225 |
for voice in voices:
|
226 |
print("Processing:", voice)
|
227 |
message = transcribe_dataset( voice=voice, language=language, skip_existings=skip_existings, progress=progress )
|
228 |
messages.append(message)
|
|
|
229 |
|
230 |
if slice_audio:
|
231 |
for voice in voices:
|
|
|
201 |
pipeline = Pipeline.from_pretrained("pyannote/speaker-diarization", use_auth_token=args.hf_token)
|
202 |
|
203 |
messages = []
|
204 |
+
files = get_voice(voice, load_latents=False)
|
205 |
for file in enumerate_progress(files, desc="Iterating through voice files", progress=progress):
|
206 |
diarization = pipeline(file)
|
207 |
for turn, _, speaker in diarization.itertracks(yield_label=True):
|
|
|
217 |
messages = []
|
218 |
voices = get_voice_list()
|
219 |
|
|
|
|
|
|
|
|
|
220 |
"""
|
221 |
for voice in voices:
|
222 |
print("Processing:", voice)
|
223 |
message = transcribe_dataset( voice=voice, language=language, skip_existings=skip_existings, progress=progress )
|
224 |
messages.append(message)
|
225 |
+
"""
|
226 |
|
227 |
if slice_audio:
|
228 |
for voice in voices:
|