Spaces:
Runtime error
Runtime error
Update finetune_xtts_hf.py
Browse files- finetune_xtts_hf.py +11 -5
finetune_xtts_hf.py
CHANGED
|
@@ -19,15 +19,21 @@ print("=== Descargando dataset sob111/voxpopuli_es_500 ===")
|
|
| 19 |
ds = load_dataset("sob111/voxpopuli_es_500", split="train", token=HF_TOKEN)
|
| 20 |
|
| 21 |
# Guardar metadata.json en el formato esperado por Coqui TTS
|
| 22 |
-
os.makedirs("/tmp/voxpopuli_es_500", exist_ok=True)
|
| 23 |
meta_file = "/tmp/voxpopuli_es_500/metadata.json"
|
| 24 |
|
| 25 |
with open(meta_file, "w", encoding="utf-8") as f:
|
| 26 |
-
for sample in ds:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
entry = {
|
| 28 |
-
"audio_file":
|
| 29 |
-
"text": sample
|
| 30 |
-
"speaker_name": sample.get("
|
| 31 |
}
|
| 32 |
f.write(json.dumps(entry, ensure_ascii=False) + "\n")
|
| 33 |
|
|
|
|
| 19 |
ds = load_dataset("sob111/voxpopuli_es_500", split="train", token=HF_TOKEN)

# Export the dataset to disk: one WAV file per sample under wav_data/, plus a
# metadata file holding one JSON object per line (audio path, transcript,
# speaker). The original author's note says this is the layout Coqui TTS
# expects.
import soundfile as sf  # imported once, not on every loop iteration

dataset_dir = "/tmp/voxpopuli_es_500"
wav_dir = os.path.join(dataset_dir, "wav_data")
os.makedirs(wav_dir, exist_ok=True)  # creates dataset_dir as well
meta_file = os.path.join(dataset_dir, "metadata.json")

with open(meta_file, "w", encoding="utf-8") as f:
    for i, sample in enumerate(ds):
        # Write the decoded waveform so the metadata can reference a real file.
        audio = sample["audio"]
        audio_path = os.path.join(wav_dir, f"sample_{i}.wav")
        sf.write(audio_path, audio["array"], audio["sampling_rate"])

        # A key that is present but null must not leak into the metadata as
        # JSON null (text) or as the literal string "None" (speaker).
        speaker_id = sample.get("speaker_id")
        entry = {
            "audio_file": audio_path,
            "text": sample.get("text") or sample.get("sentence") or "",
            "speaker_name": "speaker" if speaker_id is None else str(speaker_id),
        }
        f.write(json.dumps(entry, ensure_ascii=False) + "\n")
|
| 39 |
|