mrfakename committed on
Commit
af4e5fa
1 Parent(s): c6eda07

Sync from GitHub repo

Browse files

This Space is synced from the GitHub repo: https://github.com/SWivid/F5-TTS. Please submit contributions to the Space there.

Files changed (1) hide show
  1. src/f5_tts/train/finetune_gradio.py +34 -13
src/f5_tts/train/finetune_gradio.py CHANGED
@@ -737,19 +737,36 @@ def format_seconds_to_hms(seconds):
737
  return "{:02d}:{:02d}:{:02d}".format(hours, minutes, int(seconds))
738
 
739
 
740
- def get_correct_audio_path(audio_input, base_path="wavs"):
741
- # Case 1: If it's a full path, use it directly
742
- if os.path.isabs(audio_input):
743
- file_audio = audio_input
 
 
744
 
745
- # Case 2: If it has .wav but is not a full path
746
- elif audio_input.endswith(".wav") and not os.path.isabs(audio_input):
747
- file_audio = os.path.join(base_path, audio_input)
748
 
749
- # Case 3: If only the name (no .wav and not a full path)
750
- elif not audio_input.endswith(".wav") and not os.path.isabs(audio_input):
751
- file_audio = os.path.join(base_path, audio_input + ".wav")
752
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
753
  return file_audio
754
 
755
 
@@ -795,8 +812,8 @@ def create_metadata(name_project, ch_tokenizer, progress=gr.Progress()):
795
  print(f"Error processing {file_audio}: {e}")
796
  continue
797
 
798
- if duration < 1 and duration > 25:
799
- error_files.append([file_audio, "duration < 1 and > 25 "])
800
  continue
801
  if len(text) < 4:
802
  error_files.append([file_audio, "very small text len 3"])
@@ -1463,7 +1480,11 @@ Skip this step if you have your dataset, raw.arrow , duraction.json and vocab.tx
1463
 
1464
  gr.Markdown(
1465
  """```plaintext
1466
- place all your wavs folder and your metadata.csv file in {your name project}
 
 
 
 
1467
  my_speak/
1468
 
1469
  ├── wavs/
 
737
  return "{:02d}:{:02d}:{:02d}".format(hours, minutes, int(seconds))
738
 
739
 
740
def get_correct_audio_path(
    audio_input,
    base_path="wavs",
    supported_formats=("wav", "mp3", "aac", "flac", "m4a", "alac", "ogg", "aiff", "wma", "amr"),
):
    """Resolve an audio reference to a file path.

    Args:
        audio_input: Absolute path, relative file name, or bare name without
            an extension.
        base_path: Directory that relative inputs are joined onto.
        supported_formats: Extensions (without the leading dot) that count as
            audio. Order matters: it is the probing order for inputs that have
            no recognised extension, and the first entry is the fallback
            extension for relative inputs when nothing is found on disk.

    Returns:
        A path string, never ``None``:

        * input ends with a supported extension (compared case-insensitively):
          the absolute input unchanged, or ``base_path`` joined with the
          relative input;
        * otherwise the first ``<path>.<ext>`` that exists on disk;
        * otherwise, for a relative input, ``<base_path>/<input>.<first ext>``;
          for an absolute input (or an empty ``supported_formats``), the path
          without any extension appended.
    """
    is_absolute = os.path.isabs(audio_input)
    # Absolute inputs are used as given; everything else lives under base_path.
    candidate = audio_input if is_absolute else os.path.join(base_path, audio_input)

    # str.endswith accepts a tuple of suffixes. Both sides are lower-cased so
    # that e.g. "clip.WAV" is recognised instead of becoming "clip.WAV.wav".
    known_suffixes = tuple(f".{ext.lower()}" for ext in supported_formats)
    if audio_input.lower().endswith(known_suffixes):
        return candidate

    # No recognised extension: look for an existing file, trying the formats
    # in the order the caller listed them.
    for ext in supported_formats:
        probe = f"{candidate}.{ext}"
        if os.path.exists(probe):
            return probe

    # Nothing on disk. An absolute path is handed back untouched rather than
    # as None, so the result is always a usable path string; the same applies
    # when there is no format to fall back on.
    if is_absolute or not supported_formats:
        return candidate
    return f"{candidate}.{supported_formats[0]}"
771
 
772
 
 
812
  print(f"Error processing {file_audio}: {e}")
813
  continue
814
 
815
+ if duration < 1 or duration > 25:
816
+ error_files.append([file_audio, "duration < 1 or > 25 "])
817
  continue
818
  if len(text) < 4:
819
  error_files.append([file_audio, "very small text len 3"])
 
1480
 
1481
  gr.Markdown(
1482
  """```plaintext
1483
+ place all your wavs folder and your metadata.csv file in {your name project}
1484
+
1485
+ suport format for audio "wav", "mp3", "aac", "flac", "m4a", "alac", "ogg", "aiff", "wma", "amr"
1486
+
1487
+ example wav format
1488
  my_speak/
1489
 
1490
  ├── wavs/