ulysses115 committed on
Commit
c706110
1 Parent(s): 59425a6

Upload preprocess.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. preprocess.py +25 -0
preprocess.py ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import argparse
2
+ import text
3
+ from utils import load_filepaths_and_text
4
+
5
if __name__ == '__main__':
    # Script entry point: for every filelist given on the command line, run
    # the selected text cleaners over one column of each row and write the
    # result next to the input as "<filelist>.<out_extension>".
    parser = argparse.ArgumentParser()
    # Suffix appended (after a ".") to each input path to form the output path.
    parser.add_argument("--out_extension", default="cleaned")
    # Index of the field within each row that holds the text to clean.
    parser.add_argument("--text_index", default=1, type=int)
    parser.add_argument(
        "--filelists",
        nargs="+",
        default=[
            "filelists/ljs_audio_text_val_filelist.txt",
            "filelists/ljs_audio_text_test_filelist.txt",
        ],
    )
    # Cleaner names are handed straight to text._clean_text.
    parser.add_argument("--text_cleaners", nargs="+", default=["english_cleaners2"])

    args = parser.parse_args()

    for filelist in args.filelists:
        print("START:", filelist)
        # NOTE(review): load_filepaths_and_text is defined in utils and not
        # visible here; this code requires it to return rows of strings that
        # support item assignment (e.g. lists) -- confirm against utils.
        filepaths_and_text = load_filepaths_and_text(filelist)

        # Replace the text field of every row in place with its cleaned form.
        for row in filepaths_and_text:
            row[args.text_index] = text._clean_text(
                row[args.text_index], args.text_cleaners
            )

        # Rows are written back one per line with fields joined by "|".
        new_filelist = filelist + "." + args.out_extension
        with open(new_filelist, "w", encoding="utf-8") as f:
            f.writelines("|".join(row) + "\n" for row in filepaths_and_text)