sk0032 committed on
Commit
c28c4b6
1 Parent(s): 0499fef

Upload folder using huggingface_hub

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ trainer_0_log.txt filter=lfs diff=lfs merge=lfs -text
best_model.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d6d473e8956c52204d2c1dfc2b43299a6ed3f3064dba5ad39682cbbfc629e8d4
3
+ size 997871045
best_model_1009517.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d6d473e8956c52204d2c1dfc2b43299a6ed3f3064dba5ad39682cbbfc629e8d4
3
+ size 997871045
checkpoint_1111000.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6e42647d7d4f6a03d61e08961e753fa5366d693c49eb35a734edb5caae8a1ddd
3
+ size 997871109
checkpoint_1112000.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ae36115905bed58d939e8f79733ce590d2dce0b357e933a8c88782ea173debf5
3
+ size 997871109
checkpoint_1113000.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a5b8b41c60301c234f7031bcdf80cae317859aaf6815ce7983d4248a98678c66
3
+ size 997871109
checkpoint_1113117.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c96fda4ad9778107ba5e80c5cf3306f5a050b588ad850cb9c9b31014de70bf1f
3
+ size 997871109
events.out.tfevents.1693935825.ip-172-16-76-92.ec2.internal.52882.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8e085546e402a09aa9a8ae7d884d1cd03586e3a88a316c36ab8aa199a263c12c
3
+ size 10044566854
train_vits.py ADDED
@@ -0,0 +1,92 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+
3
+ from trainer import Trainer, TrainerArgs
4
+
5
+ from TTS.tts.configs.shared_configs import BaseDatasetConfig
6
+ from TTS.tts.configs.vits_config import VitsConfig
7
+ from TTS.tts.datasets import load_tts_samples
8
+ from TTS.tts.models.vits import Vits, VitsAudioConfig
9
+ from TTS.tts.utils.text.tokenizer import TTSTokenizer
10
+ from TTS.utils.audio import AudioProcessor
11
+
12
+ #output_path = os.path.dirname(os.path.abspath(__file__))
13
+ ##########################################
14
+ #Change this to your output directory (passed to VitsConfig and Trainer below)
15
+ ##########################################
16
+ output_path = os.path.dirname(os.path.abspath(__file__))
17
+ dataset_config = BaseDatasetConfig(
18
+ ##########################################
19
+ #Change this to your dataset directory
20
+ ##########################################
21
+ formatter="ljspeech", meta_file_train="metadata.csv", path="/home/ec2-user/SageMaker/tts-sage/recipes/ljspeech/vits_tts/adam"
22
+
23
+ )
24
+ audio_config = VitsAudioConfig(
25
+ sample_rate=48000, win_length=1024, hop_length=256, num_mels=80, mel_fmin=0, mel_fmax=None
26
+ )
27
+
28
+ config = VitsConfig(
29
+ audio=audio_config,
30
+ run_name="tts-adam-48k",
31
+ batch_size=7,
32
+ eval_batch_size=12,
33
+ batch_group_size=4,
34
+ # num_loader_workers=8,
35
+ num_loader_workers=4,
36
+ num_eval_loader_workers=4,
37
+ run_eval=True,
38
+ test_delay_epochs=-1,
39
+ epochs=100000,
40
+ save_step=1000,
41
+ save_checkpoints=True,
42
+ save_n_checkpoints=4,
43
+ save_best_after=1000,
44
+ #text_cleaner="english_cleaners",
45
+ text_cleaner="multilingual_cleaners",
46
+ use_phonemes=True,
47
+ phoneme_language="en-us",
48
+ phoneme_cache_path=os.path.join(output_path, "phoneme_cache"),
49
+ compute_input_seq_cache=True,
50
+ print_step=25,
51
+ print_eval=True,
52
+ mixed_precision=True,
53
+ output_path=output_path,
54
+ datasets=[dataset_config],
55
+ cudnn_benchmark=False,
56
+ )
57
+
58
+ # INITIALIZE THE AUDIO PROCESSOR
59
+ # Audio processor is used for feature extraction and audio I/O.
60
+ # It mainly serves the dataloader and the training loggers.
61
+ ap = AudioProcessor.init_from_config(config)
62
+
63
+ # INITIALIZE THE TOKENIZER
64
+ # Tokenizer is used to convert text to sequences of token IDs.
65
+ # config is updated with the default characters if not defined in the config.
66
+ tokenizer, config = TTSTokenizer.init_from_config(config)
67
+
68
+ # LOAD DATA SAMPLES
69
+ # Each sample is a list of ```[text, audio_file_path, speaker_name]```
70
+ # You can define your custom sample loader returning the list of samples.
71
+ # Or define your custom formatter and pass it to the `load_tts_samples`.
72
+ # Check `TTS.tts.datasets.load_tts_samples` for more details.
73
+ train_samples, eval_samples = load_tts_samples(
74
+ dataset_config,
75
+ eval_split=True,
76
+ eval_split_max_size=config.eval_split_max_size,
77
+ eval_split_size=config.eval_split_size,
78
+ )
79
+
80
+ # init model
81
+ model = Vits(config, ap, tokenizer, speaker_manager=None)
82
+
83
+ # init the trainer and begin
84
+ trainer = Trainer(
85
+ TrainerArgs(),
86
+ config,
87
+ output_path,
88
+ model=model,
89
+ train_samples=train_samples,
90
+ eval_samples=eval_samples,
91
+ )
92
+ trainer.fit()
trainer_0_log.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:865cd234a1230128eeb78c0d507eba4019732b0bbdc5360225011581c43dd60f
3
+ size 18198855
trainer_1_log.txt ADDED
File without changes
trainer_2_log.txt ADDED
File without changes
trainer_3_log.txt ADDED
File without changes
trainer_4_log.txt ADDED
File without changes
trainer_5_log.txt ADDED
File without changes
trainer_6_log.txt ADDED
File without changes
trainer_7_log.txt ADDED
File without changes