Training in progress, step 200
Browse files
- model-00003-of-00004.safetensors +1 -1
- model-00004-of-00004.safetensors +1 -1
- train_conv_slurm_full.py +12 -10
- training_args.bin +1 -1
model-00003-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4988522632
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:574b6173d421e48618cab813ba4c01980928c611f41fd03eba8de73360a0aa8a
|
3 |
size 4988522632
|
model-00004-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1118042580
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bdd6d69ff075939395f9774225b8131baefa33d9b34ec513bd136f2517e28f69
|
3 |
size 1118042580
|
train_conv_slurm_full.py
CHANGED
@@ -17,16 +17,17 @@ import numpy as np
|
|
17 |
# ========================
|
18 |
WANDB_PROJECT_NAME = "mmlm-conv-full"
|
19 |
WANDB_API_KEY = "0793be66347fa388f401f66cb39fd661452d660d"
|
|
|
20 |
DATASET = load_dataset("voidful/all_conv_data_filtered_small")['train']
|
21 |
# DATASET = datasets.load_from_disk("/mnt/home/ntuspeechlabtaipei1/anthony/Soundon-TTS-preprocessing/hf_dialogue_chinese_llama31_70B_user_long_2_with_silence")
|
22 |
LM_MODEL_NAME = "voidful/Llama-3.2-8B-Whisper"
|
23 |
OUTPUT_DIR = "/mnt/home/ntuspeechlabtaipei1/mmlm-conv-training-full"
|
24 |
MODEL_SAVE_PATH = "/mnt/home/ntuspeechlabtaipei1/mmlm-conv-model-full"
|
25 |
TRAIN_TEST_SPLIT_RATIO = 0.1
|
26 |
-
EPOCHS =
|
27 |
BATCH_SIZE = 1
|
28 |
-
LEARNING_RATE =
|
29 |
-
GRADIENT_ACCUMULATION_STEPS =
|
30 |
USE_BF16 = True
|
31 |
USE_FP16 = False
|
32 |
LOGGING_STEPS = 1
|
@@ -53,6 +54,7 @@ def initialize_wandb():
|
|
53 |
group="mmlm",
|
54 |
)
|
55 |
|
|
|
56 |
class CustomDataset(Dataset):
|
57 |
"""Custom dataset class for handling audio-text data."""
|
58 |
|
@@ -67,6 +69,7 @@ class CustomDataset(Dataset):
|
|
67 |
entry = self.data
|
68 |
# print(len(entry[idx]["user_audio_path"]['array']),entry[idx]["user_audio_path"]['array'])
|
69 |
audio_path = torch.tensor(entry[idx]["user_audio_path"]['array'])
|
|
|
70 |
# if not os.path.exists(audio_path):
|
71 |
# audio_path = os.path.join("/mnt/home/ntuspeechlabtaipei1/anthony/Soundon-TTS-preprocessing/", audio_path)
|
72 |
audio_tensor = load_audio_to_tensor(audio_path)[0]
|
@@ -76,8 +79,8 @@ class CustomDataset(Dataset):
|
|
76 |
user_text_with_pad = text_with_pad[0]
|
77 |
user_text_with_pad = "[PAD]" + user_text_with_pad
|
78 |
audio_tensor = torch.cat([audio_tensor[0], torch.zeros(int(24000 * 0.08 * 1))], dim=0).unsqueeze(dim=0)
|
79 |
-
|
80 |
-
machine_text_with_pad = text_with_pad[1][5:] + "[PAD]"
|
81 |
audio_unit = np.array(entry[idx]["machine_unit"])
|
82 |
|
83 |
zero_sequences = [] # To store start and end times
|
@@ -127,7 +130,6 @@ class CustomDataset(Dataset):
|
|
127 |
}
|
128 |
|
129 |
|
130 |
-
|
131 |
class CustomDataCollator:
|
132 |
"""Custom data collator for batching audio and text inputs."""
|
133 |
|
@@ -174,8 +176,8 @@ def main():
|
|
174 |
# Split dataset
|
175 |
# data = data.train_test_split(test_size=0.5, seed=42)
|
176 |
data = data.shuffle(seed=42)
|
177 |
-
subset_size =
|
178 |
-
data = data.select(range(subset_size))
|
179 |
train_dataset = CustomDataset(data, tokenizer)
|
180 |
# eval_dataset = CustomDataset(data['test'], tokenizer)
|
181 |
# train_dataset = CustomDataset(data.select([0, 1, 2, 3, 4]), tokenizer)
|
@@ -226,8 +228,8 @@ def main():
|
|
226 |
)
|
227 |
|
228 |
# Train and evaluate model
|
229 |
-
# resume_from_checkpoint =
|
230 |
-
trainer.train()
|
231 |
|
232 |
# Save model
|
233 |
trainer.save_model(MODEL_SAVE_PATH)
|
|
|
17 |
# ========================
|
18 |
WANDB_PROJECT_NAME = "mmlm-conv-full"
|
19 |
WANDB_API_KEY = "0793be66347fa388f401f66cb39fd661452d660d"
|
20 |
+
# DATASET = load_dataset("voidful/all_conv_data")['train']
|
21 |
DATASET = load_dataset("voidful/all_conv_data_filtered_small")['train']
|
22 |
# DATASET = datasets.load_from_disk("/mnt/home/ntuspeechlabtaipei1/anthony/Soundon-TTS-preprocessing/hf_dialogue_chinese_llama31_70B_user_long_2_with_silence")
|
23 |
LM_MODEL_NAME = "voidful/Llama-3.2-8B-Whisper"
|
24 |
OUTPUT_DIR = "/mnt/home/ntuspeechlabtaipei1/mmlm-conv-training-full"
|
25 |
MODEL_SAVE_PATH = "/mnt/home/ntuspeechlabtaipei1/mmlm-conv-model-full"
|
26 |
TRAIN_TEST_SPLIT_RATIO = 0.1
|
27 |
+
EPOCHS = 5000
|
28 |
BATCH_SIZE = 1
|
29 |
+
LEARNING_RATE = 1e-6
|
30 |
+
GRADIENT_ACCUMULATION_STEPS = 20
|
31 |
USE_BF16 = True
|
32 |
USE_FP16 = False
|
33 |
LOGGING_STEPS = 1
|
|
|
54 |
group="mmlm",
|
55 |
)
|
56 |
|
57 |
+
|
58 |
class CustomDataset(Dataset):
|
59 |
"""Custom dataset class for handling audio-text data."""
|
60 |
|
|
|
69 |
entry = self.data
|
70 |
# print(len(entry[idx]["user_audio_path"]['array']),entry[idx]["user_audio_path"]['array'])
|
71 |
audio_path = torch.tensor(entry[idx]["user_audio_path"]['array'])
|
72 |
+
# audio_path = entry[idx]["user_audio_path"]
|
73 |
# if not os.path.exists(audio_path):
|
74 |
# audio_path = os.path.join("/mnt/home/ntuspeechlabtaipei1/anthony/Soundon-TTS-preprocessing/", audio_path)
|
75 |
audio_tensor = load_audio_to_tensor(audio_path)[0]
|
|
|
79 |
user_text_with_pad = text_with_pad[0]
|
80 |
user_text_with_pad = "[PAD]" + user_text_with_pad
|
81 |
audio_tensor = torch.cat([audio_tensor[0], torch.zeros(int(24000 * 0.08 * 1))], dim=0).unsqueeze(dim=0)
|
82 |
+
machine_text_with_pad = text_with_pad[1]
|
83 |
+
# machine_text_with_pad = text_with_pad[1][5:] + "[PAD]"
|
84 |
audio_unit = np.array(entry[idx]["machine_unit"])
|
85 |
|
86 |
zero_sequences = [] # To store start and end times
|
|
|
130 |
}
|
131 |
|
132 |
|
|
|
133 |
class CustomDataCollator:
|
134 |
"""Custom data collator for batching audio and text inputs."""
|
135 |
|
|
|
176 |
# Split dataset
|
177 |
# data = data.train_test_split(test_size=0.5, seed=42)
|
178 |
data = data.shuffle(seed=42)
|
179 |
+
# subset_size = 4000
|
180 |
+
# data = data.select(range(subset_size))
|
181 |
train_dataset = CustomDataset(data, tokenizer)
|
182 |
# eval_dataset = CustomDataset(data['test'], tokenizer)
|
183 |
# train_dataset = CustomDataset(data.select([0, 1, 2, 3, 4]), tokenizer)
|
|
|
228 |
)
|
229 |
|
230 |
# Train and evaluate model
|
231 |
+
# resume_from_checkpoint = '/mnt/home/ntuspeechlabtaipei1/mmlm-conv-training-full/checkpoint-75200/'
|
232 |
+
trainer.train(resume_from_checkpoint='/mnt/home/ntuspeechlabtaipei1/mmlm-conv-training-full/checkpoint-75200/')
|
233 |
|
234 |
# Save model
|
235 |
trainer.save_model(MODEL_SAVE_PATH)
|
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 7672
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c244fb7f19cf364a14a14281a7e64bd1dad296cfa501f160bb8430cbc9730013
|
3 |
size 7672
|