voidful committed on
Commit
0bd9c7e
·
verified ·
1 Parent(s): 540d3c3

Training in progress, step 200

Browse files
model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f1061349853637e78beb25546e7229b776288b8bc0b1e9ceedb20bfff093efd7
3
  size 4988522632
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:574b6173d421e48618cab813ba4c01980928c611f41fd03eba8de73360a0aa8a
3
  size 4988522632
model-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4ecf70e6c84ac3ab87aa49dc18fda2bc0d34397121771d9a136472128766a909
3
  size 1118042580
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bdd6d69ff075939395f9774225b8131baefa33d9b34ec513bd136f2517e28f69
3
  size 1118042580
train_conv_slurm_full.py CHANGED
@@ -17,16 +17,17 @@ import numpy as np
17
  # ========================
18
  WANDB_PROJECT_NAME = "mmlm-conv-full"
19
  WANDB_API_KEY = "0793be66347fa388f401f66cb39fd661452d660d"
 
20
  DATASET = load_dataset("voidful/all_conv_data_filtered_small")['train']
21
  # DATASET = datasets.load_from_disk("/mnt/home/ntuspeechlabtaipei1/anthony/Soundon-TTS-preprocessing/hf_dialogue_chinese_llama31_70B_user_long_2_with_silence")
22
  LM_MODEL_NAME = "voidful/Llama-3.2-8B-Whisper"
23
  OUTPUT_DIR = "/mnt/home/ntuspeechlabtaipei1/mmlm-conv-training-full"
24
  MODEL_SAVE_PATH = "/mnt/home/ntuspeechlabtaipei1/mmlm-conv-model-full"
25
  TRAIN_TEST_SPLIT_RATIO = 0.1
26
- EPOCHS = 300
27
  BATCH_SIZE = 1
28
- LEARNING_RATE = 8e-4
29
- GRADIENT_ACCUMULATION_STEPS = 2
30
  USE_BF16 = True
31
  USE_FP16 = False
32
  LOGGING_STEPS = 1
@@ -53,6 +54,7 @@ def initialize_wandb():
53
  group="mmlm",
54
  )
55
 
 
56
  class CustomDataset(Dataset):
57
  """Custom dataset class for handling audio-text data."""
58
 
@@ -67,6 +69,7 @@ class CustomDataset(Dataset):
67
  entry = self.data
68
  # print(len(entry[idx]["user_audio_path"]['array']),entry[idx]["user_audio_path"]['array'])
69
  audio_path = torch.tensor(entry[idx]["user_audio_path"]['array'])
 
70
  # if not os.path.exists(audio_path):
71
  # audio_path = os.path.join("/mnt/home/ntuspeechlabtaipei1/anthony/Soundon-TTS-preprocessing/", audio_path)
72
  audio_tensor = load_audio_to_tensor(audio_path)[0]
@@ -76,8 +79,8 @@ class CustomDataset(Dataset):
76
  user_text_with_pad = text_with_pad[0]
77
  user_text_with_pad = "[PAD]" + user_text_with_pad
78
  audio_tensor = torch.cat([audio_tensor[0], torch.zeros(int(24000 * 0.08 * 1))], dim=0).unsqueeze(dim=0)
79
- # machine_text_with_pad = text_with_pad[1]
80
- machine_text_with_pad = text_with_pad[1][5:] + "[PAD]"
81
  audio_unit = np.array(entry[idx]["machine_unit"])
82
 
83
  zero_sequences = [] # To store start and end times
@@ -127,7 +130,6 @@ class CustomDataset(Dataset):
127
  }
128
 
129
 
130
-
131
  class CustomDataCollator:
132
  """Custom data collator for batching audio and text inputs."""
133
 
@@ -174,8 +176,8 @@ def main():
174
  # Split dataset
175
  # data = data.train_test_split(test_size=0.5, seed=42)
176
  data = data.shuffle(seed=42)
177
- subset_size = 100
178
- data = data.select(range(subset_size))
179
  train_dataset = CustomDataset(data, tokenizer)
180
  # eval_dataset = CustomDataset(data['test'], tokenizer)
181
  # train_dataset = CustomDataset(data.select([0, 1, 2, 3, 4]), tokenizer)
@@ -226,8 +228,8 @@ def main():
226
  )
227
 
228
  # Train and evaluate model
229
- # resume_from_checkpoint = '/mnt/home/ntuspeechlabtaipei1/mmlm-conv-training-fixed-10k/checkpoint-2000/'
230
- trainer.train()
231
 
232
  # Save model
233
  trainer.save_model(MODEL_SAVE_PATH)
 
17
  # ========================
18
  WANDB_PROJECT_NAME = "mmlm-conv-full"
19
  WANDB_API_KEY = "0793be66347fa388f401f66cb39fd661452d660d"
20
+ # DATASET = load_dataset("voidful/all_conv_data")['train']
21
  DATASET = load_dataset("voidful/all_conv_data_filtered_small")['train']
22
  # DATASET = datasets.load_from_disk("/mnt/home/ntuspeechlabtaipei1/anthony/Soundon-TTS-preprocessing/hf_dialogue_chinese_llama31_70B_user_long_2_with_silence")
23
  LM_MODEL_NAME = "voidful/Llama-3.2-8B-Whisper"
24
  OUTPUT_DIR = "/mnt/home/ntuspeechlabtaipei1/mmlm-conv-training-full"
25
  MODEL_SAVE_PATH = "/mnt/home/ntuspeechlabtaipei1/mmlm-conv-model-full"
26
  TRAIN_TEST_SPLIT_RATIO = 0.1
27
+ EPOCHS = 5000
28
  BATCH_SIZE = 1
29
+ LEARNING_RATE = 1e-6
30
+ GRADIENT_ACCUMULATION_STEPS = 20
31
  USE_BF16 = True
32
  USE_FP16 = False
33
  LOGGING_STEPS = 1
 
54
  group="mmlm",
55
  )
56
 
57
+
58
  class CustomDataset(Dataset):
59
  """Custom dataset class for handling audio-text data."""
60
 
 
69
  entry = self.data
70
  # print(len(entry[idx]["user_audio_path"]['array']),entry[idx]["user_audio_path"]['array'])
71
  audio_path = torch.tensor(entry[idx]["user_audio_path"]['array'])
72
+ # audio_path = entry[idx]["user_audio_path"]
73
  # if not os.path.exists(audio_path):
74
  # audio_path = os.path.join("/mnt/home/ntuspeechlabtaipei1/anthony/Soundon-TTS-preprocessing/", audio_path)
75
  audio_tensor = load_audio_to_tensor(audio_path)[0]
 
79
  user_text_with_pad = text_with_pad[0]
80
  user_text_with_pad = "[PAD]" + user_text_with_pad
81
  audio_tensor = torch.cat([audio_tensor[0], torch.zeros(int(24000 * 0.08 * 1))], dim=0).unsqueeze(dim=0)
82
+ machine_text_with_pad = text_with_pad[1]
83
+ # machine_text_with_pad = text_with_pad[1][5:] + "[PAD]"
84
  audio_unit = np.array(entry[idx]["machine_unit"])
85
 
86
  zero_sequences = [] # To store start and end times
 
130
  }
131
 
132
 
 
133
  class CustomDataCollator:
134
  """Custom data collator for batching audio and text inputs."""
135
 
 
176
  # Split dataset
177
  # data = data.train_test_split(test_size=0.5, seed=42)
178
  data = data.shuffle(seed=42)
179
+ # subset_size = 4000
180
+ # data = data.select(range(subset_size))
181
  train_dataset = CustomDataset(data, tokenizer)
182
  # eval_dataset = CustomDataset(data['test'], tokenizer)
183
  # train_dataset = CustomDataset(data.select([0, 1, 2, 3, 4]), tokenizer)
 
228
  )
229
 
230
  # Train and evaluate model
231
+ # resume_from_checkpoint = '/mnt/home/ntuspeechlabtaipei1/mmlm-conv-training-full/checkpoint-75200/'
232
+ trainer.train(resume_from_checkpoint='/mnt/home/ntuspeechlabtaipei1/mmlm-conv-training-full/checkpoint-75200/')
233
 
234
  # Save model
235
  trainer.save_model(MODEL_SAVE_PATH)
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:043756452dd25ffceba12b66887f0f266cae4544bb031d32637bfbae7b033734
3
  size 7672
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c244fb7f19cf364a14a14281a7e64bd1dad296cfa501f160bb8430cbc9730013
3
  size 7672