supawichwac committed
Commit 5932791
1 Parent(s): 9613253

Saving train state of step 5

distil-whisper/events.out.tfevents.1715183755.server02.1990428.0 CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:89067edc26fa465bd32a850c8f10ead9195d7e28ea74ffab40e7e4c485c2e403
-size 88
+oid sha256:0354de01094b8592de53be68e8334fc420b4c287e9e9001240278094f8d39757
+size 428
distil-whisper/events.out.tfevents.1715185948.server02.2003546.0 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d9d1df0578ebc524aef4c901dc2bc8d268d478f60a8425cb059994bd0aa32c8c
+size 88
distil-whisper/events.out.tfevents.1715198685.server02.2050598.0 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:95c1c723bbc492f1204ace70eb4d257d87b54510f03dd9c27d25658eb3400728
+size 392
model.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bc0c1ad5d126bcd257bd671c07641b701a001e808c0e63178fe6a801f1587bbe
+size 3025686376
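
Each of the four binary files above is stored as a Git LFS pointer rather than the object itself: a three-line text stub recording the spec version, the object's sha256 oid, and its size in bytes. As a minimal standard-library Python sketch (not part of this repo; file paths are hypothetical), such a pointer can be parsed and a locally downloaded object verified against it:

import hashlib
from pathlib import Path

def parse_lfs_pointer(pointer_path):
    # The pointer is three "key value" lines: version, oid sha256:<hex>, size.
    fields = dict(line.split(" ", 1) for line in Path(pointer_path).read_text().splitlines())
    return fields["oid"].removeprefix("sha256:"), int(fields["size"])

def verify_object(pointer_path, object_path):
    # Hash the real file in chunks (model.safetensors here is ~3 GB) and
    # compare size and digest against what the pointer records.
    oid, size = parse_lfs_pointer(pointer_path)
    digest = hashlib.sha256()
    with open(object_path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            digest.update(chunk)
    return Path(object_path).stat().st_size == size and digest.hexdigest() == oid

# Hypothetical usage:
# verify_object("model.safetensors.pointer", "model.safetensors")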
run_distillation.py CHANGED
@@ -1278,16 +1278,17 @@ def main():
     # 11. Define Evaluation Metrics
     def compute_metrics(preds, labels):
         # replace padded labels by the padding token
-        print(f" preds : {preds}")
-        print(f" labels : {labels}")
+
         for idx in range(len(labels)):
             labels[idx][labels[idx] == -100] = tokenizer.pad_token_id
 
         pred_str = tokenizer.batch_decode(preds, skip_special_tokens=True, decode_with_timestamps=return_timestamps)
+        print(f" pred_str : {pred_str}")
         # we do not want to group tokens when computing the metrics
+
         label_str = tokenizer.batch_decode(labels, skip_special_tokens=True)
         wer_ortho = 100 * metric.compute(predictions=pred_str, references=label_str)
-
+        print(f" label_str : {label_str}")
         # normalize everything and re-compute the WER
         norm_pred_str = [normalizer(pred) for pred in pred_str]
         norm_label_str = [normalizer(label) for label in label_str]
@@ -1755,5 +1756,5 @@ def main():
 if __name__ == "__main__":
     main()
 '''
-accelerate launch --mixed_precision=bf16 run_distillation.py --model_name_or_path "./distil-large-v3-init" --teacher_model_name_or_path "openai/whisper-large-v3" --train_dataset_name "mozilla-foundation/common_voice_15_0" --train_dataset_config_name "de" --train_split_name "train" --text_column_name "sentence" --eval_dataset_name "mozilla-foundation/common_voice_15_0" --eval_dataset_config_name "de" --eval_split_name "validation" --eval_text_column_name "sentence" --eval_steps 5 --save_steps 50 --warmup_steps 500 --learning_rate 1e-4 --lr_scheduler_type "linear" --logging_steps 25 --save_total_limit 1 --max_steps 5 --per_device_train_batch_size 4 --per_device_eval_batch_size 2 --dataloader_num_workers 2 --preprocessing_num_workers 2 --ddp_timeout 7200 --dtype "bfloat16" --output_dir "./" --use_pseudo_labels "false" --condition_on_prev_probability "0.0" --do_train --do_eval --gradient_checkpointing --overwrite_output_dir --predict_with_generate --freeze_encoder --streaming --push_to_hub --language de --max_eval_samples 5
+accelerate launch --mixed_precision=bf16 run_distillation.py --model_name_or_path "./distil-large-v3-init" --teacher_model_name_or_path "openai/whisper-large-v3" --train_dataset_name "mozilla-foundation/common_voice_15_0" --train_dataset_config_name "de" --train_split_name "train" --text_column_name "sentence" --eval_dataset_name "mozilla-foundation/common_voice_15_0" --eval_dataset_config_name "de" --eval_split_name "validation" --eval_text_column_name "sentence" --eval_steps 5 --save_steps 5 --warmup_steps 500 --learning_rate 1e-4 --lr_scheduler_type "linear" --logging_steps 5 --save_total_limit 1 --max_steps 15 --per_device_train_batch_size 4 --per_device_eval_batch_size 2 --dataloader_num_workers 2 --preprocessing_num_workers 2 --ddp_timeout 7200 --dtype "bfloat16" --output_dir "./" --use_pseudo_labels "false" --condition_on_prev_probability "0.0" --do_train --do_eval --gradient_checkpointing --overwrite_output_dir --predict_with_generate --freeze_encoder --streaming --push_to_hub --language de --max_eval_samples 50
 '''
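
For context on the hunk at line 1278, here is a self-contained sketch of the compute_metrics flow this commit instruments, assuming the `evaluate` and `transformers` packages. The tokenizer, normalizer, and metric mirror the names used in run_distillation.py, but the final normalized-WER step is not visible in the diff and is reconstructed from the usual distil-whisper recipe, so treat it as an assumption; the dummy inputs are purely illustrative.

import numpy as np  # used for the dummy token-id arrays below
import evaluate
from transformers import WhisperTokenizer
from transformers.models.whisper.english_normalizer import BasicTextNormalizer

tokenizer = WhisperTokenizer.from_pretrained("openai/whisper-large-v3")
normalizer = BasicTextNormalizer()  # language-agnostic normalizer, e.g. for German
metric = evaluate.load("wer")

def compute_metrics(preds, labels):
    # Replace the -100 padding used for the loss by the pad token so the
    # labels decode to clean reference strings.
    for idx in range(len(labels)):
        labels[idx][labels[idx] == -100] = tokenizer.pad_token_id

    # The real script also passes decode_with_timestamps=return_timestamps here.
    pred_str = tokenizer.batch_decode(preds, skip_special_tokens=True)
    label_str = tokenizer.batch_decode(labels, skip_special_tokens=True)
    # Orthographic WER: computed on the raw decoded strings.
    wer_ortho = 100 * metric.compute(predictions=pred_str, references=label_str)

    # Normalize everything and re-compute the WER (reconstructed step).
    norm_pred_str = [normalizer(pred) for pred in pred_str]
    norm_label_str = [normalizer(label) for label in label_str]
    wer = 100 * metric.compute(predictions=norm_pred_str, references=norm_label_str)
    return {"wer_ortho": wer_ortho, "wer": wer}

# Hypothetical usage with dummy token ids:
# labels = [np.array([50258, 50261, 50359, 2425, -100, -100])]
# preds = [np.array([50258, 50261, 50359, 2425, 50257])]
# print(compute_metrics(preds, labels))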