supawichwac committed on
Commit
67d478d
1 Parent(s): 2ea5fbf

Saving train state of step 5000

Browse files
checkpoint-5000-epoch-0/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:67c8afe47501f6ac5b2f112cbfc5b57bce18e4930d09140d818a8e1397b02110
3
+ size 3025686376
checkpoint-5000-epoch-0/model_1.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6b395c8a7e2bda655c415580106288d0387c227efd641bf4e11c1cd735fdb37a
3
+ size 4361070048
checkpoint-5000-epoch-0/optimizer.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aa1457704c96775105d1787f67bb4a4cb2c899b948d39aae7605519a119a841d
3
+ size 955539578
checkpoint-5000-epoch-0/random_states_0.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:182d0310c20462ddb30b52d3b75bc973bf951ee7a1044515e35ba0fa6bf75189
3
+ size 14344
checkpoint-5000-epoch-0/scheduler.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8e59aef00050f673889222b3d653de69a99bb1a8c64cad08748089305697e88a
3
+ size 1064
distil-whisper/events.out.tfevents.1715198685.server02.2050598.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c49381c60ae8ceff82f71af02dbf8e86599e6acefc6e343d54f3164ff96cfb4c
3
- size 1036
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:acd480b8f76bebae6dd2b6412b25e2b1b996840cd1b32bf89eb83fd3199a3008
3
+ size 1680
distil-whisper/events.out.tfevents.1715202403.server02.2067342.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:155cd2eec4daa073571f0a64cf8d9e267994346662108932f0971dba6abde89d
3
+ size 88
distil-whisper/events.out.tfevents.1715222264.server02.2131186.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:32a06e3a1863146ccc1d6e9d2f7348ca1e03111936f4b49886b06d25442480bd
3
+ size 62058
run_distillation.py CHANGED
@@ -1756,5 +1756,5 @@ def main():
1756
  if __name__ == "__main__":
1757
  main()
1758
  '''
1759
- accelerate launch --mixed_precision=bf16 run_distillation.py --model_name_or_path "./distil-large-v3-init" --teacher_model_name_or_path "openai/whisper-large-v3" --train_dataset_name "mozilla-foundation/common_voice_15_0" --train_dataset_config_name "de" --train_split_name "train" --text_column_name "sentence" --eval_dataset_name "mozilla-foundation/common_voice_15_0" --eval_dataset_config_name "de" --eval_split_name "validation" --eval_text_column_name "sentence" --eval_steps 5 --save_steps 5 --warmup_steps 500 --learning_rate 1e-4 --lr_scheduler_type "linear" --logging_steps 5 --save_total_limit 1 --max_steps 15 --per_device_train_batch_size 4 --per_device_eval_batch_size 2 --dataloader_num_workers 2 --preprocessing_num_workers 2 --ddp_timeout 7200 --dtype "bfloat16" --output_dir "./" --use_pseudo_labels "false" --condition_on_prev_probability "0.0" --do_train --do_eval --gradient_checkpointing --overwrite_output_dir --predict_with_generate --freeze_encoder --streaming --push_to_hub --language de --max_eval_samples 50
1760
  '''
 
1756
  if __name__ == "__main__":
1757
  main()
1758
  '''
1759
+ accelerate launch --mixed_precision=bf16 run_distillation.py --model_name_or_path "./distil-large-v3-init" --teacher_model_name_or_path "openai/whisper-large-v3" --train_dataset_name "mozilla-foundation/common_voice_15_0" --train_dataset_config_name "de" --train_split_name "train" --text_column_name "sentence" --eval_dataset_name "mozilla-foundation/common_voice_15_0" --eval_dataset_config_name "de" --eval_split_name "validation" --eval_text_column_name "sentence" --eval_steps 5000 --save_steps 5000 --warmup_steps 500 --learning_rate 1e-4 --lr_scheduler_type "linear" --logging_steps 25 --save_total_limit 1 --max_steps 50000 --per_device_train_batch_size 4 --per_device_eval_batch_size 4 --dataloader_num_workers 2 --preprocessing_num_workers 2 --ddp_timeout 7200 --dtype "bfloat16" --output_dir "./" --use_pseudo_labels "false" --condition_on_prev_probability "0.0" --do_train --do_eval --gradient_checkpointing --overwrite_output_dir --predict_with_generate --freeze_encoder --streaming --push_to_hub --language de --max_eval_samples 50
1760
  '''