RASMUS committed on
Commit e1874c6
1 Parent(s): 88bb61c

Training in progress, step 27000

.gitattributes CHANGED
@@ -39,3 +39,5 @@ community-events/huggan/assets/pix2pix_maps.png filter=lfs diff=lfs merge=lfs -text
  community-events/huggan/assets/wandb.png filter=lfs diff=lfs merge=lfs -text
  wandb/run-20231118_132020-qg88991p/run-qg88991p.wandb filter=lfs diff=lfs merge=lfs -text
  wandb/run-20231119_105908-nwx0xdmm/run-nwx0xdmm.wandb filter=lfs diff=lfs merge=lfs -text
+ wandb/run-20231119_105908-nwx0xdmm/logs/debug-internal.log filter=lfs diff=lfs merge=lfs -text
+ wandb/run-20231122_100021-qrxr9o47/run-qrxr9o47.wandb filter=lfs diff=lfs merge=lfs -text
config.json CHANGED
@@ -34,6 +34,7 @@
  "mask_time_length": 10,
  "mask_time_min_masks": 2,
  "mask_time_prob": 0.05,
+ "max_length": 448,
  "max_source_positions": 1500,
  "max_target_positions": 448,
  "median_filter_width": 7,
model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:74eacfca3ebe20a52f7d30c59c5d75c0590319ade9029e6353b301d09ac8dbfc
+ oid sha256:256dbf5bf7dd51701df4c930521269fc6ebc126a8cfdf26df730c1343e52f8fa
  size 3219908024
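
The weights are stored through Git LFS, so only the pointer file changes here; the sha256 oid is the hash of the full 3.2 GB safetensors payload. A minimal sketch for verifying a downloaded copy against the new pointer (the local file path is an assumption):

```python
# Minimal sketch: compare a locally downloaded model.safetensors with the
# sha256 oid from the LFS pointer above. The local file path is an assumption.
import hashlib

def sha256_of(path: str, chunk_size: int = 1 << 20) -> str:
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        while block := f.read(chunk_size):
            digest.update(block)
    return digest.hexdigest()

expected = "256dbf5bf7dd51701df4c930521269fc6ebc126a8cfdf26df730c1343e52f8fa"
print(sha256_of("model.safetensors") == expected)
```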
start_train.sh CHANGED
@@ -42,4 +42,4 @@ deepspeed run_speech_recognition_seq2seq_streaming.py \
  --use_auth_token \
  --push_to_hub \
  --push_to_hub_model_id="WhisperLargeFinnishV3" \
- --resume_from_checkpoint="./checkpoint-7000"
+ --resume_from_checkpoint="./checkpoint-26000"
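
The launcher change only moves the resume point from checkpoint-7000 to checkpoint-26000. Inside the example script the flag is forwarded to the trainer, which restores model, optimizer and scheduler state and continues the global step counter toward max_steps=60000; a minimal sketch of that call, assuming a Seq2SeqTrainer built as in run_speech_recognition_seq2seq_streaming.py:

```python
# Minimal sketch of what --resume_from_checkpoint maps to inside the script:
# Trainer.train() reloads weights plus optimizer/scheduler state from the
# checkpoint directory and resumes the step counter toward max_steps.
# `trainer` is assumed to be the Seq2SeqTrainer built by the example script.
def resume_training(trainer, checkpoint_dir: str = "./checkpoint-26000"):
    return trainer.train(resume_from_checkpoint=checkpoint_dir)
```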
training_args.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:ca5d22c96e3a01c90c6d5a2f12fe2d7c1881cd34751d01b577ea8b353592f589
+ oid sha256:15474ac29434544ae625779797f25a48c1f3f4beceb060e4a903486009997382
  size 6456
wandb/debug-internal.log CHANGED
The diff for this file is too large to render. See raw diff
 
wandb/debug.log CHANGED
@@ -1,27 +1,27 @@
1
- 2023-11-19 10:59:09,019 INFO MainThread:605851 [wandb_setup.py:_flush():76] Current SDK version is 0.16.0
2
- 2023-11-19 10:59:09,020 INFO MainThread:605851 [wandb_setup.py:_flush():76] Configure stats pid to 605851
3
- 2023-11-19 10:59:09,020 INFO MainThread:605851 [wandb_setup.py:_flush():76] Loading settings from /home/rasmus/.config/wandb/settings
4
- 2023-11-19 10:59:09,020 INFO MainThread:605851 [wandb_setup.py:_flush():76] Loading settings from /mnt/e/wandb/settings
5
- 2023-11-19 10:59:09,020 INFO MainThread:605851 [wandb_setup.py:_flush():76] Loading settings from environment variables: {}
6
- 2023-11-19 10:59:09,020 INFO MainThread:605851 [wandb_setup.py:_flush():76] Applying setup settings: {'_disable_service': False}
7
- 2023-11-19 10:59:09,021 INFO MainThread:605851 [wandb_setup.py:_flush():76] Inferring run settings from compute environment: {'program_relpath': 'run_speech_recognition_seq2seq_streaming.py', 'program_abspath': '/mnt/e/run_speech_recognition_seq2seq_streaming.py', 'program': '/mnt/e/run_speech_recognition_seq2seq_streaming.py'}
8
- 2023-11-19 10:59:09,021 INFO MainThread:605851 [wandb_init.py:_log_setup():524] Logging user logs to /mnt/e/wandb/run-20231119_105908-nwx0xdmm/logs/debug.log
9
- 2023-11-19 10:59:09,022 INFO MainThread:605851 [wandb_init.py:_log_setup():525] Logging internal logs to /mnt/e/wandb/run-20231119_105908-nwx0xdmm/logs/debug-internal.log
10
- 2023-11-19 10:59:09,022 INFO MainThread:605851 [wandb_init.py:init():564] calling init triggers
11
- 2023-11-19 10:59:09,022 INFO MainThread:605851 [wandb_init.py:init():571] wandb.init called with sweep_config: {}
12
  config: {}
13
- 2023-11-19 10:59:09,023 INFO MainThread:605851 [wandb_init.py:init():614] starting backend
14
- 2023-11-19 10:59:09,023 INFO MainThread:605851 [wandb_init.py:init():618] setting up manager
15
- 2023-11-19 10:59:09,029 INFO MainThread:605851 [backend.py:_multiprocessing_setup():105] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
16
- 2023-11-19 10:59:09,033 INFO MainThread:605851 [wandb_init.py:init():624] backend started and connected
17
- 2023-11-19 10:59:09,053 INFO MainThread:605851 [wandb_init.py:init():716] updated telemetry
18
- 2023-11-19 10:59:09,056 INFO MainThread:605851 [wandb_init.py:init():749] communicating run to backend with 90.0 second timeout
19
- 2023-11-19 10:59:09,506 INFO MainThread:605851 [wandb_run.py:_on_init():2254] communicating current version
20
- 2023-11-19 10:59:09,554 INFO MainThread:605851 [wandb_run.py:_on_init():2263] got version response
21
- 2023-11-19 10:59:09,554 INFO MainThread:605851 [wandb_init.py:init():800] starting run threads in backend
22
- 2023-11-19 10:59:14,492 INFO MainThread:605851 [wandb_run.py:_console_start():2233] atexit reg
23
- 2023-11-19 10:59:14,492 INFO MainThread:605851 [wandb_run.py:_redirect():2088] redirect: wrap_raw
24
- 2023-11-19 10:59:14,493 INFO MainThread:605851 [wandb_run.py:_redirect():2153] Wrapping output streams.
25
- 2023-11-19 10:59:14,493 INFO MainThread:605851 [wandb_run.py:_redirect():2178] Redirects installed.
26
- 2023-11-19 10:59:14,495 INFO MainThread:605851 [wandb_init.py:init():841] run started, returning control to user process
27
- 2023-11-19 10:59:14,497 INFO MainThread:605851 [wandb_run.py:_config_callback():1342] config_cb None None {'vocab_size': 51866, 'num_mel_bins': 128, 'd_model': 1280, 'encoder_layers': 32, 'encoder_attention_heads': 20, 'decoder_layers': 32, 'decoder_attention_heads': 20, 'decoder_ffn_dim': 5120, 'encoder_ffn_dim': 5120, 'dropout': 0.0, 'attention_dropout': 0.0, 'activation_dropout': 0.0, 'activation_function': 'gelu', 'init_std': 0.02, 'encoder_layerdrop': 0.0, 'decoder_layerdrop': 0.0, 'use_cache': True, 'num_hidden_layers': 32, 'scale_embedding': False, 'max_source_positions': 1500, 'max_target_positions': 448, 'classifier_proj_size': 256, 'use_weighted_layer_sum': False, 'apply_spec_augment': False, 'mask_time_prob': 0.05, 'mask_time_length': 10, 'mask_time_min_masks': 2, 'mask_feature_prob': 0.0, 'mask_feature_length': 10, 'mask_feature_min_masks': 0, 'median_filter_width': 7, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float16', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'is_encoder_decoder': True, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'chunk_size_feed_forward': 0, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': [220, 50257], 'architectures': ['WhisperForConditionalGeneration'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 50257, 'pad_token_id': 50256, 'eos_token_id': 50257, 'sep_token_id': None, 'decoder_start_token_id': 50258, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'openai/whisper-large-v3', 'transformers_version': '4.36.0.dev0', 'model_type': 'whisper', 'forced_decoder_ids': None, 'output_dir': './', 'overwrite_output_dir': True, 'do_train': True, 'do_eval': True, 'do_predict': False, 'evaluation_strategy': 'steps', 'prediction_loss_only': False, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 4, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 2, 'eval_accumulation_steps': None, 'eval_delay': 0, 'learning_rate': 3e-06, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 3.0, 'max_steps': 60000, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 1000, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': './runs/Nov19_10-55-41_DESKTOP-59O9VN1', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 20, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 1000, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 
'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 1000, 'dataloader_num_workers': 0, 'past_index': -1, 'run_name': './', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': True, 'metric_for_best_model': 'wer', 'greater_is_better': False, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'deepspeed': 'ds_config.json', 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'input_length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': './checkpoint-7000', 'hub_model_id': 'RASMUS/WhisperLargeFinnishV3', 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': True, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'fp16_backend': 'auto', 'push_to_hub_model_id': 'WhisperLargeFinnishV3', 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': True, 'torch_compile_backend': 'inductor', 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': False, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'sortish_sampler': False, 'predict_with_generate': True, 'generation_max_length': 225, 'generation_num_beams': None, 'generation_config': None}
 
1
+ 2023-11-22 10:00:21,657 INFO MainThread:2217649 [wandb_setup.py:_flush():76] Current SDK version is 0.16.0
2
+ 2023-11-22 10:00:21,658 INFO MainThread:2217649 [wandb_setup.py:_flush():76] Configure stats pid to 2217649
3
+ 2023-11-22 10:00:21,658 INFO MainThread:2217649 [wandb_setup.py:_flush():76] Loading settings from /home/rasmus/.config/wandb/settings
4
+ 2023-11-22 10:00:21,658 INFO MainThread:2217649 [wandb_setup.py:_flush():76] Loading settings from /mnt/e/wandb/settings
5
+ 2023-11-22 10:00:21,659 INFO MainThread:2217649 [wandb_setup.py:_flush():76] Loading settings from environment variables: {}
6
+ 2023-11-22 10:00:21,659 INFO MainThread:2217649 [wandb_setup.py:_flush():76] Applying setup settings: {'_disable_service': False}
7
+ 2023-11-22 10:00:21,659 INFO MainThread:2217649 [wandb_setup.py:_flush():76] Inferring run settings from compute environment: {'program_relpath': 'run_speech_recognition_seq2seq_streaming.py', 'program_abspath': '/mnt/e/run_speech_recognition_seq2seq_streaming.py', 'program': '/mnt/e/run_speech_recognition_seq2seq_streaming.py'}
8
+ 2023-11-22 10:00:21,660 INFO MainThread:2217649 [wandb_init.py:_log_setup():524] Logging user logs to /mnt/e/wandb/run-20231122_100021-qrxr9o47/logs/debug.log
9
+ 2023-11-22 10:00:21,660 INFO MainThread:2217649 [wandb_init.py:_log_setup():525] Logging internal logs to /mnt/e/wandb/run-20231122_100021-qrxr9o47/logs/debug-internal.log
10
+ 2023-11-22 10:00:21,660 INFO MainThread:2217649 [wandb_init.py:init():564] calling init triggers
11
+ 2023-11-22 10:00:21,660 INFO MainThread:2217649 [wandb_init.py:init():571] wandb.init called with sweep_config: {}
12
  config: {}
13
+ 2023-11-22 10:00:21,661 INFO MainThread:2217649 [wandb_init.py:init():614] starting backend
14
+ 2023-11-22 10:00:21,661 INFO MainThread:2217649 [wandb_init.py:init():618] setting up manager
15
+ 2023-11-22 10:00:21,667 INFO MainThread:2217649 [backend.py:_multiprocessing_setup():105] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
16
+ 2023-11-22 10:00:21,676 INFO MainThread:2217649 [wandb_init.py:init():624] backend started and connected
17
+ 2023-11-22 10:00:21,717 INFO MainThread:2217649 [wandb_init.py:init():716] updated telemetry
18
+ 2023-11-22 10:00:21,722 INFO MainThread:2217649 [wandb_init.py:init():749] communicating run to backend with 90.0 second timeout
19
+ 2023-11-22 10:00:22,205 INFO MainThread:2217649 [wandb_run.py:_on_init():2254] communicating current version
20
+ 2023-11-22 10:00:22,253 INFO MainThread:2217649 [wandb_run.py:_on_init():2263] got version response
21
+ 2023-11-22 10:00:22,253 INFO MainThread:2217649 [wandb_init.py:init():800] starting run threads in backend
22
+ 2023-11-22 10:00:27,927 INFO MainThread:2217649 [wandb_run.py:_console_start():2233] atexit reg
23
+ 2023-11-22 10:00:27,928 INFO MainThread:2217649 [wandb_run.py:_redirect():2088] redirect: wrap_raw
24
+ 2023-11-22 10:00:27,929 INFO MainThread:2217649 [wandb_run.py:_redirect():2153] Wrapping output streams.
25
+ 2023-11-22 10:00:27,929 INFO MainThread:2217649 [wandb_run.py:_redirect():2178] Redirects installed.
26
+ 2023-11-22 10:00:27,930 INFO MainThread:2217649 [wandb_init.py:init():841] run started, returning control to user process
27
+ 2023-11-22 10:00:27,933 INFO MainThread:2217649 [wandb_run.py:_config_callback():1342] config_cb None None {'vocab_size': 51866, 'num_mel_bins': 128, 'd_model': 1280, 'encoder_layers': 32, 'encoder_attention_heads': 20, 'decoder_layers': 32, 'decoder_attention_heads': 20, 'decoder_ffn_dim': 5120, 'encoder_ffn_dim': 5120, 'dropout': 0.0, 'attention_dropout': 0.0, 'activation_dropout': 0.0, 'activation_function': 'gelu', 'init_std': 0.02, 'encoder_layerdrop': 0.0, 'decoder_layerdrop': 0.0, 'use_cache': True, 'num_hidden_layers': 32, 'scale_embedding': False, 'max_source_positions': 1500, 'max_target_positions': 448, 'classifier_proj_size': 256, 'use_weighted_layer_sum': False, 'apply_spec_augment': False, 'mask_time_prob': 0.05, 'mask_time_length': 10, 'mask_time_min_masks': 2, 'mask_feature_prob': 0.0, 'mask_feature_length': 10, 'mask_feature_min_masks': 0, 'median_filter_width': 7, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float16', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'is_encoder_decoder': True, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 448, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'chunk_size_feed_forward': 0, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': [220, 50257], 'architectures': ['WhisperForConditionalGeneration'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 50257, 'pad_token_id': 50256, 'eos_token_id': 50257, 'sep_token_id': None, 'decoder_start_token_id': 50258, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'openai/whisper-large-v3', 'transformers_version': '4.36.0.dev0', 'model_type': 'whisper', 'forced_decoder_ids': None, 'output_dir': './', 'overwrite_output_dir': True, 'do_train': True, 'do_eval': True, 'do_predict': False, 'evaluation_strategy': 'steps', 'prediction_loss_only': False, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 4, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 2, 'eval_accumulation_steps': None, 'eval_delay': 0, 'learning_rate': 3e-06, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 3.0, 'max_steps': 60000, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 1000, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': './runs/Nov22_09-55-44_DESKTOP-59O9VN1', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 20, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 1000, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 
'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 1000, 'dataloader_num_workers': 0, 'past_index': -1, 'run_name': './', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': True, 'metric_for_best_model': 'wer', 'greater_is_better': False, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'deepspeed': 'ds_config.json', 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'input_length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': './checkpoint-26000', 'hub_model_id': 'RASMUS/WhisperLargeFinnishV3', 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': True, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'fp16_backend': 'auto', 'push_to_hub_model_id': 'WhisperLargeFinnishV3', 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': True, 'torch_compile_backend': 'inductor', 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': False, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'sortish_sampler': False, 'predict_with_generate': True, 'generation_max_length': 225, 'generation_num_beams': None, 'generation_config': None}
wandb/run-20231119_105908-nwx0xdmm/files/output.log CHANGED
The diff for this file is too large to render. See raw diff
 
wandb/run-20231119_105908-nwx0xdmm/files/wandb-summary.json CHANGED
@@ -1 +1 @@
- {"train/loss": 0.0712, "train/learning_rate": 2.0324237288135594e-06, "train/epoch": 0.33, "train/global_step": 20060, "_timestamp": 1700533941.1437454, "_runtime": 149592.1096584797, "_step": 665, "eval/loss": 0.1776123046875, "eval/wer": 8.509234828496043, "eval/runtime": 604.4094, "eval/samples_per_second": 2.819, "eval/steps_per_second": 0.705}
+ {"train/loss": 0.0556, "train/learning_rate": 1.7094915254237288e-06, "train/epoch": 0.44, "train/global_step": 26420, "_timestamp": 1700600828.0143824, "_runtime": 216478.9802954197, "_step": 989, "eval/loss": 0.197509765625, "eval/wer": 8.301922352054278, "eval/runtime": 649.294, "eval/samples_per_second": 2.624, "eval/steps_per_second": 0.656, "_wandb": {"runtime": 216615}}
wandb/run-20231119_105908-nwx0xdmm/logs/debug-internal.log CHANGED
The diff for this file is too large to render. See raw diff
 
wandb/run-20231119_105908-nwx0xdmm/logs/debug.log CHANGED
@@ -25,3 +25,4 @@ config: {}
  2023-11-19 10:59:14,493 INFO MainThread:605851 [wandb_run.py:_redirect():2178] Redirects installed.
  2023-11-19 10:59:14,495 INFO MainThread:605851 [wandb_init.py:init():841] run started, returning control to user process
  2023-11-19 10:59:14,497 INFO MainThread:605851 [wandb_run.py:_config_callback():1342] config_cb None None {'vocab_size': 51866, 'num_mel_bins': 128, 'd_model': 1280, 'encoder_layers': 32, 'encoder_attention_heads': 20, 'decoder_layers': 32, 'decoder_attention_heads': 20, 'decoder_ffn_dim': 5120, 'encoder_ffn_dim': 5120, 'dropout': 0.0, 'attention_dropout': 0.0, 'activation_dropout': 0.0, 'activation_function': 'gelu', 'init_std': 0.02, 'encoder_layerdrop': 0.0, 'decoder_layerdrop': 0.0, 'use_cache': True, 'num_hidden_layers': 32, 'scale_embedding': False, 'max_source_positions': 1500, 'max_target_positions': 448, 'classifier_proj_size': 256, 'use_weighted_layer_sum': False, 'apply_spec_augment': False, 'mask_time_prob': 0.05, 'mask_time_length': 10, 'mask_time_min_masks': 2, 'mask_feature_prob': 0.0, 'mask_feature_length': 10, 'mask_feature_min_masks': 0, 'median_filter_width': 7, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float16', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'is_encoder_decoder': True, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'chunk_size_feed_forward': 0, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': [220, 50257], 'architectures': ['WhisperForConditionalGeneration'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 50257, 'pad_token_id': 50256, 'eos_token_id': 50257, 'sep_token_id': None, 'decoder_start_token_id': 50258, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'openai/whisper-large-v3', 'transformers_version': '4.36.0.dev0', 'model_type': 'whisper', 'forced_decoder_ids': None, 'output_dir': './', 'overwrite_output_dir': True, 'do_train': True, 'do_eval': True, 'do_predict': False, 'evaluation_strategy': 'steps', 'prediction_loss_only': False, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 4, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 2, 'eval_accumulation_steps': None, 'eval_delay': 0, 'learning_rate': 3e-06, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 3.0, 'max_steps': 60000, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 1000, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': './runs/Nov19_10-55-41_DESKTOP-59O9VN1', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 20, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 1000, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 
'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 1000, 'dataloader_num_workers': 0, 'past_index': -1, 'run_name': './', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': True, 'metric_for_best_model': 'wer', 'greater_is_better': False, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'deepspeed': 'ds_config.json', 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'input_length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': './checkpoint-7000', 'hub_model_id': 'RASMUS/WhisperLargeFinnishV3', 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': True, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'fp16_backend': 'auto', 'push_to_hub_model_id': 'WhisperLargeFinnishV3', 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': True, 'torch_compile_backend': 'inductor', 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': False, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'sortish_sampler': False, 'predict_with_generate': True, 'generation_max_length': 225, 'generation_num_beams': None, 'generation_config': None}
+ 2023-11-21 23:09:29,646 WARNING MsgRouterThr:605851 [router.py:message_loop():77] message_loop has been closed
wandb/run-20231119_105908-nwx0xdmm/run-nwx0xdmm.wandb CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:4815856c5203c68d373aea1f58544c4f5864fd2c3cc73cf29d1fde1df0640e7a
- size 9458453
+ oid sha256:27a032e58caafe6cf610e02ca71bf9f8041e357ee650fa4ab7879c284a033836
+ size 14294891
wandb/run-20231122_100021-qrxr9o47/files/conda-environment.yaml ADDED
@@ -0,0 +1,219 @@
1
+ name: WhisperFinetuneEnv
2
+ channels:
3
+ - defaults
4
+ dependencies:
5
+ - _libgcc_mutex=0.1=main
6
+ - _openmp_mutex=5.1=1_gnu
7
+ - bzip2=1.0.8=h7b6447c_0
8
+ - ca-certificates=2023.08.22=h06a4308_0
9
+ - ld_impl_linux-64=2.38=h1181459_1
10
+ - libffi=3.4.4=h6a678d5_0
11
+ - libgcc-ng=11.2.0=h1234567_1
12
+ - libgomp=11.2.0=h1234567_1
13
+ - libstdcxx-ng=11.2.0=h1234567_1
14
+ - libuuid=1.41.5=h5eee18b_0
15
+ - ncurses=6.4=h6a678d5_0
16
+ - openssl=3.0.12=h7f8727e_0
17
+ - pip=23.3=py310h06a4308_0
18
+ - python=3.10.13=h955ad1f_0
19
+ - readline=8.2=h5eee18b_0
20
+ - setuptools=68.0.0=py310h06a4308_0
21
+ - sqlite=3.41.2=h5eee18b_0
22
+ - tk=8.6.12=h1ccaba5_0
23
+ - wheel=0.41.2=py310h06a4308_0
24
+ - xz=5.4.2=h5eee18b_0
25
+ - zlib=1.2.13=h5eee18b_0
26
+ - pip:
27
+ - absl-py==2.0.0
28
+ - accelerate==0.24.1
29
+ - aiohttp==3.9.0
30
+ - aiosignal==1.3.1
31
+ - annotated-types==0.6.0
32
+ - anyio==4.0.0
33
+ - appdirs==1.4.4
34
+ - argon2-cffi==23.1.0
35
+ - argon2-cffi-bindings==21.2.0
36
+ - arrow==1.3.0
37
+ - asttokens==2.4.1
38
+ - async-lru==2.0.4
39
+ - async-timeout==4.0.3
40
+ - attrs==23.1.0
41
+ - audioread==3.0.1
42
+ - babel==2.13.1
43
+ - beautifulsoup4==4.12.2
44
+ - bleach==6.1.0
45
+ - cachetools==5.3.2
46
+ - certifi==2023.11.17
47
+ - cffi==1.16.0
48
+ - charset-normalizer==3.3.2
49
+ - click==8.1.7
50
+ - comm==0.2.0
51
+ - datasets==2.15.1.dev0
52
+ - debugpy==1.8.0
53
+ - decorator==5.1.1
54
+ - deepspeed==0.12.3
55
+ - defusedxml==0.7.1
56
+ - dill==0.3.7
57
+ - docker-pycreds==0.4.0
58
+ - evaluate==0.4.1
59
+ - exceptiongroup==1.1.3
60
+ - executing==2.0.1
61
+ - fastjsonschema==2.19.0
62
+ - filelock==3.13.1
63
+ - fqdn==1.5.1
64
+ - frozenlist==1.4.0
65
+ - fsspec==2023.10.0
66
+ - gitdb==4.0.11
67
+ - gitpython==3.1.40
68
+ - google-auth==2.23.4
69
+ - google-auth-oauthlib==1.1.0
70
+ - grpcio==1.59.3
71
+ - hjson==3.1.0
72
+ - huggingface-hub==0.19.4
73
+ - idna==3.4
74
+ - iprogress==0.4
75
+ - ipykernel==6.26.0
76
+ - ipython==8.17.2
77
+ - ipywidgets==8.1.1
78
+ - isoduration==20.11.0
79
+ - jedi==0.19.1
80
+ - jinja2==3.1.2
81
+ - jiwer==3.0.3
82
+ - joblib==1.3.2
83
+ - json5==0.9.14
84
+ - jsonpointer==2.4
85
+ - jsonschema==4.20.0
86
+ - jsonschema-specifications==2023.11.1
87
+ - jupyter==1.0.0
88
+ - jupyter-client==8.6.0
89
+ - jupyter-console==6.6.3
90
+ - jupyter-core==5.5.0
91
+ - jupyter-events==0.9.0
92
+ - jupyter-lsp==2.2.0
93
+ - jupyter-server==2.10.1
94
+ - jupyter-server-terminals==0.4.4
95
+ - jupyterlab==4.0.8
96
+ - jupyterlab-pygments==0.2.2
97
+ - jupyterlab-server==2.25.1
98
+ - jupyterlab-widgets==3.0.9
99
+ - lazy-loader==0.3
100
+ - librosa==0.10.1
101
+ - llvmlite==0.41.1
102
+ - markdown==3.5.1
103
+ - markupsafe==2.1.3
104
+ - matplotlib-inline==0.1.6
105
+ - mistune==3.0.2
106
+ - more-itertools==10.1.0
107
+ - mpmath==1.3.0
108
+ - msgpack==1.0.7
109
+ - multidict==6.0.4
110
+ - multiprocess==0.70.15
111
+ - nbclient==0.9.0
112
+ - nbconvert==7.11.0
113
+ - nbformat==5.9.2
114
+ - nest-asyncio==1.5.8
115
+ - networkx==3.2.1
116
+ - ninja==1.11.1.1
117
+ - notebook==7.0.6
118
+ - notebook-shim==0.2.3
119
+ - numba==0.58.1
120
+ - numpy==1.26.2
121
+ - nvidia-cublas-cu12==12.1.3.1
122
+ - nvidia-cuda-cupti-cu12==12.1.105
123
+ - nvidia-cuda-nvrtc-cu12==12.1.105
124
+ - nvidia-cuda-runtime-cu12==12.1.105
125
+ - nvidia-cudnn-cu12==8.9.2.26
126
+ - nvidia-cufft-cu12==11.0.2.54
127
+ - nvidia-curand-cu12==10.3.2.106
128
+ - nvidia-cusolver-cu12==11.4.5.107
129
+ - nvidia-cusparse-cu12==12.1.0.106
130
+ - nvidia-nccl-cu12==2.18.1
131
+ - nvidia-nvjitlink-cu12==12.3.101
132
+ - nvidia-nvtx-cu12==12.1.105
133
+ - oauthlib==3.2.2
134
+ - overrides==7.4.0
135
+ - packaging==23.2
136
+ - pandas==2.1.3
137
+ - pandocfilters==1.5.0
138
+ - parso==0.8.3
139
+ - pexpect==4.8.0
140
+ - pillow==10.1.0
141
+ - platformdirs==4.0.0
142
+ - pooch==1.8.0
143
+ - prometheus-client==0.18.0
144
+ - prompt-toolkit==3.0.41
145
+ - protobuf==4.23.4
146
+ - psutil==5.9.6
147
+ - ptyprocess==0.7.0
148
+ - pure-eval==0.2.2
149
+ - py-cpuinfo==9.0.0
150
+ - pyarrow==14.0.1
151
+ - pyarrow-hotfix==0.5
152
+ - pyasn1==0.5.0
153
+ - pyasn1-modules==0.3.0
154
+ - pycparser==2.21
155
+ - pydantic==2.5.1
156
+ - pydantic-core==2.14.3
157
+ - pygments==2.16.1
158
+ - pynvml==11.5.0
159
+ - python-dateutil==2.8.2
160
+ - python-json-logger==2.0.7
161
+ - pytz==2023.3.post1
162
+ - pyyaml==6.0.1
163
+ - pyzmq==25.1.1
164
+ - qtconsole==5.5.1
165
+ - qtpy==2.4.1
166
+ - rapidfuzz==3.5.2
167
+ - referencing==0.31.0
168
+ - regex==2023.10.3
169
+ - requests==2.31.0
170
+ - requests-oauthlib==1.3.1
171
+ - responses==0.18.0
172
+ - rfc3339-validator==0.1.4
173
+ - rfc3986-validator==0.1.1
174
+ - rpds-py==0.13.0
175
+ - rsa==4.9
176
+ - safetensors==0.4.0
177
+ - scikit-learn==1.3.2
178
+ - scipy==1.11.3
179
+ - send2trash==1.8.2
180
+ - sentry-sdk==1.35.0
181
+ - setproctitle==1.3.3
182
+ - six==1.16.0
183
+ - smmap==5.0.1
184
+ - sniffio==1.3.0
185
+ - soundfile==0.12.1
186
+ - soupsieve==2.5
187
+ - soxr==0.3.7
188
+ - stack-data==0.6.3
189
+ - sympy==1.12
190
+ - tensorboard==2.15.1
191
+ - tensorboard-data-server==0.7.2
192
+ - terminado==0.18.0
193
+ - threadpoolctl==3.2.0
194
+ - tinycss2==1.2.1
195
+ - tokenizers==0.15.0
196
+ - tomli==2.0.1
197
+ - torch==2.1.1
198
+ - torchaudio==2.1.1
199
+ - torchvision==0.16.1
200
+ - tornado==6.3.3
201
+ - tqdm==4.66.1
202
+ - traitlets==5.13.0
203
+ - transformers==4.36.0.dev0
204
+ - triton==2.1.0
205
+ - types-python-dateutil==2.8.19.14
206
+ - typing-extensions==4.8.0
207
+ - tzdata==2023.3
208
+ - uri-template==1.3.0
209
+ - urllib3==2.1.0
210
+ - wandb==0.16.0
211
+ - wcwidth==0.2.10
212
+ - webcolors==1.13
213
+ - webencodings==0.5.1
214
+ - websocket-client==1.6.4
215
+ - werkzeug==3.0.1
216
+ - widgetsnbextension==4.0.9
217
+ - xxhash==3.4.1
218
+ - yarl==1.9.2
219
+ prefix: /home/rasmus/miniconda3/envs/WhisperFinetuneEnv
wandb/run-20231122_100021-qrxr9o47/files/config.yaml ADDED
@@ -0,0 +1,718 @@
1
+ wandb_version: 1
2
+
3
+ _wandb:
4
+ desc: null
5
+ value:
6
+ python_version: 3.10.13
7
+ cli_version: 0.16.0
8
+ framework: huggingface
9
+ huggingface_version: 4.36.0.dev0
10
+ is_jupyter_run: false
11
+ is_kaggle_kernel: false
12
+ start_time: 1700640021.677551
13
+ t:
14
+ 1:
15
+ - 1
16
+ - 5
17
+ - 11
18
+ - 49
19
+ - 51
20
+ - 53
21
+ - 55
22
+ - 71
23
+ - 100
24
+ 2:
25
+ - 1
26
+ - 5
27
+ - 11
28
+ - 49
29
+ - 51
30
+ - 53
31
+ - 55
32
+ - 71
33
+ - 100
34
+ 3:
35
+ - 7
36
+ - 23
37
+ 4: 3.10.13
38
+ 5: 0.16.0
39
+ 6: 4.36.0.dev0
40
+ 8:
41
+ - 5
42
+ 9:
43
+ 1: transformers_trainer
44
+ 13: linux-x86_64
45
+ m:
46
+ - 1: train/global_step
47
+ 6:
48
+ - 3
49
+ - 1: train/loss
50
+ 5: 1
51
+ 6:
52
+ - 1
53
+ - 1: train/learning_rate
54
+ 5: 1
55
+ 6:
56
+ - 1
57
+ - 1: train/epoch
58
+ 5: 1
59
+ 6:
60
+ - 1
61
+ - 1: eval/loss
62
+ 5: 1
63
+ 6:
64
+ - 1
65
+ - 1: eval/wer
66
+ 5: 1
67
+ 6:
68
+ - 1
69
+ - 1: eval/runtime
70
+ 5: 1
71
+ 6:
72
+ - 1
73
+ - 1: eval/samples_per_second
74
+ 5: 1
75
+ 6:
76
+ - 1
77
+ - 1: eval/steps_per_second
78
+ 5: 1
79
+ 6:
80
+ - 1
81
+ vocab_size:
82
+ desc: null
83
+ value: 51866
84
+ num_mel_bins:
85
+ desc: null
86
+ value: 128
87
+ d_model:
88
+ desc: null
89
+ value: 1280
90
+ encoder_layers:
91
+ desc: null
92
+ value: 32
93
+ encoder_attention_heads:
94
+ desc: null
95
+ value: 20
96
+ decoder_layers:
97
+ desc: null
98
+ value: 32
99
+ decoder_attention_heads:
100
+ desc: null
101
+ value: 20
102
+ decoder_ffn_dim:
103
+ desc: null
104
+ value: 5120
105
+ encoder_ffn_dim:
106
+ desc: null
107
+ value: 5120
108
+ dropout:
109
+ desc: null
110
+ value: 0.0
111
+ attention_dropout:
112
+ desc: null
113
+ value: 0.0
114
+ activation_dropout:
115
+ desc: null
116
+ value: 0.0
117
+ activation_function:
118
+ desc: null
119
+ value: gelu
120
+ init_std:
121
+ desc: null
122
+ value: 0.02
123
+ encoder_layerdrop:
124
+ desc: null
125
+ value: 0.0
126
+ decoder_layerdrop:
127
+ desc: null
128
+ value: 0.0
129
+ use_cache:
130
+ desc: null
131
+ value: true
132
+ num_hidden_layers:
133
+ desc: null
134
+ value: 32
135
+ scale_embedding:
136
+ desc: null
137
+ value: false
138
+ max_source_positions:
139
+ desc: null
140
+ value: 1500
141
+ max_target_positions:
142
+ desc: null
143
+ value: 448
144
+ classifier_proj_size:
145
+ desc: null
146
+ value: 256
147
+ use_weighted_layer_sum:
148
+ desc: null
149
+ value: false
150
+ apply_spec_augment:
151
+ desc: null
152
+ value: false
153
+ mask_time_prob:
154
+ desc: null
155
+ value: 0.05
156
+ mask_time_length:
157
+ desc: null
158
+ value: 10
159
+ mask_time_min_masks:
160
+ desc: null
161
+ value: 2
162
+ mask_feature_prob:
163
+ desc: null
164
+ value: 0.0
165
+ mask_feature_length:
166
+ desc: null
167
+ value: 10
168
+ mask_feature_min_masks:
169
+ desc: null
170
+ value: 0
171
+ median_filter_width:
172
+ desc: null
173
+ value: 7
174
+ return_dict:
175
+ desc: null
176
+ value: true
177
+ output_hidden_states:
178
+ desc: null
179
+ value: false
180
+ output_attentions:
181
+ desc: null
182
+ value: false
183
+ torchscript:
184
+ desc: null
185
+ value: false
186
+ torch_dtype:
187
+ desc: null
188
+ value: float16
189
+ use_bfloat16:
190
+ desc: null
191
+ value: false
192
+ tf_legacy_loss:
193
+ desc: null
194
+ value: false
195
+ pruned_heads:
196
+ desc: null
197
+ value: {}
198
+ tie_word_embeddings:
199
+ desc: null
200
+ value: true
201
+ is_encoder_decoder:
202
+ desc: null
203
+ value: true
204
+ is_decoder:
205
+ desc: null
206
+ value: false
207
+ cross_attention_hidden_size:
208
+ desc: null
209
+ value: null
210
+ add_cross_attention:
211
+ desc: null
212
+ value: false
213
+ tie_encoder_decoder:
214
+ desc: null
215
+ value: false
216
+ max_length:
217
+ desc: null
218
+ value: 448
219
+ min_length:
220
+ desc: null
221
+ value: 0
222
+ do_sample:
223
+ desc: null
224
+ value: false
225
+ early_stopping:
226
+ desc: null
227
+ value: false
228
+ num_beams:
229
+ desc: null
230
+ value: 1
231
+ num_beam_groups:
232
+ desc: null
233
+ value: 1
234
+ diversity_penalty:
235
+ desc: null
236
+ value: 0.0
237
+ temperature:
238
+ desc: null
239
+ value: 1.0
240
+ top_k:
241
+ desc: null
242
+ value: 50
243
+ top_p:
244
+ desc: null
245
+ value: 1.0
246
+ typical_p:
247
+ desc: null
248
+ value: 1.0
249
+ repetition_penalty:
250
+ desc: null
251
+ value: 1.0
252
+ length_penalty:
253
+ desc: null
254
+ value: 1.0
255
+ no_repeat_ngram_size:
256
+ desc: null
257
+ value: 0
258
+ encoder_no_repeat_ngram_size:
259
+ desc: null
260
+ value: 0
261
+ bad_words_ids:
262
+ desc: null
263
+ value: null
264
+ num_return_sequences:
265
+ desc: null
266
+ value: 1
267
+ chunk_size_feed_forward:
268
+ desc: null
269
+ value: 0
270
+ output_scores:
271
+ desc: null
272
+ value: false
273
+ return_dict_in_generate:
274
+ desc: null
275
+ value: false
276
+ forced_bos_token_id:
277
+ desc: null
278
+ value: null
279
+ forced_eos_token_id:
280
+ desc: null
281
+ value: null
282
+ remove_invalid_values:
283
+ desc: null
284
+ value: false
285
+ exponential_decay_length_penalty:
286
+ desc: null
287
+ value: null
288
+ suppress_tokens:
289
+ desc: null
290
+ value: null
291
+ begin_suppress_tokens:
292
+ desc: null
293
+ value:
294
+ - 220
295
+ - 50257
296
+ architectures:
297
+ desc: null
298
+ value:
299
+ - WhisperForConditionalGeneration
300
+ finetuning_task:
301
+ desc: null
302
+ value: null
303
+ id2label:
304
+ desc: null
305
+ value:
306
+ '0': LABEL_0
307
+ '1': LABEL_1
308
+ label2id:
309
+ desc: null
310
+ value:
311
+ LABEL_0: 0
312
+ LABEL_1: 1
313
+ tokenizer_class:
314
+ desc: null
315
+ value: null
316
+ prefix:
317
+ desc: null
318
+ value: null
319
+ bos_token_id:
320
+ desc: null
321
+ value: 50257
322
+ pad_token_id:
323
+ desc: null
324
+ value: 50256
325
+ eos_token_id:
326
+ desc: null
327
+ value: 50257
328
+ sep_token_id:
329
+ desc: null
330
+ value: null
331
+ decoder_start_token_id:
332
+ desc: null
333
+ value: 50258
334
+ task_specific_params:
335
+ desc: null
336
+ value: null
337
+ problem_type:
338
+ desc: null
339
+ value: null
340
+ _name_or_path:
341
+ desc: null
342
+ value: openai/whisper-large-v3
343
+ transformers_version:
344
+ desc: null
345
+ value: 4.36.0.dev0
346
+ model_type:
347
+ desc: null
348
+ value: whisper
349
+ forced_decoder_ids:
350
+ desc: null
351
+ value: null
352
+ output_dir:
353
+ desc: null
354
+ value: ./
355
+ overwrite_output_dir:
356
+ desc: null
357
+ value: true
358
+ do_train:
359
+ desc: null
360
+ value: true
361
+ do_eval:
362
+ desc: null
363
+ value: true
364
+ do_predict:
365
+ desc: null
366
+ value: false
367
+ evaluation_strategy:
368
+ desc: null
369
+ value: steps
370
+ prediction_loss_only:
371
+ desc: null
372
+ value: false
373
+ per_device_train_batch_size:
374
+ desc: null
375
+ value: 8
376
+ per_device_eval_batch_size:
377
+ desc: null
378
+ value: 4
379
+ per_gpu_train_batch_size:
380
+ desc: null
381
+ value: null
382
+ per_gpu_eval_batch_size:
383
+ desc: null
384
+ value: null
385
+ gradient_accumulation_steps:
386
+ desc: null
387
+ value: 2
388
+ eval_accumulation_steps:
389
+ desc: null
390
+ value: null
391
+ eval_delay:
392
+ desc: null
393
+ value: 0
394
+ learning_rate:
395
+ desc: null
396
+ value: 3.0e-06
397
+ weight_decay:
398
+ desc: null
399
+ value: 0.0
400
+ adam_beta1:
401
+ desc: null
402
+ value: 0.9
403
+ adam_beta2:
404
+ desc: null
405
+ value: 0.999
406
+ adam_epsilon:
407
+ desc: null
408
+ value: 1.0e-08
409
+ max_grad_norm:
410
+ desc: null
411
+ value: 1.0
412
+ num_train_epochs:
413
+ desc: null
414
+ value: 3.0
415
+ max_steps:
416
+ desc: null
417
+ value: 60000
418
+ lr_scheduler_type:
419
+ desc: null
420
+ value: linear
421
+ lr_scheduler_kwargs:
422
+ desc: null
423
+ value: {}
424
+ warmup_ratio:
425
+ desc: null
426
+ value: 0.0
427
+ warmup_steps:
428
+ desc: null
429
+ value: 1000
430
+ log_level:
431
+ desc: null
432
+ value: passive
433
+ log_level_replica:
434
+ desc: null
435
+ value: warning
436
+ log_on_each_node:
437
+ desc: null
438
+ value: true
439
+ logging_dir:
440
+ desc: null
441
+ value: ./runs/Nov22_09-55-44_DESKTOP-59O9VN1
442
+ logging_strategy:
443
+ desc: null
444
+ value: steps
445
+ logging_first_step:
446
+ desc: null
447
+ value: false
448
+ logging_steps:
449
+ desc: null
450
+ value: 20
451
+ logging_nan_inf_filter:
452
+ desc: null
453
+ value: true
454
+ save_strategy:
455
+ desc: null
456
+ value: steps
457
+ save_steps:
458
+ desc: null
459
+ value: 1000
460
+ save_total_limit:
461
+ desc: null
462
+ value: null
463
+ save_safetensors:
464
+ desc: null
465
+ value: true
466
+ save_on_each_node:
467
+ desc: null
468
+ value: false
469
+ no_cuda:
470
+ desc: null
471
+ value: false
472
+ use_cpu:
473
+ desc: null
474
+ value: false
475
+ use_mps_device:
476
+ desc: null
477
+ value: false
478
+ seed:
479
+ desc: null
480
+ value: 42
481
+ data_seed:
482
+ desc: null
483
+ value: null
484
+ jit_mode_eval:
485
+ desc: null
486
+ value: false
487
+ use_ipex:
488
+ desc: null
489
+ value: false
490
+ bf16:
491
+ desc: null
492
+ value: false
493
+ fp16:
494
+ desc: null
495
+ value: true
496
+ fp16_opt_level:
497
+ desc: null
498
+ value: O1
499
+ half_precision_backend:
500
+ desc: null
501
+ value: auto
502
+ bf16_full_eval:
503
+ desc: null
504
+ value: false
505
+ fp16_full_eval:
506
+ desc: null
507
+ value: false
508
+ tf32:
509
+ desc: null
510
+ value: null
511
+ local_rank:
512
+ desc: null
513
+ value: 0
514
+ ddp_backend:
515
+ desc: null
516
+ value: null
517
+ tpu_num_cores:
518
+ desc: null
519
+ value: null
520
+ tpu_metrics_debug:
521
+ desc: null
522
+ value: false
523
+ debug:
524
+ desc: null
525
+ value: []
526
+ dataloader_drop_last:
527
+ desc: null
528
+ value: false
529
+ eval_steps:
530
+ desc: null
531
+ value: 1000
532
+ dataloader_num_workers:
533
+ desc: null
534
+ value: 0
535
+ past_index:
536
+ desc: null
537
+ value: -1
538
+ run_name:
539
+ desc: null
540
+ value: ./
541
+ disable_tqdm:
542
+ desc: null
543
+ value: false
544
+ remove_unused_columns:
545
+ desc: null
546
+ value: true
547
+ label_names:
548
+ desc: null
549
+ value: null
550
+ load_best_model_at_end:
551
+ desc: null
552
+ value: true
553
+ metric_for_best_model:
554
+ desc: null
555
+ value: wer
556
+ greater_is_better:
557
+ desc: null
558
+ value: false
559
+ ignore_data_skip:
560
+ desc: null
561
+ value: false
562
+ fsdp:
563
+ desc: null
564
+ value: []
565
+ fsdp_min_num_params:
566
+ desc: null
567
+ value: 0
568
+ fsdp_config:
569
+ desc: null
570
+ value:
571
+ min_num_params: 0
572
+ xla: false
573
+ xla_fsdp_grad_ckpt: false
574
+ fsdp_transformer_layer_cls_to_wrap:
575
+ desc: null
576
+ value: null
577
+ deepspeed:
578
+ desc: null
579
+ value: ds_config.json
580
+ label_smoothing_factor:
581
+ desc: null
582
+ value: 0.0
583
+ optim:
584
+ desc: null
585
+ value: adamw_torch
586
+ optim_args:
587
+ desc: null
588
+ value: null
589
+ adafactor:
590
+ desc: null
591
+ value: false
592
+ group_by_length:
593
+ desc: null
594
+ value: false
595
+ length_column_name:
596
+ desc: null
597
+ value: input_length
598
+ report_to:
599
+ desc: null
600
+ value:
601
+ - wandb
602
+ ddp_find_unused_parameters:
603
+ desc: null
604
+ value: null
605
+ ddp_bucket_cap_mb:
606
+ desc: null
607
+ value: null
608
+ ddp_broadcast_buffers:
609
+ desc: null
610
+ value: null
611
+ dataloader_pin_memory:
612
+ desc: null
613
+ value: true
614
+ skip_memory_metrics:
615
+ desc: null
616
+ value: true
617
+ use_legacy_prediction_loop:
618
+ desc: null
619
+ value: false
620
+ push_to_hub:
621
+ desc: null
622
+ value: true
623
+ resume_from_checkpoint:
624
+ desc: null
625
+ value: ./checkpoint-26000
626
+ hub_model_id:
627
+ desc: null
628
+ value: RASMUS/WhisperLargeFinnishV3
629
+ hub_strategy:
630
+ desc: null
631
+ value: every_save
632
+ hub_token:
633
+ desc: null
634
+ value: <HUB_TOKEN>
635
+ hub_private_repo:
636
+ desc: null
637
+ value: false
638
+ hub_always_push:
639
+ desc: null
640
+ value: false
641
+ gradient_checkpointing:
642
+ desc: null
643
+ value: true
644
+ gradient_checkpointing_kwargs:
645
+ desc: null
646
+ value: null
647
+ include_inputs_for_metrics:
648
+ desc: null
649
+ value: false
650
+ fp16_backend:
651
+ desc: null
652
+ value: auto
653
+ push_to_hub_model_id:
654
+ desc: null
655
+ value: WhisperLargeFinnishV3
656
+ push_to_hub_organization:
657
+ desc: null
658
+ value: null
659
+ push_to_hub_token:
660
+ desc: null
661
+ value: <PUSH_TO_HUB_TOKEN>
662
+ mp_parameters:
663
+ desc: null
664
+ value: ''
665
+ auto_find_batch_size:
666
+ desc: null
667
+ value: false
668
+ full_determinism:
669
+ desc: null
670
+ value: false
671
+ torchdynamo:
672
+ desc: null
673
+ value: null
674
+ ray_scope:
675
+ desc: null
676
+ value: last
677
+ ddp_timeout:
678
+ desc: null
679
+ value: 1800
680
+ torch_compile:
681
+ desc: null
682
+ value: true
683
+ torch_compile_backend:
684
+ desc: null
685
+ value: inductor
686
+ torch_compile_mode:
687
+ desc: null
688
+ value: null
689
+ dispatch_batches:
690
+ desc: null
691
+ value: null
692
+ split_batches:
693
+ desc: null
694
+ value: false
695
+ include_tokens_per_second:
696
+ desc: null
697
+ value: false
698
+ include_num_input_tokens_seen:
699
+ desc: null
700
+ value: false
701
+ neftune_noise_alpha:
702
+ desc: null
703
+ value: null
704
+ sortish_sampler:
705
+ desc: null
706
+ value: false
707
+ predict_with_generate:
708
+ desc: null
709
+ value: true
710
+ generation_max_length:
711
+ desc: null
712
+ value: 225
713
+ generation_num_beams:
714
+ desc: null
715
+ value: null
716
+ generation_config:
717
+ desc: null
718
+ value: null
wandb/run-20231122_100021-qrxr9o47/files/output.log ADDED
@@ -0,0 +1,1190 @@
+ Reading metadata...: 2165it [00:01, 2091.77it/s] | 0/60000 [00:00<?, ?it/s]
+ Reading metadata...: 1it [00:01, 1.02s/it]
+ [INFO|trainer_utils.py:759] 2023-11-22 10:02:47,011 >> The following columns in the training set don't have a corresponding argument in `WhisperForConditionalGeneration.forward` and have been ignored: input_length. If input_length are not expected by `WhisperForConditionalGeneration.forward`, you can safely ignore this message.
+ Reading metadata...: 1650it [00:00, 10770.26it/s]
+ Reading metadata...: 1it [00:00, 6.80it/s]
+ Reading metadata...: 1650it [00:00, 10115.12it/s]
+ Reading metadata...: 2165it [00:00, 14042.07it/s]
+ Reading metadata...: 1it [00:00, 6.85it/s]
+ Reading metadata...: 2165it [00:00, 13309.28it/s]
+ Reading metadata...: 1650it [00:00, 8448.63it/s]
+ Reading metadata...: 1650it [00:00, 10364.40it/s]
+ Reading metadata...: 2165it [00:00, 9428.67it/s]
+ Reading metadata...: 1650it [00:00, 10640.87it/s]
+ Reading metadata...: 2165it [00:00, 12696.95it/s]
+ Reading metadata...: 1650it [00:00, 10258.32it/s]
+ Reading metadata...: 2165it [00:00, 4710.14it/s]
+ Reading metadata...: 1650it [00:00, 10575.18it/s]
+ Reading metadata...: 1650it [00:00, 10434.57it/s]
+ Reading metadata...: 2165it [00:00, 5422.66it/s]
+ Reading metadata...: 1650it [00:00, 9295.20it/s]
+ Reading metadata...: 2165it [00:00, 14024.20it/s]
+ Reading metadata...: 1650it [00:00, 3584.90it/s]
+ Reading metadata...: 2165it [00:00, 10291.07it/s]
+ Reading metadata...: 1650it [00:00, 10892.36it/s]
+ Reading metadata...: 1it [00:00, 6.91it/s]
+ Reading metadata...: 2165it [00:00, 10458.57it/s]
+ Reading metadata...: 1650it [00:00, 10005.74it/s]
+ Reading metadata...: 2165it [00:00, 11594.39it/s]
+ Reading metadata...: 1650it [00:00, 10291.47it/s]
+ Reading metadata...: 2165it [00:00, 13948.97it/s]
+ Reading metadata...: 1650it [00:00, 10099.77it/s]
+ Reading metadata...: 1650it [00:00, 6295.95it/s]
+ Reading metadata...: 2165it [00:00, 13887.47it/s]
+ Reading metadata...: 1650it [00:00, 7971.18it/s]
+ Reading metadata...: 2165it [00:00, 9432.98it/s]
+ Reading metadata...: 1650it [00:00, 9463.73it/s]
+ Reading metadata...: 2165it [00:00, 12577.75it/s]
+ Reading metadata...: 1650it [00:00, 10109.37it/s]
+ Reading metadata...: 2165it [00:00, 13560.18it/s]
+ Reading metadata...: 1it [00:00, 6.62it/s]
+ Reading metadata...: 1650it [00:00, 9886.86it/s]
+ Reading metadata...: 2165it [00:00, 13258.29it/s]
+ Reading metadata...: 1650it [00:00, 10246.35it/s]
+ Reading metadata...: 2165it [00:00, 9269.32it/s]
+ Reading metadata...: 1650it [00:00, 10293.66it/s]
+ Reading metadata...: 1it [00:00, 6.56it/s]
+ Reading metadata...: 1650it [00:00, 10623.23it/s]
+ Reading metadata...: 1650it [00:00, 10933.23it/s]
+ Reading metadata...: 1it [00:00, 6.92it/s]
+ [WARNING|logging.py:329] 2023-11-22 15:47:28,364 >> `use_cache = True` is incompatible with gradient checkpointing. Setting `use_cache = False`...
+ 43%|████████████████████████████████████████▎ | 26020/60000 [5:53:10<16:25:11, 1.74s/it]
+ 43%|████████████████████████████████████████▎ | 26040/60000 [5:56:33<95:56:28, 10.17s/it]
+ 43%|████████████████████████████████████████▍ | 26059/60000 [5:59:45<96:59:47, 10.29s/it]
+ 43%|███████████████████████████████████████▉ | 26080/60000 [6:03:25<110:16:32, 11.70s/it]
+ 44%|████████████████████████████████████████▍ | 26100/60000 [6:06:43<94:13:59, 10.01s/it]
+ 44%|████████████████████████████████████████▍ | 26120/60000 [6:10:01<94:05:24, 10.00s/it]
+ 44%|████████████████████████████████████████▌ | 26140/60000 [6:13:28<95:52:26, 10.19s/it]
+ 44%|████████████████████████████████████████▌ | 26160/60000 [6:16:51<93:55:58, 9.99s/it]
+ 44%|████████████████████████████████████████▌ | 26179/60000 [6:20:05<92:57:07, 9.89s/it]
+ 44%|████████████████████████████████████████▌ | 26199/60000 [6:23:22<91:17:42, 9.72s/it]
+ 44%|████████████████████████████████████████▋ | 26220/60000 [6:26:50<96:34:09, 10.29s/it]
+ 44%|████████████████████████████████████████▋ | 26240/60000 [6:30:09<92:51:54, 9.90s/it]
+ 44%|████████████████████████████████████████▋ | 26260/60000 [6:33:36<95:21:39, 10.17s/it]
+ 44%|████████████████████████████████████████▎ | 26280/60000 [6:37:20<123:53:35, 13.23s/it]
+ 44%|████████████████████████████████████████▊ | 26300/60000 [6:41:05<95:25:50, 10.19s/it]
+ 44%|████████████████████████████████████████▊ | 26320/60000 [6:44:27<94:57:22, 10.15s/it]
+ 44%|████████████████████████████████████████▊ | 26340/60000 [6:47:50<93:05:33, 9.96s/it]
+ 44%|████████████████████████████████████████▊ | 26359/60000 [6:51:11<95:25:08, 10.21s/it]
+ 44%|████████████████████████████████████████▉ | 26379/60000 [6:54:33<94:52:34, 10.16s/it]
+ 44%|████████████████████████████████████████▉ | 26400/60000 [6:58:09<94:40:27, 10.14s/it]
+ 44%|████████████████████████████████████████▉ | 26420/60000 [7:01:33<97:18:13, 10.43s/it]
+ 44%|████████████████████████████████████████▌ | 26440/60000 [7:05:30<102:54:30, 11.04s/it]
+ 44%|█████████████████████████████████████████ | 26460/60000 [7:08:54<96:05:51, 10.31s/it]
+ 44%|█████████████████████████████████████████ | 26480/60000 [7:12:17<95:20:20, 10.24s/it]
+ 44%|█████████████████████████████████████████ | 26500/60000 [7:15:42<94:48:09, 10.19s/it]
+ 44%|████████████████████████████████████████▋ | 26520/60000 [7:19:08<102:47:39, 11.05s/it]
+ 44%|█████████████████████████████████████████▏ | 26540/60000 [7:22:28<95:18:33, 10.25s/it]
+ 44%|█████████████████████████████████████████▏ | 26560/60000 [7:25:48<92:21:30, 9.94s/it]
+ 44%|█████████████████████████████████████████▏ | 26580/60000 [7:29:10<92:33:21, 9.97s/it]
+ 44%|████████████████████████████████████████▊ | 26600/60000 [7:32:53<132:18:31, 14.26s/it]
+ 44%|█████████████████████████████████████████▎ | 26620/60000 [7:36:31<95:10:40, 10.26s/it]
+ 44%|█████████████████████████████████████████▎ | 26632/60000 [7:38:26<81:37:28, 8.81s/it]
+ 44%|█████████████████████████████████████████▎ | 26633/60000 [7:38:33<75:28:42, 8.14s/it]
+ 44%|█████████████████████████████████████████▎ | 26639/60000 [7:39:31<89:10:50, 9.62s/it]
+ 44%|█████████████████████████████████████████▎ | 26660/60000 [7:43:02<92:33:09, 9.99s/it]
+ 44%|█████████████████████████████████████████▎ | 26680/60000 [7:46:24<92:53:37, 10.04s/it]
+ 44%|████████████████████████████████████████▉ | 26699/60000 [7:49:42<103:25:53, 11.18s/it]
+ Reading metadata...: 1650it [00:00, 2640.63it/s] | 26700/60000 [7:49:53<100:37:44, 10.88s/it]
+ 45%|█████████████████████████████████████████▍ | 26719/60000 [7:53:06<91:05:06, 9.85s/it]
+ 45%|█████████████████████████████████████████▍ | 26739/60000 [7:57:07<96:07:19, 10.40s/it]
+ 45%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌ | 26759/60000 [8:01:29<103:02:04, 11.16s/it]
+ 45%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏ | 26779/60000 [8:05:05<99:22:41, 10.77s/it]
+ 45%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏ | 26799/60000 [8:08:45<97:18:50, 10.55s/it]
+ 45%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 26819/60000 [8:12:16<98:14:08, 10.66s/it]
+ 45%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍ | 26839/60000 [8:15:47<97:32:15, 10.59s/it]
+ 45%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████ | 26859/60000 [8:19:28<104:49:20, 11.39s/it]
+ 45%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏ | 26880/60000 [8:23:24<102:15:37, 11.12s/it]
+ 45%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 26900/60000 [8:27:00<99:42:17, 10.84s/it]
+ 45%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍ | 26920/60000 [8:30:45<107:01:30, 11.65s/it]
+ 45%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉ | 26939/60000 [8:34:11<98:30:06, 10.73s/it]
+ 45%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌ | 26960/60000 [8:38:18<101:25:53, 11.05s/it]
+ 45%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 26980/60000 [8:42:02<100:28:45, 10.95s/it]
+ 45%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 26999/60000 [8:45:30<100:14:09, 10.93s/it]
+ 45%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 27000/60000 [8:45:41<100:50:31, 11.00s/it][INFO|trainer.py:3173] 2023-11-22 18:46:09,458 >> ***** Running Evaluation *****
+ [INFO|trainer.py:3177] 2023-11-22 18:46:09,461 >> Num examples: Unknown
+ [INFO|trainer.py:3178] 2023-11-22 18:46:09,461 >> Batch size = 4
+ Reading metadata...: 1704it [00:00, 2566.26it/s]
+ [INFO|trainer_utils.py:759] 2023-11-22 18:46:12,313 >> The following columns in the evaluation set don't have a corresponding argument in `WhisperForConditionalGeneration.forward` and have been ignored: age, client_id, locale, gender, accent, up_votes, segment, input_length, down_votes, path. If age, client_id, locale, gender, accent, up_votes, segment, input_length, down_votes, path are not expected by `WhisperForConditionalGeneration.forward`, you can safely ignore this message.
+ 45%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 27000/60000 [8:57:11<100:50:31, 11.00s/it]
+ 45%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 27000/60000 [8:57:11<100:50:31, 11.00s/it][INFO|trainer.py:2896] 2023-11-22 18:57:43,033 >> Saving model checkpoint to ./checkpoint-27000
+ [INFO|configuration_utils.py:462] 2023-11-22 18:57:43,068 >> Configuration saved in ./checkpoint-27000/config.json
+ [INFO|configuration_utils.py:568] 2023-11-22 18:57:43,077 >> Configuration saved in ./checkpoint-27000/generation_config.json
+ [INFO|modeling_utils.py:2194] 2023-11-22 18:58:14,712 >> Model weights saved in ./checkpoint-27000/pytorch_model.bin
+ [INFO|feature_extraction_utils.py:425] 2023-11-22 18:58:14,726 >> Feature extractor saved in ./checkpoint-27000/preprocessor_config.json
+ [2023-11-22 18:58:17,883] [INFO] [logging.py:96:log_dist] [Rank 0] [Torch] Checkpoint global_step27000 is about to be saved!
+ [2023-11-22 18:58:17,952] [INFO] [logging.py:96:log_dist] [Rank 0] Saving model checkpoint: ./checkpoint-27000/global_step27000/mp_rank_00_model_states.pt
+ [2023-11-22 18:58:17,952] [INFO] [torch_checkpoint_engine.py:21:save] [Torch] Saving ./checkpoint-27000/global_step27000/mp_rank_00_model_states.pt...
+ [2023-11-22 18:58:39,977] [INFO] [torch_checkpoint_engine.py:23:save] [Torch] Saved ./checkpoint-27000/global_step27000/mp_rank_00_model_states.pt.
+ [2023-11-22 18:58:40,036] [INFO] [torch_checkpoint_engine.py:21:save] [Torch] Saving ./checkpoint-27000/global_step27000/zero_pp_rank_0_mp_rank_00_optim_states.pt...
+ [2023-11-22 18:59:03,183] [INFO] [torch_checkpoint_engine.py:23:save] [Torch] Saved ./checkpoint-27000/global_step27000/zero_pp_rank_0_mp_rank_00_optim_states.pt.
+ [2023-11-22 18:59:03,205] [INFO] [engine.py:3417:_save_zero_checkpoint] zero checkpoint saved ./checkpoint-27000/global_step27000/zero_pp_rank_0_mp_rank_00_optim_states.pt
+ [2023-11-22 18:59:03,213] [INFO] [torch_checkpoint_engine.py:33:commit] [Torch] Checkpoint global_step27000 is ready now!
+ [INFO|feature_extraction_utils.py:425] 2023-11-22 19:00:19,016 >> Feature extractor saved in ./preprocessor_config.json
+ 45%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉ | 27019/60000 [9:03:28<105:06:04, 11.47s/it]
+ 45%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████ | 27040/60000 [9:07:37<104:50:49, 11.45s/it]
wandb/run-20231122_100021-qrxr9o47/files/requirements.txt ADDED
@@ -0,0 +1,195 @@
+ absl-py==2.0.0
+ accelerate==0.24.1
+ aiohttp==3.9.0
+ aiosignal==1.3.1
+ annotated-types==0.6.0
+ anyio==4.0.0
+ appdirs==1.4.4
+ argon2-cffi-bindings==21.2.0
+ argon2-cffi==23.1.0
+ arrow==1.3.0
+ asttokens==2.4.1
+ async-lru==2.0.4
+ async-timeout==4.0.3
+ attrs==23.1.0
+ audioread==3.0.1
+ babel==2.13.1
+ beautifulsoup4==4.12.2
+ bleach==6.1.0
+ cachetools==5.3.2
+ certifi==2023.11.17
+ cffi==1.16.0
+ charset-normalizer==3.3.2
+ click==8.1.7
+ comm==0.2.0
+ datasets==2.15.1.dev0
+ debugpy==1.8.0
+ decorator==5.1.1
+ deepspeed==0.12.3
+ defusedxml==0.7.1
+ dill==0.3.7
+ docker-pycreds==0.4.0
+ evaluate==0.4.1
+ exceptiongroup==1.1.3
+ executing==2.0.1
+ fastjsonschema==2.19.0
+ filelock==3.13.1
+ fqdn==1.5.1
+ frozenlist==1.4.0
+ fsspec==2023.10.0
+ gitdb==4.0.11
+ gitpython==3.1.40
+ google-auth-oauthlib==1.1.0
+ google-auth==2.23.4
+ grpcio==1.59.3
+ hjson==3.1.0
+ huggingface-hub==0.19.4
+ idna==3.4
+ iprogress==0.4
+ ipykernel==6.26.0
+ ipython==8.17.2
+ ipywidgets==8.1.1
+ isoduration==20.11.0
+ jedi==0.19.1
+ jinja2==3.1.2
+ jiwer==3.0.3
+ joblib==1.3.2
+ json5==0.9.14
+ jsonpointer==2.4
+ jsonschema-specifications==2023.11.1
+ jsonschema==4.20.0
+ jupyter-client==8.6.0
+ jupyter-console==6.6.3
+ jupyter-core==5.5.0
+ jupyter-events==0.9.0
+ jupyter-lsp==2.2.0
+ jupyter-server-terminals==0.4.4
+ jupyter-server==2.10.1
+ jupyter==1.0.0
+ jupyterlab-pygments==0.2.2
+ jupyterlab-server==2.25.1
+ jupyterlab-widgets==3.0.9
+ jupyterlab==4.0.8
+ lazy-loader==0.3
+ librosa==0.10.1
+ llvmlite==0.41.1
+ markdown==3.5.1
+ markupsafe==2.1.3
+ matplotlib-inline==0.1.6
+ mistune==3.0.2
+ more-itertools==10.1.0
+ mpmath==1.3.0
+ msgpack==1.0.7
+ multidict==6.0.4
+ multiprocess==0.70.15
+ nbclient==0.9.0
+ nbconvert==7.11.0
+ nbformat==5.9.2
+ nest-asyncio==1.5.8
+ networkx==3.2.1
+ ninja==1.11.1.1
+ notebook-shim==0.2.3
+ notebook==7.0.6
+ numba==0.58.1
+ numpy==1.26.2
+ nvidia-cublas-cu12==12.1.3.1
+ nvidia-cuda-cupti-cu12==12.1.105
+ nvidia-cuda-nvrtc-cu12==12.1.105
+ nvidia-cuda-runtime-cu12==12.1.105
+ nvidia-cudnn-cu12==8.9.2.26
+ nvidia-cufft-cu12==11.0.2.54
+ nvidia-curand-cu12==10.3.2.106
+ nvidia-cusolver-cu12==11.4.5.107
+ nvidia-cusparse-cu12==12.1.0.106
+ nvidia-nccl-cu12==2.18.1
+ nvidia-nvjitlink-cu12==12.3.101
+ nvidia-nvtx-cu12==12.1.105
+ oauthlib==3.2.2
+ overrides==7.4.0
+ packaging==23.2
+ pandas==2.1.3
+ pandocfilters==1.5.0
+ parso==0.8.3
+ pexpect==4.8.0
+ pillow==10.1.0
+ pip==23.3
+ platformdirs==4.0.0
+ pooch==1.8.0
+ prometheus-client==0.18.0
+ prompt-toolkit==3.0.41
+ protobuf==4.23.4
+ psutil==5.9.6
+ ptyprocess==0.7.0
+ pure-eval==0.2.2
+ py-cpuinfo==9.0.0
+ pyarrow-hotfix==0.5
+ pyarrow==14.0.1
+ pyasn1-modules==0.3.0
+ pyasn1==0.5.0
+ pycparser==2.21
+ pydantic-core==2.14.3
+ pydantic==2.5.1
+ pygments==2.16.1
+ pynvml==11.5.0
+ python-dateutil==2.8.2
+ python-json-logger==2.0.7
+ pytz==2023.3.post1
+ pyyaml==6.0.1
+ pyzmq==25.1.1
+ qtconsole==5.5.1
+ qtpy==2.4.1
+ rapidfuzz==3.5.2
+ referencing==0.31.0
+ regex==2023.10.3
+ requests-oauthlib==1.3.1
+ requests==2.31.0
+ responses==0.18.0
+ rfc3339-validator==0.1.4
+ rfc3986-validator==0.1.1
+ rpds-py==0.13.0
+ rsa==4.9
+ safetensors==0.4.0
+ scikit-learn==1.3.2
+ scipy==1.11.3
+ send2trash==1.8.2
+ sentry-sdk==1.35.0
+ setproctitle==1.3.3
+ setuptools==68.0.0
+ six==1.16.0
+ smmap==5.0.1
+ sniffio==1.3.0
+ soundfile==0.12.1
+ soupsieve==2.5
+ soxr==0.3.7
+ stack-data==0.6.3
+ sympy==1.12
+ tensorboard-data-server==0.7.2
+ tensorboard==2.15.1
+ terminado==0.18.0
+ threadpoolctl==3.2.0
+ tinycss2==1.2.1
+ tokenizers==0.15.0
+ tomli==2.0.1
+ torch==2.1.1
+ torchaudio==2.1.1
+ torchvision==0.16.1
+ tornado==6.3.3
+ tqdm==4.66.1
+ traitlets==5.13.0
+ transformers==4.36.0.dev0
+ triton==2.1.0
+ types-python-dateutil==2.8.19.14
+ typing-extensions==4.8.0
+ tzdata==2023.3
+ uri-template==1.3.0
+ urllib3==2.1.0
+ wandb==0.16.0
+ wcwidth==0.2.10
+ webcolors==1.13
+ webencodings==0.5.1
+ websocket-client==1.6.4
+ werkzeug==3.0.1
+ wheel==0.41.2
+ widgetsnbextension==4.0.9
+ xxhash==3.4.1
+ yarl==1.9.2
wandb/run-20231122_100021-qrxr9o47/files/wandb-metadata.json ADDED
@@ -0,0 +1,168 @@
+ {
+ "os": "Linux-5.15.123.1-microsoft-standard-WSL2-x86_64-with-glibc2.35",
+ "python": "3.10.13",
+ "heartbeatAt": "2023-11-22T08:00:22.301922",
+ "startedAt": "2023-11-22T08:00:21.614508",
+ "docker": null,
+ "cuda": null,
+ "args": [
+ "--local_rank=0",
+ "--deepspeed=ds_config.json",
+ "--model_name_or_path=openai/whisper-large-v3",
+ "--train_dataset_names",
+ "mozilla-foundation/common_voice_11_0",
+ "mozilla-foundation/common_voice_11_0",
+ "google/fleurs",
+ "Finnish-NLP/css10_audio_processed",
+ "Finnish-NLP/youtube_fi_110k_preprocessed",
+ "Finnish-NLP/voxpopuli_fi_audio_processed",
+ "Finnish-NLP/parliament_audio_processed",
+ "Finnish-NLP/aalto_eduskunta_asr_audio_processed",
+ "Finnish-NLP/fbc_dialog_audio_processed",
+ "Finnish-NLP/fbc_monolog_audio_processed",
+ "--train_dataset_config_names",
+ "fi",
+ "fi",
+ "fi_fi",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "--train_dataset_split_name",
+ "train",
+ "validation",
+ "train",
+ "train",
+ "train",
+ "train",
+ "train",
+ "train",
+ "train",
+ "train",
+ "--train_dataset_text_column_names",
+ "sentence",
+ "sentence",
+ "raw_transcription",
+ "sentence",
+ "texts",
+ "sentence",
+ "sentence",
+ "sentence",
+ "sentence",
+ "sentence",
+ "--eval_dataset_name=mozilla-foundation/common_voice_11_0",
+ "--eval_dataset_config_name=fi",
+ "--eval_split_name=test",
+ "--language=finnish",
+ "--model_index_name=WhisperLargeV3Finnish",
+ "--max_steps=60000",
+ "--output_dir=./",
+ "--per_device_train_batch_size=8",
+ "--per_device_eval_batch_size=4",
+ "--gradient_accumulation_steps=2",
+ "--logging_steps=20",
+ "--learning_rate=3e-6",
+ "--warmup_steps=1000",
+ "--evaluation_strategy=steps",
+ "--eval_steps=1000",
+ "--save_strategy=steps",
+ "--save_steps=1000",
+ "--generation_max_length=225",
+ "--length_column_name=input_length",
+ "--max_duration_in_seconds=30",
+ "--text_column_name=sentence",
+ "--freeze_feature_encoder=False",
+ "--report_to=wandb",
+ "--metric_for_best_model=wer",
+ "--greater_is_better=False",
+ "--load_best_model_at_end",
+ "--gradient_checkpointing",
+ "--fp16",
+ "--torch_compile",
+ "--overwrite_output_dir",
+ "--do_train",
+ "--do_eval",
+ "--predict_with_generate",
+ "--do_normalize_eval",
+ "--use_auth_token",
+ "--push_to_hub",
+ "--push_to_hub_model_id=WhisperLargeFinnishV3",
+ "--resume_from_checkpoint=./checkpoint-26000"
+ ],
+ "state": "running",
+ "program": "/mnt/e/run_speech_recognition_seq2seq_streaming.py",
+ "codePathLocal": "run_speech_recognition_seq2seq_streaming.py",
+ "codePath": "run_speech_recognition_seq2seq_streaming.py",
+ "host": "DESKTOP-59O9VN1",
+ "username": "rasmus",
+ "executable": "/home/rasmus/miniconda3/envs/WhisperFinetuneEnv/bin/python",
+ "cpu_count": 4,
+ "cpu_count_logical": 8,
+ "cpu_freq": {
+ "current": 3911.9949999999994,
+ "min": 0.0,
+ "max": 0.0
+ },
+ "cpu_freq_per_core": [
+ {
+ "current": 3911.995,
+ "min": 0.0,
+ "max": 0.0
+ },
+ {
+ "current": 3911.995,
+ "min": 0.0,
+ "max": 0.0
+ },
+ {
+ "current": 3911.995,
+ "min": 0.0,
+ "max": 0.0
+ },
+ {
+ "current": 3911.995,
+ "min": 0.0,
+ "max": 0.0
+ },
+ {
+ "current": 3911.995,
+ "min": 0.0,
+ "max": 0.0
+ },
+ {
+ "current": 3911.995,
+ "min": 0.0,
+ "max": 0.0
+ },
+ {
+ "current": 3911.995,
+ "min": 0.0,
+ "max": 0.0
+ },
+ {
+ "current": 3911.995,
+ "min": 0.0,
+ "max": 0.0
+ }
+ ],
+ "disk": {
+ "/": {
+ "total": 1006.853931427002,
+ "used": 37.58251190185547
+ }
+ },
+ "gpu": "NVIDIA GeForce RTX 4080",
+ "gpu_count": 1,
+ "gpu_devices": [
+ {
+ "name": "NVIDIA GeForce RTX 4080",
+ "memory_total": 17171480576
+ }
+ ],
+ "memory": {
+ "total": 54.92573928833008
+ }
+ }
wandb/run-20231122_100021-qrxr9o47/files/wandb-summary.json ADDED
@@ -0,0 +1 @@
+ {"train/loss": 0.0592, "train/learning_rate": 1.6780677966101696e-06, "train/epoch": 0.45, "train/global_step": 27040, "_timestamp": 1700672885.8311684, "_runtime": 32864.15361738205, "_step": 52, "eval/loss": 0.19091796875, "eval/wer": 8.95212966453072, "eval/runtime": 689.9912, "eval/samples_per_second": 2.47, "eval/steps_per_second": 0.617}
wandb/run-20231122_100021-qrxr9o47/logs/debug-internal.log ADDED
The diff for this file is too large to render. See raw diff
 
wandb/run-20231122_100021-qrxr9o47/logs/debug.log ADDED
@@ -0,0 +1,27 @@
+ 2023-11-22 10:00:21,657 INFO MainThread:2217649 [wandb_setup.py:_flush():76] Current SDK version is 0.16.0
+ 2023-11-22 10:00:21,658 INFO MainThread:2217649 [wandb_setup.py:_flush():76] Configure stats pid to 2217649
+ 2023-11-22 10:00:21,658 INFO MainThread:2217649 [wandb_setup.py:_flush():76] Loading settings from /home/rasmus/.config/wandb/settings
+ 2023-11-22 10:00:21,658 INFO MainThread:2217649 [wandb_setup.py:_flush():76] Loading settings from /mnt/e/wandb/settings
+ 2023-11-22 10:00:21,659 INFO MainThread:2217649 [wandb_setup.py:_flush():76] Loading settings from environment variables: {}
+ 2023-11-22 10:00:21,659 INFO MainThread:2217649 [wandb_setup.py:_flush():76] Applying setup settings: {'_disable_service': False}
+ 2023-11-22 10:00:21,659 INFO MainThread:2217649 [wandb_setup.py:_flush():76] Inferring run settings from compute environment: {'program_relpath': 'run_speech_recognition_seq2seq_streaming.py', 'program_abspath': '/mnt/e/run_speech_recognition_seq2seq_streaming.py', 'program': '/mnt/e/run_speech_recognition_seq2seq_streaming.py'}
+ 2023-11-22 10:00:21,660 INFO MainThread:2217649 [wandb_init.py:_log_setup():524] Logging user logs to /mnt/e/wandb/run-20231122_100021-qrxr9o47/logs/debug.log
+ 2023-11-22 10:00:21,660 INFO MainThread:2217649 [wandb_init.py:_log_setup():525] Logging internal logs to /mnt/e/wandb/run-20231122_100021-qrxr9o47/logs/debug-internal.log
+ 2023-11-22 10:00:21,660 INFO MainThread:2217649 [wandb_init.py:init():564] calling init triggers
+ 2023-11-22 10:00:21,660 INFO MainThread:2217649 [wandb_init.py:init():571] wandb.init called with sweep_config: {}
+ config: {}
+ 2023-11-22 10:00:21,661 INFO MainThread:2217649 [wandb_init.py:init():614] starting backend
+ 2023-11-22 10:00:21,661 INFO MainThread:2217649 [wandb_init.py:init():618] setting up manager
+ 2023-11-22 10:00:21,667 INFO MainThread:2217649 [backend.py:_multiprocessing_setup():105] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
+ 2023-11-22 10:00:21,676 INFO MainThread:2217649 [wandb_init.py:init():624] backend started and connected
+ 2023-11-22 10:00:21,717 INFO MainThread:2217649 [wandb_init.py:init():716] updated telemetry
+ 2023-11-22 10:00:21,722 INFO MainThread:2217649 [wandb_init.py:init():749] communicating run to backend with 90.0 second timeout
+ 2023-11-22 10:00:22,205 INFO MainThread:2217649 [wandb_run.py:_on_init():2254] communicating current version
+ 2023-11-22 10:00:22,253 INFO MainThread:2217649 [wandb_run.py:_on_init():2263] got version response
+ 2023-11-22 10:00:22,253 INFO MainThread:2217649 [wandb_init.py:init():800] starting run threads in backend
+ 2023-11-22 10:00:27,927 INFO MainThread:2217649 [wandb_run.py:_console_start():2233] atexit reg
+ 2023-11-22 10:00:27,928 INFO MainThread:2217649 [wandb_run.py:_redirect():2088] redirect: wrap_raw
+ 2023-11-22 10:00:27,929 INFO MainThread:2217649 [wandb_run.py:_redirect():2153] Wrapping output streams.
+ 2023-11-22 10:00:27,929 INFO MainThread:2217649 [wandb_run.py:_redirect():2178] Redirects installed.
+ 2023-11-22 10:00:27,930 INFO MainThread:2217649 [wandb_init.py:init():841] run started, returning control to user process
+ 2023-11-22 10:00:27,933 INFO MainThread:2217649 [wandb_run.py:_config_callback():1342] config_cb None None {'vocab_size': 51866, 'num_mel_bins': 128, 'd_model': 1280, 'encoder_layers': 32, 'encoder_attention_heads': 20, 'decoder_layers': 32, 'decoder_attention_heads': 20, 'decoder_ffn_dim': 5120, 'encoder_ffn_dim': 5120, 'dropout': 0.0, 'attention_dropout': 0.0, 'activation_dropout': 0.0, 'activation_function': 'gelu', 'init_std': 0.02, 'encoder_layerdrop': 0.0, 'decoder_layerdrop': 0.0, 'use_cache': True, 'num_hidden_layers': 32, 'scale_embedding': False, 'max_source_positions': 1500, 'max_target_positions': 448, 'classifier_proj_size': 256, 'use_weighted_layer_sum': False, 'apply_spec_augment': False, 'mask_time_prob': 0.05, 'mask_time_length': 10, 'mask_time_min_masks': 2, 'mask_feature_prob': 0.0, 'mask_feature_length': 10, 'mask_feature_min_masks': 0, 'median_filter_width': 7, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float16', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'is_encoder_decoder': True, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 448, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'chunk_size_feed_forward': 0, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': [220, 50257], 'architectures': ['WhisperForConditionalGeneration'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 50257, 'pad_token_id': 50256, 'eos_token_id': 50257, 'sep_token_id': None, 'decoder_start_token_id': 50258, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'openai/whisper-large-v3', 'transformers_version': '4.36.0.dev0', 'model_type': 'whisper', 'forced_decoder_ids': None, 'output_dir': './', 'overwrite_output_dir': True, 'do_train': True, 'do_eval': True, 'do_predict': False, 'evaluation_strategy': 'steps', 'prediction_loss_only': False, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 4, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 2, 'eval_accumulation_steps': None, 'eval_delay': 0, 'learning_rate': 3e-06, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 3.0, 'max_steps': 60000, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 1000, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': './runs/Nov22_09-55-44_DESKTOP-59O9VN1', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 20, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 1000, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 
'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 1000, 'dataloader_num_workers': 0, 'past_index': -1, 'run_name': './', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': True, 'metric_for_best_model': 'wer', 'greater_is_better': False, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'deepspeed': 'ds_config.json', 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'input_length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': './checkpoint-26000', 'hub_model_id': 'RASMUS/WhisperLargeFinnishV3', 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': True, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'fp16_backend': 'auto', 'push_to_hub_model_id': 'WhisperLargeFinnishV3', 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': True, 'torch_compile_backend': 'inductor', 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': False, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'sortish_sampler': False, 'predict_with_generate': True, 'generation_max_length': 225, 'generation_num_beams': None, 'generation_config': None}
wandb/run-20231122_100021-qrxr9o47/run-qrxr9o47.wandb ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:83a0be6c56b94619c3353d0f5558d25aa4eb59dad7a179876f436f10832e5519
+ size 1638874