xezpeleta committed on Oct 7, 2024

Commit

dedcdce

•

1 Parent(s): 866bdd5

End of training

Browse files

Files changed (42) hide show

.gitattributes +1 -0
README.md +64 -0
all_results.json +8 -0
eval_results.json +8 -0
generation_config.json +265 -0
model-00001-of-00002.safetensors +1 -1
model-00002-of-00002.safetensors +1 -1
run.sh +0 -1
runs/Oct07_10-30-48_tknika/events.out.tfevents.1728297734.tknika.20799.0 +3 -0
training_args.bin +1 -1
wandb/debug-internal.log +10 -10
wandb/debug.log +27 -27
wandb/run-20241005_141414-821qpm7o/files/config.yaml +563 -0
wandb/run-20241005_141414-821qpm7o/files/output.log +0 -0
wandb/run-20241005_141414-821qpm7o/files/wandb-summary.json +1 -0
wandb/run-20241005_141414-821qpm7o/logs/debug-core.log +7 -0
wandb/run-20241005_141414-821qpm7o/logs/debug-internal.log +9 -0
wandb/run-20241005_141414-821qpm7o/logs/debug.log +1 -0
wandb/run-20241005_141414-821qpm7o/run-821qpm7o.wandb +2 -2
wandb/run-20241007_102112-r5qja96d/files/config.yaml +508 -0
wandb/run-20241007_102112-r5qja96d/files/output.log +66 -0
wandb/run-20241007_102112-r5qja96d/files/wandb-metadata.json +87 -0
wandb/run-20241007_102112-r5qja96d/files/wandb-summary.json +1 -0
wandb/run-20241007_102112-r5qja96d/logs/debug-core.log +14 -0
wandb/run-20241007_102112-r5qja96d/logs/debug-internal.log +18 -0
wandb/run-20241007_102112-r5qja96d/logs/debug.log +29 -0
wandb/run-20241007_102112-r5qja96d/run-r5qja96d.wandb +0 -0
wandb/run-20241007_102233-fvsz65yu/files/config.yaml +515 -0
wandb/run-20241007_102233-fvsz65yu/files/output.log +68 -0
wandb/run-20241007_102233-fvsz65yu/files/wandb-metadata.json +87 -0
wandb/run-20241007_102233-fvsz65yu/files/wandb-summary.json +1 -0
wandb/run-20241007_102233-fvsz65yu/logs/debug-core.log +14 -0
wandb/run-20241007_102233-fvsz65yu/logs/debug-internal.log +18 -0
wandb/run-20241007_102233-fvsz65yu/logs/debug.log +29 -0
wandb/run-20241007_102233-fvsz65yu/run-fvsz65yu.wandb +0 -0
wandb/run-20241007_125615-a3z1jk8c/files/output.log +32 -0
wandb/run-20241007_125615-a3z1jk8c/files/requirements.txt +94 -0
wandb/run-20241007_125615-a3z1jk8c/files/wandb-metadata.json +85 -0
wandb/run-20241007_125615-a3z1jk8c/logs/debug-core.log +7 -0
wandb/run-20241007_125615-a3z1jk8c/logs/debug-internal.log +10 -0
wandb/run-20241007_125615-a3z1jk8c/logs/debug.log +28 -0
wandb/run-20241007_125615-a3z1jk8c/run-a3z1jk8c.wandb +0 -0

.gitattributes CHANGED Viewed

@@ -33,4 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
 wandb/run-20241005_141414-821qpm7o/run-821qpm7o.wandb filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+wandb/run-20241005_141414-821qpm7o/files/output.log filter=lfs diff=lfs merge=lfs -text
 wandb/run-20241005_141414-821qpm7o/run-821qpm7o.wandb filter=lfs diff=lfs merge=lfs -text

README.md ADDED Viewed

	@@ -0,0 +1,64 @@

+---
+library_name: transformers
+language:
+- eu
+license: apache-2.0
+base_model: openai/whisper-large-v3
+tags:
+- whisper-event
+- generated_from_trainer
+datasets:
+- mozilla-foundation/common_voice_17_0
+model-index:
+- name: Whisper Large Basque
+  results: []
+---
+<!-- This model card has been generated automatically according to the information the Trainer had access to. You
+should probably proofread and complete it, then remove this comment. -->
+# Whisper Large Basque
+This model is a fine-tuned version of [openai/whisper-large-v3](https://huggingface.co/openai/whisper-large-v3) on the mozilla-foundation/common_voice_17_0 eu dataset.
+It achieves the following results on the evaluation set:
+- eval_loss: 0.9278
+- eval_model_preparation_time: 0.0102
+- eval_wer: 44.2953
+- eval_runtime: 4165.1595
+- eval_samples_per_second: 3.272
+- eval_steps_per_second: 0.409
+- step: 0
+## Model description
+More information needed
+## Intended uses & limitations
+More information needed
+## Training and evaluation data
+More information needed
+## Training procedure
+### Training hyperparameters
+The following hyperparameters were used during training:
+- learning_rate: 4.375e-06
+- train_batch_size: 16
+- eval_batch_size: 8
+- seed: 42
+- optimizer: AdamW (OptimizerNames.ADAMW_TORCH) with betas=(0.9,0.999) and epsilon=1e-08; no additional optimizer arguments
+- lr_scheduler_type: linear
+- lr_scheduler_warmup_steps: 500
+- training_steps: 10000
+- mixed_precision_training: Native AMP
+### Framework versions
+- Transformers 4.46.0.dev0
+- Pytorch 2.4.1+cu121
+- Datasets 3.0.2.dev0
+- Tokenizers 0.20.0

all_results.json ADDED Viewed

	@@ -0,0 +1,8 @@

+{
+    "eval_loss": 0.9277587532997131,
+    "eval_model_preparation_time": 0.0102,
+    "eval_runtime": 4165.1595,
+    "eval_samples_per_second": 3.272,
+    "eval_steps_per_second": 0.409,
+    "eval_wer": 44.29532045879292
+}

eval_results.json ADDED Viewed

	@@ -0,0 +1,8 @@

+{
+    "eval_loss": 0.9277587532997131,
+    "eval_model_preparation_time": 0.0102,
+    "eval_runtime": 4165.1595,
+    "eval_samples_per_second": 3.272,
+    "eval_steps_per_second": 0.409,
+    "eval_wer": 44.29532045879292
+}

generation_config.json ADDED Viewed

	@@ -0,0 +1,265 @@

+{
+  "alignment_heads": [
+    [
+      7,
+      0
+    ],
+    [
+      10,
+      17
+    ],
+    [
+      12,
+      18
+    ],
+    [
+      13,
+      12
+    ],
+    [
+      16,
+      1
+    ],
+    [
+      17,
+      14
+    ],
+    [
+      19,
+      11
+    ],
+    [
+      21,
+      4
+    ],
+    [
+      24,
+      1
+    ],
+    [
+      25,
+      6
+    ]
+  ],
+  "begin_suppress_tokens": [
+    220,
+    50257
+  ],
+  "bos_token_id": 50257,
+  "decoder_start_token_id": 50258,
+  "eos_token_id": 50257,
+  "forced_decoder_ids": [
+    [
+      1,
+      null
+    ],
+    [
+      2,
+      50360
+    ]
+  ],
+  "is_multilingual": true,
+  "lang_to_id": {
+    "<|af|>": 50327,
+    "<|am|>": 50334,
+    "<|ar|>": 50272,
+    "<|as|>": 50350,
+    "<|az|>": 50304,
+    "<|ba|>": 50355,
+    "<|be|>": 50330,
+    "<|bg|>": 50292,
+    "<|bn|>": 50302,
+    "<|bo|>": 50347,
+    "<|br|>": 50309,
+    "<|bs|>": 50315,
+    "<|ca|>": 50270,
+    "<|cs|>": 50283,
+    "<|cy|>": 50297,
+    "<|da|>": 50285,
+    "<|de|>": 50261,
+    "<|el|>": 50281,
+    "<|en|>": 50259,
+    "<|es|>": 50262,
+    "<|et|>": 50307,
+    "<|eu|>": 50310,
+    "<|fa|>": 50300,
+    "<|fi|>": 50277,
+    "<|fo|>": 50338,
+    "<|fr|>": 50265,
+    "<|gl|>": 50319,
+    "<|gu|>": 50333,
+    "<|haw|>": 50352,
+    "<|ha|>": 50354,
+    "<|he|>": 50279,
+    "<|hi|>": 50276,
+    "<|hr|>": 50291,
+    "<|ht|>": 50339,
+    "<|hu|>": 50286,
+    "<|hy|>": 50312,
+    "<|id|>": 50275,
+    "<|is|>": 50311,
+    "<|it|>": 50274,
+    "<|ja|>": 50266,
+    "<|jw|>": 50356,
+    "<|ka|>": 50329,
+    "<|kk|>": 50316,
+    "<|km|>": 50323,
+    "<|kn|>": 50306,
+    "<|ko|>": 50264,
+    "<|la|>": 50294,
+    "<|lb|>": 50345,
+    "<|ln|>": 50353,
+    "<|lo|>": 50336,
+    "<|lt|>": 50293,
+    "<|lv|>": 50301,
+    "<|mg|>": 50349,
+    "<|mi|>": 50295,
+    "<|mk|>": 50308,
+    "<|ml|>": 50296,
+    "<|mn|>": 50314,
+    "<|mr|>": 50320,
+    "<|ms|>": 50282,
+    "<|mt|>": 50343,
+    "<|my|>": 50346,
+    "<|ne|>": 50313,
+    "<|nl|>": 50271,
+    "<|nn|>": 50342,
+    "<|no|>": 50288,
+    "<|oc|>": 50328,
+    "<|pa|>": 50321,
+    "<|pl|>": 50269,
+    "<|ps|>": 50340,
+    "<|pt|>": 50267,
+    "<|ro|>": 50284,
+    "<|ru|>": 50263,
+    "<|sa|>": 50344,
+    "<|sd|>": 50332,
+    "<|si|>": 50322,
+    "<|sk|>": 50298,
+    "<|sl|>": 50305,
+    "<|sn|>": 50324,
+    "<|so|>": 50326,
+    "<|sq|>": 50317,
+    "<|sr|>": 50303,
+    "<|su|>": 50357,
+    "<|sv|>": 50273,
+    "<|sw|>": 50318,
+    "<|ta|>": 50287,
+    "<|te|>": 50299,
+    "<|tg|>": 50331,
+    "<|th|>": 50289,
+    "<|tk|>": 50341,
+    "<|tl|>": 50348,
+    "<|tr|>": 50268,
+    "<|tt|>": 50351,
+    "<|uk|>": 50280,
+    "<|ur|>": 50290,
+    "<|uz|>": 50337,
+    "<|vi|>": 50278,
+    "<|yi|>": 50335,
+    "<|yo|>": 50325,
+    "<|yue|>": 50358,
+    "<|zh|>": 50260
+  },
+  "max_initial_timestamp_index": 50,
+  "max_length": 448,
+  "no_timestamps_token_id": 50364,
+  "pad_token_id": 50257,
+  "prev_sot_token_id": 50362,
+  "return_timestamps": false,
+  "suppress_tokens": [
+    1,
+    2,
+    7,
+    8,
+    9,
+    10,
+    14,
+    25,
+    26,
+    27,
+    28,
+    29,
+    31,
+    58,
+    59,
+    60,
+    61,
+    62,
+    63,
+    90,
+    91,
+    92,
+    93,
+    359,
+    503,
+    522,
+    542,
+    873,
+    893,
+    902,
+    918,
+    922,
+    931,
+    1350,
+    1853,
+    1982,
+    2460,
+    2627,
+    3246,
+    3253,
+    3268,
+    3536,
+    3846,
+    3961,
+    4183,
+    4667,
+    6585,
+    6647,
+    7273,
+    9061,
+    9383,
+    10428,
+    10929,
+    11938,
+    12033,
+    12331,
+    12562,
+    13793,
+    14157,
+    14635,
+    15265,
+    15618,
+    16553,
+    16604,
+    18362,
+    18956,
+    20075,
+    21675,
+    22520,
+    26130,
+    26161,
+    26435,
+    28279,
+    29464,
+    31650,
+    32302,
+    32470,
+    36865,
+    42863,
+    47425,
+    49870,
+    50254,
+    50258,
+    50359,
+    50360,
+    50361,
+    50362,
+    50363
+  ],
+  "task_to_id": {
+    "transcribe": 50360,
+    "translate": 50359
+  },
+  "transformers_version": "4.46.0.dev0"
+}

model-00001-of-00002.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3df395a63ce8603b7cf792ab314a2f482a6c90cc42e7802fabed7dd8cb1b078d
 size 4993448880

 version https://git-lfs.github.com/spec/v1
+oid sha256:08e0005225b3dbaf55dd13ac62926cc7e02c1025d66fa375e6fb305ff79cd4f9
 size 4993448880

model-00002-of-00002.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:457499abac91f8ed2c91ba131d18af738ae39226827d0396a01f4b1f86a8db58
 size 1180663192

 version https://git-lfs.github.com/spec/v1
+oid sha256:630ca774672856d2e0e39a702e590f635a1cfc5726a64b6578ab46dd367369a9
 size 1180663192

run.sh CHANGED Viewed

@@ -31,7 +31,6 @@ WANDB_PROJECT=whisper-medium-eu \
 	--gradient_checkpointing \
 	--fp16 \
 	--overwrite_output_dir \
-	--do_train \
 	--do_eval \
 	--predict_with_generate \
 	--do_normalize_eval \

 	--gradient_checkpointing \
 	--fp16 \
 	--overwrite_output_dir \
 	--do_eval \
 	--predict_with_generate \
 	--do_normalize_eval \

runs/Oct07_10-30-48_tknika/events.out.tfevents.1728297734.tknika.20799.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f38f4db3f506f80567533906f5fc02740168f2b1e4dd86fd95027d67e4023c3c
+size 360

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:757451636d3aa41f66b5568ae6294cf0ed27d536ee8afce9c0186f687073cc5a
 size 5368

 version https://git-lfs.github.com/spec/v1
+oid sha256:4b703135451bdb3fdf1b0263595ef460845b9b248a97e458a32874babc3e4138
 size 5368

wandb/debug-internal.log CHANGED Viewed

@@ -1,10 +1,10 @@
-{"time":"2024-10-05T14:14:14.99736495Z","level":"INFO","msg":"using version","core version":"0.18.3"}
-{"time":"2024-10-05T14:14:14.99738358Z","level":"INFO","msg":"created symlink","path":"/home/tknika/whisper-large-eu/wandb/run-20241005_141414-821qpm7o/logs/debug-core.log"}
-{"time":"2024-10-05T14:14:14.999080266Z","level":"ERROR","msg":"dialing: google: could not find default credentials. See https://cloud.google.com/docs/authentication/external/set-up-adc for more information"}
-{"time":"2024-10-05T14:14:15.006876033Z","level":"INFO","msg":"created new stream","id":"821qpm7o"}
-{"time":"2024-10-05T14:14:15.006930263Z","level":"INFO","msg":"stream: started","id":"821qpm7o"}
-{"time":"2024-10-05T14:14:15.006981772Z","level":"INFO","msg":"sender: started","stream_id":{"value":"821qpm7o"}}
-{"time":"2024-10-05T14:14:15.006988882Z","level":"INFO","msg":"handler: started","stream_id":{"value":"821qpm7o"}}
-{"time":"2024-10-05T14:14:15.006956622Z","level":"INFO","msg":"writer: Do: started","stream_id":{"value":"821qpm7o"}}
-{"time":"2024-10-05T14:14:15.412186114Z","level":"INFO","msg":"wandb-core","!BADKEY":null}
-{"time":"2024-10-05T14:14:15.414550494Z","level":"INFO","msg":"Starting system monitor"}

+{"time":"2024-10-07T12:56:15.257353437Z","level":"INFO","msg":"using version","core version":"0.18.3"}
+{"time":"2024-10-07T12:56:15.257380326Z","level":"INFO","msg":"created symlink","path":"/home/tknika/whisper-large-eu/wandb/run-20241007_125615-a3z1jk8c/logs/debug-core.log"}
+{"time":"2024-10-07T12:56:15.259721418Z","level":"ERROR","msg":"dialing: google: could not find default credentials. See https://cloud.google.com/docs/authentication/external/set-up-adc for more information"}
+{"time":"2024-10-07T12:56:15.26442537Z","level":"INFO","msg":"created new stream","id":"a3z1jk8c"}
+{"time":"2024-10-07T12:56:15.264442509Z","level":"INFO","msg":"stream: started","id":"a3z1jk8c"}
+{"time":"2024-10-07T12:56:15.264458959Z","level":"INFO","msg":"handler: started","stream_id":{"value":"a3z1jk8c"}}
+{"time":"2024-10-07T12:56:15.264475109Z","level":"INFO","msg":"sender: started","stream_id":{"value":"a3z1jk8c"}}
+{"time":"2024-10-07T12:56:15.264497739Z","level":"INFO","msg":"writer: Do: started","stream_id":{"value":"a3z1jk8c"}}
+{"time":"2024-10-07T12:56:15.681557119Z","level":"INFO","msg":"wandb-core","!BADKEY":null}
+{"time":"2024-10-07T12:56:15.68260129Z","level":"INFO","msg":"Starting system monitor"}

wandb/debug.log CHANGED Viewed

@@ -1,28 +1,28 @@
-2024-10-05 14:14:14,992 INFO    MainThread:13682 [wandb_setup.py:_flush():79] Current SDK version is 0.18.3
-2024-10-05 14:14:14,993 INFO    MainThread:13682 [wandb_setup.py:_flush():79] Configure stats pid to 13682
-2024-10-05 14:14:14,993 INFO    MainThread:13682 [wandb_setup.py:_flush():79] Loading settings from /home/tknika/.config/wandb/settings
-2024-10-05 14:14:14,993 INFO    MainThread:13682 [wandb_setup.py:_flush():79] Loading settings from /home/tknika/whisper-large-eu/wandb/settings
-2024-10-05 14:14:14,993 INFO    MainThread:13682 [wandb_setup.py:_flush():79] Loading settings from environment variables: {'project': 'whisper-medium-eu'}
-2024-10-05 14:14:14,993 INFO    MainThread:13682 [wandb_setup.py:_flush():79] Applying setup settings: {'mode': None, '_disable_service': None}
-2024-10-05 14:14:14,993 INFO    MainThread:13682 [wandb_setup.py:_flush():79] Inferring run settings from compute environment: {'program_relpath': 'run_speech_recognition_seq2seq_streaming.py', 'program_abspath': '/home/tknika/whisper-large-eu/run_speech_recognition_seq2seq_streaming.py', 'program': '/home/tknika/whisper-large-eu/run_speech_recognition_seq2seq_streaming.py'}
-2024-10-05 14:14:14,993 INFO    MainThread:13682 [wandb_setup.py:_flush():79] Applying login settings: {}
-2024-10-05 14:14:14,993 INFO    MainThread:13682 [wandb_init.py:_log_setup():532] Logging user logs to /home/tknika/whisper-large-eu/wandb/run-20241005_141414-821qpm7o/logs/debug.log
-2024-10-05 14:14:14,993 INFO    MainThread:13682 [wandb_init.py:_log_setup():533] Logging internal logs to /home/tknika/whisper-large-eu/wandb/run-20241005_141414-821qpm7o/logs/debug-internal.log
-2024-10-05 14:14:14,993 INFO    MainThread:13682 [wandb_init.py:init():617] calling init triggers
-2024-10-05 14:14:14,993 INFO    MainThread:13682 [wandb_init.py:init():624] wandb.init called with sweep_config: {}
 config: {}
-2024-10-05 14:14:14,993 INFO    MainThread:13682 [wandb_init.py:init():667] starting backend
-2024-10-05 14:14:14,993 INFO    MainThread:13682 [wandb_init.py:init():671] sending inform_init request
-2024-10-05 14:14:14,995 INFO    MainThread:13682 [backend.py:_multiprocessing_setup():104] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
-2024-10-05 14:14:14,995 INFO    MainThread:13682 [wandb_init.py:init():684] backend started and connected
-2024-10-05 14:14:14,999 INFO    MainThread:13682 [wandb_init.py:init():779] updated telemetry
-2024-10-05 14:14:15,005 INFO    MainThread:13682 [wandb_init.py:init():812] communicating run to backend with 90.0 second timeout
-2024-10-05 14:14:15,407 INFO    MainThread:13682 [wandb_init.py:init():863] starting run threads in backend
-2024-10-05 14:14:15,503 INFO    MainThread:13682 [wandb_run.py:_console_start():2465] atexit reg
-2024-10-05 14:14:15,503 INFO    MainThread:13682 [wandb_run.py:_redirect():2313] redirect: wrap_raw
-2024-10-05 14:14:15,503 INFO    MainThread:13682 [wandb_run.py:_redirect():2378] Wrapping output streams.
-2024-10-05 14:14:15,503 INFO    MainThread:13682 [wandb_run.py:_redirect():2403] Redirects installed.
-2024-10-05 14:14:15,504 INFO    MainThread:13682 [wandb_init.py:init():907] run started, returning control to user process
-2024-10-05 14:14:15,506 INFO    MainThread:13682 [wandb_run.py:_config_callback():1394] config_cb None None {'vocab_size': 51866, 'num_mel_bins': 128, 'd_model': 1280, 'encoder_layers': 32, 'encoder_attention_heads': 20, 'decoder_layers': 32, 'decoder_attention_heads': 20, 'decoder_ffn_dim': 5120, 'encoder_ffn_dim': 5120, 'dropout': 0.0, 'attention_dropout': 0.0, 'activation_dropout': 0.0, 'activation_function': 'gelu', 'init_std': 0.02, 'encoder_layerdrop': 0.0, 'decoder_layerdrop': 0.0, 'use_cache': False, 'num_hidden_layers': 32, 'scale_embedding': False, 'max_source_positions': 1500, 'max_target_positions': 448, 'classifier_proj_size': 256, 'use_weighted_layer_sum': False, 'apply_spec_augment': False, 'mask_time_prob': 0.05, 'mask_time_length': 10, 'mask_time_min_masks': 2, 'mask_feature_prob': 0.0, 'mask_feature_length': 10, 'mask_feature_min_masks': 0, 'median_filter_width': 7, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float16', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': True, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 448, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': [220, 50257], 'architectures': ['WhisperForConditionalGeneration'], 'finetuning_task': 
None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 50257, 'pad_token_id': 50256, 'eos_token_id': 50257, 'sep_token_id': None, 'decoder_start_token_id': 50258, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'openai/whisper-large-v3', 'transformers_version': '4.46.0.dev0', 'model_type': 'whisper', 'forced_decoder_ids': None, 'output_dir': './', 'overwrite_output_dir': True, 'do_train': True, 'do_eval': True, 'do_predict': False, 'eval_strategy': 'steps', 'prediction_loss_only': False, 'per_device_train_batch_size': 16, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 4.375e-06, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 3.0, 'max_steps': 10000, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 500, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': './runs/Oct05_14-14-00_tknika', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 25, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 1000, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 
'eval_steps': 500, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': 'whisper-large-eu', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': True, 'metric_for_best_model': 'wer', 'greater_is_better': False, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'input_length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': True, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'include_for_metrics': [], 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'evaluation_strategy': 'steps', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 
'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'eval_use_gather_object': False, 'sortish_sampler': False, 'predict_with_generate': True, 'generation_max_length': 228, 'generation_num_beams': None, 'generation_config': None}
-2024-10-05 14:14:15,510 INFO    MainThread:13682 [wandb_config.py:__setitem__():154] config set model/num_parameters = 1543490560 - <bound method Run._config_callback of <wandb.sdk.wandb_run.Run object at 0x747f37484590>>
-2024-10-05 14:14:15,510 INFO    MainThread:13682 [wandb_run.py:_config_callback():1394] config_cb model/num_parameters 1543490560 None

+2024-10-07 12:56:15,251 INFO    MainThread:20958 [wandb_setup.py:_flush():79] Current SDK version is 0.18.3
+2024-10-07 12:56:15,252 INFO    MainThread:20958 [wandb_setup.py:_flush():79] Configure stats pid to 20958
+2024-10-07 12:56:15,252 INFO    MainThread:20958 [wandb_setup.py:_flush():79] Loading settings from /home/tknika/.config/wandb/settings
+2024-10-07 12:56:15,252 INFO    MainThread:20958 [wandb_setup.py:_flush():79] Loading settings from /home/tknika/whisper-large-eu/wandb/settings
+2024-10-07 12:56:15,252 INFO    MainThread:20958 [wandb_setup.py:_flush():79] Loading settings from environment variables: {'project': 'whisper-medium-eu'}
+2024-10-07 12:56:15,252 INFO    MainThread:20958 [wandb_setup.py:_flush():79] Applying setup settings: {'mode': None, '_disable_service': None}
+2024-10-07 12:56:15,252 INFO    MainThread:20958 [wandb_setup.py:_flush():79] Inferring run settings from compute environment: {'program_relpath': 'run_speech_recognition_seq2seq_streaming.py', 'program_abspath': '/home/tknika/whisper-large-eu/run_speech_recognition_seq2seq_streaming.py', 'program': '/home/tknika/whisper-large-eu/run_speech_recognition_seq2seq_streaming.py'}
+2024-10-07 12:56:15,252 INFO    MainThread:20958 [wandb_setup.py:_flush():79] Applying login settings: {}
+2024-10-07 12:56:15,252 INFO    MainThread:20958 [wandb_init.py:_log_setup():532] Logging user logs to /home/tknika/whisper-large-eu/wandb/run-20241007_125615-a3z1jk8c/logs/debug.log
+2024-10-07 12:56:15,252 INFO    MainThread:20958 [wandb_init.py:_log_setup():533] Logging internal logs to /home/tknika/whisper-large-eu/wandb/run-20241007_125615-a3z1jk8c/logs/debug-internal.log
+2024-10-07 12:56:15,252 INFO    MainThread:20958 [wandb_init.py:init():617] calling init triggers
+2024-10-07 12:56:15,252 INFO    MainThread:20958 [wandb_init.py:init():624] wandb.init called with sweep_config: {}
 config: {}
+2024-10-07 12:56:15,252 INFO    MainThread:20958 [wandb_init.py:init():667] starting backend
+2024-10-07 12:56:15,252 INFO    MainThread:20958 [wandb_init.py:init():671] sending inform_init request
+2024-10-07 12:56:15,254 INFO    MainThread:20958 [backend.py:_multiprocessing_setup():104] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
+2024-10-07 12:56:15,254 INFO    MainThread:20958 [wandb_init.py:init():684] backend started and connected
+2024-10-07 12:56:15,258 INFO    MainThread:20958 [wandb_init.py:init():779] updated telemetry
+2024-10-07 12:56:15,265 INFO    MainThread:20958 [wandb_init.py:init():812] communicating run to backend with 90.0 second timeout
+2024-10-07 12:56:15,676 INFO    MainThread:20958 [wandb_init.py:init():863] starting run threads in backend
+2024-10-07 12:56:15,774 INFO    MainThread:20958 [wandb_run.py:_console_start():2465] atexit reg
+2024-10-07 12:56:15,774 INFO    MainThread:20958 [wandb_run.py:_redirect():2313] redirect: wrap_raw
+2024-10-07 12:56:15,774 INFO    MainThread:20958 [wandb_run.py:_redirect():2378] Wrapping output streams.
+2024-10-07 12:56:15,774 INFO    MainThread:20958 [wandb_run.py:_redirect():2403] Redirects installed.
+2024-10-07 12:56:15,775 INFO    MainThread:20958 [wandb_init.py:init():907] run started, returning control to user process
+2024-10-07 12:56:15,777 INFO    MainThread:20958 [wandb_run.py:_config_callback():1394] config_cb None None {'vocab_size': 51866, 'num_mel_bins': 128, 'd_model': 1280, 'encoder_layers': 32, 'encoder_attention_heads': 20, 'decoder_layers': 32, 'decoder_attention_heads': 20, 'decoder_ffn_dim': 5120, 'encoder_ffn_dim': 5120, 'dropout': 0.0, 'attention_dropout': 0.0, 'activation_dropout': 0.0, 'activation_function': 'gelu', 'init_std': 0.02, 'encoder_layerdrop': 0.0, 'decoder_layerdrop': 0.0, 'use_cache': False, 'num_hidden_layers': 32, 'scale_embedding': False, 'max_source_positions': 1500, 'max_target_positions': 448, 'classifier_proj_size': 256, 'use_weighted_layer_sum': False, 'apply_spec_augment': False, 'mask_time_prob': 0.05, 'mask_time_length': 10, 'mask_time_min_masks': 2, 'mask_feature_prob': 0.0, 'mask_feature_length': 10, 'mask_feature_min_masks': 0, 'median_filter_width': 7, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float16', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': True, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 448, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': [220, 50257], 'architectures': ['WhisperForConditionalGeneration'], 'finetuning_task': 
None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 50257, 'pad_token_id': 50256, 'eos_token_id': 50257, 'sep_token_id': None, 'decoder_start_token_id': 50258, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'openai/whisper-large-v3', 'transformers_version': '4.46.0.dev0', 'model_type': 'whisper', 'forced_decoder_ids': None, 'output_dir': './', 'overwrite_output_dir': True, 'do_train': False, 'do_eval': True, 'do_predict': False, 'eval_strategy': 'steps', 'prediction_loss_only': False, 'per_device_train_batch_size': 16, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 4.375e-06, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 3.0, 'max_steps': 10000, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 500, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': './runs/Oct07_11-46-39_tknika', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 25, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 1000, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 
'eval_steps': 500, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': 'whisper-large-eu', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': True, 'metric_for_best_model': 'wer', 'greater_is_better': False, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'input_length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': True, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'include_for_metrics': [], 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'evaluation_strategy': 'steps', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 
'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'eval_use_gather_object': False, 'sortish_sampler': False, 'predict_with_generate': True, 'generation_max_length': 228, 'generation_num_beams': None, 'generation_config': None}
+2024-10-07 12:56:15,780 INFO    MainThread:20958 [wandb_config.py:__setitem__():154] config set model/num_parameters = 1543490560 - <bound method Run._config_callback of <wandb.sdk.wandb_run.Run object at 0x748ced2ceae0>>
+2024-10-07 12:56:15,780 INFO    MainThread:20958 [wandb_run.py:_config_callback():1394] config_cb model/num_parameters 1543490560 None

wandb/run-20241005_141414-821qpm7o/files/config.yaml ADDED Viewed

	@@ -0,0 +1,563 @@

+_name_or_path:
+    value: openai/whisper-large-v3
+_wandb:
+    value:
+        cli_version: 0.18.3
+        m:
+            - "1": train/grad_norm
+              "5": 2
+              "6":
+                - 1
+                - 3
+              "7": []
+            - "1": train/global_step
+              "6":
+                - 3
+              "7": []
+            - "1": eval/loss
+              "5": 2
+              "6":
+                - 1
+                - 3
+              "7": []
+            - "1": eval/samples_per_second
+              "5": 2
+              "6":
+                - 1
+                - 3
+              "7": []
+            - "1": train/epoch
+              "5": 2
+              "6":
+                - 1
+                - 3
+              "7": []
+            - "1": train/loss
+              "5": 2
+              "6":
+                - 1
+                - 3
+              "7": []
+            - "1": eval/runtime
+              "5": 2
+              "6":
+                - 1
+                - 3
+              "7": []
+            - "1": eval/steps_per_second
+              "5": 2
+              "6":
+                - 1
+                - 3
+              "7": []
+            - "1": eval/wer
+              "5": 2
+              "6":
+                - 1
+                - 3
+              "7": []
+            - "1": train/learning_rate
+              "5": 2
+              "6":
+                - 1
+                - 3
+              "7": []
+        python_version: 3.12.3
+        t:
+            "1":
+                - 1
+                - 5
+                - 11
+                - 49
+                - 51
+                - 53
+                - 55
+                - 71
+                - 100
+            "2":
+                - 1
+                - 5
+                - 11
+                - 49
+                - 51
+                - 53
+                - 55
+                - 71
+                - 100
+            "3":
+                - 7
+                - 13
+                - 19
+                - 23
+                - 55
+                - 62
+                - 66
+            "4": 3.12.3
+            "5": 0.18.3
+            "6": 4.46.0.dev0
+            "8":
+                - 5
+            "9":
+                "1": transformers_trainer
+            "12": 0.18.3
+            "13": linux-x86_64
+accelerator_config:
+    value:
+        dispatch_batches: null
+        even_batches: true
+        gradient_accumulation_kwargs: null
+        non_blocking: false
+        split_batches: false
+        use_seedable_sampler: true
+activation_dropout:
+    value: 0
+activation_function:
+    value: gelu
+adafactor:
+    value: false
+adam_beta1:
+    value: 0.9
+adam_beta2:
+    value: 0.999
+adam_epsilon:
+    value: 1e-08
+add_cross_attention:
+    value: false
+apply_spec_augment:
+    value: false
+architectures:
+    value:
+        - WhisperForConditionalGeneration
+attention_dropout:
+    value: 0
+auto_find_batch_size:
+    value: false
+bad_words_ids:
+    value: null
+batch_eval_metrics:
+    value: false
+begin_suppress_tokens:
+    value:
+        - 220
+        - 50257
+bf16:
+    value: false
+bf16_full_eval:
+    value: false
+bos_token_id:
+    value: 50257
+chunk_size_feed_forward:
+    value: 0
+classifier_proj_size:
+    value: 256
+cross_attention_hidden_size:
+    value: null
+d_model:
+    value: 1280
+data_seed:
+    value: null
+dataloader_drop_last:
+    value: false
+dataloader_num_workers:
+    value: 0
+dataloader_persistent_workers:
+    value: false
+dataloader_pin_memory:
+    value: true
+dataloader_prefetch_factor:
+    value: null
+ddp_backend:
+    value: null
+ddp_broadcast_buffers:
+    value: null
+ddp_bucket_cap_mb:
+    value: null
+ddp_find_unused_parameters:
+    value: null
+ddp_timeout:
+    value: 1800
+debug:
+    value: []
+decoder_attention_heads:
+    value: 20
+decoder_ffn_dim:
+    value: 5120
+decoder_layerdrop:
+    value: 0
+decoder_layers:
+    value: 32
+decoder_start_token_id:
+    value: 50258
+deepspeed:
+    value: null
+disable_tqdm:
+    value: false
+dispatch_batches:
+    value: null
+diversity_penalty:
+    value: 0
+do_eval:
+    value: true
+do_predict:
+    value: false
+do_sample:
+    value: false
+do_train:
+    value: true
+dropout:
+    value: 0
+early_stopping:
+    value: false
+encoder_attention_heads:
+    value: 20
+encoder_ffn_dim:
+    value: 5120
+encoder_layerdrop:
+    value: 0
+encoder_layers:
+    value: 32
+encoder_no_repeat_ngram_size:
+    value: 0
+eos_token_id:
+    value: 50257
+eval_accumulation_steps:
+    value: null
+eval_delay:
+    value: 0
+eval_do_concat_batches:
+    value: true
+eval_on_start:
+    value: false
+eval_steps:
+    value: 500
+eval_strategy:
+    value: steps
+eval_use_gather_object:
+    value: false
+evaluation_strategy:
+    value: steps
+exponential_decay_length_penalty:
+    value: null
+finetuning_task:
+    value: null
+forced_bos_token_id:
+    value: null
+forced_decoder_ids:
+    value: null
+forced_eos_token_id:
+    value: null
+fp16:
+    value: true
+fp16_backend:
+    value: auto
+fp16_full_eval:
+    value: false
+fp16_opt_level:
+    value: O1
+fsdp:
+    value: []
+fsdp_config:
+    value:
+        min_num_params: 0
+        xla: false
+        xla_fsdp_grad_ckpt: false
+        xla_fsdp_v2: false
+fsdp_min_num_params:
+    value: 0
+fsdp_transformer_layer_cls_to_wrap:
+    value: null
+full_determinism:
+    value: false
+generation_config:
+    value: null
+generation_max_length:
+    value: 228
+generation_num_beams:
+    value: null
+gradient_accumulation_steps:
+    value: 1
+gradient_checkpointing:
+    value: true
+gradient_checkpointing_kwargs:
+    value: null
+greater_is_better:
+    value: false
+group_by_length:
+    value: false
+half_precision_backend:
+    value: auto
+hub_always_push:
+    value: false
+hub_model_id:
+    value: null
+hub_private_repo:
+    value: false
+hub_strategy:
+    value: every_save
+hub_token:
+    value: <HUB_TOKEN>
+id2label:
+    value:
+        "0": LABEL_0
+        "1": LABEL_1
+ignore_data_skip:
+    value: false
+include_for_metrics:
+    value: []
+include_inputs_for_metrics:
+    value: false
+include_num_input_tokens_seen:
+    value: false
+include_tokens_per_second:
+    value: false
+init_std:
+    value: 0.02
+is_decoder:
+    value: false
+is_encoder_decoder:
+    value: true
+jit_mode_eval:
+    value: false
+label_names:
+    value: null
+label_smoothing_factor:
+    value: 0
+label2id:
+    value:
+        LABEL_0: 0
+        LABEL_1: 1
+learning_rate:
+    value: 4.375e-06
+length_column_name:
+    value: input_length
+length_penalty:
+    value: 1
+load_best_model_at_end:
+    value: true
+local_rank:
+    value: 0
+log_level:
+    value: passive
+log_level_replica:
+    value: warning
+log_on_each_node:
+    value: true
+logging_dir:
+    value: ./runs/Oct05_14-14-00_tknika
+logging_first_step:
+    value: false
+logging_nan_inf_filter:
+    value: true
+logging_steps:
+    value: 25
+logging_strategy:
+    value: steps
+lr_scheduler_type:
+    value: linear
+mask_feature_length:
+    value: 10
+mask_feature_min_masks:
+    value: 0
+mask_feature_prob:
+    value: 0
+mask_time_length:
+    value: 10
+mask_time_min_masks:
+    value: 2
+mask_time_prob:
+    value: 0.05
+max_grad_norm:
+    value: 1
+max_length:
+    value: 448
+max_source_positions:
+    value: 1500
+max_steps:
+    value: 10000
+max_target_positions:
+    value: 448
+median_filter_width:
+    value: 7
+metric_for_best_model:
+    value: wer
+min_length:
+    value: 0
+model/num_parameters:
+    value: 1543490560
+model_type:
+    value: whisper
+mp_parameters:
+    value: ""
+neftune_noise_alpha:
+    value: null
+no_cuda:
+    value: false
+no_repeat_ngram_size:
+    value: 0
+num_beam_groups:
+    value: 1
+num_beams:
+    value: 1
+num_hidden_layers:
+    value: 32
+num_mel_bins:
+    value: 128
+num_return_sequences:
+    value: 1
+num_train_epochs:
+    value: 3
+optim:
+    value: adamw_torch
+optim_args:
+    value: null
+optim_target_modules:
+    value: null
+output_attentions:
+    value: false
+output_dir:
+    value: ./
+output_hidden_states:
+    value: false
+output_scores:
+    value: false
+overwrite_output_dir:
+    value: true
+pad_token_id:
+    value: 50256
+past_index:
+    value: -1
+per_device_eval_batch_size:
+    value: 8
+per_device_train_batch_size:
+    value: 16
+per_gpu_eval_batch_size:
+    value: null
+per_gpu_train_batch_size:
+    value: null
+predict_with_generate:
+    value: true
+prediction_loss_only:
+    value: false
+prefix:
+    value: null
+problem_type:
+    value: null
+push_to_hub:
+    value: true
+push_to_hub_model_id:
+    value: null
+push_to_hub_organization:
+    value: null
+push_to_hub_token:
+    value: <PUSH_TO_HUB_TOKEN>
+ray_scope:
+    value: last
+remove_invalid_values:
+    value: false
+remove_unused_columns:
+    value: true
+repetition_penalty:
+    value: 1
+report_to:
+    value:
+        - wandb
+restore_callback_states_from_checkpoint:
+    value: false
+resume_from_checkpoint:
+    value: null
+return_dict:
+    value: true
+return_dict_in_generate:
+    value: false
+run_name:
+    value: whisper-large-eu
+save_on_each_node:
+    value: false
+save_only_model:
+    value: false
+save_safetensors:
+    value: true
+save_steps:
+    value: 1000
+save_strategy:
+    value: steps
+save_total_limit:
+    value: null
+scale_embedding:
+    value: false
+seed:
+    value: 42
+sep_token_id:
+    value: null
+skip_memory_metrics:
+    value: true
+sortish_sampler:
+    value: false
+split_batches:
+    value: null
+suppress_tokens:
+    value: null
+task_specific_params:
+    value: null
+temperature:
+    value: 1
+tf_legacy_loss:
+    value: false
+tf32:
+    value: null
+tie_encoder_decoder:
+    value: false
+tie_word_embeddings:
+    value: true
+tokenizer_class:
+    value: null
+top_k:
+    value: 50
+top_p:
+    value: 1
+torch_compile:
+    value: false
+torch_compile_backend:
+    value: null
+torch_compile_mode:
+    value: null
+torch_dtype:
+    value: float16
+torch_empty_cache_steps:
+    value: null
+torchdynamo:
+    value: null
+torchscript:
+    value: false
+tpu_metrics_debug:
+    value: false
+tpu_num_cores:
+    value: null
+transformers_version:
+    value: 4.46.0.dev0
+typical_p:
+    value: 1
+use_bfloat16:
+    value: false
+use_cache:
+    value: false
+use_cpu:
+    value: false
+use_ipex:
+    value: false
+use_legacy_prediction_loop:
+    value: false
+use_liger_kernel:
+    value: false
+use_mps_device:
+    value: false
+use_weighted_layer_sum:
+    value: false
+vocab_size:
+    value: 51866
+warmup_ratio:
+    value: 0
+warmup_steps:
+    value: 500
+weight_decay:
+    value: 0

wandb/run-20241005_141414-821qpm7o/files/output.log CHANGED Viewed

The diff for this file is too large to render. See raw diff

wandb/run-20241005_141414-821qpm7o/files/wandb-summary.json ADDED Viewed

	@@ -0,0 +1 @@

+ {"eval/runtime":4130.3962,"train_samples_per_second":1.426,"total_flos":5.435317790834688e+20,"_step":420,"eval/steps_per_second":0.413,"train/epoch":5.148,"train_runtime":112211.4993,"train_loss":0.08380846776664257,"eval/wer":7.07244677342519,"_wandb":{"runtime":112523},"_runtime":112210.651756658,"train/grad_norm":1.3872036933898926,"eval/samples_per_second":3.3,"eval/loss":0.12359699606895447,"train/learning_rate":1.381578947368421e-09,"_timestamp":1.728249865647773e+09,"train_steps_per_second":0.089,"train/loss":0.0504,"train/global_step":10000}

wandb/run-20241005_141414-821qpm7o/logs/debug-core.log CHANGED Viewed

@@ -5,3 +5,10 @@
 {"time":"2024-10-05T14:14:14.397628756Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"127.0.0.1:50392"}
 {"time":"2024-10-05T14:14:14.997161032Z","level":"INFO","msg":"handleInformInit: received","streamId":"821qpm7o","id":"127.0.0.1:50392"}
 {"time":"2024-10-05T14:14:15.006939443Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"821qpm7o","id":"127.0.0.1:50392"}

 {"time":"2024-10-05T14:14:14.397628756Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"127.0.0.1:50392"}
 {"time":"2024-10-05T14:14:14.997161032Z","level":"INFO","msg":"handleInformInit: received","streamId":"821qpm7o","id":"127.0.0.1:50392"}
 {"time":"2024-10-05T14:14:15.006939443Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"821qpm7o","id":"127.0.0.1:50392"}
+{"time":"2024-10-06T21:29:38.663713629Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"127.0.0.1:50392"}
+{"time":"2024-10-06T21:29:38.663840158Z","level":"INFO","msg":"connection: Close: initiating connection closure","id":"127.0.0.1:50392"}
+{"time":"2024-10-06T21:29:38.663872928Z","level":"INFO","msg":"server is shutting down"}
+{"time":"2024-10-06T21:29:38.663969078Z","level":"INFO","msg":"connection: Close: connection successfully closed","id":"127.0.0.1:50392"}
+{"time":"2024-10-06T21:29:42.340150906Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"127.0.0.1:50392"}
+{"time":"2024-10-06T21:29:42.340203005Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"127.0.0.1:50392"}
+{"time":"2024-10-06T21:29:42.340238265Z","level":"INFO","msg":"server is closed"}

wandb/run-20241005_141414-821qpm7o/logs/debug-internal.log CHANGED Viewed

@@ -8,3 +8,12 @@
 {"time":"2024-10-05T14:14:15.006956622Z","level":"INFO","msg":"writer: Do: started","stream_id":{"value":"821qpm7o"}}
 {"time":"2024-10-05T14:14:15.412186114Z","level":"INFO","msg":"wandb-core","!BADKEY":null}
 {"time":"2024-10-05T14:14:15.414550494Z","level":"INFO","msg":"Starting system monitor"}

 {"time":"2024-10-05T14:14:15.006956622Z","level":"INFO","msg":"writer: Do: started","stream_id":{"value":"821qpm7o"}}
 {"time":"2024-10-05T14:14:15.412186114Z","level":"INFO","msg":"wandb-core","!BADKEY":null}
 {"time":"2024-10-05T14:14:15.414550494Z","level":"INFO","msg":"Starting system monitor"}
+{"time":"2024-10-06T08:59:01.966379884Z","level":"INFO","msg":"api: retrying HTTP error","status":502,"url":"https://api.wandb.ai/files/itzune/whisper-medium-eu/821qpm7o/file_stream"}
+{"time":"2024-10-06T21:29:38.663823648Z","level":"INFO","msg":"stream: closing","id":"821qpm7o"}
+{"time":"2024-10-06T21:29:38.663868898Z","level":"INFO","msg":"Stopping system monitor"}
+{"time":"2024-10-06T21:29:38.671834293Z","level":"INFO","msg":"Stopped system monitor"}
+{"time":"2024-10-06T21:29:41.998044798Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
+{"time":"2024-10-06T21:29:42.339781659Z","level":"INFO","msg":"handler: closed","stream_id":{"value":"821qpm7o"}}
+{"time":"2024-10-06T21:29:42.339864838Z","level":"INFO","msg":"writer: Close: closed","stream_id":{"value":"821qpm7o"}}
+{"time":"2024-10-06T21:29:42.339872028Z","level":"INFO","msg":"sender: closed","stream_id":{"value":"821qpm7o"}}
+{"time":"2024-10-06T21:29:42.339997497Z","level":"INFO","msg":"stream: closed","id":"821qpm7o"}

wandb/run-20241005_141414-821qpm7o/logs/debug.log CHANGED Viewed

@@ -26,3 +26,4 @@ config: {}
 2024-10-05 14:14:15,506 INFO    MainThread:13682 [wandb_run.py:_config_callback():1394] config_cb None None {'vocab_size': 51866, 'num_mel_bins': 128, 'd_model': 1280, 'encoder_layers': 32, 'encoder_attention_heads': 20, 'decoder_layers': 32, 'decoder_attention_heads': 20, 'decoder_ffn_dim': 5120, 'encoder_ffn_dim': 5120, 'dropout': 0.0, 'attention_dropout': 0.0, 'activation_dropout': 0.0, 'activation_function': 'gelu', 'init_std': 0.02, 'encoder_layerdrop': 0.0, 'decoder_layerdrop': 0.0, 'use_cache': False, 'num_hidden_layers': 32, 'scale_embedding': False, 'max_source_positions': 1500, 'max_target_positions': 448, 'classifier_proj_size': 256, 'use_weighted_layer_sum': False, 'apply_spec_augment': False, 'mask_time_prob': 0.05, 'mask_time_length': 10, 'mask_time_min_masks': 2, 'mask_feature_prob': 0.0, 'mask_feature_length': 10, 'mask_feature_min_masks': 0, 'median_filter_width': 7, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float16', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': True, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 448, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': [220, 50257], 'architectures': ['WhisperForConditionalGeneration'], 'finetuning_task': 
None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 50257, 'pad_token_id': 50256, 'eos_token_id': 50257, 'sep_token_id': None, 'decoder_start_token_id': 50258, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'openai/whisper-large-v3', 'transformers_version': '4.46.0.dev0', 'model_type': 'whisper', 'forced_decoder_ids': None, 'output_dir': './', 'overwrite_output_dir': True, 'do_train': True, 'do_eval': True, 'do_predict': False, 'eval_strategy': 'steps', 'prediction_loss_only': False, 'per_device_train_batch_size': 16, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 4.375e-06, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 3.0, 'max_steps': 10000, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 500, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': './runs/Oct05_14-14-00_tknika', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 25, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 1000, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 
'eval_steps': 500, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': 'whisper-large-eu', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': True, 'metric_for_best_model': 'wer', 'greater_is_better': False, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'input_length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': True, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'include_for_metrics': [], 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'evaluation_strategy': 'steps', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 
'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'eval_use_gather_object': False, 'sortish_sampler': False, 'predict_with_generate': True, 'generation_max_length': 228, 'generation_num_beams': None, 'generation_config': None}
 2024-10-05 14:14:15,510 INFO    MainThread:13682 [wandb_config.py:__setitem__():154] config set model/num_parameters = 1543490560 - <bound method Run._config_callback of <wandb.sdk.wandb_run.Run object at 0x747f37484590>>
 2024-10-05 14:14:15,510 INFO    MainThread:13682 [wandb_run.py:_config_callback():1394] config_cb model/num_parameters 1543490560 None

 2024-10-05 14:14:15,506 INFO    MainThread:13682 [wandb_run.py:_config_callback():1394] config_cb None None {'vocab_size': 51866, 'num_mel_bins': 128, 'd_model': 1280, 'encoder_layers': 32, 'encoder_attention_heads': 20, 'decoder_layers': 32, 'decoder_attention_heads': 20, 'decoder_ffn_dim': 5120, 'encoder_ffn_dim': 5120, 'dropout': 0.0, 'attention_dropout': 0.0, 'activation_dropout': 0.0, 'activation_function': 'gelu', 'init_std': 0.02, 'encoder_layerdrop': 0.0, 'decoder_layerdrop': 0.0, 'use_cache': False, 'num_hidden_layers': 32, 'scale_embedding': False, 'max_source_positions': 1500, 'max_target_positions': 448, 'classifier_proj_size': 256, 'use_weighted_layer_sum': False, 'apply_spec_augment': False, 'mask_time_prob': 0.05, 'mask_time_length': 10, 'mask_time_min_masks': 2, 'mask_feature_prob': 0.0, 'mask_feature_length': 10, 'mask_feature_min_masks': 0, 'median_filter_width': 7, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float16', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': True, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 448, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': [220, 50257], 'architectures': ['WhisperForConditionalGeneration'], 'finetuning_task': 
None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 50257, 'pad_token_id': 50256, 'eos_token_id': 50257, 'sep_token_id': None, 'decoder_start_token_id': 50258, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'openai/whisper-large-v3', 'transformers_version': '4.46.0.dev0', 'model_type': 'whisper', 'forced_decoder_ids': None, 'output_dir': './', 'overwrite_output_dir': True, 'do_train': True, 'do_eval': True, 'do_predict': False, 'eval_strategy': 'steps', 'prediction_loss_only': False, 'per_device_train_batch_size': 16, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 4.375e-06, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 3.0, 'max_steps': 10000, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 500, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': './runs/Oct05_14-14-00_tknika', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 25, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 1000, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 
'eval_steps': 500, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': 'whisper-large-eu', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': True, 'metric_for_best_model': 'wer', 'greater_is_better': False, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'input_length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': True, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'include_for_metrics': [], 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'evaluation_strategy': 'steps', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 
'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'eval_use_gather_object': False, 'sortish_sampler': False, 'predict_with_generate': True, 'generation_max_length': 228, 'generation_num_beams': None, 'generation_config': None}
 2024-10-05 14:14:15,510 INFO    MainThread:13682 [wandb_config.py:__setitem__():154] config set model/num_parameters = 1543490560 - <bound method Run._config_callback of <wandb.sdk.wandb_run.Run object at 0x747f37484590>>
 2024-10-05 14:14:15,510 INFO    MainThread:13682 [wandb_run.py:_config_callback():1394] config_cb model/num_parameters 1543490560 None
+2024-10-06 21:29:38,664 WARNING MsgRouterThr:13682 [router.py:message_loop():77] message_loop has been closed

wandb/run-20241005_141414-821qpm7o/run-821qpm7o.wandb CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:57538d6fe4cf3de3c7e22f43b680f5f476627d92828d497ca6d2355ed94147ad
-size 20316160

 version https://git-lfs.github.com/spec/v1
+oid sha256:bd99a95650ad5aa391338434550f1b7dad9b0cd6e88bc592ac045c56f68b301b
+size 51687272

wandb/run-20241007_102112-r5qja96d/files/config.yaml ADDED Viewed

	@@ -0,0 +1,508 @@

+_name_or_path:
+    value: openai/whisper-large-v3
+_wandb:
+    value:
+        cli_version: 0.18.3
+        m:
+            - "1": train/global_step
+              "6":
+                - 3
+              "7": []
+        python_version: 3.12.3
+        t:
+            "1":
+                - 1
+                - 5
+                - 11
+                - 49
+                - 51
+                - 53
+                - 55
+                - 71
+                - 100
+            "2":
+                - 1
+                - 5
+                - 11
+                - 49
+                - 51
+                - 53
+                - 55
+                - 71
+                - 100
+            "3":
+                - 7
+                - 13
+                - 19
+                - 23
+                - 55
+                - 66
+            "4": 3.12.3
+            "5": 0.18.3
+            "6": 4.46.0.dev0
+            "8":
+                - 5
+            "9":
+                "1": transformers_trainer
+            "12": 0.18.3
+            "13": linux-x86_64
+accelerator_config:
+    value:
+        dispatch_batches: null
+        even_batches: true
+        gradient_accumulation_kwargs: null
+        non_blocking: false
+        split_batches: false
+        use_seedable_sampler: true
+activation_dropout:
+    value: 0
+activation_function:
+    value: gelu
+adafactor:
+    value: false
+adam_beta1:
+    value: 0.9
+adam_beta2:
+    value: 0.999
+adam_epsilon:
+    value: 1e-08
+add_cross_attention:
+    value: false
+apply_spec_augment:
+    value: false
+architectures:
+    value:
+        - WhisperForConditionalGeneration
+attention_dropout:
+    value: 0
+auto_find_batch_size:
+    value: false
+bad_words_ids:
+    value: null
+batch_eval_metrics:
+    value: false
+begin_suppress_tokens:
+    value:
+        - 220
+        - 50257
+bf16:
+    value: false
+bf16_full_eval:
+    value: false
+bos_token_id:
+    value: 50257
+chunk_size_feed_forward:
+    value: 0
+classifier_proj_size:
+    value: 256
+cross_attention_hidden_size:
+    value: null
+d_model:
+    value: 1280
+data_seed:
+    value: null
+dataloader_drop_last:
+    value: false
+dataloader_num_workers:
+    value: 0
+dataloader_persistent_workers:
+    value: false
+dataloader_pin_memory:
+    value: true
+dataloader_prefetch_factor:
+    value: null
+ddp_backend:
+    value: null
+ddp_broadcast_buffers:
+    value: null
+ddp_bucket_cap_mb:
+    value: null
+ddp_find_unused_parameters:
+    value: null
+ddp_timeout:
+    value: 1800
+debug:
+    value: []
+decoder_attention_heads:
+    value: 20
+decoder_ffn_dim:
+    value: 5120
+decoder_layerdrop:
+    value: 0
+decoder_layers:
+    value: 32
+decoder_start_token_id:
+    value: 50258
+deepspeed:
+    value: null
+disable_tqdm:
+    value: false
+dispatch_batches:
+    value: null
+diversity_penalty:
+    value: 0
+do_eval:
+    value: true
+do_predict:
+    value: false
+do_sample:
+    value: false
+do_train:
+    value: true
+dropout:
+    value: 0
+early_stopping:
+    value: false
+encoder_attention_heads:
+    value: 20
+encoder_ffn_dim:
+    value: 5120
+encoder_layerdrop:
+    value: 0
+encoder_layers:
+    value: 32
+encoder_no_repeat_ngram_size:
+    value: 0
+eos_token_id:
+    value: 50257
+eval_accumulation_steps:
+    value: null
+eval_delay:
+    value: 0
+eval_do_concat_batches:
+    value: true
+eval_on_start:
+    value: false
+eval_steps:
+    value: 500
+eval_strategy:
+    value: steps
+eval_use_gather_object:
+    value: false
+evaluation_strategy:
+    value: steps
+exponential_decay_length_penalty:
+    value: null
+finetuning_task:
+    value: null
+forced_bos_token_id:
+    value: null
+forced_decoder_ids:
+    value: null
+forced_eos_token_id:
+    value: null
+fp16:
+    value: true
+fp16_backend:
+    value: auto
+fp16_full_eval:
+    value: false
+fp16_opt_level:
+    value: O1
+fsdp:
+    value: []
+fsdp_config:
+    value:
+        min_num_params: 0
+        xla: false
+        xla_fsdp_grad_ckpt: false
+        xla_fsdp_v2: false
+fsdp_min_num_params:
+    value: 0
+fsdp_transformer_layer_cls_to_wrap:
+    value: null
+full_determinism:
+    value: false
+generation_config:
+    value: null
+generation_max_length:
+    value: 228
+generation_num_beams:
+    value: null
+gradient_accumulation_steps:
+    value: 1
+gradient_checkpointing:
+    value: true
+gradient_checkpointing_kwargs:
+    value: null
+greater_is_better:
+    value: false
+group_by_length:
+    value: false
+half_precision_backend:
+    value: auto
+hub_always_push:
+    value: false
+hub_model_id:
+    value: null
+hub_private_repo:
+    value: false
+hub_strategy:
+    value: every_save
+hub_token:
+    value: <HUB_TOKEN>
+id2label:
+    value:
+        "0": LABEL_0
+        "1": LABEL_1
+ignore_data_skip:
+    value: false
+include_for_metrics:
+    value: []
+include_inputs_for_metrics:
+    value: false
+include_num_input_tokens_seen:
+    value: false
+include_tokens_per_second:
+    value: false
+init_std:
+    value: 0.02
+is_decoder:
+    value: false
+is_encoder_decoder:
+    value: true
+jit_mode_eval:
+    value: false
+label_names:
+    value: null
+label_smoothing_factor:
+    value: 0
+label2id:
+    value:
+        LABEL_0: 0
+        LABEL_1: 1
+learning_rate:
+    value: 4.375e-06
+length_column_name:
+    value: input_length
+length_penalty:
+    value: 1
+load_best_model_at_end:
+    value: true
+local_rank:
+    value: 0
+log_level:
+    value: passive
+log_level_replica:
+    value: warning
+log_on_each_node:
+    value: true
+logging_dir:
+    value: ./runs/Oct07_10-20-37_tknika
+logging_first_step:
+    value: false
+logging_nan_inf_filter:
+    value: true
+logging_steps:
+    value: 25
+logging_strategy:
+    value: steps
+lr_scheduler_type:
+    value: linear
+mask_feature_length:
+    value: 10
+mask_feature_min_masks:
+    value: 0
+mask_feature_prob:
+    value: 0
+mask_time_length:
+    value: 10
+mask_time_min_masks:
+    value: 2
+mask_time_prob:
+    value: 0.05
+max_grad_norm:
+    value: 1
+max_length:
+    value: 448
+max_source_positions:
+    value: 1500
+max_steps:
+    value: 10000
+max_target_positions:
+    value: 448
+median_filter_width:
+    value: 7
+metric_for_best_model:
+    value: wer
+min_length:
+    value: 0
+model/num_parameters:
+    value: 1543490560
+model_type:
+    value: whisper
+mp_parameters:
+    value: ""
+neftune_noise_alpha:
+    value: null
+no_cuda:
+    value: false
+no_repeat_ngram_size:
+    value: 0
+num_beam_groups:
+    value: 1
+num_beams:
+    value: 1
+num_hidden_layers:
+    value: 32
+num_mel_bins:
+    value: 128
+num_return_sequences:
+    value: 1
+num_train_epochs:
+    value: 3
+optim:
+    value: adamw_torch
+optim_args:
+    value: null
+optim_target_modules:
+    value: null
+output_attentions:
+    value: false
+output_dir:
+    value: ./
+output_hidden_states:
+    value: false
+output_scores:
+    value: false
+overwrite_output_dir:
+    value: true
+pad_token_id:
+    value: 50256
+past_index:
+    value: -1
+per_device_eval_batch_size:
+    value: 8
+per_device_train_batch_size:
+    value: 16
+per_gpu_eval_batch_size:
+    value: null
+per_gpu_train_batch_size:
+    value: null
+predict_with_generate:
+    value: true
+prediction_loss_only:
+    value: false
+prefix:
+    value: null
+problem_type:
+    value: null
+push_to_hub:
+    value: true
+push_to_hub_model_id:
+    value: null
+push_to_hub_organization:
+    value: null
+push_to_hub_token:
+    value: <PUSH_TO_HUB_TOKEN>
+ray_scope:
+    value: last
+remove_invalid_values:
+    value: false
+remove_unused_columns:
+    value: true
+repetition_penalty:
+    value: 1
+report_to:
+    value:
+        - wandb
+restore_callback_states_from_checkpoint:
+    value: false
+resume_from_checkpoint:
+    value: ./checkpoint-9000/
+return_dict:
+    value: true
+return_dict_in_generate:
+    value: false
+run_name:
+    value: whisper-large-eu
+save_on_each_node:
+    value: false
+save_only_model:
+    value: false
+save_safetensors:
+    value: true
+save_steps:
+    value: 1000
+save_strategy:
+    value: steps
+save_total_limit:
+    value: null
+scale_embedding:
+    value: false
+seed:
+    value: 42
+sep_token_id:
+    value: null
+skip_memory_metrics:
+    value: true
+sortish_sampler:
+    value: false
+split_batches:
+    value: null
+suppress_tokens:
+    value: null
+task_specific_params:
+    value: null
+temperature:
+    value: 1
+tf_legacy_loss:
+    value: false
+tf32:
+    value: null
+tie_encoder_decoder:
+    value: false
+tie_word_embeddings:
+    value: true
+tokenizer_class:
+    value: null
+top_k:
+    value: 50
+top_p:
+    value: 1
+torch_compile:
+    value: false
+torch_compile_backend:
+    value: null
+torch_compile_mode:
+    value: null
+torch_dtype:
+    value: float16
+torch_empty_cache_steps:
+    value: null
+torchdynamo:
+    value: null
+torchscript:
+    value: false
+tpu_metrics_debug:
+    value: false
+tpu_num_cores:
+    value: null
+transformers_version:
+    value: 4.46.0.dev0
+typical_p:
+    value: 1
+use_bfloat16:
+    value: false
+use_cache:
+    value: false
+use_cpu:
+    value: false
+use_ipex:
+    value: false
+use_legacy_prediction_loop:
+    value: false
+use_liger_kernel:
+    value: false
+use_mps_device:
+    value: false
+use_weighted_layer_sum:
+    value: false
+vocab_size:
+    value: 51866
+warmup_ratio:
+    value: 0
+warmup_steps:
+    value: 500
+weight_decay:
+    value: 0

wandb/run-20241007_102112-r5qja96d/files/output.log ADDED Viewed

	@@ -0,0 +1,66 @@

+Reading metadata...: 75336it [00:04, 15522.70it/s]                                                                                                 | 0/10000 [00:00<?, ?it/s]
+Reading metadata...: 13630it [00:00, 20518.62it/s]
+[INFO|trainer_utils.py:830] 2024-10-07 10:21:26,106 >> The following columns in the training set don't have a corresponding argument in `WhisperForConditionalGeneration.forward` and have been ignored: input_length. If input_length are not expected by `WhisperForConditionalGeneration.forward`,  you can safely ignore this message.
+Traceback (most recent call last):
+  File "/home/tknika/whisper-large-eu/run_speech_recognition_seq2seq_streaming.py", line 630, in <module>
+    main()
+  File "/home/tknika/whisper-large-eu/run_speech_recognition_seq2seq_streaming.py", line 579, in main
+    train_result = trainer.train(resume_from_checkpoint=checkpoint)
+                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "/home/tknika/whisper-large-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2070, in train
+    return inner_training_loop(
+           ^^^^^^^^^^^^^^^^^^^^
+  File "/home/tknika/whisper-large-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2372, in _inner_training_loop
+    for step, inputs in enumerate(epoch_iterator):
+  File "/home/tknika/whisper-large-eu/.venv/lib/python3.12/site-packages/accelerate/data_loader.py", line 831, in __iter__
+    next_batch, next_batch_info = self._fetch_batches(main_iterator)
+                                  ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "/home/tknika/whisper-large-eu/.venv/lib/python3.12/site-packages/accelerate/data_loader.py", line 752, in _fetch_batches
+    batches.append(next(iterator))
+                   ^^^^^^^^^^^^^^
+  File "/home/tknika/whisper-large-eu/.venv/lib/python3.12/site-packages/torch/utils/data/dataloader.py", line 630, in __next__
+    data = self._next_data()
+           ^^^^^^^^^^^^^^^^^
+  File "/home/tknika/whisper-large-eu/.venv/lib/python3.12/site-packages/torch/utils/data/dataloader.py", line 673, in _next_data
+    data = self._dataset_fetcher.fetch(index)  # may raise StopIteration
+           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "/home/tknika/whisper-large-eu/.venv/lib/python3.12/site-packages/torch/utils/data/_utils/fetch.py", line 33, in fetch
+    data.append(next(self.dataset_iter))
+                ^^^^^^^^^^^^^^^^^^^^^^^
+  File "/home/tknika/whisper-large-eu/.venv/lib/python3.12/site-packages/datasets/iterable_dataset.py", line 2012, in __iter__
+    for key, example in ex_iterable:
+  File "/home/tknika/whisper-large-eu/.venv/lib/python3.12/site-packages/datasets/iterable_dataset.py", line 1203, in __iter__
+    yield from self._iter()
+  File "/home/tknika/whisper-large-eu/.venv/lib/python3.12/site-packages/datasets/iterable_dataset.py", line 1259, in _iter
+    for key, example in iterator:
+  File "/home/tknika/whisper-large-eu/.venv/lib/python3.12/site-packages/datasets/iterable_dataset.py", line 1393, in __iter__
+    for x in self.ex_iterable:
+  File "/home/tknika/whisper-large-eu/.venv/lib/python3.12/site-packages/datasets/iterable_dataset.py", line 947, in __iter__
+    yield from self._iter()
+  File "/home/tknika/whisper-large-eu/.venv/lib/python3.12/site-packages/datasets/iterable_dataset.py", line 1027, in _iter
+    for key, example in iterator:
+  File "/home/tknika/whisper-large-eu/.venv/lib/python3.12/site-packages/datasets/iterable_dataset.py", line 1613, in __iter__
+    _apply_feature_types_on_example(example, self.features, token_per_repo_id=self.token_per_repo_id),
+    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "/home/tknika/whisper-large-eu/.venv/lib/python3.12/site-packages/datasets/iterable_dataset.py", line 1566, in _apply_feature_types_on_example
+    decoded_example = features.decode_example(encoded_example, token_per_repo_id=token_per_repo_id)
+                      ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "/home/tknika/whisper-large-eu/.venv/lib/python3.12/site-packages/datasets/features/features.py", line 2042, in decode_example
+    column_name: decode_nested_example(feature, value, token_per_repo_id=token_per_repo_id)
+                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "/home/tknika/whisper-large-eu/.venv/lib/python3.12/site-packages/datasets/features/features.py", line 1403, in decode_nested_example
+    return schema.decode_example(obj, token_per_repo_id=token_per_repo_id)
+           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "/home/tknika/whisper-large-eu/.venv/lib/python3.12/site-packages/datasets/features/audio.py", line 193, in decode_example
+    array = librosa.resample(array, orig_sr=sampling_rate, target_sr=self.sampling_rate)
+            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "/home/tknika/whisper-large-eu/.venv/lib/python3.12/site-packages/librosa/core/audio.py", line 669, in resample
+    y_hat = np.apply_along_axis(
+            ^^^^^^^^^^^^^^^^^^^^
+  File "/home/tknika/whisper-large-eu/.venv/lib/python3.12/site-packages/numpy/lib/_shape_base_impl.py", line 384, in apply_along_axis
+    res = asanyarray(func1d(inarr_view[ind0], *args, **kwargs))
+                     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "/home/tknika/whisper-large-eu/.venv/lib/python3.12/site-packages/soxr/__init__.py", line 206, in resample
+    y = divide_proc(in_rate, out_rate, x[:, np.newaxis], q)
+        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+KeyboardInterrupt

wandb/run-20241007_102112-r5qja96d/files/wandb-metadata.json ADDED Viewed

	@@ -0,0 +1,87 @@

+{
+  "os":  "Linux-6.8.0-45-generic-x86_64-with-glibc2.39",
+  "python":  "3.12.3",
+  "startedAt":  "2024-10-07T10:21:12.262430Z",
+  "args":  [
+    "--model_name_or_path=openai/whisper-large-v3",
+    "--dataset_name=mozilla-foundation/common_voice_17_0",
+    "--dataset_config_name=eu",
+    "--language=basque",
+    "--train_split_name=train+validation",
+    "--eval_split_name=test",
+    "--model_index_name=Whisper Large Basque",
+    "--max_steps=10000",
+    "--output_dir=./",
+    "--per_device_train_batch_size=16",
+    "--per_device_eval_batch_size=8",
+    "--gradient_accumulation_steps=1",
+    "--logging_steps=25",
+    "--learning_rate=4.375e-6",
+    "--warmup_steps=500",
+    "--evaluation_strategy=steps",
+    "--eval_steps=500",
+    "--save_strategy=steps",
+    "--save_steps=1000",
+    "--generation_max_length=228",
+    "--length_column_name=input_length",
+    "--max_duration_in_seconds=30",
+    "--text_column_name=sentence",
+    "--freeze_feature_encoder=False",
+    "--report_to=tensorboard",
+    "--metric_for_best_model=wer",
+    "--greater_is_better=False",
+    "--load_best_model_at_end",
+    "--gradient_checkpointing",
+    "--fp16",
+    "--overwrite_output_dir",
+    "--do_train",
+    "--do_eval",
+    "--predict_with_generate",
+    "--do_normalize_eval",
+    "--streaming",
+    "--push_to_hub",
+    "--resume_from_checkpoint=./checkpoint-9000/",
+    "--report_to",
+    "wandb",
+    "--run_name",
+    "whisper-large-eu"
+  ],
+  "program":  "/home/tknika/whisper-large-eu/run_speech_recognition_seq2seq_streaming.py",
+  "codePath":  "run_speech_recognition_seq2seq_streaming.py",
+  "git":  {
+    "remote":  "https://huggingface.co/xezpeleta/whisper-large-eu",
+    "commit":  "45227421df6af8836af459c374361e7303a68aea"
+  },
+  "email":  "xezpeleta@gmail.com",
+  "root":  "/home/tknika/whisper-large-eu",
+  "host":  "tknika",
+  "username":  "tknika",
+  "executable":  "/home/tknika/whisper-large-eu/.venv/bin/python",
+  "codePathLocal":  "run_speech_recognition_seq2seq_streaming.py",
+  "cpu_count":  8,
+  "cpu_count_logical":  8,
+  "gpu":  "[NVIDIA L40-48Q]",
+  "gpu_count":  1,
+  "disk":  {
+    "/":  {
+      "total":  "314615791616",
+      "used":  "265683288064"
+    }
+  },
+  "memory":  {
+    "total":  "33654026240"
+  },
+  "cpu":  {
+    "count":  8,
+    "countLogical":  8
+  },
+  "gpu_nvidia":  [
+    {
+      "name":  "NVIDIA L40-48Q",
+      "memoryTotal":  "51539607552",
+      "cudaCores":  18176,
+      "architecture":  "Ada"
+    }
+  ],
+  "cudaVersion":  "12.4"
+}

wandb/run-20241007_102112-r5qja96d/files/wandb-summary.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"_wandb":{"runtime":25}}

wandb/run-20241007_102112-r5qja96d/logs/debug-core.log ADDED Viewed

	@@ -0,0 +1,14 @@

+{"time":"2024-10-07T10:21:11.14807853Z","level":"INFO","msg":"started logging, with flags","port-filename":"/tmp/tmp7x1p2y77/port-20491.txt","pid":20491,"debug":false,"disable-analytics":false}
+{"time":"2024-10-07T10:21:11.148124439Z","level":"INFO","msg":"FeatureState","shutdownOnParentExitEnabled":false}
+{"time":"2024-10-07T10:21:11.347070999Z","level":"INFO","msg":"Will exit if parent process dies.","ppid":20491}
+{"time":"2024-10-07T10:21:11.347039129Z","level":"INFO","msg":"server is running","addr":{"IP":"127.0.0.1","Port":33051,"Zone":""}}
+{"time":"2024-10-07T10:21:11.538033075Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"127.0.0.1:44772"}
+{"time":"2024-10-07T10:21:12.262950492Z","level":"INFO","msg":"handleInformInit: received","streamId":"r5qja96d","id":"127.0.0.1:44772"}
+{"time":"2024-10-07T10:21:12.310433881Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"r5qja96d","id":"127.0.0.1:44772"}
+{"time":"2024-10-07T10:21:37.757844263Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"127.0.0.1:44772"}
+{"time":"2024-10-07T10:21:37.757906193Z","level":"INFO","msg":"connection: Close: initiating connection closure","id":"127.0.0.1:44772"}
+{"time":"2024-10-07T10:21:37.757940542Z","level":"INFO","msg":"server is shutting down"}
+{"time":"2024-10-07T10:21:37.75825213Z","level":"INFO","msg":"connection: Close: connection successfully closed","id":"127.0.0.1:44772"}
+{"time":"2024-10-07T10:21:41.160473282Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"127.0.0.1:44772"}
+{"time":"2024-10-07T10:21:41.160514042Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"127.0.0.1:44772"}
+{"time":"2024-10-07T10:21:41.160573842Z","level":"INFO","msg":"server is closed"}

wandb/run-20241007_102112-r5qja96d/logs/debug-internal.log ADDED Viewed

	@@ -0,0 +1,18 @@

+{"time":"2024-10-07T10:21:12.26314249Z","level":"INFO","msg":"using version","core version":"0.18.3"}
+{"time":"2024-10-07T10:21:12.26315631Z","level":"INFO","msg":"created symlink","path":"/home/tknika/whisper-large-eu/wandb/run-20241007_102112-r5qja96d/logs/debug-core.log"}
+{"time":"2024-10-07T10:21:12.304369743Z","level":"ERROR","msg":"dialing: google: could not find default credentials. See https://cloud.google.com/docs/authentication/external/set-up-adc for more information"}
+{"time":"2024-10-07T10:21:12.310379031Z","level":"INFO","msg":"created new stream","id":"r5qja96d"}
+{"time":"2024-10-07T10:21:12.310426121Z","level":"INFO","msg":"stream: started","id":"r5qja96d"}
+{"time":"2024-10-07T10:21:12.31046721Z","level":"INFO","msg":"sender: started","stream_id":{"value":"r5qja96d"}}
+{"time":"2024-10-07T10:21:12.31047811Z","level":"INFO","msg":"handler: started","stream_id":{"value":"r5qja96d"}}
+{"time":"2024-10-07T10:21:12.310456601Z","level":"INFO","msg":"writer: Do: started","stream_id":{"value":"r5qja96d"}}
+{"time":"2024-10-07T10:21:12.736877759Z","level":"INFO","msg":"wandb-core","!BADKEY":null}
+{"time":"2024-10-07T10:21:12.73793955Z","level":"INFO","msg":"Starting system monitor"}
+{"time":"2024-10-07T10:21:37.757898803Z","level":"INFO","msg":"stream: closing","id":"r5qja96d"}
+{"time":"2024-10-07T10:21:37.757922673Z","level":"INFO","msg":"Stopping system monitor"}
+{"time":"2024-10-07T10:21:37.761948169Z","level":"INFO","msg":"Stopped system monitor"}
+{"time":"2024-10-07T10:21:40.805260539Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
+{"time":"2024-10-07T10:21:41.160038266Z","level":"INFO","msg":"handler: closed","stream_id":{"value":"r5qja96d"}}
+{"time":"2024-10-07T10:21:41.160110426Z","level":"INFO","msg":"sender: closed","stream_id":{"value":"r5qja96d"}}
+{"time":"2024-10-07T10:21:41.160107876Z","level":"INFO","msg":"writer: Close: closed","stream_id":{"value":"r5qja96d"}}
+{"time":"2024-10-07T10:21:41.160340304Z","level":"INFO","msg":"stream: closed","id":"r5qja96d"}

wandb/run-20241007_102112-r5qja96d/logs/debug.log ADDED Viewed

	@@ -0,0 +1,29 @@

+2024-10-07 10:21:12,259 INFO    MainThread:20491 [wandb_setup.py:_flush():79] Current SDK version is 0.18.3
+2024-10-07 10:21:12,259 INFO    MainThread:20491 [wandb_setup.py:_flush():79] Configure stats pid to 20491
+2024-10-07 10:21:12,259 INFO    MainThread:20491 [wandb_setup.py:_flush():79] Loading settings from /home/tknika/.config/wandb/settings
+2024-10-07 10:21:12,259 INFO    MainThread:20491 [wandb_setup.py:_flush():79] Loading settings from /home/tknika/whisper-large-eu/wandb/settings
+2024-10-07 10:21:12,259 INFO    MainThread:20491 [wandb_setup.py:_flush():79] Loading settings from environment variables: {'project': 'whisper-medium-eu'}
+2024-10-07 10:21:12,259 INFO    MainThread:20491 [wandb_setup.py:_flush():79] Applying setup settings: {'mode': None, '_disable_service': None}
+2024-10-07 10:21:12,260 INFO    MainThread:20491 [wandb_setup.py:_flush():79] Inferring run settings from compute environment: {'program_relpath': 'run_speech_recognition_seq2seq_streaming.py', 'program_abspath': '/home/tknika/whisper-large-eu/run_speech_recognition_seq2seq_streaming.py', 'program': '/home/tknika/whisper-large-eu/run_speech_recognition_seq2seq_streaming.py'}
+2024-10-07 10:21:12,260 INFO    MainThread:20491 [wandb_setup.py:_flush():79] Applying login settings: {}
+2024-10-07 10:21:12,260 INFO    MainThread:20491 [wandb_init.py:_log_setup():532] Logging user logs to /home/tknika/whisper-large-eu/wandb/run-20241007_102112-r5qja96d/logs/debug.log
+2024-10-07 10:21:12,260 INFO    MainThread:20491 [wandb_init.py:_log_setup():533] Logging internal logs to /home/tknika/whisper-large-eu/wandb/run-20241007_102112-r5qja96d/logs/debug-internal.log
+2024-10-07 10:21:12,260 INFO    MainThread:20491 [wandb_init.py:init():617] calling init triggers
+2024-10-07 10:21:12,260 INFO    MainThread:20491 [wandb_init.py:init():624] wandb.init called with sweep_config: {}
+config: {}
+2024-10-07 10:21:12,260 INFO    MainThread:20491 [wandb_init.py:init():667] starting backend
+2024-10-07 10:21:12,260 INFO    MainThread:20491 [wandb_init.py:init():671] sending inform_init request
+2024-10-07 10:21:12,261 INFO    MainThread:20491 [backend.py:_multiprocessing_setup():104] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
+2024-10-07 10:21:12,262 INFO    MainThread:20491 [wandb_init.py:init():684] backend started and connected
+2024-10-07 10:21:12,265 INFO    MainThread:20491 [wandb_init.py:init():779] updated telemetry
+2024-10-07 10:21:12,271 INFO    MainThread:20491 [wandb_init.py:init():812] communicating run to backend with 90.0 second timeout
+2024-10-07 10:21:12,731 INFO    MainThread:20491 [wandb_init.py:init():863] starting run threads in backend
+2024-10-07 10:21:12,833 INFO    MainThread:20491 [wandb_run.py:_console_start():2465] atexit reg
+2024-10-07 10:21:12,833 INFO    MainThread:20491 [wandb_run.py:_redirect():2313] redirect: wrap_raw
+2024-10-07 10:21:12,833 INFO    MainThread:20491 [wandb_run.py:_redirect():2378] Wrapping output streams.
+2024-10-07 10:21:12,833 INFO    MainThread:20491 [wandb_run.py:_redirect():2403] Redirects installed.
+2024-10-07 10:21:12,837 INFO    MainThread:20491 [wandb_init.py:init():907] run started, returning control to user process
+2024-10-07 10:21:12,838 INFO    MainThread:20491 [wandb_run.py:_config_callback():1394] config_cb None None {'vocab_size': 51866, 'num_mel_bins': 128, 'd_model': 1280, 'encoder_layers': 32, 'encoder_attention_heads': 20, 'decoder_layers': 32, 'decoder_attention_heads': 20, 'decoder_ffn_dim': 5120, 'encoder_ffn_dim': 5120, 'dropout': 0.0, 'attention_dropout': 0.0, 'activation_dropout': 0.0, 'activation_function': 'gelu', 'init_std': 0.02, 'encoder_layerdrop': 0.0, 'decoder_layerdrop': 0.0, 'use_cache': False, 'num_hidden_layers': 32, 'scale_embedding': False, 'max_source_positions': 1500, 'max_target_positions': 448, 'classifier_proj_size': 256, 'use_weighted_layer_sum': False, 'apply_spec_augment': False, 'mask_time_prob': 0.05, 'mask_time_length': 10, 'mask_time_min_masks': 2, 'mask_feature_prob': 0.0, 'mask_feature_length': 10, 'mask_feature_min_masks': 0, 'median_filter_width': 7, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float16', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': True, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 448, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': [220, 50257], 'architectures': ['WhisperForConditionalGeneration'], 'finetuning_task': 
None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 50257, 'pad_token_id': 50256, 'eos_token_id': 50257, 'sep_token_id': None, 'decoder_start_token_id': 50258, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'openai/whisper-large-v3', 'transformers_version': '4.46.0.dev0', 'model_type': 'whisper', 'forced_decoder_ids': None, 'output_dir': './', 'overwrite_output_dir': True, 'do_train': True, 'do_eval': True, 'do_predict': False, 'eval_strategy': 'steps', 'prediction_loss_only': False, 'per_device_train_batch_size': 16, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 4.375e-06, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 3.0, 'max_steps': 10000, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 500, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': './runs/Oct07_10-20-37_tknika', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 25, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 1000, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 
'eval_steps': 500, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': 'whisper-large-eu', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': True, 'metric_for_best_model': 'wer', 'greater_is_better': False, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'input_length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': './checkpoint-9000/', 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': True, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'include_for_metrics': [], 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'evaluation_strategy': 'steps', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 
'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'eval_use_gather_object': False, 'sortish_sampler': False, 'predict_with_generate': True, 'generation_max_length': 228, 'generation_num_beams': None, 'generation_config': None}
+2024-10-07 10:21:12,842 INFO    MainThread:20491 [wandb_config.py:__setitem__():154] config set model/num_parameters = 1543490560 - <bound method Run._config_callback of <wandb.sdk.wandb_run.Run object at 0x78f7bc2a1760>>
+2024-10-07 10:21:12,842 INFO    MainThread:20491 [wandb_run.py:_config_callback():1394] config_cb model/num_parameters 1543490560 None
+2024-10-07 10:21:37,758 WARNING MsgRouterThr:20491 [router.py:message_loop():77] message_loop has been closed

wandb/run-20241007_102112-r5qja96d/run-r5qja96d.wandb ADDED Viewed

Binary file (22.3 kB). View file

wandb/run-20241007_102233-fvsz65yu/files/config.yaml ADDED Viewed

	@@ -0,0 +1,515 @@

+_name_or_path:
+    value: openai/whisper-large-v3
+_wandb:
+    value:
+        cli_version: 0.18.3
+        m:
+            - "1": train/global_step
+              "6":
+                - 3
+              "7": []
+            - "1": train/epoch
+              "5": 1
+              "6":
+                - 1
+                - 3
+              "7": []
+        python_version: 3.12.3
+        t:
+            "1":
+                - 1
+                - 5
+                - 11
+                - 49
+                - 51
+                - 53
+                - 55
+                - 71
+                - 100
+            "2":
+                - 1
+                - 5
+                - 11
+                - 49
+                - 51
+                - 53
+                - 55
+                - 71
+                - 100
+            "3":
+                - 7
+                - 13
+                - 19
+                - 23
+                - 55
+                - 62
+                - 66
+            "4": 3.12.3
+            "5": 0.18.3
+            "6": 4.46.0.dev0
+            "8":
+                - 5
+            "9":
+                "1": transformers_trainer
+            "12": 0.18.3
+            "13": linux-x86_64
+accelerator_config:
+    value:
+        dispatch_batches: null
+        even_batches: true
+        gradient_accumulation_kwargs: null
+        non_blocking: false
+        split_batches: false
+        use_seedable_sampler: true
+activation_dropout:
+    value: 0
+activation_function:
+    value: gelu
+adafactor:
+    value: false
+adam_beta1:
+    value: 0.9
+adam_beta2:
+    value: 0.999
+adam_epsilon:
+    value: 1e-08
+add_cross_attention:
+    value: false
+apply_spec_augment:
+    value: false
+architectures:
+    value:
+        - WhisperForConditionalGeneration
+attention_dropout:
+    value: 0
+auto_find_batch_size:
+    value: false
+bad_words_ids:
+    value: null
+batch_eval_metrics:
+    value: false
+begin_suppress_tokens:
+    value:
+        - 220
+        - 50257
+bf16:
+    value: false
+bf16_full_eval:
+    value: false
+bos_token_id:
+    value: 50257
+chunk_size_feed_forward:
+    value: 0
+classifier_proj_size:
+    value: 256
+cross_attention_hidden_size:
+    value: null
+d_model:
+    value: 1280
+data_seed:
+    value: null
+dataloader_drop_last:
+    value: false
+dataloader_num_workers:
+    value: 0
+dataloader_persistent_workers:
+    value: false
+dataloader_pin_memory:
+    value: true
+dataloader_prefetch_factor:
+    value: null
+ddp_backend:
+    value: null
+ddp_broadcast_buffers:
+    value: null
+ddp_bucket_cap_mb:
+    value: null
+ddp_find_unused_parameters:
+    value: null
+ddp_timeout:
+    value: 1800
+debug:
+    value: []
+decoder_attention_heads:
+    value: 20
+decoder_ffn_dim:
+    value: 5120
+decoder_layerdrop:
+    value: 0
+decoder_layers:
+    value: 32
+decoder_start_token_id:
+    value: 50258
+deepspeed:
+    value: null
+disable_tqdm:
+    value: false
+dispatch_batches:
+    value: null
+diversity_penalty:
+    value: 0
+do_eval:
+    value: true
+do_predict:
+    value: false
+do_sample:
+    value: false
+do_train:
+    value: true
+dropout:
+    value: 0
+early_stopping:
+    value: false
+encoder_attention_heads:
+    value: 20
+encoder_ffn_dim:
+    value: 5120
+encoder_layerdrop:
+    value: 0
+encoder_layers:
+    value: 32
+encoder_no_repeat_ngram_size:
+    value: 0
+eos_token_id:
+    value: 50257
+eval_accumulation_steps:
+    value: null
+eval_delay:
+    value: 0
+eval_do_concat_batches:
+    value: true
+eval_on_start:
+    value: false
+eval_steps:
+    value: 500
+eval_strategy:
+    value: steps
+eval_use_gather_object:
+    value: false
+evaluation_strategy:
+    value: steps
+exponential_decay_length_penalty:
+    value: null
+finetuning_task:
+    value: null
+forced_bos_token_id:
+    value: null
+forced_decoder_ids:
+    value: null
+forced_eos_token_id:
+    value: null
+fp16:
+    value: true
+fp16_backend:
+    value: auto
+fp16_full_eval:
+    value: false
+fp16_opt_level:
+    value: O1
+fsdp:
+    value: []
+fsdp_config:
+    value:
+        min_num_params: 0
+        xla: false
+        xla_fsdp_grad_ckpt: false
+        xla_fsdp_v2: false
+fsdp_min_num_params:
+    value: 0
+fsdp_transformer_layer_cls_to_wrap:
+    value: null
+full_determinism:
+    value: false
+generation_config:
+    value: null
+generation_max_length:
+    value: 228
+generation_num_beams:
+    value: null
+gradient_accumulation_steps:
+    value: 1
+gradient_checkpointing:
+    value: true
+gradient_checkpointing_kwargs:
+    value: null
+greater_is_better:
+    value: false
+group_by_length:
+    value: false
+half_precision_backend:
+    value: auto
+hub_always_push:
+    value: false
+hub_model_id:
+    value: null
+hub_private_repo:
+    value: false
+hub_strategy:
+    value: every_save
+hub_token:
+    value: <HUB_TOKEN>
+id2label:
+    value:
+        "0": LABEL_0
+        "1": LABEL_1
+ignore_data_skip:
+    value: false
+include_for_metrics:
+    value: []
+include_inputs_for_metrics:
+    value: false
+include_num_input_tokens_seen:
+    value: false
+include_tokens_per_second:
+    value: false
+init_std:
+    value: 0.02
+is_decoder:
+    value: false
+is_encoder_decoder:
+    value: true
+jit_mode_eval:
+    value: false
+label_names:
+    value: null
+label_smoothing_factor:
+    value: 0
+label2id:
+    value:
+        LABEL_0: 0
+        LABEL_1: 1
+learning_rate:
+    value: 4.375e-06
+length_column_name:
+    value: input_length
+length_penalty:
+    value: 1
+load_best_model_at_end:
+    value: true
+local_rank:
+    value: 0
+log_level:
+    value: passive
+log_level_replica:
+    value: warning
+log_on_each_node:
+    value: true
+logging_dir:
+    value: ./runs/Oct07_10-22-04_tknika
+logging_first_step:
+    value: false
+logging_nan_inf_filter:
+    value: true
+logging_steps:
+    value: 25
+logging_strategy:
+    value: steps
+lr_scheduler_type:
+    value: linear
+mask_feature_length:
+    value: 10
+mask_feature_min_masks:
+    value: 0
+mask_feature_prob:
+    value: 0
+mask_time_length:
+    value: 10
+mask_time_min_masks:
+    value: 2
+mask_time_prob:
+    value: 0.05
+max_grad_norm:
+    value: 1
+max_length:
+    value: 448
+max_source_positions:
+    value: 1500
+max_steps:
+    value: 1000
+max_target_positions:
+    value: 448
+median_filter_width:
+    value: 7
+metric_for_best_model:
+    value: wer
+min_length:
+    value: 0
+model/num_parameters:
+    value: 1543490560
+model_type:
+    value: whisper
+mp_parameters:
+    value: ""
+neftune_noise_alpha:
+    value: null
+no_cuda:
+    value: false
+no_repeat_ngram_size:
+    value: 0
+num_beam_groups:
+    value: 1
+num_beams:
+    value: 1
+num_hidden_layers:
+    value: 32
+num_mel_bins:
+    value: 128
+num_return_sequences:
+    value: 1
+num_train_epochs:
+    value: 3
+optim:
+    value: adamw_torch
+optim_args:
+    value: null
+optim_target_modules:
+    value: null
+output_attentions:
+    value: false
+output_dir:
+    value: ./
+output_hidden_states:
+    value: false
+output_scores:
+    value: false
+overwrite_output_dir:
+    value: true
+pad_token_id:
+    value: 50256
+past_index:
+    value: -1
+per_device_eval_batch_size:
+    value: 8
+per_device_train_batch_size:
+    value: 16
+per_gpu_eval_batch_size:
+    value: null
+per_gpu_train_batch_size:
+    value: null
+predict_with_generate:
+    value: true
+prediction_loss_only:
+    value: false
+prefix:
+    value: null
+problem_type:
+    value: null
+push_to_hub:
+    value: true
+push_to_hub_model_id:
+    value: null
+push_to_hub_organization:
+    value: null
+push_to_hub_token:
+    value: <PUSH_TO_HUB_TOKEN>
+ray_scope:
+    value: last
+remove_invalid_values:
+    value: false
+remove_unused_columns:
+    value: true
+repetition_penalty:
+    value: 1
+report_to:
+    value:
+        - wandb
+restore_callback_states_from_checkpoint:
+    value: false
+resume_from_checkpoint:
+    value: ./checkpoint-9000/
+return_dict:
+    value: true
+return_dict_in_generate:
+    value: false
+run_name:
+    value: whisper-large-eu
+save_on_each_node:
+    value: false
+save_only_model:
+    value: false
+save_safetensors:
+    value: true
+save_steps:
+    value: 1000
+save_strategy:
+    value: steps
+save_total_limit:
+    value: null
+scale_embedding:
+    value: false
+seed:
+    value: 42
+sep_token_id:
+    value: null
+skip_memory_metrics:
+    value: true
+sortish_sampler:
+    value: false
+split_batches:
+    value: null
+suppress_tokens:
+    value: null
+task_specific_params:
+    value: null
+temperature:
+    value: 1
+tf_legacy_loss:
+    value: false
+tf32:
+    value: null
+tie_encoder_decoder:
+    value: false
+tie_word_embeddings:
+    value: true
+tokenizer_class:
+    value: null
+top_k:
+    value: 50
+top_p:
+    value: 1
+torch_compile:
+    value: false
+torch_compile_backend:
+    value: null
+torch_compile_mode:
+    value: null
+torch_dtype:
+    value: float16
+torch_empty_cache_steps:
+    value: null
+torchdynamo:
+    value: null
+torchscript:
+    value: false
+tpu_metrics_debug:
+    value: false
+tpu_num_cores:
+    value: null
+transformers_version:
+    value: 4.46.0.dev0
+typical_p:
+    value: 1
+use_bfloat16:
+    value: false
+use_cache:
+    value: false
+use_cpu:
+    value: false
+use_ipex:
+    value: false
+use_legacy_prediction_loop:
+    value: false
+use_liger_kernel:
+    value: false
+use_mps_device:
+    value: false
+use_weighted_layer_sum:
+    value: false
+vocab_size:
+    value: 51866
+warmup_ratio:
+    value: 0
+warmup_steps:
+    value: 500
+weight_decay:
+    value: 0

wandb/run-20241007_102233-fvsz65yu/files/output.log ADDED Viewed

	@@ -0,0 +1,68 @@

+  0%|                                                                                                                                               | 0/1000 [00:00<?, ?it/s]/home/tknika/whisper-large-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py:2974: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.
+  checkpoint_rng_state = torch.load(rng_file)
+Reading metadata...: 75336it [00:02, 29074.31it/s]
+Reading metadata...: 13630it [00:00, 14208.60it/s]
+[INFO|trainer_utils.py:830] 2024-10-07 10:22:46,488 >> The following columns in the training set don't have a corresponding argument in `WhisperForConditionalGeneration.forward` and have been ignored: input_length. If input_length are not expected by `WhisperForConditionalGeneration.forward`,  you can safely ignore this message.
+/home/tknika/whisper-large-eu/.venv/lib/python3.12/site-packages/torch/utils/checkpoint.py:295: FutureWarning: `torch.cpu.amp.autocast(args...)` is deprecated. Please use `torch.amp.autocast('cpu', args...)` instead.
+  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]
+9001it [00:15, 576.66it/s]                                                                                                                                                   [INFO|trainer.py:3738] 2024-10-07 10:22:49,598 >> Saving model checkpoint to ./checkpoint-9001
+/home/tknika/whisper-large-eu/.venv/lib/python3.12/site-packages/transformers/modeling_utils.py:2774: UserWarning: Moving the following attributes in the config to the generation config: {'max_length': 448, 'begin_suppress_tokens': [220, 50257]}. You are seeing this warning because you've set generation parameters in the model config, as opposed to in the generation config.
+  warnings.warn(
+[INFO|configuration_utils.py:410] 2024-10-07 10:22:49,601 >> Configuration saved in ./checkpoint-9001/config.json
+[INFO|configuration_utils.py:868] 2024-10-07 10:22:49,602 >> Configuration saved in ./checkpoint-9001/generation_config.json
+[INFO|modeling_utils.py:3000] 2024-10-07 10:22:55,796 >> The model is bigger than the maximum size per checkpoint (5GB) and is going to be split in 2 checkpoint shards. You can find where each parameters has been saved in the index located at ./checkpoint-9001/model.safetensors.index.json.
+[INFO|feature_extraction_utils.py:435] 2024-10-07 10:22:55,797 >> Feature extractor saved in ./checkpoint-9001/preprocessor_config.json
+9001it [00:25, 576.66it/s][INFO|feature_extraction_utils.py:435] 2024-10-07 10:23:19,205 >> Feature extractor saved in ./preprocessor_config.json
+[INFO|trainer.py:2532] 2024-10-07 10:23:19,229 >>
+Training completed. Do not forget to share your model on huggingface.co/models =)
+[INFO|trainer.py:2770] 2024-10-07 10:23:19,230 >> Loading best model from ./checkpoint-9000 (score: 7.215361500971087).
+[WARNING|trainer.py:2892] 2024-10-07 10:23:25,170 >> There were missing keys in the checkpoint model loaded: ['proj_out.weight'].
+9001it [00:51, 175.86it/s]
+{'train_runtime': 52.4848, 'train_samples_per_second': 304.85, 'train_steps_per_second': 19.053, 'train_loss': 7.723795109133682e-07, 'epoch': 9.0}
+[INFO|trainer.py:4519] 2024-10-07 10:23:25,172 >> Waiting for the current checkpoint push to be finished, this might take a couple of minutes.
+/home/tknika/whisper-large-eu/.venv/lib/python3.12/site-packages/huggingface_hub/hf_api.py:3889: UserWarning: It seems that you are about to commit a data file (.venv/lib/python3.12/site-packages/pyarrow/tests/data/parquet/v0.7.1.all-named-index.parquet) to a model repository. You are sure this is intended? If you are trying to upload a dataset, please set `repo_type='dataset'` or `--repo-type=dataset` in a CLI.
+  warnings.warn(
+/home/tknika/whisper-large-eu/.venv/lib/python3.12/site-packages/huggingface_hub/hf_api.py:3889: UserWarning: It seems that you are about to commit a data file (.venv/lib/python3.12/site-packages/pyarrow/tests/data/parquet/v0.7.1.column-metadata-handling.parquet) to a model repository. You are sure this is intended? If you are trying to upload a dataset, please set `repo_type='dataset'` or `--repo-type=dataset` in a CLI.
+  warnings.warn(
+/home/tknika/whisper-large-eu/.venv/lib/python3.12/site-packages/huggingface_hub/hf_api.py:3889: UserWarning: It seems that you are about to commit a data file (.venv/lib/python3.12/site-packages/pyarrow/tests/data/parquet/v0.7.1.parquet) to a model repository. You are sure this is intended? If you are trying to upload a dataset, please set `repo_type='dataset'` or `--repo-type=dataset` in a CLI.
+  warnings.warn(
+/home/tknika/whisper-large-eu/.venv/lib/python3.12/site-packages/huggingface_hub/hf_api.py:3889: UserWarning: It seems that you are about to commit a data file (.venv/lib/python3.12/site-packages/pyarrow/tests/data/parquet/v0.7.1.some-named-index.parquet) to a model repository. You are sure this is intended? If you are trying to upload a dataset, please set `repo_type='dataset'` or `--repo-type=dataset` in a CLI.
+  warnings.warn(
+Traceback (most recent call last):
+  File "/home/tknika/whisper-large-eu/run_speech_recognition_seq2seq_streaming.py", line 630, in <module>
+    main()
+  File "/home/tknika/whisper-large-eu/run_speech_recognition_seq2seq_streaming.py", line 579, in main
+    train_result = trainer.train(resume_from_checkpoint=checkpoint)
+                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "/home/tknika/whisper-large-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2070, in train
+    return inner_training_loop(
+           ^^^^^^^^^^^^^^^^^^^^
+  File "/home/tknika/whisper-large-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 2579, in _inner_training_loop
+    self._finish_current_push()
+  File "/home/tknika/whisper-large-eu/.venv/lib/python3.12/site-packages/transformers/trainer.py", line 4520, in _finish_current_push
+    self.push_in_progress.wait_until_done()
+  File "/home/tknika/whisper-large-eu/.venv/lib/python3.12/site-packages/transformers/utils/hub.py", line 1305, in wait_until_done
+    futures.wait(self.jobs)
+  File "/usr/lib/python3.12/concurrent/futures/_base.py", line 305, in wait
+    waiter.event.wait(timeout)
+  File "/usr/lib/python3.12/threading.py", line 655, in wait
+    signaled = self._cond.wait(timeout)
+               ^^^^^^^^^^^^^^^^^^^^^^^^
+  File "/usr/lib/python3.12/threading.py", line 355, in wait
+    waiter.acquire()
+KeyboardInterrupt
+Exception ignored in: <module 'threading' from '/usr/lib/python3.12/threading.py'>
+Traceback (most recent call last):
+  File "/usr/lib/python3.12/threading.py", line 1592, in _shutdown
+    atexit_call()
+  File "/usr/lib/python3.12/concurrent/futures/thread.py", line 31, in _python_exit
+    t.join()
+  File "/usr/lib/python3.12/threading.py", line 1147, in join
+    self._wait_for_tstate_lock()
+  File "/usr/lib/python3.12/threading.py", line 1167, in _wait_for_tstate_lock
+    if lock.acquire(block, timeout):
+       ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+KeyboardInterrupt:

wandb/run-20241007_102233-fvsz65yu/files/wandb-metadata.json ADDED Viewed

	@@ -0,0 +1,87 @@

+{
+  "os":  "Linux-6.8.0-45-generic-x86_64-with-glibc2.39",
+  "python":  "3.12.3",
+  "startedAt":  "2024-10-07T10:22:33.418638Z",
+  "args":  [
+    "--model_name_or_path=openai/whisper-large-v3",
+    "--dataset_name=mozilla-foundation/common_voice_17_0",
+    "--dataset_config_name=eu",
+    "--language=basque",
+    "--train_split_name=train+validation",
+    "--eval_split_name=test",
+    "--model_index_name=Whisper Large Basque",
+    "--max_steps=1000",
+    "--output_dir=./",
+    "--per_device_train_batch_size=16",
+    "--per_device_eval_batch_size=8",
+    "--gradient_accumulation_steps=1",
+    "--logging_steps=25",
+    "--learning_rate=4.375e-6",
+    "--warmup_steps=500",
+    "--evaluation_strategy=steps",
+    "--eval_steps=500",
+    "--save_strategy=steps",
+    "--save_steps=1000",
+    "--generation_max_length=228",
+    "--length_column_name=input_length",
+    "--max_duration_in_seconds=30",
+    "--text_column_name=sentence",
+    "--freeze_feature_encoder=False",
+    "--report_to=tensorboard",
+    "--metric_for_best_model=wer",
+    "--greater_is_better=False",
+    "--load_best_model_at_end",
+    "--gradient_checkpointing",
+    "--fp16",
+    "--overwrite_output_dir",
+    "--do_train",
+    "--do_eval",
+    "--predict_with_generate",
+    "--do_normalize_eval",
+    "--streaming",
+    "--push_to_hub",
+    "--resume_from_checkpoint=./checkpoint-9000/",
+    "--report_to",
+    "wandb",
+    "--run_name",
+    "whisper-large-eu"
+  ],
+  "program":  "/home/tknika/whisper-large-eu/run_speech_recognition_seq2seq_streaming.py",
+  "codePath":  "run_speech_recognition_seq2seq_streaming.py",
+  "git":  {
+    "remote":  "https://huggingface.co/xezpeleta/whisper-large-eu",
+    "commit":  "45227421df6af8836af459c374361e7303a68aea"
+  },
+  "email":  "xezpeleta@gmail.com",
+  "root":  "/home/tknika/whisper-large-eu",
+  "host":  "tknika",
+  "username":  "tknika",
+  "executable":  "/home/tknika/whisper-large-eu/.venv/bin/python",
+  "codePathLocal":  "run_speech_recognition_seq2seq_streaming.py",
+  "cpu_count":  8,
+  "cpu_count_logical":  8,
+  "gpu":  "[NVIDIA L40-48Q]",
+  "gpu_count":  1,
+  "disk":  {
+    "/":  {
+      "total":  "314615791616",
+      "used":  "265683410944"
+    }
+  },
+  "memory":  {
+    "total":  "33654026240"
+  },
+  "cpu":  {
+    "count":  8,
+    "countLogical":  8
+  },
+  "gpu_nvidia":  [
+    {
+      "name":  "NVIDIA L40-48Q",
+      "memoryTotal":  "51539607552",
+      "cudaCores":  18176,
+      "architecture":  "Ada"
+    }
+  ],
+  "cudaVersion":  "12.4"
+}

wandb/run-20241007_102233-fvsz65yu/files/wandb-summary.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"_step":0,"train_runtime":52.4848,"_timestamp":1.7282966051712844e+09,"total_flos":4.8922616615141376e+20,"_runtime":51.793285945,"train/epoch":9.001,"_wandb":{"runtime":276},"train/global_step":9001,"train_steps_per_second":19.053,"train_loss":7.723795109133682e-07,"train_samples_per_second":304.85}

wandb/run-20241007_102233-fvsz65yu/logs/debug-core.log ADDED Viewed

	@@ -0,0 +1,14 @@

+{"time":"2024-10-07T10:22:32.752507789Z","level":"INFO","msg":"started logging, with flags","port-filename":"/tmp/tmpvaw3g7ob/port-20571.txt","pid":20571,"debug":false,"disable-analytics":false}
+{"time":"2024-10-07T10:22:32.752679238Z","level":"INFO","msg":"FeatureState","shutdownOnParentExitEnabled":false}
+{"time":"2024-10-07T10:22:32.766495142Z","level":"INFO","msg":"Will exit if parent process dies.","ppid":20571}
+{"time":"2024-10-07T10:22:32.766421472Z","level":"INFO","msg":"server is running","addr":{"IP":"127.0.0.1","Port":42167,"Zone":""}}
+{"time":"2024-10-07T10:22:32.942067452Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"127.0.0.1:56158"}
+{"time":"2024-10-07T10:22:33.420767Z","level":"INFO","msg":"handleInformInit: received","streamId":"fvsz65yu","id":"127.0.0.1:56158"}
+{"time":"2024-10-07T10:22:33.432081096Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"fvsz65yu","id":"127.0.0.1:56158"}
+{"time":"2024-10-07T10:27:09.487145656Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"127.0.0.1:56158"}
+{"time":"2024-10-07T10:27:09.487320605Z","level":"INFO","msg":"server is shutting down"}
+{"time":"2024-10-07T10:27:09.487319855Z","level":"INFO","msg":"connection: Close: initiating connection closure","id":"127.0.0.1:56158"}
+{"time":"2024-10-07T10:27:09.487457674Z","level":"INFO","msg":"connection: Close: connection successfully closed","id":"127.0.0.1:56158"}
+{"time":"2024-10-07T10:27:12.377332382Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"127.0.0.1:56158"}
+{"time":"2024-10-07T10:27:12.377385972Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"127.0.0.1:56158"}
+{"time":"2024-10-07T10:27:12.377422151Z","level":"INFO","msg":"server is closed"}

wandb/run-20241007_102233-fvsz65yu/logs/debug-internal.log ADDED Viewed

	@@ -0,0 +1,18 @@

+{"time":"2024-10-07T10:22:33.421062458Z","level":"INFO","msg":"using version","core version":"0.18.3"}
+{"time":"2024-10-07T10:22:33.421087507Z","level":"INFO","msg":"created symlink","path":"/home/tknika/whisper-large-eu/wandb/run-20241007_102233-fvsz65yu/logs/debug-core.log"}
+{"time":"2024-10-07T10:22:33.423573147Z","level":"ERROR","msg":"dialing: google: could not find default credentials. See https://cloud.google.com/docs/authentication/external/set-up-adc for more information"}
+{"time":"2024-10-07T10:22:33.432020856Z","level":"INFO","msg":"created new stream","id":"fvsz65yu"}
+{"time":"2024-10-07T10:22:33.432071516Z","level":"INFO","msg":"stream: started","id":"fvsz65yu"}
+{"time":"2024-10-07T10:22:33.432108776Z","level":"INFO","msg":"writer: Do: started","stream_id":{"value":"fvsz65yu"}}
+{"time":"2024-10-07T10:22:33.432120206Z","level":"INFO","msg":"sender: started","stream_id":{"value":"fvsz65yu"}}
+{"time":"2024-10-07T10:22:33.432199375Z","level":"INFO","msg":"handler: started","stream_id":{"value":"fvsz65yu"}}
+{"time":"2024-10-07T10:22:33.862313722Z","level":"INFO","msg":"wandb-core","!BADKEY":null}
+{"time":"2024-10-07T10:22:33.862904357Z","level":"INFO","msg":"Starting system monitor"}
+{"time":"2024-10-07T10:27:09.487255295Z","level":"INFO","msg":"stream: closing","id":"fvsz65yu"}
+{"time":"2024-10-07T10:27:09.487299825Z","level":"INFO","msg":"Stopping system monitor"}
+{"time":"2024-10-07T10:27:09.495096519Z","level":"INFO","msg":"Stopped system monitor"}
+{"time":"2024-10-07T10:27:12.041470803Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
+{"time":"2024-10-07T10:27:12.376948825Z","level":"INFO","msg":"handler: closed","stream_id":{"value":"fvsz65yu"}}
+{"time":"2024-10-07T10:27:12.377003355Z","level":"INFO","msg":"writer: Close: closed","stream_id":{"value":"fvsz65yu"}}
+{"time":"2024-10-07T10:27:12.377060484Z","level":"INFO","msg":"sender: closed","stream_id":{"value":"fvsz65yu"}}
+{"time":"2024-10-07T10:27:12.377206333Z","level":"INFO","msg":"stream: closed","id":"fvsz65yu"}

wandb/run-20241007_102233-fvsz65yu/logs/debug.log ADDED Viewed

	@@ -0,0 +1,29 @@

+2024-10-07 10:22:33,415 INFO    MainThread:20571 [wandb_setup.py:_flush():79] Current SDK version is 0.18.3
+2024-10-07 10:22:33,415 INFO    MainThread:20571 [wandb_setup.py:_flush():79] Configure stats pid to 20571
+2024-10-07 10:22:33,415 INFO    MainThread:20571 [wandb_setup.py:_flush():79] Loading settings from /home/tknika/.config/wandb/settings
+2024-10-07 10:22:33,415 INFO    MainThread:20571 [wandb_setup.py:_flush():79] Loading settings from /home/tknika/whisper-large-eu/wandb/settings
+2024-10-07 10:22:33,415 INFO    MainThread:20571 [wandb_setup.py:_flush():79] Loading settings from environment variables: {'project': 'whisper-medium-eu'}
+2024-10-07 10:22:33,415 INFO    MainThread:20571 [wandb_setup.py:_flush():79] Applying setup settings: {'mode': None, '_disable_service': None}
+2024-10-07 10:22:33,415 INFO    MainThread:20571 [wandb_setup.py:_flush():79] Inferring run settings from compute environment: {'program_relpath': 'run_speech_recognition_seq2seq_streaming.py', 'program_abspath': '/home/tknika/whisper-large-eu/run_speech_recognition_seq2seq_streaming.py', 'program': '/home/tknika/whisper-large-eu/run_speech_recognition_seq2seq_streaming.py'}
+2024-10-07 10:22:33,415 INFO    MainThread:20571 [wandb_setup.py:_flush():79] Applying login settings: {}
+2024-10-07 10:22:33,415 INFO    MainThread:20571 [wandb_init.py:_log_setup():532] Logging user logs to /home/tknika/whisper-large-eu/wandb/run-20241007_102233-fvsz65yu/logs/debug.log
+2024-10-07 10:22:33,415 INFO    MainThread:20571 [wandb_init.py:_log_setup():533] Logging internal logs to /home/tknika/whisper-large-eu/wandb/run-20241007_102233-fvsz65yu/logs/debug-internal.log
+2024-10-07 10:22:33,416 INFO    MainThread:20571 [wandb_init.py:init():617] calling init triggers
+2024-10-07 10:22:33,416 INFO    MainThread:20571 [wandb_init.py:init():624] wandb.init called with sweep_config: {}
+config: {}
+2024-10-07 10:22:33,416 INFO    MainThread:20571 [wandb_init.py:init():667] starting backend
+2024-10-07 10:22:33,416 INFO    MainThread:20571 [wandb_init.py:init():671] sending inform_init request
+2024-10-07 10:22:33,417 INFO    MainThread:20571 [backend.py:_multiprocessing_setup():104] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
+2024-10-07 10:22:33,418 INFO    MainThread:20571 [wandb_init.py:init():684] backend started and connected
+2024-10-07 10:22:33,422 INFO    MainThread:20571 [wandb_init.py:init():779] updated telemetry
+2024-10-07 10:22:33,430 INFO    MainThread:20571 [wandb_init.py:init():812] communicating run to backend with 90.0 second timeout
+2024-10-07 10:22:33,857 INFO    MainThread:20571 [wandb_init.py:init():863] starting run threads in backend
+2024-10-07 10:22:33,981 INFO    MainThread:20571 [wandb_run.py:_console_start():2465] atexit reg
+2024-10-07 10:22:33,981 INFO    MainThread:20571 [wandb_run.py:_redirect():2313] redirect: wrap_raw
+2024-10-07 10:22:33,981 INFO    MainThread:20571 [wandb_run.py:_redirect():2378] Wrapping output streams.
+2024-10-07 10:22:33,981 INFO    MainThread:20571 [wandb_run.py:_redirect():2403] Redirects installed.
+2024-10-07 10:22:33,983 INFO    MainThread:20571 [wandb_init.py:init():907] run started, returning control to user process
+2024-10-07 10:22:33,984 INFO    MainThread:20571 [wandb_run.py:_config_callback():1394] config_cb None None {'vocab_size': 51866, 'num_mel_bins': 128, 'd_model': 1280, 'encoder_layers': 32, 'encoder_attention_heads': 20, 'decoder_layers': 32, 'decoder_attention_heads': 20, 'decoder_ffn_dim': 5120, 'encoder_ffn_dim': 5120, 'dropout': 0.0, 'attention_dropout': 0.0, 'activation_dropout': 0.0, 'activation_function': 'gelu', 'init_std': 0.02, 'encoder_layerdrop': 0.0, 'decoder_layerdrop': 0.0, 'use_cache': False, 'num_hidden_layers': 32, 'scale_embedding': False, 'max_source_positions': 1500, 'max_target_positions': 448, 'classifier_proj_size': 256, 'use_weighted_layer_sum': False, 'apply_spec_augment': False, 'mask_time_prob': 0.05, 'mask_time_length': 10, 'mask_time_min_masks': 2, 'mask_feature_prob': 0.0, 'mask_feature_length': 10, 'mask_feature_min_masks': 0, 'median_filter_width': 7, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float16', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': True, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 448, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': [220, 50257], 'architectures': ['WhisperForConditionalGeneration'], 'finetuning_task': 
None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 50257, 'pad_token_id': 50256, 'eos_token_id': 50257, 'sep_token_id': None, 'decoder_start_token_id': 50258, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'openai/whisper-large-v3', 'transformers_version': '4.46.0.dev0', 'model_type': 'whisper', 'forced_decoder_ids': None, 'output_dir': './', 'overwrite_output_dir': True, 'do_train': True, 'do_eval': True, 'do_predict': False, 'eval_strategy': 'steps', 'prediction_loss_only': False, 'per_device_train_batch_size': 16, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 4.375e-06, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 3.0, 'max_steps': 1000, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 500, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': './runs/Oct07_10-22-04_tknika', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 25, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 1000, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 
'eval_steps': 500, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': 'whisper-large-eu', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': True, 'metric_for_best_model': 'wer', 'greater_is_better': False, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'input_length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': './checkpoint-9000/', 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': True, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'include_for_metrics': [], 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'evaluation_strategy': 'steps', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 
'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'eval_use_gather_object': False, 'sortish_sampler': False, 'predict_with_generate': True, 'generation_max_length': 228, 'generation_num_beams': None, 'generation_config': None}
+2024-10-07 10:22:33,988 INFO    MainThread:20571 [wandb_config.py:__setitem__():154] config set model/num_parameters = 1543490560 - <bound method Run._config_callback of <wandb.sdk.wandb_run.Run object at 0x7f01bf62b7a0>>
+2024-10-07 10:22:33,988 INFO    MainThread:20571 [wandb_run.py:_config_callback():1394] config_cb model/num_parameters 1543490560 None
+2024-10-07 10:27:09,487 WARNING MsgRouterThr:20571 [router.py:message_loop():77] message_loop has been closed

wandb/run-20241007_102233-fvsz65yu/run-fvsz65yu.wandb ADDED Viewed

Binary file (56.8 kB). View file

wandb/run-20241007_125615-a3z1jk8c/files/output.log ADDED Viewed

	@@ -0,0 +1,32 @@

+***** eval metrics *****
+  eval_loss                   =     0.9278
+  eval_model_preparation_time =     0.0102
+  eval_runtime                = 1:09:25.15
+  eval_samples_per_second     =      3.272
+  eval_steps_per_second       =      0.409
+  eval_wer                    =    44.2953
+[INFO|trainer.py:3738] 2024-10-07 12:56:15,790 >> Saving model checkpoint to ./
+/home/tknika/whisper-large-eu/.venv/lib/python3.12/site-packages/transformers/modeling_utils.py:2774: UserWarning: Moving the following attributes in the config to the generation config: {'max_length': 448, 'begin_suppress_tokens': [220, 50257]}. You are seeing this warning because you've set generation parameters in the model config, as opposed to in the generation config.
+  warnings.warn(
+[INFO|configuration_utils.py:410] 2024-10-07 12:56:15,792 >> Configuration saved in ./config.json
+[INFO|configuration_utils.py:868] 2024-10-07 12:56:15,793 >> Configuration saved in ./generation_config.json
+[INFO|modeling_utils.py:3000] 2024-10-07 12:56:27,544 >> The model is bigger than the maximum size per checkpoint (5GB) and is going to be split in 2 checkpoint shards. You can find where each parameters has been saved in the index located at ./model.safetensors.index.json.
+[INFO|feature_extraction_utils.py:435] 2024-10-07 12:56:27,545 >> Feature extractor saved in ./preprocessor_config.json
+[INFO|modelcard.py:449] 2024-10-07 12:56:27,732 >> Dropping the following result as it does not have all the necessary fields:
+{'task': {'name': 'Automatic Speech Recognition', 'type': 'automatic-speech-recognition'}, 'dataset': {'name': 'mozilla-foundation/common_voice_17_0 eu', 'type': 'mozilla-foundation/common_voice_17_0', 'config': 'eu', 'split': 'test', 'args': 'eu'}}
+/home/tknika/whisper-large-eu/.venv/lib/python3.12/site-packages/huggingface_hub/hf_api.py:3889: UserWarning: It seems that you are about to commit a data file (.venv/lib/python3.12/site-packages/pyarrow/tests/data/parquet/v0.7.1.all-named-index.parquet) to a model repository. You are sure this is intended? If you are trying to upload a dataset, please set `repo_type='dataset'` or `--repo-type=dataset` in a CLI.
+  warnings.warn(
+/home/tknika/whisper-large-eu/.venv/lib/python3.12/site-packages/huggingface_hub/hf_api.py:3889: UserWarning: It seems that you are about to commit a data file (.venv/lib/python3.12/site-packages/pyarrow/tests/data/parquet/v0.7.1.column-metadata-handling.parquet) to a model repository. You are sure this is intended? If you are trying to upload a dataset, please set `repo_type='dataset'` or `--repo-type=dataset` in a CLI.
+  warnings.warn(
+/home/tknika/whisper-large-eu/.venv/lib/python3.12/site-packages/huggingface_hub/hf_api.py:3889: UserWarning: It seems that you are about to commit a data file (.venv/lib/python3.12/site-packages/pyarrow/tests/data/parquet/v0.7.1.parquet) to a model repository. You are sure this is intended? If you are trying to upload a dataset, please set `repo_type='dataset'` or `--repo-type=dataset` in a CLI.
+  warnings.warn(
+/home/tknika/whisper-large-eu/.venv/lib/python3.12/site-packages/huggingface_hub/hf_api.py:3889: UserWarning: It seems that you are about to commit a data file (.venv/lib/python3.12/site-packages/pyarrow/tests/data/parquet/v0.7.1.some-named-index.parquet) to a model repository. You are sure this is intended? If you are trying to upload a dataset, please set `repo_type='dataset'` or `--repo-type=dataset` in a CLI.
+  warnings.warn(
+events.out.tfevents.1728297734.tknika.20799.0: 100%|████████████████████████████████████████████████████████████████████████████████████████| 360/360 [00:00<00:00, 1.10kB/s]
+training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 5.37k/5.37k [00:00<00:00, 9.18kB/s]
+model-00002-of-00002.safetensors: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 1.18G/1.18G [00:47<00:00, 25.0MB/s]
+model-00001-of-00002.safetensors: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 4.99G/4.99G [03:00<00:00, 27.6MB/s]
+Upload 4 LFS files: 100%|██████████████████████████████████���███████████████████████████████████████████████████████████████████████████████████| 4/4 [03:01<00:00, 45.33s/it]
+Upload 4 LFS files:  25%|█████████████████████████████▎                                                                                       | 1/4 [03:01<09:04, 181.33s/it]

wandb/run-20241007_125615-a3z1jk8c/files/requirements.txt ADDED Viewed

	@@ -0,0 +1,94 @@

+Markdown==3.7
+requests==2.32.3
+RapidFuzz==3.10.0
+yarl==1.13.1
+pyarrow==17.0.0
+docker-pycreds==0.4.0
+nvidia-cufft-cu12==11.0.2.54
+PyYAML==6.0.2
+packaging==24.1
+librosa==0.10.2.post1
+soxr==0.5.0.post1
+multiprocess==0.70.16
+nvidia-nvjitlink-cu12==12.6.77
+safetensors==0.4.5
+joblib==1.4.2
+pip==24.0
+wandb==0.18.3
+networkx==3.3
+numba==0.60.0
+scipy==1.14.1
+MarkupSafe==2.1.5
+GitPython==3.1.43
+aiohttp==3.10.9
+msgpack==1.1.0
+mpmath==1.3.0
+tzdata==2024.2
+nvidia-cudnn-cu12==9.1.0.70
+scikit-learn==1.5.2
+pytz==2024.2
+dill==0.3.8
+nvidia-cusparse-cu12==12.1.0.106
+soundfile==0.12.1
+aiosignal==1.3.1
+gitdb==4.0.11
+Jinja2==3.1.4
+jiwer==3.0.4
+decorator==5.1.1
+nvidia-cusolver-cu12==11.4.5.107
+protobuf==5.28.2
+idna==3.10
+tqdm==4.66.5
+pandas==2.2.3
+python-dateutil==2.9.0.post0
+Werkzeug==3.0.4
+click==8.1.7
+regex==2024.9.11
+typing_extensions==4.12.2
+nvidia-cublas-cu12==12.1.3.1
+transformers==4.46.0.dev0
+nvidia-nccl-cu12==2.20.5
+nvidia-cuda-cupti-cu12==12.1.105
+triton==3.0.0
+pooch==1.8.2
+smmap==5.0.1
+grpcio==1.66.2
+setuptools==75.1.0
+setproctitle==1.3.3
+accelerate==0.34.2
+nvidia-cuda-nvrtc-cu12==12.1.105
+tensorboard==2.18.0
+absl-py==2.1.0
+nvidia-nvtx-cu12==12.1.105
+fsspec==2024.6.1
+pycparser==2.22
+lazy_loader==0.4
+tensorboard-data-server==0.7.2
+urllib3==2.2.3
+threadpoolctl==3.5.0
+llvmlite==0.43.0
+sympy==1.13.3
+audioread==3.0.1
+tokenizers==0.20.0
+more-itertools==10.5.0
+cffi==1.17.1
+evaluate==0.4.3
+nvidia-curand-cu12==10.3.2.106
+psutil==6.0.0
+filelock==3.16.1
+attrs==24.2.0
+six==1.16.0
+frozenlist==1.4.1
+sentry-sdk==2.15.0
+nvidia-cuda-runtime-cu12==12.1.105
+xxhash==3.5.0
+platformdirs==4.3.6
+multidict==6.1.0
+aiohappyeyeballs==2.4.3
+torch==2.4.1
+huggingface-hub==0.25.1
+numpy==2.0.2
+datasets==3.0.2.dev0
+torchaudio==2.4.1
+charset-normalizer==3.3.2
+certifi==2024.8.30

wandb/run-20241007_125615-a3z1jk8c/files/wandb-metadata.json ADDED Viewed

	@@ -0,0 +1,85 @@

+{
+  "os":  "Linux-6.8.0-45-generic-x86_64-with-glibc2.39",
+  "python":  "3.12.3",
+  "startedAt":  "2024-10-07T12:56:15.255202Z",
+  "args":  [
+    "--model_name_or_path=openai/whisper-large-v3",
+    "--dataset_name=mozilla-foundation/common_voice_17_0",
+    "--dataset_config_name=eu",
+    "--language=basque",
+    "--train_split_name=train+validation",
+    "--eval_split_name=test",
+    "--model_index_name=Whisper Large Basque",
+    "--max_steps=10000",
+    "--output_dir=./",
+    "--per_device_train_batch_size=16",
+    "--per_device_eval_batch_size=8",
+    "--gradient_accumulation_steps=1",
+    "--logging_steps=25",
+    "--learning_rate=4.375e-6",
+    "--warmup_steps=500",
+    "--evaluation_strategy=steps",
+    "--eval_steps=500",
+    "--save_strategy=steps",
+    "--save_steps=1000",
+    "--generation_max_length=228",
+    "--length_column_name=input_length",
+    "--max_duration_in_seconds=30",
+    "--text_column_name=sentence",
+    "--freeze_feature_encoder=False",
+    "--report_to=tensorboard",
+    "--metric_for_best_model=wer",
+    "--greater_is_better=False",
+    "--load_best_model_at_end",
+    "--gradient_checkpointing",
+    "--fp16",
+    "--overwrite_output_dir",
+    "--do_eval",
+    "--predict_with_generate",
+    "--do_normalize_eval",
+    "--streaming",
+    "--push_to_hub",
+    "--report_to",
+    "wandb",
+    "--run_name",
+    "whisper-large-eu"
+  ],
+  "program":  "/home/tknika/whisper-large-eu/run_speech_recognition_seq2seq_streaming.py",
+  "codePath":  "run_speech_recognition_seq2seq_streaming.py",
+  "git":  {
+    "remote":  "https://huggingface.co/xezpeleta/whisper-large-eu",
+    "commit":  "45227421df6af8836af459c374361e7303a68aea"
+  },
+  "email":  "xezpeleta@gmail.com",
+  "root":  "/home/tknika/whisper-large-eu",
+  "host":  "tknika",
+  "username":  "tknika",
+  "executable":  "/home/tknika/whisper-large-eu/.venv/bin/python",
+  "codePathLocal":  "run_speech_recognition_seq2seq_streaming.py",
+  "cpu_count":  8,
+  "cpu_count_logical":  8,
+  "gpu":  "[NVIDIA L40-48Q]",
+  "gpu_count":  1,
+  "disk":  {
+    "/":  {
+      "total":  "314615791616",
+      "used":  "265684000768"
+    }
+  },
+  "memory":  {
+    "total":  "33654026240"
+  },
+  "cpu":  {
+    "count":  8,
+    "countLogical":  8
+  },
+  "gpu_nvidia":  [
+    {
+      "name":  "NVIDIA L40-48Q",
+      "memoryTotal":  "51539607552",
+      "cudaCores":  18176,
+      "architecture":  "Ada"
+    }
+  ],
+  "cudaVersion":  "12.4"
+}

wandb/run-20241007_125615-a3z1jk8c/logs/debug-core.log ADDED Viewed

	@@ -0,0 +1,7 @@

+{"time":"2024-10-07T12:56:14.506819991Z","level":"INFO","msg":"started logging, with flags","port-filename":"/tmp/tmp2qen0_gz/port-20958.txt","pid":20958,"debug":false,"disable-analytics":false}
+{"time":"2024-10-07T12:56:14.506845531Z","level":"INFO","msg":"FeatureState","shutdownOnParentExitEnabled":false}
+{"time":"2024-10-07T12:56:14.510463142Z","level":"INFO","msg":"server is running","addr":{"IP":"127.0.0.1","Port":45237,"Zone":""}}
+{"time":"2024-10-07T12:56:14.510485482Z","level":"INFO","msg":"Will exit if parent process dies.","ppid":20958}
+{"time":"2024-10-07T12:56:14.698116167Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"127.0.0.1:49446"}
+{"time":"2024-10-07T12:56:15.257025559Z","level":"INFO","msg":"handleInformInit: received","streamId":"a3z1jk8c","id":"127.0.0.1:49446"}
+{"time":"2024-10-07T12:56:15.264445669Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"a3z1jk8c","id":"127.0.0.1:49446"}

wandb/run-20241007_125615-a3z1jk8c/logs/debug-internal.log ADDED Viewed

	@@ -0,0 +1,10 @@

+{"time":"2024-10-07T12:56:15.257353437Z","level":"INFO","msg":"using version","core version":"0.18.3"}
+{"time":"2024-10-07T12:56:15.257380326Z","level":"INFO","msg":"created symlink","path":"/home/tknika/whisper-large-eu/wandb/run-20241007_125615-a3z1jk8c/logs/debug-core.log"}
+{"time":"2024-10-07T12:56:15.259721418Z","level":"ERROR","msg":"dialing: google: could not find default credentials. See https://cloud.google.com/docs/authentication/external/set-up-adc for more information"}
+{"time":"2024-10-07T12:56:15.26442537Z","level":"INFO","msg":"created new stream","id":"a3z1jk8c"}
+{"time":"2024-10-07T12:56:15.264442509Z","level":"INFO","msg":"stream: started","id":"a3z1jk8c"}
+{"time":"2024-10-07T12:56:15.264458959Z","level":"INFO","msg":"handler: started","stream_id":{"value":"a3z1jk8c"}}
+{"time":"2024-10-07T12:56:15.264475109Z","level":"INFO","msg":"sender: started","stream_id":{"value":"a3z1jk8c"}}
+{"time":"2024-10-07T12:56:15.264497739Z","level":"INFO","msg":"writer: Do: started","stream_id":{"value":"a3z1jk8c"}}
+{"time":"2024-10-07T12:56:15.681557119Z","level":"INFO","msg":"wandb-core","!BADKEY":null}
+{"time":"2024-10-07T12:56:15.68260129Z","level":"INFO","msg":"Starting system monitor"}

wandb/run-20241007_125615-a3z1jk8c/logs/debug.log ADDED Viewed

	@@ -0,0 +1,28 @@

+2024-10-07 12:56:15,251 INFO    MainThread:20958 [wandb_setup.py:_flush():79] Current SDK version is 0.18.3
+2024-10-07 12:56:15,252 INFO    MainThread:20958 [wandb_setup.py:_flush():79] Configure stats pid to 20958
+2024-10-07 12:56:15,252 INFO    MainThread:20958 [wandb_setup.py:_flush():79] Loading settings from /home/tknika/.config/wandb/settings
+2024-10-07 12:56:15,252 INFO    MainThread:20958 [wandb_setup.py:_flush():79] Loading settings from /home/tknika/whisper-large-eu/wandb/settings
+2024-10-07 12:56:15,252 INFO    MainThread:20958 [wandb_setup.py:_flush():79] Loading settings from environment variables: {'project': 'whisper-medium-eu'}
+2024-10-07 12:56:15,252 INFO    MainThread:20958 [wandb_setup.py:_flush():79] Applying setup settings: {'mode': None, '_disable_service': None}
+2024-10-07 12:56:15,252 INFO    MainThread:20958 [wandb_setup.py:_flush():79] Inferring run settings from compute environment: {'program_relpath': 'run_speech_recognition_seq2seq_streaming.py', 'program_abspath': '/home/tknika/whisper-large-eu/run_speech_recognition_seq2seq_streaming.py', 'program': '/home/tknika/whisper-large-eu/run_speech_recognition_seq2seq_streaming.py'}
+2024-10-07 12:56:15,252 INFO    MainThread:20958 [wandb_setup.py:_flush():79] Applying login settings: {}
+2024-10-07 12:56:15,252 INFO    MainThread:20958 [wandb_init.py:_log_setup():532] Logging user logs to /home/tknika/whisper-large-eu/wandb/run-20241007_125615-a3z1jk8c/logs/debug.log
+2024-10-07 12:56:15,252 INFO    MainThread:20958 [wandb_init.py:_log_setup():533] Logging internal logs to /home/tknika/whisper-large-eu/wandb/run-20241007_125615-a3z1jk8c/logs/debug-internal.log
+2024-10-07 12:56:15,252 INFO    MainThread:20958 [wandb_init.py:init():617] calling init triggers
+2024-10-07 12:56:15,252 INFO    MainThread:20958 [wandb_init.py:init():624] wandb.init called with sweep_config: {}
+config: {}
+2024-10-07 12:56:15,252 INFO    MainThread:20958 [wandb_init.py:init():667] starting backend
+2024-10-07 12:56:15,252 INFO    MainThread:20958 [wandb_init.py:init():671] sending inform_init request
+2024-10-07 12:56:15,254 INFO    MainThread:20958 [backend.py:_multiprocessing_setup():104] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
+2024-10-07 12:56:15,254 INFO    MainThread:20958 [wandb_init.py:init():684] backend started and connected
+2024-10-07 12:56:15,258 INFO    MainThread:20958 [wandb_init.py:init():779] updated telemetry
+2024-10-07 12:56:15,265 INFO    MainThread:20958 [wandb_init.py:init():812] communicating run to backend with 90.0 second timeout
+2024-10-07 12:56:15,676 INFO    MainThread:20958 [wandb_init.py:init():863] starting run threads in backend
+2024-10-07 12:56:15,774 INFO    MainThread:20958 [wandb_run.py:_console_start():2465] atexit reg
+2024-10-07 12:56:15,774 INFO    MainThread:20958 [wandb_run.py:_redirect():2313] redirect: wrap_raw
+2024-10-07 12:56:15,774 INFO    MainThread:20958 [wandb_run.py:_redirect():2378] Wrapping output streams.
+2024-10-07 12:56:15,774 INFO    MainThread:20958 [wandb_run.py:_redirect():2403] Redirects installed.
+2024-10-07 12:56:15,775 INFO    MainThread:20958 [wandb_init.py:init():907] run started, returning control to user process
+2024-10-07 12:56:15,777 INFO    MainThread:20958 [wandb_run.py:_config_callback():1394] config_cb None None {'vocab_size': 51866, 'num_mel_bins': 128, 'd_model': 1280, 'encoder_layers': 32, 'encoder_attention_heads': 20, 'decoder_layers': 32, 'decoder_attention_heads': 20, 'decoder_ffn_dim': 5120, 'encoder_ffn_dim': 5120, 'dropout': 0.0, 'attention_dropout': 0.0, 'activation_dropout': 0.0, 'activation_function': 'gelu', 'init_std': 0.02, 'encoder_layerdrop': 0.0, 'decoder_layerdrop': 0.0, 'use_cache': False, 'num_hidden_layers': 32, 'scale_embedding': False, 'max_source_positions': 1500, 'max_target_positions': 448, 'classifier_proj_size': 256, 'use_weighted_layer_sum': False, 'apply_spec_augment': False, 'mask_time_prob': 0.05, 'mask_time_length': 10, 'mask_time_min_masks': 2, 'mask_feature_prob': 0.0, 'mask_feature_length': 10, 'mask_feature_min_masks': 0, 'median_filter_width': 7, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float16', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': True, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 448, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': [220, 50257], 'architectures': ['WhisperForConditionalGeneration'], 'finetuning_task': 
None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 50257, 'pad_token_id': 50256, 'eos_token_id': 50257, 'sep_token_id': None, 'decoder_start_token_id': 50258, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'openai/whisper-large-v3', 'transformers_version': '4.46.0.dev0', 'model_type': 'whisper', 'forced_decoder_ids': None, 'output_dir': './', 'overwrite_output_dir': True, 'do_train': False, 'do_eval': True, 'do_predict': False, 'eval_strategy': 'steps', 'prediction_loss_only': False, 'per_device_train_batch_size': 16, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 4.375e-06, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 3.0, 'max_steps': 10000, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 500, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': './runs/Oct07_11-46-39_tknika', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 25, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 1000, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 
'eval_steps': 500, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': 'whisper-large-eu', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': True, 'metric_for_best_model': 'wer', 'greater_is_better': False, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'input_length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': False, 'hub_always_push': False, 'gradient_checkpointing': True, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'include_for_metrics': [], 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'evaluation_strategy': 'steps', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 
'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'eval_use_gather_object': False, 'sortish_sampler': False, 'predict_with_generate': True, 'generation_max_length': 228, 'generation_num_beams': None, 'generation_config': None}
+2024-10-07 12:56:15,780 INFO    MainThread:20958 [wandb_config.py:__setitem__():154] config set model/num_parameters = 1543490560 - <bound method Run._config_callback of <wandb.sdk.wandb_run.Run object at 0x748ced2ceae0>>
+2024-10-07 12:56:15,780 INFO    MainThread:20958 [wandb_run.py:_config_callback():1394] config_cb model/num_parameters 1543490560 None

wandb/run-20241007_125615-a3z1jk8c/run-a3z1jk8c.wandb ADDED Viewed

Binary file (393 kB). View file