diff --git "a/training.log" "b/training.log" new file mode 100644--- /dev/null +++ "b/training.log" @@ -0,0 +1,996 @@ +05/10/2023 09:41:07 - WARNING - __main__ - Process rank: 0, device: cuda:0, n_gpu: 2distributed training: True, 16-bits training: True +05/10/2023 09:41:07 - INFO - __main__ - Training/evaluation parameters Seq2SeqTrainingArguments( +_n_gpu=2, +adafactor=False, +adam_beta1=0.9, +adam_beta2=0.999, +adam_epsilon=1e-08, +auto_find_batch_size=False, +bf16=False, +bf16_full_eval=False, +data_seed=None, +dataloader_drop_last=False, +dataloader_num_workers=0, +dataloader_pin_memory=True, +ddp_backend=None, +ddp_bucket_cap_mb=None, +ddp_find_unused_parameters=None, +ddp_timeout=1800, +debug=[], +deepspeed=None, +disable_tqdm=False, +do_eval=True, +do_predict=False, +do_train=True, +eval_accumulation_steps=None, +eval_delay=0, +eval_steps=1000, +evaluation_strategy=steps, +fp16=True, +fp16_backend=auto, +fp16_full_eval=False, +fp16_opt_level=O1, +fsdp=[], +fsdp_config={'fsdp_min_num_params': 0, 'xla': False, 'xla_fsdp_grad_ckpt': False}, +fsdp_min_num_params=0, +fsdp_transformer_layer_cls_to_wrap=None, +full_determinism=False, +generation_config=None, +generation_max_length=225, +generation_num_beams=None, +gradient_accumulation_steps=2, +gradient_checkpointing=True, +greater_is_better=False, +group_by_length=False, +half_precision_backend=auto, +hub_model_id=None, +hub_private_repo=False, +hub_strategy=every_save, +hub_token=, +ignore_data_skip=False, +include_inputs_for_metrics=False, +jit_mode_eval=False, +label_names=None, +label_smoothing_factor=0.0, +learning_rate=1e-05, +length_column_name=input_length, +load_best_model_at_end=True, +local_rank=0, +log_level=passive, +log_level_replica=warning, +log_on_each_node=True, +logging_dir=./runs/May10_09-41-06_crimv3mgpu016, +logging_first_step=False, +logging_nan_inf_filter=True, +logging_steps=25, +logging_strategy=steps, +lr_scheduler_type=linear, +max_grad_norm=1.0, +max_steps=5000, +metric_for_best_model=wer, +mp_parameters=, +no_cuda=False, +num_train_epochs=3.0, +optim=adamw_hf, +optim_args=None, +output_dir=./, +overwrite_output_dir=True, +past_index=-1, +per_device_eval_batch_size=32, +per_device_train_batch_size=32, +predict_with_generate=True, +prediction_loss_only=False, +push_to_hub=True, +push_to_hub_model_id=None, +push_to_hub_organization=None, +push_to_hub_token=, +ray_scope=last, +remove_unused_columns=True, +report_to=['wandb'], +resume_from_checkpoint=None, +run_name=./, +save_on_each_node=False, +save_safetensors=False, +save_steps=1000, +save_strategy=steps, +save_total_limit=None, +seed=42, +sharded_ddp=[], +skip_memory_metrics=True, +sortish_sampler=False, +tf32=None, +torch_compile=False, +torch_compile_backend=None, +torch_compile_mode=None, +torchdynamo=None, +tpu_metrics_debug=False, +tpu_num_cores=None, +use_ipex=False, +use_legacy_prediction_loop=False, +use_mps_device=False, +warmup_ratio=0.0, +warmup_steps=500, +weight_decay=0.0, +xpu_backend=None, +) +05/10/2023 09:41:07 - INFO - __main__ - Training/evaluation parameters Seq2SeqTrainingArguments( +_n_gpu=2, +adafactor=False, +adam_beta1=0.9, +adam_beta2=0.999, +adam_epsilon=1e-08, +auto_find_batch_size=False, +bf16=False, +bf16_full_eval=False, +data_seed=None, +dataloader_drop_last=False, +dataloader_num_workers=0, +dataloader_pin_memory=True, +ddp_backend=None, +ddp_bucket_cap_mb=None, +ddp_find_unused_parameters=None, +ddp_timeout=1800, +debug=[], +deepspeed=None, +disable_tqdm=False, +do_eval=True, +do_predict=False, +do_train=True, +eval_accumulation_steps=None, +eval_delay=0, +eval_steps=1000, +evaluation_strategy=steps, +fp16=True, +fp16_backend=auto, +fp16_full_eval=False, +fp16_opt_level=O1, +fsdp=[], +fsdp_config={'fsdp_min_num_params': 0, 'xla': False, 'xla_fsdp_grad_ckpt': False}, +fsdp_min_num_params=0, +fsdp_transformer_layer_cls_to_wrap=None, +full_determinism=False, +generation_config=None, +generation_max_length=225, +generation_num_beams=None, +gradient_accumulation_steps=2, +gradient_checkpointing=True, +greater_is_better=False, +group_by_length=False, +half_precision_backend=auto, +hub_model_id=None, +hub_private_repo=False, +hub_strategy=every_save, +hub_token=, +ignore_data_skip=False, +include_inputs_for_metrics=False, +jit_mode_eval=False, +label_names=None, +label_smoothing_factor=0.0, +learning_rate=1e-05, +length_column_name=input_length, +load_best_model_at_end=True, +local_rank=0, +log_level=passive, +log_level_replica=warning, +log_on_each_node=True, +logging_dir=./runs/May10_09-41-06_crimv3mgpu016, +logging_first_step=False, +logging_nan_inf_filter=True, +logging_steps=25, +logging_strategy=steps, +lr_scheduler_type=linear, +max_grad_norm=1.0, +max_steps=5000, +metric_for_best_model=wer, +mp_parameters=, +no_cuda=False, +num_train_epochs=3.0, +optim=adamw_hf, +optim_args=None, +output_dir=./, +overwrite_output_dir=True, +past_index=-1, +per_device_eval_batch_size=32, +per_device_train_batch_size=32, +predict_with_generate=True, +prediction_loss_only=False, +push_to_hub=True, +push_to_hub_model_id=None, +push_to_hub_organization=None, +push_to_hub_token=, +ray_scope=last, +remove_unused_columns=True, +report_to=['wandb'], +resume_from_checkpoint=None, +run_name=./, +save_on_each_node=False, +save_safetensors=False, +save_steps=1000, +save_strategy=steps, +save_total_limit=None, +seed=42, +sharded_ddp=[], +skip_memory_metrics=True, +sortish_sampler=False, +tf32=None, +torch_compile=False, +torch_compile_backend=None, +torch_compile_mode=None, +torchdynamo=None, +tpu_metrics_debug=False, +tpu_num_cores=None, +use_ipex=False, +use_legacy_prediction_loop=False, +use_mps_device=False, +warmup_ratio=0.0, +warmup_steps=500, +weight_decay=0.0, +xpu_backend=None, +) +[INFO|configuration_utils.py:669] 2023-05-10 09:41:19,641 >> loading configuration file config.json from cache at /home/local/QCRI/dizham/.cache/huggingface/hub/models--openai--whisper-small/snapshots/f6744499d1eba717bcf4d6be735e3d386ffb60ad/config.json +[INFO|configuration_utils.py:725] 2023-05-10 09:41:19,653 >> Model config WhisperConfig { + "_name_or_path": "openai/whisper-small", + "activation_dropout": 0.0, + "activation_function": "gelu", + "apply_spec_augment": false, + "architectures": [ + "WhisperForConditionalGeneration" + ], + "attention_dropout": 0.0, + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "classifier_proj_size": 256, + "d_model": 768, + "decoder_attention_heads": 12, + "decoder_ffn_dim": 3072, + "decoder_layerdrop": 0.0, + "decoder_layers": 12, + "decoder_start_token_id": 50258, + "dropout": 0.0, + "encoder_attention_heads": 12, + "encoder_ffn_dim": 3072, + "encoder_layerdrop": 0.0, + "encoder_layers": 12, + "eos_token_id": 50257, + "forced_decoder_ids": [ + [ + 1, + 50259 + ], + [ + 2, + 50359 + ], + [ + 3, + 50363 + ] + ], + "init_std": 0.02, + "is_encoder_decoder": true, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "max_length": 448, + "max_source_positions": 1500, + "max_target_positions": 448, + "model_type": "whisper", + "num_hidden_layers": 12, + "num_mel_bins": 80, + "pad_token_id": 50257, + "scale_embedding": false, + "suppress_tokens": [ + 1, + 2, + 7, + 8, + 9, + 10, + 14, + 25, + 26, + 27, + 28, + 29, + 31, + 58, + 59, + 60, + 61, + 62, + 63, + 90, + 91, + 92, + 93, + 359, + 503, + 522, + 542, + 873, + 893, + 902, + 918, + 922, + 931, + 1350, + 1853, + 1982, + 2460, + 2627, + 3246, + 3253, + 3268, + 3536, + 3846, + 3961, + 4183, + 4667, + 6585, + 6647, + 7273, + 9061, + 9383, + 10428, + 10929, + 11938, + 12033, + 12331, + 12562, + 13793, + 14157, + 14635, + 15265, + 15618, + 16553, + 16604, + 18362, + 18956, + 20075, + 21675, + 22520, + 26130, + 26161, + 26435, + 28279, + 29464, + 31650, + 32302, + 32470, + 36865, + 42863, + 47425, + 49870, + 50254, + 50258, + 50360, + 50361, + 50362 + ], + "torch_dtype": "float32", + "transformers_version": "4.29.0.dev0", + "use_cache": true, + "use_weighted_layer_sum": false, + "vocab_size": 51865 +} + +[INFO|feature_extraction_utils.py:469] 2023-05-10 09:41:19,843 >> loading configuration file preprocessor_config.json from cache at /home/local/QCRI/dizham/.cache/huggingface/hub/models--openai--whisper-small/snapshots/f6744499d1eba717bcf4d6be735e3d386ffb60ad/preprocessor_config.json +[INFO|feature_extraction_utils.py:511] 2023-05-10 09:41:19,849 >> Feature extractor WhisperFeatureExtractor { + "chunk_length": 30, + "feature_extractor_type": "WhisperFeatureExtractor", + "feature_size": 80, + "hop_length": 160, + "n_fft": 400, + "n_samples": 480000, + "nb_max_frames": 3000, + "padding_side": "right", + "padding_value": 0.0, + "processor_class": "WhisperProcessor", + "return_attention_mask": false, + "sampling_rate": 16000 +} + +[INFO|tokenization_utils_base.py:1810] 2023-05-10 09:41:20,054 >> loading file vocab.json from cache at /home/local/QCRI/dizham/.cache/huggingface/hub/models--openai--whisper-small/snapshots/f6744499d1eba717bcf4d6be735e3d386ffb60ad/vocab.json +[INFO|tokenization_utils_base.py:1810] 2023-05-10 09:41:20,054 >> loading file tokenizer.json from cache at /home/local/QCRI/dizham/.cache/huggingface/hub/models--openai--whisper-small/snapshots/f6744499d1eba717bcf4d6be735e3d386ffb60ad/tokenizer.json +[INFO|tokenization_utils_base.py:1810] 2023-05-10 09:41:20,054 >> loading file merges.txt from cache at /home/local/QCRI/dizham/.cache/huggingface/hub/models--openai--whisper-small/snapshots/f6744499d1eba717bcf4d6be735e3d386ffb60ad/merges.txt +[INFO|tokenization_utils_base.py:1810] 2023-05-10 09:41:20,054 >> loading file normalizer.json from cache at /home/local/QCRI/dizham/.cache/huggingface/hub/models--openai--whisper-small/snapshots/f6744499d1eba717bcf4d6be735e3d386ffb60ad/normalizer.json +[INFO|tokenization_utils_base.py:1810] 2023-05-10 09:41:20,054 >> loading file added_tokens.json from cache at /home/local/QCRI/dizham/.cache/huggingface/hub/models--openai--whisper-small/snapshots/f6744499d1eba717bcf4d6be735e3d386ffb60ad/added_tokens.json +[INFO|tokenization_utils_base.py:1810] 2023-05-10 09:41:20,054 >> loading file special_tokens_map.json from cache at /home/local/QCRI/dizham/.cache/huggingface/hub/models--openai--whisper-small/snapshots/f6744499d1eba717bcf4d6be735e3d386ffb60ad/special_tokens_map.json +[INFO|tokenization_utils_base.py:1810] 2023-05-10 09:41:20,054 >> loading file tokenizer_config.json from cache at /home/local/QCRI/dizham/.cache/huggingface/hub/models--openai--whisper-small/snapshots/f6744499d1eba717bcf4d6be735e3d386ffb60ad/tokenizer_config.json +[INFO|modeling_utils.py:2542] 2023-05-10 09:41:20,144 >> loading weights file pytorch_model.bin from cache at /home/local/QCRI/dizham/.cache/huggingface/hub/models--openai--whisper-small/snapshots/f6744499d1eba717bcf4d6be735e3d386ffb60ad/pytorch_model.bin +[INFO|configuration_utils.py:577] 2023-05-10 09:41:20,754 >> Generate config GenerationConfig { + "_from_model_config": true, + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "decoder_start_token_id": 50258, + "eos_token_id": 50257, + "max_length": 448, + "pad_token_id": 50257, + "transformers_version": "4.29.0.dev0", + "use_cache": false +} + +[INFO|modeling_utils.py:3211] 2023-05-10 09:41:23,296 >> All model checkpoint weights were used when initializing WhisperForConditionalGeneration. + +[INFO|modeling_utils.py:3219] 2023-05-10 09:41:23,296 >> All the weights of WhisperForConditionalGeneration were initialized from the model checkpoint at openai/whisper-small. +If your task is similar to the task the model of the checkpoint was trained on, you can already use WhisperForConditionalGeneration for predictions without further training. +[INFO|configuration_utils.py:539] 2023-05-10 09:41:23,501 >> loading configuration file generation_config.json from cache at /home/local/QCRI/dizham/.cache/huggingface/hub/models--openai--whisper-small/snapshots/f6744499d1eba717bcf4d6be735e3d386ffb60ad/generation_config.json +[INFO|configuration_utils.py:577] 2023-05-10 09:41:23,502 >> Generate config GenerationConfig { + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "decoder_start_token_id": 50258, + "eos_token_id": 50257, + "forced_decoder_ids": [ + [ + 1, + null + ], + [ + 2, + 50359 + ] + ], + "is_multilingual": true, + "lang_to_id": { + "<|af|>": 50327, + "<|am|>": 50334, + "<|ar|>": 50272, + "<|as|>": 50350, + "<|az|>": 50304, + "<|ba|>": 50355, + "<|be|>": 50330, + "<|bg|>": 50292, + "<|bn|>": 50302, + "<|bo|>": 50347, + "<|br|>": 50309, + "<|bs|>": 50315, + "<|ca|>": 50270, + "<|cs|>": 50283, + "<|cy|>": 50297, + "<|da|>": 50285, + "<|de|>": 50261, + "<|el|>": 50281, + "<|en|>": 50259, + "<|es|>": 50262, + "<|et|>": 50307, + "<|eu|>": 50310, + "<|fa|>": 50300, + "<|fi|>": 50277, + "<|fo|>": 50338, + "<|fr|>": 50265, + "<|gl|>": 50319, + "<|gu|>": 50333, + "<|haw|>": 50352, + "<|ha|>": 50354, + "<|he|>": 50279, + "<|hi|>": 50276, + "<|hr|>": 50291, + "<|ht|>": 50339, + "<|hu|>": 50286, + "<|hy|>": 50312, + "<|id|>": 50275, + "<|is|>": 50311, + "<|it|>": 50274, + "<|ja|>": 50266, + "<|jw|>": 50356, + "<|ka|>": 50329, + "<|kk|>": 50316, + "<|km|>": 50323, + "<|kn|>": 50306, + "<|ko|>": 50264, + "<|la|>": 50294, + "<|lb|>": 50345, + "<|ln|>": 50353, + "<|lo|>": 50336, + "<|lt|>": 50293, + "<|lv|>": 50301, + "<|mg|>": 50349, + "<|mi|>": 50295, + "<|mk|>": 50308, + "<|ml|>": 50296, + "<|mn|>": 50314, + "<|mr|>": 50320, + "<|ms|>": 50282, + "<|mt|>": 50343, + "<|my|>": 50346, + "<|ne|>": 50313, + "<|nl|>": 50271, + "<|nn|>": 50342, + "<|no|>": 50288, + "<|oc|>": 50328, + "<|pa|>": 50321, + "<|pl|>": 50269, + "<|ps|>": 50340, + "<|pt|>": 50267, + "<|ro|>": 50284, + "<|ru|>": 50263, + "<|sa|>": 50344, + "<|sd|>": 50332, + "<|si|>": 50322, + "<|sk|>": 50298, + "<|sl|>": 50305, + "<|sn|>": 50324, + "<|so|>": 50326, + "<|sq|>": 50317, + "<|sr|>": 50303, + "<|su|>": 50357, + "<|sv|>": 50273, + "<|sw|>": 50318, + "<|ta|>": 50287, + "<|te|>": 50299, + "<|tg|>": 50331, + "<|th|>": 50289, + "<|tk|>": 50341, + "<|tl|>": 50348, + "<|tr|>": 50268, + "<|tt|>": 50351, + "<|uk|>": 50280, + "<|ur|>": 50290, + "<|uz|>": 50337, + "<|vi|>": 50278, + "<|yi|>": 50335, + "<|yo|>": 50325, + "<|zh|>": 50260 + }, + "max_initial_timestamp_index": 1, + "max_length": 448, + "no_timestamps_token_id": 50363, + "pad_token_id": 50257, + "return_timestamps": false, + "suppress_tokens": [ + 1, + 2, + 7, + 8, + 9, + 10, + 14, + 25, + 26, + 27, + 28, + 29, + 31, + 58, + 59, + 60, + 61, + 62, + 63, + 90, + 91, + 92, + 93, + 359, + 503, + 522, + 542, + 873, + 893, + 902, + 918, + 922, + 931, + 1350, + 1853, + 1982, + 2460, + 2627, + 3246, + 3253, + 3268, + 3536, + 3846, + 3961, + 4183, + 4667, + 6585, + 6647, + 7273, + 9061, + 9383, + 10428, + 10929, + 11938, + 12033, + 12331, + 12562, + 13793, + 14157, + 14635, + 15265, + 15618, + 16553, + 16604, + 18362, + 18956, + 20075, + 21675, + 22520, + 26130, + 26161, + 26435, + 28279, + 29464, + 31650, + 32302, + 32470, + 36865, + 42863, + 47425, + 49870, + 50254, + 50258, + 50358, + 50359, + 50360, + 50361, + 50362 + ], + "task_to_id": { + "transcribe": 50359, + "translate": 50358 + }, + "transformers_version": "4.29.0.dev0" +} + +[INFO|feature_extraction_utils.py:369] 2023-05-10 09:41:24,661 >> Feature extractor saved in ./preprocessor_config.json +[INFO|tokenization_utils_base.py:2181] 2023-05-10 09:41:24,666 >> tokenizer config file saved in ./tokenizer_config.json +[INFO|tokenization_utils_base.py:2188] 2023-05-10 09:41:24,671 >> Special tokens file saved in ./special_tokens_map.json +[INFO|configuration_utils.py:458] 2023-05-10 09:41:24,826 >> Configuration saved in ./config.json +[INFO|image_processing_utils.py:307] 2023-05-10 09:41:24,827 >> loading configuration file ./preprocessor_config.json +[INFO|feature_extraction_utils.py:467] 2023-05-10 09:41:24,834 >> loading configuration file ./preprocessor_config.json +[INFO|feature_extraction_utils.py:511] 2023-05-10 09:41:24,835 >> Feature extractor WhisperFeatureExtractor { + "chunk_length": 30, + "feature_extractor_type": "WhisperFeatureExtractor", + "feature_size": 80, + "hop_length": 160, + "n_fft": 400, + "n_samples": 480000, + "nb_max_frames": 3000, + "padding_side": "right", + "padding_value": 0.0, + "processor_class": "WhisperProcessor", + "return_attention_mask": false, + "sampling_rate": 16000 +} + +[INFO|tokenization_utils_base.py:1808] 2023-05-10 09:41:24,836 >> loading file vocab.json +[INFO|tokenization_utils_base.py:1808] 2023-05-10 09:41:24,836 >> loading file tokenizer.json +[INFO|tokenization_utils_base.py:1808] 2023-05-10 09:41:24,836 >> loading file merges.txt +[INFO|tokenization_utils_base.py:1808] 2023-05-10 09:41:24,836 >> loading file normalizer.json +[INFO|tokenization_utils_base.py:1808] 2023-05-10 09:41:24,836 >> loading file added_tokens.json +[INFO|tokenization_utils_base.py:1808] 2023-05-10 09:41:24,836 >> loading file special_tokens_map.json +[INFO|tokenization_utils_base.py:1808] 2023-05-10 09:41:24,836 >> loading file tokenizer_config.json +[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,898 >> Adding <|startoftranscript|> to the vocabulary +[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,898 >> Adding <|en|> to the vocabulary +[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,898 >> Adding <|zh|> to the vocabulary +[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,898 >> Adding <|de|> to the vocabulary +[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,898 >> Adding <|es|> to the vocabulary +[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,898 >> Adding <|ru|> to the vocabulary +[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,898 >> Adding <|ko|> to the vocabulary +[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,898 >> Adding <|fr|> to the vocabulary +[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,898 >> Adding <|ja|> to the vocabulary +[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,898 >> Adding <|pt|> to the vocabulary +[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,898 >> Adding <|tr|> to the vocabulary +[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,898 >> Adding <|pl|> to the vocabulary +[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,898 >> Adding <|ca|> to the vocabulary +[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,898 >> Adding <|nl|> to the vocabulary +[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,898 >> Adding <|ar|> to the vocabulary +[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,898 >> Adding <|sv|> to the vocabulary +[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,898 >> Adding <|it|> to the vocabulary +[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,898 >> Adding <|id|> to the vocabulary +[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,898 >> Adding <|hi|> to the vocabulary +[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,898 >> Adding <|fi|> to the vocabulary +[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,898 >> Adding <|vi|> to the vocabulary +[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,898 >> Adding <|he|> to the vocabulary +[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,898 >> Adding <|uk|> to the vocabulary +[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,898 >> Adding <|el|> to the vocabulary +[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,898 >> Adding <|ms|> to the vocabulary +[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,898 >> Adding <|cs|> to the vocabulary +[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,898 >> Adding <|ro|> to the vocabulary +[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,898 >> Adding <|da|> to the vocabulary +[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,898 >> Adding <|hu|> to the vocabulary +[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,898 >> Adding <|ta|> to the vocabulary +[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,898 >> Adding <|no|> to the vocabulary +[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,898 >> Adding <|th|> to the vocabulary +[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,899 >> Adding <|ur|> to the vocabulary +[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,899 >> Adding <|hr|> to the vocabulary +[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,899 >> Adding <|bg|> to the vocabulary +[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,899 >> Adding <|lt|> to the vocabulary +[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,899 >> Adding <|la|> to the vocabulary +[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,899 >> Adding <|mi|> to the vocabulary +[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,899 >> Adding <|ml|> to the vocabulary +[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,899 >> Adding <|cy|> to the vocabulary +[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,899 >> Adding <|sk|> to the vocabulary +[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,899 >> Adding <|te|> to the vocabulary +[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,899 >> Adding <|fa|> to the vocabulary +[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,899 >> Adding <|lv|> to the vocabulary +[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,899 >> Adding <|bn|> to the vocabulary +[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,899 >> Adding <|sr|> to the vocabulary +[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,899 >> Adding <|az|> to the vocabulary +[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,899 >> Adding <|sl|> to the vocabulary +[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,899 >> Adding <|kn|> to the vocabulary +[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,899 >> Adding <|et|> to the vocabulary +[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,899 >> Adding <|mk|> to the vocabulary +[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,899 >> Adding <|br|> to the vocabulary +[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,899 >> Adding <|eu|> to the vocabulary +[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,899 >> Adding <|is|> to the vocabulary +[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,899 >> Adding <|hy|> to the vocabulary +[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,899 >> Adding <|ne|> to the vocabulary +[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,899 >> Adding <|mn|> to the vocabulary +[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,899 >> Adding <|bs|> to the vocabulary +[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,899 >> Adding <|kk|> to the vocabulary +[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,899 >> Adding <|sq|> to the vocabulary +[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,899 >> Adding <|sw|> to the vocabulary +[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,899 >> Adding <|gl|> to the vocabulary +[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,899 >> Adding <|mr|> to the vocabulary +[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,899 >> Adding <|pa|> to the vocabulary +[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,899 >> Adding <|si|> to the vocabulary +[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,899 >> Adding <|km|> to the vocabulary +[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,899 >> Adding <|sn|> to the vocabulary +[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,899 >> Adding <|yo|> to the vocabulary +[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,899 >> Adding <|so|> to the vocabulary +[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,899 >> Adding <|af|> to the vocabulary +[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,899 >> Adding <|oc|> to the vocabulary +[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,899 >> Adding <|ka|> to the vocabulary +[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,899 >> Adding <|be|> to the vocabulary +[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,899 >> Adding <|tg|> to the vocabulary +[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,900 >> Adding <|sd|> to the vocabulary +[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,900 >> Adding <|gu|> to the vocabulary +[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,900 >> Adding <|am|> to the vocabulary +[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,900 >> Adding <|yi|> to the vocabulary +[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,900 >> Adding <|lo|> to the vocabulary +[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,900 >> Adding <|uz|> to the vocabulary +[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,900 >> Adding <|fo|> to the vocabulary +[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,900 >> Adding <|ht|> to the vocabulary +[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,900 >> Adding <|ps|> to the vocabulary +[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,900 >> Adding <|tk|> to the vocabulary +[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,900 >> Adding <|nn|> to the vocabulary +[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,900 >> Adding <|mt|> to the vocabulary +[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,900 >> Adding <|sa|> to the vocabulary +[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,900 >> Adding <|lb|> to the vocabulary +[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,900 >> Adding <|my|> to the vocabulary +[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,900 >> Adding <|bo|> to the vocabulary +[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,900 >> Adding <|tl|> to the vocabulary +[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,900 >> Adding <|mg|> to the vocabulary +[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,900 >> Adding <|as|> to the vocabulary +[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,900 >> Adding <|tt|> to the vocabulary +[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,900 >> Adding <|haw|> to the vocabulary +[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,900 >> Adding <|ln|> to the vocabulary +[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,900 >> Adding <|ha|> to the vocabulary +[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,900 >> Adding <|ba|> to the vocabulary +[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,900 >> Adding <|jw|> to the vocabulary +[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,900 >> Adding <|su|> to the vocabulary +[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,900 >> Adding <|translate|> to the vocabulary +[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,900 >> Adding <|transcribe|> to the vocabulary +[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,900 >> Adding <|startoflm|> to the vocabulary +[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,900 >> Adding <|startofprev|> to the vocabulary +[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,900 >> Adding <|nocaptions|> to the vocabulary +[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,900 >> Adding <|notimestamps|> to the vocabulary +/home/local/QCRI/dizham/kanari/whisper/whisper-small-es/./ is already a clone of https://huggingface.co/danielizham/whisper-small-es. Make sure you pull the latest changes with `repo.git_pull()`. +05/10/2023 09:41:27 - WARNING - huggingface_hub.repository - /home/local/QCRI/dizham/kanari/whisper/whisper-small-es/./ is already a clone of https://huggingface.co/danielizham/whisper-small-es. Make sure you pull the latest changes with `repo.git_pull()`. +[INFO|trainer.py:565] 2023-05-10 09:41:30,128 >> max_steps is given, it will override any value given in num_train_epochs +[INFO|trainer.py:622] 2023-05-10 09:41:30,129 >> Using cuda_amp half precision backend +/home/local/QCRI/dizham/miniconda3/envs/whisper/lib/python3.9/site-packages/transformers/optimization.py:407: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning + warnings.warn( +[INFO|trainer.py:1771] 2023-05-10 09:41:30,142 >> ***** Running training ***** +[INFO|trainer.py:1772] 2023-05-10 09:41:30,142 >> Num examples = 640,000 +[INFO|trainer.py:1773] 2023-05-10 09:41:30,142 >> Num Epochs = 9,223,372,036,854,775,807 +[INFO|trainer.py:1774] 2023-05-10 09:41:30,142 >> Instantaneous batch size per device = 32 +[INFO|trainer.py:1775] 2023-05-10 09:41:30,142 >> Total train batch size (w. parallel, distributed & accumulation) = 128 +[INFO|trainer.py:1776] 2023-05-10 09:41:30,142 >> Gradient Accumulation steps = 2 +[INFO|trainer.py:1777] 2023-05-10 09:41:30,142 >> Total optimization steps = 5,000 +[INFO|trainer.py:1778] 2023-05-10 09:41:30,143 >> Number of trainable parameters = 241,734,912 +[INFO|integrations.py:720] 2023-05-10 09:41:30,144 >> Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true" +wandb: Currently logged in as: danielizham. Use `wandb login --relogin` to force relogin +wandb: Tracking run with wandb version 0.15.2 +wandb: Run data is saved locally in /home/local/QCRI/dizham/kanari/whisper/whisper-small-es/wandb/run-20230510_094132-lvsln7ks +wandb: Run `wandb offline` to turn off syncing. +wandb: Syncing run astral-silence-4 +wandb: ⭐️ View project at https://wandb.ai/danielizham/huggingface +wandb: 🚀 View run at https://wandb.ai/danielizham/huggingface/runs/lvsln7ks + 0%| | 0/5000 [00:00> The following columns in the training set don't have a corresponding argument in `WhisperForConditionalGeneration.forward` and have been ignored: input_length. If input_length are not expected by `WhisperForConditionalGeneration.forward`, you can safely ignore this message. +/home/local/QCRI/dizham/miniconda3/envs/whisper/lib/python3.9/site-packages/torch/nn/parallel/_functions.py:68: UserWarning: Was asked to gather along dimension 0, but all input tensors were scalars; will instead unsqueeze and return a vector. + warnings.warn('Was asked to gather along dimension 0, but all ' + 0%| | 1/5000 [02:24<200:03:10, 144.07s/it] 0%| | 2/5000 [02:54<107:22:57, 77.35s/it] 0%| | 3/5000 [03:22<76:05:16, 54.82s/it] 0%| | 4/5000 [03:52<62:10:56, 44.81s/it] 0%| | 5/5000 [04:19<53:36:03, 38.63s/it] 0%| | 6/5000 [04:49<49:12:44, 35.48s/it] 0%| | 7/5000 [05:16<45:26:55, 32.77s/it] 0%| | 8/5000 [05:45<43:41:35, 31.51s/it] 0%| | 9/5000 [06:12<41:42:07, 30.08s/it] 0%| | 10/5000 [06:41<41:26:32, 29.90s/it] 0%| | 11/5000 [07:08<40:12:15, 29.01s/it] 0%| | 12/5000 [07:39<40:55:33, 29.54s/it] 0%| | 13/5000 [08:09<41:07:16, 29.68s/it] 0%| | 14/5000 [08:36<40:00:34, 28.89s/it] 0%| | 15/5000 [09:06<40:32:59, 29.28s/it] 0%| | 16/5000 [09:33<39:37:45, 28.62s/it] 0%| | 17/5000 [10:03<40:16:19, 29.09s/it] 0%| | 18/5000 [10:39<43:07:28, 31.16s/it] 0%| | 19/5000 [11:06<41:22:59, 29.91s/it] 0%| | 20/5000 [11:37<41:28:17, 29.98s/it] 0%| | 21/5000 [12:04<40:16:22, 29.12s/it] 0%| | 22/5000 [12:34<40:42:41, 29.44s/it] 0%| | 23/5000 [13:01<39:32:57, 28.61s/it] 0%| | 24/5000 [13:30<40:01:18, 28.95s/it] 0%| | 25/5000 [13:57<39:16:32, 28.42s/it] 0%| | 25/5000 [13:57<39:16:32, 28.42s/it] 1%| | 26/5000 [14:25<38:51:21, 28.12s/it] 1%| | 27/5000 [14:55<39:32:56, 28.63s/it] 1%| | 28/5000 [15:23<39:13:02, 28.40s/it] 1%| | 29/5000 [15:50<38:58:56, 28.23s/it] 1%| | 30/5000 [16:19<38:59:29, 28.24s/it] 1%| | 31/5000 [16:48<39:33:35, 28.66s/it] 1%| | 32/5000 [17:17<39:24:55, 28.56s/it] 1%| | 33/5000 [17:50<41:29:21, 30.07s/it] 1%| | 34/5000 [18:20<41:10:06, 29.84s/it] 1%| | 35/5000 [18:51<42:01:33, 30.47s/it] 1%| | 36/5000 [19:19<40:41:05, 29.51s/it] 1%| | 37/5000 [19:46<39:54:21, 28.95s/it] 1%| | 38/5000 [20:14<39:24:32, 28.59s/it] 1%| | 39/5000 [20:41<38:45:04, 28.12s/it] 1%| | 40/5000 [21:11<39:39:26, 28.78s/it] 1%| | 41/5000 [21:39<39:08:27, 28.41s/it] 1%| | 42/5000 [22:06<38:41:21, 28.09s/it] 1%| | 43/5000 [22:42<41:44:40, 30.32s/it] 1%| | 44/5000 [23:09<40:21:48, 29.32s/it] 1%| | 45/5000 [23:36<39:31:21, 28.71s/it] 1%| | 46/5000 [24:04<38:58:42, 28.33s/it] 1%| | 47/5000 [24:31<38:38:47, 28.09s/it] 1%| | 48/5000 [25:02<39:48:27, 28.94s/it] 1%| | 49/5000 [25:31<39:52:07, 28.99s/it] 1%| | 50/5000 [26:04<41:27:08, 30.15s/it] 1%| | 50/5000 [26:04<41:27:08, 30.15s/it] 1%| | 51/5000 [26:31<40:16:34, 29.30s/it] 1%| | 52/5000 [27:01<40:28:01, 29.44s/it] 1%| | 53/5000 [27:29<39:51:16, 29.00s/it] 1%| | 54/5000 [27:57<39:29:05, 28.74s/it] 1%| | 55/5000 [28:25<39:07:40, 28.49s/it] 1%| | 56/5000 [29:03<42:57:48, 31.28s/it] 1%| | 57/5000 [29:30<41:09:56, 29.98s/it] 1%| | 58/5000 [30:01<41:46:03, 30.43s/it] 1%| | 59/5000 [30:29<40:26:51, 29.47s/it] 1%| | 60/5000 [30:57<40:00:21, 29.15s/it] 1%| | 61/5000 [31:24<39:11:00, 28.56s/it] 1%| | 62/5000 [31:51<38:36:45, 28.15s/it] 1%|▏ | 63/5000 [32:19<38:13:12, 27.87s/it] 1%|▏ | 64/5000 [32:45<37:33:23, 27.39s/it] 1%|▏ | 65/5000 [33:12<37:29:43, 27.35s/it] 1%|▏ | 66/5000 [33:40<37:30:33, 27.37s/it] 1%|▏ | 67/5000 [34:07<37:35:52, 27.44s/it] 1%|▏ | 68/5000 [34:35<37:44:18, 27.55s/it] 1%|▏ | 69/5000 [35:05<38:34:41, 28.17s/it] 1%|▏ | 70/5000 [35:33<38:41:01, 28.25s/it] 1%|▏ | 71/5000 [36:04<39:45:54, 29.04s/it] 1%|▏ | 72/5000 [36:33<39:57:03, 29.18s/it] 1%|▏ | 73/5000 [37:01<39:17:44, 28.71s/it] 1%|▏ | 74/5000 [37:35<41:24:44, 30.26s/it] 2%|▏ | 75/5000 [38:05<41:31:52, 30.36s/it] 2%|▏ | 75/5000 [38:05<41:31:52, 30.36s/it] 2%|▏ | 76/5000 [38:34<40:37:48, 29.71s/it] 2%|▏ | 77/5000 [39:03<40:31:44, 29.64s/it] 2%|▏ | 78/5000 [39:33<40:48:16, 29.84s/it] 2%|▏ | 79/5000 [40:07<42:19:07, 30.96s/it] 2%|▏ | 80/5000 [40:34<40:46:52, 29.84s/it] 2%|▏ | 81/5000 [41:03<40:32:12, 29.67s/it] 2%|▏ | 82/5000 [41:31<39:46:41, 29.12s/it] 2%|▏ | 83/5000 [42:05<41:31:05, 30.40s/it] 2%|▏ | 84/5000 [42:32<40:02:43, 29.33s/it] 2%|▏ | 85/5000 [43:01<39:59:18, 29.29s/it] 2%|▏ | 86/5000 [43:28<38:59:26, 28.56s/it] 2%|▏ | 87/5000 [44:06<42:53:09, 31.42s/it] 2%|▏ | 88/5000 [44:43<45:26:03, 33.30s/it] 2%|▏ | 89/5000 [45:10<42:50:12, 31.40s/it] 2%|▏ | 90/5000 [45:41<42:30:02, 31.16s/it] 2%|▏ | 91/5000 [46:08<40:59:26, 30.06s/it] 2%|▏ | 92/5000 [46:41<42:10:18, 30.93s/it] 2%|▏ | 93/5000 [47:13<42:35:51, 31.25s/it] 2%|▏ | 94/5000 [47:41<40:53:55, 30.01s/it] 2%|▏ | 95/5000 [48:10<40:45:16, 29.91s/it] 2%|▏ | 96/5000 [48:37<39:36:05, 29.07s/it] 2%|▏ | 97/5000 [49:07<39:53:30, 29.29s/it] 2%|▏ | 98/5000 [49:35<39:22:36, 28.92s/it] 2%|▏ | 99/5000 [50:05<39:34:07, 29.07s/it] 2%|▏ | 100/5000 [50:33<39:07:41, 28.75s/it] 2%|▏ | 100/5000 [50:33<39:07:41, 28.75s/it] 2%|▏ | 101/5000 [51:03<39:37:29, 29.12s/it] 2%|▏ | 102/5000 [51:32<39:46:16, 29.23s/it] 2%|▏ | 103/5000 [52:03<40:16:49, 29.61s/it] 2%|▏ | 104/5000 [52:30<39:16:13, 28.88s/it] 2%|▏ | 105/5000 [53:05<41:54:06, 30.82s/it] 2%|▏ | 106/5000 [53:33<40:50:31, 30.04s/it] 2%|▏ | 107/5000 [54:02<40:24:41, 29.73s/it] 2%|▏ | 108/5000 [54:30<39:36:56, 29.15s/it] 2%|▏ | 109/5000 [55:00<39:44:16, 29.25s/it] 2%|▏ | 110/5000 [55:28<39:30:49, 29.09s/it] 2%|▏ | 111/5000 [55:58<39:43:10, 29.25s/it] 2%|▏ | 112/5000 [56:26<39:08:06, 28.82s/it] 2%|▏ | 113/5000 [56:59<40:59:40, 30.20s/it] 2%|▏ | 114/5000 [57:26<39:36:32, 29.18s/it] 2%|▏ | 115/5000 [57:57<40:11:03, 29.61s/it] 2%|▏ | 116/5000 [58:23<38:56:05, 28.70s/it] 2%|▏ | 117/5000 [58:50<38:19:58, 28.26s/it] 2%|▏ | 118/5000 [59:19<38:27:04, 28.35s/it] 2%|▏ | 119/5000 [59:49<39:01:08, 28.78s/it] 2%|▏ | 120/5000 [1:00:16<38:35:47, 28.47s/it] 2%|▏ | 121/5000 [1:00:46<39:11:40, 28.92s/it] 2%|▏ | 122/5000 [1:01:14<38:41:56, 28.56s/it] 2%|▏ | 123/5000 [1:01:42<38:21:39, 28.32s/it] 2%|▏ | 124/5000 [1:02:09<37:55:13, 28.00s/it] 2%|▎ | 125/5000 [1:02:40<39:01:31, 28.82s/it] 2%|▎ | 125/5000 [1:02:40<39:01:31, 28.82s/it] 3%|▎ | 126/5000 [1:03:08<38:39:31, 28.55s/it] 3%|▎ | 127/5000 [1:03:37<39:03:46, 28.86s/it] 3%|▎ | 128/5000 [1:04:06<39:06:47, 28.90s/it] 3%|▎ | 129/5000 [1:04:42<41:43:08, 30.83s/it] 3%|▎ | 130/5000 [1:05:10<40:51:44, 30.21s/it] 3%|▎ | 131/5000 [1:05:40<40:30:48, 29.95s/it] 3%|▎ | 132/5000 [1:06:08<39:35:09, 29.27s/it] 3%|▎ | 133/5000 [1:06:36<39:11:03, 28.98s/it] 3%|▎ | 134/5000 [1:07:04<38:48:37, 28.71s/it] 3%|▎ | 135/5000 [1:07:32<38:29:33, 28.48s/it] 3%|▎ | 136/5000 [1:08:00<38:24:21, 28.43s/it] 3%|▎ | 137/5000 [1:08:27<37:53:50, 28.05s/it] 3%|▎ | 138/5000 [1:08:57<38:21:46, 28.41s/it] 3%|▎ | 139/5000 [1:09:32<41:20:50, 30.62s/it] 3%|▎ | 140/5000 [1:10:00<40:14:41, 29.81s/it] 3%|▎ | 141/5000 [1:10:30<40:20:22, 29.89s/it] 3%|▎ | 142/5000 [1:10:57<39:11:04, 29.04s/it] 3%|▎ | 143/5000 [1:11:28<39:57:00, 29.61s/it] 3%|▎ | 144/5000 [1:11:56<39:00:59, 28.93s/it] 3%|▎ | 145/5000 [1:12:26<39:24:42, 29.22s/it] 3%|▎ | 146/5000 [1:12:54<39:06:52, 29.01s/it] 3%|▎ | 147/5000 [1:13:22<38:44:26, 28.74s/it] 3%|▎ | 148/5000 [1:13:49<37:52:18, 28.10s/it] 3%|▎ | 149/5000 [1:14:18<38:28:23, 28.55s/it] 3%|▎ | 150/5000 [1:14:46<38:01:15, 28.22s/it] 3%|▎ | 150/5000 [1:14:46<38:01:15, 28.22s/it] 3%|▎ | 151/5000 [1:15:13<37:42:14, 27.99s/it] 3%|▎ | 152/5000 [1:15:48<40:16:30, 29.91s/it] 3%|▎ | 153/5000 [1:16:16<39:25:12, 29.28s/it] 3%|▎ | 154/5000 [1:16:43<38:45:06, 28.79s/it] 3%|▎ | 155/5000 [1:17:10<38:00:43, 28.24s/it] 3%|▎ | 156/5000 [1:17:38<37:59:49, 28.24s/it] 3%|▎ | 157/5000 [1:18:07<38:18:46, 28.48s/it] 3%|▎ | 158/5000 [1:18:35<38:03:05, 28.29s/it] 3%|▎ | 159/5000 [1:19:03<37:52:46, 28.17s/it] 3%|▎ | 160/5000 [1:19:33<38:26:07, 28.59s/it] 3%|▎ | 161/5000 [1:20:00<38:05:37, 28.34s/it] 3%|▎ | 162/5000 [1:20:28<37:36:59, 27.99s/it] 3%|▎ | 163/5000 [1:20:55<37:31:20, 27.93s/it] 3%|▎ | 164/5000 [1:21:23<37:24:09, 27.84s/it] 3%|▎ | 165/5000 [1:21:50<37:04:35, 27.61s/it] 3%|▎ | 166/5000 [1:22:18<37:08:43, 27.66s/it] 3%|▎ | 167/5000 [1:22:45<36:59:38, 27.56s/it] 3%|▎ | 168/5000 [1:23:14<37:16:54, 27.78s/it] 3%|▎ | 169/5000 [1:23:41<37:03:58, 27.62s/it] 3%|▎ | 170/5000 [1:24:11<38:12:34, 28.48s/it] 3%|▎ | 171/5000 [1:24:40<38:22:43, 28.61s/it] 3%|▎ | 172/5000 [1:25:09<38:36:28, 28.79s/it] 3%|▎ | 173/5000 [1:25:37<38:12:00, 28.49s/it] 3%|▎ | 174/5000 [1:26:08<38:57:16, 29.06s/it] 4%|▎ | 175/5000 [1:26:36<38:31:03, 28.74s/it] 4%|▎ | 175/5000 [1:26:36<38:31:03, 28.74s/it] 4%|▎ | 176/5000 [1:27:03<37:59:57, 28.36s/it] 4%|▎ | 177/5000 [1:27:31<37:46:21, 28.19s/it] 4%|▎ | 178/5000 [1:28:00<38:18:46, 28.60s/it] 4%|▎ | 179/5000 [1:28:28<37:50:52, 28.26s/it] 4%|▎ | 180/5000 [1:28:58<38:23:11, 28.67s/it] 4%|▎ | 181/5000 [1:29:28<39:12:56, 29.30s/it] 4%|▎ | 182/5000 [1:29:56<38:26:11, 28.72s/it] 4%|▎ | 183/5000 [1:30:25<38:45:28, 28.97s/it] 4%|▎ | 184/5000 [1:30:56<39:39:35, 29.65s/it] 4%|▎ | 185/5000 [1:31:26<39:41:49, 29.68s/it] 4%|▎ | 186/5000 [1:31:55<39:22:25, 29.44s/it] 4%|▎ | 187/5000 [1:32:25<39:31:26, 29.56s/it] 4%|▍ | 188/5000 [1:32:53<39:01:26, 29.20s/it] 4%|▍ | 189/5000 [1:33:23<39:15:20, 29.37s/it] 4%|▍ | 190/5000 [1:33:54<39:43:38, 29.73s/it] 4%|▍ | 191/5000 [1:34:21<38:42:36, 28.98s/it] 4%|▍ | 192/5000 [1:34:53<40:05:43, 30.02s/it] 4%|▍ | 193/5000 [1:35:24<40:14:02, 30.13s/it] 4%|▍ | 194/5000 [1:35:53<39:46:50, 29.80s/it] 4%|▍ | 195/5000 [1:36:24<40:14:35, 30.15s/it] 4%|▍ | 196/5000 [1:36:51<39:05:26, 29.29s/it] 4%|▍ | 197/5000 [1:37:21<39:16:34, 29.44s/it] 4%|▍ | 198/5000 [1:37:53<40:17:59, 30.21s/it] 4%|▍ | 199/5000 [1:38:20<39:04:42, 29.30s/it] 4%|▍ | 200/5000 [1:38:53<40:24:10, 30.30s/it] 4%|▍ | 200/5000 [1:38:53<40:24:10, 30.30s/it] 4%|▍ | 201/5000 [1:39:20<39:11:20, 29.40s/it] 4%|▍ | 202/5000 [1:39:52<40:18:48, 30.25s/it] 4%|▍ | 203/5000 [1:40:19<39:01:40, 29.29s/it] 4%|▍ | 204/5000 [1:40:51<39:51:14, 29.92s/it] 4%|▍ | 205/5000 [1:41:24<41:21:24, 31.05s/it] 4%|▍ | 206/5000 [1:41:54<40:40:34, 30.55s/it] 4%|▍ | 207/5000 [1:42:23<40:06:08, 30.12s/it] 4%|▍ | 208/5000 [1:42:53<40:04:48, 30.11s/it] 4%|▍ | 209/5000 [1:43:23<40:03:05, 30.10s/it] 4%|▍ | 210/5000 [1:43:55<40:49:29, 30.68s/it] 4%|▍ | 211/5000 [1:44:22<39:29:45, 29.69s/it] 4%|▍ | 212/5000 [1:44:55<40:39:13, 30.57s/it] 4%|▍ | 213/5000 [1:45:22<39:17:59, 29.55s/it] 4%|▍ | 214/5000 [1:45:54<40:10:39, 30.22s/it] 4%|▍ | 215/5000 [1:46:20<38:41:43, 29.11s/it] 4%|▍ | 216/5000 [1:46:52<39:48:04, 29.95s/it] 4%|▍ | 217/5000 [1:47:19<38:38:33, 29.09s/it] 4%|▍ | 218/5000 [1:47:50<39:05:00, 29.42s/it] 4%|▍ | 219/5000 [1:48:22<40:14:58, 30.31s/it] 4%|▍ | 220/5000 [1:48:49<39:01:12, 29.39s/it] 4%|▍ | 221/5000 [1:49:28<42:39:21, 32.13s/it] 4%|▍ | 222/5000 [1:49:54<40:22:58, 30.43s/it] 4%|▍ | 223/5000 [1:50:25<40:35:11, 30.59s/it] 4%|▍ | 224/5000 [1:50:55<40:25:15, 30.47s/it] 4%|▍ | 225/5000 [1:51:23<39:09:10, 29.52s/it] 4%|▍ | 225/5000 [1:51:23<39:09:10, 29.52s/it] 5%|▍ | 226/5000 [1:51:55<40:10:10, 30.29s/it] 5%|▍ | 227/5000 [1:52:22<38:54:39, 29.35s/it] 5%|▍ | 228/5000 [1:52:54<39:55:45, 30.12s/it] 5%|▍ | 229/5000 [1:53:20<38:32:47, 29.09s/it] 5%|▍ | 230/5000 [1:53:52<39:21:26, 29.70s/it] 5%|▍ | 231/5000 [1:54:21<39:22:20, 29.72s/it] 5%|▍ | 232/5000 [1:54:48<38:19:35, 28.94s/it] 5%|▍ | 233/5000 [1:55:21<39:38:40, 29.94s/it] 5%|▍ | 234/5000 [1:55:48<38:32:29, 29.11s/it] 5%|▍ | 235/5000 [1:56:17<38:35:33, 29.16s/it] 5%|▍ | 236/5000 [1:56:47<38:48:54, 29.33s/it] 5%|▍ | 237/5000 [1:57:16<38:51:52, 29.37s/it] 5%|▍ | 238/5000 [1:57:47<39:14:27, 29.67s/it] 5%|▍ | 239/5000 [1:58:09<36:24:34, 27.53s/it] 5%|▍ | 240/5000 [1:58:20<29:42:14, 22.47s/it] 5%|▍ | 241/5000 [1:58:30<24:56:47, 18.87s/it] 5%|▍ | 242/5000 [1:58:41<21:36:39, 16.35s/it]{'loss': 1.0543, 'learning_rate': 4.800000000000001e-07, 'epoch': 0.01} +{'loss': 0.8847, 'learning_rate': 9.800000000000001e-07, 'epoch': 0.01} +{'loss': 0.54, 'learning_rate': 1.48e-06, 'epoch': 0.01} +{'loss': 0.304, 'learning_rate': 1.98e-06, 'epoch': 0.02} +{'loss': 0.2861, 'learning_rate': 2.4800000000000004e-06, 'epoch': 0.03} +{'loss': 0.2395, 'learning_rate': 2.9800000000000003e-06, 'epoch': 0.03} +{'loss': 0.2282, 'learning_rate': 3.48e-06, 'epoch': 0.04} +{'loss': 0.2209, 'learning_rate': 3.980000000000001e-06, 'epoch': 0.04} +{'loss': 0.2299, 'learning_rate': 4.48e-06, 'epoch': 0.04} + + Reading metadata...: 0it [00:00, ?it/s] + Reading metadata...: 1it [00:00, 1.28it/s] + Reading metadata...: 14726it [00:00, 22755.29it/s] + Reading metadata...: 23388it [00:03, 6340.58it/s]  + Reading metadata...: 37933it [00:03, 12680.17it/s] + Reading metadata...: 46618it [00:06, 6782.81it/s]  + Reading metadata...: 59039it [00:06, 9618.95it/s] + Reading metadata...: 72546it [00:06, 14763.51it/s] + Reading metadata...: 80136it [00:06, 16119.07it/s] + Reading metadata...: 93827it [00:07, 24044.83it/s] + Reading metadata...: 102171it [00:15, 3763.63it/s] + Reading metadata...: 114047it [00:15, 5268.91it/s] + Reading metadata...: 127993it [00:15, 8041.86it/s] + Reading metadata...: 135601it [00:15, 9277.73it/s] + Reading metadata...: 149664it [00:23, 3971.36it/s] + Reading metadata...: 164498it [00:23, 6070.15it/s] + Reading metadata...: 171902it [00:23, 6887.15it/s] + Reading metadata...: 184380it [00:24, 9253.56it/s] + Reading metadata...: 199395it [00:24, 13975.77it/s] + Reading metadata...: 207317it [00:24, 15201.63it/s] + Reading metadata...: 221296it [00:24, 22089.09it/s] + Reading metadata...: 229844it [00:24, 23238.20it/s] Reading metadata...: 230467it [00:24, 9256.50it/s] + + Reading metadata...: 0it [00:00, ?it/s] + Reading metadata...: 1it [00:00, 3.85it/s] + Reading metadata...: 15233it [00:00, 54060.21it/s] Reading metadata...: 15520it [00:00, 42867.71it/s] + 5%|▍ | 243/5000 [2:00:45<64:12:40, 48.59s/it] 5%|▍ | 244/5000 [2:01:12<55:50:19, 42.27s/it] 5%|▍ | 245/5000 [2:01:42<50:47:40, 38.46s/it] 5%|▍ | 246/5000 [2:02:09<46:29:44, 35.21s/it] 5%|▍ | 247/5000 [2:02:40<44:32:06, 33.73s/it] 5%|▍ | 248/5000 [2:03:07<41:58:11, 31.80s/it] 5%|▍ | 249/5000 [2:03:35<40:37:24, 30.78s/it] 5%|▌ | 250/5000 [2:04:05<40:04:27, 30.37s/it] 5%|▌ | 250/5000 [2:04:05<40:04:27, 30.37s/it] 5%|▌ | 251/5000 [2:04:33<39:02:30, 29.60s/it] 5%|▌ | 252/5000 [2:05:03<39:12:04, 29.72s/it] 5%|▌ | 253/5000 [2:05:30<38:16:42, 29.03s/it] 5%|▌ | 254/5000 [2:06:00<38:39:51, 29.33s/it] 5%|▌ | 255/5000 [2:06:28<37:54:41, 28.76s/it] 5%|▌ | 256/5000 [2:06:58<38:39:26, 29.34s/it] 5%|▌ | 257/5000 [2:07:25<37:47:14, 28.68s/it] 5%|▌ | 258/5000 [2:07:55<38:13:11, 29.02s/it] 5%|▌ | 259/5000 [2:08:23<37:37:37, 28.57s/it] 5%|▌ | 260/5000 [2:08:52<38:01:05, 28.87s/it] 5%|▌ | 261/5000 [2:09:20<37:29:01, 28.47s/it] 5%|▌ | 262/5000 [2:09:50<38:05:16, 28.94s/it] 5%|▌ | 263/5000 [2:10:18<37:47:33, 28.72s/it] 5%|▌ | 264/5000 [2:10:48<38:25:50, 29.21s/it] 5%|▌ | 265/5000 [2:11:15<37:35:39, 28.58s/it] 5%|▌ | 266/5000 [2:11:46<38:19:21, 29.14s/it] 5%|▌ | 267/5000 [2:12:14<37:51:22, 28.79s/it] 5%|▌ | 268/5000 [2:12:44<38:11:47, 29.06s/it] 5%|▌ | 269/5000 [2:13:11<37:28:11, 28.51s/it] 5%|▌ | 270/5000 [2:13:44<39:22:49, 29.97s/it] 5%|▌ | 271/5000 [2:14:12<38:21:12, 29.20s/it] 5%|▌ | 272/5000 [2:14:42<38:39:22, 29.43s/it] 5%|▌ | 273/5000 [2:15:08<37:26:08, 28.51s/it] 5%|▌ | 274/5000 [2:15:38<38:11:11, 29.09s/it] 6%|▌ | 275/5000 [2:16:06<37:28:33, 28.55s/it] 6%|▌ | 275/5000 [2:16:06<37:28:33, 28.55s/it] 6%|▌ | 276/5000 [2:16:37<38:33:53, 29.39s/it] 6%|▌ | 277/5000 [2:17:05<37:52:30, 28.87s/it] 6%|▌ | 278/5000 [2:17:32<37:09:55, 28.33s/it] 6%|▌ | 279/5000 [2:18:01<37:34:06, 28.65s/it] 6%|▌ | 280/5000 [2:18:28<36:57:51, 28.19s/it] 6%|▌ | 281/5000 [2:18:59<37:50:16, 28.87s/it] 6%|▌ | 282/5000 [2:19:27<37:36:41, 28.70s/it] 6%|▌ | 283/5000 [2:19:56<37:47:32, 28.84s/it] 6%|▌ | 284/5000 [2:20:25<37:38:39, 28.74s/it] 6%|▌ | 285/5000 [2:20:54<37:55:58, 28.96s/it] 6%|▌ | 286/5000 [2:21:22<37:26:45, 28.60s/it] 6%|▌ | 287/5000 [2:21:52<38:01:05, 29.04s/it] 6%|▌ | 288/5000 [2:22:19<37:20:12, 28.53s/it] 6%|▌ | 289/5000 [2:22:52<39:08:31, 29.91s/it] 6%|▌ | 290/5000 [2:23:20<38:05:33, 29.12s/it] 6%|▌ | 291/5000 [2:23:50<38:28:38, 29.42s/it] 6%|▌ | 292/5000 [2:24:19<38:16:47, 29.27s/it] 6%|▌ | 293/5000 [2:24:49<38:48:34, 29.68s/it] 6%|▌ | 294/5000 [2:25:17<38:06:49, 29.16s/it] 6%|▌ | 295/5000 [2:25:53<40:29:24, 30.98s/it] 6%|▌ | 296/5000 [2:26:20<38:56:35, 29.80s/it] 6%|▌ | 297/5000 [2:26:49<38:50:01, 29.73s/it] 6%|▌ | 298/5000 [2:27:16<37:51:24, 28.98s/it] 6%|▌ | 299/5000 [2:27:44<37:23:59, 28.64s/it] 6%|▌ | 300/5000 [2:28:14<37:50:23, 28.98s/it] 6%|▌ | 300/5000 [2:28:14<37:50:23, 28.98s/it] 6%|▌ | 301/5000 [2:28:41<37:04:25, 28.40s/it] 6%|▌ | 302/5000 [2:29:12<38:12:03, 29.27s/it] 6%|▌ | 303/5000 [2:29:40<37:22:03, 28.64s/it] 6%|▌ | 304/5000 [2:30:09<37:48:42, 28.99s/it] 6%|▌ | 305/5000 [2:30:38<37:32:34, 28.79s/it] 6%|▌ | 306/5000 [2:31:07<37:52:40, 29.05s/it] 6%|▌ | 307/5000 [2:31:36<37:50:21, 29.03s/it] 6%|▌ | 308/5000 [2:32:08<38:58:46, 29.91s/it] 6%|▌ | 309/5000 [2:32:36<38:03:51, 29.21s/it] 6%|▌ | 310/5000 [2:33:05<38:12:05, 29.32s/it] 6%|▌ | 311/5000 [2:33:33<37:29:23, 28.78s/it] 6%|▌ | 312/5000 [2:34:00<36:52:44, 28.32s/it] 6%|▋ | 313/5000 [2:34:30<37:22:31, 28.71s/it] 6%|▋ | 314/5000 [2:34:58<37:04:16, 28.48s/it] 6%|▋ | 315/5000 [2:35:27<37:25:14, 28.75s/it] 6%|▋ | 316/5000 [2:35:55<37:11:34, 28.59s/it] 6%|▋ | 317/5000 [2:36:25<37:29:38, 28.82s/it] 6%|▋ | 318/5000 [2:36:53<37:05:03, 28.51s/it] 6%|▋ | 319/5000 [2:37:23<37:53:43, 29.14s/it] 6%|▋ | 320/5000 [2:37:50<37:05:23, 28.53s/it] 6%|▋ | 321/5000 [2:38:21<37:48:04, 29.08s/it] 6%|▋ | 322/5000 [2:38:48<37:15:36, 28.67s/it] 6%|▋ | 323/5000 [2:39:18<37:32:16, 28.89s/it] 6%|▋ | 324/5000 [2:39:46<37:15:34, 28.69s/it] 6%|▋ | 325/5000 [2:40:15<37:32:05, 28.90s/it] 6%|▋ | 325/5000 [2:40:15<37:32:05, 28.90s/it] 7%|▋ | 326/5000 [2:40:43<37:04:04, 28.55s/it] 7%|▋ | 327/5000 [2:41:13<37:40:03, 29.02s/it] 7%|▋ | 328/5000 [2:41:41<37:01:40, 28.53s/it] 7%|▋ | 329/5000 [2:42:11<37:44:09, 29.08s/it] 7%|▋ | 330/5000 [2:42:41<37:56:50, 29.25s/it] 7%|▋ | 331/5000 [2:43:08<37:19:50, 28.78s/it] 7%|▋ | 332/5000 [2:43:38<37:49:43, 29.17s/it] 7%|▋ | 333/5000 [2:44:06<37:09:29, 28.66s/it] 7%|▋ | 334/5000 [2:44:36<37:44:37, 29.12s/it] 7%|▋ | 335/5000 [2:45:03<36:59:15, 28.54s/it] 7%|▋ | 336/5000 [2:45:33<37:36:12, 29.02s/it] 7%|▋ | 337/5000 [2:46:01<37:06:28, 28.65s/it] 7%|▋ | 338/5000 [2:46:30<37:16:44, 28.79s/it] 7%|▋ | 339/5000 [2:46:59<37:03:08, 28.62s/it] 7%|▋ | 340/5000 [2:47:28<37:20:06, 28.84s/it] 7%|▋ | 341/5000 [2:47:55<36:50:57, 28.47s/it] 7%|▋ | 342/5000 [2:48:25<37:23:17, 28.90s/it] 7%|▋ | 343/5000 [2:48:53<36:44:07, 28.40s/it] 7%|▋ | 344/5000 [2:49:23<37:28:45, 28.98s/it] 7%|▋ | 345/5000 [2:49:50<36:52:30, 28.52s/it] 7%|▋ | 346/5000 [2:50:20<37:15:57, 28.83s/it] 7%|▋ | 347/5000 [2:50:48<37:01:08, 28.64s/it] 7%|▋ | 348/5000 [2:51:17<37:15:09, 28.83s/it] 7%|▋ | 349/5000 [2:51:45<36:43:30, 28.43s/it] 7%|▋ | 350/5000 [2:52:14<37:01:18, 28.66s/it] 7%|▋ | 350/5000 [2:52:14<37:01:18, 28.66s/it] 7%|▋ | 351/5000 [2:52:41<36:30:28, 28.27s/it] 7%|▋ | 352/5000 [2:53:16<39:04:59, 30.27s/it] 7%|▋ | 353/5000 [2:53:44<37:52:09, 29.34s/it] 7%|▋ | 354/5000 [2:54:13<37:45:08, 29.25s/it] 7%|▋ | 355/5000 [2:54:40<37:00:02, 28.68s/it] 7%|▋ | 356/5000 [2:55:09<37:10:14, 28.81s/it] 7%|▋ | 357/5000 [2:55:37<36:55:50, 28.63s/it] 7%|▋ | 358/5000 [2:56:07<37:16:16, 28.90s/it] 7%|▋ | 359/5000 [2:56:34<36:45:21, 28.51s/it] 7%|▋ | 360/5000 [2:57:04<37:12:38, 28.87s/it] 7%|▋ | 361/5000 [2:57:32<36:56:33, 28.67s/it] 7%|▋ | 362/5000 [2:58:03<37:44:00, 29.29s/it] 7%|▋ | 363/5000 [2:58:30<36:58:10, 28.70s/it] 7%|▋ | 364/5000 [2:59:00<37:28:38, 29.10s/it] 7%|▋ | 365/5000 [2:59:27<36:37:42, 28.45s/it] 7%|▋ | 366/5000 [2:59:59<37:46:01, 29.34s/it] 7%|▋ | 367/5000 [3:00:26<37:02:07, 28.78s/it] 7%|▋ | 368/5000 [3:01:01<39:15:32, 30.51s/it] 7%|▋ | 369/5000 [3:01:29<38:22:12, 29.83s/it] 7%|▋ | 370/5000 [3:01:56<37:24:28, 29.09s/it] 7%|▋ | 371/5000 [3:02:27<37:59:24, 29.55s/it] 7%|▋ | 372/5000 [3:02:59<38:49:51, 30.21s/it] 7%|▋ | 373/5000 [3:03:28<38:25:14, 29.89s/it] 7%|▋ | 374/5000 [3:03:56<37:34:58, 29.25s/it] 8%|▊ | 375/5000 [3:04:26<37:59:47, 29.58s/it] 8%|▊ | 375/5000 [3:04:26<37:59:47, 29.58s/it] 8%|▊ | 376/5000 [3:04:53<37:06:54, 28.90s/it] 8%|▊ | 377/5000 [3:05:23<37:29:25, 29.19s/it] 8%|▊ | 378/5000 [3:05:50<36:43:38, 28.61s/it] 8%|▊ | 379/5000 [3:06:21<37:21:10, 29.10s/it] 8%|▊ | 380/5000 [3:06:49<36:54:46, 28.76s/it] 8%|▊ | 381/5000 [3:07:19<37:30:16, 29.23s/it] 8%|▊ | 382/5000 [3:07:47<37:11:13, 28.99s/it] 8%|▊ | 383/5000 [3:08:18<37:45:11, 29.44s/it] 8%|▊ | 384/5000 [3:08:45<36:55:47, 28.80s/it] 8%|▊ | 385/5000 [3:09:14<36:59:17, 28.85s/it] 8%|▊ | 386/5000 [3:09:42<36:42:26, 28.64s/it] 8%|▊ | 387/5000 [3:10:13<37:20:55, 29.15s/it] 8%|▊ | 388/5000 [3:10:40<36:34:43, 28.55s/it] 8%|▊ | 389/5000 [3:11:10<37:05:19, 28.96s/it] 8%|▊ | 390/5000 [3:11:38<36:40:19, 28.64s/it] 8%|▊ | 391/5000 [3:12:07<37:00:03, 28.90s/it] 8%|▊ | 392/5000 [3:12:35<36:25:33, 28.46s/it] 8%|▊ | 393/5000 [3:13:04<36:45:33, 28.72s/it] 8%|▊ | 394/5000 [3:13:31<36:16:04, 28.35s/it] 8%|▊ | 395/5000 [3:13:59<36:09:58, 28.27s/it] 8%|▊ | 396/5000 [3:14:29<36:31:57, 28.57s/it] 8%|▊ | 397/5000 [3:14:57<36:17:43, 28.39s/it] 8%|▊ | 398/5000 [3:15:26<36:43:22, 28.73s/it] 8%|▊ | 399/5000 [3:15:54<36:18:29, 28.41s/it] 8%|▊ | 400/5000 [3:16:23<36:38:33, 28.68s/it] 8%|▊ | 400/5000 [3:16:23<36:38:33, 28.68s/it] 8%|▊ | 401/5000 [3:16:50<36:02:40, 28.21s/it] 8%|▊ | 402/5000 [3:17:21<36:51:51, 28.86s/it] 8%|▊ | 403/5000 [3:17:48<36:10:02, 28.32s/it] 8%|▊ | 404/5000 [3:18:18<36:42:23, 28.75s/it] 8%|▊ | 405/5000 [3:18:45<36:23:31, 28.51s/it] 8%|▊ | 406/5000 [3:19:21<39:06:47, 30.65s/it] 8%|▊ | 407/5000 [3:19:50<38:20:36, 30.05s/it] 8%|▊ | 408/5000 [3:20:19<38:04:11, 29.85s/it] 8%|▊ | 409/5000 [3:20:47<37:11:10, 29.16s/it] 8%|▊ | 410/5000 [3:21:17<37:46:20, 29.63s/it] 8%|▊ | 411/5000 [3:21:44<36:44:43, 28.83s/it] 8%|▊ | 412/5000 [3:22:22<40:05:41, 31.46s/it] 8%|▊ | 413/5000 [3:22:49<38:31:11, 30.23s/it] 8%|▊ | 414/5000 [3:23:19<38:19:43, 30.09s/it] 8%|▊ | 415/5000 [3:23:48<37:46:29, 29.66s/it] 8%|▊ | 416/5000 [3:24:17<37:42:58, 29.62s/it] 8%|▊ | 417/5000 [3:24:47<37:39:37, 29.58s/it] 8%|▊ | 418/5000 [3:25:16<37:42:41, 29.63s/it] 8%|▊ | 419/5000 [3:25:44<36:55:37, 29.02s/it] 8%|▊ | 420/5000 [3:26:16<38:06:54, 29.96s/it] 8%|▊ | 421/5000 [3:26:43<37:03:38, 29.14s/it] 8%|▊ | 422/5000 [3:27:14<37:36:29, 29.57s/it] 8%|▊ | 423/5000 [3:27:41<36:43:25, 28.88s/it] 8%|▊ | 424/5000 [3:28:11<37:03:32, 29.15s/it] 8%|▊ | 425/5000 [3:28:40<37:07:44, 29.22s/it] 8%|▊ | 425/5000 [3:28:40<37:07:44, 29.22s/it] 9%|▊ | 426/5000 [3:29:10<37:22:15, 29.41s/it] 9%|▊ | 427/5000 [3:29:38<36:33:43, 28.78s/it] 9%|▊ | 428/5000 [3:30:08<37:12:25, 29.30s/it] 9%|▊ | 429/5000 [3:30:36<36:33:04, 28.79s/it] 9%|▊ | 430/5000 [3:31:06<37:07:37, 29.25s/it] 9%|▊ | 431/5000 [3:31:34<36:30:15, 28.76s/it] 9%|▊ | 432/5000 [3:32:03<36:46:37, 28.98s/it] 9%|▊ | 433/5000 [3:32:30<36:05:17, 28.45s/it] 9%|▊ | 434/5000 [3:33:01<36:54:51, 29.10s/it] 9%|▊ | 435/5000 [3:33:28<36:14:39, 28.58s/it] 9%|▊ | 436/5000 [3:33:59<36:56:36, 29.14s/it] 9%|▊ | 437/5000 [3:34:27<36:36:06, 28.88s/it] 9%|▉ | 438/5000 [3:34:58<37:28:08, 29.57s/it] 9%|▉ | 439/5000 [3:35:26<36:42:40, 28.98s/it] 9%|▉ | 440/5000 [3:35:55<36:51:24, 29.10s/it] 9%|▉ | 441/5000 [3:36:23<36:17:12, 28.65s/it] 9%|▉ | 442/5000 [3:36:53<36:43:27, 29.01s/it] 9%|▉ | 443/5000 [3:37:20<36:12:54, 28.61s/it] 9%|▉ | 444/5000 [3:37:51<37:01:40, 29.26s/it] 9%|▉ | 445/5000 [3:38:18<36:15:14, 28.65s/it] 9%|▉ | 446/5000 [3:38:49<37:00:36, 29.26s/it] 9%|▉ | 447/5000 [3:39:16<36:11:55, 28.62s/it] 9%|▉ | 448/5000 [3:39:44<35:46:20, 28.29s/it] 9%|▉ | 449/5000 [3:40:14<36:28:02, 28.85s/it] 9%|▉ | 450/5000 [3:40:41<35:47:50, 28.32s/it] 9%|▉ | 450/5000 [3:40:41<35:47:50, 28.32s/it] 9%|▉ | 451/5000 [3:41:11<36:31:53, 28.91s/it] 9%|▉ | 452/5000 [3:41:38<35:53:15, 28.41s/it] 9%|▉ | 453/5000 [3:42:09<36:38:58, 29.02s/it] 9%|▉ | 454/5000 [3:42:36<35:55:40, 28.45s/it] 9%|▉ | 455/5000 [3:43:06<36:24:08, 28.83s/it] 9%|▉ | 456/5000 [3:43:33<35:44:58, 28.32s/it] 9%|▉ | 457/5000 [3:44:03<36:26:45, 28.88s/it] 9%|▉ | 458/5000 [3:44:31<36:08:04, 28.64s/it] 9%|▉ | 459/5000 [3:45:00<36:16:37, 28.76s/it] 9%|▉ | 460/5000 [3:45:27<35:41:22, 28.30s/it] 9%|▉ | 461/5000 [3:45:56<35:50:58, 28.43s/it] 9%|▉ | 462/5000 [3:46:23<35:23:31, 28.08s/it] 9%|▉ | 463/5000 [3:46:53<35:59:55, 28.56s/it] 9%|▉ | 464/5000 [3:47:21<35:38:22, 28.29s/it] 9%|▉ | 465/5000 [3:47:52<36:41:53, 29.13s/it] 9%|▉ | 466/5000 [3:48:19<36:00:47, 28.59s/it] 9%|▉ | 467/5000 [3:48:47<35:46:49, 28.42s/it] 9%|▉ | 468/5000 [3:49:17<36:10:17, 28.73s/it] 9%|▉ | 469/5000 [3:49:45<35:56:21, 28.55s/it] 9%|▉ | 470/5000 [3:50:15<36:22:11, 28.90s/it] 9%|▉ | 471/5000 [3:50:42<35:39:15, 28.34s/it] 9%|▉ | 472/5000 [3:51:11<36:12:59, 28.79s/it] 9%|▉ | 473/5000 [3:51:39<35:38:23, 28.34s/it] 9%|▉ | 474/5000 [3:52:09<36:12:59, 28.81s/it] 10%|▉ | 475/5000 [3:52:36<35:37:33, 28.34s/it] 10%|▉ | 475/5000 [3:52:36<35:37:33, 28.34s/it] 10%|▉ | 476/5000 [3:53:06<36:10:04, 28.78s/it] 10%|▉ | 477/5000 [3:53:33<35:46:08, 28.47s/it] 10%|▉ | 478/5000 [3:54:03<36:12:51, 28.83s/it] 10%|▉ | 479/5000 [3:54:31<35:59:16, 28.66s/it] 10%|▉ | 480/5000 [3:55:01<36:30:42, 29.08s/it] 10%|▉ | 481/5000 [3:55:29<35:57:52, 28.65s/it] 10%|▉ | 482/5000 [3:55:41<29:43:37, 23.69s/it] 10%|▉ | 483/5000 [3:55:52<24:44:48, 19.72s/it] 10%|▉ | 484/5000 [3:56:02<21:16:03, 16.95s/it] 10%|▉ | 485/5000 [3:56:13<18:50:40, 15.03s/it]{'loss': 0.2867, 'learning_rate': 4.980000000000001e-06, 'epoch': 1.0} +{'loss': 0.3646, 'learning_rate': 5.480000000000001e-06, 'epoch': 1.01} +{'loss': 0.3278, 'learning_rate': 5.98e-06, 'epoch': 1.01} +{'loss': 0.3748, 'learning_rate': 6.480000000000001e-06, 'epoch': 1.02} +{'loss': 0.2737, 'learning_rate': 6.98e-06, 'epoch': 1.02} +{'loss': 0.2653, 'learning_rate': 7.48e-06, 'epoch': 1.03} +{'loss': 0.2596, 'learning_rate': 7.980000000000002e-06, 'epoch': 1.03} +{'loss': 0.2665, 'learning_rate': 8.48e-06, 'epoch': 1.04} +{'loss': 0.2598, 'learning_rate': 8.98e-06, 'epoch': 1.04} +{'loss': 0.242, 'learning_rate': 9.48e-06, 'epoch': 1.05} + + Reading metadata...: 0it [00:00, ?it/s] + Reading metadata...: 1it [00:01, 1.04s/it] + Reading metadata...: 15751it [00:01, 19095.71it/s] + Reading metadata...: 25016it [00:01, 16623.79it/s] + Reading metadata...: 40082it [00:01, 30667.68it/s] + Reading metadata...: 49475it [00:02, 29075.84it/s] + Reading metadata...: 59039it [00:02, 27352.30it/s] + Reading metadata...: 73815it [00:02, 41033.71it/s] + Reading metadata...: 82286it [00:05, 11264.53it/s] + Reading metadata...: 96058it [00:13, 3486.35it/s]  + Reading metadata...: 110587it [00:13, 5350.06it/s] + Reading metadata...: 117905it [00:15, 5480.20it/s] + Reading metadata...: 132000it [00:16, 6898.25it/s] + Reading metadata...: 146014it [00:16, 10168.62it/s] + Reading metadata...: 153083it [00:16, 11213.31it/s] + Reading metadata...: 167903it [00:16, 17038.42it/s] + Reading metadata...: 176226it [00:17, 13544.55it/s] + Reading metadata...: 184380it [00:18, 14560.00it/s] + Reading metadata...: 199849it [00:18, 22725.78it/s] + Reading metadata...: 207861it [00:18, 22429.49it/s] + Reading metadata...: 221914it [00:19, 21225.82it/s] Reading metadata...: 230467it [00:19, 11852.09it/s] + + Reading metadata...: 0it [00:00, ?it/s] + Reading metadata...: 1it [00:00, 3.44it/s] Reading metadata...: 15520it [00:00, 39827.17it/s] + 10%|▉ | 486/5000 [3:58:14<59:01:25, 47.07s/it] 10%|▉ | 487/5000 [3:58:43<51:55:07, 41.42s/it] 10%|▉ | 488/5000 [3:59:12<47:23:12, 37.81s/it] 10%|▉ | 489/5000 [3:59:39<43:19:15, 34.57s/it] 10%|▉ | 490/5000 [4:00:09<41:29:33, 33.12s/it] 10%|▉ | 491/5000 [4:00:36<39:07:48, 31.24s/it] 10%|▉ | 492/5000 [4:01:09<39:49:47, 31.81s/it] 10%|▉ | 493/5000 [4:01:37<38:31:14, 30.77s/it] 10%|▉ | 494/5000 [4:02:05<37:24:19, 29.88s/it] 10%|▉ | 495/5000 [4:02:34<36:59:42, 29.56s/it] 10%|▉ | 496/5000 [4:03:01<36:14:27, 28.97s/it] 10%|▉ | 497/5000 [4:03:30<36:09:49, 28.91s/it] 10%|▉ | 498/5000 [4:03:59<36:02:20, 28.82s/it] 10%|▉ | 499/5000 [4:04:26<35:26:22, 28.35s/it] 10%|█ | 500/5000 [4:04:55<35:30:12, 28.40s/it] 10%|█ | 500/5000 [4:04:55<35:30:12, 28.40s/it] 10%|█ | 501/5000 [4:05:22<34:59:50, 28.00s/it] 10%|█ | 502/5000 [4:05:51<35:27:18, 28.38s/it] 10%|█ | 503/5000 [4:06:20<35:32:48, 28.46s/it] 10%|█ | 504/5000 [4:06:48<35:32:06, 28.45s/it] 10%|█ | 505/5000 [4:07:16<35:17:02, 28.26s/it] 10%|█ | 506/5000 [4:07:43<34:49:41, 27.90s/it] 10%|█ | 507/5000 [4:08:11<35:05:38, 28.12s/it] 10%|█ | 508/5000 [4:08:40<35:21:25, 28.34s/it] 10%|█ | 509/5000 [4:09:08<34:59:35, 28.05s/it] 10%|█ | 510/5000 [4:09:36<35:15:04, 28.26s/it] 10%|█ | 511/5000 [4:10:05<35:10:17, 28.21s/it] 10%|█ | 512/5000 [4:10:33<35:09:53, 28.21s/it] 10%|█ | 513/5000 [4:11:03<35:46:05, 28.70s/it] 10%|█ | 514/5000 [4:11:30<35:12:59, 28.26s/it] 10%|█ | 515/5000 [4:11:59<35:32:52, 28.53s/it] 10%|█ | 516/5000 [4:12:26<35:04:10, 28.16s/it] 10%|█ | 517/5000 [4:12:55<35:15:20, 28.31s/it] 10%|█ | 518/5000 [4:13:22<34:51:43, 28.00s/it] 10%|█ | 519/5000 [4:13:50<34:38:57, 27.84s/it] 10%|█ | 520/5000 [4:14:18<34:53:07, 28.03s/it] 10%|█ | 521/5000 [4:14:46<34:42:13, 27.89s/it] 10%|█ | 522/5000 [4:15:14<35:01:13, 28.15s/it] 10%|█ | 523/5000 [4:15:44<35:32:24, 28.58s/it] 10%|█ | 524/5000 [4:16:11<35:02:06, 28.18s/it] 10%|█ | 525/5000 [4:16:40<35:03:17, 28.20s/it] 10%|█ | 525/5000 [4:16:40<35:03:17, 28.20s/it] 11%|█ | 526/5000 [4:17:09<35:20:03, 28.43s/it] 11%|█ | 527/5000 [4:17:36<35:00:31, 28.18s/it] 11%|█ | 528/5000 [4:18:04<34:43:33, 27.95s/it] 11%|█ | 529/5000 [4:18:31<34:32:01, 27.81s/it] 11%|█ | 530/5000 [4:18:59<34:43:25, 27.97s/it] 11%|█ | 531/5000 [4:19:28<34:54:15, 28.12s/it] 11%|█ | 532/5000 [4:19:55<34:32:45, 27.83s/it] 11%|█ | 533/5000 [4:20:24<34:48:42, 28.06s/it] 11%|█ | 534/5000 [4:20:51<34:35:04, 27.88s/it] 11%|█ | 535/5000 [4:21:19<34:26:54, 27.77s/it] 11%|█ | 536/5000 [4:21:49<35:21:15, 28.51s/it] 11%|█ | 537/5000 [4:22:16<34:56:58, 28.19s/it] 11%|█ | 538/5000 [4:22:45<35:06:09, 28.32s/it] 11%|█ | 539/5000 [4:23:12<34:39:28, 27.97s/it] 11%|█ | 540/5000 [4:23:39<34:28:06, 27.82s/it] 11%|█ | 541/5000 [4:24:07<34:14:53, 27.65s/it] 11%|█ | 542/5000 [4:24:39<36:01:05, 29.09s/it] 11%|█ | 543/5000 [4:25:08<35:57:27, 29.04s/it] 11%|█ | 544/5000 [4:25:36<35:31:38, 28.70s/it] 11%|█ | 545/5000 [4:26:05<35:27:12, 28.65s/it] 11%|█ | 546/5000 [4:26:34<35:47:54, 28.93s/it] 11%|█ | 547/5000 [4:27:02<35:14:58, 28.50s/it] 11%|█ | 548/5000 [4:27:31<35:37:04, 28.80s/it] 11%|█ | 549/5000 [4:27:59<35:17:14, 28.54s/it] 11%|█ | 550/5000 [4:28:28<35:16:21, 28.54s/it] 11%|█ | 550/5000 [4:28:28<35:16:21, 28.54s/it] 11%|█ | 551/5000 [4:28:57<35:39:03, 28.85s/it] 11%|█ | 552/5000 [4:29:24<35:00:21, 28.33s/it] 11%|█ | 553/5000 [4:29:54<35:34:21, 28.80s/it] 11%|█ | 554/5000 [4:30:21<34:58:10, 28.32s/it] 11%|█ | 555/5000 [4:30:52<35:44:58, 28.95s/it] 11%|█ | 556/5000 [4:31:22<36:19:34, 29.43s/it] 11%|█ | 557/5000 [4:31:52<36:32:51, 29.61s/it] 11%|█ | 558/5000 [4:32:29<39:17:44, 31.85s/it] 11%|█ | 559/5000 [4:32:57<37:33:30, 30.45s/it] 11%|█ | 560/5000 [4:33:26<37:02:03, 30.03s/it] 11%|█ | 561/5000 [4:33:54<36:30:08, 29.60s/it] 11%|█ | 562/5000 [4:34:22<35:54:09, 29.12s/it] 11%|█▏ | 563/5000 [4:34:58<38:23:23, 31.15s/it] 11%|█▏ | 564/5000 [4:35:27<37:30:14, 30.44s/it] 11%|█▏ | 565/5000 [4:35:57<37:16:34, 30.26s/it] 11%|█▏ | 566/5000 [4:36:23<35:47:56, 29.07s/it] 11%|█▏ | 567/5000 [4:36:53<36:01:02, 29.25s/it] 11%|█▏ | 568/5000 [4:37:22<36:05:44, 29.32s/it] 11%|█▏ | 569/5000 [4:37:51<35:45:23, 29.05s/it] 11%|█▏ | 570/5000 [4:38:19<35:27:56, 28.82s/it] 11%|█▏ | 571/5000 [4:38:47<35:07:11, 28.55s/it] 11%|█▏ | 572/5000 [4:39:16<35:11:44, 28.61s/it]{'loss': 0.245, 'learning_rate': 9.980000000000001e-06, 'epoch': 2.0} +{'loss': 0.2256, 'learning_rate': 9.946666666666667e-06, 'epoch': 2.01} +{'loss': 0.209, 'learning_rate': 9.891111111111113e-06, 'epoch': 2.01} +05/10/2023 14:21:14 - WARNING - datasets.download.streaming_download_manager - Got disconnected from remote data host. Retrying in 5sec [1/20] + 11%|█▏ | 573/5000 [4:39:59<40:47:46, 33.18s/it] 11%|█▏ | 574/5000 [4:40:27<38:39:16, 31.44s/it] 12%|█▏ | 575/5000 [4:40:57<38:15:47, 31.13s/it] 12%|█▏ | 575/5000 [4:40:57<38:15:47, 31.13s/it] 12%|█▏ | 576/5000 [4:41:24<36:46:29, 29.93s/it] 12%|█▏ | 577/5000 [4:41:54<36:32:21, 29.74s/it] 12%|█▏ | 578/5000 [4:42:22<36:04:32, 29.37s/it] 12%|█▏ | 579/5000 [4:42:51<35:51:34, 29.20s/it] 12%|█▏ | 580/5000 [4:43:20<35:40:57, 29.06s/it] 12%|█▏ | 581/5000 [4:43:48<35:22:58, 28.83s/it] 12%|█▏ | 582/5000 [4:44:16<35:12:04, 28.68s/it] 12%|█▏ | 583/5000 [4:44:45<35:17:58, 28.77s/it] 12%|█▏ | 584/5000 [4:45:13<34:43:35, 28.31s/it] 12%|█▏ | 585/5000 [4:45:43<35:21:39, 28.83s/it] 12%|█▏ | 586/5000 [4:46:10<34:48:26, 28.39s/it] 12%|█▏ | 587/5000 [4:46:38<34:45:39, 28.36s/it] 12%|█▏ | 588/5000 [4:47:07<35:01:55, 28.58s/it] 12%|█▏ | 589/5000 [4:47:35<34:49:22, 28.42s/it] 12%|█▏ | 590/5000 [4:48:03<34:40:11, 28.30s/it] 12%|█▏ | 591/5000 [4:48:33<35:00:21, 28.58s/it] 12%|█▏ | 592/5000 [4:49:00<34:33:03, 28.22s/it] 12%|█▏ | 593/5000 [4:49:28<34:37:13, 28.28s/it] 12%|█▏ | 594/5000 [4:49:56<34:11:05, 27.93s/it] 12%|█▏ | 595/5000 [4:50:26<34:56:50, 28.56s/it] 12%|█▏ | 596/5000 [4:50:53<34:25:41, 28.14s/it] 12%|█▏ | 597/5000 [4:51:22<34:51:51, 28.51s/it] 12%|█▏ | 598/5000 [4:51:52<35:20:34, 28.90s/it] 12%|█▏ | 599/5000 [4:52:20<35:01:04, 28.64s/it] 12%|█▏ | 600/5000 [4:52:54<36:49:52, 30.13s/it] 12%|█▏ | 600/5000 [4:52:54<36:49:52, 30.13s/it] 12%|█▏ | 601/5000 [4:53:23<36:36:18, 29.96s/it] 12%|█▏ | 602/5000 [4:53:50<35:34:04, 29.11s/it] 12%|█▏ | 603/5000 [4:54:20<35:43:25, 29.25s/it] 12%|█▏ | 604/5000 [4:54:47<34:54:12, 28.58s/it] 12%|█▏ | 605/5000 [4:55:17<35:19:47, 28.94s/it] 12%|█▏ | 606/5000 [4:55:44<34:38:36, 28.38s/it] 12%|█▏ | 607/5000 [4:56:15<35:31:39, 29.11s/it] 12%|█▏ | 608/5000 [4:56:45<36:09:47, 29.64s/it] 12%|█▏ | 609/5000 [4:57:14<35:43:59, 29.30s/it] 12%|█▏ | 610/5000 [4:57:43<35:28:15, 29.09s/it] 12%|█▏ | 611/5000 [4:58:12<35:30:54, 29.13s/it] 12%|█▏ | 612/5000 [4:58:39<34:48:34, 28.56s/it] 12%|█▏ | 613/5000 [4:59:08<34:52:40, 28.62s/it] 12%|█▏ | 614/5000 [4:59:35<34:16:16, 28.13s/it] 12%|█▏ | 615/5000 [5:00:04<34:31:51, 28.35s/it] 12%|█▏ | 616/5000 [5:00:32<34:38:16, 28.44s/it] 12%|█▏ | 617/5000 [5:01:01<34:50:59, 28.62s/it] 12%|█▏ | 618/5000 [5:01:34<36:29:50, 29.98s/it] 12%|█▏ | 619/5000 [5:02:02<35:38:08, 29.28s/it] 12%|█▏ | 620/5000 [5:02:30<35:18:17, 29.02s/it] 12%|█▏ | 621/5000 [5:02:59<35:00:43, 28.78s/it] 12%|█▏ | 622/5000 [5:03:26<34:27:13, 28.33s/it] 12%|█▏ | 623/5000 [5:03:54<34:18:05, 28.21s/it] 12%|█▏ | 624/5000 [5:04:22<34:18:00, 28.22s/it] 12%|█▎ | 625/5000 [5:04:50<34:18:11, 28.23s/it] 12%|█▎ | 625/5000 [5:04:50<34:18:11, 28.23s/it] 13%|█▎ | 626/5000 [5:05:20<34:42:05, 28.56s/it] 13%|█▎ | 627/5000 [5:05:47<34:08:52, 28.11s/it] 13%|█▎ | 628/5000 [5:06:16<34:22:13, 28.30s/it] 13%|█▎ | 629/5000 [5:06:43<33:53:45, 27.92s/it] 13%|█▎ | 630/5000 [5:07:14<35:17:30, 29.07s/it] 13%|█▎ | 631/5000 [5:07:43<35:06:09, 28.92s/it] 13%|█▎ | 632/5000 [5:08:11<34:39:29, 28.56s/it] 13%|█▎ | 633/5000 [5:08:39<34:30:44, 28.45s/it] 13%|█▎ | 634/5000 [5:09:07<34:24:13, 28.37s/it] 13%|█▎ | 635/5000 [5:09:35<34:19:30, 28.31s/it] 13%|█▎ | 636/5000 [5:10:04<34:27:17, 28.42s/it] 13%|█▎ | 637/5000 [5:10:31<33:59:18, 28.04s/it] 13%|█▎ | 638/5000 [5:10:59<34:03:54, 28.11s/it] 13%|█▎ | 639/5000 [5:11:28<34:19:41, 28.34s/it] 13%|█▎ | 640/5000 [5:11:56<34:05:23, 28.15s/it] 13%|█▎ | 641/5000 [5:12:25<34:22:44, 28.39s/it] 13%|█▎ | 642/5000 [5:12:52<34:01:02, 28.10s/it] 13%|█▎ | 643/5000 [5:13:20<34:03:20, 28.14s/it] 13%|█▎ | 644/5000 [5:13:50<34:27:01, 28.47s/it] 13%|█▎ | 645/5000 [5:14:17<33:58:15, 28.08s/it] 13%|█▎ | 646/5000 [5:14:46<34:17:46, 28.36s/it] 13%|█▎ | 647/5000 [5:15:13<33:49:45, 27.98s/it] 13%|█▎ | 648/5000 [5:15:42<34:21:44, 28.42s/it] 13%|█▎ | 649/5000 [5:16:11<34:16:45, 28.36s/it] 13%|█▎ | 650/5000 [5:16:38<33:43:03, 27.90s/it] 13%|█▎ | 650/5000 [5:16:38<33:43:03, 27.90s/it] 13%|█▎ | 651/5000 [5:17:07<34:22:48, 28.46s/it] 13%|█▎ | 652/5000 [5:17:34<33:52:35, 28.05s/it] 13%|█▎ | 653/5000 [5:18:03<34:07:10, 28.26s/it] 13%|█▎ | 654/5000 [5:18:30<33:47:04, 27.99s/it] 13%|█▎ | 655/5000 [5:18:59<34:00:10, 28.17s/it] 13%|█▎ | 656/5000 [5:19:28<34:19:44, 28.45s/it] 13%|█▎ | 657/5000 [5:19:55<33:48:20, 28.02s/it] 13%|█▎ | 658/5000 [5:20:24<34:01:03, 28.20s/it] 13%|█▎ | 659/5000 [5:20:52<33:59:33, 28.19s/it] 13%|█▎ | 660/5000 [5:21:20<34:05:00, 28.27s/it] 13%|█▎ | 661/5000 [5:21:49<34:20:18, 28.49s/it] 13%|█▎ | 662/5000 [5:22:17<33:49:22, 28.07s/it] 13%|█▎ | 663/5000 [5:22:47<34:38:45, 28.76s/it] 13%|█▎ | 664/5000 [5:23:14<33:59:12, 28.22s/it] 13%|█▎ | 665/5000 [5:23:43<34:21:41, 28.54s/it] 13%|█▎ | 666/5000 [5:24:13<34:59:56, 29.07s/it] 13%|█▎ | 667/5000 [5:24:41<34:26:02, 28.61s/it] 13%|█▎ | 668/5000 [5:25:10<34:23:58, 28.59s/it] 13%|█▎ | 669/5000 [5:25:37<34:09:20, 28.39s/it] 13%|█▎ | 670/5000 [5:26:05<33:49:30, 28.12s/it] 13%|█▎ | 671/5000 [5:26:34<34:06:17, 28.36s/it] 13%|█▎ | 672/5000 [5:27:01<33:38:34, 27.98s/it] 13%|█▎ | 673/5000 [5:27:29<33:48:14, 28.12s/it] 13%|█▎ | 674/5000 [5:27:57<33:27:28, 27.84s/it] 14%|█▎ | 675/5000 [5:28:26<33:50:01, 28.16s/it] 14%|█▎ | 675/5000 [5:28:26<33:50:01, 28.16s/it] 14%|█▎ | 676/5000 [5:28:53<33:29:13, 27.88s/it] 14%|█▎ | 677/5000 [5:29:21<33:40:11, 28.04s/it] 14%|█▎ | 678/5000 [5:29:49<33:39:39, 28.04s/it] 14%|█▎ | 679/5000 [5:30:17<33:37:37, 28.02s/it] 14%|█▎ | 680/5000 [5:30:50<35:25:47, 29.52s/it] 14%|█▎ | 681/5000 [5:31:18<34:39:24, 28.89s/it] 14%|█▎ | 682/5000 [5:31:46<34:23:33, 28.67s/it] 14%|█▎ | 683/5000 [5:32:16<34:54:39, 29.11s/it] 14%|█▎ | 684/5000 [5:32:43<34:12:00, 28.53s/it] 14%|█▎ | 685/5000 [5:33:12<34:15:56, 28.59s/it] 14%|█▎ | 686/5000 [5:33:39<33:46:32, 28.19s/it] 14%|█▎ | 687/5000 [5:34:07<33:39:02, 28.09s/it] 14%|█▍ | 688/5000 [5:34:34<33:16:41, 27.78s/it] 14%|█▍ | 689/5000 [5:35:02<33:16:57, 27.79s/it] 14%|█▍ | 690/5000 [5:35:30<33:18:56, 27.83s/it] 14%|█▍ | 691/5000 [5:35:57<33:16:16, 27.80s/it] 14%|█▍ | 692/5000 [5:36:25<33:17:30, 27.82s/it] 14%|█▍ | 693/5000 [5:36:53<33:20:06, 27.86s/it] 14%|█▍ | 694/5000 [5:37:21<33:12:18, 27.76s/it] 14%|█▍ | 695/5000 [5:37:49<33:26:51, 27.97s/it] 14%|█▍ | 696/5000 [5:38:16<33:09:09, 27.73s/it] 14%|█▍ | 697/5000 [5:38:45<33:27:38, 27.99s/it] 14%|█▍ | 698/5000 [5:39:12<33:15:51, 27.84s/it] 14%|█▍ | 699/5000 [5:39:42<33:47:13, 28.28s/it] 14%|█▍ | 700/5000 [5:40:09<33:25:24, 27.98s/it] 14%|█▍ | 700/5000 [5:40:09<33:25:24, 27.98s/it] 14%|█▍ | 701/5000 [5:40:38<33:49:12, 28.32s/it] 14%|█▍ | 702/5000 [5:41:06<33:48:01, 28.31s/it] 14%|█▍ | 703/5000 [5:41:33<33:18:52, 27.91s/it] 14%|█▍ | 704/5000 [5:42:02<33:31:58, 28.10s/it] 14%|█▍ | 705/5000 [5:42:29<33:12:10, 27.83s/it] 14%|█▍ | 706/5000 [5:42:58<33:31:47, 28.11s/it] 14%|█▍ | 707/5000 [5:43:25<33:10:31, 27.82s/it] 14%|█▍ | 708/5000 [5:43:54<33:28:57, 28.08s/it] 14%|█▍ | 709/5000 [5:44:22<33:26:10, 28.05s/it] 14%|█▍ | 710/5000 [5:44:49<33:15:27, 27.91s/it] 14%|█▍ | 711/5000 [5:45:18<33:30:15, 28.12s/it] 14%|█▍ | 712/5000 [5:45:46<33:20:00, 27.99s/it] 14%|█▍ | 713/5000 [5:46:14<33:28:33, 28.11s/it] 14%|█▍ | 714/5000 [5:46:42<33:24:13, 28.06s/it] 14%|█▍ | 715/5000 [5:47:10<33:25:10, 28.08s/it] 14%|█▍ | 716/5000 [5:47:39<33:45:55, 28.37s/it] 14%|█▍ | 717/5000 [5:48:06<33:08:19, 27.85s/it] 14%|█▍ | 718/5000 [5:48:34<33:19:45, 28.02s/it] 14%|█▍ | 719/5000 [5:49:01<32:57:32, 27.72s/it] 14%|█▍ | 720/5000 [5:49:30<33:10:49, 27.91s/it] 14%|█▍ | 721/5000 [5:49:58<33:11:27, 27.92s/it] 14%|█▍ | 722/5000 [5:50:26<33:14:19, 27.97s/it] 14%|█▍ | 723/5000 [5:50:55<33:50:46, 28.49s/it] 14%|█▍ | 724/5000 [5:51:16<31:01:47, 26.12s/it] 14%|█▍ | 725/5000 [5:51:27<25:30:12, 21.48s/it] 14%|█▍ | 725/5000 [5:51:27<25:30:12, 21.48s/it] 15%|█▍ | 726/5000 [5:51:37<21:34:55, 18.18s/it] 15%|█▍ | 727/5000 [5:51:48<18:50:29, 15.87s/it]{'loss': 0.2339, 'learning_rate': 9.835555555555556e-06, 'epoch': 2.02} +{'loss': 0.1714, 'learning_rate': 9.780000000000001e-06, 'epoch': 2.02} +{'loss': 0.1657, 'learning_rate': 9.724444444444445e-06, 'epoch': 2.03} +{'loss': 0.1688, 'learning_rate': 9.66888888888889e-06, 'epoch': 2.03} +{'loss': 0.1616, 'learning_rate': 9.613333333333335e-06, 'epoch': 2.04} +{'loss': 0.1541, 'learning_rate': 9.557777777777777e-06, 'epoch': 2.04} +{'loss': 0.1565, 'learning_rate': 9.502222222222223e-06, 'epoch': 2.05} + + Reading metadata...: 0it [00:00, ?it/s] + Reading metadata...: 1it [00:00, 1.22it/s] + Reading metadata...: 15305it [00:00, 22720.16it/s] + Reading metadata...: 24307it [00:01, 16290.58it/s] + Reading metadata...: 38794it [00:01, 29865.72it/s] + Reading metadata...: 47811it [00:02, 24797.60it/s] + Reading metadata...: 59039it [00:03, 18295.04it/s] + Reading metadata...: 72636it [00:03, 27336.81it/s] + Reading metadata...: 80216it [00:04, 16069.94it/s] + Reading metadata...: 93450it [00:04, 23744.46it/s] + Reading metadata...: 101289it [00:04, 23211.42it/s] + Reading metadata...: 114047it [00:07, 10528.73it/s] + Reading metadata...: 128225it [00:07, 15810.54it/s] + Reading metadata...: 135590it [00:11, 5740.60it/s]  + Reading metadata...: 149664it [00:12, 8135.61it/s] + Reading metadata...: 164652it [00:12, 12291.18it/s] + Reading metadata...: 172319it [00:14, 7862.68it/s]  + Reading metadata...: 184380it [00:16, 6816.06it/s] + Reading metadata...: 199539it [00:16, 10435.79it/s] + Reading metadata...: 206876it [00:17, 11503.04it/s] + Reading metadata...: 220924it [00:17, 17051.73it/s] + Reading metadata...: 229142it [00:21, 5822.80it/s]  Reading metadata...: 230467it [00:21, 10480.94it/s] + + Reading metadata...: 0it [00:00, ?it/s] + Reading metadata...: 1it [00:00, 3.29it/s] + Reading metadata...: 14712it [00:00, 47049.80it/s] Reading metadata...: 15520it [00:00, 37871.82it/s] + 15%|█▍ | 728/5000 [5:53:41<53:39:39, 45.22s/it] 15%|█▍ | 729/5000 [5:54:11<48:04:05, 40.52s/it] 15%|█▍ | 730/5000 [5:54:39<43:36:48, 36.77s/it] 15%|█▍ | 731/5000 [5:55:08<40:59:17, 34.56s/it] 15%|█▍ | 732/5000 [5:55:42<40:45:17, 34.38s/it] 15%|█▍ | 733/5000 [5:56:09<38:11:56, 32.23s/it] 15%|█▍ | 734/5000 [5:56:40<37:27:14, 31.61s/it] 15%|█▍ | 735/5000 [5:57:07<36:04:03, 30.44s/it] 15%|█▍ | 736/5000 [5:57:42<37:32:24, 31.69s/it] 15%|█▍ | 737/5000 [5:58:10<36:21:08, 30.70s/it] 15%|█▍ | 738/5000 [5:58:40<36:02:02, 30.44s/it] 15%|█▍ | 739/5000 [5:59:10<35:56:49, 30.37s/it] 15%|█▍ | 740/5000 [5:59:38<34:51:02, 29.45s/it] 15%|█▍ | 741/5000 [6:00:07<34:45:35, 29.38s/it] 15%|█▍ | 742/5000 [6:00:34<33:56:30, 28.70s/it] 15%|█▍ | 743/5000 [6:01:05<34:37:05, 29.28s/it] 15%|█▍ | 744/5000 [6:01:34<34:34:46, 29.25s/it] 15%|█▍ | 745/5000 [6:02:06<35:41:29, 30.20s/it] 15%|█▍ | 746/5000 [6:02:36<35:33:58, 30.10s/it] 15%|█▍ | 747/5000 [6:03:03<34:34:25, 29.27s/it] 15%|█▍ | 748/5000 [6:03:31<34:08:49, 28.91s/it] 15%|█▍ | 749/5000 [6:04:03<34:54:34, 29.56s/it] 15%|█▌ | 750/5000 [6:04:30<34:05:50, 28.88s/it] 15%|█▌ | 750/5000 [6:04:30<34:05:50, 28.88s/it] 15%|█▌ | 751/5000 [6:05:07<37:05:15, 31.42s/it] 15%|█▌ | 752/5000 [6:05:37<36:39:13, 31.06s/it] 15%|█▌ | 753/5000 [6:06:05<35:28:54, 30.08s/it] 15%|█▌ | 754/5000 [6:06:39<36:51:03, 31.24s/it] 15%|█▌ | 755/5000 [6:07:07<35:38:37, 30.23s/it] 15%|█▌ | 756/5000 [6:07:37<35:26:10, 30.06s/it] 15%|█▌ | 757/5000 [6:08:07<35:29:49, 30.12s/it] 15%|█▌ | 758/5000 [6:08:34<34:15:32, 29.07s/it] 15%|█▌ | 759/5000 [6:09:03<34:21:23, 29.16s/it] 15%|█▌ | 760/5000 [6:09:30<33:45:17, 28.66s/it] 15%|█▌ | 761/5000 [6:09:59<33:51:21, 28.75s/it] 15%|█▌ | 762/5000 [6:10:27<33:25:13, 28.39s/it] 15%|█▌ | 763/5000 [6:10:54<33:01:41, 28.06s/it] 15%|█▌ | 764/5000 [6:11:31<36:15:42, 30.82s/it] 15%|█▌ | 765/5000 [6:11:59<34:56:02, 29.70s/it] 15%|█▌ | 766/5000 [6:12:28<35:00:23, 29.76s/it] 15%|█▌ | 767/5000 [6:12:57<34:23:52, 29.25s/it] 15%|█▌ | 768/5000 [6:13:24<33:54:39, 28.85s/it] 15%|█▌ | 769/5000 [6:13:55<34:23:01, 29.26s/it] 15%|█▌ | 770/5000 [6:14:23<34:04:35, 29.00s/it] 15%|█▌ | 771/5000 [6:14:51<33:51:13, 28.82s/it] 15%|█▌ | 772/5000 [6:15:26<35:51:57, 30.54s/it] 15%|█▌ | 773/5000 [6:15:53<34:33:53, 29.44s/it] 15%|█▌ | 774/5000 [6:16:23<34:46:09, 29.62s/it] 16%|█▌ | 775/5000 [6:16:50<33:53:36, 28.88s/it] 16%|█▌ | 775/5000 [6:16:50<33:53:36, 28.88s/it] 16%|█▌ | 776/5000 [6:17:19<33:49:31, 28.83s/it] 16%|█▌ | 777/5000 [6:17:47<33:27:29, 28.52s/it] 16%|█▌ | 778/5000 [6:18:15<33:18:03, 28.40s/it] 16%|█▌ | 779/5000 [6:18:44<33:35:37, 28.65s/it] 16%|█▌ | 780/5000 [6:19:14<34:01:02, 29.02s/it] 16%|█▌ | 781/5000 [6:19:41<33:22:30, 28.48s/it] 16%|█▌ | 782/5000 [6:20:11<33:48:20, 28.85s/it] 16%|█▌ | 783/5000 [6:20:38<33:12:53, 28.36s/it] 16%|█▌ | 784/5000 [6:21:06<33:04:51, 28.25s/it] 16%|█▌ | 785/5000 [6:21:36<33:49:27, 28.89s/it] 16%|█▌ | 786/5000 [6:22:04<33:16:00, 28.42s/it] 16%|█▌ | 787/5000 [6:22:37<35:04:51, 29.98s/it] 16%|█▌ | 788/5000 [6:23:05<34:07:36, 29.17s/it] 16%|█▌ | 789/5000 [6:23:35<34:37:41, 29.60s/it] 16%|█▌ | 790/5000 [6:24:05<34:39:41, 29.64s/it] 16%|█▌ | 791/5000 [6:24:32<33:54:30, 29.00s/it] 16%|█▌ | 792/5000 [6:25:02<34:02:49, 29.13s/it] 16%|█▌ | 793/5000 [6:25:30<33:32:34, 28.70s/it] 16%|█▌ | 794/5000 [6:25:59<33:55:58, 29.04s/it] 16%|█▌ | 795/5000 [6:26:27<33:22:46, 28.58s/it] 16%|█▌ | 796/5000 [6:26:57<33:46:37, 28.92s/it] 16%|█▌ | 797/5000 [6:27:26<34:00:05, 29.12s/it] 16%|█▌ | 798/5000 [6:27:54<33:36:14, 28.79s/it] 16%|█▌ | 799/5000 [6:28:22<33:09:42, 28.42s/it] 16%|█▌ | 800/5000 [6:28:50<33:05:30, 28.36s/it] 16%|█▌ | 800/5000 [6:28:50<33:05:30, 28.36s/it] 16%|█▌ | 801/5000 [6:29:20<33:49:58, 29.01s/it] 16%|█▌ | 802/5000 [6:29:48<33:18:34, 28.56s/it] 16%|█▌ | 803/5000 [6:30:16<33:00:33, 28.31s/it] 16%|█▌ | 804/5000 [6:30:45<33:21:32, 28.62s/it] 16%|█▌ | 805/5000 [6:31:12<32:53:59, 28.23s/it] 16%|█▌ | 806/5000 [6:31:43<33:43:33, 28.95s/it] 16%|█▌ | 807/5000 [6:32:12<33:51:49, 29.07s/it] 16%|█▌ | 808/5000 [6:32:40<33:21:02, 28.64s/it] 16%|█▌ | 809/5000 [6:33:09<33:38:23, 28.90s/it] 16%|█▌ | 810/5000 [6:33:38<33:21:03, 28.65s/it] 16%|█▌ | 811/5000 [6:34:07<33:43:25, 28.98s/it] 16%|█▌ | 812/5000 [6:34:38<34:18:16, 29.49s/it] 16%|█▋ | 813/5000 [6:35:05<33:21:11, 28.68s/it] 16%|█▋ | 814/5000 [6:35:40<35:44:30, 30.74s/it] 16%|█▋ | 815/5000 [6:36:08<34:48:28, 29.94s/it] 16%|█▋ | 816/5000 [6:36:36<33:51:27, 29.13s/it] 16%|█▋ | 817/5000 [6:37:05<34:01:11, 29.28s/it] 16%|█▋ | 818/5000 [6:37:32<33:15:36, 28.63s/it] 16%|█▋ | 819/5000 [6:38:03<33:49:33, 29.13s/it] 16%|█▋ | 820/5000 [6:38:30<33:13:06, 28.61s/it] 16%|█▋ | 821/5000 [6:39:00<33:48:53, 29.13s/it] 16%|█▋ | 822/5000 [6:39:30<33:48:45, 29.13s/it] 16%|█▋ | 823/5000 [6:39:58<33:22:56, 28.77s/it] 16%|█▋ | 824/5000 [6:40:27<33:47:23, 29.13s/it] 16%|█▋ | 825/5000 [6:40:55<33:15:27, 28.68s/it] 16%|█▋ | 825/5000 [6:40:55<33:15:27, 28.68s/it] 17%|█▋ | 826/5000 [6:41:24<33:29:37, 28.89s/it] 17%|█▋ | 827/5000 [6:41:54<33:46:18, 29.13s/it] 17%|█▋ | 828/5000 [6:42:21<33:04:41, 28.54s/it] 17%|█▋ | 829/5000 [6:42:50<33:13:19, 28.67s/it] 17%|█▋ | 830/5000 [6:43:18<32:42:05, 28.23s/it] 17%|█▋ | 831/5000 [6:43:49<33:39:21, 29.06s/it] 17%|█▋ | 832/5000 [6:44:19<33:58:04, 29.34s/it] 17%|█▋ | 833/5000 [6:44:47<33:44:01, 29.14s/it] 17%|█▋ | 834/5000 [6:45:17<33:50:18, 29.24s/it] 17%|█▋ | 835/5000 [6:45:44<33:19:22, 28.80s/it] 17%|█▋ | 836/5000 [6:46:14<33:30:41, 28.97s/it] 17%|█▋ | 837/5000 [6:46:45<34:14:45, 29.61s/it] 17%|█▋ | 838/5000 [6:47:12<33:30:04, 28.98s/it] 17%|█▋ | 839/5000 [6:47:42<33:47:51, 29.24s/it] 17%|█▋ | 840/5000 [6:48:09<33:02:44, 28.60s/it] 17%|█▋ | 841/5000 [6:48:39<33:31:31, 29.02s/it] 17%|█▋ | 842/5000 [6:49:09<33:35:38, 29.09s/it] 17%|█▋ | 843/5000 [6:49:37<33:12:01, 28.75s/it] 17%|█▋ | 844/5000 [6:50:06<33:23:16, 28.92s/it] 17%|█▋ | 845/5000 [6:50:34<33:01:02, 28.61s/it] 17%|█▋ | 846/5000 [6:51:03<33:12:25, 28.78s/it] 17%|█▋ | 847/5000 [6:51:33<33:48:14, 29.30s/it] 17%|█▋ | 848/5000 [6:52:01<33:01:41, 28.64s/it] 17%|█▋ | 849/5000 [6:52:37<35:46:30, 31.03s/it] 17%|█▋ | 850/5000 [6:53:05<34:29:28, 29.92s/it] 17%|█▋ | 850/5000 [6:53:05<34:29:28, 29.92s/it] 17%|█▋ | 851/5000 [6:53:35<34:48:42, 30.21s/it] 17%|█▋ | 852/5000 [6:54:05<34:42:58, 30.13s/it] 17%|█▋ | 853/5000 [6:54:33<33:51:44, 29.40s/it] 17%|█▋ | 854/5000 [6:55:03<34:05:02, 29.60s/it] 17%|█▋ | 855/5000 [6:55:30<33:11:42, 28.83s/it] 17%|█▋ | 856/5000 [6:55:58<32:59:04, 28.65s/it] 17%|█▋ | 857/5000 [6:56:27<33:07:10, 28.78s/it] 17%|█▋ | 858/5000 [6:56:55<32:40:02, 28.39s/it] 17%|█▋ | 859/5000 [6:57:23<32:24:55, 28.18s/it] 17%|█▋ | 860/5000 [6:57:58<34:50:59, 30.30s/it] 17%|█▋ | 861/5000 [6:58:25<33:51:25, 29.45s/it] 17%|█▋ | 862/5000 [6:58:58<34:55:20, 30.38s/it] 17%|█▋ | 863/5000 [6:59:25<33:42:58, 29.34s/it] 17%|█▋ | 864/5000 [6:59:54<33:43:34, 29.36s/it] 17%|█▋ | 865/5000 [7:00:22<33:14:45, 28.94s/it] 17%|█▋ | 866/5000 [7:00:50<32:47:23, 28.55s/it] 17%|█▋ | 867/5000 [7:01:20<33:13:25, 28.94s/it] 17%|█▋ | 868/5000 [7:01:48<32:52:28, 28.64s/it] 17%|█▋ | 869/5000 [7:02:17<33:06:23, 28.85s/it] 17%|█▋ | 870/5000 [7:02:45<32:51:15, 28.64s/it] 17%|█▋ | 871/5000 [7:03:20<35:01:17, 30.53s/it] 17%|█▋ | 872/5000 [7:03:51<35:08:10, 30.64s/it] 17%|█▋ | 873/5000 [7:04:18<33:48:05, 29.49s/it] 17%|█▋ | 874/5000 [7:04:48<34:08:34, 29.79s/it] 18%|█▊ | 875/5000 [7:05:18<34:01:22, 29.69s/it] 18%|█▊ | 875/5000 [7:05:18<34:01:22, 29.69s/it] 18%|█▊ | 876/5000 [7:05:45<33:13:20, 29.00s/it] 18%|█▊ | 877/5000 [7:06:14<33:17:05, 29.06s/it] 18%|█▊ | 878/5000 [7:06:42<32:56:44, 28.77s/it] 18%|█▊ | 879/5000 [7:07:11<32:55:37, 28.76s/it] 18%|█▊ | 880/5000 [7:07:40<33:04:05, 28.89s/it] 18%|█▊ | 881/5000 [7:08:07<32:21:35, 28.28s/it] 18%|█▊ | 882/5000 [7:08:37<32:51:55, 28.73s/it] 18%|█▊ | 883/5000 [7:09:04<32:23:05, 28.32s/it] 18%|█▊ | 884/5000 [7:09:34<32:45:27, 28.65s/it] 18%|█▊ | 885/5000 [7:10:05<33:36:33, 29.40s/it] 18%|█▊ | 886/5000 [7:10:32<32:53:07, 28.78s/it] 18%|█▊ | 887/5000 [7:11:03<33:34:37, 29.39s/it] 18%|█▊ | 888/5000 [7:11:30<32:49:42, 28.74s/it] 18%|█▊ | 889/5000 [7:12:01<33:27:55, 29.31s/it] 18%|█▊ | 890/5000 [7:12:30<33:29:28, 29.34s/it] 18%|█▊ | 891/5000 [7:12:58<32:54:51, 28.84s/it] 18%|█▊ | 892/5000 [7:13:27<33:08:42, 29.05s/it] 18%|█▊ | 893/5000 [7:13:57<33:18:21, 29.19s/it] 18%|█▊ | 894/5000 [7:14:26<33:22:13, 29.26s/it] 18%|█▊ | 895/5000 [7:14:59<34:38:36, 30.38s/it] 18%|█▊ | 896/5000 [7:15:27<33:39:13, 29.52s/it] 18%|█▊ | 897/5000 [7:15:59<34:24:06, 30.18s/it] 18%|█▊ | 898/5000 [7:16:26<33:21:32, 29.28s/it] 18%|█▊ | 899/5000 [7:16:58<34:20:00, 30.14s/it] 18%|█▊ | 900/5000 [7:17:28<34:12:41, 30.04s/it] 18%|█▊ | 900/5000 [7:17:28<34:12:41, 30.04s/it] 18%|█▊ | 901/5000 [7:17:57<33:54:36, 29.78s/it] 18%|█▊ | 902/5000 [7:18:27<34:02:04, 29.90s/it] 18%|█▊ | 903/5000 [7:18:58<34:21:46, 30.19s/it] 18%|█▊ | 904/5000 [7:19:35<36:36:11, 32.17s/it] 18%|█▊ | 905/5000 [7:20:06<36:13:23, 31.84s/it] 18%|█▊ | 906/5000 [7:20:32<34:11:47, 30.07s/it] 18%|█▊ | 907/5000 [7:21:01<33:47:49, 29.73s/it] 18%|█▊ | 908/5000 [7:21:28<32:48:30, 28.86s/it] 18%|█▊ | 909/5000 [7:21:58<33:15:32, 29.27s/it] 18%|█▊ | 910/5000 [7:22:28<33:36:40, 29.58s/it] 18%|█▊ | 911/5000 [7:22:57<33:11:37, 29.22s/it] 18%|█▊ | 912/5000 [7:23:25<32:46:31, 28.86s/it] 18%|█▊ | 913/5000 [7:23:52<32:25:41, 28.56s/it] 18%|█▊ | 914/5000 [7:24:23<33:02:25, 29.11s/it] 18%|█▊ | 915/5000 [7:24:51<32:43:25, 28.84s/it] 18%|█▊ | 916/5000 [7:25:21<33:07:57, 29.21s/it] 18%|█▊ | 917/5000 [7:25:51<33:30:42, 29.55s/it] 18%|█▊ | 918/5000 [7:26:19<32:42:58, 28.85s/it] 18%|█▊ | 919/5000 [7:26:47<32:31:37, 28.69s/it] 18%|█▊ | 920/5000 [7:27:14<32:06:16, 28.33s/it] 18%|█▊ | 921/5000 [7:27:44<32:26:34, 28.63s/it] 18%|█▊ | 922/5000 [7:28:12<32:09:55, 28.40s/it] 18%|█▊ | 923/5000 [7:28:40<32:08:38, 28.38s/it] 18%|█▊ | 924/5000 [7:29:08<31:54:16, 28.18s/it] 18%|█▊ | 925/5000 [7:29:35<31:44:30, 28.04s/it] 18%|█▊ | 925/5000 [7:29:35<31:44:30, 28.04s/it] 19%|█▊ | 926/5000 [7:30:04<31:47:25, 28.09s/it] 19%|█▊ | 927/5000 [7:30:32<32:00:48, 28.30s/it] 19%|█▊ | 928/5000 [7:31:00<31:50:53, 28.16s/it] 19%|█▊ | 929/5000 [7:31:29<31:57:40, 28.26s/it] 19%|█▊ | 930/5000 [7:31:55<31:16:47, 27.67s/it] 19%|█▊ | 931/5000 [7:32:23<31:32:21, 27.90s/it] 19%|█▊ | 932/5000 [7:32:51<31:14:55, 27.65s/it] 19%|█▊ | 933/5000 [7:33:18<31:16:09, 27.68s/it] 19%|█▊ | 934/5000 [7:33:47<31:28:34, 27.87s/it] 19%|█▊ | 935/5000 [7:34:14<31:16:16, 27.69s/it] 19%|█▊ | 936/5000 [7:34:42<31:21:30, 27.78s/it] 19%|█▊ | 937/5000 [7:35:09<31:08:43, 27.60s/it] 19%|█▉ | 938/5000 [7:35:37<31:22:31, 27.81s/it] 19%|█▉ | 939/5000 [7:36:04<31:06:48, 27.58s/it] 19%|█▉ | 940/5000 [7:36:33<31:35:04, 28.01s/it] 19%|█▉ | 941/5000 [7:37:00<31:17:01, 27.75s/it] 19%|█▉ | 942/5000 [7:37:29<31:25:26, 27.88s/it] 19%|█▉ | 943/5000 [7:37:56<31:23:47, 27.86s/it] 19%|█▉ | 944/5000 [7:38:24<31:15:03, 27.74s/it] 19%|█▉ | 945/5000 [7:38:52<31:25:30, 27.90s/it] 19%|█▉ | 946/5000 [7:39:20<31:15:43, 27.76s/it] 19%|█▉ | 947/5000 [7:39:47<31:08:51, 27.67s/it] 19%|█▉ | 948/5000 [7:40:15<31:05:34, 27.62s/it] 19%|█▉ | 949/5000 [7:40:42<31:08:09, 27.67s/it] 19%|█▉ | 950/5000 [7:41:10<31:10:19, 27.71s/it] 19%|█▉ | 950/5000 [7:41:10<31:10:19, 27.71s/it] 19%|█▉ | 951/5000 [7:41:38<31:02:17, 27.60s/it] 19%|█▉ | 952/5000 [7:42:06<31:17:56, 27.84s/it] 19%|█▉ | 953/5000 [7:42:33<31:10:47, 27.74s/it] 19%|█▉ | 954/5000 [7:43:01<31:10:38, 27.74s/it] 19%|█▉ | 955/5000 [7:43:29<31:16:40, 27.84s/it] 19%|█▉ | 956/5000 [7:43:57<31:04:24, 27.66s/it] 19%|█▉ | 957/5000 [7:44:26<31:34:34, 28.12s/it] 19%|█▉ | 958/5000 [7:44:53<31:20:08, 27.91s/it] 19%|█▉ | 959/5000 [7:45:26<32:58:33, 29.38s/it] 19%|█▉ | 960/5000 [7:45:53<32:19:12, 28.80s/it] 19%|█▉ | 961/5000 [7:46:25<33:25:18, 29.79s/it] 19%|█▉ | 962/5000 [7:46:56<33:35:23, 29.95s/it] 19%|█▉ | 963/5000 [7:47:26<33:36:12, 29.97s/it] 19%|█▉ | 964/5000 [7:47:56<33:48:16, 30.15s/it] 19%|█▉ | 965/5000 [7:48:28<34:15:00, 30.56s/it] 19%|█▉ | 966/5000 [7:48:57<33:51:06, 30.21s/it] 19%|█▉ | 967/5000 [7:49:09<27:42:45, 24.74s/it] 19%|█▉ | 968/5000 [7:49:20<22:55:00, 20.46s/it] 19%|█▉ | 969/5000 [7:49:30<19:35:46, 17.50s/it] 19%|█▉ | 970/5000 [7:49:41<17:13:56, 15.39s/it]{'loss': 0.1355, 'learning_rate': 9.446666666666667e-06, 'epoch': 3.0} +{'loss': 0.1341, 'learning_rate': 9.391111111111111e-06, 'epoch': 3.01} +{'loss': 0.1286, 'learning_rate': 9.335555555555557e-06, 'epoch': 3.01} +{'loss': 0.1343, 'learning_rate': 9.280000000000001e-06, 'epoch': 3.02} +{'loss': 0.0982, 'learning_rate': 9.224444444444445e-06, 'epoch': 3.02} +{'loss': 0.0957, 'learning_rate': 9.168888888888889e-06, 'epoch': 3.03} +{'loss': 0.1034, 'learning_rate': 9.113333333333335e-06, 'epoch': 3.03} +{'loss': 0.099, 'learning_rate': 9.057777777777779e-06, 'epoch': 3.04} +{'loss': 0.0863, 'learning_rate': 9.002222222222223e-06, 'epoch': 3.04} + + Reading metadata...: 0it [00:00, ?it/s] + Reading metadata...: 1it [00:02, 2.60s/it] + Reading metadata...: 15617it [00:02, 8140.52it/s] + Reading metadata...: 24803it [00:06, 4292.71it/s] + Reading metadata...: 39955it [00:06, 8647.84it/s] + Reading metadata...: 49034it [00:08, 6612.74it/s] + Reading metadata...: 59039it [00:10, 5732.00it/s] + Reading metadata...: 73373it [00:10, 9308.71it/s] + Reading metadata...: 80810it [00:12, 6627.27it/s] + Reading metadata...: 94811it [00:12, 10402.09it/s] + Reading metadata...: 102746it [00:15, 7114.62it/s] + Reading metadata...: 114047it [00:17, 6416.84it/s] + Reading metadata...: 127999it [00:17, 9826.73it/s] + Reading metadata...: 135058it [00:18, 9788.92it/s] + Reading metadata...: 149274it [00:18, 15031.61it/s] + Reading metadata...: 157206it [00:20, 8902.85it/s]  + Reading metadata...: 168342it [00:27, 3691.69it/s] + Reading metadata...: 180935it [00:27, 5491.80it/s] + Reading metadata...: 187520it [00:29, 4752.96it/s] + Reading metadata...: 202142it [00:29, 7631.55it/s] + Reading metadata...: 209822it [00:31, 6112.93it/s] + Reading metadata...: 221914it [00:33, 6083.09it/s] Reading metadata...: 230467it [00:33, 6849.17it/s] + + Reading metadata...: 0it [00:00, ?it/s] + Reading metadata...: 1it [00:02, 2.09s/it] + Reading metadata...: 15257it [00:02, 9746.46it/s] Reading metadata...: 15520it [00:02, 7068.64it/s] + 19%|█▉ | 971/5000 [7:52:06<60:48:31, 54.33s/it] 19%|█▉ | 972/5000 [7:52:38<53:08:38, 47.50s/it] 19%|█▉ | 973/5000 [7:53:07<46:54:55, 41.94s/it] 19%|█▉ | 974/5000 [7:53:34<42:06:19, 37.65s/it] 20%|█▉ | 975/5000 [7:54:04<39:23:15, 35.23s/it] 20%|��▉ | 975/5000 [7:54:04<39:23:15, 35.23s/it] 20%|█▉ | 976/5000 [7:54:31<36:49:15, 32.94s/it] 20%|█▉ | 977/5000 [7:54:59<34:59:49, 31.32s/it] 20%|█▉ | 978/5000 [7:55:29<34:29:03, 30.87s/it] 20%|█▉ | 979/5000 [7:55:56<33:18:25, 29.82s/it] 20%|█▉ | 980/5000 [7:56:26<33:26:54, 29.95s/it] 20%|█▉ | 981/5000 [7:56:54<32:35:09, 29.19s/it] 20%|█▉ | 982/5000 [7:57:23<32:45:47, 29.35s/it] 20%|█▉ | 983/5000 [7:57:52<32:28:20, 29.10s/it] 20%|█▉ | 984/5000 [7:58:19<31:49:56, 28.53s/it] 20%|█▉ | 985/5000 [7:58:49<32:10:44, 28.85s/it] 20%|█▉ | 986/5000 [7:59:16<31:32:44, 28.29s/it] 20%|█▉ | 987/5000 [7:59:44<31:29:57, 28.26s/it] 20%|█▉ | 988/5000 [8:00:13<31:47:08, 28.52s/it] 20%|█▉ | 989/5000 [8:00:40<31:23:04, 28.17s/it] 20%|█▉ | 990/5000 [8:01:11<32:01:09, 28.75s/it] 20%|█▉ | 991/5000 [8:01:38<31:34:53, 28.36s/it] 20%|█▉ | 992/5000 [8:02:07<31:41:55, 28.47s/it] 20%|█▉ | 993/5000 [8:02:35<31:27:53, 28.27s/it] 20%|█▉ | 994/5000 [8:03:02<31:15:46, 28.09s/it] 20%|█▉ | 995/5000 [8:03:29<30:57:37, 27.83s/it] 20%|█▉ | 996/5000 [8:03:59<31:24:05, 28.23s/it] 20%|█▉ | 997/5000 [8:04:27<31:29:14, 28.32s/it] 20%|█▉ | 998/5000 [8:04:55<31:25:54, 28.27s/it] 20%|█▉ | 999/5000 [8:05:24<31:33:52, 28.40s/it] 20%|██ | 1000/5000 [8:05:54<32:01:14, 28.82s/it] 20%|██ | 1000/5000 [8:05:54<32:01:14, 28.82s/it][INFO|trainer.py:3138] 2023-05-10 17:47:34,809 >> ***** Running Evaluation ***** +[INFO|trainer.py:3142] 2023-05-10 17:47:34,809 >> Num examples: Unknown +[INFO|trainer.py:3143] 2023-05-10 17:47:34,809 >> Batch size = 64 +{'loss': 0.1075, 'learning_rate': 8.946666666666669e-06, 'epoch': 4.0} +{'loss': 0.1386, 'learning_rate': 8.891111111111111e-06, 'epoch': 4.01} + + Reading metadata...: 0it [00:00, ?it/s] + Reading metadata...: 1it [00:02, 2.10s/it] Reading metadata...: 15520it [00:02, 7062.10it/s] +[INFO|trainer_utils.py:693] 2023-05-10 17:47:50,596 >> The following columns in the evaluation set don't have a corresponding argument in `WhisperForConditionalGeneration.forward` and have been ignored: input_length. If input_length are not expected by `WhisperForConditionalGeneration.forward`, you can safely ignore this message. + 20%|██ | 1000/5000 [9:02:19<32:01:14, 28.82s/it][INFO|trainer.py:2877] 2023-05-10 18:43:59,730 >> Saving model checkpoint to ./checkpoint-1000 +[INFO|configuration_utils.py:458] 2023-05-10 18:43:59,735 >> Configuration saved in ./checkpoint-1000/config.json +[INFO|configuration_utils.py:364] 2023-05-10 18:43:59,739 >> Configuration saved in ./checkpoint-1000/generation_config.json +[INFO|modeling_utils.py:1855] 2023-05-10 18:44:03,168 >> Model weights saved in ./checkpoint-1000/pytorch_model.bin +[INFO|feature_extraction_utils.py:369] 2023-05-10 18:44:03,173 >> Feature extractor saved in ./checkpoint-1000/preprocessor_config.json +[INFO|feature_extraction_utils.py:369] 2023-05-10 18:44:11,165 >> Feature extractor saved in ./preprocessor_config.json +Adding files tracked by Git LFS: ['wandb/run-20230509_115211-hq92t8sj/run-hq92t8sj.wandb', 'wandb/run-20230510_094132-lvsln7ks/run-lvsln7ks.wandb']. This may take a bit of time if the files are large. +{'eval_loss': 0.24644243717193604, 'eval_wer': 9.800036380645496, 'eval_runtime': 3384.9122, 'eval_samples_per_second': 4.585, 'eval_steps_per_second': 0.072, 'epoch': 4.01} +05/10/2023 18:44:21 - WARNING - huggingface_hub.repository - Adding files tracked by Git LFS: ['wandb/run-20230509_115211-hq92t8sj/run-hq92t8sj.wandb', 'wandb/run-20230510_094132-lvsln7ks/run-lvsln7ks.wandb']. This may take a bit of time if the files are large.